// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2021, Microsoft Corporation. */

#include <uapi/linux/bpf.h>

#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/mm.h>

#include <net/checksum.h>
#include <net/ip6_checksum.h>

#include "mana.h"

/* Microsoft Azure Network Adapter (MANA) functions */
static int mana_open(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	int err;

	err = mana_alloc_queues(ndev);
	if (err)
		return err;

	apc->port_is_up = true;

	/* Ensure port state updated before txq state */
	smp_wmb();

	netif_carrier_on(ndev);
	netif_tx_wake_all_queues(ndev);

	return 0;
}
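/* Note on the open path above: apc->port_is_up must be visible as true
 * before the TX queues are woken, because mana_start_xmit() drops packets
 * whenever it observes port_is_up == false. The write barrier between the
 * two steps enforces that ordering; the matching reads happen in
 * mana_start_xmit() and mana_poll_tx_cq().
 */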
static int mana_close(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);

	if (!apc->port_is_up)
		return 0;

	return mana_detach(ndev, true);
}
static bool mana_can_tx(struct gdma_queue *wq)
{
	return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE;
}
static unsigned int mana_checksum_info(struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *ip = ip_hdr(skb);

		if (ip->protocol == IPPROTO_TCP)
			return IPPROTO_TCP;

		if (ip->protocol == IPPROTO_UDP)
			return IPPROTO_UDP;
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct ipv6hdr *ip6 = ipv6_hdr(skb);

		if (ip6->nexthdr == IPPROTO_TCP)
			return IPPROTO_TCP;

		if (ip6->nexthdr == IPPROTO_UDP)
			return IPPROTO_UDP;
	}

	/* No csum offloading */
	return 0;
}
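/* mana_checksum_info() returns the transport protocol (IPPROTO_TCP or
 * IPPROTO_UDP) whose checksum the hardware can compute, or 0 when no
 * checksum offload is possible. mana_start_xmit() uses the result to set
 * the matching comp_tcp_csum/comp_udp_csum bits in the TX OOB and falls
 * back to skb_checksum_help() otherwise.
 */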
static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc,
			struct mana_tx_package *tp)
{
	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
	struct gdma_dev *gd = apc->ac->gdma_dev;
	struct gdma_context *gc;
	struct device *dev;
	skb_frag_t *frag;
	dma_addr_t da;
	int i;

	gc = gd->gdma_context;
	dev = gc->dev;
	da = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);

	if (dma_mapping_error(dev, da))
		return -ENOMEM;

	ash->dma_handle[0] = da;
	ash->size[0] = skb_headlen(skb);

	tp->wqe_req.sgl[0].address = ash->dma_handle[0];
	tp->wqe_req.sgl[0].mem_key = gd->gpa_mkey;
	tp->wqe_req.sgl[0].size = ash->size[0];

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		frag = &skb_shinfo(skb)->frags[i];
		da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag),
				      DMA_TO_DEVICE);

		if (dma_mapping_error(dev, da))
			goto frag_err;

		ash->dma_handle[i + 1] = da;
		ash->size[i + 1] = skb_frag_size(frag);

		tp->wqe_req.sgl[i + 1].address = ash->dma_handle[i + 1];
		tp->wqe_req.sgl[i + 1].mem_key = gd->gpa_mkey;
		tp->wqe_req.sgl[i + 1].size = ash->size[i + 1];
	}

	return 0;

frag_err:
	for (i = i - 1; i >= 0; i--)
		dma_unmap_page(dev, ash->dma_handle[i + 1], ash->size[i + 1],
			       DMA_TO_DEVICE);

	dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE);

	return -ENOMEM;
}
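/* DMA mapping layout produced by mana_map_skb(): SGE 0 always covers the
 * linear part of the skb (skb_headlen() bytes) and SGE i + 1 covers page
 * fragment i. On a mapping failure the fragments mapped so far are
 * unmapped in reverse order and the linear mapping is released, so the
 * caller only has to treat the whole call as failed.
 */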
int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
	enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
	struct mana_port_context *apc = netdev_priv(ndev);
	u16 txq_idx = skb_get_queue_mapping(skb);
	struct gdma_dev *gd = apc->ac->gdma_dev;
	bool ipv4 = false, ipv6 = false;
	struct mana_tx_package pkg = {};
	struct netdev_queue *net_txq;
	struct mana_stats_tx *tx_stats;
	struct gdma_queue *gdma_sq;
	unsigned int csum_type;
	struct mana_txq *txq;
	struct mana_cq *cq;
	int err, len;

	if (unlikely(!apc->port_is_up))
		goto tx_drop;

	if (skb_cow_head(skb, MANA_HEADROOM))
		goto tx_drop_count;

	txq = &apc->tx_qp[txq_idx].txq;
	gdma_sq = txq->gdma_sq;
	cq = &apc->tx_qp[txq_idx].tx_cq;

	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;

	if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) {
		pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset;
		pkt_fmt = MANA_LONG_PKT_FMT;
	} else {
		pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset;
	}

	pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt;

	if (pkt_fmt == MANA_SHORT_PKT_FMT)
		pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob);
	else
		pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob);

	pkg.wqe_req.inline_oob_data = &pkg.tx_oob;
	pkg.wqe_req.flags = 0;
	pkg.wqe_req.client_data_unit = 0;

	pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags;
	WARN_ON_ONCE(pkg.wqe_req.num_sge > 30);

	if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
		pkg.wqe_req.sgl = pkg.sgl_array;
	} else {
		pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge,
					    sizeof(struct gdma_sge),
					    GFP_ATOMIC);
		if (!pkg.sgl_ptr)
			goto tx_drop_count;

		pkg.wqe_req.sgl = pkg.sgl_ptr;
	}

	if (skb->protocol == htons(ETH_P_IP))
		ipv4 = true;
	else if (skb->protocol == htons(ETH_P_IPV6))
		ipv6 = true;

	if (skb_is_gso(skb)) {
		pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
		pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;

		pkg.tx_oob.s_oob.comp_iphdr_csum = 1;
		pkg.tx_oob.s_oob.comp_tcp_csum = 1;
		pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb);

		pkg.wqe_req.client_data_unit = skb_shinfo(skb)->gso_size;
		pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0;
		if (ipv4) {
			ip_hdr(skb)->tot_len = 0;
			ip_hdr(skb)->check = 0;
			tcp_hdr(skb)->check =
				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
						   ip_hdr(skb)->daddr, 0,
						   IPPROTO_TCP, 0);
		} else {
			ipv6_hdr(skb)->payload_len = 0;
			tcp_hdr(skb)->check =
				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
						 &ipv6_hdr(skb)->daddr, 0,
						 IPPROTO_TCP, 0);
		}
	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
		csum_type = mana_checksum_info(skb);

		if (csum_type == IPPROTO_TCP) {
			pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
			pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;

			pkg.tx_oob.s_oob.comp_tcp_csum = 1;
			pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb);
		} else if (csum_type == IPPROTO_UDP) {
			pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
			pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;

			pkg.tx_oob.s_oob.comp_udp_csum = 1;
		} else {
			/* Can't do offload of this type of checksum */
			if (skb_checksum_help(skb))
				goto free_sgl_ptr;
		}
	}

	if (mana_map_skb(skb, apc, &pkg))
		goto free_sgl_ptr;

	skb_queue_tail(&txq->pending_skbs, skb);

	len = skb->len;
	net_txq = netdev_get_tx_queue(ndev, txq_idx);

	err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req,
					(struct gdma_posted_wqe_info *)skb->cb);
	if (!mana_can_tx(gdma_sq)) {
		netif_tx_stop_queue(net_txq);
		apc->eth_stats.stop_queue++;
	}

	if (err) {
		(void)skb_dequeue_tail(&txq->pending_skbs);
		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);

		err = NETDEV_TX_BUSY;
		goto tx_busy;
	}

	err = NETDEV_TX_OK;
	atomic_inc(&txq->pending_sends);

	mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq);

	/* skb may be freed after mana_gd_post_work_request. Do not use it. */
	skb = NULL;

	tx_stats = &txq->stats;
	u64_stats_update_begin(&tx_stats->syncp);
	tx_stats->packets++;
	tx_stats->bytes += len;
	u64_stats_update_end(&tx_stats->syncp);

tx_busy:
	if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) {
		netif_tx_wake_queue(net_txq);
		apc->eth_stats.wake_queue++;
	}

	kfree(pkg.sgl_ptr);
	return err;

free_sgl_ptr:
	kfree(pkg.sgl_ptr);
tx_drop_count:
	ndev->stats.tx_dropped++;
tx_drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}
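/* TX flow control in mana_start_xmit(): the queue is stopped as soon as
 * the send work queue no longer has room for a maximum-sized WQE
 * (mana_can_tx()), and the stop_queue/wake_queue counters in eth_stats
 * track those transitions. pending_sends is incremented per posted skb and
 * decremented in mana_poll_tx_cq(); mana_dealloc_queues() waits for it to
 * drop to zero when draining in-flight packets.
 */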
static void mana_get_stats64(struct net_device *ndev,
			     struct rtnl_link_stats64 *st)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	unsigned int num_queues = apc->num_queues;
	struct mana_stats_rx *rx_stats;
	struct mana_stats_tx *tx_stats;
	unsigned int start;
	u64 packets, bytes;
	int q;

	if (!apc->port_is_up)
		return;

	netdev_stats_to_stats64(st, &ndev->stats);

	for (q = 0; q < num_queues; q++) {
		rx_stats = &apc->rxqs[q]->stats;

		do {
			start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
			packets = rx_stats->packets;
			bytes = rx_stats->bytes;
		} while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));

		st->rx_packets += packets;
		st->rx_bytes += bytes;
	}

	for (q = 0; q < num_queues; q++) {
		tx_stats = &apc->tx_qp[q].txq.stats;

		do {
			start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
			packets = tx_stats->packets;
			bytes = tx_stats->bytes;
		} while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));

		st->tx_packets += packets;
		st->tx_bytes += bytes;
	}
}
static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb,
			     int old_q)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	u32 hash = skb_get_hash(skb);
	struct sock *sk = skb->sk;
	int txq;

	txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK];

	if (txq != old_q && sk && sk_fullsock(sk) &&
	    rcu_access_pointer(sk->sk_dst_cache))
		sk_tx_queue_set(sk, txq);

	return txq;
}
static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb,
			     struct net_device *sb_dev)
{
	int txq;

	if (ndev->real_num_tx_queues == 1)
		return 0;

	txq = sk_tx_queue_get(skb->sk);

	if (txq < 0 || skb->ooo_okay || txq >= ndev->real_num_tx_queues) {
		if (skb_rx_queue_recorded(skb))
			txq = skb_get_rx_queue(skb);
		else
			txq = mana_get_tx_queue(ndev, skb, txq);
	}

	return txq;
}
static const struct net_device_ops mana_devops = {
	.ndo_open		= mana_open,
	.ndo_stop		= mana_close,
	.ndo_select_queue	= mana_select_queue,
	.ndo_start_xmit		= mana_start_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_get_stats64	= mana_get_stats64,
	.ndo_bpf		= mana_bpf,
	.ndo_xdp_xmit		= mana_xdp_xmit,
};
static void mana_cleanup_port_context(struct mana_port_context *apc)
{
	kfree(apc->rxqs);
	apc->rxqs = NULL;
}
static int mana_init_port_context(struct mana_port_context *apc)
{
	apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *),
			    GFP_KERNEL);

	return !apc->rxqs ? -ENOMEM : 0;
}
static int mana_send_request(struct mana_context *ac, void *in_buf,
			     u32 in_len, void *out_buf, u32 out_len)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct gdma_resp_hdr *resp = out_buf;
	struct gdma_req_hdr *req = in_buf;
	struct device *dev = gc->dev;
	static atomic_t activity_id;
	int err;

	req->dev_id = gc->mana.dev_id;
	req->activity_id = atomic_inc_return(&activity_id);

	err = mana_gd_send_request(gc, in_len, in_buf, out_len,
				   out_buf);
	if (err || resp->status) {
		dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
			err, resp->status);
		return err ? err : -EPROTO;
	}

	if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 ||
	    req->activity_id != resp->activity_id) {
		dev_err(dev, "Unexpected mana message response: %x,%x,%x,%x\n",
			req->dev_id.as_uint32, resp->dev_id.as_uint32,
			req->activity_id, resp->activity_id);
		return -EPROTO;
	}

	return 0;
}
static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr,
				const enum mana_command_code expected_code,
				const u32 min_size)
{
	if (resp_hdr->response.msg_type != expected_code)
		return -EPROTO;

	if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1)
		return -EPROTO;

	if (resp_hdr->response.msg_size < min_size)
		return -EPROTO;

	return 0;
}
static int mana_pf_register_hw_vport(struct mana_port_context *apc)
{
	struct mana_register_hw_vport_resp resp = {};
	struct mana_register_hw_vport_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_HW_PORT,
			     sizeof(req), sizeof(resp));
	req.attached_gfid = 1;
	req.is_pf_default_vport = 1;
	req.allow_all_ether_types = 1;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(apc->ndev, "Failed to register hw vPort: %d\n", err);
		return err;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_HW_PORT,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(apc->ndev, "Failed to register hw vPort: %d, 0x%x\n",
			   err, resp.hdr.status);
		return err ? err : -EPROTO;
	}

	apc->port_handle = resp.hw_vport_handle;
	return 0;
}

static void mana_pf_deregister_hw_vport(struct mana_port_context *apc)
{
	struct mana_deregister_hw_vport_resp resp = {};
	struct mana_deregister_hw_vport_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_HW_PORT,
			     sizeof(req), sizeof(resp));
	req.hw_vport_handle = apc->port_handle;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n",
			   err);
		return;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_HW_PORT,
				   sizeof(resp));
	if (err || resp.hdr.status)
		netdev_err(apc->ndev,
			   "Failed to deregister hw vPort: %d, 0x%x\n",
			   err, resp.hdr.status);
}
static int mana_pf_register_filter(struct mana_port_context *apc)
{
	struct mana_register_filter_resp resp = {};
	struct mana_register_filter_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_FILTER,
			     sizeof(req), sizeof(resp));
	req.vport = apc->port_handle;
	memcpy(req.mac_addr, apc->mac_addr, ETH_ALEN);

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(apc->ndev, "Failed to register filter: %d\n", err);
		return err;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_FILTER,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(apc->ndev, "Failed to register filter: %d, 0x%x\n",
			   err, resp.hdr.status);
		return err ? err : -EPROTO;
	}

	apc->pf_filter_handle = resp.filter_handle;
	return 0;
}

static void mana_pf_deregister_filter(struct mana_port_context *apc)
{
	struct mana_deregister_filter_resp resp = {};
	struct mana_deregister_filter_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_FILTER,
			     sizeof(req), sizeof(resp));
	req.filter_handle = apc->pf_filter_handle;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(apc->ndev, "Failed to unregister filter: %d\n",
			   err);
		return;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_FILTER,
				   sizeof(resp));
	if (err || resp.hdr.status)
		netdev_err(apc->ndev,
			   "Failed to deregister filter: %d, 0x%x\n",
			   err, resp.hdr.status);
}
static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
				 u32 proto_minor_ver, u32 proto_micro_ver,
				 u16 *max_num_vports)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct mana_query_device_cfg_resp resp = {};
	struct mana_query_device_cfg_req req = {};
	struct device *dev = gc->dev;
	int err = 0;

	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG,
			     sizeof(req), sizeof(resp));
	req.proto_major_ver = proto_major_ver;
	req.proto_minor_ver = proto_minor_ver;
	req.proto_micro_ver = proto_micro_ver;

	err = mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp));
	if (err) {
		dev_err(dev, "Failed to query config: %d", err);
		return err;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		dev_err(dev, "Invalid query result: %d, 0x%x\n", err,
			resp.hdr.status);
		if (!err)
			err = -EPROTO;
		return err;
	}

	*max_num_vports = resp.max_num_vports;

	return 0;
}
static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index,
				u32 *max_sq, u32 *max_rq, u32 *num_indir_entry)
{
	struct mana_query_vport_cfg_resp resp = {};
	struct mana_query_vport_cfg_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG,
			     sizeof(req), sizeof(resp));

	req.vport_index = vport_index;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err)
		return err;

	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG,
				   sizeof(resp));
	if (err)
		return err;

	if (resp.hdr.status)
		return -EPROTO;

	*max_sq = resp.max_num_sq;
	*max_rq = resp.max_num_rq;
	*num_indir_entry = resp.num_indirection_ent;

	apc->port_handle = resp.vport;
	ether_addr_copy(apc->mac_addr, resp.mac_addr);

	return 0;
}
static int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id,
			  u32 doorbell_pg_id)
{
	struct mana_config_vport_resp resp = {};
	struct mana_config_vport_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX,
			     sizeof(req), sizeof(resp));
	req.vport = apc->port_handle;
	req.pdid = protection_dom_id;
	req.doorbell_pageid = doorbell_pg_id;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(apc->ndev, "Failed to configure vPort: %d\n", err);
		goto out;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(apc->ndev, "Failed to configure vPort: %d, 0x%x\n",
			   err, resp.hdr.status);
		if (!err)
			err = -EPROTO;

		goto out;
	}

	apc->tx_shortform_allowed = resp.short_form_allowed;
	apc->tx_vp_offset = resp.tx_vport_offset;
out:
	return err;
}
static int mana_cfg_vport_steering(struct mana_port_context *apc,
				   enum TRI_STATE rx,
				   bool update_default_rxobj, bool update_key,
				   bool update_tab)
{
	u16 num_entries = MANA_INDIRECT_TABLE_SIZE;
	struct mana_cfg_rx_steer_req *req = NULL;
	struct mana_cfg_rx_steer_resp resp = {};
	struct net_device *ndev = apc->ndev;
	mana_handle_t *req_indir_tab;
	u32 req_buf_size;
	int err;

	req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries;
	req = kzalloc(req_buf_size, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
			     sizeof(resp));

	req->vport = apc->port_handle;
	req->num_indir_entries = num_entries;
	req->indir_tab_offset = sizeof(*req);
	req->rx_enable = rx;
	req->rss_enable = apc->rss_state;
	req->update_default_rxobj = update_default_rxobj;
	req->update_hashkey = update_key;
	req->update_indir_tab = update_tab;
	req->default_rxobj = apc->default_rxobj;

	if (update_key)
		memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE);

	if (update_tab) {
		req_indir_tab = (mana_handle_t *)(req + 1);
		memcpy(req_indir_tab, apc->rxobj_table,
		       req->num_indir_entries * sizeof(mana_handle_t));
	}

	err = mana_send_request(apc->ac, req, req_buf_size, &resp,
				sizeof(resp));
	if (err) {
		netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
		goto out;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX,
				   sizeof(resp));
	if (err) {
		netdev_err(ndev, "vPort RX configuration failed: %d\n", err);
		goto out;
	}

	if (resp.hdr.status) {
		netdev_err(ndev, "vPort RX configuration failed: 0x%x\n",
			   resp.hdr.status);
		err = -EPROTO;
	}
out:
	kfree(req);
	return err;
}
static int mana_create_wq_obj(struct mana_port_context *apc,
			      mana_handle_t vport,
			      u32 wq_type, struct mana_obj_spec *wq_spec,
			      struct mana_obj_spec *cq_spec,
			      mana_handle_t *wq_obj)
{
	struct mana_create_wqobj_resp resp = {};
	struct mana_create_wqobj_req req = {};
	struct net_device *ndev = apc->ndev;
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ,
			     sizeof(req), sizeof(resp));
	req.vport = vport;
	req.wq_type = wq_type;
	req.wq_gdma_region = wq_spec->gdma_region;
	req.cq_gdma_region = cq_spec->gdma_region;
	req.wq_size = wq_spec->queue_size;
	req.cq_size = cq_spec->queue_size;
	req.cq_moderation_ctx_id = cq_spec->modr_ctx_id;
	req.cq_parent_qid = cq_spec->attached_eq;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(ndev, "Failed to create WQ object: %d\n", err);
		goto out;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(ndev, "Failed to create WQ object: %d, 0x%x\n", err,
			   resp.hdr.status);
		if (!err)
			err = -EPROTO;
		goto out;
	}

	if (resp.wq_obj == INVALID_MANA_HANDLE) {
		netdev_err(ndev, "Got an invalid WQ object handle\n");
		err = -EPROTO;
		goto out;
	}

	*wq_obj = resp.wq_obj;
	wq_spec->queue_index = resp.wq_id;
	cq_spec->queue_index = resp.cq_id;

	return 0;
out:
	return err;
}
static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
				mana_handle_t wq_obj)
{
	struct mana_destroy_wqobj_resp resp = {};
	struct mana_destroy_wqobj_req req = {};
	struct net_device *ndev = apc->ndev;
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ,
			     sizeof(req), sizeof(resp));
	req.wq_type = wq_type;
	req.wq_obj_handle = wq_obj;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(ndev, "Failed to destroy WQ object: %d\n", err);
		return;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ,
				   sizeof(resp));
	if (err || resp.hdr.status)
		netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err,
			   resp.hdr.status);
}
static void mana_destroy_eq(struct mana_context *ac)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct gdma_queue *eq;
	int i;

	if (!ac->eqs)
		return;

	for (i = 0; i < gc->max_num_queues; i++) {
		eq = ac->eqs[i].eq;
		if (!eq)
			continue;

		mana_gd_destroy_queue(gc, eq);
	}

	kfree(ac->eqs);
	ac->eqs = NULL;
}
static int mana_create_eq(struct mana_context *ac)
{
	struct gdma_dev *gd = ac->gdma_dev;
	struct gdma_context *gc = gd->gdma_context;
	struct gdma_queue_spec spec = {};
	int err;
	int i;

	ac->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq),
			  GFP_KERNEL);
	if (!ac->eqs)
		return -ENOMEM;

	spec.type = GDMA_EQ;
	spec.monitor_avl_buf = false;
	spec.queue_size = EQ_SIZE;
	spec.eq.callback = NULL;
	spec.eq.context = ac->eqs;
	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;

	for (i = 0; i < gc->max_num_queues; i++) {
		err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
		if (err)
			goto out;
	}

	return 0;
out:
	mana_destroy_eq(ac);
	return err;
}
static int mana_fence_rq(struct mana_port_context *apc, struct mana_rxq *rxq)
{
	struct mana_fence_rq_resp resp = {};
	struct mana_fence_rq_req req = {};
	int err;

	init_completion(&rxq->fence_event);

	mana_gd_init_req_hdr(&req.hdr, MANA_FENCE_RQ,
			     sizeof(req), sizeof(resp));
	req.wq_obj_handle = rxq->rxobj;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(apc->ndev, "Failed to fence RQ %u: %d\n",
			   rxq->rxq_idx, err);
		return err;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_FENCE_RQ, sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(apc->ndev, "Failed to fence RQ %u: %d, 0x%x\n",
			   rxq->rxq_idx, err, resp.hdr.status);
		if (!err)
			err = -EPROTO;

		return err;
	}

	if (wait_for_completion_timeout(&rxq->fence_event, 10 * HZ) == 0) {
		netdev_err(apc->ndev, "Failed to fence RQ %u: timed out\n",
			   rxq->rxq_idx);
		return -ETIMEDOUT;
	}

	return 0;
}
*apc
)
912 unsigned int rxq_idx
;
913 struct mana_rxq
*rxq
;
916 for (rxq_idx
= 0; rxq_idx
< apc
->num_queues
; rxq_idx
++) {
917 rxq
= apc
->rxqs
[rxq_idx
];
918 err
= mana_fence_rq(apc
, rxq
);
920 /* In case of any error, use sleep instead. */
static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
{
	u32 used_space_old;
	u32 used_space_new;

	used_space_old = wq->head - wq->tail;
	used_space_new = wq->head - (wq->tail + num_units);

	if (WARN_ON_ONCE(used_space_new > used_space_old))
		return -ERANGE;

	wq->tail += num_units;
	return 0;
}
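/* Worked example for mana_move_wq_tail(): with wq->head == 110 and
 * wq->tail == 100, used_space_old is 10 WQE units. Completing
 * num_units == 4 gives used_space_new == 6, which is smaller, so the tail
 * advances to 104. A num_units larger than the used space makes
 * used_space_new wrap around (unsigned), trips the WARN_ON_ONCE() and the
 * tail is left unchanged.
 */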
static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
{
	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
	struct device *dev = gc->dev;
	int i;

	dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE);

	for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
		dma_unmap_page(dev, ash->dma_handle[i], ash->size[i],
			       DMA_TO_DEVICE);
}
*cq
)
957 struct gdma_comp
*completions
= cq
->gdma_comp_buf
;
958 struct gdma_posted_wqe_info
*wqe_info
;
959 unsigned int pkt_transmitted
= 0;
960 unsigned int wqe_unit_cnt
= 0;
961 struct mana_txq
*txq
= cq
->txq
;
962 struct mana_port_context
*apc
;
963 struct netdev_queue
*net_txq
;
964 struct gdma_queue
*gdma_wq
;
965 unsigned int avail_space
;
966 struct net_device
*ndev
;
973 apc
= netdev_priv(ndev
);
975 comp_read
= mana_gd_poll_cq(cq
->gdma_cq
, completions
,
981 for (i
= 0; i
< comp_read
; i
++) {
982 struct mana_tx_comp_oob
*cqe_oob
;
984 if (WARN_ON_ONCE(!completions
[i
].is_sq
))
987 cqe_oob
= (struct mana_tx_comp_oob
*)completions
[i
].cqe_data
;
988 if (WARN_ON_ONCE(cqe_oob
->cqe_hdr
.client_type
!=
989 MANA_CQE_COMPLETION
))
992 switch (cqe_oob
->cqe_hdr
.cqe_type
) {
997 case CQE_TX_MTU_DROP
:
998 case CQE_TX_INVALID_OOB
:
999 case CQE_TX_INVALID_ETH_TYPE
:
1000 case CQE_TX_HDR_PROCESSING_ERROR
:
1001 case CQE_TX_VF_DISABLED
:
1002 case CQE_TX_VPORT_IDX_OUT_OF_RANGE
:
1003 case CQE_TX_VPORT_DISABLED
:
1004 case CQE_TX_VLAN_TAGGING_VIOLATION
:
1005 WARN_ONCE(1, "TX: CQE error %d: ignored.\n",
1006 cqe_oob
->cqe_hdr
.cqe_type
);
1010 /* If the CQE type is unexpected, log an error, assert,
1011 * and go through the error path.
1013 WARN_ONCE(1, "TX: Unexpected CQE type %d: HW BUG?\n",
1014 cqe_oob
->cqe_hdr
.cqe_type
);
1018 if (WARN_ON_ONCE(txq
->gdma_txq_id
!= completions
[i
].wq_num
))
1021 skb
= skb_dequeue(&txq
->pending_skbs
);
1022 if (WARN_ON_ONCE(!skb
))
1025 wqe_info
= (struct gdma_posted_wqe_info
*)skb
->cb
;
1026 wqe_unit_cnt
+= wqe_info
->wqe_size_in_bu
;
1028 mana_unmap_skb(skb
, apc
);
1030 napi_consume_skb(skb
, cq
->budget
);
1035 if (WARN_ON_ONCE(wqe_unit_cnt
== 0))
1038 mana_move_wq_tail(txq
->gdma_sq
, wqe_unit_cnt
);
1040 gdma_wq
= txq
->gdma_sq
;
1041 avail_space
= mana_gd_wq_avail_space(gdma_wq
);
1043 /* Ensure tail updated before checking q stop */
1046 net_txq
= txq
->net_txq
;
1047 txq_stopped
= netif_tx_queue_stopped(net_txq
);
1049 /* Ensure checking txq_stopped before apc->port_is_up. */
1052 if (txq_stopped
&& apc
->port_is_up
&& avail_space
>= MAX_TX_WQE_SIZE
) {
1053 netif_tx_wake_queue(net_txq
);
1054 apc
->eth_stats
.wake_queue
++;
1057 if (atomic_sub_return(pkt_transmitted
, &txq
->pending_sends
) < 0)
1060 cq
->work_done
= pkt_transmitted
;
static void mana_post_pkt_rxq(struct mana_rxq *rxq)
{
	struct mana_recv_buf_oob *recv_buf_oob;
	u32 curr_index;
	int err;

	curr_index = rxq->buf_index++;
	if (rxq->buf_index == rxq->num_rx_buf)
		rxq->buf_index = 0;

	recv_buf_oob = &rxq->rx_oobs[curr_index];

	err = mana_gd_post_and_ring(rxq->gdma_rq, &recv_buf_oob->wqe_req,
				    &recv_buf_oob->wqe_inf);
	if (WARN_ON_ONCE(err))
		return;

	WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
}
static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
				      struct xdp_buff *xdp)
{
	struct sk_buff *skb = build_skb(buf_va, PAGE_SIZE);

	if (!skb)
		return NULL;

	if (xdp->data_hard_start) {
		skb_reserve(skb, xdp->data - xdp->data_hard_start);
		skb_put(skb, xdp->data_end - xdp->data);
	} else {
		skb_reserve(skb, XDP_PACKET_HEADROOM);
		skb_put(skb, pkt_len);
	}

	return skb;
}
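/* mana_build_skb() builds the skb directly on the receive buffer page.
 * When an XDP program has run, xdp->data/xdp->data_end reflect any headroom
 * adjustment the program made, so the skb layout is taken from the
 * xdp_buff; otherwise the fixed XDP_PACKET_HEADROOM reserved at mapping
 * time is skipped and pkt_len bytes are exposed.
 */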
static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
			struct mana_rxq *rxq)
{
	struct mana_stats_rx *rx_stats = &rxq->stats;
	struct net_device *ndev = rxq->ndev;
	uint pkt_len = cqe->ppi[0].pkt_len;
	u16 rxq_idx = rxq->rxq_idx;
	struct napi_struct *napi;
	struct xdp_buff xdp = {};
	struct sk_buff *skb;
	u32 hash_value;
	u32 act;

	rxq->rx_cq.work_done++;
	napi = &rxq->rx_cq.napi;

	if (!buf_va) {
		++ndev->stats.rx_dropped;
		return;
	}

	act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);

	if (act == XDP_REDIRECT && !rxq->xdp_rc)
		return;

	if (act != XDP_PASS && act != XDP_TX)
		goto drop_xdp;

	skb = mana_build_skb(buf_va, pkt_len, &xdp);

	if (!skb)
		goto drop;

	skb->dev = napi->dev;

	skb->protocol = eth_type_trans(skb, ndev);
	skb_checksum_none_assert(skb);
	skb_record_rx_queue(skb, rxq_idx);

	if ((ndev->features & NETIF_F_RXCSUM) && cqe->rx_iphdr_csum_succeed) {
		if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed)
			skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) {
		hash_value = cqe->ppi[0].pkt_hash;

		if (cqe->rx_hashtype & MANA_HASH_L4)
			skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4);
		else
			skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3);
	}

	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->packets++;
	rx_stats->bytes += pkt_len;
	u64_stats_update_end(&rx_stats->syncp);

	if (act == XDP_TX) {
		skb_set_queue_mapping(skb, rxq_idx);
		mana_xdp_tx(skb, ndev);
		return;
	}

	napi_gro_receive(napi, skb);

	return;

drop_xdp:
	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->xdp_drop++;
	u64_stats_update_end(&rx_stats->syncp);

drop:
	WARN_ON_ONCE(rxq->xdp_save_page);
	rxq->xdp_save_page = virt_to_page(buf_va);

	++ndev->stats.rx_dropped;
}
static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
				struct gdma_comp *cqe)
{
	struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data;
	struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context;
	struct net_device *ndev = rxq->ndev;
	struct mana_recv_buf_oob *rxbuf_oob;
	struct device *dev = gc->dev;
	void *new_buf, *old_buf;
	struct page *new_page;
	u32 curr, pktlen;
	dma_addr_t da;

	switch (oob->cqe_hdr.cqe_type) {
	case CQE_RX_OKAY:
		break;

	case CQE_RX_TRUNCATED:
		++ndev->stats.rx_dropped;
		rxbuf_oob = &rxq->rx_oobs[rxq->buf_index];
		netdev_warn_once(ndev, "Dropped a truncated packet\n");
		goto drop;

	case CQE_RX_COALESCED_4:
		netdev_err(ndev, "RX coalescing is unsupported\n");
		return;

	case CQE_RX_OBJECT_FENCE:
		complete(&rxq->fence_event);
		return;

	default:
		netdev_err(ndev, "Unknown RX CQE type = %d\n",
			   oob->cqe_hdr.cqe_type);
		return;
	}

	pktlen = oob->ppi[0].pkt_len;

	if (pktlen == 0) {
		/* data packets should never have packetlength of zero */
		netdev_err(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n",
			   rxq->gdma_id, cq->gdma_id, rxq->rxobj);
		return;
	}

	curr = rxq->buf_index;
	rxbuf_oob = &rxq->rx_oobs[curr];
	WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);

	/* Reuse XDP dropped page if available */
	if (rxq->xdp_save_page) {
		new_page = rxq->xdp_save_page;
		rxq->xdp_save_page = NULL;
	} else {
		new_page = alloc_page(GFP_ATOMIC);
	}

	if (new_page) {
		da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize,
				  DMA_FROM_DEVICE);

		if (dma_mapping_error(dev, da)) {
			__free_page(new_page);
			new_page = NULL;
		}
	}

	new_buf = new_page ? page_to_virt(new_page) : NULL;

	if (new_buf) {
		dma_unmap_page(dev, rxbuf_oob->buf_dma_addr, rxq->datasize,
			       DMA_FROM_DEVICE);

		old_buf = rxbuf_oob->buf_va;

		/* refresh the rxbuf_oob with the new page */
		rxbuf_oob->buf_va = new_buf;
		rxbuf_oob->buf_dma_addr = da;
		rxbuf_oob->sgl[0].address = rxbuf_oob->buf_dma_addr;
	} else {
		old_buf = NULL; /* drop the packet if no memory */
	}

	mana_rx_skb(old_buf, oob, rxq);

drop:
	mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);

	mana_post_pkt_rxq(rxq);
}
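/* RX buffer recycling in mana_process_rx_cqe(): every completed receive
 * buffer is replaced before the packet is passed up, preferring a page
 * saved from an XDP drop and falling back to alloc_page(GFP_ATOMIC). If no
 * replacement page can be mapped, the packet is dropped (old_buf stays
 * NULL) and the original buffer is kept, so the RX ring never loses
 * entries.
 */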
static void mana_poll_rx_cq(struct mana_cq *cq)
{
	struct gdma_comp *comp = cq->gdma_comp_buf;
	struct mana_rxq *rxq = cq->rxq;
	int comp_read, i;

	comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
	WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);

	rxq->xdp_flush = false;

	for (i = 0; i < comp_read; i++) {
		if (WARN_ON_ONCE(comp[i].is_sq))
			return;

		/* verify recv cqe references the right rxq */
		if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id))
			return;

		mana_process_rx_cqe(rxq, cq, &comp[i]);
	}

	if (rxq->xdp_flush)
		xdp_do_flush();
}
static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
{
	struct mana_cq *cq = context;
	u8 arm_bit;

	WARN_ON_ONCE(cq->gdma_cq != gdma_queue);

	if (cq->type == MANA_CQ_TYPE_RX)
		mana_poll_rx_cq(cq);
	else
		mana_poll_tx_cq(cq);

	if (cq->work_done < cq->budget &&
	    napi_complete_done(&cq->napi, cq->work_done)) {
		arm_bit = SET_ARM_BIT;
	} else {
		arm_bit = 0;
	}

	mana_gd_ring_cq(gdma_queue, arm_bit);
}
static int mana_poll(struct napi_struct *napi, int budget)
{
	struct mana_cq *cq = container_of(napi, struct mana_cq, napi);

	cq->work_done = 0;
	cq->budget = budget;

	mana_cq_handler(cq, cq->gdma_cq);

	return min(cq->work_done, budget);
}

static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue)
{
	struct mana_cq *cq = context;

	napi_schedule_irqoff(&cq->napi);
}
static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;

	if (!cq->gdma_cq)
		return;

	mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq);
}

static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;

	if (!txq->gdma_sq)
		return;

	mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq);
}
static void mana_destroy_txq(struct mana_port_context *apc)
{
	struct napi_struct *napi;
	int i;

	if (!apc->tx_qp)
		return;

	for (i = 0; i < apc->num_queues; i++) {
		napi = &apc->tx_qp[i].tx_cq.napi;
		napi_synchronize(napi);
		napi_disable(napi);
		netif_napi_del(napi);

		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);

		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);

		mana_deinit_txq(apc, &apc->tx_qp[i].txq);
	}

	kfree(apc->tx_qp);
	apc->tx_qp = NULL;
}
static int mana_create_txq(struct mana_port_context *apc,
			   struct net_device *net)
{
	struct mana_context *ac = apc->ac;
	struct gdma_dev *gd = ac->gdma_dev;
	struct mana_obj_spec wq_spec;
	struct mana_obj_spec cq_spec;
	struct gdma_queue_spec spec;
	struct gdma_context *gc;
	struct mana_txq *txq;
	struct mana_cq *cq;
	u32 txq_size;
	u32 cq_size;
	int err;
	int i;

	apc->tx_qp = kcalloc(apc->num_queues, sizeof(struct mana_tx_qp),
			     GFP_KERNEL);
	if (!apc->tx_qp)
		return -ENOMEM;

	/* The minimum size of the WQE is 32 bytes, hence
	 * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs
	 * the SQ can store. This value is then used to size other queues
	 * to prevent overflow.
	 */
	txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32;
	BUILD_BUG_ON(!PAGE_ALIGNED(txq_size));

	cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE;
	cq_size = PAGE_ALIGN(cq_size);

	gc = gd->gdma_context;

	for (i = 0; i < apc->num_queues; i++) {
		apc->tx_qp[i].tx_object = INVALID_MANA_HANDLE;

		/* Create SQ */
		txq = &apc->tx_qp[i].txq;

		u64_stats_init(&txq->stats.syncp);
		txq->ndev = net;
		txq->net_txq = netdev_get_tx_queue(net, i);
		txq->vp_offset = apc->tx_vp_offset;
		skb_queue_head_init(&txq->pending_skbs);

		memset(&spec, 0, sizeof(spec));
		spec.type = GDMA_SQ;
		spec.monitor_avl_buf = true;
		spec.queue_size = txq_size;
		err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq);
		if (err)
			goto out;

		/* Create SQ's CQ */
		cq = &apc->tx_qp[i].tx_cq;
		cq->type = MANA_CQ_TYPE_TX;

		cq->txq = txq;

		memset(&spec, 0, sizeof(spec));
		spec.type = GDMA_CQ;
		spec.monitor_avl_buf = false;
		spec.queue_size = cq_size;
		spec.cq.callback = mana_schedule_napi;
		spec.cq.parent_eq = ac->eqs[i].eq;
		spec.cq.context = cq;
		err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
		if (err)
			goto out;

		memset(&wq_spec, 0, sizeof(wq_spec));
		memset(&cq_spec, 0, sizeof(cq_spec));

		wq_spec.gdma_region = txq->gdma_sq->mem_info.gdma_region;
		wq_spec.queue_size = txq->gdma_sq->queue_size;

		cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region;
		cq_spec.queue_size = cq->gdma_cq->queue_size;
		cq_spec.modr_ctx_id = 0;
		cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;

		err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ,
					 &wq_spec, &cq_spec,
					 &apc->tx_qp[i].tx_object);
		if (err)
			goto out;

		txq->gdma_sq->id = wq_spec.queue_index;
		cq->gdma_cq->id = cq_spec.queue_index;

		txq->gdma_sq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
		cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;

		txq->gdma_txq_id = txq->gdma_sq->id;

		cq->gdma_id = cq->gdma_cq->id;

		if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) {
			err = -EINVAL;
			goto out;
		}

		gc->cq_table[cq->gdma_id] = cq->gdma_cq;

		netif_napi_add_tx(net, &cq->napi, mana_poll);
		napi_enable(&cq->napi);

		mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
	}

	return 0;
out:
	mana_destroy_txq(apc);
	return err;
}
static void mana_destroy_rxq(struct mana_port_context *apc,
			     struct mana_rxq *rxq, bool validate_state)
{
	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
	struct mana_recv_buf_oob *rx_oob;
	struct device *dev = gc->dev;
	struct napi_struct *napi;
	int i;

	if (!rxq)
		return;

	napi = &rxq->rx_cq.napi;

	if (validate_state)
		napi_synchronize(napi);

	napi_disable(napi);

	xdp_rxq_info_unreg(&rxq->xdp_rxq);

	netif_napi_del(napi);

	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);

	mana_deinit_cq(apc, &rxq->rx_cq);

	if (rxq->xdp_save_page)
		__free_page(rxq->xdp_save_page);

	for (i = 0; i < rxq->num_rx_buf; i++) {
		rx_oob = &rxq->rx_oobs[i];

		if (!rx_oob->buf_va)
			continue;

		dma_unmap_page(dev, rx_oob->buf_dma_addr, rxq->datasize,
			       DMA_FROM_DEVICE);

		free_page((unsigned long)rx_oob->buf_va);
		rx_oob->buf_va = NULL;
	}

	mana_gd_destroy_queue(gc, rxq->gdma_rq);

	kfree(rxq);
}
#define MANA_WQE_HEADER_SIZE 16
#define MANA_WQE_SGE_SIZE 16
static int mana_alloc_rx_wqe(struct mana_port_context *apc,
			     struct mana_rxq *rxq, u32 *rxq_size, u32 *cq_size)
{
	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
	struct mana_recv_buf_oob *rx_oob;
	struct device *dev = gc->dev;
	struct page *page;
	dma_addr_t da;
	u32 buf_idx;

	WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE);

	*rxq_size = 0;
	*cq_size = 0;

	for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) {
		rx_oob = &rxq->rx_oobs[buf_idx];
		memset(rx_oob, 0, sizeof(*rx_oob));

		page = alloc_page(GFP_KERNEL);
		if (!page)
			return -ENOMEM;

		da = dma_map_page(dev, page, XDP_PACKET_HEADROOM, rxq->datasize,
				  DMA_FROM_DEVICE);

		if (dma_mapping_error(dev, da)) {
			__free_page(page);
			return -ENOMEM;
		}

		rx_oob->buf_va = page_to_virt(page);
		rx_oob->buf_dma_addr = da;

		rx_oob->num_sge = 1;
		rx_oob->sgl[0].address = rx_oob->buf_dma_addr;
		rx_oob->sgl[0].size = rxq->datasize;
		rx_oob->sgl[0].mem_key = apc->ac->gdma_dev->gpa_mkey;

		rx_oob->wqe_req.sgl = rx_oob->sgl;
		rx_oob->wqe_req.num_sge = rx_oob->num_sge;
		rx_oob->wqe_req.inline_oob_size = 0;
		rx_oob->wqe_req.inline_oob_data = NULL;
		rx_oob->wqe_req.flags = 0;
		rx_oob->wqe_req.client_data_unit = 0;

		*rxq_size += ALIGN(MANA_WQE_HEADER_SIZE +
				   MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32);
		*cq_size += COMP_ENTRY_SIZE;
	}

	return 0;
}
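/* Sizing example for mana_alloc_rx_wqe(): each RX WQE uses a single SGE, so
 * it consumes ALIGN(MANA_WQE_HEADER_SIZE + MANA_WQE_SGE_SIZE * 1, 32) =
 * 32 bytes of work-queue space and one COMP_ENTRY_SIZE completion slot.
 * With RX_BUFFERS_PER_QUEUE buffers, the totals are rounded up to a page
 * by the caller before the queues are created.
 */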
static int mana_push_wqe(struct mana_rxq *rxq)
{
	struct mana_recv_buf_oob *rx_oob;
	u32 buf_idx;
	int err;

	for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) {
		rx_oob = &rxq->rx_oobs[buf_idx];

		err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req,
					    &rx_oob->wqe_inf);
		if (err)
			return err;
	}

	return 0;
}
static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
					u32 rxq_idx, struct mana_eq *eq,
					struct net_device *ndev)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;
	struct mana_obj_spec wq_spec;
	struct mana_obj_spec cq_spec;
	struct gdma_queue_spec spec;
	struct mana_cq *cq = NULL;
	struct gdma_context *gc;
	u32 cq_size, rq_size;
	struct mana_rxq *rxq;
	int err;

	gc = gd->gdma_context;

	rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE),
		      GFP_KERNEL);
	if (!rxq)
		return NULL;

	rxq->ndev = ndev;
	rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
	rxq->rxq_idx = rxq_idx;
	rxq->datasize = ALIGN(MAX_FRAME_SIZE, 64);
	rxq->rxobj = INVALID_MANA_HANDLE;

	err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
	if (err)
		goto out;

	rq_size = PAGE_ALIGN(rq_size);
	cq_size = PAGE_ALIGN(cq_size);

	/* Create RQ */
	memset(&spec, 0, sizeof(spec));
	spec.type = GDMA_RQ;
	spec.monitor_avl_buf = true;
	spec.queue_size = rq_size;
	err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq);
	if (err)
		goto out;

	/* Create RQ's CQ */
	cq = &rxq->rx_cq;
	cq->type = MANA_CQ_TYPE_RX;
	cq->rxq = rxq;

	memset(&spec, 0, sizeof(spec));
	spec.type = GDMA_CQ;
	spec.monitor_avl_buf = false;
	spec.queue_size = cq_size;
	spec.cq.callback = mana_schedule_napi;
	spec.cq.parent_eq = eq->eq;
	spec.cq.context = cq;
	err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
	if (err)
		goto out;

	memset(&wq_spec, 0, sizeof(wq_spec));
	memset(&cq_spec, 0, sizeof(cq_spec));
	wq_spec.gdma_region = rxq->gdma_rq->mem_info.gdma_region;
	wq_spec.queue_size = rxq->gdma_rq->queue_size;

	cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region;
	cq_spec.queue_size = cq->gdma_cq->queue_size;
	cq_spec.modr_ctx_id = 0;
	cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;

	err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ,
				 &wq_spec, &cq_spec, &rxq->rxobj);
	if (err)
		goto out;

	rxq->gdma_rq->id = wq_spec.queue_index;
	cq->gdma_cq->id = cq_spec.queue_index;

	rxq->gdma_rq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
	cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;

	rxq->gdma_id = rxq->gdma_rq->id;
	cq->gdma_id = cq->gdma_cq->id;

	err = mana_push_wqe(rxq);
	if (err)
		goto out;

	if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) {
		err = -EINVAL;
		goto out;
	}

	gc->cq_table[cq->gdma_id] = cq->gdma_cq;

	netif_napi_add_weight(ndev, &cq->napi, mana_poll, 1);

	WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx,
				 cq->napi.napi_id));
	WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
					   MEM_TYPE_PAGE_SHARED, NULL));

	napi_enable(&cq->napi);

	mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
out:
	if (!err)
		return rxq;

	netdev_err(ndev, "Failed to create RXQ: err = %d\n", err);

	mana_destroy_rxq(apc, rxq, false);

	if (cq)
		mana_deinit_cq(apc, cq);

	return NULL;
}
static int mana_add_rx_queues(struct mana_port_context *apc,
			      struct net_device *ndev)
{
	struct mana_context *ac = apc->ac;
	struct mana_rxq *rxq;
	int err = 0;
	int i;

	for (i = 0; i < apc->num_queues; i++) {
		rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev);
		if (!rxq) {
			err = -ENOMEM;
			goto out;
		}

		u64_stats_init(&rxq->stats.syncp);

		apc->rxqs[i] = rxq;
	}

	apc->default_rxobj = apc->rxqs[0]->rxobj;
out:
	return err;
}
static void mana_destroy_vport(struct mana_port_context *apc)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;
	struct mana_rxq *rxq;
	u32 rxq_idx;

	for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
		rxq = apc->rxqs[rxq_idx];
		if (!rxq)
			continue;

		mana_destroy_rxq(apc, rxq, true);
		apc->rxqs[rxq_idx] = NULL;
	}

	mana_destroy_txq(apc);

	if (gd->gdma_context->is_pf)
		mana_pf_deregister_hw_vport(apc);
}
static int mana_create_vport(struct mana_port_context *apc,
			     struct net_device *net)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;
	int err;

	apc->default_rxobj = INVALID_MANA_HANDLE;

	if (gd->gdma_context->is_pf) {
		err = mana_pf_register_hw_vport(apc);
		if (err)
			return err;
	}

	err = mana_cfg_vport(apc, gd->pdid, gd->doorbell);
	if (err)
		return err;

	return mana_create_txq(apc, net);
}
static void mana_rss_table_init(struct mana_port_context *apc)
{
	int i;

	for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
		apc->indir_table[i] =
			ethtool_rxfh_indir_default(i, apc->num_queues);
}
int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
		    bool update_hash, bool update_tab)
{
	u32 queue_idx;
	int err;
	int i;

	if (update_tab) {
		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
			queue_idx = apc->indir_table[i];
			apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj;
		}
	}

	err = mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab);
	if (err)
		return err;

	mana_fence_rqs(apc);

	return 0;
}
*ndev
)
1854 struct mana_port_context
*apc
= netdev_priv(ndev
);
1855 u32 max_txq
, max_rxq
, max_queues
;
1856 int port_idx
= apc
->port_idx
;
1857 u32 num_indirect_entries
;
1860 err
= mana_init_port_context(apc
);
1864 err
= mana_query_vport_cfg(apc
, port_idx
, &max_txq
, &max_rxq
,
1865 &num_indirect_entries
);
1867 netdev_err(ndev
, "Failed to query info for vPort %d\n",
1872 max_queues
= min_t(u32
, max_txq
, max_rxq
);
1873 if (apc
->max_queues
> max_queues
)
1874 apc
->max_queues
= max_queues
;
1876 if (apc
->num_queues
> apc
->max_queues
)
1877 apc
->num_queues
= apc
->max_queues
;
1879 eth_hw_addr_set(ndev
, apc
->mac_addr
);
int mana_alloc_queues(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	struct gdma_dev *gd = apc->ac->gdma_dev;
	int err;

	err = mana_create_vport(apc, ndev);
	if (err)
		return err;

	err = netif_set_real_num_tx_queues(ndev, apc->num_queues);
	if (err)
		goto destroy_vport;

	err = mana_add_rx_queues(apc, ndev);
	if (err)
		goto destroy_vport;

	apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE;

	err = netif_set_real_num_rx_queues(ndev, apc->num_queues);
	if (err)
		goto destroy_vport;

	mana_rss_table_init(apc);

	err = mana_config_rss(apc, TRI_STATE_TRUE, true, true);
	if (err)
		goto destroy_vport;

	if (gd->gdma_context->is_pf) {
		err = mana_pf_register_filter(apc);
		if (err)
			goto destroy_vport;
	}

	mana_chn_setxdp(apc, mana_xdp_get(apc));

	return 0;

destroy_vport:
	mana_destroy_vport(apc);
	return err;
}
int mana_attach(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	int err;

	ASSERT_RTNL();

	err = mana_init_port(ndev);
	if (err)
		return err;

	if (apc->port_st_save) {
		err = mana_alloc_queues(ndev);
		if (err) {
			mana_cleanup_port_context(apc);
			return err;
		}
	}

	apc->port_is_up = apc->port_st_save;

	/* Ensure port state updated before txq state */
	smp_wmb();

	if (apc->port_is_up)
		netif_carrier_on(ndev);

	netif_device_attach(ndev);

	return 0;
}
static int mana_dealloc_queues(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	struct gdma_dev *gd = apc->ac->gdma_dev;
	struct mana_txq *txq;
	int i, err;

	if (apc->port_is_up)
		return -EINVAL;

	mana_chn_setxdp(apc, NULL);

	if (gd->gdma_context->is_pf)
		mana_pf_deregister_filter(apc);

	/* No packet can be transmitted now since apc->port_is_up is false.
	 * There is still a tiny chance that mana_poll_tx_cq() can re-enable
	 * a txq because it may not timely see apc->port_is_up being cleared
	 * to false, but it doesn't matter since mana_start_xmit() drops any
	 * new packets due to apc->port_is_up being false.
	 *
	 * Drain all the in-flight TX packets
	 */
	for (i = 0; i < apc->num_queues; i++) {
		txq = &apc->tx_qp[i].txq;

		while (atomic_read(&txq->pending_sends) > 0)
			usleep_range(1000, 2000);
	}

	/* We're 100% sure the queues can no longer be woken up, because
	 * we're sure now mana_poll_tx_cq() can't be running.
	 */

	apc->rss_state = TRI_STATE_FALSE;
	err = mana_config_rss(apc, TRI_STATE_FALSE, false, false);
	if (err) {
		netdev_err(ndev, "Failed to disable vPort: %d\n", err);
		return err;
	}

	mana_destroy_vport(apc);

	return 0;
}
*ndev
, bool from_close
)
2014 struct mana_port_context
*apc
= netdev_priv(ndev
);
2019 apc
->port_st_save
= apc
->port_is_up
;
2020 apc
->port_is_up
= false;
2022 /* Ensure port state updated before txq state */
2025 netif_tx_disable(ndev
);
2026 netif_carrier_off(ndev
);
2028 if (apc
->port_st_save
) {
2029 err
= mana_dealloc_queues(ndev
);
2035 netif_device_detach(ndev
);
2036 mana_cleanup_port_context(apc
);
2042 static int mana_probe_port(struct mana_context
*ac
, int port_idx
,
2043 struct net_device
**ndev_storage
)
2045 struct gdma_context
*gc
= ac
->gdma_dev
->gdma_context
;
2046 struct mana_port_context
*apc
;
2047 struct net_device
*ndev
;
2050 ndev
= alloc_etherdev_mq(sizeof(struct mana_port_context
),
2051 gc
->max_num_queues
);
2055 *ndev_storage
= ndev
;
2057 apc
= netdev_priv(ndev
);
2060 apc
->max_queues
= gc
->max_num_queues
;
2061 apc
->num_queues
= gc
->max_num_queues
;
2062 apc
->port_handle
= INVALID_MANA_HANDLE
;
2063 apc
->pf_filter_handle
= INVALID_MANA_HANDLE
;
2064 apc
->port_idx
= port_idx
;
2066 ndev
->netdev_ops
= &mana_devops
;
2067 ndev
->ethtool_ops
= &mana_ethtool_ops
;
2068 ndev
->mtu
= ETH_DATA_LEN
;
2069 ndev
->max_mtu
= ndev
->mtu
;
2070 ndev
->min_mtu
= ndev
->mtu
;
2071 ndev
->needed_headroom
= MANA_HEADROOM
;
2072 SET_NETDEV_DEV(ndev
, gc
->dev
);
2074 netif_carrier_off(ndev
);
2076 netdev_rss_key_fill(apc
->hashkey
, MANA_HASH_KEY_SIZE
);
2078 err
= mana_init_port(ndev
);
2082 netdev_lockdep_set_classes(ndev
);
2084 ndev
->hw_features
= NETIF_F_SG
| NETIF_F_IP_CSUM
| NETIF_F_IPV6_CSUM
;
2085 ndev
->hw_features
|= NETIF_F_RXCSUM
;
2086 ndev
->hw_features
|= NETIF_F_TSO
| NETIF_F_TSO6
;
2087 ndev
->hw_features
|= NETIF_F_RXHASH
;
2088 ndev
->features
= ndev
->hw_features
;
2089 ndev
->vlan_features
= 0;
2091 err
= register_netdev(ndev
);
2093 netdev_err(ndev
, "Unable to register netdev.\n");
2103 *ndev_storage
= NULL
;
2104 netdev_err(ndev
, "Failed to probe vPort %d: %d\n", port_idx
, err
);
int mana_probe(struct gdma_dev *gd, bool resuming)
{
	struct gdma_context *gc = gd->gdma_context;
	struct mana_context *ac = gd->driver_data;
	struct device *dev = gc->dev;
	u16 num_ports = 0;
	int err;
	int i;

	dev_info(dev,
		 "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n",
		 MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION);

	err = mana_gd_register_device(gd);
	if (err)
		return err;

	if (!resuming) {
		ac = kzalloc(sizeof(*ac), GFP_KERNEL);
		if (!ac)
			return -ENOMEM;

		ac->gdma_dev = gd;
		gd->driver_data = ac;
	}

	err = mana_create_eq(ac);
	if (err)
		goto out;

	err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
				    MANA_MICRO_VERSION, &num_ports);
	if (err)
		goto out;

	if (!resuming) {
		ac->num_ports = num_ports;
	} else {
		if (ac->num_ports != num_ports) {
			dev_err(dev, "The number of vPorts changed: %d->%d\n",
				ac->num_ports, num_ports);
			err = -EPROTO;
			goto out;
		}
	}

	if (ac->num_ports == 0)
		dev_err(dev, "Failed to detect any vPort\n");

	if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
		ac->num_ports = MAX_PORTS_IN_MANA_DEV;

	if (!resuming) {
		for (i = 0; i < ac->num_ports; i++) {
			err = mana_probe_port(ac, i, &ac->ports[i]);
			if (err)
				goto out;
		}
	} else {
		for (i = 0; i < ac->num_ports; i++) {
			rtnl_lock();
			err = mana_attach(ac->ports[i]);
			rtnl_unlock();
			if (err)
				goto out;
		}
	}
out:
	if (err)
		mana_remove(gd, false);

	return err;
}
void mana_remove(struct gdma_dev *gd, bool suspending)
{
	struct gdma_context *gc = gd->gdma_context;
	struct mana_context *ac = gd->driver_data;
	struct device *dev = gc->dev;
	struct net_device *ndev;
	int err;
	int i;

	for (i = 0; i < ac->num_ports; i++) {
		ndev = ac->ports[i];
		if (!ndev) {
			if (i == 0)
				dev_err(dev, "No net device to remove\n");
			goto out;
		}

		/* All cleanup actions should stay after rtnl_lock(), otherwise
		 * other functions may access partially cleaned up data.
		 */
		rtnl_lock();

		err = mana_detach(ndev, false);
		if (err)
			netdev_err(ndev, "Failed to detach vPort %d: %d\n",
				   i, err);

		if (suspending) {
			/* No need to unregister the ndev. */
			rtnl_unlock();
			continue;
		}

		unregister_netdevice(ndev);

		rtnl_unlock();

		free_netdev(ndev);
	}

	mana_destroy_eq(ac);
out:
	mana_gd_deregister_device(gd);

	if (suspending)
		return;

	kfree(ac);
	gd->driver_data = NULL;
	gd->gdma_context = NULL;
}