drivers/net/ethernet/emulex/benet/be_main.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2005 - 2016 Broadcom
4 * All rights reserved.
5 *
6 * Contact Information:
7 * linux-drivers@emulex.com
8 *
9 * Emulex
10 * 3333 Susan Street
11 * Costa Mesa, CA 92626
12 */
13
14 #include <linux/prefetch.h>
15 #include <linux/module.h>
16 #include "be.h"
17 #include "be_cmds.h"
18 #include <asm/div64.h>
19 #include <linux/aer.h>
20 #include <linux/if_bridge.h>
21 #include <net/busy_poll.h>
22 #include <net/vxlan.h>
23
24 MODULE_VERSION(DRV_VER);
25 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
26 MODULE_AUTHOR("Emulex Corporation");
27 MODULE_LICENSE("GPL");
28
29 /* num_vfs module param is obsolete.
30 * Use sysfs method to enable/disable VFs.
31 */
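/* (e.g. echo <N> > /sys/bus/pci/devices/<B:D.F>/sriov_numvfs is the
 * standard sysfs interface for enabling/disabling VFs.)
 */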
32 static unsigned int num_vfs;
33 module_param(num_vfs, uint, 0444);
34 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
35
36 static ushort rx_frag_size = 2048;
37 module_param(rx_frag_size, ushort, 0444);
38 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
39
40 /* Per-module error detection/recovery workq shared across all functions.
41 * Each function schedules its own work request on this shared workq.
42 */
43 static struct workqueue_struct *be_err_recovery_workq;
44
45 static const struct pci_device_id be_dev_ids[] = {
46 #ifdef CONFIG_BE2NET_BE2
47 { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
48 { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
49 #endif /* CONFIG_BE2NET_BE2 */
50 #ifdef CONFIG_BE2NET_BE3
51 { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52 { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
53 #endif /* CONFIG_BE2NET_BE3 */
54 #ifdef CONFIG_BE2NET_LANCER
55 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
56 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
57 #endif /* CONFIG_BE2NET_LANCER */
58 #ifdef CONFIG_BE2NET_SKYHAWK
59 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
60 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
61 #endif /* CONFIG_BE2NET_SKYHAWK */
62 { 0 }
63 };
64 MODULE_DEVICE_TABLE(pci, be_dev_ids);
65
66 /* Workqueue used by all functions for deferring cmd calls to the adapter */
67 static struct workqueue_struct *be_wq;
68
69 /* UE Status Low CSR */
70 static const char * const ue_status_low_desc[] = {
71 "CEV",
72 "CTX",
73 "DBUF",
74 "ERX",
75 "Host",
76 "MPU",
77 "NDMA",
78 "PTC ",
79 "RDMA ",
80 "RXF ",
81 "RXIPS ",
82 "RXULP0 ",
83 "RXULP1 ",
84 "RXULP2 ",
85 "TIM ",
86 "TPOST ",
87 "TPRE ",
88 "TXIPS ",
89 "TXULP0 ",
90 "TXULP1 ",
91 "UC ",
92 "WDMA ",
93 "TXULP2 ",
94 "HOST1 ",
95 "P0_OB_LINK ",
96 "P1_OB_LINK ",
97 "HOST_GPIO ",
98 "MBOX ",
99 "ERX2 ",
100 "SPARE ",
101 "JTAG ",
102 "MPU_INTPEND "
103 };
104
105 /* UE Status High CSR */
106 static const char * const ue_status_hi_desc[] = {
107 "LPCMEMHOST",
108 "MGMT_MAC",
109 "PCS0ONLINE",
110 "MPU_IRAM",
111 "PCS1ONLINE",
112 "PCTL0",
113 "PCTL1",
114 "PMEM",
115 "RR",
116 "TXPB",
117 "RXPP",
118 "XAUI",
119 "TXP",
120 "ARM",
121 "IPC",
122 "HOST2",
123 "HOST3",
124 "HOST4",
125 "HOST5",
126 "HOST6",
127 "HOST7",
128 "ECRC",
129 "Poison TLP",
130 "NETC",
131 "PERIPH",
132 "LLTXULP",
133 "D2P",
134 "RCON",
135 "LDMA",
136 "LLTXP",
137 "LLTXPB",
138 "Unknown"
139 };
140
141 #define BE_VF_IF_EN_FLAGS (BE_IF_FLAGS_UNTAGGED | \
142 BE_IF_FLAGS_BROADCAST | \
143 BE_IF_FLAGS_MULTICAST | \
144 BE_IF_FLAGS_PASS_L3L4_ERRORS)
145
146 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
147 {
148 struct be_dma_mem *mem = &q->dma_mem;
149
150 if (mem->va) {
151 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
152 mem->dma);
153 mem->va = NULL;
154 }
155 }
156
157 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
158 u16 len, u16 entry_size)
159 {
160 struct be_dma_mem *mem = &q->dma_mem;
161
162 memset(q, 0, sizeof(*q));
163 q->len = len;
164 q->entry_size = entry_size;
165 mem->size = len * entry_size;
166 mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
167 &mem->dma, GFP_KERNEL);
168 if (!mem->va)
169 return -ENOMEM;
170 return 0;
171 }
172
173 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
174 {
175 u32 reg, enabled;
176
177 pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
178 &reg);
179 enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
180
181 if (!enabled && enable)
182 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
183 else if (enabled && !enable)
184 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
185 else
186 return;
187
188 pci_write_config_dword(adapter->pdev,
189 PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
190 }
191
192 static void be_intr_set(struct be_adapter *adapter, bool enable)
193 {
194 int status = 0;
195
196         /* On Lancer, interrupts can't be controlled via this register */
197 if (lancer_chip(adapter))
198 return;
199
200 if (be_check_error(adapter, BE_ERROR_EEH))
201 return;
202
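        /* Prefer the FW command to toggle host interrupts; if it fails, fall
         * back to flipping the HOSTINTR bit in PCI config space directly.
         */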
203 status = be_cmd_intr_set(adapter, enable);
204 if (status)
205 be_reg_intr_set(adapter, enable);
206 }
207
208 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
209 {
210 u32 val = 0;
211
212 if (be_check_error(adapter, BE_ERROR_HW))
213 return;
214
215 val |= qid & DB_RQ_RING_ID_MASK;
216 val |= posted << DB_RQ_NUM_POSTED_SHIFT;
217
218 wmb();
219 iowrite32(val, adapter->db + DB_RQ_OFFSET);
220 }
221
222 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
223 u16 posted)
224 {
225 u32 val = 0;
226
227 if (be_check_error(adapter, BE_ERROR_HW))
228 return;
229
230 val |= txo->q.id & DB_TXULP_RING_ID_MASK;
231 val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
232
233 wmb();
234 iowrite32(val, adapter->db + txo->db_offset);
235 }
236
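/* Ring the EQ doorbell: the 32-bit value encodes the ring id (plus its
 * extension bits), the re-arm and clear-interrupt flags, the event bit,
 * the number of EQ entries processed and the interrupt delay multiplier.
 */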
237 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
238 bool arm, bool clear_int, u16 num_popped,
239 u32 eq_delay_mult_enc)
240 {
241 u32 val = 0;
242
243 val |= qid & DB_EQ_RING_ID_MASK;
244 val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
245
246 if (be_check_error(adapter, BE_ERROR_HW))
247 return;
248
249 if (arm)
250 val |= 1 << DB_EQ_REARM_SHIFT;
251 if (clear_int)
252 val |= 1 << DB_EQ_CLR_SHIFT;
253 val |= 1 << DB_EQ_EVNT_SHIFT;
254 val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
255 val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
256 iowrite32(val, adapter->db + DB_EQ_OFFSET);
257 }
258
259 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
260 {
261 u32 val = 0;
262
263 val |= qid & DB_CQ_RING_ID_MASK;
264 val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
265 DB_CQ_RING_ID_EXT_MASK_SHIFT);
266
267 if (be_check_error(adapter, BE_ERROR_HW))
268 return;
269
270 if (arm)
271 val |= 1 << DB_CQ_REARM_SHIFT;
272 val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
273 iowrite32(val, adapter->db + DB_CQ_OFFSET);
274 }
275
276 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
277 {
278 int i;
279
280 /* Check if mac has already been added as part of uc-list */
281 for (i = 0; i < adapter->uc_macs; i++) {
282 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
283 /* mac already added, skip addition */
284 adapter->pmac_id[0] = adapter->pmac_id[i + 1];
285 return 0;
286 }
287 }
288
289 return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
290 &adapter->pmac_id[0], 0);
291 }
292
293 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
294 {
295 int i;
296
297 /* Skip deletion if the programmed mac is
298 * being used in uc-list
299 */
300 for (i = 0; i < adapter->uc_macs; i++) {
301 if (adapter->pmac_id[i + 1] == pmac_id)
302 return;
303 }
304 be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
305 }
306
307 static int be_mac_addr_set(struct net_device *netdev, void *p)
308 {
309 struct be_adapter *adapter = netdev_priv(netdev);
310 struct device *dev = &adapter->pdev->dev;
311 struct sockaddr *addr = p;
312 int status;
313 u8 mac[ETH_ALEN];
314 u32 old_pmac_id = adapter->pmac_id[0];
315
316 if (!is_valid_ether_addr(addr->sa_data))
317 return -EADDRNOTAVAIL;
318
319         /* Proceed further only if the user-provided MAC is different
320          * from the active MAC
321 */
322 if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
323 return 0;
324
325         /* BE3 VFs without FILTMGMT privilege are not allowed to set their MAC
326          * address
327 */
328 if (BEx_chip(adapter) && be_virtfn(adapter) &&
329 !check_privilege(adapter, BE_PRIV_FILTMGMT))
330 return -EPERM;
331
332 /* if device is not running, copy MAC to netdev->dev_addr */
333 if (!netif_running(netdev))
334 goto done;
335
336 /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
337 * privilege or if PF did not provision the new MAC address.
338 * On BE3, this cmd will always fail if the VF doesn't have the
339          * FILTMGMT privilege. This failure is OK only if the PF programmed
340 * the MAC for the VF.
341 */
342 mutex_lock(&adapter->rx_filter_lock);
343 status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
344 if (!status) {
345
346 /* Delete the old programmed MAC. This call may fail if the
347 * old MAC was already deleted by the PF driver.
348 */
349 if (adapter->pmac_id[0] != old_pmac_id)
350 be_dev_mac_del(adapter, old_pmac_id);
351 }
352
353 mutex_unlock(&adapter->rx_filter_lock);
354 /* Decide if the new MAC is successfully activated only after
355 * querying the FW
356 */
357 status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
358 adapter->if_handle, true, 0);
359 if (status)
360 goto err;
361
362 /* The MAC change did not happen, either due to lack of privilege
363 * or PF didn't pre-provision.
364 */
365 if (!ether_addr_equal(addr->sa_data, mac)) {
366 status = -EPERM;
367 goto err;
368 }
369
370 /* Remember currently programmed MAC */
371 ether_addr_copy(adapter->dev_mac, addr->sa_data);
372 done:
373 ether_addr_copy(netdev->dev_addr, addr->sa_data);
374 dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
375 return 0;
376 err:
377 dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
378 return status;
379 }
380
381 /* BE2 supports only v0 cmd */
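/* BE3 uses the v1 layout and later chips (e.g. Skyhawk) use v2;
 * Lancer has its own pport-stats path (see populate_lancer_stats()).
 */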
382 static void *hw_stats_from_cmd(struct be_adapter *adapter)
383 {
384 if (BE2_chip(adapter)) {
385 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
386
387 return &cmd->hw_stats;
388 } else if (BE3_chip(adapter)) {
389 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
390
391 return &cmd->hw_stats;
392 } else {
393 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
394
395 return &cmd->hw_stats;
396 }
397 }
398
399 /* BE2 supports only v0 cmd */
400 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
401 {
402 if (BE2_chip(adapter)) {
403 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
404
405 return &hw_stats->erx;
406 } else if (BE3_chip(adapter)) {
407 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
408
409 return &hw_stats->erx;
410 } else {
411 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
412
413 return &hw_stats->erx;
414 }
415 }
416
417 static void populate_be_v0_stats(struct be_adapter *adapter)
418 {
419 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
420 struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
421 struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
422 struct be_port_rxf_stats_v0 *port_stats =
423 &rxf_stats->port[adapter->port_num];
424 struct be_drv_stats *drvs = &adapter->drv_stats;
425
426 be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
427 drvs->rx_pause_frames = port_stats->rx_pause_frames;
428 drvs->rx_crc_errors = port_stats->rx_crc_errors;
429 drvs->rx_control_frames = port_stats->rx_control_frames;
430 drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
431 drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
432 drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
433 drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
434 drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
435 drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
436 drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
437 drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
438 drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
439 drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
440 drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
441 drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
442 drvs->rx_dropped_header_too_small =
443 port_stats->rx_dropped_header_too_small;
444 drvs->rx_address_filtered =
445 port_stats->rx_address_filtered +
446 port_stats->rx_vlan_filtered;
447 drvs->rx_alignment_symbol_errors =
448 port_stats->rx_alignment_symbol_errors;
449
450 drvs->tx_pauseframes = port_stats->tx_pauseframes;
451 drvs->tx_controlframes = port_stats->tx_controlframes;
452
453 if (adapter->port_num)
454 drvs->jabber_events = rxf_stats->port1_jabber_events;
455 else
456 drvs->jabber_events = rxf_stats->port0_jabber_events;
457 drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
458 drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
459 drvs->forwarded_packets = rxf_stats->forwarded_packets;
460 drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
461 drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
462 drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
463 adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
464 }
465
466 static void populate_be_v1_stats(struct be_adapter *adapter)
467 {
468 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
469 struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
470 struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
471 struct be_port_rxf_stats_v1 *port_stats =
472 &rxf_stats->port[adapter->port_num];
473 struct be_drv_stats *drvs = &adapter->drv_stats;
474
475 be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
476 drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
477 drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
478 drvs->rx_pause_frames = port_stats->rx_pause_frames;
479 drvs->rx_crc_errors = port_stats->rx_crc_errors;
480 drvs->rx_control_frames = port_stats->rx_control_frames;
481 drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
482 drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
483 drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
484 drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
485 drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
486 drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
487 drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
488 drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
489 drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
490 drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
491 drvs->rx_dropped_header_too_small =
492 port_stats->rx_dropped_header_too_small;
493 drvs->rx_input_fifo_overflow_drop =
494 port_stats->rx_input_fifo_overflow_drop;
495 drvs->rx_address_filtered = port_stats->rx_address_filtered;
496 drvs->rx_alignment_symbol_errors =
497 port_stats->rx_alignment_symbol_errors;
498 drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
499 drvs->tx_pauseframes = port_stats->tx_pauseframes;
500 drvs->tx_controlframes = port_stats->tx_controlframes;
501 drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
502 drvs->jabber_events = port_stats->jabber_events;
503 drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
504 drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
505 drvs->forwarded_packets = rxf_stats->forwarded_packets;
506 drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
507 drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
508 drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
509 adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
510 }
511
512 static void populate_be_v2_stats(struct be_adapter *adapter)
513 {
514 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
515 struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
516 struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
517 struct be_port_rxf_stats_v2 *port_stats =
518 &rxf_stats->port[adapter->port_num];
519 struct be_drv_stats *drvs = &adapter->drv_stats;
520
521 be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
522 drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
523 drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
524 drvs->rx_pause_frames = port_stats->rx_pause_frames;
525 drvs->rx_crc_errors = port_stats->rx_crc_errors;
526 drvs->rx_control_frames = port_stats->rx_control_frames;
527 drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
528 drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
529 drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
530 drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
531 drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
532 drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
533 drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
534 drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
535 drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
536 drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
537 drvs->rx_dropped_header_too_small =
538 port_stats->rx_dropped_header_too_small;
539 drvs->rx_input_fifo_overflow_drop =
540 port_stats->rx_input_fifo_overflow_drop;
541 drvs->rx_address_filtered = port_stats->rx_address_filtered;
542 drvs->rx_alignment_symbol_errors =
543 port_stats->rx_alignment_symbol_errors;
544 drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
545 drvs->tx_pauseframes = port_stats->tx_pauseframes;
546 drvs->tx_controlframes = port_stats->tx_controlframes;
547 drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
548 drvs->jabber_events = port_stats->jabber_events;
549 drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
550 drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
551 drvs->forwarded_packets = rxf_stats->forwarded_packets;
552 drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
553 drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
554 drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
555 adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
556 if (be_roce_supported(adapter)) {
557 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
558 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
559 drvs->rx_roce_frames = port_stats->roce_frames_received;
560 drvs->roce_drops_crc = port_stats->roce_drops_crc;
561 drvs->roce_drops_payload_len =
562 port_stats->roce_drops_payload_len;
563 }
564 }
565
566 static void populate_lancer_stats(struct be_adapter *adapter)
567 {
568 struct be_drv_stats *drvs = &adapter->drv_stats;
569 struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
570
571 be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
572 drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
573 drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
574 drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
575 drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
576 drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
577 drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
578 drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
579 drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
580 drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
581 drvs->rx_dropped_tcp_length =
582 pport_stats->rx_dropped_invalid_tcp_length;
583 drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
584 drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
585 drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
586 drvs->rx_dropped_header_too_small =
587 pport_stats->rx_dropped_header_too_small;
588 drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
589 drvs->rx_address_filtered =
590 pport_stats->rx_address_filtered +
591 pport_stats->rx_vlan_filtered;
592 drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
593 drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
594 drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
595 drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
596 drvs->jabber_events = pport_stats->rx_jabbers;
597 drvs->forwarded_packets = pport_stats->num_forwards_lo;
598 drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
599 drvs->rx_drops_too_many_frags =
600 pport_stats->rx_drops_too_many_frags_lo;
601 }
602
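/* Fold a wrapping 16-bit HW counter into a running 32-bit total.
 * Example: if the low 16 bits of *acc are 0xFFF0 and the HW counter has
 * wrapped to 0x0005, val < lo(*acc), so 65536 is added to keep the
 * accumulated value monotonic.
 */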
603 static void accumulate_16bit_val(u32 *acc, u16 val)
604 {
605 #define lo(x) (x & 0xFFFF)
606 #define hi(x) (x & 0xFFFF0000)
607 bool wrapped = val < lo(*acc);
608 u32 newacc = hi(*acc) + val;
609
610 if (wrapped)
611 newacc += 65536;
612 WRITE_ONCE(*acc, newacc);
613 }
614
615 static void populate_erx_stats(struct be_adapter *adapter,
616 struct be_rx_obj *rxo, u32 erx_stat)
617 {
618 if (!BEx_chip(adapter))
619 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
620 else
621                 /* the erx HW counter below can actually wrap around after
622                  * 65535; the driver accumulates it into a 32-bit value
623 */
624 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
625 (u16)erx_stat);
626 }
627
628 void be_parse_stats(struct be_adapter *adapter)
629 {
630 struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
631 struct be_rx_obj *rxo;
632 int i;
633 u32 erx_stat;
634
635 if (lancer_chip(adapter)) {
636 populate_lancer_stats(adapter);
637 } else {
638 if (BE2_chip(adapter))
639 populate_be_v0_stats(adapter);
640 else if (BE3_chip(adapter))
641 /* for BE3 */
642 populate_be_v1_stats(adapter);
643 else
644 populate_be_v2_stats(adapter);
645
646 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
647 for_all_rx_queues(adapter, rxo, i) {
648 erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
649 populate_erx_stats(adapter, rxo, erx_stat);
650 }
651 }
652 }
653
654 static void be_get_stats64(struct net_device *netdev,
655 struct rtnl_link_stats64 *stats)
656 {
657 struct be_adapter *adapter = netdev_priv(netdev);
658 struct be_drv_stats *drvs = &adapter->drv_stats;
659 struct be_rx_obj *rxo;
660 struct be_tx_obj *txo;
661 u64 pkts, bytes;
662 unsigned int start;
663 int i;
664
665 for_all_rx_queues(adapter, rxo, i) {
666 const struct be_rx_stats *rx_stats = rx_stats(rxo);
667
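                /* Snapshot the 64-bit counters under the u64_stats seqcount so
                 * the reads are consistent even on 32-bit hosts.
                 */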
668 do {
669 start = u64_stats_fetch_begin_irq(&rx_stats->sync);
670 pkts = rx_stats(rxo)->rx_pkts;
671 bytes = rx_stats(rxo)->rx_bytes;
672 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
673 stats->rx_packets += pkts;
674 stats->rx_bytes += bytes;
675 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
676 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
677 rx_stats(rxo)->rx_drops_no_frags;
678 }
679
680 for_all_tx_queues(adapter, txo, i) {
681 const struct be_tx_stats *tx_stats = tx_stats(txo);
682
683 do {
684 start = u64_stats_fetch_begin_irq(&tx_stats->sync);
685 pkts = tx_stats(txo)->tx_pkts;
686 bytes = tx_stats(txo)->tx_bytes;
687 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
688 stats->tx_packets += pkts;
689 stats->tx_bytes += bytes;
690 }
691
692 /* bad pkts received */
693 stats->rx_errors = drvs->rx_crc_errors +
694 drvs->rx_alignment_symbol_errors +
695 drvs->rx_in_range_errors +
696 drvs->rx_out_range_errors +
697 drvs->rx_frame_too_long +
698 drvs->rx_dropped_too_small +
699 drvs->rx_dropped_too_short +
700 drvs->rx_dropped_header_too_small +
701 drvs->rx_dropped_tcp_length +
702 drvs->rx_dropped_runt;
703
704 /* detailed rx errors */
705 stats->rx_length_errors = drvs->rx_in_range_errors +
706 drvs->rx_out_range_errors +
707 drvs->rx_frame_too_long;
708
709 stats->rx_crc_errors = drvs->rx_crc_errors;
710
711 /* frame alignment errors */
712 stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
713
714 /* receiver fifo overrun */
715         /* drops_no_pbuf is not per i/f, it's per BE card */
716 stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
717 drvs->rx_input_fifo_overflow_drop +
718 drvs->rx_drops_no_pbuf;
719 }
720
721 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
722 {
723 struct net_device *netdev = adapter->netdev;
724
725 if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
726 netif_carrier_off(netdev);
727 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
728 }
729
730 if (link_status)
731 netif_carrier_on(netdev);
732 else
733 netif_carrier_off(netdev);
734
735 netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
736 }
737
738 static int be_gso_hdr_len(struct sk_buff *skb)
739 {
740 if (skb->encapsulation)
741 return skb_inner_transport_offset(skb) +
742 inner_tcp_hdrlen(skb);
743 return skb_transport_offset(skb) + tcp_hdrlen(skb);
744 }
745
746 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
747 {
748 struct be_tx_stats *stats = tx_stats(txo);
749 u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
750 /* Account for headers which get duplicated in TSO pkt */
751 u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
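        /* e.g. a TSO skb that the HW segments into 10 frames puts 9 extra
         * copies of its Ethernet/IP/TCP headers on the wire beyond skb->len.
         */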
752
753 u64_stats_update_begin(&stats->sync);
754 stats->tx_reqs++;
755 stats->tx_bytes += skb->len + dup_hdr_len;
756 stats->tx_pkts += tx_pkts;
757 if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
758 stats->tx_vxlan_offload_pkts += tx_pkts;
759 u64_stats_update_end(&stats->sync);
760 }
761
762 /* Returns number of WRBs needed for the skb */
763 static u32 skb_wrb_cnt(struct sk_buff *skb)
764 {
765 /* +1 for the header wrb */
766 return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
767 }
768
769 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
770 {
771 wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
772 wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
773 wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
774 wrb->rsvd0 = 0;
775 }
776
777 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
778 * to avoid the swap and shift/mask operations in wrb_fill().
779 */
780 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
781 {
782 wrb->frag_pa_hi = 0;
783 wrb->frag_pa_lo = 0;
784 wrb->frag_len = 0;
785 wrb->rsvd0 = 0;
786 }
787
788 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
789 struct sk_buff *skb)
790 {
791 u8 vlan_prio;
792 u16 vlan_tag;
793
794 vlan_tag = skb_vlan_tag_get(skb);
795 vlan_prio = skb_vlan_tag_get_prio(skb);
796         /* If the vlan priority provided by the OS is NOT in the available bmap */
797 if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
798 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
799 adapter->recommended_prio_bits;
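        /* e.g. if the skb carries priority 5 but bit 5 is clear in
         * vlan_prio_bmap, the PCP field is replaced with the FW-recommended
         * priority while the VID bits are preserved.
         */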
800
801 return vlan_tag;
802 }
803
804 /* Used only for IP tunnel packets */
805 static u16 skb_inner_ip_proto(struct sk_buff *skb)
806 {
807 return (inner_ip_hdr(skb)->version == 4) ?
808 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
809 }
810
811 static u16 skb_ip_proto(struct sk_buff *skb)
812 {
813 return (ip_hdr(skb)->version == 4) ?
814 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
815 }
816
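/* Helpers used to stop the TX queue when a maximally fragmented packet might
 * no longer fit, and to wake it only after it has drained below half
 * (stop/wake hysteresis).
 */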
817 static inline bool be_is_txq_full(struct be_tx_obj *txo)
818 {
819 return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
820 }
821
822 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
823 {
824 return atomic_read(&txo->q.used) < txo->q.len / 2;
825 }
826
827 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
828 {
829 return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
830 }
831
832 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
833 struct sk_buff *skb,
834 struct be_wrb_params *wrb_params)
835 {
836 u16 proto;
837
838 if (skb_is_gso(skb)) {
839 BE_WRB_F_SET(wrb_params->features, LSO, 1);
840 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
841 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
842 BE_WRB_F_SET(wrb_params->features, LSO6, 1);
843 } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
844 if (skb->encapsulation) {
845 BE_WRB_F_SET(wrb_params->features, IPCS, 1);
846 proto = skb_inner_ip_proto(skb);
847 } else {
848 proto = skb_ip_proto(skb);
849 }
850 if (proto == IPPROTO_TCP)
851 BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
852 else if (proto == IPPROTO_UDP)
853 BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
854 }
855
856 if (skb_vlan_tag_present(skb)) {
857 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
858 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
859 }
860
861 BE_WRB_F_SET(wrb_params->features, CRC, 1);
862 }
863
864 static void wrb_fill_hdr(struct be_adapter *adapter,
865 struct be_eth_hdr_wrb *hdr,
866 struct be_wrb_params *wrb_params,
867 struct sk_buff *skb)
868 {
869 memset(hdr, 0, sizeof(*hdr));
870
871 SET_TX_WRB_HDR_BITS(crc, hdr,
872 BE_WRB_F_GET(wrb_params->features, CRC));
873 SET_TX_WRB_HDR_BITS(ipcs, hdr,
874 BE_WRB_F_GET(wrb_params->features, IPCS));
875 SET_TX_WRB_HDR_BITS(tcpcs, hdr,
876 BE_WRB_F_GET(wrb_params->features, TCPCS));
877 SET_TX_WRB_HDR_BITS(udpcs, hdr,
878 BE_WRB_F_GET(wrb_params->features, UDPCS));
879
880 SET_TX_WRB_HDR_BITS(lso, hdr,
881 BE_WRB_F_GET(wrb_params->features, LSO));
882 SET_TX_WRB_HDR_BITS(lso6, hdr,
883 BE_WRB_F_GET(wrb_params->features, LSO6));
884 SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
885
886 /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
887 * hack is not needed, the evt bit is set while ringing DB.
888 */
889 SET_TX_WRB_HDR_BITS(event, hdr,
890 BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
891 SET_TX_WRB_HDR_BITS(vlan, hdr,
892 BE_WRB_F_GET(wrb_params->features, VLAN));
893 SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
894
895 SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
896 SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
897 SET_TX_WRB_HDR_BITS(mgmt, hdr,
898 BE_WRB_F_GET(wrb_params->features, OS2BMC));
899 }
900
901 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
902 bool unmap_single)
903 {
904 dma_addr_t dma;
905 u32 frag_len = le32_to_cpu(wrb->frag_len);
906
907
908 dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
909 (u64)le32_to_cpu(wrb->frag_pa_lo);
910 if (frag_len) {
911 if (unmap_single)
912 dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
913 else
914 dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
915 }
916 }
917
918 /* Grab a WRB header for xmit */
919 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
920 {
921 u32 head = txo->q.head;
922
923 queue_head_inc(&txo->q);
924 return head;
925 }
926
927 /* Set up the WRB header for xmit */
928 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
929 struct be_tx_obj *txo,
930 struct be_wrb_params *wrb_params,
931 struct sk_buff *skb, u16 head)
932 {
933 u32 num_frags = skb_wrb_cnt(skb);
934 struct be_queue_info *txq = &txo->q;
935 struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
936
937 wrb_fill_hdr(adapter, hdr, wrb_params, skb);
938 be_dws_cpu_to_le(hdr, sizeof(*hdr));
939
940 BUG_ON(txo->sent_skb_list[head]);
941 txo->sent_skb_list[head] = skb;
942 txo->last_req_hdr = head;
943 atomic_add(num_frags, &txq->used);
944 txo->last_req_wrb_cnt = num_frags;
945 txo->pend_wrb_cnt += num_frags;
946 }
947
948 /* Setup a WRB fragment (buffer descriptor) for xmit */
949 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
950 int len)
951 {
952 struct be_eth_wrb *wrb;
953 struct be_queue_info *txq = &txo->q;
954
955 wrb = queue_head_node(txq);
956 wrb_fill(wrb, busaddr, len);
957 queue_head_inc(txq);
958 }
959
960 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
961 * was invoked. The producer index is restored to the previous packet and the
962 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
963 */
964 static void be_xmit_restore(struct be_adapter *adapter,
965 struct be_tx_obj *txo, u32 head, bool map_single,
966 u32 copied)
967 {
968 struct device *dev;
969 struct be_eth_wrb *wrb;
970 struct be_queue_info *txq = &txo->q;
971
972 dev = &adapter->pdev->dev;
973 txq->head = head;
974
975 /* skip the first wrb (hdr); it's not mapped */
976 queue_head_inc(txq);
977 while (copied) {
978 wrb = queue_head_node(txq);
979 unmap_tx_frag(dev, wrb, map_single);
980 map_single = false;
981 copied -= le32_to_cpu(wrb->frag_len);
982 queue_head_inc(txq);
983 }
984
985 txq->head = head;
986 }
987
988 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
989 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
990 * of WRBs used up by the packet.
991 */
992 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
993 struct sk_buff *skb,
994 struct be_wrb_params *wrb_params)
995 {
996 u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
997 struct device *dev = &adapter->pdev->dev;
998 bool map_single = false;
999 u32 head;
1000 dma_addr_t busaddr;
1001 int len;
1002
1003 head = be_tx_get_wrb_hdr(txo);
1004
1005 if (skb->len > skb->data_len) {
1006 len = skb_headlen(skb);
1007
1008 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1009 if (dma_mapping_error(dev, busaddr))
1010 goto dma_err;
1011 map_single = true;
1012 be_tx_setup_wrb_frag(txo, busaddr, len);
1013 copied += len;
1014 }
1015
1016 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1017 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1018 len = skb_frag_size(frag);
1019
1020 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1021 if (dma_mapping_error(dev, busaddr))
1022 goto dma_err;
1023 be_tx_setup_wrb_frag(txo, busaddr, len);
1024 copied += len;
1025 }
1026
1027 be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1028
1029 be_tx_stats_update(txo, skb);
1030 return wrb_cnt;
1031
1032 dma_err:
1033 adapter->drv_stats.dma_map_errors++;
1034 be_xmit_restore(adapter, txo, head, map_single, copied);
1035 return 0;
1036 }
1037
1038 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1039 {
1040 return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1041 }
1042
1043 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1044 struct sk_buff *skb,
1045 struct be_wrb_params
1046 *wrb_params)
1047 {
1048 bool insert_vlan = false;
1049 u16 vlan_tag = 0;
1050
1051 skb = skb_share_check(skb, GFP_ATOMIC);
1052 if (unlikely(!skb))
1053 return skb;
1054
1055 if (skb_vlan_tag_present(skb)) {
1056 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1057 insert_vlan = true;
1058 }
1059
1060 if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1061 if (!insert_vlan) {
1062 vlan_tag = adapter->pvid;
1063 insert_vlan = true;
1064 }
1065                 /* F/W workaround: setting skip_hw_vlan = 1 informs the F/W to
1066                  * skip VLAN insertion
1067 */
1068 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1069 }
1070
1071 if (insert_vlan) {
1072 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1073 vlan_tag);
1074 if (unlikely(!skb))
1075 return skb;
1076 __vlan_hwaccel_clear_tag(skb);
1077 }
1078
1079 /* Insert the outer VLAN, if any */
1080 if (adapter->qnq_vid) {
1081 vlan_tag = adapter->qnq_vid;
1082 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1083 vlan_tag);
1084 if (unlikely(!skb))
1085 return skb;
1086 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1087 }
1088
1089 return skb;
1090 }
1091
1092 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1093 {
1094 struct ethhdr *eh = (struct ethhdr *)skb->data;
1095 u16 offset = ETH_HLEN;
1096
1097 if (eh->h_proto == htons(ETH_P_IPV6)) {
1098 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1099
1100 offset += sizeof(struct ipv6hdr);
1101 if (ip6h->nexthdr != NEXTHDR_TCP &&
1102 ip6h->nexthdr != NEXTHDR_UDP) {
1103 struct ipv6_opt_hdr *ehdr =
1104 (struct ipv6_opt_hdr *)(skb->data + offset);
1105
1106 /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1107 if (ehdr->hdrlen == 0xff)
1108 return true;
1109 }
1110 }
1111 return false;
1112 }
1113
1114 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1115 {
1116 return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1117 }
1118
1119 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1120 {
1121 return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1122 }
1123
1124 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1125 struct sk_buff *skb,
1126 struct be_wrb_params
1127 *wrb_params)
1128 {
1129 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1130 unsigned int eth_hdr_len;
1131 struct iphdr *ip;
1132
1133 /* For padded packets, BE HW modifies tot_len field in IP header
1134          * incorrectly when the VLAN tag is inserted by HW.
1135 * For padded packets, Lancer computes incorrect checksum.
1136 */
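        /* e.g. a 54-byte TCP ACK (14B Ethernet + 20B IP + 20B TCP) padded to
         * 60 bytes by the stack is trimmed back to eth_hdr_len + tot_len = 54
         * bytes below, so the HW never operates on the pad bytes.
         */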
1137 eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1138 VLAN_ETH_HLEN : ETH_HLEN;
1139 if (skb->len <= 60 &&
1140 (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1141 is_ipv4_pkt(skb)) {
1142 ip = (struct iphdr *)ip_hdr(skb);
1143 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1144 }
1145
1146 /* If vlan tag is already inlined in the packet, skip HW VLAN
1147 * tagging in pvid-tagging mode
1148 */
1149 if (be_pvid_tagging_enabled(adapter) &&
1150 veh->h_vlan_proto == htons(ETH_P_8021Q))
1151 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1152
1153         /* HW has a bug wherein it will calculate the CSUM for VLAN
1154          * pkts even though checksum offload is disabled.
1155          * Manually insert the VLAN in the pkt.
1156 */
1157 if (skb->ip_summed != CHECKSUM_PARTIAL &&
1158 skb_vlan_tag_present(skb)) {
1159 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1160 if (unlikely(!skb))
1161 goto err;
1162 }
1163
1164 /* HW may lockup when VLAN HW tagging is requested on
1165 * certain ipv6 packets. Drop such pkts if the HW workaround to
1166 * skip HW tagging is not enabled by FW.
1167 */
1168 if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1169 (adapter->pvid || adapter->qnq_vid) &&
1170 !qnq_async_evt_rcvd(adapter)))
1171 goto tx_drop;
1172
1173 /* Manual VLAN tag insertion to prevent:
1174 * ASIC lockup when the ASIC inserts VLAN tag into
1175 * certain ipv6 packets. Insert VLAN tags in driver,
1176 * and set event, completion, vlan bits accordingly
1177 * in the Tx WRB.
1178 */
1179 if (be_ipv6_tx_stall_chk(adapter, skb) &&
1180 be_vlan_tag_tx_chk(adapter, skb)) {
1181 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1182 if (unlikely(!skb))
1183 goto err;
1184 }
1185
1186 return skb;
1187 tx_drop:
1188 dev_kfree_skb_any(skb);
1189 err:
1190 return NULL;
1191 }
1192
1193 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1194 struct sk_buff *skb,
1195 struct be_wrb_params *wrb_params)
1196 {
1197 int err;
1198
1199 /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1200          * packets that are 32 bytes or less may cause a transmit stall
1201          * on that port. The workaround is to pad such packets
1202          * (len <= 32 bytes) to a minimum length of 36 bytes.
1203 */
1204 if (skb->len <= 32) {
1205 if (skb_put_padto(skb, 36))
1206 return NULL;
1207 }
1208
1209 if (BEx_chip(adapter) || lancer_chip(adapter)) {
1210 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1211 if (!skb)
1212 return NULL;
1213 }
1214
1215 /* The stack can send us skbs with length greater than
1216 * what the HW can handle. Trim the extra bytes.
1217 */
1218 WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1219 err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1220 WARN_ON(err);
1221
1222 return skb;
1223 }
1224
1225 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1226 {
1227 struct be_queue_info *txq = &txo->q;
1228 struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1229
1230 /* Mark the last request eventable if it hasn't been marked already */
1231 if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1232 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1233
1234         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1235 if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1236 wrb_fill_dummy(queue_head_node(txq));
1237 queue_head_inc(txq);
1238 atomic_inc(&txq->used);
1239 txo->pend_wrb_cnt++;
1240 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1241 TX_HDR_WRB_NUM_SHIFT);
1242 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1243 TX_HDR_WRB_NUM_SHIFT);
1244 }
1245 be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1246 txo->pend_wrb_cnt = 0;
1247 }
1248
1249 /* OS2BMC related */
1250
1251 #define DHCP_CLIENT_PORT 68
1252 #define DHCP_SERVER_PORT 67
1253 #define NET_BIOS_PORT1 137
1254 #define NET_BIOS_PORT2 138
1255 #define DHCPV6_RAS_PORT 547
1256
1257 #define is_mc_allowed_on_bmc(adapter, eh) \
1258 (!is_multicast_filt_enabled(adapter) && \
1259 is_multicast_ether_addr(eh->h_dest) && \
1260 !is_broadcast_ether_addr(eh->h_dest))
1261
1262 #define is_bc_allowed_on_bmc(adapter, eh) \
1263 (!is_broadcast_filt_enabled(adapter) && \
1264 is_broadcast_ether_addr(eh->h_dest))
1265
1266 #define is_arp_allowed_on_bmc(adapter, skb) \
1267 (is_arp(skb) && is_arp_filt_enabled(adapter))
1268
1269 #define is_arp(skb) (skb->protocol == htons(ETH_P_ARP))
1270
1271 #define is_arp_filt_enabled(adapter) \
1272 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1273
1274 #define is_dhcp_client_filt_enabled(adapter) \
1275 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1276
1277 #define is_dhcp_srvr_filt_enabled(adapter) \
1278 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1279
1280 #define is_nbios_filt_enabled(adapter) \
1281 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1282
1283 #define is_ipv6_na_filt_enabled(adapter) \
1284 (adapter->bmc_filt_mask & \
1285 BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1286
1287 #define is_ipv6_ra_filt_enabled(adapter) \
1288 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1289
1290 #define is_ipv6_ras_filt_enabled(adapter) \
1291 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1292
1293 #define is_broadcast_filt_enabled(adapter) \
1294 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1295
1296 #define is_multicast_filt_enabled(adapter) \
1297 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1298
1299 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1300 struct sk_buff **skb)
1301 {
1302 struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1303 bool os2bmc = false;
1304
1305 if (!be_is_os2bmc_enabled(adapter))
1306 goto done;
1307
1308 if (!is_multicast_ether_addr(eh->h_dest))
1309 goto done;
1310
1311 if (is_mc_allowed_on_bmc(adapter, eh) ||
1312 is_bc_allowed_on_bmc(adapter, eh) ||
1313 is_arp_allowed_on_bmc(adapter, (*skb))) {
1314 os2bmc = true;
1315 goto done;
1316 }
1317
1318 if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1319 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1320 u8 nexthdr = hdr->nexthdr;
1321
1322 if (nexthdr == IPPROTO_ICMPV6) {
1323 struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1324
1325 switch (icmp6->icmp6_type) {
1326 case NDISC_ROUTER_ADVERTISEMENT:
1327 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1328 goto done;
1329 case NDISC_NEIGHBOUR_ADVERTISEMENT:
1330 os2bmc = is_ipv6_na_filt_enabled(adapter);
1331 goto done;
1332 default:
1333 break;
1334 }
1335 }
1336 }
1337
1338 if (is_udp_pkt((*skb))) {
1339 struct udphdr *udp = udp_hdr((*skb));
1340
1341 switch (ntohs(udp->dest)) {
1342 case DHCP_CLIENT_PORT:
1343 os2bmc = is_dhcp_client_filt_enabled(adapter);
1344 goto done;
1345 case DHCP_SERVER_PORT:
1346 os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1347 goto done;
1348 case NET_BIOS_PORT1:
1349 case NET_BIOS_PORT2:
1350 os2bmc = is_nbios_filt_enabled(adapter);
1351 goto done;
1352 case DHCPV6_RAS_PORT:
1353 os2bmc = is_ipv6_ras_filt_enabled(adapter);
1354 goto done;
1355 default:
1356 break;
1357 }
1358 }
1359 done:
1360         /* For packets over a vlan that are destined
1361          * to the BMC, the ASIC expects the vlan to be inline in the packet.
1362 */
1363 if (os2bmc)
1364 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1365
1366 return os2bmc;
1367 }
1368
1369 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1370 {
1371 struct be_adapter *adapter = netdev_priv(netdev);
1372 u16 q_idx = skb_get_queue_mapping(skb);
1373 struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1374 struct be_wrb_params wrb_params = { 0 };
1375 bool flush = !netdev_xmit_more();
1376 u16 wrb_cnt;
1377
1378 skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1379 if (unlikely(!skb))
1380 goto drop;
1381
1382 be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1383
1384 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1385 if (unlikely(!wrb_cnt)) {
1386 dev_kfree_skb_any(skb);
1387 goto drop;
1388 }
1389
1390 /* if os2bmc is enabled and if the pkt is destined to bmc,
1391 * enqueue the pkt a 2nd time with mgmt bit set.
1392 */
1393 if (be_send_pkt_to_bmc(adapter, &skb)) {
1394 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1395 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1396 if (unlikely(!wrb_cnt))
1397 goto drop;
1398 else
1399 skb_get(skb);
1400 }
1401
1402 if (be_is_txq_full(txo)) {
1403 netif_stop_subqueue(netdev, q_idx);
1404 tx_stats(txo)->tx_stops++;
1405 }
1406
1407 if (flush || __netif_subqueue_stopped(netdev, q_idx))
1408 be_xmit_flush(adapter, txo);
1409
1410 return NETDEV_TX_OK;
1411 drop:
1412 tx_stats(txo)->tx_drv_drops++;
1413 /* Flush the already enqueued tx requests */
1414 if (flush && txo->pend_wrb_cnt)
1415 be_xmit_flush(adapter, txo);
1416
1417 return NETDEV_TX_OK;
1418 }
1419
1420 static void be_tx_timeout(struct net_device *netdev)
1421 {
1422 struct be_adapter *adapter = netdev_priv(netdev);
1423 struct device *dev = &adapter->pdev->dev;
1424 struct be_tx_obj *txo;
1425 struct sk_buff *skb;
1426 struct tcphdr *tcphdr;
1427 struct udphdr *udphdr;
1428 u32 *entry;
1429 int status;
1430 int i, j;
1431
1432 for_all_tx_queues(adapter, txo, i) {
1433 dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1434 i, txo->q.head, txo->q.tail,
1435 atomic_read(&txo->q.used), txo->q.id);
1436
1437 entry = txo->q.dma_mem.va;
1438 for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1439 if (entry[j] != 0 || entry[j + 1] != 0 ||
1440 entry[j + 2] != 0 || entry[j + 3] != 0) {
1441 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1442 j, entry[j], entry[j + 1],
1443 entry[j + 2], entry[j + 3]);
1444 }
1445 }
1446
1447 entry = txo->cq.dma_mem.va;
1448 dev_info(dev, "TXCQ Dump: %d H: %d T: %d used: %d\n",
1449 i, txo->cq.head, txo->cq.tail,
1450 atomic_read(&txo->cq.used));
1451 for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1452 if (entry[j] != 0 || entry[j + 1] != 0 ||
1453 entry[j + 2] != 0 || entry[j + 3] != 0) {
1454 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1455 j, entry[j], entry[j + 1],
1456 entry[j + 2], entry[j + 3]);
1457 }
1458 }
1459
1460 for (j = 0; j < TX_Q_LEN; j++) {
1461 if (txo->sent_skb_list[j]) {
1462 skb = txo->sent_skb_list[j];
1463 if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1464 tcphdr = tcp_hdr(skb);
1465 dev_info(dev, "TCP source port %d\n",
1466 ntohs(tcphdr->source));
1467 dev_info(dev, "TCP dest port %d\n",
1468 ntohs(tcphdr->dest));
1469                                         dev_info(dev, "TCP sequence num %u\n",
1470                                                  ntohl(tcphdr->seq));
1471                                         dev_info(dev, "TCP ack_seq %u\n",
1472                                                  ntohl(tcphdr->ack_seq));
1473 } else if (ip_hdr(skb)->protocol ==
1474 IPPROTO_UDP) {
1475 udphdr = udp_hdr(skb);
1476 dev_info(dev, "UDP source port %d\n",
1477 ntohs(udphdr->source));
1478 dev_info(dev, "UDP dest port %d\n",
1479 ntohs(udphdr->dest));
1480 }
1481 dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1482 j, skb, skb->len, skb->protocol);
1483 }
1484 }
1485 }
1486
1487 if (lancer_chip(adapter)) {
1488 dev_info(dev, "Initiating reset due to tx timeout\n");
1489 dev_info(dev, "Resetting adapter\n");
1490 status = lancer_physdev_ctrl(adapter,
1491 PHYSDEV_CONTROL_FW_RESET_MASK);
1492 if (status)
1493 dev_err(dev, "Reset failed .. Reboot server\n");
1494 }
1495 }
1496
1497 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1498 {
1499 return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1500 BE_IF_FLAGS_ALL_PROMISCUOUS;
1501 }
1502
1503 static int be_set_vlan_promisc(struct be_adapter *adapter)
1504 {
1505 struct device *dev = &adapter->pdev->dev;
1506 int status;
1507
1508 if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1509 return 0;
1510
1511 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1512 if (!status) {
1513 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1514 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1515 } else {
1516 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1517 }
1518 return status;
1519 }
1520
1521 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1522 {
1523 struct device *dev = &adapter->pdev->dev;
1524 int status;
1525
1526 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1527 if (!status) {
1528 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1529 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1530 }
1531 return status;
1532 }
1533
1534 /*
1535 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1536 * If the user configures more, place BE in vlan promiscuous mode.
1537 */
1538 static int be_vid_config(struct be_adapter *adapter)
1539 {
1540 struct device *dev = &adapter->pdev->dev;
1541 u16 vids[BE_NUM_VLANS_SUPPORTED];
1542 u16 num = 0, i = 0;
1543 int status = 0;
1544
1545         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1546 if (adapter->netdev->flags & IFF_PROMISC)
1547 return 0;
1548
1549 if (adapter->vlans_added > be_max_vlans(adapter))
1550 return be_set_vlan_promisc(adapter);
1551
1552 if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1553 status = be_clear_vlan_promisc(adapter);
1554 if (status)
1555 return status;
1556 }
1557 /* Construct VLAN Table to give to HW */
1558 for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1559 vids[num++] = cpu_to_le16(i);
1560
1561 status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1562 if (status) {
1563 dev_err(dev, "Setting HW VLAN filtering failed\n");
1564 /* Set to VLAN promisc mode as setting VLAN filter failed */
1565 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1566 addl_status(status) ==
1567 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1568 return be_set_vlan_promisc(adapter);
1569 }
1570 return status;
1571 }
1572
1573 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1574 {
1575 struct be_adapter *adapter = netdev_priv(netdev);
1576 int status = 0;
1577
1578 mutex_lock(&adapter->rx_filter_lock);
1579
1580 /* Packets with VID 0 are always received by Lancer by default */
1581 if (lancer_chip(adapter) && vid == 0)
1582 goto done;
1583
1584 if (test_bit(vid, adapter->vids))
1585 goto done;
1586
1587 set_bit(vid, adapter->vids);
1588 adapter->vlans_added++;
1589
1590 status = be_vid_config(adapter);
1591 done:
1592 mutex_unlock(&adapter->rx_filter_lock);
1593 return status;
1594 }
1595
1596 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1597 {
1598 struct be_adapter *adapter = netdev_priv(netdev);
1599 int status = 0;
1600
1601 mutex_lock(&adapter->rx_filter_lock);
1602
1603 /* Packets with VID 0 are always received by Lancer by default */
1604 if (lancer_chip(adapter) && vid == 0)
1605 goto done;
1606
1607 if (!test_bit(vid, adapter->vids))
1608 goto done;
1609
1610 clear_bit(vid, adapter->vids);
1611 adapter->vlans_added--;
1612
1613 status = be_vid_config(adapter);
1614 done:
1615 mutex_unlock(&adapter->rx_filter_lock);
1616 return status;
1617 }
1618
1619 static void be_set_all_promisc(struct be_adapter *adapter)
1620 {
1621 be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1622 adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1623 }
1624
1625 static void be_set_mc_promisc(struct be_adapter *adapter)
1626 {
1627 int status;
1628
1629 if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1630 return;
1631
1632 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1633 if (!status)
1634 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1635 }
1636
1637 static void be_set_uc_promisc(struct be_adapter *adapter)
1638 {
1639 int status;
1640
1641 if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1642 return;
1643
1644 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1645 if (!status)
1646 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1647 }
1648
1649 static void be_clear_uc_promisc(struct be_adapter *adapter)
1650 {
1651 int status;
1652
1653 if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1654 return;
1655
1656 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1657 if (!status)
1658 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1659 }
1660
1661 /* The below 2 functions are the callbacks for __dev_mc_sync/dev_uc_sync().
1662  * We use a single callback function for both sync and unsync. We really don't
1663  * add/remove addresses through this callback, but we use it to detect changes
1664 * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1665 */
1666 static int be_uc_list_update(struct net_device *netdev,
1667 const unsigned char *addr)
1668 {
1669 struct be_adapter *adapter = netdev_priv(netdev);
1670
1671 adapter->update_uc_list = true;
1672 return 0;
1673 }
1674
1675 static int be_mc_list_update(struct net_device *netdev,
1676 const unsigned char *addr)
1677 {
1678 struct be_adapter *adapter = netdev_priv(netdev);
1679
1680 adapter->update_mc_list = true;
1681 return 0;
1682 }
1683
1684 static void be_set_mc_list(struct be_adapter *adapter)
1685 {
1686 struct net_device *netdev = adapter->netdev;
1687 struct netdev_hw_addr *ha;
1688 bool mc_promisc = false;
1689 int status;
1690
1691 netif_addr_lock_bh(netdev);
1692 __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1693
1694 if (netdev->flags & IFF_PROMISC) {
1695 adapter->update_mc_list = false;
1696 } else if (netdev->flags & IFF_ALLMULTI ||
1697 netdev_mc_count(netdev) > be_max_mc(adapter)) {
1698 /* Enable multicast promisc if num configured exceeds
1699 * what we support
1700 */
1701 mc_promisc = true;
1702 adapter->update_mc_list = false;
1703 } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1704 /* Update mc-list unconditionally if the iface was previously
1705 * in mc-promisc mode and now is out of that mode.
1706 */
1707 adapter->update_mc_list = true;
1708 }
1709
1710 if (adapter->update_mc_list) {
1711 int i = 0;
1712
1713 /* cache the mc-list in adapter */
1714 netdev_for_each_mc_addr(ha, netdev) {
1715 ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1716 i++;
1717 }
1718 adapter->mc_count = netdev_mc_count(netdev);
1719 }
1720 netif_addr_unlock_bh(netdev);
1721
1722 if (mc_promisc) {
1723 be_set_mc_promisc(adapter);
1724 } else if (adapter->update_mc_list) {
1725 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1726 if (!status)
1727 adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1728 else
1729 be_set_mc_promisc(adapter);
1730
1731 adapter->update_mc_list = false;
1732 }
1733 }
1734
1735 static void be_clear_mc_list(struct be_adapter *adapter)
1736 {
1737 struct net_device *netdev = adapter->netdev;
1738
1739 __dev_mc_unsync(netdev, NULL);
1740 be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1741 adapter->mc_count = 0;
1742 }
1743
1744 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1745 {
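        /* If this uc-list entry is the primary MAC itself, reuse its existing
         * filter (pmac_id) instead of consuming another filter slot.
         */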
1746 if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1747 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1748 return 0;
1749 }
1750
1751 return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1752 adapter->if_handle,
1753 &adapter->pmac_id[uc_idx + 1], 0);
1754 }
1755
1756 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1757 {
1758 if (pmac_id == adapter->pmac_id[0])
1759 return;
1760
1761 be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1762 }
1763
1764 static void be_set_uc_list(struct be_adapter *adapter)
1765 {
1766 struct net_device *netdev = adapter->netdev;
1767 struct netdev_hw_addr *ha;
1768 bool uc_promisc = false;
1769 int curr_uc_macs = 0, i;
1770
1771 netif_addr_lock_bh(netdev);
1772 __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1773
1774 if (netdev->flags & IFF_PROMISC) {
1775 adapter->update_uc_list = false;
1776 } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1777 uc_promisc = true;
1778 adapter->update_uc_list = false;
1779 } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1780 /* Update uc-list unconditionally if the iface was previously
1781 * in uc-promisc mode and now is out of that mode.
1782 */
1783 adapter->update_uc_list = true;
1784 }
1785
1786 if (adapter->update_uc_list) {
1787 /* cache the uc-list in adapter array */
1788 i = 0;
1789 netdev_for_each_uc_addr(ha, netdev) {
1790 ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1791 i++;
1792 }
1793 curr_uc_macs = netdev_uc_count(netdev);
1794 }
1795 netif_addr_unlock_bh(netdev);
1796
1797 if (uc_promisc) {
1798 be_set_uc_promisc(adapter);
1799 } else if (adapter->update_uc_list) {
1800 be_clear_uc_promisc(adapter);
1801
1802 for (i = 0; i < adapter->uc_macs; i++)
1803 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1804
1805 for (i = 0; i < curr_uc_macs; i++)
1806 be_uc_mac_add(adapter, i);
1807 adapter->uc_macs = curr_uc_macs;
1808 adapter->update_uc_list = false;
1809 }
1810 }
1811
1812 static void be_clear_uc_list(struct be_adapter *adapter)
1813 {
1814 struct net_device *netdev = adapter->netdev;
1815 int i;
1816
1817 __dev_uc_unsync(netdev, NULL);
1818 for (i = 0; i < adapter->uc_macs; i++)
1819 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1820
1821 adapter->uc_macs = 0;
1822 }
1823
1824 static void __be_set_rx_mode(struct be_adapter *adapter)
1825 {
1826 struct net_device *netdev = adapter->netdev;
1827
1828 mutex_lock(&adapter->rx_filter_lock);
1829
1830 if (netdev->flags & IFF_PROMISC) {
1831 if (!be_in_all_promisc(adapter))
1832 be_set_all_promisc(adapter);
1833 } else if (be_in_all_promisc(adapter)) {
1834 /* We need to re-program the vlan-list or clear
1835 * vlan-promisc mode (if needed) when the interface
1836 * comes out of promisc mode.
1837 */
1838 be_vid_config(adapter);
1839 }
1840
1841 be_set_uc_list(adapter);
1842 be_set_mc_list(adapter);
1843
1844 mutex_unlock(&adapter->rx_filter_lock);
1845 }
1846
1847 static void be_work_set_rx_mode(struct work_struct *work)
1848 {
1849 struct be_cmd_work *cmd_work =
1850 container_of(work, struct be_cmd_work, work);
1851
1852 __be_set_rx_mode(cmd_work->adapter);
1853 kfree(cmd_work);
1854 }
1855
1856 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1857 {
1858 struct be_adapter *adapter = netdev_priv(netdev);
1859 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1860 int status;
1861
1862 if (!sriov_enabled(adapter))
1863 return -EPERM;
1864
1865 if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1866 return -EINVAL;
1867
1868 /* Proceed further only if user provided MAC is different
1869 * from active MAC
1870 */
1871 if (ether_addr_equal(mac, vf_cfg->mac_addr))
1872 return 0;
1873
1874 if (BEx_chip(adapter)) {
1875 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1876 vf + 1);
1877
1878 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1879 &vf_cfg->pmac_id, vf + 1);
1880 } else {
1881 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1882 vf + 1);
1883 }
1884
1885 if (status) {
1886 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1887 mac, vf, status);
1888 return be_cmd_status(status);
1889 }
1890
1891 ether_addr_copy(vf_cfg->mac_addr, mac);
1892
1893 return 0;
1894 }
1895
1896 static int be_get_vf_config(struct net_device *netdev, int vf,
1897 struct ifla_vf_info *vi)
1898 {
1899 struct be_adapter *adapter = netdev_priv(netdev);
1900 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1901
1902 if (!sriov_enabled(adapter))
1903 return -EPERM;
1904
1905 if (vf >= adapter->num_vfs)
1906 return -EINVAL;
1907
1908 vi->vf = vf;
1909 vi->max_tx_rate = vf_cfg->tx_rate;
1910 vi->min_tx_rate = 0;
1911 vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1912 vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1913 memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1914 vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1915 vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1916
1917 return 0;
1918 }
1919
1920 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1921 {
1922 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1923 u16 vids[BE_NUM_VLANS_SUPPORTED];
1924 int vf_if_id = vf_cfg->if_handle;
1925 int status;
1926
1927 /* Enable Transparent VLAN Tagging */
1928 status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1929 if (status)
1930 return status;
1931
1932 /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1933 vids[0] = 0;
1934 status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1935 if (!status)
1936 dev_info(&adapter->pdev->dev,
1937 "Cleared guest VLANs on VF%d", vf);
1938
1939 /* After TVT is enabled, disallow VFs to program VLAN filters */
1940 if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1941 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1942 ~BE_PRIV_FILTMGMT, vf + 1);
1943 if (!status)
1944 vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1945 }
1946 return 0;
1947 }
1948
1949 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1950 {
1951 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1952 struct device *dev = &adapter->pdev->dev;
1953 int status;
1954
1955 /* Reset Transparent VLAN Tagging. */
1956 status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1957 vf_cfg->if_handle, 0, 0);
1958 if (status)
1959 return status;
1960
1961 /* Allow VFs to program VLAN filtering */
1962 if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1963 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1964 BE_PRIV_FILTMGMT, vf + 1);
1965 if (!status) {
1966 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1967 dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1968 }
1969 }
1970
1971 dev_info(dev,
1972 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1973 return 0;
1974 }
1975
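/* Handler for the "ip link set ... vf N vlan V qos Q" path: a non-zero
 * vlan/qos pair enables transparent VLAN tagging (TVT) on the VF, a zero pair
 * clears it. The tag passed to be_set_vf_tvt() packs the priority into the
 * upper bits, e.g. vlan=100, qos=5 gives 100 | (5 << VLAN_PRIO_SHIFT) = 0xa064.
 */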
1976 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1977 __be16 vlan_proto)
1978 {
1979 struct be_adapter *adapter = netdev_priv(netdev);
1980 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1981 int status;
1982
1983 if (!sriov_enabled(adapter))
1984 return -EPERM;
1985
1986 if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1987 return -EINVAL;
1988
1989 if (vlan_proto != htons(ETH_P_8021Q))
1990 return -EPROTONOSUPPORT;
1991
1992 if (vlan || qos) {
1993 vlan |= qos << VLAN_PRIO_SHIFT;
1994 status = be_set_vf_tvt(adapter, vf, vlan);
1995 } else {
1996 status = be_clear_vf_tvt(adapter, vf);
1997 }
1998
1999 if (status) {
2000 dev_err(&adapter->pdev->dev,
2001 "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2002 status);
2003 return be_cmd_status(status);
2004 }
2005
2006 vf_cfg->vlan_tag = vlan;
2007 return 0;
2008 }
2009
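/* Handler for the "ip link set ... vf N max_tx_rate" path: only max_tx_rate
 * is supported and it is validated against the current link speed. On Skyhawk
 * the rate must also be a whole percentage of the link speed, e.g. on a 10G
 * link (link_speed = 10000 Mbps) only multiples of 100 Mbps in the range
 * 100..10000 are accepted.
 */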
2010 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2011 int min_tx_rate, int max_tx_rate)
2012 {
2013 struct be_adapter *adapter = netdev_priv(netdev);
2014 struct device *dev = &adapter->pdev->dev;
2015 int percent_rate, status = 0;
2016 u16 link_speed = 0;
2017 u8 link_status;
2018
2019 if (!sriov_enabled(adapter))
2020 return -EPERM;
2021
2022 if (vf >= adapter->num_vfs)
2023 return -EINVAL;
2024
2025 if (min_tx_rate)
2026 return -EINVAL;
2027
2028 if (!max_tx_rate)
2029 goto config_qos;
2030
2031 status = be_cmd_link_status_query(adapter, &link_speed,
2032 &link_status, 0);
2033 if (status)
2034 goto err;
2035
2036 if (!link_status) {
2037 dev_err(dev, "TX-rate setting not allowed when link is down\n");
2038 status = -ENETDOWN;
2039 goto err;
2040 }
2041
2042 if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2043 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2044 link_speed);
2045 status = -EINVAL;
2046 goto err;
2047 }
2048
2049 /* On Skyhawk the QOS setting must be done only as a % value */
2050 percent_rate = link_speed / 100;
2051 if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2052 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2053 percent_rate);
2054 status = -EINVAL;
2055 goto err;
2056 }
2057
2058 config_qos:
2059 status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2060 if (status)
2061 goto err;
2062
2063 adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2064 return 0;
2065
2066 err:
2067 dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2068 max_tx_rate, vf);
2069 return be_cmd_status(status);
2070 }
2071
2072 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2073 int link_state)
2074 {
2075 struct be_adapter *adapter = netdev_priv(netdev);
2076 int status;
2077
2078 if (!sriov_enabled(adapter))
2079 return -EPERM;
2080
2081 if (vf >= adapter->num_vfs)
2082 return -EINVAL;
2083
2084 status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2085 if (status) {
2086 dev_err(&adapter->pdev->dev,
2087 "Link state change on VF %d failed: %#x\n", vf, status);
2088 return be_cmd_status(status);
2089 }
2090
2091 adapter->vf_cfg[vf].plink_tracking = link_state;
2092
2093 return 0;
2094 }
2095
2096 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2097 {
2098 struct be_adapter *adapter = netdev_priv(netdev);
2099 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2100 u8 spoofchk;
2101 int status;
2102
2103 if (!sriov_enabled(adapter))
2104 return -EPERM;
2105
2106 if (vf >= adapter->num_vfs)
2107 return -EINVAL;
2108
2109 if (BEx_chip(adapter))
2110 return -EOPNOTSUPP;
2111
2112 if (enable == vf_cfg->spoofchk)
2113 return 0;
2114
2115 spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2116
2117 status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2118 0, spoofchk);
2119 if (status) {
2120 dev_err(&adapter->pdev->dev,
2121 "Spoofchk change on VF %d failed: %#x\n", vf, status);
2122 return be_cmd_status(status);
2123 }
2124
2125 vf_cfg->spoofchk = enable;
2126 return 0;
2127 }
2128
2129 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2130 ulong now)
2131 {
2132 aic->rx_pkts_prev = rx_pkts;
2133 aic->tx_reqs_prev = tx_pkts;
2134 aic->jiffies = now;
2135 }
2136
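/* Adaptive interrupt coalescing: derive a new EQ delay from the combined
 * RX + TX packet rate observed since the last sample. The raw value is
 * eqd = (pps / 15000) << 2, values below 8 collapse to 0, and the result is
 * clamped to [min_eqd, max_eqd]. E.g. ~300k pkts/s yields
 * (300000 / 15000) << 2 = 80, while anything under 30k pkts/s disables the
 * delay entirely.
 */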
2137 static int be_get_new_eqd(struct be_eq_obj *eqo)
2138 {
2139 struct be_adapter *adapter = eqo->adapter;
2140 int eqd, start;
2141 struct be_aic_obj *aic;
2142 struct be_rx_obj *rxo;
2143 struct be_tx_obj *txo;
2144 u64 rx_pkts = 0, tx_pkts = 0;
2145 ulong now;
2146 u32 pps, delta;
2147 int i;
2148
2149 aic = &adapter->aic_obj[eqo->idx];
2150 if (!aic->enable) {
2151 if (aic->jiffies)
2152 aic->jiffies = 0;
2153 eqd = aic->et_eqd;
2154 return eqd;
2155 }
2156
2157 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2158 do {
2159 start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2160 rx_pkts += rxo->stats.rx_pkts;
2161 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2162 }
2163
2164 for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2165 do {
2166 start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2167 tx_pkts += txo->stats.tx_reqs;
2168 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2169 }
2170
2171 /* Skip, if wrapped around or first calculation */
2172 now = jiffies;
2173 if (!aic->jiffies || time_before(now, aic->jiffies) ||
2174 rx_pkts < aic->rx_pkts_prev ||
2175 tx_pkts < aic->tx_reqs_prev) {
2176 be_aic_update(aic, rx_pkts, tx_pkts, now);
2177 return aic->prev_eqd;
2178 }
2179
2180 delta = jiffies_to_msecs(now - aic->jiffies);
2181 if (delta == 0)
2182 return aic->prev_eqd;
2183
2184 pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2185 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2186 eqd = (pps / 15000) << 2;
2187
2188 if (eqd < 8)
2189 eqd = 0;
2190 eqd = min_t(u32, eqd, aic->max_eqd);
2191 eqd = max_t(u32, eqd, aic->min_eqd);
2192
2193 be_aic_update(aic, rx_pkts, tx_pkts, now);
2194
2195 return eqd;
2196 }
2197
2198 /* For Skyhawk-R only */
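/* Translate the current EQ delay into one of the four R2I delay-multiplier
 * encodings used when re-arming the EQ from be_poll(): >100 -> R2I_DLY_ENC_1,
 * 61-100 -> R2I_DLY_ENC_2, 21-60 -> R2I_DLY_ENC_3, otherwise R2I_DLY_ENC_0.
 * E.g. an eqd of 80 maps to R2I_DLY_ENC_2.
 */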
2199 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2200 {
2201 struct be_adapter *adapter = eqo->adapter;
2202 struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2203 ulong now = jiffies;
2204 int eqd;
2205 u32 mult_enc;
2206
2207 if (!aic->enable)
2208 return 0;
2209
2210 if (jiffies_to_msecs(now - aic->jiffies) < 1)
2211 eqd = aic->prev_eqd;
2212 else
2213 eqd = be_get_new_eqd(eqo);
2214
2215 if (eqd > 100)
2216 mult_enc = R2I_DLY_ENC_1;
2217 else if (eqd > 60)
2218 mult_enc = R2I_DLY_ENC_2;
2219 else if (eqd > 20)
2220 mult_enc = R2I_DLY_ENC_3;
2221 else
2222 mult_enc = R2I_DLY_ENC_0;
2223
2224 aic->prev_eqd = eqd;
2225
2226 return mult_enc;
2227 }
2228
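/* Recompute the delay for every EQ and program only the ones whose value
 * changed (or all of them when force_update is set) via be_cmd_modify_eqd().
 * The delay is scaled to the command's multiplier units as eqd * 65 / 100,
 * e.g. an eqd of 80 is sent as a multiplier of 52.
 */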
2229 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2230 {
2231 struct be_set_eqd set_eqd[MAX_EVT_QS];
2232 struct be_aic_obj *aic;
2233 struct be_eq_obj *eqo;
2234 int i, num = 0, eqd;
2235
2236 for_all_evt_queues(adapter, eqo, i) {
2237 aic = &adapter->aic_obj[eqo->idx];
2238 eqd = be_get_new_eqd(eqo);
2239 if (force_update || eqd != aic->prev_eqd) {
2240 set_eqd[num].delay_multiplier = (eqd * 65)/100;
2241 set_eqd[num].eq_id = eqo->q.id;
2242 aic->prev_eqd = eqd;
2243 num++;
2244 }
2245 }
2246
2247 if (num)
2248 be_cmd_modify_eqd(adapter, set_eqd, num);
2249 }
2250
2251 static void be_rx_stats_update(struct be_rx_obj *rxo,
2252 struct be_rx_compl_info *rxcp)
2253 {
2254 struct be_rx_stats *stats = rx_stats(rxo);
2255
2256 u64_stats_update_begin(&stats->sync);
2257 stats->rx_compl++;
2258 stats->rx_bytes += rxcp->pkt_size;
2259 stats->rx_pkts++;
2260 if (rxcp->tunneled)
2261 stats->rx_vxlan_offload_pkts++;
2262 if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2263 stats->rx_mcast_pkts++;
2264 if (rxcp->err)
2265 stats->rx_compl_err++;
2266 u64_stats_update_end(&stats->sync);
2267 }
2268
2269 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2270 {
2271 /* L4 checksum is not reliable for non TCP/UDP packets.
2272 * Also ignore ipcksm for ipv6 pkts
2273 */
2274 return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2275 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2276 }
2277
2278 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2279 {
2280 struct be_adapter *adapter = rxo->adapter;
2281 struct be_rx_page_info *rx_page_info;
2282 struct be_queue_info *rxq = &rxo->q;
2283 u32 frag_idx = rxq->tail;
2284
2285 rx_page_info = &rxo->page_info_tbl[frag_idx];
2286 BUG_ON(!rx_page_info->page);
2287
2288 if (rx_page_info->last_frag) {
2289 dma_unmap_page(&adapter->pdev->dev,
2290 dma_unmap_addr(rx_page_info, bus),
2291 adapter->big_page_size, DMA_FROM_DEVICE);
2292 rx_page_info->last_frag = false;
2293 } else {
2294 dma_sync_single_for_cpu(&adapter->pdev->dev,
2295 dma_unmap_addr(rx_page_info, bus),
2296 rx_frag_size, DMA_FROM_DEVICE);
2297 }
2298
2299 queue_tail_inc(rxq);
2300 atomic_dec(&rxq->used);
2301 return rx_page_info;
2302 }
2303
2304 /* Throw away the data in the Rx completion */
2305 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2306 struct be_rx_compl_info *rxcp)
2307 {
2308 struct be_rx_page_info *page_info;
2309 u16 i, num_rcvd = rxcp->num_rcvd;
2310
2311 for (i = 0; i < num_rcvd; i++) {
2312 page_info = get_rx_page_info(rxo);
2313 put_page(page_info->page);
2314 memset(page_info, 0, sizeof(*page_info));
2315 }
2316 }
2317
2318 /*
2319 * skb_fill_rx_data forms a complete skb for an ether frame
2320 * indicated by rxcp.
2321 */
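/* Small frames (first fragment <= BE_HDR_LEN) are copied wholesale into the
 * skb linear area and the page reference is dropped. For larger frames only
 * the Ethernet header is copied; the rest stays in the RX pages and is
 * attached as page frags, with consecutive fragments from the same physical
 * page coalesced into a single frag slot.
 */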
2322 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2323 struct be_rx_compl_info *rxcp)
2324 {
2325 struct be_rx_page_info *page_info;
2326 u16 i, j;
2327 u16 hdr_len, curr_frag_len, remaining;
2328 u8 *start;
2329
2330 page_info = get_rx_page_info(rxo);
2331 start = page_address(page_info->page) + page_info->page_offset;
2332 prefetch(start);
2333
2334 /* Copy data in the first descriptor of this completion */
2335 curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2336
2337 skb->len = curr_frag_len;
2338 if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2339 memcpy(skb->data, start, curr_frag_len);
2340 /* Complete packet has now been moved to data */
2341 put_page(page_info->page);
2342 skb->data_len = 0;
2343 skb->tail += curr_frag_len;
2344 } else {
2345 hdr_len = ETH_HLEN;
2346 memcpy(skb->data, start, hdr_len);
2347 skb_shinfo(skb)->nr_frags = 1;
2348 skb_frag_set_page(skb, 0, page_info->page);
2349 skb_shinfo(skb)->frags[0].page_offset =
2350 page_info->page_offset + hdr_len;
2351 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2352 curr_frag_len - hdr_len);
2353 skb->data_len = curr_frag_len - hdr_len;
2354 skb->truesize += rx_frag_size;
2355 skb->tail += hdr_len;
2356 }
2357 page_info->page = NULL;
2358
2359 if (rxcp->pkt_size <= rx_frag_size) {
2360 BUG_ON(rxcp->num_rcvd != 1);
2361 return;
2362 }
2363
2364 /* More frags present for this completion */
2365 remaining = rxcp->pkt_size - curr_frag_len;
2366 for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2367 page_info = get_rx_page_info(rxo);
2368 curr_frag_len = min(remaining, rx_frag_size);
2369
2370 /* Coalesce all frags from the same physical page in one slot */
2371 if (page_info->page_offset == 0) {
2372 /* Fresh page */
2373 j++;
2374 skb_frag_set_page(skb, j, page_info->page);
2375 skb_shinfo(skb)->frags[j].page_offset =
2376 page_info->page_offset;
2377 skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2378 skb_shinfo(skb)->nr_frags++;
2379 } else {
2380 put_page(page_info->page);
2381 }
2382
2383 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2384 skb->len += curr_frag_len;
2385 skb->data_len += curr_frag_len;
2386 skb->truesize += rx_frag_size;
2387 remaining -= curr_frag_len;
2388 page_info->page = NULL;
2389 }
2390 BUG_ON(j > MAX_SKB_FRAGS);
2391 }
2392
2393 /* Process the RX completion indicated by rxcp when GRO is disabled */
2394 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2395 struct be_rx_compl_info *rxcp)
2396 {
2397 struct be_adapter *adapter = rxo->adapter;
2398 struct net_device *netdev = adapter->netdev;
2399 struct sk_buff *skb;
2400
2401 skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2402 if (unlikely(!skb)) {
2403 rx_stats(rxo)->rx_drops_no_skbs++;
2404 be_rx_compl_discard(rxo, rxcp);
2405 return;
2406 }
2407
2408 skb_fill_rx_data(rxo, skb, rxcp);
2409
2410 if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2411 skb->ip_summed = CHECKSUM_UNNECESSARY;
2412 else
2413 skb_checksum_none_assert(skb);
2414
2415 skb->protocol = eth_type_trans(skb, netdev);
2416 skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2417 if (netdev->features & NETIF_F_RXHASH)
2418 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2419
2420 skb->csum_level = rxcp->tunneled;
2421 skb_mark_napi_id(skb, napi);
2422
2423 if (rxcp->vlanf)
2424 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2425
2426 netif_receive_skb(skb);
2427 }
2428
2429 /* Process the RX completion indicated by rxcp when GRO is enabled */
2430 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2431 struct napi_struct *napi,
2432 struct be_rx_compl_info *rxcp)
2433 {
2434 struct be_adapter *adapter = rxo->adapter;
2435 struct be_rx_page_info *page_info;
2436 struct sk_buff *skb = NULL;
2437 u16 remaining, curr_frag_len;
2438 u16 i, j;
2439
2440 skb = napi_get_frags(napi);
2441 if (!skb) {
2442 be_rx_compl_discard(rxo, rxcp);
2443 return;
2444 }
2445
2446 remaining = rxcp->pkt_size;
2447 for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2448 page_info = get_rx_page_info(rxo);
2449
2450 curr_frag_len = min(remaining, rx_frag_size);
2451
2452 /* Coalesce all frags from the same physical page in one slot */
2453 if (i == 0 || page_info->page_offset == 0) {
2454 /* First frag or Fresh page */
2455 j++;
2456 skb_frag_set_page(skb, j, page_info->page);
2457 skb_shinfo(skb)->frags[j].page_offset =
2458 page_info->page_offset;
2459 skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2460 } else {
2461 put_page(page_info->page);
2462 }
2463 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2464 skb->truesize += rx_frag_size;
2465 remaining -= curr_frag_len;
2466 memset(page_info, 0, sizeof(*page_info));
2467 }
2468 BUG_ON(j > MAX_SKB_FRAGS);
2469
2470 skb_shinfo(skb)->nr_frags = j + 1;
2471 skb->len = rxcp->pkt_size;
2472 skb->data_len = rxcp->pkt_size;
2473 skb->ip_summed = CHECKSUM_UNNECESSARY;
2474 skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2475 if (adapter->netdev->features & NETIF_F_RXHASH)
2476 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2477
2478 skb->csum_level = rxcp->tunneled;
2479
2480 if (rxcp->vlanf)
2481 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2482
2483 napi_gro_frags(napi);
2484 }
2485
2486 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2487 struct be_rx_compl_info *rxcp)
2488 {
2489 rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2490 rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2491 rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2492 rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2493 rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2494 rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2495 rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2496 rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2497 rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2498 rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2499 rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2500 if (rxcp->vlanf) {
2501 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2502 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2503 }
2504 rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2505 rxcp->tunneled =
2506 GET_RX_COMPL_V1_BITS(tunneled, compl);
2507 }
2508
2509 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2510 struct be_rx_compl_info *rxcp)
2511 {
2512 rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2513 rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2514 rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2515 rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2516 rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2517 rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2518 rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2519 rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2520 rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2521 rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2522 rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2523 if (rxcp->vlanf) {
2524 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2525 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2526 }
2527 rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2528 rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2529 }
2530
2531 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2532 {
2533 struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2534 struct be_rx_compl_info *rxcp = &rxo->rxcp;
2535 struct be_adapter *adapter = rxo->adapter;
2536
2537 /* For checking the valid bit it is OK to use either definition as the
2538 * valid bit is at the same position in both v0 and v1 Rx compl */
2539 if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2540 return NULL;
2541
2542 rmb();
2543 be_dws_le_to_cpu(compl, sizeof(*compl));
2544
2545 if (adapter->be3_native)
2546 be_parse_rx_compl_v1(compl, rxcp);
2547 else
2548 be_parse_rx_compl_v0(compl, rxcp);
2549
2550 if (rxcp->ip_frag)
2551 rxcp->l4_csum = 0;
2552
2553 if (rxcp->vlanf) {
2554 /* In QNQ modes, if qnq bit is not set, then the packet was
2555 * tagged only with the transparent outer vlan-tag and must
2556 * not be treated as a vlan packet by host
2557 */
2558 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2559 rxcp->vlanf = 0;
2560
2561 if (!lancer_chip(adapter))
2562 rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2563
2564 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2565 !test_bit(rxcp->vlan_tag, adapter->vids))
2566 rxcp->vlanf = 0;
2567 }
2568
2569 /* As the compl has been parsed, reset it; we won't touch it again */
2570 compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2571
2572 queue_tail_inc(&rxo->cq);
2573 return rxcp;
2574 }
2575
2576 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2577 {
2578 u32 order = get_order(size);
2579
2580 if (order > 0)
2581 gfp |= __GFP_COMP;
2582 return alloc_pages(gfp, order);
2583 }
2584
2585 /*
2586 * Allocate a page, split it to fragments of size rx_frag_size and post as
2587 * receive buffers to BE
2588 */
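/* E.g. with 4 KiB pages and the default rx_frag_size of 2048, big_page_size
 * is 4096 and each page yields two fragments; the last fragment carved from a
 * page is flagged last_frag and carries the DMA address used to unmap the
 * whole page in get_rx_page_info().
 */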
2589 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2590 {
2591 struct be_adapter *adapter = rxo->adapter;
2592 struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2593 struct be_queue_info *rxq = &rxo->q;
2594 struct page *pagep = NULL;
2595 struct device *dev = &adapter->pdev->dev;
2596 struct be_eth_rx_d *rxd;
2597 u64 page_dmaaddr = 0, frag_dmaaddr;
2598 u32 posted, page_offset = 0, notify = 0;
2599
2600 page_info = &rxo->page_info_tbl[rxq->head];
2601 for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2602 if (!pagep) {
2603 pagep = be_alloc_pages(adapter->big_page_size, gfp);
2604 if (unlikely(!pagep)) {
2605 rx_stats(rxo)->rx_post_fail++;
2606 break;
2607 }
2608 page_dmaaddr = dma_map_page(dev, pagep, 0,
2609 adapter->big_page_size,
2610 DMA_FROM_DEVICE);
2611 if (dma_mapping_error(dev, page_dmaaddr)) {
2612 put_page(pagep);
2613 pagep = NULL;
2614 adapter->drv_stats.dma_map_errors++;
2615 break;
2616 }
2617 page_offset = 0;
2618 } else {
2619 get_page(pagep);
2620 page_offset += rx_frag_size;
2621 }
2622 page_info->page_offset = page_offset;
2623 page_info->page = pagep;
2624
2625 rxd = queue_head_node(rxq);
2626 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2627 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2628 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2629
2630 /* Any space left in the current big page for another frag? */
2631 if ((page_offset + rx_frag_size + rx_frag_size) >
2632 adapter->big_page_size) {
2633 pagep = NULL;
2634 page_info->last_frag = true;
2635 dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2636 } else {
2637 dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2638 }
2639
2640 prev_page_info = page_info;
2641 queue_head_inc(rxq);
2642 page_info = &rxo->page_info_tbl[rxq->head];
2643 }
2644
2645 /* Mark the last frag of a page when we break out of the above loop
2646 * with no more slots available in the RXQ
2647 */
2648 if (pagep) {
2649 prev_page_info->last_frag = true;
2650 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2651 }
2652
2653 if (posted) {
2654 atomic_add(posted, &rxq->used);
2655 if (rxo->rx_post_starved)
2656 rxo->rx_post_starved = false;
2657 do {
2658 notify = min(MAX_NUM_POST_ERX_DB, posted);
2659 be_rxq_notify(adapter, rxq->id, notify);
2660 posted -= notify;
2661 } while (posted);
2662 } else if (atomic_read(&rxq->used) == 0) {
2663 /* Let be_worker replenish when memory is available */
2664 rxo->rx_post_starved = true;
2665 }
2666 }
2667
2668 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2669 {
2670 switch (status) {
2671 case BE_TX_COMP_HDR_PARSE_ERR:
2672 tx_stats(txo)->tx_hdr_parse_err++;
2673 break;
2674 case BE_TX_COMP_NDMA_ERR:
2675 tx_stats(txo)->tx_dma_err++;
2676 break;
2677 case BE_TX_COMP_ACL_ERR:
2678 tx_stats(txo)->tx_spoof_check_err++;
2679 break;
2680 }
2681 }
2682
2683 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2684 {
2685 switch (status) {
2686 case LANCER_TX_COMP_LSO_ERR:
2687 tx_stats(txo)->tx_tso_err++;
2688 break;
2689 case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2690 case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2691 tx_stats(txo)->tx_spoof_check_err++;
2692 break;
2693 case LANCER_TX_COMP_QINQ_ERR:
2694 tx_stats(txo)->tx_qinq_err++;
2695 break;
2696 case LANCER_TX_COMP_PARITY_ERR:
2697 tx_stats(txo)->tx_internal_parity_err++;
2698 break;
2699 case LANCER_TX_COMP_DMA_ERR:
2700 tx_stats(txo)->tx_dma_err++;
2701 break;
2702 case LANCER_TX_COMP_SGE_ERR:
2703 tx_stats(txo)->tx_sge_err++;
2704 break;
2705 }
2706 }
2707
2708 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2709 struct be_tx_obj *txo)
2710 {
2711 struct be_queue_info *tx_cq = &txo->cq;
2712 struct be_tx_compl_info *txcp = &txo->txcp;
2713 struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2714
2715 if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2716 return NULL;
2717
2718 /* Ensure load ordering of valid bit dword and other dwords below */
2719 rmb();
2720 be_dws_le_to_cpu(compl, sizeof(*compl));
2721
2722 txcp->status = GET_TX_COMPL_BITS(status, compl);
2723 txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2724
2725 if (txcp->status) {
2726 if (lancer_chip(adapter)) {
2727 lancer_update_tx_err(txo, txcp->status);
2728 * Reset the adapter in case of TSO,
2729 * SGE or Parity error
2730 */
2731 if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2732 txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2733 txcp->status == LANCER_TX_COMP_SGE_ERR)
2734 be_set_error(adapter, BE_ERROR_TX);
2735 } else {
2736 be_update_tx_err(txo, txcp->status);
2737 }
2738 }
2739
2740 if (be_check_error(adapter, BE_ERROR_TX))
2741 return NULL;
2742
2743 compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2744 queue_tail_inc(tx_cq);
2745 return txcp;
2746 }
2747
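/* Reclaim all WRBs of the TX requests completed up to last_index. Each
 * request occupies a header WRB (whose slot in sent_skb_list holds the skb
 * pointer) followed by one WRB per DMA-mapped fragment; the walk advances the
 * queue tail, unmapping fragments and freeing skbs, until last_index is
 * reached. Returns the number of WRBs reclaimed so callers can decrement
 * txq->used.
 */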
2748 static u16 be_tx_compl_process(struct be_adapter *adapter,
2749 struct be_tx_obj *txo, u16 last_index)
2750 {
2751 struct sk_buff **sent_skbs = txo->sent_skb_list;
2752 struct be_queue_info *txq = &txo->q;
2753 struct sk_buff *skb = NULL;
2754 bool unmap_skb_hdr = false;
2755 struct be_eth_wrb *wrb;
2756 u16 num_wrbs = 0;
2757 u32 frag_index;
2758
2759 do {
2760 if (sent_skbs[txq->tail]) {
2761 /* Free skb from prev req */
2762 if (skb)
2763 dev_consume_skb_any(skb);
2764 skb = sent_skbs[txq->tail];
2765 sent_skbs[txq->tail] = NULL;
2766 queue_tail_inc(txq); /* skip hdr wrb */
2767 num_wrbs++;
2768 unmap_skb_hdr = true;
2769 }
2770 wrb = queue_tail_node(txq);
2771 frag_index = txq->tail;
2772 unmap_tx_frag(&adapter->pdev->dev, wrb,
2773 (unmap_skb_hdr && skb_headlen(skb)));
2774 unmap_skb_hdr = false;
2775 queue_tail_inc(txq);
2776 num_wrbs++;
2777 } while (frag_index != last_index);
2778 dev_consume_skb_any(skb);
2779
2780 return num_wrbs;
2781 }
2782
2783 /* Return the number of events in the event queue */
2784 static inline int events_get(struct be_eq_obj *eqo)
2785 {
2786 struct be_eq_entry *eqe;
2787 int num = 0;
2788
2789 do {
2790 eqe = queue_tail_node(&eqo->q);
2791 if (eqe->evt == 0)
2792 break;
2793
2794 rmb();
2795 eqe->evt = 0;
2796 num++;
2797 queue_tail_inc(&eqo->q);
2798 } while (true);
2799
2800 return num;
2801 }
2802
2803 /* Leaves the EQ in a disarmed state */
2804 static void be_eq_clean(struct be_eq_obj *eqo)
2805 {
2806 int num = events_get(eqo);
2807
2808 be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2809 }
2810
2811 /* Free posted rx buffers that were not used */
2812 static void be_rxq_clean(struct be_rx_obj *rxo)
2813 {
2814 struct be_queue_info *rxq = &rxo->q;
2815 struct be_rx_page_info *page_info;
2816
2817 while (atomic_read(&rxq->used) > 0) {
2818 page_info = get_rx_page_info(rxo);
2819 put_page(page_info->page);
2820 memset(page_info, 0, sizeof(*page_info));
2821 }
2822 BUG_ON(atomic_read(&rxq->used));
2823 rxq->tail = 0;
2824 rxq->head = 0;
2825 }
2826
2827 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2828 {
2829 struct be_queue_info *rx_cq = &rxo->cq;
2830 struct be_rx_compl_info *rxcp;
2831 struct be_adapter *adapter = rxo->adapter;
2832 int flush_wait = 0;
2833
2834 /* Consume pending rx completions.
2835 * Wait for the flush completion (identified by zero num_rcvd)
2836 * to arrive. Notify CQ even when there are no more CQ entries
2837 * for HW to flush partially coalesced CQ entries.
2838 * In Lancer, there is no need to wait for flush compl.
2839 */
2840 for (;;) {
2841 rxcp = be_rx_compl_get(rxo);
2842 if (!rxcp) {
2843 if (lancer_chip(adapter))
2844 break;
2845
2846 if (flush_wait++ > 50 ||
2847 be_check_error(adapter,
2848 BE_ERROR_HW)) {
2849 dev_warn(&adapter->pdev->dev,
2850 "did not receive flush compl\n");
2851 break;
2852 }
2853 be_cq_notify(adapter, rx_cq->id, true, 0);
2854 mdelay(1);
2855 } else {
2856 be_rx_compl_discard(rxo, rxcp);
2857 be_cq_notify(adapter, rx_cq->id, false, 1);
2858 if (rxcp->num_rcvd == 0)
2859 break;
2860 }
2861 }
2862
2863 /* After cleanup, leave the CQ in unarmed state */
2864 be_cq_notify(adapter, rx_cq->id, false, 0);
2865 }
2866
2867 static void be_tx_compl_clean(struct be_adapter *adapter)
2868 {
2869 struct device *dev = &adapter->pdev->dev;
2870 u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2871 struct be_tx_compl_info *txcp;
2872 struct be_queue_info *txq;
2873 u32 end_idx, notified_idx;
2874 struct be_tx_obj *txo;
2875 int i, pending_txqs;
2876
2877 /* Stop polling for compls when HW has been silent for 10ms */
2878 do {
2879 pending_txqs = adapter->num_tx_qs;
2880
2881 for_all_tx_queues(adapter, txo, i) {
2882 cmpl = 0;
2883 num_wrbs = 0;
2884 txq = &txo->q;
2885 while ((txcp = be_tx_compl_get(adapter, txo))) {
2886 num_wrbs +=
2887 be_tx_compl_process(adapter, txo,
2888 txcp->end_index);
2889 cmpl++;
2890 }
2891 if (cmpl) {
2892 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2893 atomic_sub(num_wrbs, &txq->used);
2894 timeo = 0;
2895 }
2896 if (!be_is_tx_compl_pending(txo))
2897 pending_txqs--;
2898 }
2899
2900 if (pending_txqs == 0 || ++timeo > 10 ||
2901 be_check_error(adapter, BE_ERROR_HW))
2902 break;
2903
2904 mdelay(1);
2905 } while (true);
2906
2907 /* Free enqueued TX that was never notified to HW */
2908 for_all_tx_queues(adapter, txo, i) {
2909 txq = &txo->q;
2910
2911 if (atomic_read(&txq->used)) {
2912 dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2913 i, atomic_read(&txq->used));
2914 notified_idx = txq->tail;
2915 end_idx = txq->tail;
2916 index_adv(&end_idx, atomic_read(&txq->used) - 1,
2917 txq->len);
2918 /* Use the tx-compl process logic to handle requests
2919 * that were not sent to the HW.
2920 */
2921 num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2922 atomic_sub(num_wrbs, &txq->used);
2923 BUG_ON(atomic_read(&txq->used));
2924 txo->pend_wrb_cnt = 0;
2925 /* Since hw was never notified of these requests,
2926 * reset TXQ indices
2927 */
2928 txq->head = notified_idx;
2929 txq->tail = notified_idx;
2930 }
2931 }
2932 }
2933
2934 static void be_evt_queues_destroy(struct be_adapter *adapter)
2935 {
2936 struct be_eq_obj *eqo;
2937 int i;
2938
2939 for_all_evt_queues(adapter, eqo, i) {
2940 if (eqo->q.created) {
2941 be_eq_clean(eqo);
2942 be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2943 netif_napi_del(&eqo->napi);
2944 free_cpumask_var(eqo->affinity_mask);
2945 }
2946 be_queue_free(adapter, &eqo->q);
2947 }
2948 }
2949
2950 static int be_evt_queues_create(struct be_adapter *adapter)
2951 {
2952 struct be_queue_info *eq;
2953 struct be_eq_obj *eqo;
2954 struct be_aic_obj *aic;
2955 int i, rc;
2956
2957 /* need enough EQs to service both RX and TX queues */
2958 adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2959 max(adapter->cfg_num_rx_irqs,
2960 adapter->cfg_num_tx_irqs));
2961
2962 for_all_evt_queues(adapter, eqo, i) {
2963 int numa_node = dev_to_node(&adapter->pdev->dev);
2964
2965 aic = &adapter->aic_obj[i];
2966 eqo->adapter = adapter;
2967 eqo->idx = i;
2968 aic->max_eqd = BE_MAX_EQD;
2969 aic->enable = true;
2970
2971 eq = &eqo->q;
2972 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2973 sizeof(struct be_eq_entry));
2974 if (rc)
2975 return rc;
2976
2977 rc = be_cmd_eq_create(adapter, eqo);
2978 if (rc)
2979 return rc;
2980
2981 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2982 return -ENOMEM;
2983 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2984 eqo->affinity_mask);
2985 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2986 BE_NAPI_WEIGHT);
2987 }
2988 return 0;
2989 }
2990
2991 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2992 {
2993 struct be_queue_info *q;
2994
2995 q = &adapter->mcc_obj.q;
2996 if (q->created)
2997 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2998 be_queue_free(adapter, q);
2999
3000 q = &adapter->mcc_obj.cq;
3001 if (q->created)
3002 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3003 be_queue_free(adapter, q);
3004 }
3005
3006 /* Must be called only after TX qs are created as MCC shares TX EQ */
3007 static int be_mcc_queues_create(struct be_adapter *adapter)
3008 {
3009 struct be_queue_info *q, *cq;
3010
3011 cq = &adapter->mcc_obj.cq;
3012 if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3013 sizeof(struct be_mcc_compl)))
3014 goto err;
3015
3016 /* Use the default EQ for MCC completions */
3017 if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3018 goto mcc_cq_free;
3019
3020 q = &adapter->mcc_obj.q;
3021 if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3022 goto mcc_cq_destroy;
3023
3024 if (be_cmd_mccq_create(adapter, q, cq))
3025 goto mcc_q_free;
3026
3027 return 0;
3028
3029 mcc_q_free:
3030 be_queue_free(adapter, q);
3031 mcc_cq_destroy:
3032 be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3033 mcc_cq_free:
3034 be_queue_free(adapter, cq);
3035 err:
3036 return -1;
3037 }
3038
3039 static void be_tx_queues_destroy(struct be_adapter *adapter)
3040 {
3041 struct be_queue_info *q;
3042 struct be_tx_obj *txo;
3043 u8 i;
3044
3045 for_all_tx_queues(adapter, txo, i) {
3046 q = &txo->q;
3047 if (q->created)
3048 be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3049 be_queue_free(adapter, q);
3050
3051 q = &txo->cq;
3052 if (q->created)
3053 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3054 be_queue_free(adapter, q);
3055 }
3056 }
3057
3058 static int be_tx_qs_create(struct be_adapter *adapter)
3059 {
3060 struct be_queue_info *cq;
3061 struct be_tx_obj *txo;
3062 struct be_eq_obj *eqo;
3063 int status, i;
3064
3065 adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3066
3067 for_all_tx_queues(adapter, txo, i) {
3068 cq = &txo->cq;
3069 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3070 sizeof(struct be_eth_tx_compl));
3071 if (status)
3072 return status;
3073
3074 u64_stats_init(&txo->stats.sync);
3075 u64_stats_init(&txo->stats.sync_compl);
3076
3077 /* If num_evt_qs is less than num_tx_qs, then more than
3078 * one txq shares an eq
3079 */
3080 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3081 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3082 if (status)
3083 return status;
3084
3085 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3086 sizeof(struct be_eth_wrb));
3087 if (status)
3088 return status;
3089
3090 status = be_cmd_txq_create(adapter, txo);
3091 if (status)
3092 return status;
3093
3094 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3095 eqo->idx);
3096 }
3097
3098 dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3099 adapter->num_tx_qs);
3100 return 0;
3101 }
3102
3103 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3104 {
3105 struct be_queue_info *q;
3106 struct be_rx_obj *rxo;
3107 int i;
3108
3109 for_all_rx_queues(adapter, rxo, i) {
3110 q = &rxo->cq;
3111 if (q->created)
3112 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3113 be_queue_free(adapter, q);
3114 }
3115 }
3116
3117 static int be_rx_cqs_create(struct be_adapter *adapter)
3118 {
3119 struct be_queue_info *eq, *cq;
3120 struct be_rx_obj *rxo;
3121 int rc, i;
3122
3123 adapter->num_rss_qs =
3124 min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3125
3126 /* We'll use RSS only if at least 2 RSS rings are supported. */
3127 if (adapter->num_rss_qs < 2)
3128 adapter->num_rss_qs = 0;
3129
3130 adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3131
3132 /* When the interface is not capable of RSS rings (and there is no
3133 * need to create a default RXQ) we'll still need one RXQ
3134 */
3135 if (adapter->num_rx_qs == 0)
3136 adapter->num_rx_qs = 1;
3137
3138 adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3139 for_all_rx_queues(adapter, rxo, i) {
3140 rxo->adapter = adapter;
3141 cq = &rxo->cq;
3142 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3143 sizeof(struct be_eth_rx_compl));
3144 if (rc)
3145 return rc;
3146
3147 u64_stats_init(&rxo->stats.sync);
3148 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3149 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3150 if (rc)
3151 return rc;
3152 }
3153
3154 dev_info(&adapter->pdev->dev,
3155 "created %d RX queue(s)\n", adapter->num_rx_qs);
3156 return 0;
3157 }
3158
3159 static irqreturn_t be_intx(int irq, void *dev)
3160 {
3161 struct be_eq_obj *eqo = dev;
3162 struct be_adapter *adapter = eqo->adapter;
3163 int num_evts = 0;
3164
3165 /* IRQ is not expected when NAPI is scheduled as the EQ
3166 * will not be armed.
3167 * But, this can happen on Lancer INTx where it takes
3168 * a while to de-assert INTx or in BE2 where occasionally
3169 * an interrupt may be raised even when EQ is unarmed.
3170 * If NAPI is already scheduled, then counting & notifying
3171 * events will orphan them.
3172 */
3173 if (napi_schedule_prep(&eqo->napi)) {
3174 num_evts = events_get(eqo);
3175 __napi_schedule(&eqo->napi);
3176 if (num_evts)
3177 eqo->spurious_intr = 0;
3178 }
3179 be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3180
3181 /* Return IRQ_HANDLED only for the first spurious intr
3182 * after a valid intr to stop the kernel from branding
3183 * this irq as a bad one!
3184 */
3185 if (num_evts || eqo->spurious_intr++ == 0)
3186 return IRQ_HANDLED;
3187 else
3188 return IRQ_NONE;
3189 }
3190
3191 static irqreturn_t be_msix(int irq, void *dev)
3192 {
3193 struct be_eq_obj *eqo = dev;
3194
3195 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3196 napi_schedule(&eqo->napi);
3197 return IRQ_HANDLED;
3198 }
3199
3200 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3201 {
3202 return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3203 }
3204
3205 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3206 int budget)
3207 {
3208 struct be_adapter *adapter = rxo->adapter;
3209 struct be_queue_info *rx_cq = &rxo->cq;
3210 struct be_rx_compl_info *rxcp;
3211 u32 work_done;
3212 u32 frags_consumed = 0;
3213
3214 for (work_done = 0; work_done < budget; work_done++) {
3215 rxcp = be_rx_compl_get(rxo);
3216 if (!rxcp)
3217 break;
3218
3219 /* Is it a flush compl that has no data */
3220 if (unlikely(rxcp->num_rcvd == 0))
3221 goto loop_continue;
3222
3223 /* Discard compl with partial DMA Lancer B0 */
3224 if (unlikely(!rxcp->pkt_size)) {
3225 be_rx_compl_discard(rxo, rxcp);
3226 goto loop_continue;
3227 }
3228
3229 /* On BE drop pkts that arrive due to imperfect filtering in
3230 * promiscuous mode on some SKUs
3231 */
3232 if (unlikely(rxcp->port != adapter->port_num &&
3233 !lancer_chip(adapter))) {
3234 be_rx_compl_discard(rxo, rxcp);
3235 goto loop_continue;
3236 }
3237
3238 if (do_gro(rxcp))
3239 be_rx_compl_process_gro(rxo, napi, rxcp);
3240 else
3241 be_rx_compl_process(rxo, napi, rxcp);
3242
3243 loop_continue:
3244 frags_consumed += rxcp->num_rcvd;
3245 be_rx_stats_update(rxo, rxcp);
3246 }
3247
3248 if (work_done) {
3249 be_cq_notify(adapter, rx_cq->id, true, work_done);
3250
3251 /* When an rx-obj gets into post_starved state, just
3252 * let be_worker do the posting.
3253 */
3254 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3255 !rxo->rx_post_starved)
3256 be_post_rx_frags(rxo, GFP_ATOMIC,
3257 max_t(u32, MAX_RX_POST,
3258 frags_consumed));
3259 }
3260
3261 return work_done;
3262 }
3263
3264
3265 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3266 int idx)
3267 {
3268 int num_wrbs = 0, work_done = 0;
3269 struct be_tx_compl_info *txcp;
3270
3271 while ((txcp = be_tx_compl_get(adapter, txo))) {
3272 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3273 work_done++;
3274 }
3275
3276 if (work_done) {
3277 be_cq_notify(adapter, txo->cq.id, true, work_done);
3278 atomic_sub(num_wrbs, &txo->q.used);
3279
3280 /* As Tx wrbs have been freed up, wake up netdev queue
3281 * if it was stopped due to lack of tx wrbs. */
3282 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3283 be_can_txq_wake(txo)) {
3284 netif_wake_subqueue(adapter->netdev, idx);
3285 }
3286
3287 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3288 tx_stats(txo)->tx_compl += work_done;
3289 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3290 }
3291 }
3292
3293 int be_poll(struct napi_struct *napi, int budget)
3294 {
3295 struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3296 struct be_adapter *adapter = eqo->adapter;
3297 int max_work = 0, work, i, num_evts;
3298 struct be_rx_obj *rxo;
3299 struct be_tx_obj *txo;
3300 u32 mult_enc = 0;
3301
3302 num_evts = events_get(eqo);
3303
3304 for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3305 be_process_tx(adapter, txo, i);
3306
3307 /* This loop will iterate twice for EQ0 in which
3308 * completions of the last RXQ (default one) are also processed.
3309 * For other EQs the loop iterates only once.
3310 */
3311 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3312 work = be_process_rx(rxo, napi, budget);
3313 max_work = max(work, max_work);
3314 }
3315
3316 if (is_mcc_eqo(eqo))
3317 be_process_mcc(adapter);
3318
3319 if (max_work < budget) {
3320 napi_complete_done(napi, max_work);
3321
3322 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3323 * delay via a delay multiplier encoding value
3324 */
3325 if (skyhawk_chip(adapter))
3326 mult_enc = be_get_eq_delay_mult_enc(eqo);
3327
3328 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3329 mult_enc);
3330 } else {
3331 /* As we'll continue in polling mode, count and clear events */
3332 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3333 }
3334 return max_work;
3335 }
3336
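/* On Lancer, errors are reported through the SLIPORT status/error registers
 * and a FW-reset signature is logged as informational only. On other chips
 * the unmasked UE_STATUS_LOW/HIGH bits are decoded against the
 * ue_status_low_desc/ue_status_hi_desc tables and each set bit is logged;
 * BE3 additionally checks the POST stage first to filter out spurious UEs.
 */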
3337 void be_detect_error(struct be_adapter *adapter)
3338 {
3339 u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3340 u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3341 struct device *dev = &adapter->pdev->dev;
3342 u16 val;
3343 u32 i;
3344
3345 if (be_check_error(adapter, BE_ERROR_HW))
3346 return;
3347
3348 if (lancer_chip(adapter)) {
3349 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3350 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3351 be_set_error(adapter, BE_ERROR_UE);
3352 sliport_err1 = ioread32(adapter->db +
3353 SLIPORT_ERROR1_OFFSET);
3354 sliport_err2 = ioread32(adapter->db +
3355 SLIPORT_ERROR2_OFFSET);
3356 /* Do not log error messages if it's a FW reset */
3357 if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3358 sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3359 dev_info(dev, "Reset is in progress\n");
3360 } else {
3361 dev_err(dev, "Error detected in the card\n");
3362 dev_err(dev, "ERR: sliport status 0x%x\n",
3363 sliport_status);
3364 dev_err(dev, "ERR: sliport error1 0x%x\n",
3365 sliport_err1);
3366 dev_err(dev, "ERR: sliport error2 0x%x\n",
3367 sliport_err2);
3368 }
3369 }
3370 } else {
3371 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3372 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3373 ue_lo_mask = ioread32(adapter->pcicfg +
3374 PCICFG_UE_STATUS_LOW_MASK);
3375 ue_hi_mask = ioread32(adapter->pcicfg +
3376 PCICFG_UE_STATUS_HI_MASK);
3377
3378 ue_lo = (ue_lo & ~ue_lo_mask);
3379 ue_hi = (ue_hi & ~ue_hi_mask);
3380
3381 if (ue_lo || ue_hi) {
3382 /* On certain platforms BE3 hardware can indicate
3383 * spurious UEs. In case of a UE in the chip,
3384 * the POST register correctly reports either a
3385 * FAT_LOG_START state (FW is currently dumping
3386 * FAT log data) or an ARMFW_UE state. Check for the
3387 * above states to ascertain if the UE is valid or not.
3388 */
3389 if (BE3_chip(adapter)) {
3390 val = be_POST_stage_get(adapter);
3391 if ((val & POST_STAGE_FAT_LOG_START)
3392 != POST_STAGE_FAT_LOG_START &&
3393 (val & POST_STAGE_ARMFW_UE)
3394 != POST_STAGE_ARMFW_UE &&
3395 (val & POST_STAGE_RECOVERABLE_ERR)
3396 != POST_STAGE_RECOVERABLE_ERR)
3397 return;
3398 }
3399
3400 dev_err(dev, "Error detected in the adapter");
3401 be_set_error(adapter, BE_ERROR_UE);
3402
3403 for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3404 if (ue_lo & 1)
3405 dev_err(dev, "UE: %s bit set\n",
3406 ue_status_low_desc[i]);
3407 }
3408 for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3409 if (ue_hi & 1)
3410 dev_err(dev, "UE: %s bit set\n",
3411 ue_status_hi_desc[i]);
3412 }
3413 }
3414 }
3415 }
3416
3417 static void be_msix_disable(struct be_adapter *adapter)
3418 {
3419 if (msix_enabled(adapter)) {
3420 pci_disable_msix(adapter->pdev);
3421 adapter->num_msix_vec = 0;
3422 adapter->num_msix_roce_vec = 0;
3423 }
3424 }
3425
3426 static int be_msix_enable(struct be_adapter *adapter)
3427 {
3428 unsigned int i, max_roce_eqs;
3429 struct device *dev = &adapter->pdev->dev;
3430 int num_vec;
3431
3432 /* If RoCE is supported, program the max number of vectors that
3433 * could be used for NIC and RoCE, else, just program the number
3434 * we'll use initially.
3435 */
3436 if (be_roce_supported(adapter)) {
3437 max_roce_eqs =
3438 be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3439 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3440 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3441 } else {
3442 num_vec = max(adapter->cfg_num_rx_irqs,
3443 adapter->cfg_num_tx_irqs);
3444 }
3445
3446 for (i = 0; i < num_vec; i++)
3447 adapter->msix_entries[i].entry = i;
3448
3449 num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3450 MIN_MSIX_VECTORS, num_vec);
3451 if (num_vec < 0)
3452 goto fail;
3453
3454 if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3455 adapter->num_msix_roce_vec = num_vec / 2;
3456 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3457 adapter->num_msix_roce_vec);
3458 }
3459
3460 adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3461
3462 dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3463 adapter->num_msix_vec);
3464 return 0;
3465
3466 fail:
3467 dev_warn(dev, "MSIx enable failed\n");
3468
3469 /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3470 if (be_virtfn(adapter))
3471 return num_vec;
3472 return 0;
3473 }
3474
3475 static inline int be_msix_vec_get(struct be_adapter *adapter,
3476 struct be_eq_obj *eqo)
3477 {
3478 return adapter->msix_entries[eqo->msix_idx].vector;
3479 }
3480
3481 static int be_msix_register(struct be_adapter *adapter)
3482 {
3483 struct net_device *netdev = adapter->netdev;
3484 struct be_eq_obj *eqo;
3485 int status, i, vec;
3486
3487 for_all_evt_queues(adapter, eqo, i) {
3488 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3489 vec = be_msix_vec_get(adapter, eqo);
3490 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3491 if (status)
3492 goto err_msix;
3493
3494 irq_set_affinity_hint(vec, eqo->affinity_mask);
3495 }
3496
3497 return 0;
3498 err_msix:
3499 for (i--; i >= 0; i--) {
3500 eqo = &adapter->eq_obj[i];
3501 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3502 }
3503 dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3504 status);
3505 be_msix_disable(adapter);
3506 return status;
3507 }
3508
3509 static int be_irq_register(struct be_adapter *adapter)
3510 {
3511 struct net_device *netdev = adapter->netdev;
3512 int status;
3513
3514 if (msix_enabled(adapter)) {
3515 status = be_msix_register(adapter);
3516 if (status == 0)
3517 goto done;
3518 /* INTx is not supported for VF */
3519 if (be_virtfn(adapter))
3520 return status;
3521 }
3522
3523 /* INTx: only the first EQ is used */
3524 netdev->irq = adapter->pdev->irq;
3525 status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3526 &adapter->eq_obj[0]);
3527 if (status) {
3528 dev_err(&adapter->pdev->dev,
3529 "INTx request IRQ failed - err %d\n", status);
3530 return status;
3531 }
3532 done:
3533 adapter->isr_registered = true;
3534 return 0;
3535 }
3536
3537 static void be_irq_unregister(struct be_adapter *adapter)
3538 {
3539 struct net_device *netdev = adapter->netdev;
3540 struct be_eq_obj *eqo;
3541 int i, vec;
3542
3543 if (!adapter->isr_registered)
3544 return;
3545
3546 /* INTx */
3547 if (!msix_enabled(adapter)) {
3548 free_irq(netdev->irq, &adapter->eq_obj[0]);
3549 goto done;
3550 }
3551
3552 /* MSIx */
3553 for_all_evt_queues(adapter, eqo, i) {
3554 vec = be_msix_vec_get(adapter, eqo);
3555 irq_set_affinity_hint(vec, NULL);
3556 free_irq(vec, eqo);
3557 }
3558
3559 done:
3560 adapter->isr_registered = false;
3561 }
3562
3563 static void be_rx_qs_destroy(struct be_adapter *adapter)
3564 {
3565 struct rss_info *rss = &adapter->rss_info;
3566 struct be_queue_info *q;
3567 struct be_rx_obj *rxo;
3568 int i;
3569
3570 for_all_rx_queues(adapter, rxo, i) {
3571 q = &rxo->q;
3572 if (q->created) {
3573 /* If RXQs are destroyed while in an "out of buffer"
3574 * state, there is a possibility of an HW stall on
3575 * Lancer. So, post 64 buffers to each queue to relieve
3576 * the "out of buffer" condition.
3577 * Make sure there's space in the RXQ before posting.
3578 */
3579 if (lancer_chip(adapter)) {
3580 be_rx_cq_clean(rxo);
3581 if (atomic_read(&q->used) == 0)
3582 be_post_rx_frags(rxo, GFP_KERNEL,
3583 MAX_RX_POST);
3584 }
3585
3586 be_cmd_rxq_destroy(adapter, q);
3587 be_rx_cq_clean(rxo);
3588 be_rxq_clean(rxo);
3589 }
3590 be_queue_free(adapter, q);
3591 }
3592
3593 if (rss->rss_flags) {
3594 rss->rss_flags = RSS_ENABLE_NONE;
3595 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3596 128, rss->rss_hkey);
3597 }
3598 }
3599
3600 static void be_disable_if_filters(struct be_adapter *adapter)
3601 {
3602 /* Don't delete MAC on BE3 VFs without FILTMGMT privilege */
3603 if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3604 check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3605 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3606 eth_zero_addr(adapter->dev_mac);
3607 }
3608
3609 be_clear_uc_list(adapter);
3610 be_clear_mc_list(adapter);
3611
3612 /* The IFACE flags are enabled in the open path and cleared
3613 * in the close path. When a VF gets detached from the host and
3614 * assigned to a VM the following happens:
3615 * - VF's IFACE flags get cleared in the detach path
3616 * - IFACE create is issued by the VF in the attach path
3617 * Due to a bug in the BE3/Skyhawk-R FW
3618 * (Lancer FW doesn't have the bug), the IFACE capability flags
3619 * specified along with the IFACE create cmd issued by a VF are not
3620 * honoured by FW. As a consequence, if a *new* driver
3621 * (that enables/disables IFACE flags in open/close)
3622 * is loaded in the host and an *old* driver is used by a VM/VF,
3623 * the IFACE gets created *without* the needed flags.
3624 * To avoid this, disable RX-filter flags only for Lancer.
3625 */
3626 if (lancer_chip(adapter)) {
3627 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3628 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3629 }
3630 }
3631
3632 static int be_close(struct net_device *netdev)
3633 {
3634 struct be_adapter *adapter = netdev_priv(netdev);
3635 struct be_eq_obj *eqo;
3636 int i;
3637
3638 /* This protection is needed as be_close() may be called even when the
3639 * adapter is in cleared state (after eeh perm failure)
3640 */
3641 if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3642 return 0;
3643
3644 /* Before attempting cleanup ensure all the pending cmds in the
3645 * config_wq have finished execution
3646 */
3647 flush_workqueue(be_wq);
3648
3649 be_disable_if_filters(adapter);
3650
3651 if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3652 for_all_evt_queues(adapter, eqo, i) {
3653 napi_disable(&eqo->napi);
3654 }
3655 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3656 }
3657
3658 be_async_mcc_disable(adapter);
3659
3660 /* Wait for all pending tx completions to arrive so that
3661 * all tx skbs are freed.
3662 */
3663 netif_tx_disable(netdev);
3664 be_tx_compl_clean(adapter);
3665
3666 be_rx_qs_destroy(adapter);
3667
3668 for_all_evt_queues(adapter, eqo, i) {
3669 if (msix_enabled(adapter))
3670 synchronize_irq(be_msix_vec_get(adapter, eqo));
3671 else
3672 synchronize_irq(netdev->irq);
3673 be_eq_clean(eqo);
3674 }
3675
3676 be_irq_unregister(adapter);
3677
3678 return 0;
3679 }
3680
3681 static int be_rx_qs_create(struct be_adapter *adapter)
3682 {
3683 struct rss_info *rss = &adapter->rss_info;
3684 u8 rss_key[RSS_HASH_KEY_LEN];
3685 struct be_rx_obj *rxo;
3686 int rc, i, j;
3687
3688 for_all_rx_queues(adapter, rxo, i) {
3689 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3690 sizeof(struct be_eth_rx_d));
3691 if (rc)
3692 return rc;
3693 }
3694
3695 if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3696 rxo = default_rxo(adapter);
3697 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3698 rx_frag_size, adapter->if_handle,
3699 false, &rxo->rss_id);
3700 if (rc)
3701 return rc;
3702 }
3703
3704 for_all_rss_queues(adapter, rxo, i) {
3705 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3706 rx_frag_size, adapter->if_handle,
3707 true, &rxo->rss_id);
3708 if (rc)
3709 return rc;
3710 }
3711
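/* With multiple RSS rings, the 128-entry indirection table is filled
 * round-robin with the rings' rss_id values (e.g. 4 rings repeat the pattern
 * id0,id1,id2,id3,...) so hashed flows spread evenly across the rings.
 */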
3712 if (be_multi_rxq(adapter)) {
3713 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3714 for_all_rss_queues(adapter, rxo, i) {
3715 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3716 break;
3717 rss->rsstable[j + i] = rxo->rss_id;
3718 rss->rss_queue[j + i] = i;
3719 }
3720 }
3721 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3722 RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3723
3724 if (!BEx_chip(adapter))
3725 rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3726 RSS_ENABLE_UDP_IPV6;
3727
3728 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3729 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3730 RSS_INDIR_TABLE_LEN, rss_key);
3731 if (rc) {
3732 rss->rss_flags = RSS_ENABLE_NONE;
3733 return rc;
3734 }
3735
3736 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3737 } else {
3738 /* Disable RSS, if only default RX Q is created */
3739 rss->rss_flags = RSS_ENABLE_NONE;
3740 }
3741
3743 /* Post 1 less than RXQ-len to avoid head being equal to tail,
3744 * which is a queue empty condition
3745 */
3746 for_all_rx_queues(adapter, rxo, i)
3747 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3748
3749 return 0;
3750 }
3751
3752 static int be_enable_if_filters(struct be_adapter *adapter)
3753 {
3754 int status;
3755
3756 status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3757 if (status)
3758 return status;
3759
3760 /* Normally this condition is true as ->dev_mac is zeroed.
3761 * But on BE3 VFs the initial MAC is pre-programmed by PF and
3762 * subsequent be_dev_mac_add() can fail (after fresh boot)
3763 */
3764 if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3765 int old_pmac_id = -1;
3766
3767 /* Remember old programmed MAC if any - can happen on BE3 VF */
3768 if (!is_zero_ether_addr(adapter->dev_mac))
3769 old_pmac_id = adapter->pmac_id[0];
3770
3771 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3772 if (status)
3773 return status;
3774
3775 /* Delete the old programmed MAC as we successfully programmed
3776 * a new MAC
3777 */
3778 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3779 be_dev_mac_del(adapter, old_pmac_id);
3780
3781 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3782 }
3783
3784 if (adapter->vlans_added)
3785 be_vid_config(adapter);
3786
3787 __be_set_rx_mode(adapter);
3788
3789 return 0;
3790 }
3791
3792 static int be_open(struct net_device *netdev)
3793 {
3794 struct be_adapter *adapter = netdev_priv(netdev);
3795 struct be_eq_obj *eqo;
3796 struct be_rx_obj *rxo;
3797 struct be_tx_obj *txo;
3798 u8 link_status;
3799 int status, i;
3800
3801 status = be_rx_qs_create(adapter);
3802 if (status)
3803 goto err;
3804
3805 status = be_enable_if_filters(adapter);
3806 if (status)
3807 goto err;
3808
3809 status = be_irq_register(adapter);
3810 if (status)
3811 goto err;
3812
3813 for_all_rx_queues(adapter, rxo, i)
3814 be_cq_notify(adapter, rxo->cq.id, true, 0);
3815
3816 for_all_tx_queues(adapter, txo, i)
3817 be_cq_notify(adapter, txo->cq.id, true, 0);
3818
3819 be_async_mcc_enable(adapter);
3820
3821 for_all_evt_queues(adapter, eqo, i) {
3822 napi_enable(&eqo->napi);
3823 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3824 }
3825 adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3826
3827 status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3828 if (!status)
3829 be_link_status_update(adapter, link_status);
3830
3831 netif_tx_start_all_queues(netdev);
3832 if (skyhawk_chip(adapter))
3833 udp_tunnel_get_rx_info(netdev);
3834
3835 return 0;
3836 err:
3837 be_close(adapter->netdev);
3838 return -EIO;
3839 }
3840
3841 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3842 {
3843 u32 addr;
3844
3845 addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3846
3847 mac[5] = (u8)(addr & 0xFF);
3848 mac[4] = (u8)((addr >> 8) & 0xFF);
3849 mac[3] = (u8)((addr >> 16) & 0xFF);
3850 /* Use the OUI from the current MAC address */
3851 memcpy(mac, adapter->netdev->dev_addr, 3);
3852 }
3853
3854 /*
3855 * Generate a seed MAC address from the PF MAC Address using jhash.
3856 * MAC addresses for VFs are assigned incrementally starting from the seed.
3857 * These addresses are programmed in the ASIC by the PF and the VF driver
3858 * queries for the MAC address during its probe.
3859 */
3860 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3861 {
3862 u32 vf;
3863 int status = 0;
3864 u8 mac[ETH_ALEN];
3865 struct be_vf_cfg *vf_cfg;
3866
3867 be_vf_eth_addr_generate(adapter, mac);
3868
3869 for_all_vfs(adapter, vf_cfg, vf) {
3870 if (BEx_chip(adapter))
3871 status = be_cmd_pmac_add(adapter, mac,
3872 vf_cfg->if_handle,
3873 &vf_cfg->pmac_id, vf + 1);
3874 else
3875 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3876 vf + 1);
3877
3878 if (status)
3879 dev_err(&adapter->pdev->dev,
3880 "Mac address assignment failed for VF %d\n",
3881 vf);
3882 else
3883 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3884
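/* Bump the last octet so the next VF gets the next MAC in sequence */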
3885 mac[5] += 1;
3886 }
3887 return status;
3888 }
3889
3890 static int be_vfs_mac_query(struct be_adapter *adapter)
3891 {
3892 int status, vf;
3893 u8 mac[ETH_ALEN];
3894 struct be_vf_cfg *vf_cfg;
3895
3896 for_all_vfs(adapter, vf_cfg, vf) {
3897 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3898 mac, vf_cfg->if_handle,
3899 false, vf+1);
3900 if (status)
3901 return status;
3902 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3903 }
3904 return 0;
3905 }
3906
3907 static void be_vf_clear(struct be_adapter *adapter)
3908 {
3909 struct be_vf_cfg *vf_cfg;
3910 u32 vf;
3911
3912 if (pci_vfs_assigned(adapter->pdev)) {
3913 dev_warn(&adapter->pdev->dev,
3914 "VFs are assigned to VMs: not disabling VFs\n");
3915 goto done;
3916 }
3917
3918 pci_disable_sriov(adapter->pdev);
3919
3920 for_all_vfs(adapter, vf_cfg, vf) {
3921 if (BEx_chip(adapter))
3922 be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3923 vf_cfg->pmac_id, vf + 1);
3924 else
3925 be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3926 vf + 1);
3927
3928 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3929 }
3930
3931 if (BE3_chip(adapter))
3932 be_cmd_set_hsw_config(adapter, 0, 0,
3933 adapter->if_handle,
3934 PORT_FWD_TYPE_PASSTHRU, 0);
3935 done:
3936 kfree(adapter->vf_cfg);
3937 adapter->num_vfs = 0;
3938 adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3939 }
3940
3941 static void be_clear_queues(struct be_adapter *adapter)
3942 {
3943 be_mcc_queues_destroy(adapter);
3944 be_rx_cqs_destroy(adapter);
3945 be_tx_queues_destroy(adapter);
3946 be_evt_queues_destroy(adapter);
3947 }
3948
3949 static void be_cancel_worker(struct be_adapter *adapter)
3950 {
3951 if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3952 cancel_delayed_work_sync(&adapter->work);
3953 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3954 }
3955 }
3956
3957 static void be_cancel_err_detection(struct be_adapter *adapter)
3958 {
3959 struct be_error_recovery *err_rec = &adapter->error_recovery;
3960
3961 if (!be_err_recovery_workq)
3962 return;
3963
3964 if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3965 cancel_delayed_work_sync(&err_rec->err_detection_work);
3966 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3967 }
3968 }
3969
3970 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3971 {
3972 struct net_device *netdev = adapter->netdev;
3973 struct device *dev = &adapter->pdev->dev;
3974 struct be_vxlan_port *vxlan_port;
3975 __be16 port;
3976 int status;
3977
3978 vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3979 struct be_vxlan_port, list);
3980 port = vxlan_port->port;
3981
3982 status = be_cmd_manage_iface(adapter, adapter->if_handle,
3983 OP_CONVERT_NORMAL_TO_TUNNEL);
3984 if (status) {
3985 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3986 return status;
3987 }
3988 adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3989
3990 status = be_cmd_set_vxlan_port(adapter, port);
3991 if (status) {
3992 dev_warn(dev, "Failed to add VxLAN port\n");
3993 return status;
3994 }
3995 adapter->vxlan_port = port;
3996
3997 netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3998 NETIF_F_TSO | NETIF_F_TSO6 |
3999 NETIF_F_GSO_UDP_TUNNEL;
4000
4001 dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4002 be16_to_cpu(port));
4003 return 0;
4004 }
4005
4006 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
4007 {
4008 struct net_device *netdev = adapter->netdev;
4009
4010 if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4011 be_cmd_manage_iface(adapter, adapter->if_handle,
4012 OP_CONVERT_TUNNEL_TO_NORMAL);
4013
4014 if (adapter->vxlan_port)
4015 be_cmd_set_vxlan_port(adapter, 0);
4016
4017 adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4018 adapter->vxlan_port = 0;
4019
4020 netdev->hw_enc_features = 0;
4021 }
4022
4023 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4024 struct be_resources *vft_res)
4025 {
4026 struct be_resources res = adapter->pool_res;
4027 u32 vf_if_cap_flags = res.vf_if_cap_flags;
4028 struct be_resources res_mod = {0};
4029 u16 num_vf_qs = 1;
4030
4031 /* Distribute the queue resources among the PF and its VFs */
4032 if (num_vfs) {
4033 /* Divide the rx queues evenly among the VFs and the PF, capped
4034 * at VF-EQ-count. Any remainder queues belong to the PF.
4035 */
4036 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4037 res.max_rss_qs / (num_vfs + 1));
4038
4039 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4040 * RSS Tables per port. Provide RSS on VFs, only if number of
4041 * VFs requested is less than its PF Pool's RSS Tables limit.
4042 */
4043 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4044 num_vf_qs = 1;
4045 }
4046
4047 /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4048 * which are modifiable using SET_PROFILE_CONFIG cmd.
4049 */
4050 be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4051 RESOURCE_MODIFIABLE, 0);
4052
4053 /* If RSS IFACE capability flags are modifiable for a VF, set the
4054 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4055 * more than 1 RSSQ is available for a VF.
4056 * Otherwise, provision only 1 queue pair for VF.
4057 */
4058 if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4059 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4060 if (num_vf_qs > 1) {
4061 vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4062 if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4063 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4064 } else {
4065 vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4066 BE_IF_FLAGS_DEFQ_RSS);
4067 }
4068 } else {
4069 num_vf_qs = 1;
4070 }
4071
4072 if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4073 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4074 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4075 }
4076
4077 vft_res->vf_if_cap_flags = vf_if_cap_flags;
4078 vft_res->max_rx_qs = num_vf_qs;
4079 vft_res->max_rss_qs = num_vf_qs;
4080 vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4081 vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4082
4083 /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4084 * among the PF and its VFs, if the fields are changeable
4085 */
4086 if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4087 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4088
4089 if (res_mod.max_vlans == FIELD_MODIFIABLE)
4090 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4091
4092 if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4093 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4094
4095 if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4096 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4097 }
4098
4099 static void be_if_destroy(struct be_adapter *adapter)
4100 {
4101 be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4102
4103 kfree(adapter->pmac_id);
4104 adapter->pmac_id = NULL;
4105
4106 kfree(adapter->mc_list);
4107 adapter->mc_list = NULL;
4108
4109 kfree(adapter->uc_list);
4110 adapter->uc_list = NULL;
4111 }
4112
4113 static int be_clear(struct be_adapter *adapter)
4114 {
4115 struct pci_dev *pdev = adapter->pdev;
4116 struct be_resources vft_res = {0};
4117
4118 be_cancel_worker(adapter);
4119
4120 flush_workqueue(be_wq);
4121
4122 if (sriov_enabled(adapter))
4123 be_vf_clear(adapter);
4124
4125 /* Re-configure FW to distribute resources evenly across max-supported
4126 * number of VFs, only when VFs are not already enabled.
4127 */
4128 if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4129 !pci_vfs_assigned(pdev)) {
4130 be_calculate_vf_res(adapter,
4131 pci_sriov_get_totalvfs(pdev),
4132 &vft_res);
4133 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4134 pci_sriov_get_totalvfs(pdev),
4135 &vft_res);
4136 }
4137
4138 be_disable_vxlan_offloads(adapter);
4139
4140 be_if_destroy(adapter);
4141
4142 be_clear_queues(adapter);
4143
4144 be_msix_disable(adapter);
4145 adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4146 return 0;
4147 }
4148
4149 static int be_vfs_if_create(struct be_adapter *adapter)
4150 {
4151 struct be_resources res = {0};
4152 u32 cap_flags, en_flags, vf;
4153 struct be_vf_cfg *vf_cfg;
4154 int status;
4155
4156 /* If a FW profile exists, then cap_flags are updated */
4157 cap_flags = BE_VF_IF_EN_FLAGS;
4158
4159 for_all_vfs(adapter, vf_cfg, vf) {
4160 if (!BE3_chip(adapter)) {
4161 status = be_cmd_get_profile_config(adapter, &res, NULL,
4162 ACTIVE_PROFILE_TYPE,
4163 RESOURCE_LIMITS,
4164 vf + 1);
4165 if (!status) {
4166 cap_flags = res.if_cap_flags;
4167 /* Prevent VFs from enabling VLAN promiscuous
4168 * mode
4169 */
4170 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4171 }
4172 }
4173
4174 /* PF should enable IF flags during proxy if_create call */
4175 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4176 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4177 &vf_cfg->if_handle, vf + 1);
4178 if (status)
4179 return status;
4180 }
4181
4182 return 0;
4183 }
4184
4185 static int be_vf_setup_init(struct be_adapter *adapter)
4186 {
4187 struct be_vf_cfg *vf_cfg;
4188 int vf;
4189
4190 adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4191 GFP_KERNEL);
4192 if (!adapter->vf_cfg)
4193 return -ENOMEM;
4194
4195 for_all_vfs(adapter, vf_cfg, vf) {
4196 vf_cfg->if_handle = -1;
4197 vf_cfg->pmac_id = -1;
4198 }
4199 return 0;
4200 }
4201
4202 static int be_vf_setup(struct be_adapter *adapter)
4203 {
4204 struct device *dev = &adapter->pdev->dev;
4205 struct be_vf_cfg *vf_cfg;
4206 int status, old_vfs, vf;
4207 bool spoofchk;
4208
4209 old_vfs = pci_num_vf(adapter->pdev);
4210
4211 status = be_vf_setup_init(adapter);
4212 if (status)
4213 goto err;
4214
4215 if (old_vfs) {
4216 for_all_vfs(adapter, vf_cfg, vf) {
4217 status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4218 if (status)
4219 goto err;
4220 }
4221
4222 status = be_vfs_mac_query(adapter);
4223 if (status)
4224 goto err;
4225 } else {
4226 status = be_vfs_if_create(adapter);
4227 if (status)
4228 goto err;
4229
4230 status = be_vf_eth_addr_config(adapter);
4231 if (status)
4232 goto err;
4233 }
4234
4235 for_all_vfs(adapter, vf_cfg, vf) {
4236 /* Allow VFs to program MAC/VLAN filters */
4237 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4238 vf + 1);
4239 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4240 status = be_cmd_set_fn_privileges(adapter,
4241 vf_cfg->privileges |
4242 BE_PRIV_FILTMGMT,
4243 vf + 1);
4244 if (!status) {
4245 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4246 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4247 vf);
4248 }
4249 }
4250
4251 /* Allow full available bandwidth */
4252 if (!old_vfs)
4253 be_cmd_config_qos(adapter, 0, 0, vf + 1);
4254
4255 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4256 vf_cfg->if_handle, NULL,
4257 &spoofchk);
4258 if (!status)
4259 vf_cfg->spoofchk = spoofchk;
4260
4261 if (!old_vfs) {
4262 be_cmd_enable_vf(adapter, vf + 1);
4263 be_cmd_set_logical_link_config(adapter,
4264 IFLA_VF_LINK_STATE_AUTO,
4265 vf+1);
4266 }
4267 }
4268
4269 if (!old_vfs) {
4270 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4271 if (status) {
4272 dev_err(dev, "SRIOV enable failed\n");
4273 adapter->num_vfs = 0;
4274 goto err;
4275 }
4276 }
4277
4278 if (BE3_chip(adapter)) {
4279 /* On BE3, enable VEB only when SRIOV is enabled */
4280 status = be_cmd_set_hsw_config(adapter, 0, 0,
4281 adapter->if_handle,
4282 PORT_FWD_TYPE_VEB, 0);
4283 if (status)
4284 goto err;
4285 }
4286
4287 adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4288 return 0;
4289 err:
4290 dev_err(dev, "VF setup failed\n");
4291 be_vf_clear(adapter);
4292 return status;
4293 }
4294
4295 /* Converting function_mode bits on BE3 to SH mc_type enums */
4296
4297 static u8 be_convert_mc_type(u32 function_mode)
4298 {
4299 if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4300 return vNIC1;
4301 else if (function_mode & QNQ_MODE)
4302 return FLEX10;
4303 else if (function_mode & VNIC_MODE)
4304 return vNIC2;
4305 else if (function_mode & UMC_ENABLED)
4306 return UMC;
4307 else
4308 return MC_NONE;
4309 }
4310
4311 /* On BE2/BE3 FW does not suggest the supported limits */
4312 static void BEx_get_resources(struct be_adapter *adapter,
4313 struct be_resources *res)
4314 {
4315 bool use_sriov = adapter->num_vfs ? 1 : 0;
4316
4317 if (be_physfn(adapter))
4318 res->max_uc_mac = BE_UC_PMAC_COUNT;
4319 else
4320 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4321
4322 adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4323
4324 if (be_is_mc(adapter)) {
4325 /* Assuming that there are 4 channels per port,
4326 * when multi-channel is enabled
4327 */
4328 if (be_is_qnq_mode(adapter))
4329 res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4330 else
4331 /* In a non-qnq multichannel mode, the pvid
4332 * takes up one vlan entry
4333 */
4334 res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4335 } else {
4336 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4337 }
4338
4339 res->max_mcast_mac = BE_MAX_MC;
4340
4341 /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4342 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4343 * *only* if it is RSS-capable.
4344 */
4345 if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4346 be_virtfn(adapter) ||
4347 (be_is_mc(adapter) &&
4348 !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4349 res->max_tx_qs = 1;
4350 } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4351 struct be_resources super_nic_res = {0};
4352
4353 /* On a SuperNIC profile, the driver needs to use the
4354 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4355 */
4356 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4357 ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4358 0);
4359 /* Some old versions of BE3 FW don't report max_tx_qs value */
4360 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4361 } else {
4362 res->max_tx_qs = BE3_MAX_TX_QS;
4363 }
4364
4365 if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4366 !use_sriov && be_physfn(adapter))
4367 res->max_rss_qs = (adapter->be3_native) ?
4368 BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4369 res->max_rx_qs = res->max_rss_qs + 1;
4370
4371 if (be_physfn(adapter))
4372 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4373 BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4374 else
4375 res->max_evt_qs = 1;
4376
4377 res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4378 res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4379 if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4380 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4381 }
4382
4383 static void be_setup_init(struct be_adapter *adapter)
4384 {
4385 adapter->vlan_prio_bmap = 0xff;
4386 adapter->phy.link_speed = -1;
4387 adapter->if_handle = -1;
4388 adapter->be3_native = false;
4389 adapter->if_flags = 0;
4390 adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4391 if (be_physfn(adapter))
4392 adapter->cmd_privileges = MAX_PRIVILEGES;
4393 else
4394 adapter->cmd_privileges = MIN_PRIVILEGES;
4395 }
4396
4397 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4398 * However, this HW limitation is not exposed to the host via any SLI cmd.
4399 * As a result, in the case of SRIOV and in particular multi-partition configs
4400 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4401 * for distribution between the VFs. This self-imposed limit will determine the
4402 * number of VFs for which RSS can be enabled.
4403 */
4404 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4405 {
4406 struct be_port_resources port_res = {0};
4407 u8 rss_tables_on_port;
4408 u16 max_vfs = be_max_vfs(adapter);
4409
4410 be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4411 RESOURCE_LIMITS, 0);
4412
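/* RSS tables available for distribution: the port total minus one
 * per NIC PF on the port.
 */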
4413 rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4414
4415 /* Each PF Pool's RSS Tables limit =
4416 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4417 */
4418 adapter->pool_res.max_rss_tables =
4419 max_vfs * rss_tables_on_port / port_res.max_vfs;
4420 }
4421
4422 static int be_get_sriov_config(struct be_adapter *adapter)
4423 {
4424 struct be_resources res = {0};
4425 int max_vfs, old_vfs;
4426
4427 be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4428 RESOURCE_LIMITS, 0);
4429
4430 /* Some old versions of BE3 FW don't report max_vfs value */
4431 if (BE3_chip(adapter) && !res.max_vfs) {
4432 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4433 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4434 }
4435
4436 adapter->pool_res = res;
4437
4438 /* If during previous unload of the driver, the VFs were not disabled,
4439 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4440 * Instead use the TotalVFs value stored in the pci-dev struct.
4441 */
4442 old_vfs = pci_num_vf(adapter->pdev);
4443 if (old_vfs) {
4444 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4445 old_vfs);
4446
4447 adapter->pool_res.max_vfs =
4448 pci_sriov_get_totalvfs(adapter->pdev);
4449 adapter->num_vfs = old_vfs;
4450 }
4451
4452 if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4453 be_calculate_pf_pool_rss_tables(adapter);
4454 dev_info(&adapter->pdev->dev,
4455 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4456 be_max_pf_pool_rss_tables(adapter));
4457 }
4458 return 0;
4459 }
4460
4461 static void be_alloc_sriov_res(struct be_adapter *adapter)
4462 {
4463 int old_vfs = pci_num_vf(adapter->pdev);
4464 struct be_resources vft_res = {0};
4465 int status;
4466
4467 be_get_sriov_config(adapter);
4468
4469 if (!old_vfs)
4470 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4471
4472 /* When the HW is in SRIOV capable configuration, the PF-pool
4473 * resources are given to PF during driver load, if there are no
4474 * old VFs. This facility is not available in BE3 FW.
4475 * Also, this is done by FW in Lancer chip.
4476 */
4477 if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4478 be_calculate_vf_res(adapter, 0, &vft_res);
4479 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4480 &vft_res);
4481 if (status)
4482 dev_err(&adapter->pdev->dev,
4483 "Failed to optimize SRIOV resources\n");
4484 }
4485 }
4486
4487 static int be_get_resources(struct be_adapter *adapter)
4488 {
4489 struct device *dev = &adapter->pdev->dev;
4490 struct be_resources res = {0};
4491 int status;
4492
4493 /* For Lancer, SH, etc. read per-function resource limits from FW.
4494 * GET_FUNC_CONFIG returns per function guaranteed limits.
4495 * GET_PROFILE_CONFIG returns PCI-E related PF-pool limits
4496 */
4497 if (BEx_chip(adapter)) {
4498 BEx_get_resources(adapter, &res);
4499 } else {
4500 status = be_cmd_get_func_config(adapter, &res);
4501 if (status)
4502 return status;
4503
4504 /* If a default RXQ must be created, we'll use up one RSSQ */
4505 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4506 !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4507 res.max_rss_qs -= 1;
4508 }
4509
4510 /* If RoCE is supported stash away half the EQs for RoCE */
4511 res.max_nic_evt_qs = be_roce_supported(adapter) ?
4512 res.max_evt_qs / 2 : res.max_evt_qs;
4513 adapter->res = res;
4514
4515 /* If FW supports RSS default queue, then skip creating non-RSS
4516 * queue for non-IP traffic.
4517 */
4518 adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4519 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4520
4521 dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4522 be_max_txqs(adapter), be_max_rxqs(adapter),
4523 be_max_rss(adapter), be_max_nic_eqs(adapter),
4524 be_max_vfs(adapter));
4525 dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4526 be_max_uc(adapter), be_max_mc(adapter),
4527 be_max_vlans(adapter));
4528
4529 /* Ensure RX and TX queues are created in pairs at init time */
4530 adapter->cfg_num_rx_irqs =
4531 min_t(u16, netif_get_num_default_rss_queues(),
4532 be_max_qp_irqs(adapter));
4533 adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4534 return 0;
4535 }
4536
4537 static int be_get_config(struct be_adapter *adapter)
4538 {
4539 int status, level;
4540 u16 profile_id;
4541
4542 status = be_cmd_get_cntl_attributes(adapter);
4543 if (status)
4544 return status;
4545
4546 status = be_cmd_query_fw_cfg(adapter);
4547 if (status)
4548 return status;
4549
4550 if (!lancer_chip(adapter) && be_physfn(adapter))
4551 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4552
4553 if (BEx_chip(adapter)) {
4554 level = be_cmd_get_fw_log_level(adapter);
4555 adapter->msg_enable =
4556 level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4557 }
4558
4559 be_cmd_get_acpi_wol_cap(adapter);
4560 pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4561 pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4562
4563 be_cmd_query_port_name(adapter);
4564
4565 if (be_physfn(adapter)) {
4566 status = be_cmd_get_active_profile(adapter, &profile_id);
4567 if (!status)
4568 dev_info(&adapter->pdev->dev,
4569 "Using profile 0x%x\n", profile_id);
4570 }
4571
4572 return 0;
4573 }
4574
4575 static int be_mac_setup(struct be_adapter *adapter)
4576 {
4577 u8 mac[ETH_ALEN];
4578 int status;
4579
4580 if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4581 status = be_cmd_get_perm_mac(adapter, mac);
4582 if (status)
4583 return status;
4584
4585 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4586 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4587
4588 /* Initial MAC for BE3 VFs is already programmed by PF */
4589 if (BEx_chip(adapter) && be_virtfn(adapter))
4590 memcpy(adapter->dev_mac, mac, ETH_ALEN);
4591 }
4592
4593 return 0;
4594 }
4595
4596 static void be_schedule_worker(struct be_adapter *adapter)
4597 {
4598 queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4599 adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4600 }
4601
4602 static void be_destroy_err_recovery_workq(void)
4603 {
4604 if (!be_err_recovery_workq)
4605 return;
4606
4607 flush_workqueue(be_err_recovery_workq);
4608 destroy_workqueue(be_err_recovery_workq);
4609 be_err_recovery_workq = NULL;
4610 }
4611
4612 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4613 {
4614 struct be_error_recovery *err_rec = &adapter->error_recovery;
4615
4616 if (!be_err_recovery_workq)
4617 return;
4618
4619 queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4620 msecs_to_jiffies(delay));
4621 adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4622 }
4623
4624 static int be_setup_queues(struct be_adapter *adapter)
4625 {
4626 struct net_device *netdev = adapter->netdev;
4627 int status;
4628
4629 status = be_evt_queues_create(adapter);
4630 if (status)
4631 goto err;
4632
4633 status = be_tx_qs_create(adapter);
4634 if (status)
4635 goto err;
4636
4637 status = be_rx_cqs_create(adapter);
4638 if (status)
4639 goto err;
4640
4641 status = be_mcc_queues_create(adapter);
4642 if (status)
4643 goto err;
4644
4645 status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4646 if (status)
4647 goto err;
4648
4649 status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4650 if (status)
4651 goto err;
4652
4653 return 0;
4654 err:
4655 dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4656 return status;
4657 }
4658
4659 static int be_if_create(struct be_adapter *adapter)
4660 {
4661 u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4662 u32 cap_flags = be_if_cap_flags(adapter);
4663 int status;
4664
4665 /* alloc required memory for other filtering fields */
4666 adapter->pmac_id = kcalloc(be_max_uc(adapter),
4667 sizeof(*adapter->pmac_id), GFP_KERNEL);
4668 if (!adapter->pmac_id)
4669 return -ENOMEM;
4670
4671 adapter->mc_list = kcalloc(be_max_mc(adapter),
4672 sizeof(*adapter->mc_list), GFP_KERNEL);
4673 if (!adapter->mc_list)
4674 return -ENOMEM;
4675
4676 adapter->uc_list = kcalloc(be_max_uc(adapter),
4677 sizeof(*adapter->uc_list), GFP_KERNEL);
4678 if (!adapter->uc_list)
4679 return -ENOMEM;
4680
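/* With a single RX irq there is no traffic to spread, so drop the
 * RSS capability flags.
 */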
4681 if (adapter->cfg_num_rx_irqs == 1)
4682 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4683
4684 en_flags &= cap_flags;
4685 /* will enable all the needed filter flags in be_open() */
4686 status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4687 &adapter->if_handle, 0);
4688
4689 if (status)
4690 return status;
4691
4692 return 0;
4693 }
4694
4695 int be_update_queues(struct be_adapter *adapter)
4696 {
4697 struct net_device *netdev = adapter->netdev;
4698 int status;
4699
4700 if (netif_running(netdev)) {
4701 /* device cannot transmit now, avoid dev_watchdog timeouts */
4702 netif_carrier_off(netdev);
4703
4704 be_close(netdev);
4705 }
4706
4707 be_cancel_worker(adapter);
4708
4709 /* If any vectors have been shared with RoCE we cannot re-program
4710 * the MSIx table.
4711 */
4712 if (!adapter->num_msix_roce_vec)
4713 be_msix_disable(adapter);
4714
4715 be_clear_queues(adapter);
4716 status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4717 if (status)
4718 return status;
4719
4720 if (!msix_enabled(adapter)) {
4721 status = be_msix_enable(adapter);
4722 if (status)
4723 return status;
4724 }
4725
4726 status = be_if_create(adapter);
4727 if (status)
4728 return status;
4729
4730 status = be_setup_queues(adapter);
4731 if (status)
4732 return status;
4733
4734 be_schedule_worker(adapter);
4735
4736 /* The IF was destroyed and re-created. We need to clear
4737 * all promiscuous flags valid for the destroyed IF.
4738 * Without this, promisc mode is not restored during
4739 * be_open() because the driver thinks that it is
4740 * already enabled in HW.
4741 */
4742 adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4743
4744 if (netif_running(netdev))
4745 status = be_open(netdev);
4746
4747 return status;
4748 }
4749
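/* Parse the leading (major) number from a FW version string */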
4750 static inline int fw_major_num(const char *fw_ver)
4751 {
4752 int fw_major = 0, i;
4753
4754 i = sscanf(fw_ver, "%d.", &fw_major);
4755 if (i != 1)
4756 return 0;
4757
4758 return fw_major;
4759 }
4760
4761 /* If it is error recovery, FLR the PF
4762 * Else if any VFs are already enabled don't FLR the PF
4763 */
4764 static bool be_reset_required(struct be_adapter *adapter)
4765 {
4766 if (be_error_recovering(adapter))
4767 return true;
4768 else
4769 return pci_num_vf(adapter->pdev) == 0;
4770 }
4771
4772 /* Wait for the FW to be ready and perform the required initialization */
4773 static int be_func_init(struct be_adapter *adapter)
4774 {
4775 int status;
4776
4777 status = be_fw_wait_ready(adapter);
4778 if (status)
4779 return status;
4780
4781 /* FW is now ready; clear errors to allow cmds/doorbell */
4782 be_clear_error(adapter, BE_CLEAR_ALL);
4783
4784 if (be_reset_required(adapter)) {
4785 status = be_cmd_reset_function(adapter);
4786 if (status)
4787 return status;
4788
4789 /* Wait for interrupts to quiesce after an FLR */
4790 msleep(100);
4791 }
4792
4793 /* Tell FW we're ready to fire cmds */
4794 status = be_cmd_fw_init(adapter);
4795 if (status)
4796 return status;
4797
4798 /* Allow interrupts for other ULPs running on NIC function */
4799 be_intr_set(adapter, true);
4800
4801 return 0;
4802 }
4803
4804 static int be_setup(struct be_adapter *adapter)
4805 {
4806 struct device *dev = &adapter->pdev->dev;
4807 int status;
4808
4809 status = be_func_init(adapter);
4810 if (status)
4811 return status;
4812
4813 be_setup_init(adapter);
4814
4815 if (!lancer_chip(adapter))
4816 be_cmd_req_native_mode(adapter);
4817
4818 /* invoke this cmd first to get pf_num and vf_num which are needed
4819 * for issuing profile related cmds
4820 */
4821 if (!BEx_chip(adapter)) {
4822 status = be_cmd_get_func_config(adapter, NULL);
4823 if (status)
4824 return status;
4825 }
4826
4827 status = be_get_config(adapter);
4828 if (status)
4829 goto err;
4830
4831 if (!BE2_chip(adapter) && be_physfn(adapter))
4832 be_alloc_sriov_res(adapter);
4833
4834 status = be_get_resources(adapter);
4835 if (status)
4836 goto err;
4837
4838 status = be_msix_enable(adapter);
4839 if (status)
4840 goto err;
4841
4842 /* will enable all the needed filter flags in be_open() */
4843 status = be_if_create(adapter);
4844 if (status)
4845 goto err;
4846
4847 /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4848 rtnl_lock();
4849 status = be_setup_queues(adapter);
4850 rtnl_unlock();
4851 if (status)
4852 goto err;
4853
4854 be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4855
4856 status = be_mac_setup(adapter);
4857 if (status)
4858 goto err;
4859
4860 be_cmd_get_fw_ver(adapter);
4861 dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4862
4863 if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4864 dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4865 adapter->fw_ver);
4866 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4867 }
4868
4869 status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4870 adapter->rx_fc);
4871 if (status)
4872 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4873 &adapter->rx_fc);
4874
4875 dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4876 adapter->tx_fc, adapter->rx_fc);
4877
4878 if (be_physfn(adapter))
4879 be_cmd_set_logical_link_config(adapter,
4880 IFLA_VF_LINK_STATE_AUTO, 0);
4881
4882 /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4883 * confusing a Linux bridge or OVS that it might be connected to.
4884 * Set the EVB to PASSTHRU mode which effectively disables the EVB
4885 * when SRIOV is not enabled.
4886 */
4887 if (BE3_chip(adapter))
4888 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4889 PORT_FWD_TYPE_PASSTHRU, 0);
4890
4891 if (adapter->num_vfs)
4892 be_vf_setup(adapter);
4893
4894 status = be_cmd_get_phy_info(adapter);
4895 if (!status && be_pause_supported(adapter))
4896 adapter->phy.fc_autoneg = 1;
4897
4898 if (be_physfn(adapter) && !lancer_chip(adapter))
4899 be_cmd_set_features(adapter);
4900
4901 be_schedule_worker(adapter);
4902 adapter->flags |= BE_FLAGS_SETUP_DONE;
4903 return 0;
4904 err:
4905 be_clear(adapter);
4906 return status;
4907 }
4908
4909 #ifdef CONFIG_NET_POLL_CONTROLLER
4910 static void be_netpoll(struct net_device *netdev)
4911 {
4912 struct be_adapter *adapter = netdev_priv(netdev);
4913 struct be_eq_obj *eqo;
4914 int i;
4915
4916 for_all_evt_queues(adapter, eqo, i) {
4917 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4918 napi_schedule(&eqo->napi);
4919 }
4920 }
4921 #endif
4922
4923 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4924 {
4925 const struct firmware *fw;
4926 int status;
4927
4928 if (!netif_running(adapter->netdev)) {
4929 dev_err(&adapter->pdev->dev,
4930 "Firmware load not allowed (interface is down)\n");
4931 return -ENETDOWN;
4932 }
4933
4934 status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4935 if (status)
4936 goto fw_exit;
4937
4938 dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4939
4940 if (lancer_chip(adapter))
4941 status = lancer_fw_download(adapter, fw);
4942 else
4943 status = be_fw_download(adapter, fw);
4944
4945 if (!status)
4946 be_cmd_get_fw_ver(adapter);
4947
4948 fw_exit:
4949 release_firmware(fw);
4950 return status;
4951 }
4952
4953 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4954 u16 flags, struct netlink_ext_ack *extack)
4955 {
4956 struct be_adapter *adapter = netdev_priv(dev);
4957 struct nlattr *attr, *br_spec;
4958 int rem;
4959 int status = 0;
4960 u16 mode = 0;
4961
4962 if (!sriov_enabled(adapter))
4963 return -EOPNOTSUPP;
4964
4965 br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4966 if (!br_spec)
4967 return -EINVAL;
4968
4969 nla_for_each_nested(attr, br_spec, rem) {
4970 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4971 continue;
4972
4973 if (nla_len(attr) < sizeof(mode))
4974 return -EINVAL;
4975
4976 mode = nla_get_u16(attr);
4977 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4978 return -EOPNOTSUPP;
4979
4980 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4981 return -EINVAL;
4982
4983 status = be_cmd_set_hsw_config(adapter, 0, 0,
4984 adapter->if_handle,
4985 mode == BRIDGE_MODE_VEPA ?
4986 PORT_FWD_TYPE_VEPA :
4987 PORT_FWD_TYPE_VEB, 0);
4988 if (status)
4989 goto err;
4990
4991 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4992 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4993
4994 return status;
4995 }
4996 err:
4997 dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4998 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4999
5000 return status;
5001 }
5002
5003 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5004 struct net_device *dev, u32 filter_mask,
5005 int nlflags)
5006 {
5007 struct be_adapter *adapter = netdev_priv(dev);
5008 int status = 0;
5009 u8 hsw_mode;
5010
5011 /* BE and Lancer chips support VEB mode only */
5012 if (BEx_chip(adapter) || lancer_chip(adapter)) {
5013 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5014 if (!pci_sriov_get_totalvfs(adapter->pdev))
5015 return 0;
5016 hsw_mode = PORT_FWD_TYPE_VEB;
5017 } else {
5018 status = be_cmd_get_hsw_config(adapter, NULL, 0,
5019 adapter->if_handle, &hsw_mode,
5020 NULL);
5021 if (status)
5022 return 0;
5023
5024 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5025 return 0;
5026 }
5027
5028 return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5029 hsw_mode == PORT_FWD_TYPE_VEPA ?
5030 BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5031 0, 0, nlflags, filter_mask, NULL);
5032 }
5033
5034 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5035 void (*func)(struct work_struct *))
5036 {
5037 struct be_cmd_work *work;
5038
5039 work = kzalloc(sizeof(*work), GFP_ATOMIC);
5040 if (!work) {
5041 dev_err(&adapter->pdev->dev,
5042 "be_work memory allocation failed\n");
5043 return NULL;
5044 }
5045
5046 INIT_WORK(&work->work, func);
5047 work->adapter = adapter;
5048 return work;
5049 }
5050
5051 /* VxLAN offload Notes:
5052 *
5053 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5054 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5055 * is expected to work across all types of IP tunnels once exported. Skyhawk
5056 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5057 * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5058 * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5059 * those other tunnels are unexported on the fly through ndo_features_check().
5060 *
5061 * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5062 * adds more than one port, disable offloads and re-enable them again when
5063 * there's only one port left. We maintain a list of ports for this purpose.
5064 */
5065 static void be_work_add_vxlan_port(struct work_struct *work)
5066 {
5067 struct be_cmd_work *cmd_work =
5068 container_of(work, struct be_cmd_work, work);
5069 struct be_adapter *adapter = cmd_work->adapter;
5070 struct device *dev = &adapter->pdev->dev;
5071 __be16 port = cmd_work->info.vxlan_port;
5072 struct be_vxlan_port *vxlan_port;
5073 int status;
5074
5075 /* Bump up the alias count if it is an existing port */
5076 list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5077 if (vxlan_port->port == port) {
5078 vxlan_port->port_aliases++;
5079 goto done;
5080 }
5081 }
5082
5083 /* Add a new port to our list. We don't need a lock here since port
5084 * add/delete are done only in the context of a single-threaded work
5085 * queue (be_wq).
5086 */
5087 vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
5088 if (!vxlan_port)
5089 goto done;
5090
5091 vxlan_port->port = port;
5092 INIT_LIST_HEAD(&vxlan_port->list);
5093 list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
5094 adapter->vxlan_port_count++;
5095
5096 if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5097 dev_info(dev,
5098 "Only one UDP port supported for VxLAN offloads\n");
5099 dev_info(dev, "Disabling VxLAN offloads\n");
5100 goto err;
5101 }
5102
5103 if (adapter->vxlan_port_count > 1)
5104 goto done;
5105
5106 status = be_enable_vxlan_offloads(adapter);
5107 if (!status)
5108 goto done;
5109
5110 err:
5111 be_disable_vxlan_offloads(adapter);
5112 done:
5113 kfree(cmd_work);
5114 return;
5115 }
5116
5117 static void be_work_del_vxlan_port(struct work_struct *work)
5118 {
5119 struct be_cmd_work *cmd_work =
5120 container_of(work, struct be_cmd_work, work);
5121 struct be_adapter *adapter = cmd_work->adapter;
5122 __be16 port = cmd_work->info.vxlan_port;
5123 struct be_vxlan_port *vxlan_port;
5124
5125 /* Nothing to be done if a port alias is being deleted */
5126 list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5127 if (vxlan_port->port == port) {
5128 if (vxlan_port->port_aliases) {
5129 vxlan_port->port_aliases--;
5130 goto done;
5131 }
5132 break;
5133 }
5134 }
5135
5136 /* No port aliases left; delete the port from the list */
5137 list_del(&vxlan_port->list);
5138 adapter->vxlan_port_count--;
5139
5140 /* Disable VxLAN offload if this is the offloaded port */
5141 if (adapter->vxlan_port == vxlan_port->port) {
5142 WARN_ON(adapter->vxlan_port_count);
5143 be_disable_vxlan_offloads(adapter);
5144 dev_info(&adapter->pdev->dev,
5145 "Disabled VxLAN offloads for UDP port %d\n",
5146 be16_to_cpu(port));
5147 goto out;
5148 }
5149
5150 /* If only 1 port is left, re-enable VxLAN offload */
5151 if (adapter->vxlan_port_count == 1)
5152 be_enable_vxlan_offloads(adapter);
5153
5154 out:
5155 kfree(vxlan_port);
5156 done:
5157 kfree(cmd_work);
5158 }
5159
5160 static void be_cfg_vxlan_port(struct net_device *netdev,
5161 struct udp_tunnel_info *ti,
5162 void (*func)(struct work_struct *))
5163 {
5164 struct be_adapter *adapter = netdev_priv(netdev);
5165 struct be_cmd_work *cmd_work;
5166
5167 if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5168 return;
5169
5170 if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5171 return;
5172
5173 cmd_work = be_alloc_work(adapter, func);
5174 if (cmd_work) {
5175 cmd_work->info.vxlan_port = ti->port;
5176 queue_work(be_wq, &cmd_work->work);
5177 }
5178 }
5179
5180 static void be_del_vxlan_port(struct net_device *netdev,
5181 struct udp_tunnel_info *ti)
5182 {
5183 be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5184 }
5185
5186 static void be_add_vxlan_port(struct net_device *netdev,
5187 struct udp_tunnel_info *ti)
5188 {
5189 be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5190 }
5191
5192 static netdev_features_t be_features_check(struct sk_buff *skb,
5193 struct net_device *dev,
5194 netdev_features_t features)
5195 {
5196 struct be_adapter *adapter = netdev_priv(dev);
5197 u8 l4_hdr = 0;
5198
5199 if (skb_is_gso(skb)) {
5200 /* IPv6 TSO requests with extension hdrs are a problem
5201 * for Lancer and BE3 HW. Disable the TSO6 feature.
5202 */
5203 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5204 features &= ~NETIF_F_TSO6;
5205
5206 /* Lancer cannot handle packets with an MSS less than 256,
5207 * nor a TSO packet with only a single segment.
5208 * Disable GSO support in such cases.
5209 */
5210 if (lancer_chip(adapter) &&
5211 (skb_shinfo(skb)->gso_size < 256 ||
5212 skb_shinfo(skb)->gso_segs == 1))
5213 features &= ~NETIF_F_GSO_MASK;
5214 }
5215
5216 /* The code below restricts offload features for some tunneled and
5217 * Q-in-Q packets.
5218 * Offload features for normal (non tunnel) packets are unchanged.
5219 */
5220 features = vlan_features_check(skb, features);
5221 if (!skb->encapsulation ||
5222 !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5223 return features;
5224
5225 /* It's an encapsulated packet and VxLAN offloads are enabled. We
5226 * should disable tunnel offload features if it's not a VxLAN packet,
5227 * as tunnel offloads have been enabled only for VxLAN. This is done to
5228 * allow other tunneled traffic like GRE to work while VxLAN
5229 * offloads are configured in Skyhawk-R.
5230 */
5231 switch (vlan_get_protocol(skb)) {
5232 case htons(ETH_P_IP):
5233 l4_hdr = ip_hdr(skb)->protocol;
5234 break;
5235 case htons(ETH_P_IPV6):
5236 l4_hdr = ipv6_hdr(skb)->nexthdr;
5237 break;
5238 default:
5239 return features;
5240 }
5241
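/* Keep offloads only for inner-Ethernet VxLAN frames addressed to the
 * configured VxLAN UDP port; strip checksum/GSO offloads otherwise.
 */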
5242 if (l4_hdr != IPPROTO_UDP ||
5243 skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5244 skb->inner_protocol != htons(ETH_P_TEB) ||
5245 skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5246 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5247 !adapter->vxlan_port ||
5248 udp_hdr(skb)->dest != adapter->vxlan_port)
5249 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5250
5251 return features;
5252 }
5253
5254 static int be_get_phys_port_id(struct net_device *dev,
5255 struct netdev_phys_item_id *ppid)
5256 {
5257 int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5258 struct be_adapter *adapter = netdev_priv(dev);
5259 u8 *id;
5260
5261 if (MAX_PHYS_ITEM_ID_LEN < id_len)
5262 return -ENOSPC;
5263
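/* The phys port id is the 1-based HBA port number followed by the
 * controller serial number words in reverse order.
 */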
5264 ppid->id[0] = adapter->hba_port_num + 1;
5265 id = &ppid->id[1];
5266 for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5267 i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5268 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5269
5270 ppid->id_len = id_len;
5271
5272 return 0;
5273 }
5274
5275 static void be_set_rx_mode(struct net_device *dev)
5276 {
5277 struct be_adapter *adapter = netdev_priv(dev);
5278 struct be_cmd_work *work;
5279
5280 work = be_alloc_work(adapter, be_work_set_rx_mode);
5281 if (work)
5282 queue_work(be_wq, &work->work);
5283 }
5284
5285 static const struct net_device_ops be_netdev_ops = {
5286 .ndo_open = be_open,
5287 .ndo_stop = be_close,
5288 .ndo_start_xmit = be_xmit,
5289 .ndo_set_rx_mode = be_set_rx_mode,
5290 .ndo_set_mac_address = be_mac_addr_set,
5291 .ndo_get_stats64 = be_get_stats64,
5292 .ndo_validate_addr = eth_validate_addr,
5293 .ndo_vlan_rx_add_vid = be_vlan_add_vid,
5294 .ndo_vlan_rx_kill_vid = be_vlan_rem_vid,
5295 .ndo_set_vf_mac = be_set_vf_mac,
5296 .ndo_set_vf_vlan = be_set_vf_vlan,
5297 .ndo_set_vf_rate = be_set_vf_tx_rate,
5298 .ndo_get_vf_config = be_get_vf_config,
5299 .ndo_set_vf_link_state = be_set_vf_link_state,
5300 .ndo_set_vf_spoofchk = be_set_vf_spoofchk,
5301 .ndo_tx_timeout = be_tx_timeout,
5302 #ifdef CONFIG_NET_POLL_CONTROLLER
5303 .ndo_poll_controller = be_netpoll,
5304 #endif
5305 .ndo_bridge_setlink = be_ndo_bridge_setlink,
5306 .ndo_bridge_getlink = be_ndo_bridge_getlink,
5307 .ndo_udp_tunnel_add = be_add_vxlan_port,
5308 .ndo_udp_tunnel_del = be_del_vxlan_port,
5309 .ndo_features_check = be_features_check,
5310 .ndo_get_phys_port_id = be_get_phys_port_id,
5311 };
5312
5313 static void be_netdev_init(struct net_device *netdev)
5314 {
5315 struct be_adapter *adapter = netdev_priv(netdev);
5316
5317 netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5318 NETIF_F_GSO_UDP_TUNNEL |
5319 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5320 NETIF_F_HW_VLAN_CTAG_TX;
5321 if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5322 netdev->hw_features |= NETIF_F_RXHASH;
5323
5324 netdev->features |= netdev->hw_features |
5325 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5326
5327 netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5328 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5329
5330 netdev->priv_flags |= IFF_UNICAST_FLT;
5331
5332 netdev->flags |= IFF_MULTICAST;
5333
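/* Cap the GSO size to the HW limit minus the Ethernet header */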
5334 netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5335
5336 netdev->netdev_ops = &be_netdev_ops;
5337
5338 netdev->ethtool_ops = &be_ethtool_ops;
5339
5340 /* MTU range: 256 - 9000 */
5341 netdev->min_mtu = BE_MIN_MTU;
5342 netdev->max_mtu = BE_MAX_MTU;
5343 }
5344
5345 static void be_cleanup(struct be_adapter *adapter)
5346 {
5347 struct net_device *netdev = adapter->netdev;
5348
5349 rtnl_lock();
5350 netif_device_detach(netdev);
5351 if (netif_running(netdev))
5352 be_close(netdev);
5353 rtnl_unlock();
5354
5355 be_clear(adapter);
5356 }
5357
5358 static int be_resume(struct be_adapter *adapter)
5359 {
5360 struct net_device *netdev = adapter->netdev;
5361 int status;
5362
5363 status = be_setup(adapter);
5364 if (status)
5365 return status;
5366
5367 rtnl_lock();
5368 if (netif_running(netdev))
5369 status = be_open(netdev);
5370 rtnl_unlock();
5371
5372 if (status)
5373 return status;
5374
5375 netif_device_attach(netdev);
5376
5377 return 0;
5378 }
5379
5380 static void be_soft_reset(struct be_adapter *adapter)
5381 {
5382 u32 val;
5383
5384 dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5385 val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5386 val |= SLIPORT_SOFTRESET_SR_MASK;
5387 iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5388 }
5389
5390 static bool be_err_is_recoverable(struct be_adapter *adapter)
5391 {
5392 struct be_error_recovery *err_rec = &adapter->error_recovery;
5393 unsigned long initial_idle_time =
5394 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5395 unsigned long recovery_interval =
5396 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5397 u16 ue_err_code;
5398 u32 val;
5399
5400 val = be_POST_stage_get(adapter);
5401 if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5402 return false;
5403 ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5404 if (ue_err_code == 0)
5405 return false;
5406
5407 dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5408 ue_err_code);
5409
5410 if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5411 dev_err(&adapter->pdev->dev,
5412 "Cannot recover within %lu sec from driver load\n",
5413 jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5414 return false;
5415 }
5416
5417 if (err_rec->last_recovery_time && time_before_eq(
5418 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5419 dev_err(&adapter->pdev->dev,
5420 "Cannot recover within %lu sec from last recovery\n",
5421 jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5422 return false;
5423 }
5424
5425 if (ue_err_code == err_rec->last_err_code) {
5426 dev_err(&adapter->pdev->dev,
5427 "Cannot recover from a consecutive TPE error\n");
5428 return false;
5429 }
5430
5431 err_rec->last_recovery_time = jiffies;
5432 err_rec->last_err_code = ue_err_code;
5433 return true;
5434 }
5435
5436 static int be_tpe_recover(struct be_adapter *adapter)
5437 {
5438 struct be_error_recovery *err_rec = &adapter->error_recovery;
5439 int status = -EAGAIN;
5440 u32 val;
5441
5442 switch (err_rec->recovery_state) {
5443 case ERR_RECOVERY_ST_NONE:
5444 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5445 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5446 break;
5447
5448 case ERR_RECOVERY_ST_DETECT:
5449 val = be_POST_stage_get(adapter);
5450 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5451 POST_STAGE_RECOVERABLE_ERR) {
5452 dev_err(&adapter->pdev->dev,
5453 "Unrecoverable HW error detected: 0x%x\n", val);
5454 status = -EINVAL;
5455 err_rec->resched_delay = 0;
5456 break;
5457 }
5458
5459 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5460
5461 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5462 * milliseconds before it checks for final error status in
5463 * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5464 * If it does, then PF0 initiates a Soft Reset.
5465 */
5466 if (adapter->pf_num == 0) {
5467 err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5468 err_rec->resched_delay = err_rec->ue_to_reset_time -
5469 ERR_RECOVERY_UE_DETECT_DURATION;
5470 break;
5471 }
5472
5473 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5474 err_rec->resched_delay = err_rec->ue_to_poll_time -
5475 ERR_RECOVERY_UE_DETECT_DURATION;
5476 break;
5477
5478 case ERR_RECOVERY_ST_RESET:
5479 if (!be_err_is_recoverable(adapter)) {
5480 dev_err(&adapter->pdev->dev,
5481 "Failed to meet recovery criteria\n");
5482 status = -EIO;
5483 err_rec->resched_delay = 0;
5484 break;
5485 }
5486 be_soft_reset(adapter);
5487 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5488 err_rec->resched_delay = err_rec->ue_to_poll_time -
5489 err_rec->ue_to_reset_time;
5490 break;
5491
5492 case ERR_RECOVERY_ST_PRE_POLL:
5493 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5494 err_rec->resched_delay = 0;
5495 status = 0; /* done */
5496 break;
5497
5498 default:
5499 status = -EINVAL;
5500 err_rec->resched_delay = 0;
5501 break;
5502 }
5503
5504 return status;
5505 }
5506
5507 static int be_err_recover(struct be_adapter *adapter)
5508 {
5509 int status;
5510
5511 if (!lancer_chip(adapter)) {
5512 if (!adapter->error_recovery.recovery_supported ||
5513 adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5514 return -EIO;
5515 status = be_tpe_recover(adapter);
5516 if (status)
5517 goto err;
5518 }
5519
5520 /* Wait for adapter to reach quiescent state before
5521 * destroying queues
5522 */
5523 status = be_fw_wait_ready(adapter);
5524 if (status)
5525 goto err;
5526
5527 adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5528
5529 be_cleanup(adapter);
5530
5531 status = be_resume(adapter);
5532 if (status)
5533 goto err;
5534
5535 adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5536
5537 err:
5538 return status;
5539 }
5540
5541 static void be_err_detection_task(struct work_struct *work)
5542 {
5543 struct be_error_recovery *err_rec =
5544 container_of(work, struct be_error_recovery,
5545 err_detection_work.work);
5546 struct be_adapter *adapter =
5547 container_of(err_rec, struct be_adapter,
5548 error_recovery);
5549 u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5550 struct device *dev = &adapter->pdev->dev;
5551 int recovery_status;
5552
5553 be_detect_error(adapter);
5554 if (!be_check_error(adapter, BE_ERROR_HW))
5555 goto reschedule_task;
5556
5557 recovery_status = be_err_recover(adapter);
5558 if (!recovery_status) {
5559 err_rec->recovery_retries = 0;
5560 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5561 dev_info(dev, "Adapter recovery successful\n");
5562 goto reschedule_task;
5563 } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5564 /* BEx/SH recovery state machine */
5565 if (adapter->pf_num == 0 &&
5566 err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5567 dev_err(&adapter->pdev->dev,
5568 "Adapter recovery in progress\n");
5569 resched_delay = err_rec->resched_delay;
5570 goto reschedule_task;
5571 } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5572 /* For VFs, check every second whether the PF has allocated
5573 * resources.
5574 */
5575 dev_err(dev, "Re-trying adapter recovery\n");
5576 goto reschedule_task;
5577 } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5578 ERR_RECOVERY_MAX_RETRY_COUNT) {
5579 /* In case of another error during recovery, it takes 30 sec
5580 * for adapter to come out of error. Retry error recovery after
5581 * this time interval.
5582 */
5583 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5584 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5585 goto reschedule_task;
5586 } else {
5587 dev_err(dev, "Adapter recovery failed\n");
5588 dev_err(dev, "Please reboot server to recover\n");
5589 }
5590
5591 return;
5592
5593 reschedule_task:
5594 be_schedule_err_detection(adapter, resched_delay);
5595 }
5596
5597 static void be_log_sfp_info(struct be_adapter *adapter)
5598 {
5599 int status;
5600
5601 status = be_cmd_query_sfp_info(adapter);
5602 if (!status) {
5603 dev_err(&adapter->pdev->dev,
5604 "Port %c: %s Vendor: %s part no: %s",
5605 adapter->port_name,
5606 be_misconfig_evt_port_state[adapter->phy_state],
5607 adapter->phy.vendor_name,
5608 adapter->phy.vendor_pn);
5609 }
5610 adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5611 }
5612
5613 static void be_worker(struct work_struct *work)
5614 {
5615 struct be_adapter *adapter =
5616 container_of(work, struct be_adapter, work.work);
5617 struct be_rx_obj *rxo;
5618 int i;
5619
5620 if (be_physfn(adapter) &&
5621 MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5622 be_cmd_get_die_temperature(adapter);
5623
5624 /* when interrupts are not yet enabled, just reap any pending
5625 * mcc completions
5626 */
5627 if (!netif_running(adapter->netdev)) {
5628 local_bh_disable();
5629 be_process_mcc(adapter);
5630 local_bh_enable();
5631 goto reschedule;
5632 }
5633
5634 if (!adapter->stats_cmd_sent) {
5635 if (lancer_chip(adapter))
5636 lancer_cmd_get_pport_stats(adapter,
5637 &adapter->stats_cmd);
5638 else
5639 be_cmd_get_stats(adapter, &adapter->stats_cmd);
5640 }
5641
5642 for_all_rx_queues(adapter, rxo, i) {
5643 /* Replenish RX-queues starved due to memory
5644 * allocation failures.
5645 */
5646 if (rxo->rx_post_starved)
5647 be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5648 }
5649
5650 /* EQ-delay update for Skyhawk is done while notifying EQ */
5651 if (!skyhawk_chip(adapter))
5652 be_eqd_update(adapter, false);
5653
5654 if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5655 be_log_sfp_info(adapter);
5656
5657 reschedule:
5658 adapter->work_counter++;
5659 queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5660 }
5661
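/* Undo the BAR mappings created by be_map_pci_bars() */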
5662 static void be_unmap_pci_bars(struct be_adapter *adapter)
5663 {
5664 if (adapter->csr)
5665 pci_iounmap(adapter->pdev, adapter->csr);
5666 if (adapter->db)
5667 pci_iounmap(adapter->pdev, adapter->db);
5668 if (adapter->pcicfg && adapter->pcicfg_mapped)
5669 pci_iounmap(adapter->pdev, adapter->pcicfg);
5670 }
5671
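/* Doorbell BAR number: BAR 0 on Lancer chips and on VFs, BAR 4 otherwise */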
5672 static int db_bar(struct be_adapter *adapter)
5673 {
5674 if (lancer_chip(adapter) || be_virtfn(adapter))
5675 return 0;
5676 else
5677 return 4;
5678 }
5679
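/* On Skyhawk, record the doorbell BAR region reserved for RoCE */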
5680 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5681 {
5682 if (skyhawk_chip(adapter)) {
5683 adapter->roce_db.size = 4096;
5684 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5685 db_bar(adapter));
5686 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5687 db_bar(adapter));
5688 }
5689 return 0;
5690 }
5691
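/* Read the SLI interface register to determine the chip family and
 * function type, then map the CSR (BEx PF only), doorbell and PCICFG BARs.
 */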
5692 static int be_map_pci_bars(struct be_adapter *adapter)
5693 {
5694 struct pci_dev *pdev = adapter->pdev;
5695 u8 __iomem *addr;
5696 u32 sli_intf;
5697
5698 pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5699 adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5700 SLI_INTF_FAMILY_SHIFT;
5701 adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5702
5703 if (BEx_chip(adapter) && be_physfn(adapter)) {
5704 adapter->csr = pci_iomap(pdev, 2, 0);
5705 if (!adapter->csr)
5706 return -ENOMEM;
5707 }
5708
5709 addr = pci_iomap(pdev, db_bar(adapter), 0);
5710 if (!addr)
5711 goto pci_map_err;
5712 adapter->db = addr;
5713
5714 if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5715 if (be_physfn(adapter)) {
5716 /* PCICFG is the 2nd BAR in BE2 */
5717 addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5718 if (!addr)
5719 goto pci_map_err;
5720 adapter->pcicfg = addr;
5721 adapter->pcicfg_mapped = true;
5722 } else {
5723 adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5724 adapter->pcicfg_mapped = false;
5725 }
5726 }
5727
5728 be_roce_map_pci_bars(adapter);
5729 return 0;
5730
5731 pci_map_err:
5732 dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5733 be_unmap_pci_bars(adapter);
5734 return -ENOMEM;
5735 }
5736
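/* Free the DMA-coherent buffers (mailbox, rx_filter, stats) allocated
 * by be_drv_init().
 */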
5737 static void be_drv_cleanup(struct be_adapter *adapter)
5738 {
5739 struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5740 struct device *dev = &adapter->pdev->dev;
5741
5742 if (mem->va)
5743 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5744
5745 mem = &adapter->rx_filter;
5746 if (mem->va)
5747 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5748
5749 mem = &adapter->stats_cmd;
5750 if (mem->va)
5751 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5752 }
5753
5754 /* Allocate and initialize various fields in be_adapter struct */
5755 static int be_drv_init(struct be_adapter *adapter)
5756 {
5757 struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5758 struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5759 struct be_dma_mem *rx_filter = &adapter->rx_filter;
5760 struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5761 struct device *dev = &adapter->pdev->dev;
5762 int status = 0;
5763
5764 mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5765 mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5766 &mbox_mem_alloc->dma,
5767 GFP_KERNEL);
5768 if (!mbox_mem_alloc->va)
5769 return -ENOMEM;
5770
5771 mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5772 mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5773 mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5774
5775 rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5776 rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5777 &rx_filter->dma, GFP_KERNEL);
5778 if (!rx_filter->va) {
5779 status = -ENOMEM;
5780 goto free_mbox;
5781 }
5782
5783 if (lancer_chip(adapter))
5784 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5785 else if (BE2_chip(adapter))
5786 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5787 else if (BE3_chip(adapter))
5788 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5789 else
5790 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5791 stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5792 &stats_cmd->dma, GFP_KERNEL);
5793 if (!stats_cmd->va) {
5794 status = -ENOMEM;
5795 goto free_rx_filter;
5796 }
5797
5798 mutex_init(&adapter->mbox_lock);
5799 mutex_init(&adapter->mcc_lock);
5800 mutex_init(&adapter->rx_filter_lock);
5801 spin_lock_init(&adapter->mcc_cq_lock);
5802 init_completion(&adapter->et_cmd_compl);
5803
5804 pci_save_state(adapter->pdev);
5805
5806 INIT_DELAYED_WORK(&adapter->work, be_worker);
5807
5808 adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5809 adapter->error_recovery.resched_delay = 0;
5810 INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5811 be_err_detection_task);
5812
5813 adapter->rx_fc = true;
5814 adapter->tx_fc = true;
5815
5816 /* Must be a power of 2 or else MODULO will BUG_ON */
5817 adapter->be_get_temp_freq = 64;
5818
5819 INIT_LIST_HEAD(&adapter->vxlan_port_list);
5820 return 0;
5821
5822 free_rx_filter:
5823 dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5824 free_mbox:
5825 dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5826 mbox_mem_alloc->dma);
5827 return status;
5828 }
5829
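/* PCI remove: detach RoCE, disable interrupts, stop the error-detection
 * worker, unregister the netdev, tear down the adapter and release all
 * PCI resources.
 */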
5830 static void be_remove(struct pci_dev *pdev)
5831 {
5832 struct be_adapter *adapter = pci_get_drvdata(pdev);
5833
5834 if (!adapter)
5835 return;
5836
5837 be_roce_dev_remove(adapter);
5838 be_intr_set(adapter, false);
5839
5840 be_cancel_err_detection(adapter);
5841
5842 unregister_netdev(adapter->netdev);
5843
5844 be_clear(adapter);
5845
5846 if (!pci_vfs_assigned(adapter->pdev))
5847 be_cmd_reset_function(adapter);
5848
5849 /* Tell FW we're done issuing cmds */
5850 be_cmd_fw_clean(adapter);
5851
5852 be_unmap_pci_bars(adapter);
5853 be_drv_cleanup(adapter);
5854
5855 pci_disable_pcie_error_reporting(pdev);
5856
5857 pci_release_regions(pdev);
5858 pci_disable_device(pdev);
5859
5860 free_netdev(adapter->netdev);
5861 }
5862
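/* hwmon temp1_input: report the die temperature last queried by be_worker(),
 * converted to millidegrees Celsius.
 */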
5863 static ssize_t be_hwmon_show_temp(struct device *dev,
5864 struct device_attribute *dev_attr,
5865 char *buf)
5866 {
5867 struct be_adapter *adapter = dev_get_drvdata(dev);
5868
5869 /* Unit: millidegree Celsius */
5870 if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5871 return -EIO;
5872 else
5873 return sprintf(buf, "%u\n",
5874 adapter->hwmon_info.be_on_die_temp * 1000);
5875 }
5876
5877 static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5878 be_hwmon_show_temp, NULL, 1);
5879
5880 static struct attribute *be_hwmon_attrs[] = {
5881 &sensor_dev_attr_temp1_input.dev_attr.attr,
5882 NULL
5883 };
5884
5885 ATTRIBUTE_GROUPS(be_hwmon);
5886
5887 static char *mc_name(struct be_adapter *adapter)
5888 {
5889 char *str = ""; /* default */
5890
5891 switch (adapter->mc_type) {
5892 case UMC:
5893 str = "UMC";
5894 break;
5895 case FLEX10:
5896 str = "FLEX10";
5897 break;
5898 case vNIC1:
5899 str = "vNIC-1";
5900 break;
5901 case nPAR:
5902 str = "nPAR";
5903 break;
5904 case UFP:
5905 str = "UFP";
5906 break;
5907 case vNIC2:
5908 str = "vNIC-2";
5909 break;
5910 default:
5911 str = "";
5912 }
5913
5914 return str;
5915 }
5916
5917 static inline char *func_name(struct be_adapter *adapter)
5918 {
5919 return be_physfn(adapter) ? "PF" : "VF";
5920 }
5921
5922 static inline char *nic_name(struct pci_dev *pdev)
5923 {
5924 switch (pdev->device) {
5925 case OC_DEVICE_ID1:
5926 return OC_NAME;
5927 case OC_DEVICE_ID2:
5928 return OC_NAME_BE;
5929 case OC_DEVICE_ID3:
5930 case OC_DEVICE_ID4:
5931 return OC_NAME_LANCER;
5932 case BE_DEVICE_ID2:
5933 return BE3_NAME;
5934 case OC_DEVICE_ID5:
5935 case OC_DEVICE_ID6:
5936 return OC_NAME_SH;
5937 default:
5938 return BE_NAME;
5939 }
5940 }
5941
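/* PCI probe: enable the device, set the DMA mask, map the BARs, allocate
 * driver resources, bring up the adapter via be_setup(), register the
 * netdev and start the error-detection worker.
 */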
5942 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5943 {
5944 struct be_adapter *adapter;
5945 struct net_device *netdev;
5946 int status = 0;
5947
5948 dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5949
5950 status = pci_enable_device(pdev);
5951 if (status)
5952 goto do_none;
5953
5954 status = pci_request_regions(pdev, DRV_NAME);
5955 if (status)
5956 goto disable_dev;
5957 pci_set_master(pdev);
5958
5959 netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5960 if (!netdev) {
5961 status = -ENOMEM;
5962 goto rel_reg;
5963 }
5964 adapter = netdev_priv(netdev);
5965 adapter->pdev = pdev;
5966 pci_set_drvdata(pdev, adapter);
5967 adapter->netdev = netdev;
5968 SET_NETDEV_DEV(netdev, &pdev->dev);
5969
5970 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5971 if (!status) {
5972 netdev->features |= NETIF_F_HIGHDMA;
5973 } else {
5974 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5975 if (status) {
5976 dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5977 goto free_netdev;
5978 }
5979 }
5980
5981 status = pci_enable_pcie_error_reporting(pdev);
5982 if (!status)
5983 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5984
5985 status = be_map_pci_bars(adapter);
5986 if (status)
5987 goto free_netdev;
5988
5989 status = be_drv_init(adapter);
5990 if (status)
5991 goto unmap_bars;
5992
5993 status = be_setup(adapter);
5994 if (status)
5995 goto drv_cleanup;
5996
5997 be_netdev_init(netdev);
5998 status = register_netdev(netdev);
5999 if (status != 0)
6000 goto unsetup;
6001
6002 be_roce_dev_add(adapter);
6003
6004 be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6005 adapter->error_recovery.probe_time = jiffies;
6006
6007 /* On Die temperature not supported for VF. */
6008 if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
6009 adapter->hwmon_info.hwmon_dev =
6010 devm_hwmon_device_register_with_groups(&pdev->dev,
6011 DRV_NAME,
6012 adapter,
6013 be_hwmon_groups);
6014 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
6015 }
6016
6017 dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
6018 func_name(adapter), mc_name(adapter), adapter->port_name);
6019
6020 return 0;
6021
6022 unsetup:
6023 be_clear(adapter);
6024 drv_cleanup:
6025 be_drv_cleanup(adapter);
6026 unmap_bars:
6027 be_unmap_pci_bars(adapter);
6028 free_netdev:
6029 free_netdev(netdev);
6030 rel_reg:
6031 pci_release_regions(pdev);
6032 disable_dev:
6033 pci_disable_device(pdev);
6034 do_none:
6035 dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
6036 return status;
6037 }
6038
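/* Legacy PCI power-management hooks: be_suspend() quiesces the adapter and
 * powers the device down; be_pci_resume() re-enables it and restores the
 * adapter state via be_resume().
 */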
6039 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
6040 {
6041 struct be_adapter *adapter = pci_get_drvdata(pdev);
6042
6043 be_intr_set(adapter, false);
6044 be_cancel_err_detection(adapter);
6045
6046 be_cleanup(adapter);
6047
6048 pci_save_state(pdev);
6049 pci_disable_device(pdev);
6050 pci_set_power_state(pdev, pci_choose_state(pdev, state));
6051 return 0;
6052 }
6053
6054 static int be_pci_resume(struct pci_dev *pdev)
6055 {
6056 struct be_adapter *adapter = pci_get_drvdata(pdev);
6057 int status = 0;
6058
6059 status = pci_enable_device(pdev);
6060 if (status)
6061 return status;
6062
6063 pci_restore_state(pdev);
6064
6065 status = be_resume(adapter);
6066 if (status)
6067 return status;
6068
6069 be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6070
6071 return 0;
6072 }
6073
6074 /* Shutdown handler: an FLR (function-level reset) stops BE from
6075 * DMAing any data.
6076 */
6077 static void be_shutdown(struct pci_dev *pdev)
6078 {
6079 struct be_adapter *adapter = pci_get_drvdata(pdev);
6080
6081 if (!adapter)
6082 return;
6083
6084 be_roce_dev_shutdown(adapter);
6085 cancel_delayed_work_sync(&adapter->work);
6086 be_cancel_err_detection(adapter);
6087
6088 netif_device_detach(adapter->netdev);
6089
6090 be_cmd_reset_function(adapter);
6091
6092 pci_disable_device(pdev);
6093 }
6094
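/* EEH/AER error handlers (wired up in be_eeh_handlers below): quiesce the
 * adapter when a channel error is reported, wait for FW readiness after the
 * slot reset, and rebuild the configuration on resume.
 */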
6095 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6096 pci_channel_state_t state)
6097 {
6098 struct be_adapter *adapter = pci_get_drvdata(pdev);
6099
6100 dev_err(&adapter->pdev->dev, "EEH error detected\n");
6101
6102 be_roce_dev_remove(adapter);
6103
6104 if (!be_check_error(adapter, BE_ERROR_EEH)) {
6105 be_set_error(adapter, BE_ERROR_EEH);
6106
6107 be_cancel_err_detection(adapter);
6108
6109 be_cleanup(adapter);
6110 }
6111
6112 if (state == pci_channel_io_perm_failure)
6113 return PCI_ERS_RESULT_DISCONNECT;
6114
6115 pci_disable_device(pdev);
6116
6117 /* The error could cause the FW to trigger a flash debug dump.
6118 * Resetting the card while flash dump is in progress
6119 * can cause it not to recover; wait for it to finish.
6120 * Wait only for the first function, as this is needed only once
6121 * per adapter.
6122 */
6123 if (pdev->devfn == 0)
6124 ssleep(30);
6125
6126 return PCI_ERS_RESULT_NEED_RESET;
6127 }
6128
6129 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6130 {
6131 struct be_adapter *adapter = pci_get_drvdata(pdev);
6132 int status;
6133
6134 dev_info(&adapter->pdev->dev, "EEH reset\n");
6135
6136 status = pci_enable_device(pdev);
6137 if (status)
6138 return PCI_ERS_RESULT_DISCONNECT;
6139
6140 pci_set_master(pdev);
6141 pci_restore_state(pdev);
6142
6143 /* Check if card is ok and fw is ready */
6144 dev_info(&adapter->pdev->dev,
6145 "Waiting for FW to be ready after EEH reset\n");
6146 status = be_fw_wait_ready(adapter);
6147 if (status)
6148 return PCI_ERS_RESULT_DISCONNECT;
6149
6150 be_clear_error(adapter, BE_CLEAR_ALL);
6151 return PCI_ERS_RESULT_RECOVERED;
6152 }
6153
6154 static void be_eeh_resume(struct pci_dev *pdev)
6155 {
6156 int status = 0;
6157 struct be_adapter *adapter = pci_get_drvdata(pdev);
6158
6159 dev_info(&adapter->pdev->dev, "EEH resume\n");
6160
6161 pci_save_state(pdev);
6162
6163 status = be_resume(adapter);
6164 if (status)
6165 goto err;
6166
6167 be_roce_dev_add(adapter);
6168
6169 be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6170 return;
6171 err:
6172 dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6173 }
6174
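/* sriov_configure hook (sysfs sriov_numvfs): enables or disables VFs and,
 * on Skyhawk, redistributes the PF-pool resources across the requested
 * number of VFs, then updates the queue configuration and sets up the VFs.
 */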
6175 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6176 {
6177 struct be_adapter *adapter = pci_get_drvdata(pdev);
6178 struct be_resources vft_res = {0};
6179 int status;
6180
6181 if (!num_vfs)
6182 be_vf_clear(adapter);
6183
6184 adapter->num_vfs = num_vfs;
6185
6186 if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6187 dev_warn(&pdev->dev,
6188 "Cannot disable VFs while they are assigned\n");
6189 return -EBUSY;
6190 }
6191
6192 /* When the HW is in an SRIOV-capable configuration, the PF-pool
6193 * resources are distributed equally across the maximum number of VFs.
6194 * The user may request that only a subset of the max VFs be enabled.
6195 * Based on num_vfs, redistribute the resources across num_vfs so that
6196 * each VF gets a larger share of the resources.
6197 * This facility is not available in BE3 FW; on Lancer the FW does
6198 * this redistribution itself.
6199 */
6200 if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6201 be_calculate_vf_res(adapter, adapter->num_vfs,
6202 &vft_res);
6203 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6204 adapter->num_vfs, &vft_res);
6205 if (status)
6206 dev_err(&pdev->dev,
6207 "Failed to optimize SR-IOV resources\n");
6208 }
6209
6210 status = be_get_resources(adapter);
6211 if (status)
6212 return be_cmd_status(status);
6213
6214 /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6215 rtnl_lock();
6216 status = be_update_queues(adapter);
6217 rtnl_unlock();
6218 if (status)
6219 return be_cmd_status(status);
6220
6221 if (adapter->num_vfs)
6222 status = be_vf_setup(adapter);
6223
6224 if (!status)
6225 return adapter->num_vfs;
6226
6227 return 0;
6228 }
6229
6230 static const struct pci_error_handlers be_eeh_handlers = {
6231 .error_detected = be_eeh_err_detected,
6232 .slot_reset = be_eeh_reset,
6233 .resume = be_eeh_resume,
6234 };
6235
6236 static struct pci_driver be_driver = {
6237 .name = DRV_NAME,
6238 .id_table = be_dev_ids,
6239 .probe = be_probe,
6240 .remove = be_remove,
6241 .suspend = be_suspend,
6242 .resume = be_pci_resume,
6243 .shutdown = be_shutdown,
6244 .sriov_configure = be_pci_sriov_configure,
6245 .err_handler = &be_eeh_handlers
6246 };
6247
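/* Module init: validate module parameters, create the shared workqueues and
 * register the PCI driver.
 */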
6248 static int __init be_init_module(void)
6249 {
6250 int status;
6251
6252 if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6253 rx_frag_size != 2048) {
6254 printk(KERN_WARNING DRV_NAME
6255 " : Module param rx_frag_size must be 2048/4096/8192."
6256 " Using 2048\n");
6257 rx_frag_size = 2048;
6258 }
6259
6260 if (num_vfs > 0) {
6261 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6262 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6263 }
6264
6265 be_wq = create_singlethread_workqueue("be_wq");
6266 if (!be_wq) {
6267 pr_warn(DRV_NAME "workqueue creation failed\n");
6268 return -1;
6269 }
6270
6271 be_err_recovery_workq =
6272 create_singlethread_workqueue("be_err_recover");
6273 if (!be_err_recovery_workq)
6274 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6275
6276 status = pci_register_driver(&be_driver);
6277 if (status) {
6278 destroy_workqueue(be_wq);
6279 be_destroy_err_recovery_workq();
6280 }
6281 return status;
6282 }
6283 module_init(be_init_module);
6284
6285 static void __exit be_exit_module(void)
6286 {
6287 pci_unregister_driver(&be_driver);
6288
6289 be_destroy_err_recovery_workq();
6290
6291 if (be_wq)
6292 destroy_workqueue(be_wq);
6293 }
6294 module_exit(be_exit_module);