#include <linux/platform_device.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/page_pool/helpers.h>
#include "bcmasp.h"
#include "bcmasp_intf_defs.h"
struct bcmasp_desc *desc;
struct sk_buff *skb;
dma_addr_t valid;
+ struct page *page;
void *data;
u64 flags;
u32 len;
+ /* Hardware advances DMA_VALID as it writes each descriptor
+ * (RBUF_4K streaming mode); software chases with rx_edpkt_dma_read.
+ */
valid = rx_edpkt_dma_rq(intf, RX_EDPKT_DMA_VALID) + 1;
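+ /* Wrap the end-of-valid pointer back to the ring base once it
+ * reaches the end of the descriptor ring.
+ */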
if (valid == intf->rx_edpkt_dma_addr + DESC_RING_SIZE)
valid = intf->rx_edpkt_dma_addr;
while ((processed < budget) && (valid != intf->rx_edpkt_dma_read)) {
desc = &intf->rx_edpkt_cpu[intf->rx_edpkt_index];
- /* Ensure that descriptor has been fully written to DRAM by
- * hardware before reading by the CPU
+ /* Ensure the descriptor has been fully written to DRAM by
+ * the hardware before the CPU reads it.
*/
rmb();
- /* Calculate virt addr by offsetting from physical addr */
+ /* Locate the packet data inside the streaming ring buffer. */
data = intf->rx_ring_cpu +
(DESC_ADDR(desc->buf) - intf->rx_ring_dma);
len = desc->size;
- skb = napi_alloc_skb(napi, len);
- if (!skb) {
+ /* Allocate a page pool page as the SKB data area so the page is
+ * recycled back into the pool once the packet is consumed, rather
+ * than freed to the page allocator on every packet.
+ */
+ page = page_pool_dev_alloc_pages(intf->rx_page_pool);
+ if (!page) {
u64_stats_update_begin(&stats->syncp);
u64_stats_inc(&stats->rx_dropped);
u64_stats_update_end(&stats->syncp);
intf->mib.alloc_rx_skb_failed++;
+ goto next;
+ }
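+ /* Build the SKB directly around the pooled page. napi_build_skb()
+ * places skb_shared_info at the end of the buffer, so the usable
+ * data area is PAGE_SIZE minus that overhead.
+ */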
+ skb = napi_build_skb(page_address(page), PAGE_SIZE);
+ if (!skb) {
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_inc(&stats->rx_dropped);
+ u64_stats_update_end(&stats->syncp);
+ intf->mib.alloc_rx_skb_failed++;
+ page_pool_recycle_direct(intf->rx_page_pool, page);
goto next;
}
+ /* Reserve headroom then copy the full descriptor payload
+ * (hardware prepends a 2-byte alignment pad at the start).
+ */
+ skb_reserve(skb, NET_SKB_PAD);
skb_put(skb, len);
memcpy(skb->data, data, len);
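+ /* Mark the SKB as page pool backed so freeing it returns the page
+ * to the pool instead of the page allocator.
+ */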
+ skb_mark_for_recycle(skb);
+ /* Skip the 2-byte hardware alignment pad. */
skb_pull(skb, 2);
len -= 2;
if (likely(intf->crc_fwd)) {
u64_stats_update_end(&stats->syncp);
next:
+ /* Return this portion of the streaming ring buffer to HW. */
rx_edpkt_cfg_wq(intf, (DESC_ADDR(desc->buf) + desc->size),
RX_EDPKT_RING_BUFFER_READ);
phy_print_status(phydev);
}
-static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
+static struct page_pool *
+bcmasp_rx_page_pool_create(struct bcmasp_intf *intf)
+{
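+ /* Pool pages are plain CPU copy targets, so no DMA mapping flags
+ * are set. Binding the pool to the RX NAPI instance lets pages
+ * freed from softirq context be recycled directly into the pool's
+ * cache.
+ */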
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .flags = 0,
+ .pool_size = NUM_4K_BUFFERS,
+ .nid = NUMA_NO_NODE,
+ .dev = &intf->parent->pdev->dev,
+ .napi = &intf->rx_napi,
+ .netdev = intf->ndev,
+ .offset = 0,
+ .max_len = PAGE_SIZE,
+ };
+
+ return page_pool_create(&pp_params);
+}
+
+static int bcmasp_alloc_rx_buffers(struct bcmasp_intf *intf)
{
struct device *kdev = &intf->parent->pdev->dev;
struct page *buffer_pg;
+ int ret;
- /* Alloc RX */
+ /* Contiguous streaming ring that hardware writes packet data into. */
intf->rx_buf_order = get_order(RING_BUFFER_SIZE);
buffer_pg = alloc_pages(GFP_KERNEL, intf->rx_buf_order);
if (!buffer_pg)
intf->rx_ring_cpu = page_to_virt(buffer_pg);
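+ /* Map the whole streaming ring for device writes; hardware DMAs
+ * packet data into it and the RX poll loop copies packets out.
+ */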
intf->rx_ring_dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE,
DMA_FROM_DEVICE);
- if (dma_mapping_error(kdev, intf->rx_ring_dma))
- goto free_rx_buffer;
+ if (dma_mapping_error(kdev, intf->rx_ring_dma)) {
+ ret = -ENOMEM;
+ goto free_ring_pages;
+ }
+
+ /* Page pool for SKB data areas (copy targets, not DMA buffers). */
+ intf->rx_page_pool = bcmasp_rx_page_pool_create(intf);
+ if (IS_ERR(intf->rx_page_pool)) {
+ ret = PTR_ERR(intf->rx_page_pool);
+ intf->rx_page_pool = NULL;
+ goto free_ring_dma;
+ }
+
+ return 0;
+
+free_ring_dma:
+ dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
+ DMA_FROM_DEVICE);
+free_ring_pages:
+ __free_pages(buffer_pg, intf->rx_buf_order);
+ return ret;
+}
+
+static void bcmasp_reclaim_rx_buffers(struct bcmasp_intf *intf)
+{
+ struct device *kdev = &intf->parent->pdev->dev;
+
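+ /* page_pool_destroy() defers final teardown until all in-flight
+ * pages have been returned to the pool.
+ */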
+ page_pool_destroy(intf->rx_page_pool);
+ intf->rx_page_pool = NULL;
+ dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
+ DMA_FROM_DEVICE);
+ __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
+}
+
+static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
+{
+ struct device *kdev = &intf->parent->pdev->dev;
+ int ret;
+
+ /* Alloc RX */
+ ret = bcmasp_alloc_rx_buffers(intf);
+ if (ret)
+ return ret;
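+ /* Descriptor rings are coherent allocations, so hardware writes
+ * are visible to the CPU without explicit syncs; the poll loop
+ * only needs an rmb() for ordering.
+ */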
intf->rx_edpkt_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE,
- &intf->rx_edpkt_dma_addr, GFP_KERNEL);
+ &intf->rx_edpkt_dma_addr,
+ GFP_KERNEL);
if (!intf->rx_edpkt_cpu)
- goto free_rx_buffer_dma;
+ goto free_rx_buffers;
/* Alloc TX */
intf->tx_spb_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE,
free_rx_edpkt_dma:
dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu,
intf->rx_edpkt_dma_addr);
-free_rx_buffer_dma:
- dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
- DMA_FROM_DEVICE);
-free_rx_buffer:
- __free_pages(buffer_pg, intf->rx_buf_order);
+free_rx_buffers:
+ bcmasp_reclaim_rx_buffers(intf);
return -ENOMEM;
}
/* RX buffers */
dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu,
intf->rx_edpkt_dma_addr);
- dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
- DMA_FROM_DEVICE);
- __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
+ bcmasp_reclaim_rx_buffers(intf);
/* TX buffers */
dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu,
/* Make sure channels are disabled */
rx_edpkt_cfg_wl(intf, 0x0, RX_EDPKT_CFG_ENABLE);
- /* Rx SPB */
+ /* Streaming data ring: hardware writes raw packet bytes here. */
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_READ);
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_WRITE);
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_BASE);
rx_edpkt_cfg_wq(intf, intf->rx_ring_dma_valid,
RX_EDPKT_RING_BUFFER_VALID);
- /* EDPKT */
+ /* EDPKT descriptor ring: hardware fills descriptors pointing into
+ * the streaming ring buffer above (RBUF_4K mode).
+ */
rx_edpkt_cfg_wl(intf, (RX_EDPKT_CFG_CFG0_RBUF_4K <<
RX_EDPKT_CFG_CFG0_DBUF_SHIFT) |
(RX_EDPKT_CFG_CFG0_64_ALN <<