git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net: bcmasp: Switch to page pool for RX path
authorFlorian Fainelli <florian.fainelli@broadcom.com>
Wed, 8 Apr 2026 00:18:13 +0000 (17:18 -0700)
committerJakub Kicinski <kuba@kernel.org>
Thu, 9 Apr 2026 02:24:12 +0000 (19:24 -0700)
Switching the RX path to the page pool allocator shows a 1.9% improvement
in CPU cycles and a reduction in data cache misses.

Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
Reviewed-by: Justin Chen <justin.chen@broadcom.com>
Reviewed-by: Nicolai Buchwitz <nb@tipi-net.de>
Link: https://patch.msgid.link/20260408001813.635679-1-florian.fainelli@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/broadcom/Kconfig
drivers/net/ethernet/broadcom/asp2/bcmasp.h
drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
drivers/net/ethernet/broadcom/asp2/bcmasp_intf_defs.h

index dd164acafd01f91c10952de3d6738237d4fad21d..4287edc7ddd606a28034727ba60e6972655615ff 100644 (file)
@@ -272,6 +272,7 @@ config BCMASP
        depends on OF
        select PHYLIB
        select MDIO_BCM_UNIMAC
+       select PAGE_POOL
        help
          This configuration enables the Broadcom ASP 2.0 Ethernet controller
          driver which is present in Broadcom STB SoCs such as 72165.
index 29cd87335ec8007a4356e0db974cc7884ecb0ca4..8c8ffaeadc79f866c569db56f98d6c48e5745f74 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/phy.h>
 #include <linux/io-64-nonatomic-hi-lo.h>
 #include <uapi/linux/ethtool.h>
+#include <net/page_pool/helpers.h>
 
 #define ASP_INTR2_OFFSET                       0x1000
 #define  ASP_INTR2_STATUS                      0x0
@@ -298,16 +299,19 @@ struct bcmasp_intf {
        void __iomem                    *rx_edpkt_cfg;
        void __iomem                    *rx_edpkt_dma;
        int                             rx_edpkt_index;
-       int                             rx_buf_order;
        struct bcmasp_desc              *rx_edpkt_cpu;
        dma_addr_t                      rx_edpkt_dma_addr;
        dma_addr_t                      rx_edpkt_dma_read;
        dma_addr_t                      rx_edpkt_dma_valid;
 
-       /* RX buffer prefetcher ring*/
+       /* Streaming RX data ring (RBUF_4K mode) */
        void                            *rx_ring_cpu;
        dma_addr_t                      rx_ring_dma;
        dma_addr_t                      rx_ring_dma_valid;
+       int                             rx_buf_order;
+
+       /* Page pool for recycling RX SKB data pages */
+       struct page_pool                *rx_page_pool;
        struct napi_struct              rx_napi;
 
        struct bcmasp_res               res;
index b368ec2fea4305da988a17f0548ab5f6def066ce..ec63f50a849e210478626bc0ed42058921051039 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/platform_device.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/page_pool/helpers.h>
 
 #include "bcmasp.h"
 #include "bcmasp_intf_defs.h"
@@ -482,10 +483,14 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
        struct bcmasp_desc *desc;
        struct sk_buff *skb;
        dma_addr_t valid;
+       struct page *page;
        void *data;
        u64 flags;
        u32 len;
 
+       /* Hardware advances DMA_VALID as it writes each descriptor
+        * (RBUF_4K streaming mode); software chases with rx_edpkt_dma_read.
+        */
        valid = rx_edpkt_dma_rq(intf, RX_EDPKT_DMA_VALID) + 1;
        if (valid == intf->rx_edpkt_dma_addr + DESC_RING_SIZE)
                valid = intf->rx_edpkt_dma_addr;
@@ -493,12 +498,12 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
        while ((processed < budget) && (valid != intf->rx_edpkt_dma_read)) {
                desc = &intf->rx_edpkt_cpu[intf->rx_edpkt_index];
 
-               /* Ensure that descriptor has been fully written to DRAM by
-                * hardware before reading by the CPU
+               /* Ensure the descriptor has been fully written to DRAM by
+                * the hardware before the CPU reads it.
                 */
                rmb();
 
-               /* Calculate virt addr by offsetting from physical addr */
+               /* Locate the packet data inside the streaming ring buffer. */
                data = intf->rx_ring_cpu +
                        (DESC_ADDR(desc->buf) - intf->rx_ring_dma);
 
@@ -524,19 +529,38 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
 
                len = desc->size;
 
-               skb = napi_alloc_skb(napi, len);
-               if (!skb) {
+               /* Allocate a page pool page as the SKB data area so the
+                * kernel can recycle it efficiently after the packet is
+                * consumed, avoiding repeated slab allocations.
+                */
+               page = page_pool_dev_alloc_pages(intf->rx_page_pool);
+               if (!page) {
                        u64_stats_update_begin(&stats->syncp);
                        u64_stats_inc(&stats->rx_dropped);
                        u64_stats_update_end(&stats->syncp);
                        intf->mib.alloc_rx_skb_failed++;
+                       goto next;
+               }
 
+               skb = napi_build_skb(page_address(page), PAGE_SIZE);
+               if (!skb) {
+                       u64_stats_update_begin(&stats->syncp);
+                       u64_stats_inc(&stats->rx_dropped);
+                       u64_stats_update_end(&stats->syncp);
+                       intf->mib.alloc_rx_skb_failed++;
+                       page_pool_recycle_direct(intf->rx_page_pool, page);
                        goto next;
                }
 
+               /* Reserve headroom then copy the full descriptor payload
+                * (hardware prepends a 2-byte alignment pad at the start).
+                */
+               skb_reserve(skb, NET_SKB_PAD);
                skb_put(skb, len);
                memcpy(skb->data, data, len);
+               skb_mark_for_recycle(skb);
 
+               /* Skip the 2-byte hardware alignment pad. */
                skb_pull(skb, 2);
                len -= 2;
                if (likely(intf->crc_fwd)) {
@@ -558,6 +582,7 @@ static int bcmasp_rx_poll(struct napi_struct *napi, int budget)
                u64_stats_update_end(&stats->syncp);
 
 next:
+               /* Return this portion of the streaming ring buffer to HW. */
                rx_edpkt_cfg_wq(intf, (DESC_ADDR(desc->buf) + desc->size),
                                RX_EDPKT_RING_BUFFER_READ);
 
@@ -661,12 +686,31 @@ static void bcmasp_adj_link(struct net_device *dev)
                phy_print_status(phydev);
 }
 
-static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
+static struct page_pool *
+bcmasp_rx_page_pool_create(struct bcmasp_intf *intf)
+{
+       struct page_pool_params pp_params = {
+               .order          = 0,
+               .flags          = 0,
+               .pool_size      = NUM_4K_BUFFERS,
+               .nid            = NUMA_NO_NODE,
+               .dev            = &intf->parent->pdev->dev,
+               .napi           = &intf->rx_napi,
+               .netdev         = intf->ndev,
+               .offset         = 0,
+               .max_len        = PAGE_SIZE,
+       };
+
+       return page_pool_create(&pp_params);
+}
+
+static int bcmasp_alloc_rx_buffers(struct bcmasp_intf *intf)
 {
        struct device *kdev = &intf->parent->pdev->dev;
        struct page *buffer_pg;
+       int ret;
 
-       /* Alloc RX */
+       /* Contiguous streaming ring that hardware writes packet data into. */
        intf->rx_buf_order = get_order(RING_BUFFER_SIZE);
        buffer_pg = alloc_pages(GFP_KERNEL, intf->rx_buf_order);
        if (!buffer_pg)
@@ -675,13 +719,55 @@ static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
        intf->rx_ring_cpu = page_to_virt(buffer_pg);
        intf->rx_ring_dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE,
                                         DMA_FROM_DEVICE);
-       if (dma_mapping_error(kdev, intf->rx_ring_dma))
-               goto free_rx_buffer;
+       if (dma_mapping_error(kdev, intf->rx_ring_dma)) {
+               ret = -ENOMEM;
+               goto free_ring_pages;
+       }
+
+       /* Page pool for SKB data areas (copy targets, not DMA buffers). */
+       intf->rx_page_pool = bcmasp_rx_page_pool_create(intf);
+       if (IS_ERR(intf->rx_page_pool)) {
+               ret = PTR_ERR(intf->rx_page_pool);
+               intf->rx_page_pool = NULL;
+               goto free_ring_dma;
+       }
+
+       return 0;
+
+free_ring_dma:
+       dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
+                      DMA_FROM_DEVICE);
+free_ring_pages:
+       __free_pages(buffer_pg, intf->rx_buf_order);
+       return ret;
+}
+
+static void bcmasp_reclaim_rx_buffers(struct bcmasp_intf *intf)
+{
+       struct device *kdev = &intf->parent->pdev->dev;
+
+       page_pool_destroy(intf->rx_page_pool);
+       intf->rx_page_pool = NULL;
+       dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
+                      DMA_FROM_DEVICE);
+       __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
+}
+
+static int bcmasp_alloc_buffers(struct bcmasp_intf *intf)
+{
+       struct device *kdev = &intf->parent->pdev->dev;
+       int ret;
+
+       /* Alloc RX */
+       ret = bcmasp_alloc_rx_buffers(intf);
+       if (ret)
+               return ret;
 
        intf->rx_edpkt_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE,
-                                               &intf->rx_edpkt_dma_addr, GFP_KERNEL);
+                                               &intf->rx_edpkt_dma_addr,
+                                               GFP_KERNEL);
        if (!intf->rx_edpkt_cpu)
-               goto free_rx_buffer_dma;
+               goto free_rx_buffers;
 
        /* Alloc TX */
        intf->tx_spb_cpu = dma_alloc_coherent(kdev, DESC_RING_SIZE,
@@ -701,11 +787,8 @@ free_tx_spb_dma:
 free_rx_edpkt_dma:
        dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu,
                          intf->rx_edpkt_dma_addr);
-free_rx_buffer_dma:
-       dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
-                      DMA_FROM_DEVICE);
-free_rx_buffer:
-       __free_pages(buffer_pg, intf->rx_buf_order);
+free_rx_buffers:
+       bcmasp_reclaim_rx_buffers(intf);
 
        return -ENOMEM;
 }
@@ -717,9 +800,7 @@ static void bcmasp_reclaim_free_buffers(struct bcmasp_intf *intf)
        /* RX buffers */
        dma_free_coherent(kdev, DESC_RING_SIZE, intf->rx_edpkt_cpu,
                          intf->rx_edpkt_dma_addr);
-       dma_unmap_page(kdev, intf->rx_ring_dma, RING_BUFFER_SIZE,
-                      DMA_FROM_DEVICE);
-       __free_pages(virt_to_page(intf->rx_ring_cpu), intf->rx_buf_order);
+       bcmasp_reclaim_rx_buffers(intf);
 
        /* TX buffers */
        dma_free_coherent(kdev, DESC_RING_SIZE, intf->tx_spb_cpu,
@@ -738,7 +819,7 @@ static void bcmasp_init_rx(struct bcmasp_intf *intf)
        /* Make sure channels are disabled */
        rx_edpkt_cfg_wl(intf, 0x0, RX_EDPKT_CFG_ENABLE);
 
-       /* Rx SPB */
+       /* Streaming data ring: hardware writes raw packet bytes here. */
        rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_READ);
        rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_WRITE);
        rx_edpkt_cfg_wq(intf, intf->rx_ring_dma, RX_EDPKT_RING_BUFFER_BASE);
@@ -747,7 +828,9 @@ static void bcmasp_init_rx(struct bcmasp_intf *intf)
        rx_edpkt_cfg_wq(intf, intf->rx_ring_dma_valid,
                        RX_EDPKT_RING_BUFFER_VALID);
 
-       /* EDPKT */
+       /* EDPKT descriptor ring: hardware fills descriptors pointing into
+        * the streaming ring buffer above (RBUF_4K mode).
+        */
        rx_edpkt_cfg_wl(intf, (RX_EDPKT_CFG_CFG0_RBUF_4K <<
                        RX_EDPKT_CFG_CFG0_DBUF_SHIFT) |
                       (RX_EDPKT_CFG_CFG0_64_ALN <<
index af7418348e8157fd56ad13ef98237c4cffd76afd..0318f257452abdc62dae31599c478072297d6166 100644 (file)
        ((((intf)->channel - 6) * 0x14) + 0xa2000)
 #define  RX_SPB_TOP_BLKOUT             0x00
 
+/*
+ * Number of 4 KB pages that make up the contiguous RBUF_4K streaming ring
+ * and the page pool used as copy-target SKB data areas.
+ */
 #define NUM_4K_BUFFERS                 32
 #define RING_BUFFER_SIZE               (PAGE_SIZE * NUM_4K_BUFFERS)