eth: fbnic: Add XDP pass, drop, abort support
author    Mohsin Bashir <mohsin.bashr@gmail.com>
          Wed, 13 Aug 2025 22:13:15 +0000 (15:13 -0700)
committer Paolo Abeni <pabeni@redhat.com>
          Tue, 19 Aug 2025 08:51:16 +0000 (10:51 +0200)
Add basic support for attaching an XDP program to the device and support
for PASS/DROP/ABORT actions. In fbnic, buffers are always mapped as
DMA_BIDIRECTIONAL.

The BPF program pointer can be read either on a per-packet basis or once
per NAPI poll. Both approaches are functionally equivalent in the current
code. Stick to the per-packet read, as it limits the number of arguments
we need to pass around.
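
For illustration only (not from this patch; the helper names below are
hypothetical stand-ins for the real Rx routines), the two options look
roughly like:

/* Per-packet: re-read the program pointer for every frame. */
static void fbnic_rx_one(struct fbnic_net *fbn, struct xdp_buff *buff)
{
	struct bpf_prog *prog = READ_ONCE(fbn->xdp_prog);

	if (prog)
		bpf_prog_run_xdp(prog, buff);
}

/* Per-NAPI poll: read once, then thread the pointer down the call chain. */
static void fbnic_poll_rx(struct fbnic_net *fbn, int budget)
{
	struct bpf_prog *prog = READ_ONCE(fbn->xdp_prog);

	while (budget--)
		fbnic_rx_one_with_prog(fbn, prog /* , ... */);
}

The patch uses the first form, which is why fbnic_run_xdp() below takes
only the NAPI vector and the packet buffer.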

On the XDP hot path, check that packets with fragments are only allowed
when multi-buffer support is enabled for the XDP program. Ideally, this
check should not be necessary, because ndo_bpf verifies that for XDP
programs without multi-buffer support, the MTU is less than the
hds_thresh. However, the MTU currently does not enforce the receive size;
doing so would require cleaning up the data path and bouncing the link.
For practical reasons, prioritize the ability to enter and exit BPF mode
with different MTU sizes without requiring a full reconfig.
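
For a concrete feel for the check (values match the testing below): with
the standard 1500-byte MTU, a full frame occupies

	mtu + ETH_HLEN = 1500 + 14 = 1514 bytes

so an hds-thresh of 512 would split it (single-buffer programs must be
rejected), while 1536 keeps it in one buffer. This is exactly the
mtu + ETH_HLEN > hds_thresh comparison in fbnic_check_split_frames()
below.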

Testing:

XDP_PASS:
Hook a simple XDP program that passes all packets destined for a specific
port.
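
A minimal sketch of what xdp_pass_12345.o could look like (hypothetical,
not part of this commit; assumes IPv4 without IP options):

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_pass_12345(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct iphdr *iph = data + sizeof(*eth);
	struct tcphdr *tcp = data + sizeof(*eth) + sizeof(*iph);

	/* One bounds check at the deepest header keeps the verifier happy. */
	if ((void *)(tcp + 1) > data_end)
		return XDP_PASS;

	if (eth->h_proto == bpf_htons(ETH_P_IP) &&
	    iph->protocol == IPPROTO_TCP &&
	    tcp->dest == bpf_htons(12345))
		return XDP_PASS;	/* traffic for the test port */

	return XDP_PASS;		/* everything else passes too */
}

char _license[] SEC("license") = "GPL";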

iperf3 -c 192.168.1.10 -P 5 -p 12345
Connecting to host 192.168.1.10, port 12345
[  5] local 192.168.1.9 port 46702 connected to 192.168.1.10 port 12345
[ ID] Interval           Transfer     Bitrate         Retr  Cwnd
- - - - - - - - - - - - - - - - - - - - - - - - -
[SUM]   1.00-2.00   sec  3.86 GBytes  33.2 Gbits/sec    0

XDP_DROP:
Hook an XDP program that drops packets destined for the same port.
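
The drop variant can reuse the sketch above; only the matched branch
changes (again hypothetical):

	if (eth->h_proto == bpf_htons(ETH_P_IP) &&
	    iph->protocol == IPPROTO_TCP &&
	    tcp->dest == bpf_htons(12345))
		return XDP_DROP;	/* drop instead of pass */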

 iperf3 -c 192.168.1.10 -P 5 -p 12345
^C- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval           Transfer     Bitrate         Retr
[SUM]   0.00-0.00   sec  0.00 Bytes  0.00 bits/sec    0       sender
[SUM]   0.00-0.00   sec  0.00 Bytes  0.00 bits/sec            receiver
iperf3: interrupt - the client has terminated

XDP with HDS:

- Validate XDP attachment failure when the HDS threshold is too low
  ~] ethtool -G eth0 hds-thresh 512
  ~] sudo ip link set eth0 xdpdrv obj xdp_pass_12345.o sec xdp
  Error: fbnic: MTU too high, or HDS threshold is too low for single
  buffer XDP.

- Validate successful XDP attachment when the HDS threshold is high enough
  ~] ethtool -G eth0 hds-thresh 1536
  ~] sudo ip link set eth0 xdpdrv obj xdp_pass_12345.o sec xdp

- Validate that, while the XDP program is attached, changing the HDS
  threshold to a lower value fails
  ~] ethtool -G eth0 hds-thresh 512
  netlink error: fbnic: Use higher HDS threshold or multi-buf capable
  program

- Validate that the HDS threshold does not matter when XDP frags support
  is available
  ~] ethtool -G eth0 hds-thresh 512
  ~] sudo ip link set eth0 xdpdrv obj xdp_pass_mb_12345.o sec xdp.frags
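
For reference, a multi-buffer-capable xdp_pass_mb_12345.c (hypothetical)
differs from the single-buffer sketch only in the ELF section name:
SEC("xdp.frags") makes libbpf load the program with BPF_F_XDP_HAS_FRAGS,
which is what prog->aux->xdp_has_frags reflects in the checks above.

SEC("xdp.frags")
int xdp_pass_mb_12345(struct xdp_md *ctx)
{
	/* Same body as the single-buffer version; the section name alone
	 * flips prog->aux->xdp_has_frags, so fbnic_check_split_frames()
	 * never rejects this program regardless of hds-thresh.
	 */
	return XDP_PASS;
}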

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Mohsin Bashir <mohsin.bashr@gmail.com>
Link: https://patch.msgid.link/20250813221319.3367670-6-mohsin.bashr@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
drivers/net/ethernet/meta/fbnic/fbnic_txrx.h

diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
index 569ddd767f9d0f3ce1c1d597d40fd3cc5b120bc8..4c231433a63d8a164cd75093529918a52840e43d 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
@@ -329,6 +329,17 @@ fbnic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
                return -EINVAL;
        }
 
+       /* If an XDP program is attached, we should check for potential frame
+        * splitting. If the new HDS threshold can cause splitting, we should
+        * only allow it if the attached XDP program can handle frags.
+        */
+       if (fbnic_check_split_frames(fbn->xdp_prog, netdev->mtu,
+                                    kernel_ring->hds_thresh)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Use higher HDS threshold or multi-buf capable program");
+               return -EINVAL;
+       }
+
        if (!netif_running(netdev)) {
                fbnic_set_rings(fbn, ring, kernel_ring);
                return 0;
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
index a7eb7a367b98309d67c7338472736fa9d785531e..fb81d1a7bc5107d1ab851b57fa938b4690d13eac 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
@@ -508,6 +508,40 @@ static void fbnic_get_stats64(struct net_device *dev,
        }
 }
 
+bool fbnic_check_split_frames(struct bpf_prog *prog, unsigned int mtu,
+                             u32 hds_thresh)
+{
+       if (!prog)
+               return false;
+
+       if (prog->aux->xdp_has_frags)
+               return false;
+
+       return mtu + ETH_HLEN > hds_thresh;
+}
+
+static int fbnic_bpf(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+       struct bpf_prog *prog = bpf->prog, *prev_prog;
+       struct fbnic_net *fbn = netdev_priv(netdev);
+
+       if (bpf->command != XDP_SETUP_PROG)
+               return -EINVAL;
+
+       if (fbnic_check_split_frames(prog, netdev->mtu,
+                                    fbn->hds_thresh)) {
+               NL_SET_ERR_MSG_MOD(bpf->extack,
+                                  "MTU too high, or HDS threshold is too low for single buffer XDP");
+               return -EOPNOTSUPP;
+       }
+
+       prev_prog = xchg(&fbn->xdp_prog, prog);
+       if (prev_prog)
+               bpf_prog_put(prev_prog);
+
+       return 0;
+}
+
 static const struct net_device_ops fbnic_netdev_ops = {
        .ndo_open               = fbnic_open,
        .ndo_stop               = fbnic_stop,
@@ -517,6 +551,7 @@ static const struct net_device_ops fbnic_netdev_ops = {
        .ndo_set_mac_address    = fbnic_set_mac,
        .ndo_set_rx_mode        = fbnic_set_rx_mode,
        .ndo_get_stats64        = fbnic_get_stats64,
+       .ndo_bpf                = fbnic_bpf,
        .ndo_hwtstamp_get       = fbnic_hwtstamp_get,
        .ndo_hwtstamp_set       = fbnic_hwtstamp_set,
 };
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
index 04c5c7ed6c3a447612bff75e5277c073d8c2a971..bfa79ea910d85996c4108112f5576d4a5edaed8d 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
@@ -18,6 +18,8 @@
 #define FBNIC_TUN_GSO_FEATURES         NETIF_F_GSO_IPXIP6
 
 struct fbnic_net {
+       struct bpf_prog *xdp_prog;
+
        struct fbnic_ring *tx[FBNIC_MAX_TXQS];
        struct fbnic_ring *rx[FBNIC_MAX_RXQS];
 
@@ -104,4 +106,7 @@ int fbnic_phylink_ethtool_ksettings_get(struct net_device *netdev,
 int fbnic_phylink_get_fecparam(struct net_device *netdev,
                               struct ethtool_fecparam *fecparam);
 int fbnic_phylink_init(struct net_device *netdev);
+
+bool fbnic_check_split_frames(struct bpf_prog *prog,
+                             unsigned int mtu, u32 hds_threshold);
 #endif /* _FBNIC_NETDEV_H_ */
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
index 65d1e40addec0b4dc09ca3523fd0e7c2bdf3ae40..a669e169e3ad55b62a3533a2c170fa43f2e1257e 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -2,17 +2,26 @@
 /* Copyright (c) Meta Platforms, Inc. and affiliates. */
 
 #include <linux/bitfield.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
 #include <linux/iopoll.h>
 #include <linux/pci.h>
 #include <net/netdev_queues.h>
 #include <net/page_pool/helpers.h>
 #include <net/tcp.h>
+#include <net/xdp.h>
 
 #include "fbnic.h"
 #include "fbnic_csr.h"
 #include "fbnic_netdev.h"
 #include "fbnic_txrx.h"
 
+enum {
+       FBNIC_XDP_PASS = 0,
+       FBNIC_XDP_CONSUME,
+       FBNIC_XDP_LEN_ERR,
+};
+
 enum {
        FBNIC_XMIT_CB_TS        = 0x01,
 };
@@ -877,7 +886,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd,
 
        headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD;
        frame_sz = hdr_pg_end - hdr_pg_start;
-       xdp_init_buff(&pkt->buff, frame_sz, NULL);
+       xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq);
        hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) *
                        FBNIC_BD_FRAG_SIZE;
 
@@ -967,6 +976,39 @@ static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv,
        return skb;
 }
 
+static struct sk_buff *fbnic_run_xdp(struct fbnic_napi_vector *nv,
+                                    struct fbnic_pkt_buff *pkt)
+{
+       struct fbnic_net *fbn = netdev_priv(nv->napi.dev);
+       struct bpf_prog *xdp_prog;
+       int act;
+
+       xdp_prog = READ_ONCE(fbn->xdp_prog);
+       if (!xdp_prog)
+               goto xdp_pass;
+
+       /* Should never happen, config paths enforce HDS threshold > MTU */
+       if (xdp_buff_has_frags(&pkt->buff) && !xdp_prog->aux->xdp_has_frags)
+               return ERR_PTR(-FBNIC_XDP_LEN_ERR);
+
+       act = bpf_prog_run_xdp(xdp_prog, &pkt->buff);
+       switch (act) {
+       case XDP_PASS:
+xdp_pass:
+               return fbnic_build_skb(nv, pkt);
+       default:
+               bpf_warn_invalid_xdp_action(nv->napi.dev, xdp_prog, act);
+               fallthrough;
+       case XDP_ABORTED:
+               trace_xdp_exception(nv->napi.dev, xdp_prog, act);
+               fallthrough;
+       case XDP_DROP:
+               break;
+       }
+
+       return ERR_PTR(-FBNIC_XDP_CONSUME);
+}
+
 static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd)
 {
        return (FBNIC_RCD_META_L4_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L4 :
@@ -1065,7 +1107,7 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
                        if (unlikely(pkt->add_frag_failed))
                                skb = NULL;
                        else if (likely(!fbnic_rcd_metadata_err(rcd)))
-                               skb = fbnic_build_skb(nv, pkt);
+                               skb = fbnic_run_xdp(nv, pkt);
 
                        /* Populate skb and invalidate XDP */
                        if (!IS_ERR_OR_NULL(skb)) {
@@ -1251,6 +1293,7 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn,
        }
 
        for (j = 0; j < nv->rxt_count; j++, i++) {
+               xdp_rxq_info_unreg(&nv->qt[i].xdp_rxq);
                fbnic_remove_rx_ring(fbn, &nv->qt[i].sub0);
                fbnic_remove_rx_ring(fbn, &nv->qt[i].sub1);
                fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl);
@@ -1423,6 +1466,11 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
                fbnic_ring_init(&qt->cmpl, db, rxq_idx, FBNIC_RING_F_STATS);
                fbn->rx[rxq_idx] = &qt->cmpl;
 
+               err = xdp_rxq_info_reg(&qt->xdp_rxq, fbn->netdev, rxq_idx,
+                                      nv->napi.napi_id);
+               if (err)
+                       goto free_ring_cur_qt;
+
                /* Update Rx queue index */
                rxt_count--;
                rxq_idx += v_count;
@@ -1433,6 +1481,25 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn,
 
        return 0;
 
+       while (rxt_count < nv->rxt_count) {
+               qt--;
+
+               xdp_rxq_info_unreg(&qt->xdp_rxq);
+free_ring_cur_qt:
+               fbnic_remove_rx_ring(fbn, &qt->sub0);
+               fbnic_remove_rx_ring(fbn, &qt->sub1);
+               fbnic_remove_rx_ring(fbn, &qt->cmpl);
+               rxt_count++;
+       }
+       while (txt_count < nv->txt_count) {
+               qt--;
+
+               fbnic_remove_tx_ring(fbn, &qt->sub0);
+               fbnic_remove_tx_ring(fbn, &qt->cmpl);
+
+               txt_count++;
+       }
+       fbnic_napi_free_irq(fbd, nv);
 pp_destroy:
        page_pool_destroy(nv->page_pool);
 napi_del:
@@ -1709,8 +1776,10 @@ static void fbnic_free_nv_resources(struct fbnic_net *fbn,
        for (i = 0; i < nv->txt_count; i++)
                fbnic_free_qt_resources(fbn, &nv->qt[i]);
 
-       for (j = 0; j < nv->rxt_count; j++, i++)
+       for (j = 0; j < nv->rxt_count; j++, i++) {
                fbnic_free_qt_resources(fbn, &nv->qt[i]);
+               xdp_rxq_info_unreg_mem_model(&nv->qt[i].xdp_rxq);
+       }
 }
 
 static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
@@ -1722,19 +1791,32 @@ static int fbnic_alloc_nv_resources(struct fbnic_net *fbn,
        for (i = 0; i < nv->txt_count; i++) {
                err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]);
                if (err)
-                       goto free_resources;
+                       goto free_qt_resources;
        }
 
        /* Allocate Rx Resources */
        for (j = 0; j < nv->rxt_count; j++, i++) {
+               /* Register XDP memory model for completion queue */
+               err = xdp_reg_mem_model(&nv->qt[i].xdp_rxq.mem,
+                                       MEM_TYPE_PAGE_POOL,
+                                       nv->page_pool);
+               if (err)
+                       goto xdp_unreg_mem_model;
+
                err = fbnic_alloc_rx_qt_resources(fbn, &nv->qt[i]);
                if (err)
-                       goto free_resources;
+                       goto xdp_unreg_cur_model;
        }
 
        return 0;
 
-free_resources:
+xdp_unreg_mem_model:
+       while (j-- && i--) {
+               fbnic_free_qt_resources(fbn, &nv->qt[i]);
+xdp_unreg_cur_model:
+               xdp_rxq_info_unreg_mem_model(&nv->qt[i].xdp_rxq);
+       }
+free_qt_resources:
        while (i--)
                fbnic_free_qt_resources(fbn, &nv->qt[i]);
        return err;
@@ -2026,7 +2108,7 @@ void fbnic_flush(struct fbnic_net *fbn)
                        memset(qt->cmpl.desc, 0, qt->cmpl.size);
 
                        fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0);
-                       qt->cmpl.pkt->buff.data_hard_start = NULL;
+                       memset(qt->cmpl.pkt, 0, sizeof(struct fbnic_pkt_buff));
                }
        }
 }
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
index d236152bbaaafe73461898430153ce05db595008..5536f72a1c85e6bdbd01704db8f1ca410bc8e9b0 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
@@ -128,6 +128,7 @@ struct fbnic_ring {
 
 struct fbnic_q_triad {
        struct fbnic_ring sub0, sub1, cmpl;
+       struct xdp_rxq_info xdp_rxq;
 };
 
 struct fbnic_napi_vector {