AC_DEFINE([HAVE_PACKET_EBPF],[1],[Recent ebpf fanout support is available]),
[],
[[#include <linux/if_packet.h>]])
+ AC_CHECK_LIB(bpf, bpf_set_link_xdp_fd,have_xdp="yes",have_xdp="no")
+ if test "$have_xdp" = "yes"; then
+ AC_DEFINE([HAVE_PACKET_XDP],[1],[XDP support is available])
+ fi
fi;
# Check for DAG support.
SURICATA_BUILD_CONF="Suricata Configuration:
AF_PACKET support: ${enable_af_packet}
eBPF support: ${enable_ebpf}
+ XDP support: ${have_xdp}
PF_RING support: ${enable_pfring}
NFQueue support: ${enable_nfqueue}
NFLOG support: ${enable_nflog}
if BUILD_EBPF
-all: lb.bpf filter.bpf bypass_filter.bpf
+all: lb.bpf filter.bpf bypass_filter.bpf xdp_filter.bpf
%.bpf: %.c
${CC} -Wall -O2 -D__KERNEL__ -D__ASM_SYSREG_H -emit-llvm -c $< -o - | ${LLC} -march=bpf -filetype=obj -o $@
return -1;
}
-
char __license[] SEC("license") = "GPL";
uint32_t __version SEC("version") = LINUX_VERSION_CODE;
--- /dev/null
+//#include <bcc/proto.h>
+#define KBUILD_MODNAME "foo"
+#include <stdint.h>
+#include <string.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+
+#define LINUX_VERSION_CODE 263682
+
+/*
+ * 802.1Q / 802.1ad VLAN tag as laid out on the wire right after the
+ * Ethernet header.
+ *
+ * The struct must be exactly 4 bytes: the parser advances its offset by
+ * sizeof(struct vlan_hdr) for each tag, so no alignment attribute that
+ * would pad the size (such as __aligned__(8)) may be applied here.
+ */
+struct vlan_hdr {
+    __u16 h_vlan_TCI;
+    __u16 h_vlan_encapsulated_proto; /* ethertype of what follows the tag */
+};
+
+/*
+ * Key of flow_table_v4: one direction of an IPv4 flow. filter_ipv4() fills
+ * every field straight from the packet headers, without any byte-order
+ * conversion.
+ */
+struct flowv4_keys {
+ __u32 src; /* copied from iph->saddr */
+ __u32 dst; /* copied from iph->daddr */
+ union {
+ __u32 ports;
+ __u16 port16[2]; /* [0] = get_sport() result, [1] = get_dport() result */
+ };
+ __u32 ip_proto; /* iph->protocol widened to 32 bits */
+} __attribute__((__aligned__(8)));
+
+/*
+ * Key of flow_table_v6: one direction of an IPv6 flow. filter_ipv6() fills
+ * every field straight from the packet headers, without any byte-order
+ * conversion.
+ */
+struct flowv6_keys {
+ __u32 src[4]; /* copied from ip6h->saddr.s6_addr32 */
+ __u32 dst[4]; /* copied from ip6h->daddr.s6_addr32 */
+ union {
+ __u32 ports;
+ __u16 port16[2]; /* [0] = get_sport() result, [1] = get_dport() result */
+ };
+ __u32 ip_proto; /* ip6h->nexthdr widened to 32 bits */
+} __attribute__((__aligned__(8)));
+
+/*
+ * Value stored for each flow key; updated by filter_ipv4()/filter_ipv6()
+ * each time a packet matches an existing entry.
+ */
+struct pair {
+ uint64_t time; /* bpf_ktime_get_ns() at the last matching packet */
+ uint64_t packets; /* incremented once per matching packet */
+ uint64_t bytes; /* accumulated (data_end - data) of matching packets */
+} __attribute__((__aligned__(8)));
+
+/*
+ * Per-CPU hash map of IPv4 flows (struct flowv4_keys -> struct pair).
+ * filter_ipv4() returns XDP_DROP for packets whose key is present here.
+ */
+struct bpf_map_def SEC("maps") flow_table_v4 = {
+ .type = BPF_MAP_TYPE_PERCPU_HASH,
+ .key_size = sizeof(struct flowv4_keys),
+ .value_size = sizeof(struct pair),
+ .max_entries = 32768,
+};
+
+/*
+ * Per-CPU hash map of IPv6 flows (struct flowv6_keys -> struct pair).
+ * filter_ipv6() returns XDP_DROP for packets whose key is present here.
+ */
+struct bpf_map_def SEC("maps") flow_table_v6 = {
+ .type = BPF_MAP_TYPE_PERCPU_HASH,
+ .key_size = sizeof(struct flowv6_keys),
+ .value_size = sizeof(struct pair),
+ .max_entries = 32768,
+};
+
+/*
+ * Read the source port of a TCP or UDP header.
+ *
+ * trans_data: start of the transport header
+ * data_end:   first byte past the readable packet data
+ * protocol:   IP protocol number of the transport header
+ *
+ * Returns the source port exactly as stored in the header (no byte-order
+ * conversion), 0 for any protocol other than TCP/UDP, or -1 when the
+ * header does not fit before data_end.
+ */
+static __always_inline int get_sport(void *trans_data, void *data_end,
+                                     uint8_t protocol)
+{
+    struct tcphdr *th;
+    struct udphdr *uh;
+
+    switch (protocol) {
+        case IPPROTO_TCP:
+            th = (struct tcphdr *)trans_data;
+            /* bounds check before any field access */
+            if ((void *)(th + 1) > data_end)
+                return -1;
+            return th->source;
+        case IPPROTO_UDP:
+            uh = (struct udphdr *)trans_data;
+            if ((void *)(uh + 1) > data_end)
+                return -1;
+            /* source, not dest: this is the source-port accessor */
+            return uh->source;
+        default:
+            return 0;
+    }
+}
+
+/*
+ * Read the destination port of a TCP or UDP header.
+ *
+ * trans_data: start of the transport header
+ * data_end:   first byte past the readable packet data
+ * protocol:   IP protocol number of the transport header
+ *
+ * Returns the destination port exactly as stored in the header (no
+ * byte-order conversion), 0 for any protocol other than TCP/UDP, or -1
+ * when the header does not fit before data_end.
+ */
+static __always_inline int get_dport(void *trans_data, void *data_end,
+                                     uint8_t protocol)
+{
+    if (protocol == IPPROTO_TCP) {
+        struct tcphdr *tcp = (struct tcphdr *)trans_data;
+
+        if ((void *)(tcp + 1) > data_end)
+            return -1;
+        return tcp->dest;
+    }
+
+    if (protocol == IPPROTO_UDP) {
+        struct udphdr *udp = (struct udphdr *)trans_data;
+
+        if ((void *)(udp + 1) > data_end)
+            return -1;
+        return udp->dest;
+    }
+
+    return 0;
+}
+
+/*
+ * Decide the XDP verdict for an IPv4 packet.
+ *
+ * data:     start of the packet
+ * nh_off:   offset of the IPv4 header from data
+ * data_end: first byte past the readable packet data
+ *
+ * Builds a flowv4_keys tuple from the headers and looks it up in
+ * flow_table_v4. On a hit the entry's counters are updated and XDP_DROP is
+ * returned; in every other case (truncated headers, no entry) the result
+ * is XDP_PASS.
+ */
+static int __always_inline filter_ipv4(void *data, __u64 nh_off, void *data_end)
+{
+ struct iphdr *iph = data + nh_off;
+ int dport;
+ int sport;
+ struct flowv4_keys tuple;
+ struct pair *value;
+
+ /* the whole fixed IPv4 header must be readable before touching it */
+ if ((void *)(iph + 1) > data_end)
+ return XDP_PASS;
+
+ tuple.ip_proto = (uint32_t) iph->protocol;
+ tuple.src = iph->saddr;
+ tuple.dst = iph->daddr;
+
+ /* the transport header is taken at iph + 1: IP options are not skipped */
+ dport = get_dport(iph + 1, data_end, iph->protocol);
+ if (dport == -1)
+ return XDP_PASS;
+
+ sport = get_sport(iph + 1, data_end, iph->protocol);
+ if (sport == -1)
+ return XDP_PASS;
+
+ /* for protocols other than TCP/UDP both helpers return 0 */
+ tuple.port16[0] = (uint16_t)sport;
+ tuple.port16[1] = (uint16_t)dport;
+ value = bpf_map_lookup_elem(&flow_table_v4, &tuple);
+#if 0
+ {
+ char fmt[] = "Current flow src: %u:%d\n";
+ char fmt1[] = "Current flow dst: %u:%d\n";
+ bpf_trace_printk(fmt, sizeof(fmt), tuple.src, tuple.port16[0]);
+ bpf_trace_printk(fmt1, sizeof(fmt1), tuple.dst, tuple.port16[1]);
+ }
+#endif
+ if (value) {
+#if 0
+ char fmt[] = "Found flow v4: %u %d -> %d\n";
+ bpf_trace_printk(fmt, sizeof(fmt), tuple.src, sport, dport);
+#endif
+ /* account the dropped packet on the matching entry */
+ value->packets++;
+ value->bytes += data_end - data;
+ value->time = bpf_ktime_get_ns();
+ return XDP_DROP;
+ }
+ return XDP_PASS;
+}
+
+/*
+ * Decide the XDP verdict for an IPv6 packet.
+ *
+ * data:     start of the packet
+ * nh_off:   offset of the IPv6 header from data
+ * data_end: first byte past the readable packet data
+ *
+ * Only packets whose next header is directly TCP or UDP are considered;
+ * extension headers are not walked. A flowv6_keys tuple is built from the
+ * headers and looked up in flow_table_v6. On a hit the entry's counters
+ * are updated and XDP_DROP is returned; in every other case (truncated
+ * headers, unsupported next header, no entry) the result is XDP_PASS.
+ */
+static int __always_inline filter_ipv6(void *data, __u64 nh_off, void *data_end)
+{
+    struct ipv6hdr *ip6h = data + nh_off;
+    int dport;
+    int sport;
+    struct flowv6_keys tuple;
+    struct pair *value;
+
+    /* A literal 0 here would be XDP_ABORTED; a truncated header must be
+     * passed up the stack like every other packet we cannot classify. */
+    if ((void *)(ip6h + 1) > data_end)
+        return XDP_PASS;
+    if (!((ip6h->nexthdr == IPPROTO_UDP) || (ip6h->nexthdr == IPPROTO_TCP)))
+        return XDP_PASS;
+
+    dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
+    if (dport == -1)
+        return XDP_PASS;
+
+    sport = get_sport(ip6h + 1, data_end, ip6h->nexthdr);
+    if (sport == -1)
+        return XDP_PASS;
+
+    tuple.ip_proto = ip6h->nexthdr;
+    __builtin_memcpy(tuple.src, ip6h->saddr.s6_addr32, sizeof(tuple.src));
+    __builtin_memcpy(tuple.dst, ip6h->daddr.s6_addr32, sizeof(tuple.dst));
+    tuple.port16[0] = sport;
+    tuple.port16[1] = dport;
+
+    value = bpf_map_lookup_elem(&flow_table_v6, &tuple);
+    if (value) {
+#if 0
+        char fmt6[] = "Found IPv6 flow: %d -> %d\n";
+        bpf_trace_printk(fmt6, sizeof(fmt6), sport, dport);
+#endif
+        /* account the dropped packet on the matching entry */
+        value->packets++;
+        value->bytes += data_end - data;
+        value->time = bpf_ktime_get_ns();
+        return XDP_DROP;
+    }
+    return XDP_PASS;
+}
+
+/*
+ * XDP entry point (section "xdp").
+ *
+ * Parses the Ethernet header, steps over up to two 802.1Q/802.1ad VLAN
+ * tags, then hands IPv4 and IPv6 packets to filter_ipv4()/filter_ipv6()
+ * and returns their verdict. Anything else, including packets too short
+ * to parse, gets XDP_PASS.
+ */
+int SEC("xdp") xdp_hashfilter(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ int rc = XDP_PASS;
+ uint16_t h_proto;
+ uint64_t nh_off;
+
+ /* the Ethernet header must be fully readable before h_proto is read */
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return rc;
+
+ h_proto = eth->h_proto;
+
+ /* first (outer) VLAN tag, if any: nh_off advances by sizeof(struct vlan_hdr) */
+ if (h_proto == __constant_htons(ETH_P_8021Q) || h_proto == __constant_htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vhdr;
+
+ vhdr = data + nh_off;
+ nh_off += sizeof(struct vlan_hdr);
+ if (data + nh_off > data_end)
+ return rc;
+ h_proto = vhdr->h_vlan_encapsulated_proto;
+ }
+ /* second (inner) VLAN tag, if any; written out twice rather than looped */
+ if (h_proto == __constant_htons(ETH_P_8021Q) || h_proto == __constant_htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vhdr;
+
+ vhdr = data + nh_off;
+ nh_off += sizeof(struct vlan_hdr);
+ if (data + nh_off > data_end)
+ return rc;
+ h_proto = vhdr->h_vlan_encapsulated_proto;
+ }
+
+ /* h_proto is compared in network byte order against constant ethertypes */
+ if (h_proto == __constant_htons(ETH_P_IP))
+ return filter_ipv4(data, nh_off, data_end);
+ else if (h_proto == __constant_htons(ETH_P_IPV6))
+ return filter_ipv6(data, nh_off, data_end);
+ else
+ rc = XDP_PASS;
+
+ return rc;
+}
+
+char __license[] SEC("license") = "GPL";
+
+uint32_t __version SEC("version") = LINUX_VERSION_CODE;
/* One shot loading of the eBPF file */
if (aconf->ebpf_lb_file && cluster_type == PACKET_FANOUT_EBPF) {
int ret = EBPFLoadFile(aconf->ebpf_lb_file, "loadbalancer",
- &aconf->ebpf_lb_fd);
+ &aconf->ebpf_lb_fd, EBPF_SOCKET_FILTER);
if (ret != 0) {
SCLogWarning(SC_ERR_INVALID_VALUE, "Error when loading eBPF lb file");
}
if (aconf->ebpf_filter_file) {
#ifdef HAVE_PACKET_EBPF
int ret = EBPFLoadFile(aconf->ebpf_filter_file, "filter",
- &aconf->ebpf_filter_fd);
+ &aconf->ebpf_filter_fd, EBPF_SOCKET_FILTER);
if (ret != 0) {
SCLogWarning(SC_ERR_INVALID_VALUE,
"Error when loading eBPF filter file");
#endif
}
+ if (ConfGetChildValueWithDefault(if_root, if_default, "xdp-filter-file", &ebpf_file) != 1) {
+ aconf->xdp_filter_file = NULL;
+ } else {
+ SCLogInfo("af-packet will use '%s' as XDP filter file",
+ ebpf_file);
+ aconf->xdp_filter_file = ebpf_file;
+ ConfGetChildValueBoolWithDefault(if_root, if_default, "bypass", &conf_val);
+ if (conf_val) {
+ SCLogConfig("Using bypass kernel functionality for AF_PACKET (iface %s)",
+ aconf->iface);
+ aconf->flags |= AFP_XDPBYPASS;
+ RunModeEnablesBypassManager();
+ }
+#ifdef HAVE_PACKET_XDP
+ const char *xdp_mode;
+ if (ConfGetChildValueWithDefault(if_root, if_default, "xdp-mode", &xdp_mode) != 1) {
+ aconf->xdp_mode = XDP_FLAGS_SKB_MODE;
+ } else {
+ if (!strcmp(xdp_mode, "soft")) {
+ aconf->xdp_mode = XDP_FLAGS_SKB_MODE;
+ } else if (!strcmp(xdp_mode, "driver")) {
+ aconf->xdp_mode = XDP_FLAGS_DRV_MODE;
+ } else if (!strcmp(xdp_mode, "hw")) {
+ aconf->xdp_mode = XDP_FLAGS_HW_MODE;
+ } else {
+ SCLogWarning(SC_ERR_INVALID_VALUE,
+ "Invalid xdp-mode value: '%s'", xdp_mode);
+ }
+ }
+#endif
+ }
+
+ /* One shot loading of the eBPF file */
+ if (aconf->xdp_filter_file) {
+#ifdef HAVE_PACKET_XDP
+ int ret = EBPFLoadFile(aconf->xdp_filter_file, "xdp",
+ &aconf->xdp_filter_fd, EBPF_XDP_CODE);
+ if (ret != 0) {
+ SCLogWarning(SC_ERR_INVALID_VALUE,
+ "Error when loading XDP filter file");
+ } else {
+ ret = EBPFSetupXDP(aconf->iface, aconf->xdp_filter_fd, aconf->xdp_mode);
+ if (ret != 0) {
+ SCLogWarning(SC_ERR_INVALID_VALUE,
+ "Error when setting up XDP");
+ /* FIXME error handling */
+ }
+ }
+#else
+ SCLogError(SC_ERR_UNIMPLEMENTED, "XDP support is not built-in");
+#endif
+ }
+
if ((ConfGetChildValueIntWithDefault(if_root, if_default, "buffer-size", &value)) == 1) {
aconf->buffer_size = value;
} else {
};
static int AFPBypassCallback(Packet *p);
+static int AFPXDPBypassCallback(Packet *p);
#define MAX_MAPS 32
/**
unsigned int ring_buflen;
uint8_t *ring_buf;
+ uint8_t xdp_mode;
+
int map_fd[MAX_MAPS];
} AFPThreadVars;
if (ptv->flags & AFP_BYPASS) {
p->BypassPacketsFlow = AFPBypassCallback;
}
+ if (ptv->flags & AFP_XDPBYPASS) {
+ p->BypassPacketsFlow = AFPXDPBypassCallback;
+ }
/* get timestamp of packet via ioctl */
if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
if (ptv->flags & AFP_BYPASS) {
p->BypassPacketsFlow = AFPBypassCallback;
}
+ if (ptv->flags & AFP_XDPBYPASS) {
+ p->BypassPacketsFlow = AFPXDPBypassCallback;
+ }
/* Suricata will treat packet so telling it is busy, this
* status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
if (ptv->flags & AFP_BYPASS) {
p->BypassPacketsFlow = AFPBypassCallback;
}
+ if (ptv->flags & AFP_XDPBYPASS) {
+ p->BypassPacketsFlow = AFPXDPBypassCallback;
+ }
ptv->pkts++;
p->livedev = ptv->livedev;
key.dst = htonl(GET_IPV4_DST_ADDR_U32(p));
key.port16[0] = GET_TCP_SRC_PORT(p);
key.port16[1] = GET_TCP_DST_PORT(p);
+
key.ip_proto = IPV4_GET_IPPROTO(p);
if (AFPInsertHalfFlow(mapd, &key, inittime) == 0) {
return 0;
return 0;
}
+/**
+ * \brief Bypass callback used when XDP bypass is enabled.
+ *
+ * Inserts both directions of the flow the packet belongs to into the
+ * eBPF flow table ("flow_table_v4" or "flow_table_v6").
+ *
+ * Only non-tunneled TCP and UDP packets are handled. For IPv6, only
+ * packets whose next header is directly TCP or UDP are handled.
+ *
+ * \param p the packet whose flow must be bypassed
+ *
+ * \retval 1 both half flows were inserted
+ * \retval 0 the flow was not bypassed (unsupported packet, map not found,
+ *           insertion failure, or XDP support not built in)
+ */
+static int AFPXDPBypassCallback(Packet *p)
+{
+#ifdef HAVE_PACKET_XDP
+    SCLogDebug("Calling af_packet callback function");
+    /* Only bypass TCP and UDP */
+    if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
+        return 0;
+    }
+
+    /* Bypassing tunneled packets is currently not supported
+     * because we can't discard the inner packet only due to
+     * primitive parsing in eBPF */
+    if (IS_TUNNEL_PKT(p)) {
+        return 0;
+    }
+    struct timespec curtime;
+    uint64_t inittime = 0;
+    if (clock_gettime(CLOCK_MONOTONIC, &curtime) == 0) {
+        /* Widen before multiplying: done in time_t, the product overflows
+         * where time_t is 32 bits. Only whole seconds are kept. */
+        inittime = (uint64_t)curtime.tv_sec * 1000000000ULL;
+    }
+    if (PKT_IS_IPV4(p)) {
+        /* FIXME cache this and handle error at cache time*/
+        int mapd = EBPFGetMapFDByName("flow_table_v4");
+        if (mapd == -1) {
+            SCLogNotice("Can't find eBPF map fd for '%s'", "flow_table_v4");
+            return 0;
+        }
+        /* FIXME error handling */
+        struct flowv4_keys key = {};
+        key.src = GET_IPV4_SRC_ADDR_U32(p);
+        key.dst = GET_IPV4_DST_ADDR_U32(p);
+        /* FIXME htons or not depending of XDP and af_packet eBPF */
+        key.port16[0] = htons(GET_TCP_SRC_PORT(p));
+        key.port16[1] = htons(GET_TCP_DST_PORT(p));
+        key.ip_proto = IPV4_GET_IPPROTO(p);
+        if (AFPInsertHalfFlow(mapd, &key, inittime) == 0) {
+            return 0;
+        }
+        /* reverse direction: swap addresses and ports, same protocol */
+        key.src = GET_IPV4_DST_ADDR_U32(p);
+        key.dst = GET_IPV4_SRC_ADDR_U32(p);
+        key.port16[0] = htons(GET_TCP_DST_PORT(p));
+        key.port16[1] = htons(GET_TCP_SRC_PORT(p));
+        if (AFPInsertHalfFlow(mapd, &key, inittime) == 0) {
+            return 0;
+        }
+        return 1;
+    }
+    /* For IPv6 case we don't handle extended header in eBPF */
+    if (PKT_IS_IPV6(p) &&
+        ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
+        /* FIXME cache this and handle error at cache time*/
+        int mapd = EBPFGetMapFDByName("flow_table_v6");
+        int i = 0;
+        if (mapd == -1) {
+            SCLogNotice("Can't find eBPF map fd for '%s'", "flow_table_v6");
+            return 0;
+        }
+        SCLogDebug("add an IPv6");
+        /* FIXME error handling */
+        /* FIXME filter out next hdr IPV6 packets */
+        struct flowv6_keys key = {};
+        for (i = 0; i < 4; i++) {
+            key.src[i] = GET_IPV6_SRC_ADDR(p)[i];
+            key.dst[i] = GET_IPV6_DST_ADDR(p)[i];
+        }
+        key.port16[0] = htons(GET_TCP_SRC_PORT(p));
+        key.port16[1] = htons(GET_TCP_DST_PORT(p));
+        key.ip_proto = IPV6_GET_NH(p);
+        if (AFPInsertHalfFlow(mapd, &key, inittime) == 0) {
+            return 0;
+        }
+        /* reverse direction: swap addresses and ports, same protocol */
+        for (i = 0; i < 4; i++) {
+            key.src[i] = GET_IPV6_DST_ADDR(p)[i];
+            key.dst[i] = GET_IPV6_SRC_ADDR(p)[i];
+        }
+        key.port16[0] = htons(GET_TCP_DST_PORT(p));
+        key.port16[1] = htons(GET_TCP_SRC_PORT(p));
+        if (AFPInsertHalfFlow(mapd, &key, inittime) == 0) {
+            return 0;
+        }
+        return 1;
+    }
+#endif
+    return 0;
+}
+
+
/**
* \brief Init function for ReceiveAFP.
*
}
ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
+ ptv->xdp_mode = afpconfig->xdp_mode;
#ifdef PACKET_STATISTICS
ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
AFPSwitchState(ptv, AFP_STATE_DOWN);
+#ifdef HAVE_PACKET_XDP
+ EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
+#endif
if (ptv->data != NULL) {
SCFree(ptv->data);
ptv->data = NULL;
#define AFP_VLAN_DISABLED (1<<5)
#define AFP_MMAP_LOCKED (1<<6)
#define AFP_BYPASS (1<<7)
+#define AFP_XDPBYPASS (1<<8)
#define AFP_COPY_MODE_NONE 0
#define AFP_COPY_MODE_TAP 1
int ebpf_lb_fd;
const char *ebpf_filter_file;
int ebpf_filter_fd;
+ const char *xdp_filter_file;
+ int xdp_filter_fd;
+ uint8_t xdp_mode;
const char *out_iface;
SC_ATOMIC_DECLARE(unsigned int, ref);
void (*DerefFunc)(void *);
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
+#include <net/if.h>
#include "config.h"
#define BPF_MAP_MAX_COUNT 16
* \param val a pointer to an integer that will be the file desc
* \return -1 in case of error and 0 in case of success
*/
-int EBPFLoadFile(const char *path, const char * section, int *val)
+int EBPFLoadFile(const char *path, const char * section, int *val, uint8_t flags)
{
int err, pfd;
bool found = false;
bpf_object__for_each_program(bpfprog, bpfobj) {
const char *title = bpf_program__title(bpfprog, 0);
if (!strcmp(title, section)) {
- bpf_program__set_socket_filter(bpfprog);
+ if (flags & EBPF_SOCKET_FILTER) {
+ bpf_program__set_socket_filter(bpfprog);
+ } else {
+ bpf_program__set_xdp(bpfprog);
+ }
found = true;
break;
}
return 0;
}
+
+/**
+ * \brief Set the XDP program of a network interface.
+ *
+ * Resolves the interface name to its index and hands the program file
+ * descriptor to bpf_set_link_xdp_fd(). When XDP support is not compiled
+ * in, nothing is done and success is reported.
+ *
+ * \param iface name of the network interface
+ * \param fd    file descriptor passed as the XDP program to set
+ * \param flags flags forwarded to bpf_set_link_xdp_fd()
+ *
+ * \return 0 on success, -1 if the interface is unknown or the call fails
+ */
+int EBPFSetupXDP(const char *iface, int fd, uint8_t flags)
+{
+#ifdef HAVE_PACKET_XDP
+    const unsigned int link_index = if_nametoindex(iface);
+    if (!link_index) {
+        SCLogError(SC_ERR_INVALID_VALUE,
+                "Unknown interface '%s'", iface);
+        return -1;
+    }
+
+    const int rc = bpf_set_link_xdp_fd(link_index, fd, flags);
+    if (rc) {
+        char errmsg[129];
+        libbpf_strerror(rc, errmsg, sizeof(errmsg));
+        SCLogError(SC_ERR_INVALID_VALUE, "Unable to set XDP on '%s': %s (%d)",
+                iface, errmsg, rc);
+        return -1;
+    }
+#endif
+    return 0;
+}
+
+
int EBPFForEachFlowV4Table(const char *name,
int (*FlowCallback)(int fd, struct flowv4_keys *key, struct pair *value, void *data),
struct flows_stats *flowstats,
#ifndef __UTIL_EBPF_H__
#define __UTIL_EBPF_H__
+#ifdef HAVE_PACKET_EBPF
+
+#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0)
+#define XDP_FLAGS_SKB_MODE (1U << 1)
+#define XDP_FLAGS_DRV_MODE (1U << 2)
+#define XDP_FLAGS_HW_MODE (1U << 3)
+
+
+
struct flowv4_keys {
__be32 src;
__be32 dst;
__be16 port16[2];
};
__u32 ip_proto;
-};
+} __attribute__((__aligned__(8)));
struct flowv6_keys {
__be32 src[4];
__be16 port16[2];
};
__u32 ip_proto;
-};
+} __attribute__((__aligned__(8)));
struct pair {
uint64_t time;
uint64_t packets;
uint64_t bytes;
-};
+} __attribute__((__aligned__(8)));
struct flows_stats {
uint64_t count;
uint64_t bytes;
};
+#define EBPF_SOCKET_FILTER (1<<0)
+#define EBPF_XDP_CODE (1<<1)
+
int EBPFGetMapFDByName(const char *name);
-int EBPFLoadFile(const char *path, const char * section, int *val);
+int EBPFLoadFile(const char *path, const char * section, int *val, uint8_t flags);
+int EBPFSetupXDP(const char *iface, int fd, uint8_t flags);
int EBPFForEachFlowV4Table(const char *name,
int (*FlowCallback)(int fd, struct flowv4_keys *key, struct pair *value, void *data),
void EBPFDeleteKey(int fd, void *key);
#endif
+
+#endif
# eBPF file containing a 'filter' function that will be inserted into the
# kernel and used as packet filter function
#ebpf-filter-file: @e_sysconfdir@/ebpf/filter.bpf
+ # eBPF file containing a 'xdp' function that will be inserted into the
+ # kernel and used as XDP packet filter function
+ #ebpf-filter-file: @e_sysconfdir@filter.bpf
+ # XDP mode: "soft" for the generic skb-based mode, "driver" for native
+ # support in the network card driver, and "hw" for cards that can run
+ # eBPF themselves. "soft" is used when this option is not set.
+ #xdp-mode: driver
+ #xdp-filter-file: @e_sysconfdir@xdp_filter.bpf
# if the ebpf filter implements a bypass function, you can set 'bypass' to
# yes and benefit from these feature
#bypass: yes