From: Eric Leblond
Date: Sun, 26 Nov 2017 19:27:13 +0000 (+0100)
Subject: af-packet: add support for eBPF cluster and filter
X-Git-Tag: suricata-4.1.0-beta1~224
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=91e1256b0134ebe89b89e18bf785d20679c25225;p=thirdparty%2Fsuricata.git

af-packet: add support for eBPF cluster and filter

This patch introduces the ebpf cluster mode. This mode uses an
extended BPF function that is loaded into the kernel and provides
the load balancing.
An example cluster function is provided in the ebpf subdirectory; it
provides an ippair load balancing function. This function uses the
same method as the one used in autofp ippair to provide a symmetrical
load balancing based on IP addresses.

A simple filter example that drops IPv6 is added to the source.

This patch also prepares the infrastructure to be able to load and use
maps inside eBPF files. This will be used later for flow bypass.
---

diff --git a/Makefile.am b/Makefile.am
index 4823047296..078f0e2ed1 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -5,7 +5,7 @@ ACLOCAL_AMFLAGS = -I m4
 EXTRA_DIST = ChangeLog COPYING LICENSE suricata.yaml.in \
              classification.config threshold.config \
              reference.config
-SUBDIRS = $(HTP_DIR) rust src qa rules doc contrib scripts etc python
+SUBDIRS = $(HTP_DIR) rust src qa rules doc contrib scripts etc python ebpf
 CLEANFILES = stamp-h[0-9]*
diff --git a/configure.ac b/configure.ac
index bdfe927e40..60690c092c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -73,6 +73,7 @@
     AC_PROG_INSTALL
     AC_PROG_LN_S
     AC_PROG_MAKE_SET
+    AC_PROG_GREP
     AC_PATH_PROG(HAVE_PKG_CONFIG, pkg-config, "no")
     if test "$HAVE_PKG_CONFIG" = "no"; then
@@ -393,6 +394,25 @@
     ])
     AM_CONDITIONAL([BUILD_UNITTESTS], [test "x$enable_unittests" = "xyes"])
+    # enable the building of ebpf files
+    AC_ARG_ENABLE(ebpf-build,
+           AS_HELP_STRING([--enable-ebpf-build], [Enable compilation of ebpf files]),,[enable_ebpf_build=no])
+    AM_CONDITIONAL([BUILD_EBPF], [test "x$enable_ebpf_build" = "xyes"])
+
+    if test "x$enable_ebpf_build" = "xyes"; then
+        if echo "$CC" | $GREP -q clang; then
+            AC_MSG_CHECKING([llc binary for clang version])
+            llc_version_line=$($CC --version|$GREP version)
+            llc_version=$(echo $llc_version_line| cut -d '(' -f 1 | $GREP -E -o '@<:@0-9@:>@\.@<:@0-9@:>@')
+            AC_MSG_RESULT($llc_version)
+            LLC="llc-$llc_version"
+            AC_SUBST(LLC)
+        else
+            echo "clang needed to build ebpf files"
+            exit 1
+        fi
+    fi
+
     # enable workaround for old barnyard2 for unified alert output
     AC_ARG_ENABLE(old-barnyard2,
            AS_HELP_STRING([--enable-old-barnyard2], [Use workaround for old barnyard2 in unified2 output]),,[enable_old_barnyard2=no])
@@ -1319,7 +1339,7 @@
             AC_CHECK_DECL([SOF_TIMESTAMPING_RAW_HARDWARE],
                 AC_DEFINE([HAVE_HW_TIMESTAMPING],[1],[Hardware timestamping support is available]),
                 [],
-                [[#include ]])
+                [[#include ]]),
     ])
     # Netmap support
@@ -1469,6 +1489,37 @@
             ;;
     esac
+    # --enable-ebpf / --disable-ebpf: $enableval carries the user's actual choice
+    AC_ARG_ENABLE(ebpf,
+            AS_HELP_STRING([--enable-ebpf],[Enable eBPF support]),
+            [ enable_ebpf="$enableval"],
+            [ enable_ebpf="no"])
+
+    if test "$enable_ebpf" = "yes"; then
+        AC_CHECK_LIB(elf,elf_begin,,LIBELF="no")
+        if test "$LIBELF" = "no"; then
+            echo
+            echo " libelf library and development headers not found but"
+            echo " needed to use eBPF code"
+            echo
+            exit 1
+        fi;
+
+        AC_CHECK_LIB(bpf,bpf_object__open,,LIBBPF="no")
+        if test "$LIBBPF" = "no"; then
+            echo
+            echo " libbpf library and development headers not found but"
+            echo " needed to use eBPF code. It can be found in the"
+            echo " Linux kernel tree under tools/lib/bpf"
+            echo
+            exit 1
+        fi;
+        AC_CHECK_DECL([PACKET_FANOUT_EBPF],
+            AC_DEFINE([HAVE_PACKET_EBPF],[1],[Recent ebpf fanout support is available]),
+            [],
+            [[#include ]])
+    fi;
+
     # Check for DAG support.
AC_ARG_ENABLE(dag, AS_HELP_STRING([--enable-dag],[Enable DAG capture]), @@ -2197,10 +2248,11 @@ AC_SUBST(CONFIGURE_SYSCONDIR) AC_SUBST(CONFIGURE_LOCALSTATEDIR) AC_SUBST(PACKAGE_VERSION) -AC_OUTPUT(Makefile src/Makefile rust/Makefile rust/Cargo.toml rust/.cargo/config qa/Makefile qa/coccinelle/Makefile rules/Makefile doc/Makefile doc/userguide/Makefile contrib/Makefile contrib/file_processor/Makefile contrib/file_processor/Action/Makefile contrib/file_processor/Processor/Makefile contrib/tile_pcie_logd/Makefile suricata.yaml scripts/Makefile scripts/suricatasc/Makefile scripts/suricatasc/suricatasc etc/Makefile etc/suricata.logrotate etc/suricata.service python/Makefile) +AC_OUTPUT(Makefile src/Makefile rust/Makefile rust/Cargo.toml rust/.cargo/config qa/Makefile qa/coccinelle/Makefile rules/Makefile doc/Makefile doc/userguide/Makefile contrib/Makefile contrib/file_processor/Makefile contrib/file_processor/Action/Makefile contrib/file_processor/Processor/Makefile contrib/tile_pcie_logd/Makefile suricata.yaml scripts/Makefile scripts/suricatasc/Makefile scripts/suricatasc/suricatasc etc/Makefile etc/suricata.logrotate etc/suricata.service python/Makefile ebpf/Makefile) SURICATA_BUILD_CONF="Suricata Configuration: AF_PACKET support: ${enable_af_packet} + eBPF support: ${enable_ebpf} PF_RING support: ${enable_pfring} NFQueue support: ${enable_nfqueue} NFLOG support: ${enable_nflog} diff --git a/ebpf/Makefile.am b/ebpf/Makefile.am new file mode 100644 index 0000000000..cd35cd8360 --- /dev/null +++ b/ebpf/Makefile.am @@ -0,0 +1,10 @@ +if BUILD_EBPF + +all: lb.bpf filter.bpf + +%.bpf: %.c + ${CC} -Wall -O2 -D__KERNEL__ -D__ASM_SYSREG_H -emit-llvm -c $< -o - | ${LLC} -march=bpf -filetype=obj -o $@ + +CLEANFILES = *.bpf + +endif diff --git a/ebpf/bpf_helpers.h b/ebpf/bpf_helpers.h new file mode 100644 index 0000000000..7904a2a493 --- /dev/null +++ b/ebpf/bpf_helpers.h @@ -0,0 +1,137 @@ +#ifndef __BPF_HELPERS_H +#define __BPF_HELPERS_H + +/* helper macro to place programs, 
maps, license in + * different sections in elf_bpf file. Section names + * are interpreted by elf_bpf loader + */ +#define SEC(NAME) __attribute__((section(NAME), used)) + +/* helper functions called from eBPF programs written in C */ +static void *(*bpf_map_lookup_elem)(void *map, void *key) = + (void *) BPF_FUNC_map_lookup_elem; +static int (*bpf_map_update_elem)(void *map, void *key, void *value, + unsigned long long flags) = + (void *) BPF_FUNC_map_update_elem; +static int (*bpf_map_delete_elem)(void *map, void *key) = + (void *) BPF_FUNC_map_delete_elem; +static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = + (void *) BPF_FUNC_probe_read; +static unsigned long long (*bpf_ktime_get_ns)(void) = + (void *) BPF_FUNC_ktime_get_ns; +static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = + (void *) BPF_FUNC_trace_printk; +static void (*bpf_tail_call)(void *ctx, void *map, int index) = + (void *) BPF_FUNC_tail_call; +static unsigned long long (*bpf_get_smp_processor_id)(void) = + (void *) BPF_FUNC_get_smp_processor_id; +static unsigned long long (*bpf_get_current_pid_tgid)(void) = + (void *) BPF_FUNC_get_current_pid_tgid; +static unsigned long long (*bpf_get_current_uid_gid)(void) = + (void *) BPF_FUNC_get_current_uid_gid; +static int (*bpf_get_current_comm)(void *buf, int buf_size) = + (void *) BPF_FUNC_get_current_comm; +static int (*bpf_perf_event_read)(void *map, int index) = + (void *) BPF_FUNC_perf_event_read; +static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = + (void *) BPF_FUNC_clone_redirect; +static int (*bpf_redirect)(int ifindex, int flags) = + (void *) BPF_FUNC_redirect; +static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) = + (void *) BPF_FUNC_perf_event_output; +static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = + (void *) BPF_FUNC_get_stackid; + +/* llvm builtin functions that eBPF C program may use to + * emit BPF_LD_ABS and BPF_LD_IND instructions 
+ */
+struct sk_buff;
+unsigned long long load_byte(void *skb,
+			     unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb,
+			     unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb,
+			     unsigned long long off) asm("llvm.bpf.load.word");
+
+/* a helper structure used by eBPF C program
+ * to describe map attributes to elf_bpf loader
+ */
+struct bpf_map_def {
+	unsigned int type;
+	unsigned int key_size;
+	unsigned int value_size;
+	unsigned int max_entries;
+	unsigned int map_flags;
+};
+
+static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
+	(void *) BPF_FUNC_skb_store_bytes;
+static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+	(void *) BPF_FUNC_l3_csum_replace;
+static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+	(void *) BPF_FUNC_l4_csum_replace;
+
+#if defined(__x86_64__)
+
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->ip)
+
+#elif defined(__s390x__)
+
+#define PT_REGS_PARM1(x) ((x)->gprs[2])
+#define PT_REGS_PARM2(x) ((x)->gprs[3])
+#define PT_REGS_PARM3(x) ((x)->gprs[4])
+#define PT_REGS_PARM4(x) ((x)->gprs[5])
+#define PT_REGS_PARM5(x) ((x)->gprs[6])
+#define PT_REGS_RET(x) ((x)->gprs[14])
+#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->gprs[2])
+#define PT_REGS_SP(x) ((x)->gprs[15])
+#define PT_REGS_IP(x) ((x)->ip)
+
+#elif defined(__aarch64__)
+
+#define PT_REGS_PARM1(x) ((x)->regs[0])
+#define PT_REGS_PARM2(x) ((x)->regs[1])
+#define PT_REGS_PARM3(x) ((x)->regs[2])
+#define PT_REGS_PARM4(x) ((x)->regs[3])
+#define PT_REGS_PARM5(x) ((x)->regs[4])
+#define PT_REGS_RET(x) ((x)->regs[30])
+#define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[0])
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->pc)
+
+#elif defined(__powerpc__)
+
+#define PT_REGS_PARM1(x) ((x)->gpr[3])
+#define PT_REGS_PARM2(x) ((x)->gpr[4])
+#define PT_REGS_PARM3(x) ((x)->gpr[5])
+#define PT_REGS_PARM4(x) ((x)->gpr[6])
+#define PT_REGS_PARM5(x) ((x)->gpr[7])
+#define PT_REGS_RC(x) ((x)->gpr[3])
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->nip)
+
+#endif
+
+#ifdef __powerpc__
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = (ctx)->link; })
+#define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
+#else
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)		({				\
+		bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx)	({				\
+		bpf_probe_read(&(ip), sizeof(ip),				\
+				(void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+#endif
+
+#endif
diff --git a/ebpf/filter.c b/ebpf/filter.c
new file mode 100644
index 0000000000..9293d9cbbb
--- /dev/null
+++ b/ebpf/filter.c
@@ -0,0 +1,49 @@
+//#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "bpf_helpers.h"
+
+#define LINUX_VERSION_CODE 263682 /* 4*65536 + 6*256 + 2 */
+
+/*
+ * Example socket filter placed in the "filter" ELF section.
+ * Returns 0 for IPv6 frames and -1 for every other protocol; the commit
+ * description says this is meant to drop IPv6. NOTE(review): -1 is
+ * presumably "keep the whole packet" - confirm against the socket filter docs.
+ */
+int SEC("filter") hashfilter(struct __sk_buff *skb) {
+    __u32 nhoff = BPF_LL_OFF + ETH_HLEN;
+
+    skb->cb[0] = nhoff; /* stored, but not read again in this file */
+    switch (skb->protocol) {
+        case __constant_htons(ETH_P_IP):
+            return -1;
+        case __constant_htons(ETH_P_IPV6):
+            return 0;
+        default:
+#if 0
+            {
+                char fmt[] = "Got proto %u\n";
+                bpf_trace_printk(fmt, sizeof(fmt), h_proto);
+                break;
+            }
+#else
+            break;
+#endif
+    }
+    return -1;
+}
+
+
+char __license[] SEC("license") = "GPL";
+
+uint32_t __version SEC("version") = LINUX_VERSION_CODE;
diff --git a/ebpf/lb.c b/ebpf/lb.c
new file mode 100644
index 0000000000..d7a5685de2
--- /dev/null
+++ b/ebpf/lb.c
@@ -0,0 +1,92 @@
+//#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "bpf_helpers.h"
+
+#define LINUX_VERSION_CODE 263682 /* 4*65536 + 6*256 + 2 */
+
+#ifndef __section
+# define __section(x) __attribute__((section(x), used))
+#endif
+
+static __always_inline int ipv4_hash(struct __sk_buff *skb) /* hash = saddr + daddr */
+{
+    uint32_t nhoff;
+    uint32_t src, dst;
+
+    nhoff = skb->cb[0]; /* offset written by lb() before calling us */
+    src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
+    dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
+
+#if 0
+    char fmt[] = "Got addr: %u -> %u\n";
+    bpf_trace_printk(fmt, sizeof(fmt), src, dst);
+    char fmt2[] = "Got hash %u\n";
+    bpf_trace_printk(fmt2, sizeof(fmt2), src + dst);
+#endif
+    return src + dst; /* addition is commutative: A->B and B->A give the same value */
+}
+
+static __always_inline int ipv6_hash(struct __sk_buff *skb) /* sum of all address words */
+{
+    uint32_t nhoff;
+    uint32_t src, dst, hash;
+
+    nhoff = skb->cb[0]; /* offset written by lb() before calling us */
+    hash = 0;
+    src = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 4 * 0 ); /* addresses are read 4 bytes at a time */
+    dst = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 4 * 0 );
+    hash += src + dst;
+
+    src = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 4 * 1 );
+    dst = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 4 * 1 );
+    hash += src + dst;
+
+    src = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 4 * 2 );
+    dst = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 4 * 2 );
+    hash += src + dst;
+
+    src = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 4 * 3 );
+    dst = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 4 * 3 );
+    hash += src + dst;
+
+    return hash;
+}
+
+int __section("loadbalancer") lb(struct __sk_buff *skb) { /* entry point: picks the hash by protocol */
+    __u32 nhoff = BPF_LL_OFF + ETH_HLEN;
+
+    skb->cb[0] = nhoff; /* hands the offset to ipv4_hash()/ipv6_hash() */
+
+    switch (skb->protocol) {
+        case __constant_htons(ETH_P_IP):
+            return ipv4_hash(skb);
+        case __constant_htons(ETH_P_IPV6):
+            return ipv6_hash(skb);
+        default:
+#if 0
+            {
+                char fmt[] = "Got proto %u\n";
+                bpf_trace_printk(fmt, sizeof(fmt), h_proto);
+                break;
+            }
+#else
+            break;
+#endif
+    }
+    /* hash on proto by default */
+    return skb->protocol;
+}
+
+char __license[] __section("license") = "GPL";
+
+uint32_t __version __section("version") = LINUX_VERSION_CODE;
diff --git a/src/Makefile.am b/src/Makefile.am
index e24254e513..1a5da89203 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -395,6 +395,7 @@ util-decode-der-get.c util-decode-der-get.h \
 util-decode-mime.c util-decode-mime.h \
 util-detect.c util-detect.h \
 util-device.c util-device.h \
+util-ebpf.c util-ebpf.h \
 util-enum.c util-enum.h \
 util-error.c util-error.h \
 util-file.c util-file.h \
diff --git a/src/runmode-af-packet.c b/src/runmode-af-packet.c
index 1498501e70..0357238206 100644
--- a/src/runmode-af-packet.c
+++ b/src/runmode-af-packet.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011,2012 Open Information Security Foundation
+/* Copyright (C) 2011-2016 Open Information Security Foundation
  *
  * You can copy, redistribute or modify this Program under the terms of
  * the GNU General Public License version 2 as published by the Free
@@ -30,7 +30,6 @@
  *
  */
-
 #include "suricata-common.h"
 #include "config.h"
 #include "tm-threads.h"
@@ -53,6 +52,7 @@
 #include "util-device.h"
 #include "util-runmodes.h"
 #include "util-ioctl.h"
+#include "util-ebpf.h"
 #include "source-af-packet.h"
@@ -123,6 +123,7 @@ static void *ParseAFPConfig(const char *iface)
     const char *bpf_filter = NULL;
     const char *out_iface = NULL;
     int cluster_type = PACKET_FANOUT_HASH;
+    const char *ebpf_file = NULL;
     if (iface == NULL) {
         return NULL;
@@ -145,6 +146,10 @@ static void *ParseAFPConfig(const char *iface)
     aconf->DerefFunc = AFPDerefConfig;
     aconf->flags = AFP_RING_MODE;
     aconf->bpf_filter = NULL;
+    aconf->ebpf_lb_file = NULL;
+    aconf->ebpf_lb_fd = -1;
+    aconf->ebpf_filter_file = NULL;
+    aconf->ebpf_filter_fd = -1;
     aconf->out_iface = NULL;
     aconf->copy_mode = AFP_COPY_MODE_NONE;
     aconf->block_timeout = 10;
@@ -331,7 +336,13 @@ static void *ParseAFPConfig(const char *iface)
                     aconf->iface);
             aconf->cluster_type = PACKET_FANOUT_ROLLOVER;
             cluster_type = PACKET_FANOUT_ROLLOVER;
-
+#ifdef HAVE_PACKET_EBPF
+        } else if (strcmp(tmpctype, "cluster_ebpf") == 0) {
+            SCLogInfo("Using ebpf based cluster mode for AF_PACKET (iface %s)",
+                    aconf->iface);
+            aconf->cluster_type = PACKET_FANOUT_EBPF;
+            cluster_type = PACKET_FANOUT_EBPF;
+#endif
         } else {
             SCLogWarning(SC_ERR_INVALID_CLUSTER_TYPE,"invalid cluster-type %s",tmpctype);
         }
@@ -355,6 +366,56 @@ static void *ParseAFPConfig(const char *iface)
         }
     }
+    if (ConfGetChildValueWithDefault(if_root, if_default, "ebpf-lb-file", &ebpf_file) != 1) {
+        aconf->ebpf_lb_file = NULL;
+    } else {
+#ifdef HAVE_PACKET_EBPF
+        SCLogInfo("af-packet will use '%s' as eBPF load balancing file",
+                  ebpf_file);
+#endif
+        aconf->ebpf_lb_file = ebpf_file;
+    }
+
+#ifdef HAVE_PACKET_EBPF
+    /* One shot loading of the eBPF file */
+    if (aconf->ebpf_lb_file && cluster_type == PACKET_FANOUT_EBPF) {
+        int ret = EBPFLoadFile(aconf->ebpf_lb_file, "loadbalancer",
+                               &aconf->ebpf_lb_fd);
+        if (ret != 0) {
+            SCLogWarning(SC_ERR_INVALID_VALUE, "Error when loading eBPF lb file");
+        }
+    }
+#else
+    /* PACKET_FANOUT_EBPF may not be declared in this build: test the option only */
+    if (aconf->ebpf_lb_file) {
+        SCLogError(SC_ERR_UNIMPLEMENTED, "eBPF support is not built-in");
+    }
+#endif
+
+    if (ConfGetChildValueWithDefault(if_root, if_default, "ebpf-filter-file", &ebpf_file) != 1) {
+        aconf->ebpf_filter_file = NULL;
+    } else {
+#ifdef HAVE_PACKET_EBPF
+        SCLogInfo("af-packet will use '%s' as eBPF filter file",
+                  ebpf_file);
+#endif
+        aconf->ebpf_filter_file = ebpf_file;
+    }
+
+    /* One shot loading of the eBPF file */
+    if (aconf->ebpf_filter_file) {
+#ifdef HAVE_PACKET_EBPF
+        int ret = EBPFLoadFile(aconf->ebpf_filter_file, "filter",
+                               &aconf->ebpf_filter_fd);
+        if (ret != 0) {
+            SCLogWarning(SC_ERR_INVALID_VALUE,
+                         "Error when loading eBPF filter file");
+        }
+#else
+        SCLogError(SC_ERR_UNIMPLEMENTED, "eBPF support is not built-in");
+#endif
+    }
+
     if ((ConfGetChildValueIntWithDefault(if_root, if_default, "buffer-size", &value)) == 1) {
         aconf->buffer_size = value;
     } else {
diff --git a/src/source-af-packet.c b/src/source-af-packet.c
index 4b6203ee49..0fb526e289 100644
--- a/src/source-af-packet.c
+++ b/src/source-af-packet.c
@@ -32,6 +32,8 @@
  * interface
  */
+#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
+#define SC_PCAP_DONT_INCLUDE_PCAP_H 1
 #include "suricata-common.h"
 #include "config.h"
 #include "suricata.h"
@@ -62,6 +64,19 @@
 #include
 #endif
+struct bpf_program {
+    unsigned int bf_len;
+    struct bpf_insn *bf_insns;
+};
+
+#ifdef HAVE_PCAP_H
+#include
+#endif
+
+#ifdef HAVE_PCAP_PCAP_H
+#include
+#endif
+
 #if HAVE_LINUX_IF_ETHER_H
 #include
 #endif
@@ -177,6 +192,7 @@ union thdr {
     void *raw;
 };
+#define MAX_MAPS 32
 /**
  * \brief Structure to hold thread specific variables.
  */
@@ -231,6 +247,8 @@ typedef struct AFPThreadVars_
     int buffer_size;
     /* Filter */
     const char *bpf_filter;
+    int ebpf_lb_fd;
+    int ebpf_filter_fd;
     int promisc;
@@ -255,6 +273,9 @@ typedef struct AFPThreadVars_
     /* mmap'ed ring buffer */
     unsigned int ring_buflen;
     uint8_t *ring_buf;
+
+    int map_fd[MAX_MAPS];
+
 } AFPThreadVars;
 TmEcode ReceiveAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
@@ -1894,6 +1915,45 @@ int AFPIsFanoutSupported(void)
 #endif
 }
+#ifdef HAVE_PACKET_EBPF
+
+static int SockFanoutSeteBPF(AFPThreadVars *ptv) /* 0 on success, -1 on error */
+{
+    int pfd = ptv->ebpf_lb_fd;
+    if (pfd == -1) {
+        SCLogError(SC_ERR_INVALID_VALUE,
+                   "Fanout file descriptor is invalid");
+        return -1;
+    }
+
+    if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
+        SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf");
+        return -1;
+    }
+    SCLogInfo("Activated eBPF on socket");
+
+    return 0;
+}
+
+static int SetEbpfFilter(AFPThreadVars *ptv) /* 0 on success, -1 on error */
+{
+    int pfd = ptv->ebpf_filter_fd;
+    if (pfd == -1) {
+        SCLogError(SC_ERR_INVALID_VALUE,
+                   "Filter file descriptor is invalid");
+        return -1;
+    }
+
+    if (setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &pfd, sizeof(pfd))) {
+        SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
+        return -1;
+    }
+    SCLogInfo("Activated eBPF filter on socket");
+
+    return 0;
+}
+#endif
+
 static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
 {
     int r;
@@ -2003,6 +2063,7 @@ static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
         goto socket_err;
     }
+
 #ifdef HAVE_PACKET_FANOUT
     /* add binded socket to fanout group */
     if (ptv->threads > 1) {
@@ -2019,6 +2080,18 @@ static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
     }
 #endif
+#ifdef HAVE_PACKET_EBPF
+    if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
+        r = SockFanoutSeteBPF(ptv);
+        if (r < 0) {
+            SCLogError(SC_ERR_AFP_CREATE,
+                       "Couldn't set EBPF, error %s",
+                       strerror(errno));
+            goto socket_err;
+        }
+    }
+#endif
+
     if (ptv->flags & AFP_RING_MODE) {
         ret = AFPSetupRing(ptv, devname);
         if (ret != 0)
@@ -2071,6 +2144,12 @@ TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
     struct sock_fprog fcode;
     int rc;
+#ifdef HAVE_PACKET_EBPF
+    if (ptv->ebpf_filter_fd != -1) {
+        return (SetEbpfFilter(ptv) == 0) ? TM_ECODE_OK : TM_ECODE_FAILED; /* helper speaks 0/-1, callers expect TmEcode */
+    }
+#endif
+
     if (!ptv->bpf_filter)
         return TM_ECODE_OK;
@@ -2175,6 +2254,8 @@ TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
     if (afpconfig->bpf_filter) {
         ptv->bpf_filter = afpconfig->bpf_filter;
     }
+    ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
+    ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
 #ifdef PACKET_STATISTICS
     ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
diff --git a/src/source-af-packet.h b/src/source-af-packet.h
index 0fce1b80a3..f8c13c917b 100644
--- a/src/source-af-packet.h
+++ b/src/source-af-packet.h
@@ -88,6 +88,10 @@ typedef struct AFPIfaceConfig_
     int copy_mode;
     ChecksumValidationMode checksum_mode;
     const char *bpf_filter;
+    const char *ebpf_lb_file;
+    int ebpf_lb_fd;
+    const char *ebpf_filter_file;
+    int ebpf_filter_fd;
     const char *out_iface;
     SC_ATOMIC_DECLARE(unsigned int, ref);
     void (*DerefFunc)(void *);
diff --git a/src/suricata-common.h b/src/suricata-common.h
index 082335e00e..beeee12395 100644
--- a/src/suricata-common.h
+++ b/src/suricata-common.h
@@ -188,6 +188,7 @@
 #include
 #endif
+#ifndef SC_PCAP_DONT_INCLUDE_PCAP_H
 #ifdef HAVE_PCAP_H
 #include
 #endif
@@ -195,9 +196,6 @@
 #ifdef HAVE_PCAP_PCAP_H
 #include
 #endif
-
-#ifdef HAVE_PCAP_BPF_H
-#include
 #endif
 #ifdef HAVE_UTIME_H
diff --git a/src/util-buffer.h b/src/util-buffer.h
index cc7616ccfb..2371921731 100644
--- a/src/util-buffer.h
+++ b/src/util-buffer.h
@@ -100,13 +100,13 @@ void MemBufferFree(MemBuffer *buffer);
  * \param buffer Pointer to the src MemBuffer instance to write.
  * \param fp Pointer to the file file instance to write to.
  */
-#define MemBufferPrintToFPAsHex(buffer, fp) do { \
+#define MemBufferPrintToFPAsHex(mem_buffer, fp) do { \
         uint32_t i; \
         \
-        for (i = 0; i < (buffer)->offset; i++) { \
-            if (((buffer)->offset % 8) == 0) \
+        for (i = 0; i < (mem_buffer)->offset; i++) { \
+            if ((i % 8) == 0) \
                 fprintf(fp, "\n"); \
-            fprintf(fp, " %02X", (buffer)->buffer[i]); \
+            fprintf(fp, " %02X", (mem_buffer)->buffer[i]); \
         } \
     } while (0)
diff --git a/src/util-ebpf.c b/src/util-ebpf.c
new file mode 100644
index 0000000000..a4f147545e
--- /dev/null
+++ b/src/util-ebpf.c
@@ -0,0 +1,177 @@
+/* Copyright (C) 2018 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \ingroup afppacket
+ *
+ * @{
+ */
+
+/**
+ * \file
+ *
+ * \author Eric Leblond
+ *
+ * eBPF utility
+ *
+ */
+
+#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
+#define SC_PCAP_DONT_INCLUDE_PCAP_H 1
+
+#include "suricata-common.h"
+
+#ifdef HAVE_PACKET_EBPF
+
+#include
+#include
+#include "config.h"
+
+#include "util-ebpf.h"
+
+#define BPF_MAP_MAX_COUNT 16
+
+#define MAX_ERRNO 4095
+
+#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
+
+static inline long IS_ERR(const void *ptr)
+{
+    return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+struct bpf_map_item {
+    const char * name;
+    int fd;
+};
+
+static struct bpf_map_item bpf_map_array[BPF_MAP_MAX_COUNT];
+static int bpf_map_last = 0;
+
+/**
+ * \brief Get the fd of a map registered by EBPFLoadFile(); -1 if name is NULL or unknown.
+ */
+int EBPFGetMapFDByName(const char *name)
+{
+    int i;
+    if (name == NULL)
+        return -1;
+    for (i = 0; i < bpf_map_last; i++) { /* slots past bpf_map_last hold a NULL name */
+        if (!strcmp(bpf_map_array[i].name, name)) {
+            SCLogNotice("Got fd %d for eBPF map '%s'", bpf_map_array[i].fd, name);
+            return bpf_map_array[i].fd;
+        }
+    }
+    return -1;
+}
+
+/**
+ * Load a section of an eBPF file
+ *
+ * Opens and loads the object, records its maps for EBPFGetMapFDByName() and
+ * returns the program fd via 'val'; on error the opened object is closed again.
+ * \param path the path of the eBPF file to load
+ * \param section the section in the eBPF file to load
+ * \param val a pointer to an integer that will be the file desc
+ * \return -1 in case of error and 0 in case of success
+ */
+int EBPFLoadFile(const char *path, const char * section, int *val)
+{
+    int err, pfd, first_map = bpf_map_last; /* first_map: rollback point on error */
+    bool found = false;
+    struct bpf_object *bpfobj = NULL;
+    struct bpf_program *bpfprog = NULL;
+    struct bpf_map *map = NULL;
+    /* FIXME we will need to close BPF at exit of runmode */
+    if (! path) {
+        SCLogError(SC_ERR_INVALID_VALUE, "No file defined to load eBPF from");
+        return -1;
+    }
+
+    bpfobj = bpf_object__open(path);
+    if (bpfobj == NULL || IS_ERR(bpfobj)) { /* nothing to release yet */
+        SCLogError(SC_ERR_INVALID_VALUE, "Unable to load eBPF objects in '%s'", path);
+        return -1;
+    }
+
+    bpf_object__for_each_program(bpfprog, bpfobj) {
+        const char *title = bpf_program__title(bpfprog, 0);
+        if (!strcmp(title, section)) {
+            bpf_program__set_socket_filter(bpfprog);
+            found = true;
+            break;
+        }
+    }
+
+    if (found == false) {
+        SCLogError(SC_ERR_INVALID_VALUE,
+                   "No section '%s' in '%s' file. Will not be able to use the file",
+                   section, path);
+        goto error;
+    }
+
+    err = bpf_object__load(bpfobj);
+    if (err < 0) {
+        if (err == -EPERM) {
+            SCLogError(SC_ERR_MEM_ALLOC,
+                       "Permission issue when loading eBPF object try to "
+                       "increase memlock limit: %s (%d)",
+                       strerror(-err), err); /* err is a negative errno */
+        } else {
+            char buf[129];
+            libbpf_strerror(err, buf, sizeof(buf));
+            SCLogError(SC_ERR_INVALID_VALUE,
+                       "Unable to load eBPF object: %s (%d)", buf, err);
+        }
+        goto error;
+    }
+
+    pfd = bpf_program__fd(bpfprog);
+    if (pfd < 0) { /* any negative value is a failure, not only -1 */
+        SCLogError(SC_ERR_INVALID_VALUE, "Unable to find %s section", section);
+        goto error;
+    }
+
+    /* store the map in our array */
+    bpf_map__for_each(map, bpfobj) {
+        if (bpf_map_last == BPF_MAP_MAX_COUNT) { /* check before writing a slot */
+            SCLogError(SC_ERR_NOT_SUPPORTED, "Too many BPF maps in eBPF files");
+            goto error;
+        }
+        SCLogNotice("Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map));
+        bpf_map_array[bpf_map_last].fd = bpf_map__fd(map);
+        bpf_map_array[bpf_map_last].name = SCStrdup(bpf_map__name(map));
+        if (!bpf_map_array[bpf_map_last].name) {
+            SCLogError(SC_ERR_MEM_ALLOC, "Unable to duplicate map name");
+            goto error;
+        }
+        bpf_map_last++;
+    }
+
+    *val = pfd;
+    return 0;
+error:
+    /* unregister this object's maps: closing the object also closes their fds */
+    while (bpf_map_last > first_map) {
+        bpf_map_last--;
+        SCFree((void *)bpf_map_array[bpf_map_last].name);
+    }
+    bpf_object__close(bpfobj);
+    return -1;
+}
+
+#endif
diff --git a/src/util-ebpf.h b/src/util-ebpf.h
new file mode 100644
index 0000000000..2158954e9c
--- /dev/null
+++ b/src/util-ebpf.h
@@ -0,0 +1,30 @@
+/* Copyright (C) 2016 Open Information Security
Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/** + * \file + * + * \author Eric Leblond + */ + +#ifndef __UTIL_EBPF_H__ +#define __UTIL_EBPF_H__ + +int EBPFGetMapFDByName(const char *name); +int EBPFLoadFile(const char *path, const char * section, int *val); + +#endif diff --git a/suricata.yaml.in b/suricata.yaml.in index 14bcffae23..ce5a563163 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -620,12 +620,21 @@ af-packet: # Requires at least Linux 3.14. # * cluster_rollover: kernel rotates between sockets filling each socket before moving # to the next. Requires at least Linux 3.10. + # * cluster_ebpf: kernel rotates between sockets following an extended BPF function loaded + # from a file. User needs to use ebpf-lb-file option to set the path to the file + # to load. Requires at least Linux 4.2. # Recommended modes are cluster_flow on most boxes and cluster_cpu or cluster_qm on system # with capture card using RSS (require cpu affinity tuning and system irq tuning) cluster-type: cluster_flow # In some fragmentation case, the hash can not be computed. If "defrag" is set # to yes, the kernel will do the needed defragmentation before sending the packets. 
defrag: yes + # eBPF file containing a 'loadbalancer' function that will be inserted into the + # kernel and used as load balancing function + #ebpf-lb-file: @e_sysconfdir@/ebpf/lb.bpf + # eBPF file containing a 'filter' function that will be inserted into the + # kernel and used as packet filter function + #ebpf-filter-file: @e_sysconfdir@/ebpf/filter.bpf # After Linux kernel 3.10 it is possible to activate the rollover option: if a socket is # full then kernel will send the packet on the next socket with room available. This option # can minimize packet drop and increase the treated bandwidth on single intensive flow.