af-packet: kernel bypass implementation

author Eric Leblond <eric@regit.org>

Sun, 26 Nov 2017 19:29:52 +0000 (20:29 +0100)

committer Eric Leblond <eric@regit.org>

Tue, 6 Feb 2018 15:57:55 +0000 (16:57 +0100)
author Eric Leblond <eric@regit.org>
Sun, 26 Nov 2017 19:29:52 +0000 (20:29 +0100)
committer Eric Leblond <eric@regit.org>
Tue, 6 Feb 2018 15:57:55 +0000 (16:57 +0100)
diff --git a/ebpf/Makefile.am b/ebpf/Makefile.am

index cd35cd8360e92ea83fd5e470bf4f56c3d2f9f91d..8cfda9d1155b5553f04399e3f9d03253ee34b633 100644 (file)
--- a/ebpf/Makefile.am
+++ b/ebpf/Makefile.am
@@ -1,6 +1,6 @@
  if BUILD_EBPF
  
-all: lb.bpf filter.bpf
+all: lb.bpf filter.bpf bypass_filter.bpf
  
  %.bpf: %.c
         ${CC} -Wall -O2 -D__KERNEL__ -D__ASM_SYSREG_H -emit-llvm -c $< -o - | ${LLC} -march=bpf -filetype=obj -o $@
diff --git a/ebpf/bypass_filter.c b/ebpf/bypass_filter.c

new file mode 100644 (file)

index 0000000..a728dad
--- /dev/null
+++ b/ebpf/bypass_filter.c
@@ -0,0 +1,193 @@
+//#include <bcc/proto.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/filter.h>
+
+#include "bpf_helpers.h"
+
+#define LINUX_VERSION_CODE 263682
+
+struct flowv4_keys {
+    __be32 src;
+    __be32 dst;
+    union {
+        __be32 ports;
+        __be16 port16[2];
+    };
+    __u32 ip_proto;
+} __attribute__((__aligned__(8)));
+
+struct flowv6_keys {
+    __be32 src[4];
+    __be32 dst[4];
+    union {
+        __be32 ports;
+        __be16 port16[2];
+    };
+    __u32 ip_proto;
+} __attribute__((__aligned__(8)));
+
+struct pair {
+    uint64_t time;
+    uint64_t packets;
+    uint64_t bytes;
+} __attribute__((__aligned__(8)));
+
+struct bpf_map_def SEC("maps") flow_table_v4 = {
+    .type = BPF_MAP_TYPE_HASH,
+    .key_size = sizeof(struct flowv4_keys),
+    .value_size = sizeof(struct pair),
+    .max_entries = 32768,
+};
+
+struct bpf_map_def SEC("maps") flow_table_v6 = {
+    .type = BPF_MAP_TYPE_HASH,
+    .key_size = sizeof(struct flowv6_keys),
+    .value_size = sizeof(struct pair),
+    .max_entries = 32768,
+};
+
+static __always_inline int ipv4_filter(struct __sk_buff *skb)
+{
+    uint32_t nhoff, verlen;
+    struct flowv4_keys tuple;
+    struct pair *value;
+    uint16_t port;
+
+    nhoff = skb->cb[0];
+
+    tuple.ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
+    /* only support TCP and UDP for now */
+    switch (tuple.ip_proto) {
+        case IPPROTO_TCP:
+        case IPPROTO_UDP:
+            break;
+        default:
+            return -1;
+    }
+    
+    tuple.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
+    tuple.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
+
+    verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
+    nhoff += (verlen & 0xF) << 2;
+    tuple.ports = load_word(skb, nhoff);
+    port = tuple.port16[1];
+    tuple.port16[1] = tuple.port16[0];
+    tuple.port16[0] = port;
+
+#if 0
+    if ((tuple.port16[0] == 22) || (tuple.port16[1] == 22))
+    {
+        uint16_t sp = tuple.port16[0];
+        //uint16_t dp = tuple.port16[1];
+        char fmt[] = "Parsed SSH flow: %u %d -> %u\n";
+        bpf_trace_printk(fmt, sizeof(fmt), tuple.src, sp, tuple.dst);
+    }
+#endif
+    /* Test if src is in hash */
+    value = bpf_map_lookup_elem(&flow_table_v4, &tuple);
+    if (value) {
+#if 0
+        {
+            uint16_t sp = tuple.port16[0];
+            //uint16_t dp = tuple.port16[1];
+            char bfmt[] = "Found flow: %u %d -> %u\n";
+            bpf_trace_printk(bfmt, sizeof(bfmt), tuple.src, sp, tuple.dst);
+        }
+#endif
+        __sync_fetch_and_add(&value->packets, 1);
+        __sync_fetch_and_add(&value->bytes, skb->len);
+        value->time = bpf_ktime_get_ns();
+        return 0;
+    }
+    return -1;
+}
+
+static __always_inline int ipv6_filter(struct __sk_buff *skb)
+{
+    uint32_t nhoff;
+    uint8_t nhdr;
+    struct flowv6_keys tuple;
+    struct pair *value;
+    uint16_t port;
+
+    nhoff = skb->cb[0];
+
+    tuple.src[0] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr));
+    tuple.src[1] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 4);
+    tuple.src[2] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 8);
+    tuple.src[3] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 12);
+    tuple.dst[0] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr));
+    tuple.dst[1] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 4);
+    tuple.dst[2] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 8);
+    tuple.dst[3] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 12);
+
+    /* get next header */
+    nhdr = load_byte(skb, nhoff + offsetof(struct ipv6hdr, nexthdr));
+
+    /* only support direct TCP and UDP for now */
+    switch (nhdr) {
+        case IPPROTO_TCP:
+        case IPPROTO_UDP:
+            break;
+        default:
+            return -1;
+    }
+
+    /* Parse TCP */
+    tuple.ports = load_word(skb, nhoff + 40 /* IPV6_HEADER_LEN */);
+    port = tuple.port16[1];
+    tuple.port16[1] = tuple.port16[0];
+    tuple.port16[0] = port;
+    tuple.ip_proto = nhdr;
+
+    //char fmt[] = "Now Got IPv6 port %u and %u\n";
+    //bpf_trace_printk(fmt, sizeof(fmt), tuple.port16[0], tuple.port16[1]);
+    /* Test if src is in hash */
+    value = bpf_map_lookup_elem(&flow_table_v6, &tuple);
+    if (value) {
+        //char fmt[] = "Got a match IPv6: %u and %u\n";
+        //bpf_trace_printk(fmt, sizeof(fmt), tuple.port16[0], tuple.port16[1]);
+        __sync_fetch_and_add(&value->packets, 1);
+        __sync_fetch_and_add(&value->bytes, skb->len);
+        value->time = bpf_ktime_get_ns();
+        return 0;
+    }
+    return -1;
+}
+
+int SEC("filter") hashfilter(struct __sk_buff *skb) {
+    __u32 nhoff = BPF_LL_OFF + ETH_HLEN;
+
+    skb->cb[0] = nhoff;
+    switch (skb->protocol) {
+        case __constant_htons(ETH_P_IP):
+            return ipv4_filter(skb);
+        case __constant_htons(ETH_P_IPV6):
+            return ipv6_filter(skb);
+        default:
+#if 0
+            {
+                char fmt[] = "Got proto %u\n";
+                bpf_trace_printk(fmt, sizeof(fmt), h_proto);
+                break;
+            }
+#else
+            break;
+#endif
+    }
+    return -1;
+}
+
+
+char __license[] SEC("license") = "GPL";
+
+uint32_t __version SEC("version") = LINUX_VERSION_CODE;
diff --git a/src/Makefile.am b/src/Makefile.am

index 1a5da89203b93d71432e6425e6f7a54029f73448..518153fc38cd7b1ebf2fee1c7bbca835a5de7776 100644 (file)
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -260,6 +260,7 @@ detect-xbits.c detect-xbits.h \
  detect-cipservice.c detect-cipservice.h \
  flow-bit.c flow-bit.h \
  flow.c flow.h \
+flow-bypass.c flow-bypass.h \
  flow-hash.c flow-hash.h \
  flow-manager.c flow-manager.h \
  flow-queue.c flow-queue.h \
diff --git a/src/flow-bypass.c b/src/flow-bypass.c

new file mode 100644 (file)

index 0000000..e93c5ba
--- /dev/null
+++ b/src/flow-bypass.c
@@ -0,0 +1,178 @@
+/* Copyright (C) 2016 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \file
+ *
+ * \author Eric Leblond <eleblond@stamus-networks.com>
+ */
+
+#include "suricata-common.h"
+#include "tm-threads.h"
+#include "flow.h"
+#include "flow-bypass.h"
+#include "flow-private.h"
+#include "util-ebpf.h"
+
+#define BYPASSED_FLOW_TIMEOUT   60
+#define FLOW_BYPASS_DELAY       10
+
+typedef struct BypassedFlowManagerThreadData_ {
+    uint16_t flow_bypassed_cnt_clo;
+    uint16_t flow_bypassed_pkts;
+    uint16_t flow_bypassed_bytes;
+} BypassedFlowManagerThreadData;
+
+#ifdef HAVE_PACKET_EBPF
+
+static int BypassedFlowV4Timeout(int fd, struct flowv4_keys *key, struct pair *value, void *data)
+{
+    struct timespec *curtime = (struct timespec *)data;
+    SCLogDebug("Got curtime %" PRIu64 " and value %" PRIu64 " (sp:%d, dp:%d)",
+               curtime->tv_sec, value->time / 1000000000,
+               key->port16[0], key->port16[1]
+              );
+
+    if (curtime->tv_sec - value->time / 1000000000 > BYPASSED_FLOW_TIMEOUT) {
+        SCLogDebug("Got no packet for %d -> %d at %" PRIu64,
+                   key->port16[0], key->port16[1], value->time);
+        EBPFDeleteKey(fd, key);
+        return 1;
+    }
+    return 0;
+}
+
+static int BypassedFlowV6Timeout(int fd, struct flowv6_keys *key, struct pair *value, void *data)
+{
+    struct timespec *curtime = (struct timespec *)data;
+    SCLogDebug("Got curtime %" PRIu64 " and value %" PRIu64 " (sp:%d, dp:%d)",
+               curtime->tv_sec, value->time / 1000000000,
+               key->port16[0], key->port16[1]
+              );
+
+    if (curtime->tv_sec - value->time / 1000000000 > BYPASSED_FLOW_TIMEOUT) {
+        SCLogDebug("Got no packet for %d -> %d at %" PRIu64,
+                   key->port16[0], key->port16[1], value->time);
+        EBPFDeleteKey(fd, key);
+        return 1;
+    }
+    return 0;
+}
+
+#endif
+
+static TmEcode BypassedFlowManager(ThreadVars *th_v, void *thread_data)
+{
+#ifdef HAVE_PACKET_EBPF
+    int tcount = 0;
+    BypassedFlowManagerThreadData *ftd = thread_data;
+
+    while (1) {
+        SCLogDebug("Dumping the table");
+        struct timespec curtime;
+        struct flows_stats bypassstats = { 0, 0, 0};
+        if (clock_gettime(CLOCK_MONOTONIC, &curtime) != 0) {
+            SCLogWarning(SC_ERR_INVALID_VALUE, "Can't get time: %s (%d)",
+                         strerror(errno), errno);
+            sleep(1);
+            continue;
+        }
+        /* TODO indirection here: AF_PACKET and NFQ should be able to give their iterate function */
+        tcount = EBPFForEachFlowV4Table("flow_table_v4", BypassedFlowV4Timeout, &bypassstats, &curtime);
+        if (tcount) {
+            StatsAddUI64(th_v, ftd->flow_bypassed_cnt_clo, (uint64_t)bypassstats.count);
+            StatsAddUI64(th_v, ftd->flow_bypassed_pkts, (uint64_t)bypassstats.packets);
+            StatsAddUI64(th_v, ftd->flow_bypassed_bytes, (uint64_t)bypassstats.bytes);
+        }
+        memset(&bypassstats, 0, sizeof(bypassstats));
+        /* TODO indirection here: AF_PACKET and NFQ should be able to give their iterate function */
+        tcount = EBPFForEachFlowV6Table("flow_table_v6", BypassedFlowV6Timeout, &bypassstats, &curtime);
+        if (tcount) {
+            StatsAddUI64(th_v, ftd->flow_bypassed_cnt_clo, (uint64_t)bypassstats.count);
+            StatsAddUI64(th_v, ftd->flow_bypassed_pkts, (uint64_t)bypassstats.packets);
+            StatsAddUI64(th_v, ftd->flow_bypassed_bytes, (uint64_t)bypassstats.bytes);
+        }
+
+        if (TmThreadsCheckFlag(th_v, THV_KILL)) {
+            StatsSyncCounters(th_v);
+            return TM_ECODE_OK;
+        }
+        sleep(FLOW_BYPASS_DELAY);
+        StatsSyncCountersIfSignalled(th_v);
+    }
+#endif
+    return TM_ECODE_OK;
+}
+
+
+static TmEcode BypassedFlowManagerThreadInit(ThreadVars *t, const void *initdata, void **data)
+{
+    BypassedFlowManagerThreadData *ftd = SCCalloc(1, sizeof(BypassedFlowManagerThreadData));
+    if (ftd == NULL)
+        return TM_ECODE_FAILED;
+
+    *data = ftd;
+
+    ftd->flow_bypassed_cnt_clo = StatsRegisterCounter("flow_bypassed.closed", t);
+    ftd->flow_bypassed_pkts = StatsRegisterCounter("flow_bypassed.pkts", t);
+    ftd->flow_bypassed_bytes = StatsRegisterCounter("flow_bypassed.bytes", t);
+
+    return TM_ECODE_OK;
+}
+
+static TmEcode BypassedFlowManagerThreadDeinit(ThreadVars *t, void *data)
+{
+    if (data)
+        SCFree(data);
+    return TM_ECODE_OK;
+}
+
+/** \brief spawn the flow manager thread */
+void BypassedFlowManagerThreadSpawn()
+{
+#ifdef AFLFUZZ_DISABLE_MGTTHREADS
+    return;
+#endif
+
+#ifdef HAVE_PACKET_EBPF
+    ThreadVars *tv_flowmgr = NULL;
+    tv_flowmgr = TmThreadCreateMgmtThreadByName("BypassedFlowManager",
+            "BypassedFlowManager", 0);
+    BUG_ON(tv_flowmgr == NULL);
+
+    if (tv_flowmgr == NULL) {
+        printf("ERROR: TmThreadsCreate failed\n");
+        exit(1);
+    }
+    if (TmThreadSpawn(tv_flowmgr) != TM_ECODE_OK) {
+        printf("ERROR: TmThreadSpawn failed\n");
+        exit(1);
+    }
+#endif
+}
+
+void TmModuleBypassedFlowManagerRegister (void)
+{
+    tmm_modules[TMM_BYPASSEDFLOWMANAGER].name = "BypassedFlowManager";
+    tmm_modules[TMM_BYPASSEDFLOWMANAGER].ThreadInit = BypassedFlowManagerThreadInit;
+    tmm_modules[TMM_BYPASSEDFLOWMANAGER].ThreadDeinit = BypassedFlowManagerThreadDeinit;
+    tmm_modules[TMM_BYPASSEDFLOWMANAGER].Management = BypassedFlowManager;
+    tmm_modules[TMM_BYPASSEDFLOWMANAGER].cap_flags = 0;
+    tmm_modules[TMM_BYPASSEDFLOWMANAGER].flags = TM_FLAG_MANAGEMENT_TM;
+    SCLogDebug("%s registered", tmm_modules[TMM_BYPASSEDFLOWMANAGER].name);
+}
+
diff --git a/src/flow-bypass.h b/src/flow-bypass.h

new file mode 100644 (file)

index 0000000..812cccb
--- /dev/null
+++ b/src/flow-bypass.h
@@ -0,0 +1,34 @@
+/* Copyright (C) 2016 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \file
+ *
+ * \author Eric Leblond <eleblond@stamus-networks.com>
+ */
+
+#ifndef __FLOW_BYPASS_H__
+#define __FLOW_BYPASS_H__
+
+void FlowAddToBypassed(Flow *f);
+
+void BypassedFlowManagerThreadSpawn(void);
+void TmModuleBypassedFlowManagerRegister(void);
+
+#endif
+
+
diff --git a/src/runmode-af-packet.c b/src/runmode-af-packet.c

index 03572382067a74af9bd3e73785461b47b24b6602..6ad2e7339ad23c05d649248eb36e46b2eb79b1a4 100644 (file)
--- a/src/runmode-af-packet.c
+++ b/src/runmode-af-packet.c
@@ -376,18 +376,20 @@ static void *ParseAFPConfig(const char *iface)
          aconf->ebpf_lb_file = ebpf_file;
      }
  
+#ifdef HAVE_PACKET_EBPF
      /* One shot loading of the eBPF file */
      if (aconf->ebpf_lb_file && cluster_type == PACKET_FANOUT_EBPF) {
-#ifdef HAVE_PACKET_EBPF
          int ret = EBPFLoadFile(aconf->ebpf_lb_file, "loadbalancer",
                                 &aconf->ebpf_lb_fd);
          if (ret != 0) {
              SCLogWarning(SC_ERR_INVALID_VALUE, "Error when loading eBPF lb file");
          }
+    }
  #else
+    if (aconf->ebpf_lb_file) {
          SCLogError(SC_ERR_UNIMPLEMENTED, "eBPF support is not build-in");
-#endif
      }
+#endif
  
      if (ConfGetChildValueWithDefault(if_root, if_default, "ebpf-filter-file", &ebpf_file) != 1) {
          aconf->ebpf_filter_file = NULL;
@@ -397,6 +399,12 @@ static void *ParseAFPConfig(const char *iface)
                    ebpf_file);
  #endif
          aconf->ebpf_filter_file = ebpf_file;
+        ConfGetChildValueBoolWithDefault(if_root, if_default, "bypass", &conf_val);
+        if (conf_val) {
+            SCLogConfig("Using bypass kernel functionality for AF_PACKET (iface %s)",
+                    aconf->iface);
+            aconf->flags |= AFP_BYPASS;
+        }
      }
  
      /* One shot loading of the eBPF file */
diff --git a/src/runmodes.c b/src/runmodes.c

index 4f4e7f70c2536c0df6835855183625d8791559cd..1435956adf84f3a8eadfda4464df9500343431bd 100644 (file)
--- a/src/runmodes.c
+++ b/src/runmodes.c
@@ -52,6 +52,7 @@
  
  #include "tmqh-flow.h"
  #include "flow-manager.h"
+#include "flow-bypass.h"
  #include "counters.h"
  
  int debuglog_enabled = 0;
@@ -380,6 +381,7 @@ void RunModeDispatch(int runmode, const char *custom_mode)
          /* spawn management threads */
          FlowManagerThreadSpawn();
          FlowRecyclerThreadSpawn();
+        BypassedFlowManagerThreadSpawn();
          StatsSpawnThreads();
      }
  }
diff --git a/src/source-af-packet.c b/src/source-af-packet.c

index 0fb526e289138661b978f3e4acbbde83c8e34c8a..68173f19af4198d453df40e36aea76a0c6b746a3 100644 (file)
--- a/src/source-af-packet.c
+++ b/src/source-af-packet.c
@@ -64,6 +64,12 @@
  #include <sys/ioctl.h>
  #endif
  
+#ifdef HAVE_PACKET_EBPF
+#include "util-ebpf.h"
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#endif
+
  struct bpf_program {
      unsigned int bf_len;
      struct bpf_insn *bf_insns;
@@ -192,6 +198,8 @@ union thdr {
      void *raw;
  };
  
+static int AFPBypassCallback(Packet *p);
+
  #define MAX_MAPS 32
  /**
   * \brief Structure to hold thread specific variables.
@@ -606,6 +614,9 @@ static int AFPRead(AFPThreadVars *ptv)
          SCReturnInt(AFP_FAILURE);
      }
      PKT_SET_SRC(p, PKT_SRC_WIRE);
+    if (ptv->flags & AFP_BYPASS) {
+        p->BypassPacketsFlow = AFPBypassCallback;
+    }
  
      /* get timestamp of packet via ioctl */
      if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
@@ -875,6 +886,9 @@ static int AFPReadFromRing(AFPThreadVars *ptv)
              SCReturnInt(AFP_FAILURE);
          }
          PKT_SET_SRC(p, PKT_SRC_WIRE);
+        if (ptv->flags & AFP_BYPASS) {
+            p->BypassPacketsFlow = AFPBypassCallback;
+        }
  
          /* Suricata will treat packet so telling it is busy, this
           * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
@@ -987,6 +1001,9 @@ static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc
          SCReturnInt(AFP_FAILURE);
      }
      PKT_SET_SRC(p, PKT_SRC_WIRE);
+    if (ptv->flags & AFP_BYPASS) {
+        p->BypassPacketsFlow = AFPBypassCallback;
+    }
  
      ptv->pkts++;
      p->livedev = ptv->livedev;
@@ -2191,6 +2208,123 @@ TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
      return TM_ECODE_OK;
  }
  
+#ifdef HAVE_PACKET_EBPF
+/**
+ * Insert a half flow in the kernel bypass table
+ *
+ * \param mapfd file descriptor of the protocol bypass table
+ * \param key data to use as key in the table
+ * \param inittime time of creation of the entry (in monotonic clock)
+ */
+static int AFPInsertHalfFlow(int mapd, void *key, uint64_t inittime)
+{
+        /* FIXME error handling */
+        struct pair value = {inittime, 0, 0};
+        SCLogDebug("Inserting element in eBPF mapping");
+        if (bpf_map_update_elem(mapd, key, &value, BPF_NOEXIST) != 0) {
+            switch (errno) {
+                case E2BIG:
+                case EEXIST:
+                    return 0;
+                default:
+                    SCLogError(SC_ERR_BPF, "Can't update eBPF map: %s (%d)",
+                               strerror(errno),
+                               errno);
+                    return 0;
+            }
+        }
+        return 1;
+}
+#endif
+
+static int AFPBypassCallback(Packet *p)
+{
+#ifdef HAVE_PACKET_EBPF
+    SCLogDebug("Calling af_packet callback function");
+    /* Only bypass TCP and UDP */
+    if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
+        return 0;
+    }
+
+    /* Bypassing tunneled packets is currently not supported
+     * because we can't discard the inner packet only due to
+     * primitive parsing in eBPF */
+    if (IS_TUNNEL_PKT(p)) {
+        return 0;
+    }
+    struct timespec curtime;
+    uint64_t inittime = 0;
+    /* In eBPF, the function that we have use to get time return the
+     * monotonic clock (the time since start of the computer). So we
+     * can't use the timestamp of the packet. */
+    if (clock_gettime(CLOCK_MONOTONIC, &curtime) == 0) {
+        inittime = curtime.tv_sec * 1000000000;
+    }
+    if (PKT_IS_IPV4(p)) {
+        /* FIXME cache this and handle error at cache time*/
+        int mapd = EBPFGetMapFDByName("flow_table_v4");
+        if (mapd == -1) {
+            SCLogNotice("Can't find eBPF map fd for '%s'", "flow_table_v4");
+            return 0;
+        }
+        /* FIXME error handling */
+        struct flowv4_keys key = {};
+        key.src = htonl(GET_IPV4_SRC_ADDR_U32(p));
+        key.dst = htonl(GET_IPV4_DST_ADDR_U32(p));
+        key.port16[0] = GET_TCP_SRC_PORT(p);
+        key.port16[1] = GET_TCP_DST_PORT(p);
+        key.ip_proto = IPV4_GET_IPPROTO(p);
+        if (AFPInsertHalfFlow(mapd, &key, inittime) == 0) {
+            return 0;
+        }
+        key.src = htonl(GET_IPV4_DST_ADDR_U32(p));
+        key.dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
+        key.port16[0] = GET_TCP_DST_PORT(p);
+        key.port16[1] = GET_TCP_SRC_PORT(p);
+        if (AFPInsertHalfFlow(mapd, &key, inittime) == 0) {
+            return 0;
+        }
+        return 1;
+    }
+    /* For IPv6 case we don't handle extended header in eBPF */
+    if (PKT_IS_IPV6(p) && 
+        ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
+        /* FIXME cache this and handle error at cache time*/
+        int mapd = EBPFGetMapFDByName("flow_table_v6");
+        int i = 0;
+        if (mapd == -1) {
+            SCLogNotice("Can't find eBPF map fd for '%s'", "flow_table_v6");
+            return 0;
+        }
+        SCLogDebug("add an IPv6");
+        /* FIXME error handling */
+        /* FIXME filter out next hdr IPV6 packets */
+        struct flowv6_keys key = {};
+        for (i = 0; i < 4; i++) {
+            key.src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
+            key.dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
+        }
+        key.port16[0] = GET_TCP_SRC_PORT(p);
+        key.port16[1] = GET_TCP_DST_PORT(p);
+        key.ip_proto = IPV6_GET_NH(p);
+        if (AFPInsertHalfFlow(mapd, &key, inittime) == 0) {
+            return 0;
+        }
+        for (i = 0; i < 4; i++) {
+            key.src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
+            key.dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
+        }
+        key.port16[0] = GET_TCP_DST_PORT(p);
+        key.port16[1] = GET_TCP_SRC_PORT(p);
+        if (AFPInsertHalfFlow(mapd, &key, inittime) == 0) {
+            return 0;
+        }
+        return 1;
+    }
+#endif
+    return 0;
+}
+
  /**
   * \brief Init function for ReceiveAFP.
   *
diff --git a/src/source-af-packet.h b/src/source-af-packet.h

index f8c13c917bd0383e30006b8274c31f9acee6e7a7..d001ce68ce24c51733a2be36c2b1bf721eb921b8 100644 (file)
--- a/src/source-af-packet.h
+++ b/src/source-af-packet.h
@@ -51,6 +51,7 @@
  #define AFP_TPACKET_V3 (1<<4)
  #define AFP_VLAN_DISABLED (1<<5)
  #define AFP_MMAP_LOCKED (1<<6)
+#define AFP_BYPASS   (1<<7)
  
  #define AFP_COPY_MODE_NONE  0
  #define AFP_COPY_MODE_TAP   1
diff --git a/src/suricata.c b/src/suricata.c

index 4d59cacbec744a76ae11d485ff222e298a1a458e..dd7aad9902a811120338818081d7fd3372dd62b2 100644 (file)
--- a/src/suricata.c
+++ b/src/suricata.c
@@ -100,6 +100,7 @@
  #include "flow.h"
  #include "flow-timeout.h"
  #include "flow-manager.h"
+#include "flow-bypass.h"
  #include "flow-var.h"
  #include "flow-bit.h"
  #include "pkt-var.h"
@@ -855,6 +856,7 @@ void RegisterAllModules(void)
      /* managers */
      TmModuleFlowManagerRegister();
      TmModuleFlowRecyclerRegister();
+    TmModuleBypassedFlowManagerRegister();
      /* nfq */
      TmModuleReceiveNFQRegister();
      TmModuleVerdictNFQRegister();
diff --git a/src/tm-modules.c b/src/tm-modules.c

index 13fa409f196ba68ee774533de2eb0909fcc011db..869ba184de43d6989c393faeb109428370ac70d3 100644 (file)
--- a/src/tm-modules.c
+++ b/src/tm-modules.c
@@ -229,6 +229,7 @@ const char * TmModuleTmmIdToString(TmmId id)
          CASE_CODE (TMM_STATSLOGGER);
          CASE_CODE (TMM_FLOWMANAGER);
          CASE_CODE (TMM_FLOWRECYCLER);
+        CASE_CODE (TMM_BYPASSEDFLOWMANAGER);
          CASE_CODE (TMM_UNIXMANAGER);
          CASE_CODE (TMM_DETECTLOADER);
          CASE_CODE (TMM_RECEIVENETMAP);
diff --git a/src/tm-threads-common.h b/src/tm-threads-common.h

index 8325687a832285d74be515909081e9108e20a263..d55884db81e8d2ab44f06b6a20e5d0a03f153f03 100644 (file)
--- a/src/tm-threads-common.h
+++ b/src/tm-threads-common.h
@@ -64,6 +64,7 @@ typedef enum {
  
      TMM_FLOWMANAGER,
      TMM_FLOWRECYCLER,
+    TMM_BYPASSEDFLOWMANAGER,
      TMM_DETECTLOADER,
  
      TMM_UNIXMANAGER,
diff --git a/src/util-ebpf.c b/src/util-ebpf.c

index a4f147545efc4b96ad8d2bbdd261a98c08fd91bd..e5690ba1dc09aa0002c69c7d056020dff559dec7 100644 (file)
--- a/src/util-ebpf.c
+++ b/src/util-ebpf.c
@@ -45,15 +45,6 @@
  
  #define BPF_MAP_MAX_COUNT 16
  
-#define MAX_ERRNO   4095
-
-#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
-
-static inline long IS_ERR(const void *ptr)
-{
-    return IS_ERR_VALUE((unsigned long)ptr);
-}
-
  struct bpf_map_item {
      const char * name;
      int fd;
@@ -69,14 +60,19 @@ int EBPFGetMapFDByName(const char *name)
      if (name == NULL)
          return -1;
      for (i = 0; i < BPF_MAP_MAX_COUNT; i++) {
+        if (!bpf_map_array[i].name)
+            continue;
          if (!strcmp(bpf_map_array[i].name, name)) {
-            SCLogNotice("Got fd %d for eBPF map '%s'", bpf_map_array[i].fd, name);
+            SCLogDebug("Got fd %d for eBPF map '%s'", bpf_map_array[i].fd, name);
              return bpf_map_array[i].fd;
          }
      }
      return -1;
  }
  
+#define bpf__is_error(ee) ee
+#define bpf__get_error(ee) 1
+
  /** 
   * Load a section of an eBPF file
   *
@@ -104,10 +100,13 @@ int EBPFLoadFile(const char *path, const char * section, int *val)
  
      bpfobj = bpf_object__open(path);
  
-    if (IS_ERR(bpfobj)) {
+    if (libbpf_get_error(bpfobj)) {
+        char err_buf[128];
+        libbpf_strerror(bpf__get_error(bpfobj), err_buf,
+                        sizeof(err_buf));
          SCLogError(SC_ERR_INVALID_VALUE,
-                   "Unable to load eBPF objects in '%s'",
-                   path);
+                   "Unable to load eBPF objects in '%s': %s",
+                   path, err_buf);
          return -1;
      }
  
@@ -149,7 +148,7 @@ int EBPFLoadFile(const char *path, const char * section, int *val)
  
      /* store the map in our array */
      bpf_map__for_each(map, bpfobj) {
-        SCLogNotice("Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map));
+        SCLogDebug("Got a map '%s' with fd '%d'", bpf_map__name(map), bpf_map__fd(map));
          bpf_map_array[bpf_map_last].fd = bpf_map__fd(map);
          bpf_map_array[bpf_map_last].name = SCStrdup(bpf_map__name(map));
          if (!bpf_map_array[bpf_map_last].name) {
@@ -174,4 +173,81 @@ int EBPFLoadFile(const char *path, const char * section, int *val)
      return 0;
  }
  
+int EBPFForEachFlowV4Table(const char *name,
+                              int (*FlowCallback)(int fd, struct flowv4_keys *key, struct pair *value, void *data),
+                              struct flows_stats *flowstats,
+                              void *data)
+{
+    int mapfd = EBPFGetMapFDByName(name);
+    struct flowv4_keys key = {}, next_key;
+    struct pair value = {0, 0, 0};
+    int ret, found = 0;
+    if (bpf_map_get_next_key(mapfd, &key, &next_key) != 0) {
+        return found;
+    }
+    while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
+        bpf_map_lookup_elem(mapfd, &key, &value);
+        ret = FlowCallback(mapfd, &key, &value, data);
+        if (ret) {
+            flowstats->count++;
+            flowstats->packets += value.packets;
+            flowstats->bytes += value.bytes;
+            found = 1;
+        }
+        key = next_key;
+    }
+
+    bpf_map_lookup_elem(mapfd, &key, &value);
+    ret = FlowCallback(mapfd, &key, &value, data);
+    if (ret) {
+        flowstats->count++;
+        flowstats->packets += value.packets;
+        flowstats->bytes += value.bytes;
+        found = 1;
+    }
+
+    return found;
+}
+
+int EBPFForEachFlowV6Table(const char *name,
+                              int (*FlowCallback)(int fd, struct flowv6_keys *key, struct pair *value, void *data),
+                              struct flows_stats *flowstats,
+                              void *data)
+{
+    int mapfd = EBPFGetMapFDByName(name);
+    struct flowv6_keys key = {}, next_key;
+    struct pair value = {0, 0, 0};
+    int ret, found = 0;
+    if (bpf_map_get_next_key(mapfd, &key, &next_key) != 0) {
+        return found;
+    }
+    while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) {
+        bpf_map_lookup_elem(mapfd, &key, &value);
+        ret = FlowCallback(mapfd, &key, &value, data);
+        if (ret) {
+            flowstats->count++;
+            flowstats->packets += value.packets;
+            flowstats->bytes += value.bytes;
+            found = 1;
+        }
+        key = next_key;
+    }
+
+    bpf_map_lookup_elem(mapfd, &key, &value);
+    ret = FlowCallback(mapfd, &key, &value, data);
+    if (ret) {
+        flowstats->count++;
+        flowstats->packets += value.packets;
+        flowstats->bytes += value.bytes;
+        found = 1;
+    }
+
+    return found;
+}
+
+void EBPFDeleteKey(int fd, void *key)
+{
+    bpf_map_delete_elem(fd, key);
+}
+
  #endif
diff --git a/src/util-ebpf.h b/src/util-ebpf.h

index 2158954e9c7920abfef47a918732109413f52f14..fc445d1fd846f2e5ff48b26a6db2aef439fdc33c 100644 (file)
--- a/src/util-ebpf.h
+++ b/src/util-ebpf.h
@@ -24,7 +24,49 @@
  #ifndef __UTIL_EBPF_H__
  #define __UTIL_EBPF_H__
  
+struct flowv4_keys {
+       __be32 src;
+       __be32 dst;
+       union {
+               __be32 ports;
+               __be16 port16[2];
+       };
+       __u32 ip_proto;
+};
+
+struct flowv6_keys {
+    __be32 src[4];
+    __be32 dst[4];
+    union {
+        __be32 ports;
+        __be16 port16[2];
+    };
+    __u32 ip_proto;
+};
+
+struct pair {
+    uint64_t time;
+    uint64_t packets;
+    uint64_t bytes;
+};
+
+struct flows_stats {
+    uint64_t count;
+    uint64_t packets;
+    uint64_t bytes;
+};
+
  int EBPFGetMapFDByName(const char *name);
  int EBPFLoadFile(const char *path, const char * section, int *val);
  
+int EBPFForEachFlowV4Table(const char *name,
+                              int (*FlowCallback)(int fd, struct flowv4_keys *key, struct pair *value, void *data),
+                              struct flows_stats *flowstats,
+                              void *data);
+int EBPFForEachFlowV6Table(const char *name,
+                              int (*FlowCallback)(int fd, struct flowv6_keys *key, struct pair *value, void *data),
+                              struct flows_stats *flowstats,
+                              void *data);
+void EBPFDeleteKey(int fd, void *key);
+
  #endif
diff --git a/suricata.yaml.in b/suricata.yaml.in

index ce5a563163ded74d3b1bea48e695cd92fa982983..d338b94bcba8aa26c9178f253b4a95fc387a3e25 100644 (file)
--- a/suricata.yaml.in
+++ b/suricata.yaml.in
@@ -635,6 +635,9 @@ af-packet:
      # eBPF file containing a 'filter' function that will be inserted into the
      # kernel and used as packet filter function
      #ebpf-filter-file:  @e_sysconfdir@/ebpf/filter.bpf
+    # if the ebpf filter implements a bypass function, you can set 'bypass' to
+    # yes and benefit from these feature
+    #bypass: yes
      # After Linux kernel 3.10 it is possible to activate the rollover option: if a socket is
      # full then kernel will send the packet on the next socket with room available. This option
      # can minimize packet drop and increase the treated bandwidth on single intensive flow.
author	Eric Leblond <eric@regit.org>
	Sun, 26 Nov 2017 19:29:52 +0000 (20:29 +0100)
committer	Eric Leblond <eric@regit.org>
	Tue, 6 Feb 2018 15:57:55 +0000 (16:57 +0100)
ebpf/Makefile.am		patch \| blob \| blame \| history
ebpf/bypass_filter.c	[new file with mode: 0644]	patch \| blob
src/Makefile.am		patch \| blob \| blame \| history
src/flow-bypass.c	[new file with mode: 0644]	patch \| blob
src/flow-bypass.h	[new file with mode: 0644]	patch \| blob
src/runmode-af-packet.c		patch \| blob \| blame \| history
src/runmodes.c		patch \| blob \| blame \| history
src/source-af-packet.c		patch \| blob \| blame \| history
src/source-af-packet.h		patch \| blob \| blame \| history
src/suricata.c		patch \| blob \| blame \| history
src/tm-modules.c		patch \| blob \| blame \| history
src/tm-threads-common.h		patch \| blob \| blame \| history
src/util-ebpf.c		patch \| blob \| blame \| history
src/util-ebpf.h		patch \| blob \| blame \| history
suricata.yaml.in		patch \| blob \| blame \| history