]> git.ipfire.org Git - people/ms/suricata.git/blame - src/source-af-packet.c
doc: Anomaly logging documentation
[people/ms/suricata.git] / src / source-af-packet.c
CommitLineData
cde438f6 1/* Copyright (C) 2011-2018 Open Information Security Foundation
c45d8985
EL
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
a6457262
EL
18/**
19 * \defgroup afppacket AF_PACKET running mode
20 *
21 * @{
22 */
23
c45d8985
EL
24/**
25 * \file
26 *
27 * \author Eric Leblond <eric@regit.org>
28 *
29 * AF_PACKET socket acquisition support
30 *
c45d8985
EL
31 */
32
91e1256b
EL
33#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34#define SC_PCAP_DONT_INCLUDE_PCAP_H 1
c45d8985 35#include "suricata-common.h"
e80b30c0 36#include "config.h"
c45d8985
EL
37#include "suricata.h"
38#include "decode.h"
39#include "packet-queue.h"
40#include "threads.h"
41#include "threadvars.h"
42#include "tm-queuehandlers.h"
43#include "tm-modules.h"
44#include "tm-threads.h"
45#include "tm-threads-common.h"
46#include "conf.h"
17a32bda 47#include "util-cpu.h"
c45d8985 48#include "util-debug.h"
51eb9605 49#include "util-device.h"
d65f4585 50#include "util-ebpf.h"
c45d8985
EL
51#include "util-error.h"
52#include "util-privs.h"
e80b30c0 53#include "util-optimize.h"
51eb9605 54#include "util-checksum.h"
ac56b1bf 55#include "util-ioctl.h"
2cd6e128 56#include "util-host-info.h"
c45d8985
EL
57#include "tmqh-packetpool.h"
58#include "source-af-packet.h"
34b3f194 59#include "runmodes.h"
c45d8985 60
e80b30c0 61#ifdef HAVE_AF_PACKET
472e061c
VJ
62
63#if HAVE_SYS_IOCTL_H
2bc0be6e 64#include <sys/ioctl.h>
472e061c
VJ
65#endif
66
06173267
EL
67#ifdef HAVE_PACKET_EBPF
68#include "util-ebpf.h"
69#include <bpf/libbpf.h>
70#include <bpf/bpf.h>
71#endif
72
91e1256b
EL
73struct bpf_program {
74 unsigned int bf_len;
75 struct bpf_insn *bf_insns;
76};
77
78#ifdef HAVE_PCAP_H
79#include <pcap.h>
80#endif
81
82#ifdef HAVE_PCAP_PCAP_H
83#include <pcap/pcap.h>
84#endif
85
28e9e4c8
EL
86#include "util-bpf.h"
87
472e061c 88#if HAVE_LINUX_IF_ETHER_H
c45d8985 89#include <linux/if_ether.h>
472e061c
VJ
90#endif
91
92#if HAVE_LINUX_IF_PACKET_H
c45d8985 93#include <linux/if_packet.h>
472e061c
VJ
94#endif
95
96#if HAVE_LINUX_IF_ARP_H
c45d8985 97#include <linux/if_arp.h>
472e061c 98#endif
f2a6fb8a 99
472e061c 100#if HAVE_LINUX_FILTER_H
f2a6fb8a 101#include <linux/filter.h>
e80b30c0 102#endif
c45d8985 103
472e061c 104#if HAVE_SYS_MMAN_H
49b7b00f 105#include <sys/mman.h>
472e061c
VJ
106#endif
107
a40f08a2
EL
108#ifdef HAVE_HW_TIMESTAMPING
109#include <linux/net_tstamp.h>
110#endif
111
472e061c 112#endif /* HAVE_AF_PACKET */
49b7b00f 113
c45d8985
EL
114extern int max_pending_packets;
115
e80b30c0
EL
116#ifndef HAVE_AF_PACKET
117
ab1200fb 118TmEcode NoAFPSupportExit(ThreadVars *, const void *, void **);
e80b30c0 119
8f1d7503
KS
120void TmModuleReceiveAFPRegister (void)
121{
e80b30c0
EL
122 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
123 tmm_modules[TMM_RECEIVEAFP].ThreadInit = NoAFPSupportExit;
124 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
125 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
126 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
127 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
128 tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
3f1c4efc 129 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
e80b30c0
EL
130}
131
132/**
133 * \brief Registration Function for DecodeAFP.
e80b30c0 134 */
8f1d7503
KS
135void TmModuleDecodeAFPRegister (void)
136{
e80b30c0
EL
137 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
138 tmm_modules[TMM_DECODEAFP].ThreadInit = NoAFPSupportExit;
139 tmm_modules[TMM_DECODEAFP].Func = NULL;
140 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
141 tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
142 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
143 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 144 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
e80b30c0
EL
145}
146
147/**
148 * \brief this function prints an error message and exits.
149 */
ab1200fb 150TmEcode NoAFPSupportExit(ThreadVars *tv, const void *initdata, void **data)
e80b30c0
EL
151{
152 SCLogError(SC_ERR_NO_AF_PACKET,"Error creating thread %s: you do not have "
153 "support for AF_PACKET enabled, on Linux host please recompile "
154 "with --enable-af-packet", tv->name);
155 exit(EXIT_FAILURE);
156}
157
158#else /* We have AF_PACKET support */
159
c45d8985
EL
/* Size of the buffers holding an interface name (see AFPThreadVars) */
#define AFP_IFACE_NAME_LENGTH 48

/* Values stored in the afp_state / peer state fields */
#define AFP_STATE_DOWN 0
#define AFP_STATE_UP 1

#define AFP_RECONNECT_TIMEOUT 500000
#define AFP_DOWN_COUNTER_INTERVAL 40

#define POLL_TIMEOUT 100

#ifndef TP_STATUS_USER_BUSY
/* for new use latest bit available in tp_status.
 * The constant is unsigned: shifting a signed 1 into bit 31 is undefined
 * behaviour, and tp_status is an unsigned 32 bit field anyway. */
#define TP_STATUS_USER_BUSY (1U << 31)
#endif

#ifndef TP_STATUS_VLAN_VALID
#define TP_STATUS_VLAN_VALID (1 << 4)
#endif
178
62e63e3f
EL
/** Result codes returned by the AF_PACKET read functions */
enum {
    AFP_READ_OK = 0,
    AFP_READ_FAILURE = 1,
    /** Error during treatment by other functions of Suricata */
    AFP_SURI_FAILURE = 2,
    AFP_KERNEL_DROP = 3,
};
186
1992a227
EL
/** Error classes; numbering starts at 1 so that 0 is not an error class */
enum {
    AFP_FATAL_ERROR = 1,
    AFP_RECOVERABLE_ERROR = 2,
};
191
49b7b00f
EL
/**
 * \brief Typed views on a frame header pointer.
 *
 * The same address can be accessed as a TPACKET_V2 header, as a
 * TPACKET_V3 header (when built with HAVE_TPACKET_V3) or as an untyped
 * pointer.
 */
union thdr {
    struct tpacket2_hdr *h2; /**< TPACKET_V2 view */
#ifdef HAVE_TPACKET_V3
    struct tpacket3_hdr *h3; /**< TPACKET_V3 view */
#endif
    void *raw;               /**< untyped view */
};
199
06173267 200static int AFPBypassCallback(Packet *p);
8c880879 201static int AFPXDPBypassCallback(Packet *p);
06173267 202
91e1256b 203#define MAX_MAPS 32
c45d8985
EL
204/**
205 * \brief Structure to hold thread specific variables.
206 */
207typedef struct AFPThreadVars_
208{
69d0d484
VJ
209 union AFPRing {
210 char *v2;
211 struct iovec *v3;
212 } ring;
b797fd92 213
c45d8985 214 /* counters */
3ce39433 215 uint64_t pkts;
c45d8985 216
ff6365dd
EL
217 ThreadVars *tv;
218 TmSlot *slot;
9500d12c
EL
219 LiveDevice *livedev;
220 /* data link type for the thread */
b797fd92 221 uint32_t datalink;
9500d12c 222
d65f4585 223#ifdef HAVE_PACKET_EBPF
94a622cb 224 /* File descriptor of the IPv4 flow bypass table maps */
d65f4585 225 int v4_map_fd;
94a622cb 226 /* File descriptor of the IPv6 flow bypass table maps */
d65f4585
EL
227 int v6_map_fd;
228#endif
229
9500d12c 230 unsigned int frame_offset;
ff6365dd 231
9500d12c
EL
232 ChecksumValidationMode checksum_mode;
233
b797fd92 234 /* references to packet and drop counters */
9500d12c
EL
235 uint16_t capture_kernel_packets;
236 uint16_t capture_kernel_drops;
9efa4ace 237 uint16_t capture_errors;
9500d12c
EL
238
239 /* handle state */
240 uint8_t afp_state;
241 uint8_t copy_mode;
4bfa3aea 242 unsigned int flags;
9500d12c
EL
243
244 /* IPS peer */
245 AFPPeer *mpeer;
246
247 /* no mmap mode */
ff6365dd
EL
248 uint8_t *data; /** Per function and thread data */
249 int datalen; /** Length of per function and thread data */
9500d12c 250 int cooked;
ff6365dd 251
9500d12c
EL
252 /*
253 * Init related members
254 */
51eb9605 255
9500d12c
EL
256 /* thread specific socket */
257 int socket;
b797fd92
EL
258
259 int ring_size;
fa902abe 260 int block_size;
234aefdf 261 int block_timeout;
e80b30c0
EL
262 /* socket buffer size */
263 int buffer_size;
fa902abe 264 /* Filter */
ab1200fb 265 const char *bpf_filter;
91e1256b
EL
266 int ebpf_lb_fd;
267 int ebpf_filter_fd;
9500d12c 268
df7dbe36 269 int promisc;
e80b30c0 270
9500d12c 271 int down_count;
662dccd8 272
e80b30c0
EL
273 int cluster_id;
274 int cluster_type;
c45d8985 275
fbca1a4e
EL
276 int threads;
277
69d0d484
VJ
278 union AFPTpacketReq {
279 struct tpacket_req v2;
c2d0d938 280#ifdef HAVE_TPACKET_V3
69d0d484 281 struct tpacket_req3 v3;
c2d0d938 282#endif
69d0d484 283 } req;
b797fd92
EL
284
285 char iface[AFP_IFACE_NAME_LENGTH];
286 /* IPS output iface */
287 char out_iface[AFP_IFACE_NAME_LENGTH];
662dccd8 288
cba41207
AG
289 /* mmap'ed ring buffer */
290 unsigned int ring_buflen;
291 uint8_t *ring_buf;
91e1256b 292
8c880879
EL
293 uint8_t xdp_mode;
294
c45d8985
EL
295} AFPThreadVars;
296
297TmEcode ReceiveAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
ab1200fb 298TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
c45d8985
EL
299void ReceiveAFPThreadExitStats(ThreadVars *, void *);
300TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
e80b30c0 301TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
c45d8985 302
ab1200fb 303TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
2864f9ee 304TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
c45d8985
EL
305TmEcode DecodeAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
306
f2a6fb8a 307TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
662dccd8 308static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
13f13b6d
EL
309static int AFPGetDevFlags(int fd, const char *ifname);
310static int AFPDerefSocket(AFPPeer* peer);
311static int AFPRefSocket(AFPPeer* peer);
f2a6fb8a 312
19475165
EL
313
314static unsigned int nr_cpus;
315
c45d8985
EL
316/**
317 * \brief Registration Function for RecieveAFP.
318 * \todo Unit tests are needed for this module.
319 */
8f1d7503
KS
320void TmModuleReceiveAFPRegister (void)
321{
c45d8985
EL
322 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
323 tmm_modules[TMM_RECEIVEAFP].ThreadInit = ReceiveAFPThreadInit;
ff6365dd 324 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
e80b30c0 325 tmm_modules[TMM_RECEIVEAFP].PktAcqLoop = ReceiveAFPLoop;
57e0bd39 326 tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
c45d8985 327 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
7127ae2b 328 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = ReceiveAFPThreadDeinit;
c45d8985
EL
329 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
330 tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
cd4705e6 331 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
19475165
EL
332
333 nr_cpus = UtilCpuGetNumProcessorsConfigured();
c45d8985
EL
334}
335
a6457262
EL
336
337/**
338 * \defgroup afppeers AFP peers list
339 *
340 * AF_PACKET has an IPS mode where interfaces are peered: packets from
341 * one interface are sent to the peered interface and the other way around. The ::AFPPeer
342 * list maintains the list of peers. Each ::AFPPeer stores the information
343 * needed to be able to send packets on the interface.
344 * An element of the list must not be destroyed during the run of Suricata as it
345 * is used by ::Packet and other threads.
346 *
347 * @{
348 */
349
662dccd8
EL
350typedef struct AFPPeersList_ {
351 TAILQ_HEAD(, AFPPeer_) peers; /**< Head of list of fragments. */
352 int cnt;
353 int peered;
60400163
EL
354 int turn; /**< Next value for initialisation order */
355 SC_ATOMIC_DECLARE(int, reached); /**< Counter used to synchronize start */
662dccd8
EL
356} AFPPeersList;
357
358/**
a6457262
EL
359 * \brief Update the peer.
360 *
361 * Update the AFPPeer of a thread ie set new state, socket number
362 * or iface index.
363 *
662dccd8 364 */
ab1200fb 365static void AFPPeerUpdate(AFPThreadVars *ptv)
662dccd8
EL
366{
367 if (ptv->mpeer == NULL) {
368 return;
369 }
662dccd8
EL
370 (void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
371 (void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
372 (void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
373}
374
a6457262
EL
375/**
376 * \brief Clean and free ressource used by an ::AFPPeer
377 */
ab1200fb 378static void AFPPeerClean(AFPPeer *peer)
662dccd8
EL
379{
380 if (peer->flags & AFP_SOCK_PROTECT)
381 SCMutexDestroy(&peer->sock_protect);
382 SC_ATOMIC_DESTROY(peer->socket);
383 SC_ATOMIC_DESTROY(peer->if_idx);
384 SC_ATOMIC_DESTROY(peer->state);
385 SCFree(peer);
386}
387
388AFPPeersList peerslist;
389
390
a6457262
EL
391/**
392 * \brief Init the global list of ::AFPPeer
393 */
662dccd8
EL
394TmEcode AFPPeersListInit()
395{
396 SCEnter();
397 TAILQ_INIT(&peerslist.peers);
398 peerslist.peered = 0;
399 peerslist.cnt = 0;
60400163
EL
400 peerslist.turn = 0;
401 SC_ATOMIC_INIT(peerslist.reached);
402 (void) SC_ATOMIC_SET(peerslist.reached, 0);
662dccd8
EL
403 SCReturnInt(TM_ECODE_OK);
404}
405
a6457262
EL
406/**
407 * \brief Check that all ::AFPPeer got a peer
408 *
409 * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
410 */
662dccd8
EL
411TmEcode AFPPeersListCheck()
412{
413#define AFP_PEERS_MAX_TRY 4
414#define AFP_PEERS_WAIT 20000
415 int try = 0;
416 SCEnter();
417 while (try < AFP_PEERS_MAX_TRY) {
418 if (peerslist.cnt != peerslist.peered) {
419 usleep(AFP_PEERS_WAIT);
420 } else {
421 SCReturnInt(TM_ECODE_OK);
422 }
423 try++;
424 }
425 SCLogError(SC_ERR_AFP_CREATE, "Threads number not equals");
426 SCReturnInt(TM_ECODE_FAILED);
427}
428
a6457262
EL
429/**
430 * \brief Declare a new AFP thread to AFP peers list.
431 */
ab1200fb 432static TmEcode AFPPeersListAdd(AFPThreadVars *ptv)
662dccd8
EL
433{
434 SCEnter();
435 AFPPeer *peer = SCMalloc(sizeof(AFPPeer));
436 AFPPeer *pitem;
ac56b1bf 437 int mtu, out_mtu;
662dccd8 438
e176be6f 439 if (unlikely(peer == NULL)) {
662dccd8
EL
440 SCReturnInt(TM_ECODE_FAILED);
441 }
442 memset(peer, 0, sizeof(AFPPeer));
443 SC_ATOMIC_INIT(peer->socket);
13f13b6d 444 SC_ATOMIC_INIT(peer->sock_usage);
662dccd8
EL
445 SC_ATOMIC_INIT(peer->if_idx);
446 SC_ATOMIC_INIT(peer->state);
447 peer->flags = ptv->flags;
60400163 448 peer->turn = peerslist.turn++;
662dccd8
EL
449
450 if (peer->flags & AFP_SOCK_PROTECT) {
451 SCMutexInit(&peer->sock_protect, NULL);
452 }
453
13f13b6d 454 (void)SC_ATOMIC_SET(peer->sock_usage, 0);
662dccd8
EL
455 (void)SC_ATOMIC_SET(peer->state, AFP_STATE_DOWN);
456 strlcpy(peer->iface, ptv->iface, AFP_IFACE_NAME_LENGTH);
457 ptv->mpeer = peer;
458 /* add element to iface list */
459 TAILQ_INSERT_TAIL(&peerslist.peers, peer, next);
662dccd8 460
13f13b6d
EL
461 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
462 peerslist.cnt++;
463
464 /* Iter to find a peer */
465 TAILQ_FOREACH(pitem, &peerslist.peers, next) {
466 if (pitem->peer)
467 continue;
468 if (strcmp(pitem->iface, ptv->out_iface))
469 continue;
470 peer->peer = pitem;
471 pitem->peer = peer;
472 mtu = GetIfaceMTU(ptv->iface);
473 out_mtu = GetIfaceMTU(ptv->out_iface);
474 if (mtu != out_mtu) {
475 SCLogError(SC_ERR_AFP_CREATE,
476 "MTU on %s (%d) and %s (%d) are not equal, "
477 "transmission of packets bigger than %d will fail.",
478 ptv->iface, mtu,
479 ptv->out_iface, out_mtu,
480 (out_mtu > mtu) ? mtu : out_mtu);
481 }
482 peerslist.peered += 2;
483 break;
ac56b1bf 484 }
662dccd8
EL
485 }
486
487 AFPPeerUpdate(ptv);
488
489 SCReturnInt(TM_ECODE_OK);
490}
491
ab1200fb 492static int AFPPeersListWaitTurn(AFPPeer *peer)
60400163 493{
b2691cbe
EL
494 /* If turn is zero, we already have started threads once */
495 if (peerslist.turn == 0)
496 return 0;
497
60400163
EL
498 if (peer->turn == SC_ATOMIC_GET(peerslist.reached))
499 return 0;
500 return 1;
501}
502
ab1200fb 503static void AFPPeersListReachedInc(void)
60400163 504{
b2691cbe
EL
505 if (peerslist.turn == 0)
506 return;
507
508 if (SC_ATOMIC_ADD(peerslist.reached, 1) == peerslist.turn) {
509 SCLogInfo("All AFP capture threads are running.");
510 (void)SC_ATOMIC_SET(peerslist.reached, 0);
511 /* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
512 * restarted.
513 */
514 peerslist.turn = 0;
515 }
60400163
EL
516}
517
ab1200fb 518static int AFPPeersListStarted(void)
919377d4
EL
519{
520 return !peerslist.turn;
521}
522
a6457262
EL
523/**
524 * \brief Clean the global peers list.
525 */
662dccd8
EL
526void AFPPeersListClean()
527{
528 AFPPeer *pitem;
529
530 while ((pitem = TAILQ_FIRST(&peerslist.peers))) {
531 TAILQ_REMOVE(&peerslist.peers, pitem, next);
532 AFPPeerClean(pitem);
533 }
534}
535
a6457262
EL
536/**
537 * @}
538 */
539
c45d8985
EL
540/**
541 * \brief Registration Function for DecodeAFP.
542 * \todo Unit tests are needed for this module.
543 */
8f1d7503
KS
544void TmModuleDecodeAFPRegister (void)
545{
c45d8985
EL
546 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
547 tmm_modules[TMM_DECODEAFP].ThreadInit = DecodeAFPThreadInit;
548 tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
549 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
2864f9ee 550 tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
c45d8985
EL
551 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
552 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 553 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
c45d8985
EL
554}
555
662dccd8 556
e80b30c0
EL
557static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose);
558
e8a4a4c4 559static inline void AFPDumpCounters(AFPThreadVars *ptv)
6efd37a3 560{
6efd37a3 561#ifdef PACKET_STATISTICS
e8a4a4c4
EL
562 struct tpacket_stats kstats;
563 socklen_t len = sizeof (struct tpacket_stats);
564 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
565 &kstats, &len) > -1) {
566 SCLogDebug("(%s) Kernel: Packets %" PRIu32 ", dropped %" PRIu32 "",
567 ptv->tv->name,
568 kstats.tp_packets, kstats.tp_drops);
8992275b
VJ
569 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, kstats.tp_packets);
570 StatsAddUI64(ptv->tv, ptv->capture_kernel_drops, kstats.tp_drops);
3ce39433
EL
571 (void) SC_ATOMIC_ADD(ptv->livedev->drop, (uint64_t) kstats.tp_drops);
572 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, (uint64_t) kstats.tp_packets);
6efd37a3 573 }
e8a4a4c4 574#endif
6efd37a3 575}
c45d8985
EL
576
577/**
578 * \brief AF packet read function.
579 *
580 * This function fills
581 * From here the packets are picked up by the DecodeAFP thread.
582 *
583 * \param user pointer to AFPThreadVars
584 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
585 */
ab1200fb 586static int AFPRead(AFPThreadVars *ptv)
c45d8985
EL
587{
588 Packet *p = NULL;
589 /* XXX should try to use read that get directly to packet */
c45d8985
EL
590 int offset = 0;
591 int caplen;
592 struct sockaddr_ll from;
593 struct iovec iov;
594 struct msghdr msg;
c45d8985
EL
595 struct cmsghdr *cmsg;
596 union {
597 struct cmsghdr cmsg;
598 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
599 } cmsg_buf;
6efd37a3 600 unsigned char aux_checksum = 0;
c45d8985
EL
601
602 msg.msg_name = &from;
603 msg.msg_namelen = sizeof(from);
604 msg.msg_iov = &iov;
605 msg.msg_iovlen = 1;
c45d8985
EL
606 msg.msg_control = &cmsg_buf;
607 msg.msg_controllen = sizeof(cmsg_buf);
c45d8985
EL
608 msg.msg_flags = 0;
609
610 if (ptv->cooked)
611 offset = SLL_HEADER_LEN;
612 else
613 offset = 0;
e80b30c0
EL
614 iov.iov_len = ptv->datalen - offset;
615 iov.iov_base = ptv->data + offset;
c45d8985
EL
616
617 caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
618
619 if (caplen < 0) {
620 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
621 errno);
62e63e3f 622 SCReturnInt(AFP_READ_FAILURE);
c45d8985 623 }
ff6365dd
EL
624
625 p = PacketGetFromQueueOrAlloc();
c45d8985 626 if (p == NULL) {
9efa4ace 627 SCReturnInt(AFP_SURI_FAILURE);
c45d8985 628 }
b33986c8 629 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
630 if (ptv->flags & AFP_BYPASS) {
631 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585
EL
632#ifdef HAVE_PACKET_EBPF
633 p->afp_v.v4_map_fd = ptv->v4_map_fd;
634 p->afp_v.v6_map_fd = ptv->v6_map_fd;
635#endif
06173267 636 }
8c880879
EL
637 if (ptv->flags & AFP_XDPBYPASS) {
638 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585
EL
639#ifdef HAVE_PACKET_EBPF
640 p->afp_v.v4_map_fd = ptv->v4_map_fd;
641 p->afp_v.v6_map_fd = ptv->v6_map_fd;
642#endif
8c880879 643 }
c45d8985
EL
644
645 /* get timestamp of packet via ioctl */
646 if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
647 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
648 errno);
649 TmqhOutputPacketpool(ptv->tv, p);
62e63e3f 650 SCReturnInt(AFP_READ_FAILURE);
c45d8985
EL
651 }
652
653 ptv->pkts++;
51eb9605 654 p->livedev = ptv->livedev;
c45d8985
EL
655
656 /* add forged header */
657 if (ptv->cooked) {
e80b30c0 658 SllHdr * hdrp = (SllHdr *)ptv->data;
c45d8985
EL
659 /* XXX this is minimalist, but this seems enough */
660 hdrp->sll_protocol = from.sll_protocol;
661 }
662
663 p->datalink = ptv->datalink;
664 SET_PKT_LEN(p, caplen + offset);
e80b30c0 665 if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
c45d8985 666 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 667 SCReturnInt(AFP_SURI_FAILURE);
c45d8985 668 }
e80b30c0
EL
669 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
670 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
671
6062e00c
EL
672 /* We only check for checksum disable */
673 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
51eb9605
EL
674 p->flags |= PKT_IGNORE_CHECKSUM;
675 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
676 if (ptv->livedev->ignore_checksum) {
677 p->flags |= PKT_IGNORE_CHECKSUM;
a565148f 678 } else if (ChecksumAutoModeCheck(ptv->pkts,
51eb9605
EL
679 SC_ATOMIC_GET(ptv->livedev->pkts),
680 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
681 ptv->livedev->ignore_checksum = 1;
6062e00c 682 p->flags |= PKT_IGNORE_CHECKSUM;
51eb9605 683 }
6062e00c 684 } else {
6efd37a3
EL
685 aux_checksum = 1;
686 }
6062e00c 687
6efd37a3
EL
688 /* List is NULL if we don't have activated auxiliary data */
689 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
690 struct tpacket_auxdata *aux;
f6ddaf33 691
6efd37a3
EL
692 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
693 cmsg->cmsg_level != SOL_PACKET ||
694 cmsg->cmsg_type != PACKET_AUXDATA)
695 continue;
f6ddaf33 696
6efd37a3
EL
697 aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
698
699 if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
700 p->flags |= PKT_IGNORE_CHECKSUM;
f6ddaf33 701 }
6efd37a3 702 break;
f6ddaf33
EL
703 }
704
c469824b
EL
705 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
706 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 707 SCReturnInt(AFP_SURI_FAILURE);
c469824b 708 }
62e63e3f 709 SCReturnInt(AFP_READ_OK);
c45d8985
EL
710}
711
ecf59be4
EL
712/**
713 * \brief AF packet write function.
714 *
715 * This function has to be called before the memory
716 * related to Packet in ring buffer is released.
717 *
718 * \param pointer to Packet
719 * \param version of capture: TPACKET_V2 or TPACKET_V3
720 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
721 *
722 */
723static TmEcode AFPWritePacket(Packet *p, int version)
662dccd8
EL
724{
725 struct sockaddr_ll socket_address;
726 int socket;
ecf59be4
EL
727 uint8_t *pstart;
728 size_t plen;
ee7e689b
AG
729 union thdr h;
730 uint16_t vlan_tci = 0;
662dccd8
EL
731
732 if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
3f107fa1 733 if (PACKET_TEST_ACTION(p, ACTION_DROP)) {
662dccd8
EL
734 return TM_ECODE_OK;
735 }
736 }
737
738 if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
739 return TM_ECODE_OK;
740
741 if (p->ethh == NULL) {
742 SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
743 return TM_ECODE_FAILED;
744 }
745 /* Index of the network device */
746 socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
747 /* Address length*/
748 socket_address.sll_halen = ETH_ALEN;
749 /* Destination MAC */
750 memcpy(socket_address.sll_addr, p->ethh, 6);
751
752 /* Send packet, locking the socket if necessary */
753 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
754 SCMutexLock(&p->afp_v.peer->sock_protect);
755 socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
ecf59be4 756
ee7e689b
AG
757 h.raw = p->afp_v.relptr;
758
ecf59be4 759 if (version == TPACKET_V2) {
ecf59be4
EL
760 /* Copy VLAN header from ring memory. For post june 2011 kernel we test
761 * the flag. It is not defined for older kernel so we go best effort
762 * and test for non zero value of the TCI header. */
763 if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
ee7e689b
AG
764 vlan_tci = h.h2->tp_vlan_tci;
765 }
766 } else {
767#ifdef HAVE_TPACKET_V3
768 if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
769 vlan_tci = h.h3->hv1.tp_vlan_tci;
ecf59be4 770 }
ee7e689b
AG
771#else
772 /* Should not get here */
773 BUG_ON(1);
774#endif
775 }
776
777 if (vlan_tci != 0) {
778 pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
779 plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
780 /* move ethernet addresses */
781 memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
782 /* write vlan info */
783 *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
784 *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
ecf59be4
EL
785 } else {
786 pstart = GET_PKT_DATA(p);
787 plen = GET_PKT_LEN(p);
788 }
789
790 if (sendto(socket, pstart, plen, 0,
662dccd8
EL
791 (struct sockaddr*) &socket_address,
792 sizeof(struct sockaddr_ll)) < 0) {
793 SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
794 socket,
795 strerror(errno));
796 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
797 SCMutexUnlock(&p->afp_v.peer->sock_protect);
798 return TM_ECODE_FAILED;
799 }
800 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
801 SCMutexUnlock(&p->afp_v.peer->sock_protect);
802
803 return TM_ECODE_OK;
804}
805
ab1200fb 806static void AFPReleaseDataFromRing(Packet *p)
2011a3f8 807{
662dccd8
EL
808 /* Need to be in copy mode and need to detect early release
809 where Ethernet header could not be set (and pseudo packet) */
810 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
ecf59be4 811 AFPWritePacket(p, TPACKET_V2);
662dccd8 812 }
13f13b6d
EL
813
814 if (AFPDerefSocket(p->afp_v.mpeer) == 0)
680e941a 815 goto cleanup;
13f13b6d 816
2011a3f8
EL
817 if (p->afp_v.relptr) {
818 union thdr h;
819 h.raw = p->afp_v.relptr;
820 h.h2->tp_status = TP_STATUS_KERNEL;
2011a3f8 821 }
680e941a
EL
822
823cleanup:
824 AFPV_CLEANUP(&p->afp_v);
b076a26c
KS
825}
826
#ifdef HAVE_TPACKET_V3
/**
 * \brief Release function for packets captured with TPACKET_V3.
 *
 * In copy mode the packet is written to the peer first, unless it is a
 * pseudo packet; the Packet is then freed or released.
 *
 * \param p Packet to release
 */
static void AFPReleasePacketV3(Packet *p)
{
    /* Need to be in copy mode and need to detect early release
       where Ethernet header could not be set (and pseudo packet) */
    const int copy_mode_active = (p->afp_v.copy_mode != AFP_COPY_MODE_NONE);

    if (copy_mode_active && !PKT_IS_PSEUDOPKT(p)) {
        AFPWritePacket(p, TPACKET_V3);
    }

    PacketFreeOrRelease(p);
}
#endif
bae1b03c 838
ab1200fb 839static void AFPReleasePacket(Packet *p)
b076a26c
KS
840{
841 AFPReleaseDataFromRing(p);
842 PacketFreeOrRelease(p);
2011a3f8
EL
843}
844
49b7b00f
EL
845/**
846 * \brief AF packet read function for ring
847 *
848 * Walks the frames of the ring starting at ptv->frame_offset. Each frame
 * that is ready is attached to (zero copy) or copied into a Packet which
 * is then passed to TmThreadsSlotProcessPkt().
849 * From here the packets are picked up by the DecodeAFP thread.
850 *
851 * \param ptv pointer to AFPThreadVars
852 * \retval AFP_READ_OK when the read loop ends normally
 * \retval AFP_READ_FAILURE when the ring contains a NULL frame pointer
 * \retval AFP_SURI_FAILURE when no Packet can be obtained or when setting,
 *         copying or processing the packet fails
 * \retval AFP_KERNEL_DROP when a frame flagged TP_STATUS_LOSING was seen
 *         and AFP_EMERGENCY_MODE is set
853 */
ab1200fb 854static int AFPReadFromRing(AFPThreadVars *ptv)
49b7b00f
EL
855{
856 Packet *p = NULL;
857 union thdr h;
/* set once a frame flagged TP_STATUS_LOSING has been seen */
27b5136b 858 uint8_t emergency_flush = 0;
/* number of ready frames seen during this call */
4d8f70c6 859 int read_pkts = 0;
/* frame index at which the current search for a ready frame started,
 * -1 when no search is in progress */
b26ec603 860 int loop_start = -1;
4d8f70c6 861
49b7b00f 862
a369f8c3
EL
863 /* Loop till we have packets available */
864 while (1) {
53c02334
AS
865 if (unlikely(suricata_ctl_flags != 0)) {
866 break;
867 }
868
a369f8c3 869 /* Read packet from ring */
69d0d484 870 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
9efa4ace
EL
871 if (unlikely(h.raw == NULL)) {
872 /* Impossible we reach this point in normal condition, so trigger
873 * a failure in reading */
874 SCReturnInt(AFP_READ_FAILURE);
34b3f194 875 }
662dccd8 876
/* Frame not ready: its status is zero or it is still marked as in use
 * by Suricata */
82a2dd85 877 if ((! h.h2->tp_status) || (h.h2->tp_status & TP_STATUS_USER_BUSY)) {
/* Nothing read yet: search the following frames for a ready one */
4d8f70c6 878 if (read_pkts == 0) {
b26ec603
EL
879 if (loop_start == -1) {
880 loop_start = ptv->frame_offset;
/* back at the frame where the search started: no frame is ready */
881 } else if (unlikely(loop_start == (int)ptv->frame_offset)) {
882 SCReturnInt(AFP_READ_OK);
883 }
69d0d484 884 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
b26ec603
EL
885 ptv->frame_offset = 0;
886 }
887 continue;
4d8f70c6 888 }
/* Some frames were read before this one: stop here */
27b5136b
EL
889 if ((emergency_flush) && (ptv->flags & AFP_EMERGENCY_MODE)) {
890 SCReturnInt(AFP_KERNEL_DROP);
891 } else {
892 SCReturnInt(AFP_READ_OK);
893 }
894 }
4d8f70c6
EL
895
896 read_pkts++;
b26ec603 897 loop_start = -1;
4d8f70c6 898
4a1a0080
EL
899 /* Our packet is still used by suricata, we exit read loop to
900 * gain some time */
901 if (h.h2->tp_status & TP_STATUS_USER_BUSY) {
902 SCReturnInt(AFP_READ_OK);
903 }
904
/* Emergency flush: give the frame back without processing it */
27b5136b
EL
905 if ((ptv->flags & AFP_EMERGENCY_MODE) && (emergency_flush == 1)) {
906 h.h2->tp_status = TP_STATUS_KERNEL;
907 goto next_frame;
a369f8c3
EL
908 }
909
910 p = PacketGetFromQueueOrAlloc();
911 if (p == NULL) {
9efa4ace 912 SCReturnInt(AFP_SURI_FAILURE);
34b3f194 913 }
b33986c8 914 PKT_SET_SRC(p, PKT_SRC_WIRE);
/* Install the bypass callback; when both flags are set the XDP one set
 * below replaces this one */
06173267
EL
915 if (ptv->flags & AFP_BYPASS) {
916 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585 917#ifdef HAVE_PACKET_EBPF
6062c27e
EL
918 p->afp_v.v4_map_fd = ptv->v4_map_fd;
919 p->afp_v.v6_map_fd = ptv->v6_map_fd;
d65f4585 920#endif
06173267 921 }
8c880879
EL
922 if (ptv->flags & AFP_XDPBYPASS) {
923 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585 924#ifdef HAVE_PACKET_EBPF
6062c27e
EL
925 p->afp_v.v4_map_fd = ptv->v4_map_fd;
926 p->afp_v.v6_map_fd = ptv->v6_map_fd;
d65f4585 927#endif
8c880879 928 }
49b7b00f 929
4a1a0080
EL
930 /* Suricata will treat packet so telling it is busy, this
931 * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
932 * function. */
933 h.h2->tp_status |= TP_STATUS_USER_BUSY;
934
a369f8c3 935 ptv->pkts++;
a369f8c3 936 p->livedev = ptv->livedev;
a369f8c3 937 p->datalink = ptv->datalink;
d0940396 938
/* Only logged: tp_snaplen bytes are used for the packet data below */
a369f8c3
EL
939 if (h.h2->tp_len > h.h2->tp_snaplen) {
940 SCLogDebug("Packet length (%d) > snaplen (%d), truncating",
941 h.h2->tp_len, h.h2->tp_snaplen);
942 }
71e47868
EL
943
944 /* get vlan id from header */
9500d12c 945 if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
e871f713 946 (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
01a8cc4e 947 p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
71e47868
EL
948 p->vlan_idx = 1;
949 p->vlanh[0] = NULL;
950 }
951
/* Zero copy: the Packet points into the ring and the frame is released
 * later through the ReleasePacket callback */
a369f8c3
EL
952 if (ptv->flags & AFP_ZERO_COPY) {
953 if (PacketSetData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
954 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 955 SCReturnInt(AFP_SURI_FAILURE);
662dccd8 956 } else {
0f2b3406 957 p->afp_v.relptr = h.raw;
b076a26c 958 p->ReleasePacket = AFPReleasePacket;
5f12b234
EL
959 p->afp_v.mpeer = ptv->mpeer;
960 AFPRefSocket(ptv->mpeer);
0f2b3406
EL
961
962 p->afp_v.copy_mode = ptv->copy_mode;
963 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
964 p->afp_v.peer = ptv->mpeer->peer;
965 } else {
966 p->afp_v.peer = NULL;
662dccd8 967 }
a369f8c3
EL
968 }
969 } else {
970 if (PacketCopyData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
9efa4ace
EL
971 /* As we can possibly fail to copy the data due to invalid data, let's
972 * skip this packet and switch to the next one.
973 */
974 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 975 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
9efa4ace
EL
976 ptv->frame_offset = 0;
977 }
a369f8c3 978 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 979 SCReturnInt(AFP_SURI_FAILURE);
a369f8c3
EL
980 }
981 }
d65f4585 982
a369f8c3
EL
983 /* Timestamp */
984 p->ts.tv_sec = h.h2->tp_sec;
985 p->ts.tv_usec = h.h2->tp_nsec/1000;
986 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
987 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
988
989 /* We only check for checksum disable */
990 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
991 p->flags |= PKT_IGNORE_CHECKSUM;
992 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
993 if (ptv->livedev->ignore_checksum) {
994 p->flags |= PKT_IGNORE_CHECKSUM;
995 } else if (ChecksumAutoModeCheck(ptv->pkts,
996 SC_ATOMIC_GET(ptv->livedev->pkts),
997 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
998 ptv->livedev->ignore_checksum = 1;
999 p->flags |= PKT_IGNORE_CHECKSUM;
1000 }
1001 } else {
1002 if (h.h2->tp_status & TP_STATUS_CSUMNOTREADY) {
49b7b00f 1003 p->flags |= PKT_IGNORE_CHECKSUM;
a369f8c3 1004 }
ee6ba099
EL
1005 }
/* The frame is flagged TP_STATUS_LOSING: remember it for the following
 * frames and refresh the kernel counters */
1006 if (h.h2->tp_status & TP_STATUS_LOSING) {
1007 emergency_flush = 1;
e8a4a4c4 1008 AFPDumpCounters(ptv);
a369f8c3
EL
1009 }
1010
5f12b234
EL
1011 /* release frame if not in zero copy mode */
1012 if (!(ptv->flags & AFP_ZERO_COPY)) {
1013 h.h2->tp_status = TP_STATUS_KERNEL;
1014 }
1015
/* On failure the frame is given back, the offset moves to the next frame
 * and the Packet is returned to the pool */
a369f8c3
EL
1016 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
1017 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 1018 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
a369f8c3
EL
1019 ptv->frame_offset = 0;
1020 }
1021 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1022 SCReturnInt(AFP_SURI_FAILURE);
49b7b00f 1023 }
49b7b00f 1024
27b5136b 1025next_frame:
69d0d484 1026 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
34b3f194 1027 ptv->frame_offset = 0;
350d7619
EL
1028 /* Get out of loop to be sure we will reach maintenance tasks */
1029 SCReturnInt(AFP_READ_OK);
34b3f194 1030 }
34b3f194
EL
1031 }
1032
49b7b00f
EL
1033 SCReturnInt(AFP_READ_OK);
1034}
1035
f947539d 1036#ifdef HAVE_TPACKET_V3
bae1b03c
EL
1037static inline void AFPFlushBlock(struct tpacket_block_desc *pbd)
1038{
1039 pbd->hdr.bh1.block_status = TP_STATUS_KERNEL;
1040}
1041
1042static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc *pbd, struct tpacket3_hdr *ppd)
1043{
1044 Packet *p = PacketGetFromQueueOrAlloc();
1045 if (p == NULL) {
9efa4ace 1046 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1047 }
1048 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
1049 if (ptv->flags & AFP_BYPASS) {
1050 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585 1051#ifdef HAVE_PACKET_EBPF
6062c27e
EL
1052 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1053 p->afp_v.v6_map_fd = ptv->v6_map_fd;
d65f4585 1054#endif
e98b5e49 1055 } else if (ptv->flags & AFP_XDPBYPASS) {
8c880879 1056 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585 1057#ifdef HAVE_PACKET_EBPF
6062c27e
EL
1058 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1059 p->afp_v.v6_map_fd = ptv->v6_map_fd;
d65f4585 1060#endif
8c880879 1061 }
bae1b03c
EL
1062
1063 ptv->pkts++;
bae1b03c
EL
1064 p->livedev = ptv->livedev;
1065 p->datalink = ptv->datalink;
1066
e41a9d63
AG
1067 if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
1068 (ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
1069 p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
1070 p->vlan_idx = 1;
1071 p->vlanh[0] = NULL;
1072 }
1073
bae1b03c
EL
1074 if (ptv->flags & AFP_ZERO_COPY) {
1075 if (PacketSetData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1076 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1077 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c 1078 }
310b27a1 1079 p->afp_v.relptr = ppd;
bae1b03c
EL
1080 p->ReleasePacket = AFPReleasePacketV3;
1081 p->afp_v.mpeer = ptv->mpeer;
1082 AFPRefSocket(ptv->mpeer);
1083
1084 p->afp_v.copy_mode = ptv->copy_mode;
1085 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
1086 p->afp_v.peer = ptv->mpeer->peer;
1087 } else {
1088 p->afp_v.peer = NULL;
1089 }
1090 } else {
1091 if (PacketCopyData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1092 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1093 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1094 }
1095 }
1096 /* Timestamp */
1097 p->ts.tv_sec = ppd->tp_sec;
1098 p->ts.tv_usec = ppd->tp_nsec/1000;
1099 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
1100 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
1101
1102 /* We only check for checksum disable */
1103 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
1104 p->flags |= PKT_IGNORE_CHECKSUM;
1105 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
1106 if (ptv->livedev->ignore_checksum) {
1107 p->flags |= PKT_IGNORE_CHECKSUM;
1108 } else if (ChecksumAutoModeCheck(ptv->pkts,
1109 SC_ATOMIC_GET(ptv->livedev->pkts),
1110 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
1111 ptv->livedev->ignore_checksum = 1;
1112 p->flags |= PKT_IGNORE_CHECKSUM;
1113 }
1114 } else {
1115 if (ppd->tp_status & TP_STATUS_CSUMNOTREADY) {
1116 p->flags |= PKT_IGNORE_CHECKSUM;
1117 }
1118 }
1119
1120 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
bae1b03c 1121 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1122 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1123 }
1124
1125 SCReturnInt(AFP_READ_OK);
1126}
1127
1128static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
1129{
1130 int num_pkts = pbd->hdr.bh1.num_pkts, i;
1131 uint8_t *ppd;
9efa4ace 1132 int ret = 0;
bae1b03c
EL
1133
1134 ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
1135 for (i = 0; i < num_pkts; ++i) {
9efa4ace
EL
1136 ret = AFPParsePacketV3(ptv, pbd,
1137 (struct tpacket3_hdr *)ppd);
1138 switch (ret) {
1139 case AFP_READ_OK:
1140 break;
1141 case AFP_SURI_FAILURE:
1142 /* Internal error but let's just continue and
1143 * treat thenext packet */
1144 break;
1145 case AFP_READ_FAILURE:
1146 SCReturnInt(AFP_READ_FAILURE);
1147 default:
1148 SCReturnInt(ret);
5f84b55d 1149 }
bae1b03c
EL
1150 ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset;
1151 }
1152
1153 SCReturnInt(AFP_READ_OK);
1154}
f947539d 1155#endif /* HAVE_TPACKET_V3 */
bae1b03c
EL
1156
1157/**
1158 * \brief AF packet read function for ring
1159 *
1160 * This function fills
1161 * From here the packets are picked up by the DecodeAFP thread.
1162 *
1163 * \param user pointer to AFPThreadVars
1164 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1165 */
ab1200fb 1166static int AFPReadFromRingV3(AFPThreadVars *ptv)
bae1b03c 1167{
c2d0d938 1168#ifdef HAVE_TPACKET_V3
bae1b03c 1169 struct tpacket_block_desc *pbd;
9efa4ace 1170 int ret = 0;
bae1b03c
EL
1171
1172 /* Loop till we have packets available */
1173 while (1) {
1174 if (unlikely(suricata_ctl_flags != 0)) {
1175 SCLogInfo("Exiting AFP V3 read loop");
1176 break;
1177 }
1178
69d0d484 1179 pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
bae1b03c
EL
1180
1181 /* block is not ready to be read */
1182 if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
1183 SCReturnInt(AFP_READ_OK);
1184 }
1185
9efa4ace
EL
1186 ret = AFPWalkBlock(ptv, pbd);
1187 if (unlikely(ret != AFP_READ_OK)) {
bae1b03c 1188 AFPFlushBlock(pbd);
9efa4ace 1189 SCReturnInt(ret);
bae1b03c
EL
1190 }
1191
1192 AFPFlushBlock(pbd);
69d0d484 1193 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
bae1b03c
EL
1194 /* return to maintenance task after one loop on the ring */
1195 if (ptv->frame_offset == 0) {
1196 SCReturnInt(AFP_READ_OK);
1197 }
1198 }
c2d0d938 1199#endif
bae1b03c
EL
1200 SCReturnInt(AFP_READ_OK);
1201}
1202
13f13b6d
EL
1203/**
1204 * \brief Reference socket
1205 *
1206 * \retval O in case of failure, 1 in case of success
1207 */
1208static int AFPRefSocket(AFPPeer* peer)
1209{
1210 if (unlikely(peer == NULL))
1211 return 0;
1212
1213 (void)SC_ATOMIC_ADD(peer->sock_usage, 1);
1214 return 1;
1215}
1216
1217
1218/**
1219 * \brief Dereference socket
1220 *
1221 * \retval 1 if socket is still alive, 0 if not
1222 */
1223static int AFPDerefSocket(AFPPeer* peer)
1224{
4424f5a2
EL
1225 if (peer == NULL)
1226 return 1;
1227
13f13b6d
EL
1228 if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 0) {
1229 if (SC_ATOMIC_GET(peer->state) == AFP_STATE_DOWN) {
1230 SCLogInfo("Cleaning socket connected to '%s'", peer->iface);
1231 close(SC_ATOMIC_GET(peer->socket));
1232 return 0;
1233 }
1234 }
1235 return 1;
1236}
1237
ab1200fb 1238static void AFPSwitchState(AFPThreadVars *ptv, int state)
13f13b6d
EL
1239{
1240 ptv->afp_state = state;
1241 ptv->down_count = 0;
49b7b00f 1242
13f13b6d
EL
1243 AFPPeerUpdate(ptv);
1244
1245 /* Do cleaning if switching to down state */
1246 if (state == AFP_STATE_DOWN) {
5f84b55d
EL
1247#ifdef HAVE_TPACKET_V3
1248 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
1249 if (!ptv->ring.v3) {
1250 SCFree(ptv->ring.v3);
1251 ptv->ring.v3 = NULL;
5f84b55d
EL
1252 }
1253 } else {
1254#endif
69d0d484 1255 if (ptv->ring.v2) {
5f84b55d 1256 /* only used in reading phase, we can free it */
69d0d484
VJ
1257 SCFree(ptv->ring.v2);
1258 ptv->ring.v2 = NULL;
5f84b55d
EL
1259 }
1260#ifdef HAVE_TPACKET_V3
13f13b6d 1261 }
5f84b55d 1262#endif
13f13b6d
EL
1263 if (ptv->socket != -1) {
1264 /* we need to wait for all packets to return data */
1265 if (SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1) == 0) {
b86e1762 1266 SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
cba41207 1267 munmap(ptv->ring_buf, ptv->ring_buflen);
13f13b6d
EL
1268 close(ptv->socket);
1269 ptv->socket = -1;
1270 }
1271 }
1272 }
1273 if (state == AFP_STATE_UP) {
1274 (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
1275 }
1276}
49b7b00f 1277
7fea0ec6
EL
1278static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
1279 uint64_t *discarded_pkts)
919377d4
EL
1280{
1281 struct sockaddr_ll from;
1282 struct iovec iov;
1283 struct msghdr msg;
1284 struct timeval ts;
1285 union {
1286 struct cmsghdr cmsg;
1287 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
1288 } cmsg_buf;
1289
1290
1291 if (unlikely(suricata_ctl_flags != 0)) {
1292 return 1;
1293 }
1294
1295 msg.msg_name = &from;
1296 msg.msg_namelen = sizeof(from);
1297 msg.msg_iov = &iov;
1298 msg.msg_iovlen = 1;
1299 msg.msg_control = &cmsg_buf;
1300 msg.msg_controllen = sizeof(cmsg_buf);
1301 msg.msg_flags = 0;
1302
1303 iov.iov_len = ptv->datalen;
1304 iov.iov_base = ptv->data;
1305
339f0665 1306 (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
919377d4
EL
1307
1308 if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
1309 /* FIXME */
1310 return -1;
1311 }
1312
1313 if ((ts.tv_sec > synctv->tv_sec) ||
1314 (ts.tv_sec >= synctv->tv_sec &&
1315 ts.tv_usec > synctv->tv_usec)) {
1316 return 1;
1317 }
1318 return 0;
1319}
1320
7fea0ec6
EL
1321static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
1322 uint64_t *discarded_pkts)
919377d4
EL
1323{
1324 union thdr h;
1325
1326 if (unlikely(suricata_ctl_flags != 0)) {
1327 return 1;
1328 }
1329
f947539d 1330#ifdef HAVE_TPACKET_V3
bae1b03c 1331 if (ptv->flags & AFP_TPACKET_V3) {
cebbe06f 1332 int ret = 0;
7fea0ec6 1333 struct tpacket_block_desc *pbd;
69d0d484 1334 pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
7fea0ec6 1335 *discarded_pkts += pbd->hdr.bh1.num_pkts;
cebbe06f
VJ
1336 struct tpacket3_hdr *ppd =
1337 (struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
1338 if (((time_t)ppd->tp_sec > synctv->tv_sec) ||
1339 ((time_t)ppd->tp_sec == synctv->tv_sec &&
1340 (suseconds_t) (ppd->tp_nsec / 1000) > (suseconds_t)synctv->tv_usec)) {
1341 ret = 1;
1342 }
7fea0ec6 1343 AFPFlushBlock(pbd);
69d0d484 1344 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
cebbe06f 1345 return ret;
f947539d
VJ
1346
1347 } else
1348#endif
1349 {
7fea0ec6 1350 /* Read packet from ring */
69d0d484 1351 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
7fea0ec6
EL
1352 if (h.raw == NULL) {
1353 return -1;
1354 }
1355 (*discarded_pkts)++;
1356 if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
1357 ((time_t)h.h2->tp_sec == synctv->tv_sec &&
1358 (suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
1359 return 1;
1360 }
919377d4 1361
7fea0ec6 1362 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 1363 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
7fea0ec6
EL
1364 ptv->frame_offset = 0;
1365 }
919377d4
EL
1366 }
1367
1368
1369 return 0;
1370}
1371
806844d8
VJ
1372/** \brief wait for all afpacket threads to fully init
1373 *
1374 * Discard packets before all threads are ready, as the cluster
1375 * setup is not complete yet.
1376 *
1377 * if AFPPeersListStarted() returns true init is complete
1378 *
1379 * \retval r 1 = happy, otherwise unhappy
1380 */
7fea0ec6 1381static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
919377d4 1382{
919377d4 1383 struct timeval synctv;
806844d8
VJ
1384 struct pollfd fds;
1385
1386 fds.fd = ptv->socket;
1387 fds.events = POLLIN;
919377d4
EL
1388
1389 /* Set timeval to end of the world */
1390 synctv.tv_sec = 0xffffffff;
1391 synctv.tv_usec = 0xffffffff;
1392
1393 while (1) {
8709a20d 1394 int r = poll(&fds, 1, POLL_TIMEOUT);
806844d8
VJ
1395 if (r > 0 &&
1396 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1397 SCLogWarning(SC_ERR_AFP_READ, "poll failed %02x",
1398 fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL));
1399 return 0;
1400 } else if (r > 0) {
1401 if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
1402 gettimeofday(&synctv, NULL);
1403 }
1404 if (ptv->flags & AFP_RING_MODE) {
7fea0ec6 1405 r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
806844d8 1406 } else {
7fea0ec6 1407 r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
806844d8
VJ
1408 }
1409 SCLogDebug("Discarding on %s", ptv->tv->name);
1410 switch (r) {
1411 case 1:
9f7ba071 1412 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8
VJ
1413 return 1;
1414 case -1:
1415 return r;
1416 }
1417 /* no packets */
1418 } else if (r == 0 && AFPPeersListStarted()) {
86a3f064 1419 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8 1420 return 1;
43b6cbd4 1421 } else if (r < 0) { /* only exit on error */
806844d8
VJ
1422 SCLogWarning(SC_ERR_AFP_READ, "poll failed with retval %d", r);
1423 return 0;
919377d4
EL
1424 }
1425 }
1426 return 1;
1427}
1428
13f13b6d
EL
1429/**
1430 * \brief Try to reopen socket
1431 *
1432 * \retval 0 in case of success, negative if error occurs or a condition
1433 * is not met.
1434 */
c45d8985
EL
1435static int AFPTryReopen(AFPThreadVars *ptv)
1436{
13f13b6d
EL
1437 ptv->down_count++;
1438
13f13b6d
EL
1439 /* Don't reconnect till we have packet that did not release data */
1440 if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
1441 return -1;
1442 }
c45d8985 1443
8709a20d 1444 int afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
c45d8985 1445 if (afp_activate_r != 0) {
13f13b6d
EL
1446 if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
1447 SCLogWarning(SC_ERR_AFP_CREATE, "Can not open iface '%s'",
1448 ptv->iface);
1449 }
c45d8985
EL
1450 return afp_activate_r;
1451 }
1452
3bea3b39 1453 SCLogInfo("Interface '%s' is back", ptv->iface);
c45d8985
EL
1454 return 0;
1455}
1456
e80b30c0
EL
1457/**
1458 * \brief Main AF_PACKET reading Loop function
1459 */
1460TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
1461{
34581ce9
AS
1462 SCEnter();
1463
e80b30c0 1464 AFPThreadVars *ptv = (AFPThreadVars *)data;
e80b30c0
EL
1465 struct pollfd fds;
1466 int r;
34581ce9 1467 TmSlot *s = (TmSlot *)slot;
e8a4a4c4 1468 time_t last_dump = 0;
49612128 1469 time_t current_time;
5f400785 1470 int (*AFPReadFunc) (AFPThreadVars *);
7fea0ec6 1471 uint64_t discarded_pkts = 0;
e80b30c0 1472
34581ce9 1473 ptv->slot = s->slot_next;
e80b30c0 1474
5f400785 1475 if (ptv->flags & AFP_RING_MODE) {
bae1b03c
EL
1476 if (ptv->flags & AFP_TPACKET_V3) {
1477 AFPReadFunc = AFPReadFromRingV3;
1478 } else {
1479 AFPReadFunc = AFPReadFromRing;
1480 }
5f400785
EL
1481 } else {
1482 AFPReadFunc = AFPRead;
1483 }
1484
60400163
EL
1485 if (ptv->afp_state == AFP_STATE_DOWN) {
1486 /* Wait for our turn, threads before us must have opened the socket */
1487 while (AFPPeersListWaitTurn(ptv->mpeer)) {
1488 usleep(1000);
1992a227
EL
1489 if (suricata_ctl_flags != 0) {
1490 break;
1491 }
60400163
EL
1492 }
1493 r = AFPCreateSocket(ptv, ptv->iface, 1);
1494 if (r < 0) {
1992a227
EL
1495 switch (-r) {
1496 case AFP_FATAL_ERROR:
1497 SCLogError(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, fatal error");
1992a227
EL
1498 SCReturnInt(TM_ECODE_FAILED);
1499 case AFP_RECOVERABLE_ERROR:
1500 SCLogWarning(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, retrying soon");
1501 }
60400163
EL
1502 }
1503 AFPPeersListReachedInc();
1504 }
1505 if (ptv->afp_state == AFP_STATE_UP) {
86a3f064 1506 SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
c99dc5a7 1507 AFPSynchronizeStart(ptv, &discarded_pkts);
7fea0ec6
EL
1508 /* let's reset counter as we will start the capture at the
1509 * next function call */
1510#ifdef PACKET_STATISTICS
1511 struct tpacket_stats kstats;
1512 socklen_t len = sizeof (struct tpacket_stats);
1513 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
1514 &kstats, &len) > -1) {
1515 uint64_t pkts = 0;
1516 SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
1517 ", dropped %" PRIu32 "",
1518 ptv->tv->name,
1519 kstats.tp_packets, kstats.tp_drops);
1520 pkts = kstats.tp_packets - discarded_pkts - kstats.tp_drops;
1521 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, pkts);
1522 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, pkts);
1523 }
1524#endif
60400163
EL
1525 }
1526
e80b30c0
EL
1527 fds.fd = ptv->socket;
1528 fds.events = POLLIN;
1529
1530 while (1) {
1531 /* Start by checking the state of our interface */
1532 if (unlikely(ptv->afp_state == AFP_STATE_DOWN)) {
1533 int dbreak = 0;
662dccd8 1534
e80b30c0
EL
1535 do {
1536 usleep(AFP_RECONNECT_TIMEOUT);
1537 if (suricata_ctl_flags != 0) {
1538 dbreak = 1;
1539 break;
1540 }
1541 r = AFPTryReopen(ptv);
09e709d1 1542 fds.fd = ptv->socket;
e80b30c0
EL
1543 } while (r < 0);
1544 if (dbreak == 1)
1545 break;
1546 }
1547
1548 /* make sure we have at least one packet in the packet pool, to prevent
1549 * us from alloc'ing packets at line rate */
3c6e01f6 1550 PacketPoolWait();
e80b30c0
EL
1551
1552 r = poll(&fds, 1, POLL_TIMEOUT);
1553
1554 if (suricata_ctl_flags != 0) {
1555 break;
1556 }
1557
1558 if (r > 0 &&
1559 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1560 if (fds.revents & (POLLHUP | POLLRDHUP)) {
13f13b6d 1561 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1562 continue;
ff6365dd 1563 } else if (fds.revents & POLLERR) {
e80b30c0
EL
1564 char c;
1565 /* Do a recv to get errno */
1566 if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
1567 continue; /* what, no error? */
3bea3b39 1568 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1569 "Error reading data from iface '%s': (%d) %s",
3bea3b39 1570 ptv->iface, errno, strerror(errno));
13f13b6d 1571 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1572 continue;
ff6365dd 1573 } else if (fds.revents & POLLNVAL) {
e80b30c0 1574 SCLogError(SC_ERR_AFP_READ, "Invalid polling request");
13f13b6d 1575 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1576 continue;
1577 }
1578 } else if (r > 0) {
5f400785 1579 r = AFPReadFunc(ptv);
62e63e3f 1580 switch (r) {
27adbfa8
EL
1581 case AFP_READ_OK:
1582 /* Trigger one dump of stats every second */
49612128
EL
1583 current_time = time(NULL);
1584 if (current_time != last_dump) {
27adbfa8 1585 AFPDumpCounters(ptv);
49612128 1586 last_dump = current_time;
27adbfa8
EL
1587 }
1588 break;
62e63e3f
EL
1589 case AFP_READ_FAILURE:
1590 /* AFPRead in error: best to reset the socket */
3bea3b39 1591 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1592 "AFPRead error reading data from iface '%s': (%d) %s",
3bea3b39 1593 ptv->iface, errno, strerror(errno));
13f13b6d 1594 AFPSwitchState(ptv, AFP_STATE_DOWN);
62e63e3f 1595 continue;
9efa4ace
EL
1596 case AFP_SURI_FAILURE:
1597 StatsIncr(ptv->tv, ptv->capture_errors);
62e63e3f 1598 break;
27b5136b 1599 case AFP_KERNEL_DROP:
e8a4a4c4 1600 AFPDumpCounters(ptv);
27b5136b 1601 break;
e80b30c0 1602 }
11099cfa 1603 } else if (unlikely(r == 0)) {
f53e687b
EL
1604 /* Trigger one dump of stats every second */
1605 current_time = time(NULL);
1606 if (current_time != last_dump) {
1607 AFPDumpCounters(ptv);
1608 last_dump = current_time;
1609 }
11099cfa
VJ
1610 /* poll timed out, lets see if we need to inject a fake packet */
1611 TmThreadsCaptureInjectPacket(tv, ptv->slot, NULL);
1612
e80b30c0 1613 } else if ((r < 0) && (errno != EINTR)) {
efbb5ce0 1614 SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d) %s",
3bea3b39 1615 ptv->iface,
e80b30c0 1616 errno, strerror(errno));
13f13b6d 1617 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1618 continue;
1619 }
752f03e7 1620 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1621 }
1622
4e561d6b 1623 AFPDumpCounters(ptv);
752f03e7 1624 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1625 SCReturnInt(TM_ECODE_OK);
1626}
1627
13f13b6d
EL
1628static int AFPGetDevFlags(int fd, const char *ifname)
1629{
1630 struct ifreq ifr;
1631
1632 memset(&ifr, 0, sizeof(ifr));
1633 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1634
1635 if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) {
1636 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1637 ifname, strerror(errno));
1638 return -1;
1639 }
1640
1641 return ifr.ifr_flags;
1642}
1643
1644
e80b30c0 1645static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose)
c45d8985
EL
1646{
1647 struct ifreq ifr;
1648
1649 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1650 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1651
1652 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
cba41207
AG
1653 if (verbose)
1654 SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
1655 ifname, strerror(errno));
c45d8985
EL
1656 return -1;
1657 }
1658
1659 return ifr.ifr_ifindex;
1660}
1661
e80b30c0 1662static int AFPGetDevLinktype(int fd, const char *ifname)
c45d8985
EL
1663{
1664 struct ifreq ifr;
1665
1666 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1667 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1668
1669 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1670 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1671 ifname, strerror(errno));
1672 return -1;
1673 }
1674
e80b30c0
EL
1675 switch (ifr.ifr_hwaddr.sa_family) {
1676 case ARPHRD_LOOPBACK:
1677 return LINKTYPE_ETHERNET;
1678 case ARPHRD_PPP:
11eb1d7c 1679 case ARPHRD_NONE:
e80b30c0
EL
1680 return LINKTYPE_RAW;
1681 default:
1682 return ifr.ifr_hwaddr.sa_family;
1683 }
c45d8985
EL
1684}
1685
b7bf299e
EL
1686int AFPGetLinkType(const char *ifname)
1687{
1688 int ltype;
1689
1690 int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1691 if (fd == -1) {
1692 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
1693 return LINKTYPE_RAW;
1694 }
1695
1696 ltype = AFPGetDevLinktype(fd, ifname);
1697 close(fd);
1698
1699 return ltype;
1700}
1701
49b7b00f
EL
1702static int AFPComputeRingParams(AFPThreadVars *ptv, int order)
1703{
1704 /* Compute structure:
1705 Target is to store all pending packets
1706 with a size equal to MTU + auxdata
1707 And we keep a decent number of block
1708
1709 To do so:
1710 Compute frame_size (aligned to be able to fit in block
1711 Check which block size we need. Blocksize is a 2^n * pagesize
1712 We then need to get order, big enough to have
1713 frame_size < block size
1714 Find number of frame per block (divide)
1715 Fill in packet_req
1716
1717 Compute frame size:
1718 described in packet_mmap.txt
1719 dependant on snaplen (need to use a variable ?)
1720snaplen: MTU ?
1721tp_hdrlen determine_version in daq_afpacket
1722in V1: sizeof(struct tpacket_hdr);
1723in V2: val in getsockopt(instance->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len)
1724frame size: TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1725
1726 */
1727 int tp_hdrlen = sizeof(struct tpacket_hdr);
1728 int snaplen = default_packet_size;
1729
03032457
EL
1730 if (snaplen == 0) {
1731 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1732 if (snaplen <= 0) {
1733 SCLogWarning(SC_ERR_INVALID_VALUE,
1734 "Unable to get MTU, setting snaplen to sane default of 1514");
1735 snaplen = 1514;
1736 }
1737 }
1738
69d0d484
VJ
1739 ptv->req.v2.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1740 ptv->req.v2.tp_block_size = getpagesize() << order;
1741 int frames_per_block = ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size;
49b7b00f 1742 if (frames_per_block == 0) {
bae1b03c 1743 SCLogError(SC_ERR_INVALID_VALUE, "Frame size bigger than block size");
49b7b00f
EL
1744 return -1;
1745 }
69d0d484
VJ
1746 ptv->req.v2.tp_frame_nr = ptv->ring_size;
1747 ptv->req.v2.tp_block_nr = ptv->req.v2.tp_frame_nr / frames_per_block + 1;
49b7b00f 1748 /* exact division */
69d0d484 1749 ptv->req.v2.tp_frame_nr = ptv->req.v2.tp_block_nr * frames_per_block;
b3bf7a57 1750 SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
69d0d484
VJ
1751 ptv->req.v2.tp_block_size, ptv->req.v2.tp_block_nr,
1752 ptv->req.v2.tp_frame_size, ptv->req.v2.tp_frame_nr);
49b7b00f
EL
1753 return 1;
1754}
1755
c2d0d938 1756#ifdef HAVE_TPACKET_V3
bae1b03c
EL
1757static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
1758{
69d0d484
VJ
1759 ptv->req.v3.tp_block_size = ptv->block_size;
1760 ptv->req.v3.tp_frame_size = 2048;
bae1b03c
EL
1761 int frames_per_block = 0;
1762 int tp_hdrlen = sizeof(struct tpacket3_hdr);
1763 int snaplen = default_packet_size;
1764
1765 if (snaplen == 0) {
1766 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1767 if (snaplen <= 0) {
1768 SCLogWarning(SC_ERR_INVALID_VALUE,
1769 "Unable to get MTU, setting snaplen to sane default of 1514");
1770 snaplen = 1514;
1771 }
1772 }
1773
69d0d484
VJ
1774 ptv->req.v3.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1775 frames_per_block = ptv->req.v3.tp_block_size / ptv->req.v3.tp_frame_size;
bae1b03c
EL
1776
1777 if (frames_per_block == 0) {
1778 SCLogError(SC_ERR_INVALID_VALUE,
1779 "Block size is too small, it should be at least %d",
69d0d484 1780 ptv->req.v3.tp_frame_size);
bae1b03c
EL
1781 return -1;
1782 }
69d0d484 1783 ptv->req.v3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
bae1b03c 1784 /* exact division */
69d0d484
VJ
1785 ptv->req.v3.tp_frame_nr = ptv->req.v3.tp_block_nr * frames_per_block;
1786 ptv->req.v3.tp_retire_blk_tov = ptv->block_timeout;
1787 ptv->req.v3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
b3bf7a57 1788 SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
69d0d484
VJ
1789 ptv->req.v3.tp_block_size, ptv->req.v3.tp_block_nr,
1790 ptv->req.v3.tp_frame_size, ptv->req.v3.tp_frame_nr,
1791 ptv->req.v3.tp_block_size * ptv->req.v3.tp_block_nr
bae1b03c
EL
1792 );
1793 return 1;
1794}
c2d0d938 1795#endif
bae1b03c 1796
c7bde9df
EL
1797static int AFPSetupRing(AFPThreadVars *ptv, char *devname)
1798{
1799 int val;
1800 unsigned int len = sizeof(val), i;
c7bde9df 1801 int order;
f5c20191 1802 int r, mmap_flag;
c7bde9df 1803
c2d0d938 1804#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1805 if (ptv->flags & AFP_TPACKET_V3) {
1806 val = TPACKET_V3;
f947539d 1807 } else
c2d0d938 1808#endif
f947539d 1809 {
c7bde9df
EL
1810 val = TPACKET_V2;
1811 }
1812 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
1813 if (errno == ENOPROTOOPT) {
1814 if (ptv->flags & AFP_TPACKET_V3) {
1815 SCLogError(SC_ERR_AFP_CREATE,
1816 "Too old kernel giving up (need 3.2 for TPACKET_V3)");
1817 } else {
1818 SCLogError(SC_ERR_AFP_CREATE,
1819 "Too old kernel giving up (need 2.6.27 at least)");
1820 }
1821 }
1822 SCLogError(SC_ERR_AFP_CREATE, "Error when retrieving packet header len");
1823 return AFP_FATAL_ERROR;
1824 }
1825
f947539d
VJ
1826 val = TPACKET_V2;
1827#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1828 if (ptv->flags & AFP_TPACKET_V3) {
1829 val = TPACKET_V3;
c7bde9df 1830 }
f947539d 1831#endif
c7bde9df
EL
1832 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_VERSION, &val,
1833 sizeof(val)) < 0) {
1834 SCLogError(SC_ERR_AFP_CREATE,
1835 "Can't activate TPACKET_V2/TPACKET_V3 on packet socket: %s",
1836 strerror(errno));
1837 return AFP_FATAL_ERROR;
1838 }
1839
a40f08a2
EL
1840#ifdef HAVE_HW_TIMESTAMPING
1841 int req = SOF_TIMESTAMPING_RAW_HARDWARE;
1842 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req,
1843 sizeof(req)) < 0) {
1844 SCLogWarning(SC_ERR_AFP_CREATE,
1845 "Can't activate hardware timestamping on packet socket: %s",
1846 strerror(errno));
1847 }
1848#endif
1849
ecf59be4
EL
1850 /* Let's reserve head room so we can add the VLAN header in IPS
1851 * or TAP mode before write the packet */
1852 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
1853 /* Only one vlan is extracted from AFP header so
1854 * one VLAN header length is enough. */
1855 int reserve = VLAN_HEADER_LEN;
1856 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
1857 sizeof(reserve)) < 0) {
1858 SCLogError(SC_ERR_AFP_CREATE,
1859 "Can't activate reserve on packet socket: %s",
1860 strerror(errno));
1861 return AFP_FATAL_ERROR;
1862 }
1863 }
1864
c7bde9df 1865 /* Allocate RX ring */
c2d0d938 1866#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1867 if (ptv->flags & AFP_TPACKET_V3) {
1868 if (AFPComputeRingParamsV3(ptv) != 1) {
1869 return AFP_FATAL_ERROR;
1870 }
1871 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
69d0d484 1872 (void *) &ptv->req.v3, sizeof(ptv->req.v3));
c7bde9df
EL
1873 if (r < 0) {
1874 SCLogError(SC_ERR_MEM_ALLOC,
1875 "Unable to allocate RX Ring for iface %s: (%d) %s",
1876 devname,
1877 errno,
1878 strerror(errno));
1879 return AFP_FATAL_ERROR;
1880 }
1881 } else {
c2d0d938 1882#endif
fa902abe 1883 for (order = AFP_BLOCK_SIZE_DEFAULT_ORDER; order >= 0; order--) {
c7bde9df
EL
1884 if (AFPComputeRingParams(ptv, order) != 1) {
1885 SCLogInfo("Ring parameter are incorrect. Please correct the devel");
1886 return AFP_FATAL_ERROR;
1887 }
1888
1889 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1890 (void *) &ptv->req, sizeof(ptv->req));
1891
1892 if (r < 0) {
1893 if (errno == ENOMEM) {
1894 SCLogInfo("Memory issue with ring parameters. Retrying.");
1895 continue;
1896 }
1897 SCLogError(SC_ERR_MEM_ALLOC,
1898 "Unable to allocate RX Ring for iface %s: (%d) %s",
1899 devname,
1900 errno,
1901 strerror(errno));
1902 return AFP_FATAL_ERROR;
1903 } else {
1904 break;
1905 }
1906 }
1907 if (order < 0) {
1908 SCLogError(SC_ERR_MEM_ALLOC,
1909 "Unable to allocate RX Ring for iface %s (order 0 failed)",
1910 devname);
1911 return AFP_FATAL_ERROR;
1912 }
c2d0d938 1913#ifdef HAVE_TPACKET_V3
c7bde9df 1914 }
c2d0d938 1915#endif
c7bde9df
EL
1916
1917 /* Allocate the Ring */
c2d0d938 1918#ifdef HAVE_TPACKET_V3
c7bde9df 1919 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484 1920 ptv->ring_buflen = ptv->req.v3.tp_block_nr * ptv->req.v3.tp_block_size;
c7bde9df 1921 } else {
c2d0d938 1922#endif
69d0d484 1923 ptv->ring_buflen = ptv->req.v2.tp_block_nr * ptv->req.v2.tp_block_size;
c2d0d938 1924#ifdef HAVE_TPACKET_V3
c7bde9df 1925 }
c2d0d938 1926#endif
f5c20191
EL
1927 mmap_flag = MAP_SHARED;
1928 if (ptv->flags & AFP_MMAP_LOCKED)
1929 mmap_flag |= MAP_LOCKED;
cba41207 1930 ptv->ring_buf = mmap(0, ptv->ring_buflen, PROT_READ|PROT_WRITE,
f5c20191 1931 mmap_flag, ptv->socket, 0);
cba41207 1932 if (ptv->ring_buf == MAP_FAILED) {
88f5d7d1
EL
1933 SCLogError(SC_ERR_MEM_ALLOC, "Unable to mmap, error %s",
1934 strerror(errno));
c7bde9df
EL
1935 goto mmap_err;
1936 }
c2d0d938 1937#ifdef HAVE_TPACKET_V3
c7bde9df 1938 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
1939 ptv->ring.v3 = SCMalloc(ptv->req.v3.tp_block_nr * sizeof(*ptv->ring.v3));
1940 if (!ptv->ring.v3) {
1941 SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring.v3");
291af719 1942 goto postmmap_err;
c7bde9df 1943 }
69d0d484
VJ
1944 for (i = 0; i < ptv->req.v3.tp_block_nr; ++i) {
1945 ptv->ring.v3[i].iov_base = ptv->ring_buf + (i * ptv->req.v3.tp_block_size);
1946 ptv->ring.v3[i].iov_len = ptv->req.v3.tp_block_size;
c7bde9df
EL
1947 }
1948 } else {
c2d0d938 1949#endif
c7bde9df 1950 /* allocate a ring for each frame header pointer*/
69d0d484
VJ
1951 ptv->ring.v2 = SCMalloc(ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
1952 if (ptv->ring.v2 == NULL) {
c7bde9df 1953 SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate frame buf");
291af719 1954 goto postmmap_err;
c7bde9df 1955 }
69d0d484 1956 memset(ptv->ring.v2, 0, ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
c7bde9df
EL
1957 /* fill the header ring with proper frame ptr*/
1958 ptv->frame_offset = 0;
69d0d484
VJ
1959 for (i = 0; i < ptv->req.v2.tp_block_nr; ++i) {
1960 void *base = &(ptv->ring_buf[i * ptv->req.v2.tp_block_size]);
c7bde9df 1961 unsigned int j;
69d0d484
VJ
1962 for (j = 0; j < ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size; ++j, ++ptv->frame_offset) {
1963 (((union thdr **)ptv->ring.v2)[ptv->frame_offset]) = base;
1964 base += ptv->req.v2.tp_frame_size;
c7bde9df
EL
1965 }
1966 }
1967 ptv->frame_offset = 0;
c2d0d938 1968#ifdef HAVE_TPACKET_V3
c7bde9df 1969 }
c2d0d938 1970#endif
c7bde9df
EL
1971
1972 return 0;
1973
291af719 1974postmmap_err:
cba41207 1975 munmap(ptv->ring_buf, ptv->ring_buflen);
69d0d484
VJ
1976 if (ptv->ring.v2)
1977 SCFree(ptv->ring.v2);
1978 if (ptv->ring.v3)
1979 SCFree(ptv->ring.v3);
c7bde9df
EL
1980mmap_err:
1981 /* Packet mmap does the cleaning when socket is closed */
1982 return AFP_FATAL_ERROR;
1983}
1984
402bdf9b
VJ
/**
 * \brief Test if we can use FANOUT. Older kernels like those in
 *        CentOS6 have HAVE_PACKET_FANOUT defined but fail to work.
 *
 * Opens a throwaway AF_PACKET socket, tries to set the PACKET_FANOUT
 * option on it and closes the socket again.
 *
 * \retval 1 if the kernel accepted the PACKET_FANOUT option
 * \retval 0 if built without HAVE_PACKET_FANOUT, if the probe socket
 *           can't be created or if the option is rejected
 */
int AFPIsFanoutSupported(void)
{
#ifdef HAVE_PACKET_FANOUT
    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0)
        return 0;

    uint16_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
    uint16_t id = 1;
    /* Widen before shifting: a uint16_t is promoted to (signed) int, and
     * moving a set top bit of mode into bit 31 of an int is undefined. */
    uint32_t option = ((uint32_t)mode << 16) | (id & 0xffff);
    int r = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, (void *)&option, sizeof(option));
    /* Keep the errno of setsockopt(): close() is allowed to overwrite it. */
    int setsockopt_errno = errno;
    close(fd);

    if (r < 0) {
        SCLogPerf("fanout not supported by kernel: %s", strerror(setsockopt_errno));
        return 0;
    }
    return 1;
#else
    return 0;
#endif
}
2010
91e1256b
EL
2011#ifdef HAVE_PACKET_EBPF
2012
2013static int SockFanoutSeteBPF(AFPThreadVars *ptv)
2014{
2015 int pfd = ptv->ebpf_lb_fd;
2016 if (pfd == -1) {
2017 SCLogError(SC_ERR_INVALID_VALUE,
2018 "Fanout file descriptor is invalid");
2019 return -1;
2020 }
2021
2022 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
2023 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf");
2024 return -1;
2025 }
2026 SCLogInfo("Activated eBPF on socket");
2027
2028 return 0;
2029}
2030
2031static int SetEbpfFilter(AFPThreadVars *ptv)
2032{
2033 int pfd = ptv->ebpf_filter_fd;
2034 if (pfd == -1) {
2035 SCLogError(SC_ERR_INVALID_VALUE,
2036 "Filter file descriptor is invalid");
2037 return -1;
2038 }
2039
2040 if (setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &pfd, sizeof(pfd))) {
2041 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
2042 return -1;
2043 }
2044 SCLogInfo("Activated eBPF filter on socket");
2045
2046 return 0;
2047}
2048#endif
2049
e80b30c0 2050static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
c45d8985
EL
2051{
2052 int r;
1992a227 2053 int ret = AFP_FATAL_ERROR;
c45d8985
EL
2054 struct packet_mreq sock_params;
2055 struct sockaddr_ll bind_address;
662dccd8 2056 int if_idx;
49b7b00f 2057
c45d8985
EL
2058 /* open socket */
2059 ptv->socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
2060 if (ptv->socket == -1) {
e80b30c0 2061 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
13f13b6d 2062 goto error;
c45d8985 2063 }
cba41207 2064
662dccd8 2065 if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);
cba41207
AG
2066
2067 if (if_idx == -1) {
fcd5e138 2068 goto socket_err;
cba41207
AG
2069 }
2070
c45d8985
EL
2071 /* bind socket */
2072 memset(&bind_address, 0, sizeof(bind_address));
2073 bind_address.sll_family = AF_PACKET;
2074 bind_address.sll_protocol = htons(ETH_P_ALL);
662dccd8 2075 bind_address.sll_ifindex = if_idx;
c45d8985
EL
2076 if (bind_address.sll_ifindex == -1) {
2077 if (verbose)
e80b30c0 2078 SCLogError(SC_ERR_AFP_CREATE, "Couldn't find iface %s", devname);
1992a227 2079 ret = AFP_RECOVERABLE_ERROR;
13f13b6d
EL
2080 goto socket_err;
2081 }
2082
cba41207
AG
2083 int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
2084 if (if_flags == -1) {
2085 if (verbose) {
2086 SCLogError(SC_ERR_AFP_READ,
2087 "Couldn't get flags for interface '%s'",
2088 ptv->iface);
2089 }
2090 ret = AFP_RECOVERABLE_ERROR;
2091 goto socket_err;
2092 } else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
2093 if (verbose) {
2094 SCLogError(SC_ERR_AFP_READ,
2095 "Interface '%s' is down",
2096 ptv->iface);
2097 }
2098 ret = AFP_RECOVERABLE_ERROR;
2099 goto socket_err;
2100 }
2101
13f13b6d
EL
2102 if (ptv->promisc != 0) {
2103 /* Force promiscuous mode */
2104 memset(&sock_params, 0, sizeof(sock_params));
2105 sock_params.mr_type = PACKET_MR_PROMISC;
2106 sock_params.mr_ifindex = bind_address.sll_ifindex;
2107 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP,(void *)&sock_params, sizeof(sock_params));
2108 if (r < 0) {
2109 SCLogError(SC_ERR_AFP_CREATE,
2110 "Couldn't switch iface %s to promiscuous, error %s",
2111 devname, strerror(errno));
c7bde9df 2112 goto socket_err;
13f13b6d
EL
2113 }
2114 }
2115
2116 if (ptv->checksum_mode == CHECKSUM_VALIDATION_KERNEL) {
2117 int val = 1;
2118 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_AUXDATA, &val,
2119 sizeof(val)) == -1 && errno != ENOPROTOOPT) {
2120 SCLogWarning(SC_ERR_NO_AF_PACKET,
4111331a 2121 "'kernel' checksum mode not supported, falling back to full mode.");
13f13b6d
EL
2122 ptv->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
2123 }
2124 }
2125
2126 /* set socket recv buffer size */
2127 if (ptv->buffer_size != 0) {
2128 /*
2129 * Set the socket buffer size to the specified value.
2130 */
b3bf7a57 2131 SCLogPerf("Setting AF_PACKET socket buffer to %d", ptv->buffer_size);
13f13b6d
EL
2132 if (setsockopt(ptv->socket, SOL_SOCKET, SO_RCVBUF,
2133 &ptv->buffer_size,
2134 sizeof(ptv->buffer_size)) == -1) {
2135 SCLogError(SC_ERR_AFP_CREATE,
2136 "Couldn't set buffer size to %d on iface %s, error %s",
2137 ptv->buffer_size, devname, strerror(errno));
c7bde9df 2138 goto socket_err;
13f13b6d
EL
2139 }
2140 }
2141
2142 r = bind(ptv->socket, (struct sockaddr *)&bind_address, sizeof(bind_address));
2143 if (r < 0) {
2144 if (verbose) {
2145 if (errno == ENETDOWN) {
2146 SCLogError(SC_ERR_AFP_CREATE,
2147 "Couldn't bind AF_PACKET socket, iface %s is down",
2148 devname);
2149 } else {
2150 SCLogError(SC_ERR_AFP_CREATE,
2151 "Couldn't bind AF_PACKET socket to iface %s, error %s",
2152 devname, strerror(errno));
2153 }
2154 }
1992a227 2155 ret = AFP_RECOVERABLE_ERROR;
c7bde9df 2156 goto socket_err;
13f13b6d
EL
2157 }
2158
91e1256b 2159
238ff231
EL
2160#ifdef HAVE_PACKET_FANOUT
2161 /* add binded socket to fanout group */
2162 if (ptv->threads > 1) {
238ff231
EL
2163 uint16_t mode = ptv->cluster_type;
2164 uint16_t id = ptv->cluster_id;
4111331a 2165 uint32_t option = (mode << 16) | (id & 0xffff);
238ff231
EL
2166 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
2167 if (r < 0) {
2168 SCLogError(SC_ERR_AFP_CREATE,
4111331a 2169 "Couldn't set fanout mode, error %s",
238ff231 2170 strerror(errno));
c7bde9df 2171 goto socket_err;
238ff231
EL
2172 }
2173 }
2174#endif
2175
91e1256b
EL
2176#ifdef HAVE_PACKET_EBPF
2177 if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
2178 r = SockFanoutSeteBPF(ptv);
2179 if (r < 0) {
2180 SCLogError(SC_ERR_AFP_CREATE,
2181 "Coudn't set EBPF, error %s",
2182 strerror(errno));
2183 goto socket_err;
2184 }
2185 }
2186#endif
2187
49b7b00f 2188 if (ptv->flags & AFP_RING_MODE) {
c7bde9df
EL
2189 ret = AFPSetupRing(ptv, devname);
2190 if (ret != 0)
13f13b6d 2191 goto socket_err;
49b7b00f
EL
2192 }
2193
86a3f064 2194 SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
c45d8985 2195
c85ee1e3
EL
2196 ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
2197 switch (ptv->datalink) {
2198 case ARPHRD_PPP:
2199 case ARPHRD_ATM:
2200 ptv->cooked = 1;
619414c5 2201 break;
c85ee1e3
EL
2202 }
2203
f47df5a6 2204 TmEcode rc = AFPSetBPFFilter(ptv);
f2a6fb8a 2205 if (rc == TM_ECODE_FAILED) {
39807b47
AG
2206 ret = AFP_FATAL_ERROR;
2207 goto socket_err;
f2a6fb8a
EL
2208 }
2209
49b7b00f 2210 /* Init is ok */
13f13b6d 2211 AFPSwitchState(ptv, AFP_STATE_UP);
c45d8985 2212 return 0;
13f13b6d 2213
13f13b6d
EL
2214socket_err:
2215 close(ptv->socket);
2216 ptv->socket = -1;
f47df5a6 2217 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
2218 if (ptv->ring.v3) {
2219 SCFree(ptv->ring.v3);
2220 ptv->ring.v3 = NULL;
f47df5a6
VJ
2221 }
2222 } else {
69d0d484
VJ
2223 if (ptv->ring.v2) {
2224 SCFree(ptv->ring.v2);
2225 ptv->ring.v2 = NULL;
f47df5a6
VJ
2226 }
2227 }
2228
13f13b6d 2229error:
1992a227 2230 return -ret;
c45d8985
EL
2231}
2232
f2a6fb8a
EL
2233TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
2234{
2235 struct bpf_program filter;
2236 struct sock_fprog fcode;
2237 int rc;
2238
91e1256b
EL
2239#ifdef HAVE_PACKET_EBPF
2240 if (ptv->ebpf_filter_fd != -1) {
2241 return SetEbpfFilter(ptv);
2242 }
2243#endif
2244
f2a6fb8a
EL
2245 if (!ptv->bpf_filter)
2246 return TM_ECODE_OK;
2247
f2a6fb8a
EL
2248 SCLogInfo("Using BPF '%s' on iface '%s'",
2249 ptv->bpf_filter,
2250 ptv->iface);
28e9e4c8
EL
2251
2252 char errbuf[PCAP_ERRBUF_SIZE];
2253 if (SCBPFCompile(default_packet_size, /* snaplen_arg */
f2a6fb8a
EL
2254 ptv->datalink, /* linktype_arg */
2255 &filter, /* program */
2256 ptv->bpf_filter, /* const char *buf */
cc82ef06 2257 1, /* optimize */
28e9e4c8
EL
2258 0, /* mask */
2259 errbuf,
2260 sizeof(errbuf)) == -1) {
2261 SCLogError(SC_ERR_AFP_CREATE, "Failed to compile BPF \"%s\": %s",
2262 ptv->bpf_filter,
2263 errbuf);
f2a6fb8a
EL
2264 return TM_ECODE_FAILED;
2265 }
2266
2267 fcode.len = filter.bf_len;
2268 fcode.filter = (struct sock_filter*)filter.bf_insns;
2269
2270 rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
2271
28e9e4c8 2272 SCBPFFree(&filter);
f2a6fb8a
EL
2273 if(rc == -1) {
2274 SCLogError(SC_ERR_AFP_CREATE, "Failed to attach filter: %s", strerror(errno));
2275 return TM_ECODE_FAILED;
2276 }
2277
f2a6fb8a
EL
2278 return TM_ECODE_OK;
2279}
2280
06173267
EL
2281#ifdef HAVE_PACKET_EBPF
2282/**
2283 * Insert a half flow in the kernel bypass table
2284 *
2285 * \param mapfd file descriptor of the protocol bypass table
2286 * \param key data to use as key in the table
2287 * \param inittime time of creation of the entry (in monotonic clock)
2598078e 2288 * \return 0 in case of error, 1 if success
06173267
EL
2289 */
2290static int AFPInsertHalfFlow(int mapd, void *key, uint64_t inittime)
2291{
17a32bda
EL
2292 struct pair value[nr_cpus];
2293 unsigned int i;
1e729f05
EL
2294
2295 if (mapd == -1) {
2296 return 0;
2297 }
2298
94a622cb
EL
2299 /* We use a per CPU structure so we have to set an array of values as the kernel
2300 * is not duplicating the data on each CPU by itself. */
17a32bda
EL
2301 for (i = 0; i < nr_cpus; i++) {
2302 value[i].time = inittime;
2303 value[i].packets = 0;
2304 value[i].bytes = 0;
2305 }
2306 SCLogDebug("Inserting element in eBPF mapping: %lu", inittime);
2307 if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) != 0) {
2308 switch (errno) {
3379311e 2309 /* no more place in the hash */
17a32bda 2310 case E2BIG:
17a32bda 2311 return 0;
3379311e
EL
2312 /* if we already have the key then bypass is a success */
2313 case EEXIST:
2314 return 1;
2315 /* Not supposed to be there so issue a error */
17a32bda
EL
2316 default:
2317 SCLogError(SC_ERR_BPF, "Can't update eBPF map: %s (%d)",
2318 strerror(errno),
2319 errno);
2320 return 0;
06173267 2321 }
17a32bda
EL
2322 }
2323 return 1;
06173267
EL
2324}
2325#endif
2326
2598078e 2327/**
94a622cb
EL
2328 * Bypass function for AF_PACKET capture in eBPF mode
2329 *
2330 * This function creates two half flows in the map shared with the kernel
2331 * to trigger bypass.
2332 *
2333 * The implementation of bypass is done via an IPv4 and an IPv6 flow table.
2334 * This table contains the list of half flows to bypass. The in-kernel filter
2335 * will skip/drop the packet if they belong to a flow in one of the flows
2336 * table.
2337 *
2338 * \param p the packet belonging to the flow to bypass
2339 * \return 0 if unable to bypass, 1 if success
2598078e 2340 */
06173267
EL
2341static int AFPBypassCallback(Packet *p)
2342{
2343#ifdef HAVE_PACKET_EBPF
2344 SCLogDebug("Calling af_packet callback function");
2345 /* Only bypass TCP and UDP */
2346 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2347 return 0;
2348 }
2349
2350 /* Bypassing tunneled packets is currently not supported
2351 * because we can't discard the inner packet only due to
2352 * primitive parsing in eBPF */
2353 if (IS_TUNNEL_PKT(p)) {
2354 return 0;
2355 }
2356 struct timespec curtime;
2357 uint64_t inittime = 0;
2358 /* In eBPF, the function that we have use to get time return the
2359 * monotonic clock (the time since start of the computer). So we
2360 * can't use the timestamp of the packet. */
2361 if (clock_gettime(CLOCK_MONOTONIC, &curtime) == 0) {
2362 inittime = curtime.tv_sec * 1000000000;
2363 }
2364 if (PKT_IS_IPV4(p)) {
d65f4585 2365 SCLogDebug("add an IPv4");
eff10fce
EL
2366 if (p->afp_v.v4_map_fd == -1) {
2367 return 0;
2368 }
06173267
EL
2369 struct flowv4_keys key = {};
2370 key.src = htonl(GET_IPV4_SRC_ADDR_U32(p));
2371 key.dst = htonl(GET_IPV4_DST_ADDR_U32(p));
2372 key.port16[0] = GET_TCP_SRC_PORT(p);
2373 key.port16[1] = GET_TCP_DST_PORT(p);
8c880879 2374
06173267 2375 key.ip_proto = IPV4_GET_IPPROTO(p);
d65f4585 2376 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
06173267
EL
2377 return 0;
2378 }
2379 key.src = htonl(GET_IPV4_DST_ADDR_U32(p));
2380 key.dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
2381 key.port16[0] = GET_TCP_DST_PORT(p);
2382 key.port16[1] = GET_TCP_SRC_PORT(p);
d65f4585 2383 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
06173267
EL
2384 return 0;
2385 }
f0439103 2386 EBPFUpdateFlow(p->flow, p);
06173267
EL
2387 return 1;
2388 }
2389 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2390 if (PKT_IS_IPV6(p) &&
06173267 2391 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
d65f4585 2392 int i;
eff10fce
EL
2393 if (p->afp_v.v6_map_fd == -1) {
2394 return 0;
2395 }
06173267 2396 SCLogDebug("add an IPv6");
06173267
EL
2397 struct flowv6_keys key = {};
2398 for (i = 0; i < 4; i++) {
2399 key.src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2400 key.dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2401 }
2402 key.port16[0] = GET_TCP_SRC_PORT(p);
2403 key.port16[1] = GET_TCP_DST_PORT(p);
2404 key.ip_proto = IPV6_GET_NH(p);
d65f4585 2405 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
06173267
EL
2406 return 0;
2407 }
2408 for (i = 0; i < 4; i++) {
2409 key.src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2410 key.dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2411 }
2412 key.port16[0] = GET_TCP_DST_PORT(p);
2413 key.port16[1] = GET_TCP_SRC_PORT(p);
d65f4585 2414 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
06173267
EL
2415 return 0;
2416 }
f0439103 2417 EBPFUpdateFlow(p->flow, p);
06173267
EL
2418 return 1;
2419 }
2420#endif
2421 return 0;
2422}
2423
94a622cb
EL
2424/**
2425 * Bypass function for AF_PACKET capture in XDP mode
2426 *
2427 * This function creates two half flows in the map shared with the kernel
2428 * to trigger bypass. This function is similar to AFPBypassCallback() but
2429 * the bytes order is changed for some data due to the way we get the data
2430 * in the XDP case.
2431 *
2432 * \param p the packet belonging to the flow to bypass
2433 * \return 0 if unable to bypass, 1 if success
2434 */
8c880879
EL
2435static int AFPXDPBypassCallback(Packet *p)
2436{
2437#ifdef HAVE_PACKET_XDP
2438 SCLogDebug("Calling af_packet callback function");
2439 /* Only bypass TCP and UDP */
2440 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2441 return 0;
2442 }
2443
2444 /* Bypassing tunneled packets is currently not supported
2445 * because we can't discard the inner packet only due to
2446 * primitive parsing in eBPF */
2447 if (IS_TUNNEL_PKT(p)) {
2448 return 0;
2449 }
2450 struct timespec curtime;
2451 uint64_t inittime = 0;
94a622cb
EL
2452 /* In eBPF, the function that we have use to get time return the
2453 * monotonic clock (the time since start of the computer). So we
2454 * can't use the timestamp of the packet. */
8c880879
EL
2455 if (clock_gettime(CLOCK_MONOTONIC, &curtime) == 0) {
2456 inittime = curtime.tv_sec * 1000000000;
2457 }
2458 if (PKT_IS_IPV4(p)) {
8c880879 2459 struct flowv4_keys key = {};
eff10fce
EL
2460 if (p->afp_v.v4_map_fd == -1) {
2461 return 0;
2462 }
8c880879
EL
2463 key.src = GET_IPV4_SRC_ADDR_U32(p);
2464 key.dst = GET_IPV4_DST_ADDR_U32(p);
94a622cb 2465 /* In the XDP filter we get port from parsing of packet and not from skb
6062c27e 2466 * (as in eBPF filter) so we need to pass from host to network order */
8c880879
EL
2467 key.port16[0] = htons(GET_TCP_SRC_PORT(p));
2468 key.port16[1] = htons(GET_TCP_DST_PORT(p));
2469 key.ip_proto = IPV4_GET_IPPROTO(p);
d65f4585 2470 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
8c880879
EL
2471 return 0;
2472 }
2473 key.src = GET_IPV4_DST_ADDR_U32(p);
2474 key.dst = GET_IPV4_SRC_ADDR_U32(p);
2475 key.port16[0] = htons(GET_TCP_DST_PORT(p));
2476 key.port16[1] = htons(GET_TCP_SRC_PORT(p));
d65f4585 2477 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
8c880879
EL
2478 return 0;
2479 }
2480 return 1;
2481 }
2482 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2483 if (PKT_IS_IPV6(p) &&
8c880879 2484 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
8c880879 2485 SCLogDebug("add an IPv6");
eff10fce
EL
2486 if (p->afp_v.v6_map_fd == -1) {
2487 return 0;
2488 }
d65f4585 2489 int i;
8c880879
EL
2490 struct flowv6_keys key = {};
2491 for (i = 0; i < 4; i++) {
2492 key.src[i] = GET_IPV6_SRC_ADDR(p)[i];
2493 key.dst[i] = GET_IPV6_DST_ADDR(p)[i];
2494 }
2495 key.port16[0] = htons(GET_TCP_SRC_PORT(p));
2496 key.port16[1] = htons(GET_TCP_DST_PORT(p));
2497 key.ip_proto = IPV6_GET_NH(p);
d65f4585 2498 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
8c880879
EL
2499 return 0;
2500 }
2501 for (i = 0; i < 4; i++) {
2502 key.src[i] = GET_IPV6_DST_ADDR(p)[i];
2503 key.dst[i] = GET_IPV6_SRC_ADDR(p)[i];
2504 }
2505 key.port16[0] = htons(GET_TCP_DST_PORT(p));
2506 key.port16[1] = htons(GET_TCP_SRC_PORT(p));
d65f4585 2507 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
8c880879
EL
2508 return 0;
2509 }
2510 return 1;
2511 }
2512#endif
2513 return 0;
2514}
2515
c45d8985
EL
2516/**
2517 * \brief Init function for ReceiveAFP.
2518 *
2519 * \param tv pointer to ThreadVars
2520 * \param initdata pointer to the interface passed from the user
2521 * \param data pointer gets populated with AFPThreadVars
2522 *
2523 * \todo Create a general AFP setup function.
2524 */
ab1200fb 2525TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
8f1d7503 2526{
c45d8985 2527 SCEnter();
ab1200fb 2528 AFPIfaceConfig *afpconfig = (AFPIfaceConfig *)initdata;
c45d8985 2529
c45d8985
EL
2530 if (initdata == NULL) {
2531 SCLogError(SC_ERR_INVALID_ARGUMENT, "initdata == NULL");
2532 SCReturnInt(TM_ECODE_FAILED);
2533 }
2534
2535 AFPThreadVars *ptv = SCMalloc(sizeof(AFPThreadVars));
e176be6f 2536 if (unlikely(ptv == NULL)) {
45d5c3ca 2537 afpconfig->DerefFunc(afpconfig);
c45d8985 2538 SCReturnInt(TM_ECODE_FAILED);
45d5c3ca 2539 }
c45d8985
EL
2540 memset(ptv, 0, sizeof(AFPThreadVars));
2541
2542 ptv->tv = tv;
2543 ptv->cooked = 0;
2544
fbca1a4e 2545 strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
c45d8985
EL
2546 ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2547
51eb9605
EL
2548 ptv->livedev = LiveGetDevice(ptv->iface);
2549 if (ptv->livedev == NULL) {
2550 SCLogError(SC_ERR_INVALID_VALUE, "Unable to find Live device");
11bdf483 2551 SCFree(ptv);
51eb9605
EL
2552 SCReturnInt(TM_ECODE_FAILED);
2553 }
2554
fbca1a4e 2555 ptv->buffer_size = afpconfig->buffer_size;
8879df80 2556 ptv->ring_size = afpconfig->ring_size;
fa902abe 2557 ptv->block_size = afpconfig->block_size;
e80b30c0 2558
df7dbe36 2559 ptv->promisc = afpconfig->promisc;
6062e00c 2560 ptv->checksum_mode = afpconfig->checksum_mode;
6efd37a3 2561 ptv->bpf_filter = NULL;
df7dbe36 2562
fbca1a4e 2563 ptv->threads = 1;
e80b30c0
EL
2564#ifdef HAVE_PACKET_FANOUT
2565 ptv->cluster_type = PACKET_FANOUT_LB;
2566 ptv->cluster_id = 1;
2567 /* We only set cluster info if the number of reader threads is greater than 1 */
fbca1a4e 2568 if (afpconfig->threads > 1) {
9d882116
VJ
2569 ptv->cluster_id = afpconfig->cluster_id;
2570 ptv->cluster_type = afpconfig->cluster_type;
2571 ptv->threads = afpconfig->threads;
e80b30c0
EL
2572 }
2573#endif
49b7b00f 2574 ptv->flags = afpconfig->flags;
e80b30c0 2575
f2a6fb8a
EL
2576 if (afpconfig->bpf_filter) {
2577 ptv->bpf_filter = afpconfig->bpf_filter;
2578 }
91e1256b
EL
2579 ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
2580 ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
8c880879 2581 ptv->xdp_mode = afpconfig->xdp_mode;
f2a6fb8a 2582
d65f4585
EL
2583#ifdef HAVE_PACKET_EBPF
2584 if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
126488f7 2585 ptv->v4_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v4");
d65f4585
EL
2586 if (ptv->v4_map_fd == -1) {
2587 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v4");
2588 }
126488f7 2589 ptv->v6_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v6");
d65f4585
EL
2590 if (ptv->v6_map_fd == -1) {
2591 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v6");
2592 }
2593 }
2594#endif
2595
6efd37a3 2596#ifdef PACKET_STATISTICS
1ef786e7
VJ
2597 ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
2598 ptv->tv);
2599 ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
2600 ptv->tv);
9efa4ace
EL
2601 ptv->capture_errors = StatsRegisterCounter("capture.errors",
2602 ptv->tv);
6efd37a3
EL
2603#endif
2604
662dccd8
EL
2605 ptv->copy_mode = afpconfig->copy_mode;
2606 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
2607 strlcpy(ptv->out_iface, afpconfig->out_iface, AFP_IFACE_NAME_LENGTH);
2608 ptv->out_iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
b7e78d33
EL
2609 /* Warn about BPF filter consequence */
2610 if (ptv->bpf_filter) {
2611 SCLogWarning(SC_WARN_UNCOMMON, "Enabling a BPF filter in IPS mode result"
2612 " in dropping all non matching packets.");
2613 }
662dccd8 2614 }
c85ee1e3 2615
b7e78d33 2616
0581a23f
EL
2617 if (AFPPeersListAdd(ptv) == TM_ECODE_FAILED) {
2618 SCFree(ptv);
2619 afpconfig->DerefFunc(afpconfig);
2620 SCReturnInt(TM_ECODE_FAILED);
2621 }
2622
e80b30c0
EL
2623#define T_DATA_SIZE 70000
2624 ptv->data = SCMalloc(T_DATA_SIZE);
2625 if (ptv->data == NULL) {
45d5c3ca 2626 afpconfig->DerefFunc(afpconfig);
6019ae3d 2627 SCFree(ptv);
e80b30c0 2628 SCReturnInt(TM_ECODE_FAILED);
c45d8985 2629 }
e80b30c0
EL
2630 ptv->datalen = T_DATA_SIZE;
2631#undef T_DATA_SIZE
2632
c45d8985 2633 *data = (void *)ptv;
fbca1a4e 2634
45d5c3ca 2635 afpconfig->DerefFunc(afpconfig);
71e47868
EL
2636
2637 /* A bit strange to have this here but we only have vlan information
2638 * during reading so we need to know if we want to keep vlan during
2639 * the capture phase */
2640 int vlanbool = 0;
2641 if ((ConfGetBool("vlan.use-for-tracking", &vlanbool)) == 1 && vlanbool == 0) {
9500d12c 2642 ptv->flags |= AFP_VLAN_DISABLED;
71e47868
EL
2643 }
2644
2cd6e128
EL
2645 /* If kernel is older than 3.0, VLAN is not stripped so we don't
2646 * get the info from packet extended header but we will use a standard
2647 * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
2648 if (! SCKernelVersionIsAtLeast(3, 0)) {
9500d12c 2649 ptv->flags |= AFP_VLAN_DISABLED;
2cd6e128
EL
2650 }
2651
c45d8985
EL
2652 SCReturnInt(TM_ECODE_OK);
2653}
2654
2655/**
2656 * \brief This function prints stats to the screen at exit.
2657 * \param tv pointer to ThreadVars
2658 * \param data pointer that gets cast into AFPThreadVars for ptv
2659 */
8f1d7503
KS
2660void ReceiveAFPThreadExitStats(ThreadVars *tv, void *data)
2661{
c45d8985
EL
2662 SCEnter();
2663 AFPThreadVars *ptv = (AFPThreadVars *)data;
9549faae
EL
2664
2665#ifdef PACKET_STATISTICS
e8a4a4c4 2666 AFPDumpCounters(ptv);
b3bf7a57 2667 SCLogPerf("(%s) Kernel: Packets %" PRIu64 ", dropped %" PRIu64 "",
6efd37a3 2668 tv->name,
752f03e7
VJ
2669 StatsGetLocalCounterValue(tv, ptv->capture_kernel_packets),
2670 StatsGetLocalCounterValue(tv, ptv->capture_kernel_drops));
9549faae 2671#endif
c45d8985
EL
2672}
2673
2674/**
2675 * \brief DeInit function closes af packet socket at exit.
2676 * \param tv pointer to ThreadVars
2677 * \param data pointer that gets cast into AFPThreadVars for ptv
2678 */
8f1d7503
KS
2679TmEcode ReceiveAFPThreadDeinit(ThreadVars *tv, void *data)
2680{
c45d8985
EL
2681 AFPThreadVars *ptv = (AFPThreadVars *)data;
2682
13f13b6d
EL
2683 AFPSwitchState(ptv, AFP_STATE_DOWN);
2684
8c880879
EL
2685#ifdef HAVE_PACKET_XDP
2686 EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
2687#endif
e80b30c0
EL
2688 if (ptv->data != NULL) {
2689 SCFree(ptv->data);
2690 ptv->data = NULL;
2691 }
2692 ptv->datalen = 0;
2693
f2a6fb8a 2694 ptv->bpf_filter = NULL;
69d0d484
VJ
2695 if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring.v3) {
2696 SCFree(ptv->ring.v3);
ce59ec5d 2697 } else {
69d0d484
VJ
2698 if (ptv->ring.v2)
2699 SCFree(ptv->ring.v2);
ce59ec5d 2700 }
f2a6fb8a 2701
7127ae2b 2702 SCFree(ptv);
c45d8985
EL
2703 SCReturnInt(TM_ECODE_OK);
2704}
2705
2706/**
2707 * \brief This function passes off to link type decoders.
2708 *
2709 * DecodeAFP reads packets from the PacketQueue and passes
2710 * them off to the proper link type decoder.
2711 *
2712 * \param t pointer to ThreadVars
2713 * \param p pointer to the current packet
2714 * \param data pointer that gets cast into AFPThreadVars for ptv
2715 * \param pq pointer to the current PacketQueue
2716 */
2717TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *postpq)
2718{
2719 SCEnter();
2720 DecodeThreadVars *dtv = (DecodeThreadVars *)data;
2721
f7b1aefa
VJ
2722 /* XXX HACK: flow timeout can call us for injected pseudo packets
2723 * see bug: https://redmine.openinfosecfoundation.org/issues/1107 */
2724 if (p->flags & PKT_PSEUDO_STREAM_END)
2725 return TM_ECODE_OK;
2726
c45d8985 2727 /* update counters */
14466a80 2728 DecodeUpdatePacketCounters(tv, dtv, p);
c45d8985 2729
1fb7c0dd
EL
2730 /* If suri has set vlan during reading, we increase vlan counter */
2731 if (p->vlan_idx) {
1c0b4ee0 2732 StatsIncr(tv, dtv->counter_vlan);
1fb7c0dd
EL
2733 }
2734
c45d8985 2735 /* call the decoder */
49dbb455 2736 switch (p->datalink) {
c45d8985
EL
2737 case LINKTYPE_ETHERNET:
2738 DecodeEthernet(tv, dtv, p,GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2739 break;
49dbb455
VJ
2740 case LINKTYPE_LINUX_SLL:
2741 DecodeSll(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2742 break;
c45d8985
EL
2743 case LINKTYPE_PPP:
2744 DecodePPP(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2745 break;
2746 case LINKTYPE_RAW:
f67aa5de 2747 case LINKTYPE_GRE_OVER_IP:
c45d8985
EL
2748 DecodeRaw(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2749 break;
49dbb455
VJ
2750 case LINKTYPE_NULL:
2751 DecodeNull(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2752 break;
c45d8985
EL
2753 default:
2754 SCLogError(SC_ERR_DATALINK_UNIMPLEMENTED, "Error: datalink type %" PRId32 " not yet supported in module DecodeAFP", p->datalink);
2755 break;
2756 }
2757
3088b6ac 2758 PacketDecodeFinalize(tv, dtv, p);
e7f09f24 2759
c45d8985
EL
2760 SCReturnInt(TM_ECODE_OK);
2761}
2762
ab1200fb 2763TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
c45d8985
EL
2764{
2765 SCEnter();
2766 DecodeThreadVars *dtv = NULL;
2767
5f307aca 2768 dtv = DecodeThreadVarsAlloc(tv);
c45d8985
EL
2769
2770 if (dtv == NULL)
2771 SCReturnInt(TM_ECODE_FAILED);
2772
2773 DecodeRegisterPerfCounters(dtv, tv);
2774
2775 *data = (void *)dtv;
2776
2777 SCReturnInt(TM_ECODE_OK);
2778}
2779
2864f9ee
VJ
2780TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data)
2781{
2782 if (data != NULL)
98c88d51 2783 DecodeThreadVarsFree(tv, data);
2864f9ee
VJ
2784 SCReturnInt(TM_ECODE_OK);
2785}
2786
e80b30c0 2787#endif /* HAVE_AF_PACKET */
c45d8985 2788/* eof */
a6457262
EL
2789/**
2790 * @}
2791 */