]> git.ipfire.org Git - people/ms/suricata.git/blame - src/source-af-packet.c
af-packet: re-enable sync for tpacket v2
[people/ms/suricata.git] / src / source-af-packet.c
CommitLineData
cde438f6 1/* Copyright (C) 2011-2018 Open Information Security Foundation
c45d8985
EL
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
a6457262
EL
18/**
19 * \defgroup afppacket AF_PACKET running mode
20 *
21 * @{
22 */
23
c45d8985
EL
24/**
25 * \file
26 *
27 * \author Eric Leblond <eric@regit.org>
28 *
29 * AF_PACKET socket acquisition support
30 *
c45d8985
EL
31 */
32
91e1256b
EL
33#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34#define SC_PCAP_DONT_INCLUDE_PCAP_H 1
c45d8985 35#include "suricata-common.h"
e80b30c0 36#include "config.h"
c45d8985
EL
37#include "suricata.h"
38#include "decode.h"
39#include "packet-queue.h"
40#include "threads.h"
41#include "threadvars.h"
42#include "tm-queuehandlers.h"
43#include "tm-modules.h"
44#include "tm-threads.h"
45#include "tm-threads-common.h"
46#include "conf.h"
17a32bda 47#include "util-cpu.h"
c45d8985 48#include "util-debug.h"
51eb9605 49#include "util-device.h"
d65f4585 50#include "util-ebpf.h"
c45d8985
EL
51#include "util-error.h"
52#include "util-privs.h"
e80b30c0 53#include "util-optimize.h"
51eb9605 54#include "util-checksum.h"
ac56b1bf 55#include "util-ioctl.h"
2cd6e128 56#include "util-host-info.h"
c45d8985
EL
57#include "tmqh-packetpool.h"
58#include "source-af-packet.h"
34b3f194 59#include "runmodes.h"
c45d8985 60
e80b30c0 61#ifdef HAVE_AF_PACKET
472e061c
VJ
62
63#if HAVE_SYS_IOCTL_H
2bc0be6e 64#include <sys/ioctl.h>
472e061c
VJ
65#endif
66
06173267
EL
67#ifdef HAVE_PACKET_EBPF
68#include "util-ebpf.h"
69#include <bpf/libbpf.h>
70#include <bpf/bpf.h>
71#endif
72
91e1256b
EL
/* Local declaration of the classic BPF program descriptor: an instruction
 * count plus a pointer to the instruction array.
 * NOTE(review): presumably declared here because the pcap BPF header is
 * deliberately not included (PCAP_DONT_INCLUDE_PCAP_BPF_H) -- confirm. */
struct bpf_program {
    unsigned int bf_len;        /* number of entries in bf_insns */
    struct bpf_insn *bf_insns;  /* filter instructions */
};
77
78#ifdef HAVE_PCAP_H
79#include <pcap.h>
80#endif
81
82#ifdef HAVE_PCAP_PCAP_H
83#include <pcap/pcap.h>
84#endif
85
28e9e4c8
EL
86#include "util-bpf.h"
87
472e061c 88#if HAVE_LINUX_IF_ETHER_H
c45d8985 89#include <linux/if_ether.h>
472e061c
VJ
90#endif
91
92#if HAVE_LINUX_IF_PACKET_H
c45d8985 93#include <linux/if_packet.h>
472e061c
VJ
94#endif
95
96#if HAVE_LINUX_IF_ARP_H
c45d8985 97#include <linux/if_arp.h>
472e061c 98#endif
f2a6fb8a 99
472e061c 100#if HAVE_LINUX_FILTER_H
f2a6fb8a 101#include <linux/filter.h>
e80b30c0 102#endif
c45d8985 103
472e061c 104#if HAVE_SYS_MMAN_H
49b7b00f 105#include <sys/mman.h>
472e061c
VJ
106#endif
107
a40f08a2
EL
108#ifdef HAVE_HW_TIMESTAMPING
109#include <linux/net_tstamp.h>
110#endif
111
472e061c 112#endif /* HAVE_AF_PACKET */
49b7b00f 113
c45d8985
EL
114extern int max_pending_packets;
115
e80b30c0
EL
116#ifndef HAVE_AF_PACKET
117
ab1200fb 118TmEcode NoAFPSupportExit(ThreadVars *, const void *, void **);
e80b30c0 119
8f1d7503
KS
120void TmModuleReceiveAFPRegister (void)
121{
e80b30c0
EL
122 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
123 tmm_modules[TMM_RECEIVEAFP].ThreadInit = NoAFPSupportExit;
124 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
125 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
126 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
127 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
128 tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
3f1c4efc 129 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
e80b30c0
EL
130}
131
132/**
133 * \brief Registration Function for DecodeAFP.
e80b30c0 134 */
8f1d7503
KS
135void TmModuleDecodeAFPRegister (void)
136{
e80b30c0
EL
137 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
138 tmm_modules[TMM_DECODEAFP].ThreadInit = NoAFPSupportExit;
139 tmm_modules[TMM_DECODEAFP].Func = NULL;
140 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
141 tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
142 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
143 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 144 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
e80b30c0
EL
145}
146
147/**
148 * \brief this function prints an error message and exits.
149 */
ab1200fb 150TmEcode NoAFPSupportExit(ThreadVars *tv, const void *initdata, void **data)
e80b30c0
EL
151{
152 SCLogError(SC_ERR_NO_AF_PACKET,"Error creating thread %s: you do not have "
153 "support for AF_PACKET enabled, on Linux host please recompile "
154 "with --enable-af-packet", tv->name);
155 exit(EXIT_FAILURE);
156}
157
158#else /* We have AF_PACKET support */
159
c45d8985
EL
160#define AFP_IFACE_NAME_LENGTH 48
161
162#define AFP_STATE_DOWN 0
163#define AFP_STATE_UP 1
164
165#define AFP_RECONNECT_TIMEOUT 500000
13f13b6d 166#define AFP_DOWN_COUNTER_INTERVAL 40
c45d8985
EL
167
168#define POLL_TIMEOUT 100
169
4a1a0080
EL
170#ifndef TP_STATUS_USER_BUSY
171/* for new use latest bit available in tp_status */
172#define TP_STATUS_USER_BUSY (1 << 31)
173#endif
174
b603ad62
EL
175#ifndef TP_STATUS_VLAN_VALID
176#define TP_STATUS_VLAN_VALID (1 << 4)
177#endif
178
62e63e3f
EL
/* Result codes returned by the AF_PACKET read functions. */
enum {
    /** Read completed without error */
    AFP_READ_OK,
    /** Reading from the socket or ring failed */
    AFP_READ_FAILURE,
    /** Error during treatment by other functions of Suricata */
    AFP_SURI_FAILURE,
    /** Ring reader saw TP_STATUS_LOSING while AFP_EMERGENCY_MODE is set */
    AFP_KERNEL_DROP,
};
186
1992a227
EL
/* Error severity codes, numbered from 1.
 * NOTE(review): the users of these values are outside this part of the
 * file; presumably they classify socket setup failures -- confirm. */
enum {
    AFP_FATAL_ERROR = 1,
    AFP_RECOVERABLE_ERROR,
};
191
49b7b00f
EL
/* Typed views of one ring frame header: the same address can be read as a
 * TPACKET_V2 header, a TPACKET_V3 header (when available) or a raw pointer. */
union thdr {
    struct tpacket2_hdr *h2;    /* TPACKET_V2 frame header */
#ifdef HAVE_TPACKET_V3
    struct tpacket3_hdr *h3;    /* TPACKET_V3 packet header */
#endif
    void *raw;                  /* untyped address of the header */
};
199
06173267 200static int AFPBypassCallback(Packet *p);
8c880879 201static int AFPXDPBypassCallback(Packet *p);
06173267 202
91e1256b 203#define MAX_MAPS 32
c45d8985
EL
204/**
205 * \brief Structure to hold thread specific variables.
206 */
207typedef struct AFPThreadVars_
208{
b797fd92
EL
209 union {
210 char *ring_v2;
211 struct iovec *ring_v3;
212 };
213
c45d8985 214 /* counters */
3ce39433 215 uint64_t pkts;
c45d8985 216
ff6365dd
EL
217 ThreadVars *tv;
218 TmSlot *slot;
9500d12c
EL
219 LiveDevice *livedev;
220 /* data link type for the thread */
b797fd92 221 uint32_t datalink;
9500d12c 222
d65f4585 223#ifdef HAVE_PACKET_EBPF
94a622cb 224 /* File descriptor of the IPv4 flow bypass table maps */
d65f4585 225 int v4_map_fd;
94a622cb 226 /* File descriptor of the IPv6 flow bypass table maps */
d65f4585
EL
227 int v6_map_fd;
228#endif
229
9500d12c 230 unsigned int frame_offset;
ff6365dd 231
9500d12c
EL
232 ChecksumValidationMode checksum_mode;
233
b797fd92 234 /* references to packet and drop counters */
9500d12c
EL
235 uint16_t capture_kernel_packets;
236 uint16_t capture_kernel_drops;
9efa4ace 237 uint16_t capture_errors;
9500d12c
EL
238
239 /* handle state */
240 uint8_t afp_state;
241 uint8_t copy_mode;
4bfa3aea 242 unsigned int flags;
9500d12c
EL
243
244 /* IPS peer */
245 AFPPeer *mpeer;
246
247 /* no mmap mode */
ff6365dd
EL
248 uint8_t *data; /** Per function and thread data */
249 int datalen; /** Length of per function and thread data */
9500d12c 250 int cooked;
ff6365dd 251
9500d12c
EL
252 /*
253 * Init related members
254 */
51eb9605 255
9500d12c
EL
256 /* thread specific socket */
257 int socket;
b797fd92
EL
258
259 int ring_size;
fa902abe 260 int block_size;
234aefdf 261 int block_timeout;
e80b30c0
EL
262 /* socket buffer size */
263 int buffer_size;
fa902abe 264 /* Filter */
ab1200fb 265 const char *bpf_filter;
91e1256b
EL
266 int ebpf_lb_fd;
267 int ebpf_filter_fd;
9500d12c 268
df7dbe36 269 int promisc;
e80b30c0 270
9500d12c 271 int down_count;
662dccd8 272
e80b30c0
EL
273 int cluster_id;
274 int cluster_type;
c45d8985 275
fbca1a4e
EL
276 int threads;
277
b797fd92
EL
278 union {
279 struct tpacket_req req;
c2d0d938 280#ifdef HAVE_TPACKET_V3
b797fd92 281 struct tpacket_req3 req3;
c2d0d938 282#endif
b797fd92
EL
283 };
284
285 char iface[AFP_IFACE_NAME_LENGTH];
286 /* IPS output iface */
287 char out_iface[AFP_IFACE_NAME_LENGTH];
662dccd8 288
cba41207
AG
289 /* mmap'ed ring buffer */
290 unsigned int ring_buflen;
291 uint8_t *ring_buf;
91e1256b 292
8c880879
EL
293 uint8_t xdp_mode;
294
c45d8985
EL
295} AFPThreadVars;
296
297TmEcode ReceiveAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
ab1200fb 298TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
c45d8985
EL
299void ReceiveAFPThreadExitStats(ThreadVars *, void *);
300TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
e80b30c0 301TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
c45d8985 302
ab1200fb 303TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
2864f9ee 304TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
c45d8985
EL
305TmEcode DecodeAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
306
f2a6fb8a 307TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
662dccd8 308static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
13f13b6d
EL
309static int AFPGetDevFlags(int fd, const char *ifname);
310static int AFPDerefSocket(AFPPeer* peer);
311static int AFPRefSocket(AFPPeer* peer);
f2a6fb8a 312
19475165
EL
313
314static unsigned int nr_cpus;
315
c45d8985
EL
316/**
317 * \brief Registration Function for RecieveAFP.
318 * \todo Unit tests are needed for this module.
319 */
8f1d7503
KS
320void TmModuleReceiveAFPRegister (void)
321{
c45d8985
EL
322 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
323 tmm_modules[TMM_RECEIVEAFP].ThreadInit = ReceiveAFPThreadInit;
ff6365dd 324 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
e80b30c0 325 tmm_modules[TMM_RECEIVEAFP].PktAcqLoop = ReceiveAFPLoop;
57e0bd39 326 tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
c45d8985 327 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
7127ae2b 328 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = ReceiveAFPThreadDeinit;
c45d8985
EL
329 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
330 tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
cd4705e6 331 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
19475165
EL
332
333 nr_cpus = UtilCpuGetNumProcessorsConfigured();
c45d8985
EL
334}
335
a6457262
EL
336
/**
 * \defgroup afppeers AFP peers list
 *
 * AF_PACKET has an IPS mode where interfaces are peered: packets from
 * one interface are sent to the peered interface and the other way around.
 * The ::AFPPeer list maintains the list of peers. Each ::AFPPeer stores the
 * information needed to send packets on the interface.
 * An element of the list must not be destroyed during the run of Suricata as
 * it is used by ::Packet and other threads.
 *
 * @{
 */
349
662dccd8
EL
350typedef struct AFPPeersList_ {
351 TAILQ_HEAD(, AFPPeer_) peers; /**< Head of list of fragments. */
352 int cnt;
353 int peered;
60400163
EL
354 int turn; /**< Next value for initialisation order */
355 SC_ATOMIC_DECLARE(int, reached); /**< Counter used to synchronize start */
662dccd8
EL
356} AFPPeersList;
357
358/**
a6457262
EL
359 * \brief Update the peer.
360 *
361 * Update the AFPPeer of a thread ie set new state, socket number
362 * or iface index.
363 *
662dccd8 364 */
ab1200fb 365static void AFPPeerUpdate(AFPThreadVars *ptv)
662dccd8
EL
366{
367 if (ptv->mpeer == NULL) {
368 return;
369 }
662dccd8
EL
370 (void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
371 (void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
372 (void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
373}
374
a6457262
EL
375/**
376 * \brief Clean and free ressource used by an ::AFPPeer
377 */
ab1200fb 378static void AFPPeerClean(AFPPeer *peer)
662dccd8
EL
379{
380 if (peer->flags & AFP_SOCK_PROTECT)
381 SCMutexDestroy(&peer->sock_protect);
382 SC_ATOMIC_DESTROY(peer->socket);
383 SC_ATOMIC_DESTROY(peer->if_idx);
384 SC_ATOMIC_DESTROY(peer->state);
385 SCFree(peer);
386}
387
388AFPPeersList peerslist;
389
390
a6457262
EL
391/**
392 * \brief Init the global list of ::AFPPeer
393 */
662dccd8
EL
394TmEcode AFPPeersListInit()
395{
396 SCEnter();
397 TAILQ_INIT(&peerslist.peers);
398 peerslist.peered = 0;
399 peerslist.cnt = 0;
60400163
EL
400 peerslist.turn = 0;
401 SC_ATOMIC_INIT(peerslist.reached);
402 (void) SC_ATOMIC_SET(peerslist.reached, 0);
662dccd8
EL
403 SCReturnInt(TM_ECODE_OK);
404}
405
a6457262
EL
406/**
407 * \brief Check that all ::AFPPeer got a peer
408 *
409 * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
410 */
662dccd8
EL
411TmEcode AFPPeersListCheck()
412{
413#define AFP_PEERS_MAX_TRY 4
414#define AFP_PEERS_WAIT 20000
415 int try = 0;
416 SCEnter();
417 while (try < AFP_PEERS_MAX_TRY) {
418 if (peerslist.cnt != peerslist.peered) {
419 usleep(AFP_PEERS_WAIT);
420 } else {
421 SCReturnInt(TM_ECODE_OK);
422 }
423 try++;
424 }
425 SCLogError(SC_ERR_AFP_CREATE, "Threads number not equals");
426 SCReturnInt(TM_ECODE_FAILED);
427}
428
a6457262
EL
429/**
430 * \brief Declare a new AFP thread to AFP peers list.
431 */
ab1200fb 432static TmEcode AFPPeersListAdd(AFPThreadVars *ptv)
662dccd8
EL
433{
434 SCEnter();
435 AFPPeer *peer = SCMalloc(sizeof(AFPPeer));
436 AFPPeer *pitem;
ac56b1bf 437 int mtu, out_mtu;
662dccd8 438
e176be6f 439 if (unlikely(peer == NULL)) {
662dccd8
EL
440 SCReturnInt(TM_ECODE_FAILED);
441 }
442 memset(peer, 0, sizeof(AFPPeer));
443 SC_ATOMIC_INIT(peer->socket);
13f13b6d 444 SC_ATOMIC_INIT(peer->sock_usage);
662dccd8
EL
445 SC_ATOMIC_INIT(peer->if_idx);
446 SC_ATOMIC_INIT(peer->state);
447 peer->flags = ptv->flags;
60400163 448 peer->turn = peerslist.turn++;
662dccd8
EL
449
450 if (peer->flags & AFP_SOCK_PROTECT) {
451 SCMutexInit(&peer->sock_protect, NULL);
452 }
453
13f13b6d 454 (void)SC_ATOMIC_SET(peer->sock_usage, 0);
662dccd8
EL
455 (void)SC_ATOMIC_SET(peer->state, AFP_STATE_DOWN);
456 strlcpy(peer->iface, ptv->iface, AFP_IFACE_NAME_LENGTH);
457 ptv->mpeer = peer;
458 /* add element to iface list */
459 TAILQ_INSERT_TAIL(&peerslist.peers, peer, next);
662dccd8 460
13f13b6d
EL
461 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
462 peerslist.cnt++;
463
464 /* Iter to find a peer */
465 TAILQ_FOREACH(pitem, &peerslist.peers, next) {
466 if (pitem->peer)
467 continue;
468 if (strcmp(pitem->iface, ptv->out_iface))
469 continue;
470 peer->peer = pitem;
471 pitem->peer = peer;
472 mtu = GetIfaceMTU(ptv->iface);
473 out_mtu = GetIfaceMTU(ptv->out_iface);
474 if (mtu != out_mtu) {
475 SCLogError(SC_ERR_AFP_CREATE,
476 "MTU on %s (%d) and %s (%d) are not equal, "
477 "transmission of packets bigger than %d will fail.",
478 ptv->iface, mtu,
479 ptv->out_iface, out_mtu,
480 (out_mtu > mtu) ? mtu : out_mtu);
481 }
482 peerslist.peered += 2;
483 break;
ac56b1bf 484 }
662dccd8
EL
485 }
486
487 AFPPeerUpdate(ptv);
488
489 SCReturnInt(TM_ECODE_OK);
490}
491
ab1200fb 492static int AFPPeersListWaitTurn(AFPPeer *peer)
60400163 493{
b2691cbe
EL
494 /* If turn is zero, we already have started threads once */
495 if (peerslist.turn == 0)
496 return 0;
497
60400163
EL
498 if (peer->turn == SC_ATOMIC_GET(peerslist.reached))
499 return 0;
500 return 1;
501}
502
ab1200fb 503static void AFPPeersListReachedInc(void)
60400163 504{
b2691cbe
EL
505 if (peerslist.turn == 0)
506 return;
507
508 if (SC_ATOMIC_ADD(peerslist.reached, 1) == peerslist.turn) {
509 SCLogInfo("All AFP capture threads are running.");
510 (void)SC_ATOMIC_SET(peerslist.reached, 0);
511 /* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
512 * restarted.
513 */
514 peerslist.turn = 0;
515 }
60400163
EL
516}
517
ab1200fb 518static int AFPPeersListStarted(void)
919377d4
EL
519{
520 return !peerslist.turn;
521}
522
a6457262
EL
523/**
524 * \brief Clean the global peers list.
525 */
662dccd8
EL
526void AFPPeersListClean()
527{
528 AFPPeer *pitem;
529
530 while ((pitem = TAILQ_FIRST(&peerslist.peers))) {
531 TAILQ_REMOVE(&peerslist.peers, pitem, next);
532 AFPPeerClean(pitem);
533 }
534}
535
a6457262
EL
536/**
537 * @}
538 */
539
c45d8985
EL
540/**
541 * \brief Registration Function for DecodeAFP.
542 * \todo Unit tests are needed for this module.
543 */
8f1d7503
KS
544void TmModuleDecodeAFPRegister (void)
545{
c45d8985
EL
546 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
547 tmm_modules[TMM_DECODEAFP].ThreadInit = DecodeAFPThreadInit;
548 tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
549 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
2864f9ee 550 tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
c45d8985
EL
551 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
552 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 553 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
c45d8985
EL
554}
555
662dccd8 556
e80b30c0
EL
557static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose);
558
e8a4a4c4 559static inline void AFPDumpCounters(AFPThreadVars *ptv)
6efd37a3 560{
6efd37a3 561#ifdef PACKET_STATISTICS
e8a4a4c4
EL
562 struct tpacket_stats kstats;
563 socklen_t len = sizeof (struct tpacket_stats);
564 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
565 &kstats, &len) > -1) {
566 SCLogDebug("(%s) Kernel: Packets %" PRIu32 ", dropped %" PRIu32 "",
567 ptv->tv->name,
568 kstats.tp_packets, kstats.tp_drops);
8992275b
VJ
569 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, kstats.tp_packets);
570 StatsAddUI64(ptv->tv, ptv->capture_kernel_drops, kstats.tp_drops);
3ce39433
EL
571 (void) SC_ATOMIC_ADD(ptv->livedev->drop, (uint64_t) kstats.tp_drops);
572 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, (uint64_t) kstats.tp_packets);
6efd37a3 573 }
e8a4a4c4 574#endif
6efd37a3 575}
c45d8985
EL
576
577/**
578 * \brief AF packet read function.
579 *
580 * This function fills
581 * From here the packets are picked up by the DecodeAFP thread.
582 *
583 * \param user pointer to AFPThreadVars
584 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
585 */
ab1200fb 586static int AFPRead(AFPThreadVars *ptv)
c45d8985
EL
587{
588 Packet *p = NULL;
589 /* XXX should try to use read that get directly to packet */
c45d8985
EL
590 int offset = 0;
591 int caplen;
592 struct sockaddr_ll from;
593 struct iovec iov;
594 struct msghdr msg;
c45d8985
EL
595 struct cmsghdr *cmsg;
596 union {
597 struct cmsghdr cmsg;
598 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
599 } cmsg_buf;
6efd37a3 600 unsigned char aux_checksum = 0;
c45d8985
EL
601
602 msg.msg_name = &from;
603 msg.msg_namelen = sizeof(from);
604 msg.msg_iov = &iov;
605 msg.msg_iovlen = 1;
c45d8985
EL
606 msg.msg_control = &cmsg_buf;
607 msg.msg_controllen = sizeof(cmsg_buf);
c45d8985
EL
608 msg.msg_flags = 0;
609
610 if (ptv->cooked)
611 offset = SLL_HEADER_LEN;
612 else
613 offset = 0;
e80b30c0
EL
614 iov.iov_len = ptv->datalen - offset;
615 iov.iov_base = ptv->data + offset;
c45d8985
EL
616
617 caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
618
619 if (caplen < 0) {
620 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
621 errno);
62e63e3f 622 SCReturnInt(AFP_READ_FAILURE);
c45d8985 623 }
ff6365dd
EL
624
625 p = PacketGetFromQueueOrAlloc();
c45d8985 626 if (p == NULL) {
9efa4ace 627 SCReturnInt(AFP_SURI_FAILURE);
c45d8985 628 }
b33986c8 629 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
630 if (ptv->flags & AFP_BYPASS) {
631 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585
EL
632#ifdef HAVE_PACKET_EBPF
633 p->afp_v.v4_map_fd = ptv->v4_map_fd;
634 p->afp_v.v6_map_fd = ptv->v6_map_fd;
635#endif
06173267 636 }
8c880879
EL
637 if (ptv->flags & AFP_XDPBYPASS) {
638 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585
EL
639#ifdef HAVE_PACKET_EBPF
640 p->afp_v.v4_map_fd = ptv->v4_map_fd;
641 p->afp_v.v6_map_fd = ptv->v6_map_fd;
642#endif
8c880879 643 }
c45d8985
EL
644
645 /* get timestamp of packet via ioctl */
646 if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
647 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
648 errno);
649 TmqhOutputPacketpool(ptv->tv, p);
62e63e3f 650 SCReturnInt(AFP_READ_FAILURE);
c45d8985
EL
651 }
652
653 ptv->pkts++;
51eb9605 654 p->livedev = ptv->livedev;
c45d8985
EL
655
656 /* add forged header */
657 if (ptv->cooked) {
e80b30c0 658 SllHdr * hdrp = (SllHdr *)ptv->data;
c45d8985
EL
659 /* XXX this is minimalist, but this seems enough */
660 hdrp->sll_protocol = from.sll_protocol;
661 }
662
663 p->datalink = ptv->datalink;
664 SET_PKT_LEN(p, caplen + offset);
e80b30c0 665 if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
c45d8985 666 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 667 SCReturnInt(AFP_SURI_FAILURE);
c45d8985 668 }
e80b30c0
EL
669 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
670 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
671
6062e00c
EL
672 /* We only check for checksum disable */
673 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
51eb9605
EL
674 p->flags |= PKT_IGNORE_CHECKSUM;
675 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
676 if (ptv->livedev->ignore_checksum) {
677 p->flags |= PKT_IGNORE_CHECKSUM;
a565148f 678 } else if (ChecksumAutoModeCheck(ptv->pkts,
51eb9605
EL
679 SC_ATOMIC_GET(ptv->livedev->pkts),
680 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
681 ptv->livedev->ignore_checksum = 1;
6062e00c 682 p->flags |= PKT_IGNORE_CHECKSUM;
51eb9605 683 }
6062e00c 684 } else {
6efd37a3
EL
685 aux_checksum = 1;
686 }
6062e00c 687
6efd37a3
EL
688 /* List is NULL if we don't have activated auxiliary data */
689 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
690 struct tpacket_auxdata *aux;
f6ddaf33 691
6efd37a3
EL
692 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
693 cmsg->cmsg_level != SOL_PACKET ||
694 cmsg->cmsg_type != PACKET_AUXDATA)
695 continue;
f6ddaf33 696
6efd37a3
EL
697 aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
698
699 if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
700 p->flags |= PKT_IGNORE_CHECKSUM;
f6ddaf33 701 }
6efd37a3 702 break;
f6ddaf33
EL
703 }
704
c469824b
EL
705 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
706 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 707 SCReturnInt(AFP_SURI_FAILURE);
c469824b 708 }
62e63e3f 709 SCReturnInt(AFP_READ_OK);
c45d8985
EL
710}
711
ecf59be4
EL
712/**
713 * \brief AF packet write function.
714 *
715 * This function has to be called before the memory
716 * related to Packet in ring buffer is released.
717 *
718 * \param pointer to Packet
719 * \param version of capture: TPACKET_V2 or TPACKET_V3
720 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
721 *
722 */
723static TmEcode AFPWritePacket(Packet *p, int version)
662dccd8
EL
724{
725 struct sockaddr_ll socket_address;
726 int socket;
ecf59be4
EL
727 uint8_t *pstart;
728 size_t plen;
ee7e689b
AG
729 union thdr h;
730 uint16_t vlan_tci = 0;
662dccd8
EL
731
732 if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
3f107fa1 733 if (PACKET_TEST_ACTION(p, ACTION_DROP)) {
662dccd8
EL
734 return TM_ECODE_OK;
735 }
736 }
737
738 if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
739 return TM_ECODE_OK;
740
741 if (p->ethh == NULL) {
742 SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
743 return TM_ECODE_FAILED;
744 }
745 /* Index of the network device */
746 socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
747 /* Address length*/
748 socket_address.sll_halen = ETH_ALEN;
749 /* Destination MAC */
750 memcpy(socket_address.sll_addr, p->ethh, 6);
751
752 /* Send packet, locking the socket if necessary */
753 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
754 SCMutexLock(&p->afp_v.peer->sock_protect);
755 socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
ecf59be4 756
ee7e689b
AG
757 h.raw = p->afp_v.relptr;
758
ecf59be4 759 if (version == TPACKET_V2) {
ecf59be4
EL
760 /* Copy VLAN header from ring memory. For post june 2011 kernel we test
761 * the flag. It is not defined for older kernel so we go best effort
762 * and test for non zero value of the TCI header. */
763 if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
ee7e689b
AG
764 vlan_tci = h.h2->tp_vlan_tci;
765 }
766 } else {
767#ifdef HAVE_TPACKET_V3
768 if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
769 vlan_tci = h.h3->hv1.tp_vlan_tci;
ecf59be4 770 }
ee7e689b
AG
771#else
772 /* Should not get here */
773 BUG_ON(1);
774#endif
775 }
776
777 if (vlan_tci != 0) {
778 pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
779 plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
780 /* move ethernet addresses */
781 memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
782 /* write vlan info */
783 *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
784 *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
ecf59be4
EL
785 } else {
786 pstart = GET_PKT_DATA(p);
787 plen = GET_PKT_LEN(p);
788 }
789
790 if (sendto(socket, pstart, plen, 0,
662dccd8
EL
791 (struct sockaddr*) &socket_address,
792 sizeof(struct sockaddr_ll)) < 0) {
793 SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
794 socket,
795 strerror(errno));
796 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
797 SCMutexUnlock(&p->afp_v.peer->sock_protect);
798 return TM_ECODE_FAILED;
799 }
800 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
801 SCMutexUnlock(&p->afp_v.peer->sock_protect);
802
803 return TM_ECODE_OK;
804}
805
ab1200fb 806static void AFPReleaseDataFromRing(Packet *p)
2011a3f8 807{
662dccd8
EL
808 /* Need to be in copy mode and need to detect early release
809 where Ethernet header could not be set (and pseudo packet) */
810 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
ecf59be4 811 AFPWritePacket(p, TPACKET_V2);
662dccd8 812 }
13f13b6d
EL
813
814 if (AFPDerefSocket(p->afp_v.mpeer) == 0)
680e941a 815 goto cleanup;
13f13b6d 816
2011a3f8
EL
817 if (p->afp_v.relptr) {
818 union thdr h;
819 h.raw = p->afp_v.relptr;
820 h.h2->tp_status = TP_STATUS_KERNEL;
2011a3f8 821 }
680e941a
EL
822
823cleanup:
824 AFPV_CLEANUP(&p->afp_v);
b076a26c
KS
825}
826
ecf59be4 827#ifdef HAVE_TPACKET_V3
ab1200fb 828static void AFPReleasePacketV3(Packet *p)
bae1b03c
EL
829{
830 /* Need to be in copy mode and need to detect early release
831 where Ethernet header could not be set (and pseudo packet) */
832 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
ecf59be4 833 AFPWritePacket(p, TPACKET_V3);
bae1b03c
EL
834 }
835 PacketFreeOrRelease(p);
836}
ecf59be4 837#endif
bae1b03c 838
ab1200fb 839static void AFPReleasePacket(Packet *p)
b076a26c
KS
840{
841 AFPReleaseDataFromRing(p);
842 PacketFreeOrRelease(p);
2011a3f8
EL
843}
844
49b7b00f
EL
845/**
846 * \brief AF packet read function for ring
847 *
848 * This function fills
849 * From here the packets are picked up by the DecodeAFP thread.
850 *
851 * \param user pointer to AFPThreadVars
852 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
853 */
ab1200fb 854static int AFPReadFromRing(AFPThreadVars *ptv)
49b7b00f
EL
855{
856 Packet *p = NULL;
857 union thdr h;
27b5136b 858 uint8_t emergency_flush = 0;
4d8f70c6 859 int read_pkts = 0;
b26ec603 860 int loop_start = -1;
4d8f70c6 861
49b7b00f 862
a369f8c3
EL
863 /* Loop till we have packets available */
864 while (1) {
53c02334
AS
865 if (unlikely(suricata_ctl_flags != 0)) {
866 break;
867 }
868
a369f8c3 869 /* Read packet from ring */
b797fd92 870 h.raw = (((union thdr **)ptv->ring_v2)[ptv->frame_offset]);
9efa4ace
EL
871 if (unlikely(h.raw == NULL)) {
872 /* Impossible we reach this point in normal condition, so trigger
873 * a failure in reading */
874 SCReturnInt(AFP_READ_FAILURE);
34b3f194 875 }
662dccd8 876
82a2dd85 877 if ((! h.h2->tp_status) || (h.h2->tp_status & TP_STATUS_USER_BUSY)) {
4d8f70c6 878 if (read_pkts == 0) {
b26ec603
EL
879 if (loop_start == -1) {
880 loop_start = ptv->frame_offset;
881 } else if (unlikely(loop_start == (int)ptv->frame_offset)) {
882 SCReturnInt(AFP_READ_OK);
883 }
884 if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
885 ptv->frame_offset = 0;
886 }
887 continue;
4d8f70c6 888 }
27b5136b
EL
889 if ((emergency_flush) && (ptv->flags & AFP_EMERGENCY_MODE)) {
890 SCReturnInt(AFP_KERNEL_DROP);
891 } else {
892 SCReturnInt(AFP_READ_OK);
893 }
894 }
4d8f70c6
EL
895
896 read_pkts++;
b26ec603 897 loop_start = -1;
4d8f70c6 898
4a1a0080
EL
899 /* Our packet is still used by suricata, we exit read loop to
900 * gain some time */
901 if (h.h2->tp_status & TP_STATUS_USER_BUSY) {
902 SCReturnInt(AFP_READ_OK);
903 }
904
27b5136b
EL
905 if ((ptv->flags & AFP_EMERGENCY_MODE) && (emergency_flush == 1)) {
906 h.h2->tp_status = TP_STATUS_KERNEL;
907 goto next_frame;
a369f8c3
EL
908 }
909
910 p = PacketGetFromQueueOrAlloc();
911 if (p == NULL) {
9efa4ace 912 SCReturnInt(AFP_SURI_FAILURE);
34b3f194 913 }
b33986c8 914 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
915 if (ptv->flags & AFP_BYPASS) {
916 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585 917#ifdef HAVE_PACKET_EBPF
6062c27e
EL
918 p->afp_v.v4_map_fd = ptv->v4_map_fd;
919 p->afp_v.v6_map_fd = ptv->v6_map_fd;
d65f4585 920#endif
06173267 921 }
8c880879
EL
922 if (ptv->flags & AFP_XDPBYPASS) {
923 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585 924#ifdef HAVE_PACKET_EBPF
6062c27e
EL
925 p->afp_v.v4_map_fd = ptv->v4_map_fd;
926 p->afp_v.v6_map_fd = ptv->v6_map_fd;
d65f4585 927#endif
8c880879 928 }
49b7b00f 929
4a1a0080
EL
930 /* Suricata will treat packet so telling it is busy, this
931 * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
932 * function. */
933 h.h2->tp_status |= TP_STATUS_USER_BUSY;
934
a369f8c3 935 ptv->pkts++;
a369f8c3 936 p->livedev = ptv->livedev;
a369f8c3 937 p->datalink = ptv->datalink;
d0940396 938
a369f8c3
EL
939 if (h.h2->tp_len > h.h2->tp_snaplen) {
940 SCLogDebug("Packet length (%d) > snaplen (%d), truncating",
941 h.h2->tp_len, h.h2->tp_snaplen);
942 }
71e47868
EL
943
944 /* get vlan id from header */
9500d12c 945 if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
e871f713 946 (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
01a8cc4e 947 p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
71e47868
EL
948 p->vlan_idx = 1;
949 p->vlanh[0] = NULL;
950 }
951
a369f8c3
EL
952 if (ptv->flags & AFP_ZERO_COPY) {
953 if (PacketSetData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
954 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 955 SCReturnInt(AFP_SURI_FAILURE);
662dccd8 956 } else {
0f2b3406 957 p->afp_v.relptr = h.raw;
b076a26c 958 p->ReleasePacket = AFPReleasePacket;
5f12b234
EL
959 p->afp_v.mpeer = ptv->mpeer;
960 AFPRefSocket(ptv->mpeer);
0f2b3406
EL
961
962 p->afp_v.copy_mode = ptv->copy_mode;
963 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
964 p->afp_v.peer = ptv->mpeer->peer;
965 } else {
966 p->afp_v.peer = NULL;
662dccd8 967 }
a369f8c3
EL
968 }
969 } else {
970 if (PacketCopyData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
9efa4ace
EL
971 /* As we can possibly fail to copy the data due to invalid data, let's
972 * skip this packet and switch to the next one.
973 */
974 h.h2->tp_status = TP_STATUS_KERNEL;
975 if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
976 ptv->frame_offset = 0;
977 }
a369f8c3 978 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 979 SCReturnInt(AFP_SURI_FAILURE);
a369f8c3
EL
980 }
981 }
d65f4585 982
a369f8c3
EL
983 /* Timestamp */
984 p->ts.tv_sec = h.h2->tp_sec;
985 p->ts.tv_usec = h.h2->tp_nsec/1000;
986 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
987 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
988
989 /* We only check for checksum disable */
990 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
991 p->flags |= PKT_IGNORE_CHECKSUM;
992 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
993 if (ptv->livedev->ignore_checksum) {
994 p->flags |= PKT_IGNORE_CHECKSUM;
995 } else if (ChecksumAutoModeCheck(ptv->pkts,
996 SC_ATOMIC_GET(ptv->livedev->pkts),
997 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
998 ptv->livedev->ignore_checksum = 1;
999 p->flags |= PKT_IGNORE_CHECKSUM;
1000 }
1001 } else {
1002 if (h.h2->tp_status & TP_STATUS_CSUMNOTREADY) {
49b7b00f 1003 p->flags |= PKT_IGNORE_CHECKSUM;
a369f8c3 1004 }
ee6ba099
EL
1005 }
1006 if (h.h2->tp_status & TP_STATUS_LOSING) {
1007 emergency_flush = 1;
e8a4a4c4 1008 AFPDumpCounters(ptv);
a369f8c3
EL
1009 }
1010
5f12b234
EL
1011 /* release frame if not in zero copy mode */
1012 if (!(ptv->flags & AFP_ZERO_COPY)) {
1013 h.h2->tp_status = TP_STATUS_KERNEL;
1014 }
1015
a369f8c3
EL
1016 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
1017 h.h2->tp_status = TP_STATUS_KERNEL;
1018 if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
1019 ptv->frame_offset = 0;
1020 }
1021 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1022 SCReturnInt(AFP_SURI_FAILURE);
49b7b00f 1023 }
49b7b00f 1024
27b5136b 1025next_frame:
34b3f194
EL
1026 if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
1027 ptv->frame_offset = 0;
350d7619
EL
1028 /* Get out of loop to be sure we will reach maintenance tasks */
1029 SCReturnInt(AFP_READ_OK);
34b3f194 1030 }
34b3f194
EL
1031 }
1032
49b7b00f
EL
1033 SCReturnInt(AFP_READ_OK);
1034}
1035
f947539d 1036#ifdef HAVE_TPACKET_V3
bae1b03c
EL
1037static inline void AFPFlushBlock(struct tpacket_block_desc *pbd)
1038{
1039 pbd->hdr.bh1.block_status = TP_STATUS_KERNEL;
1040}
1041
1042static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc *pbd, struct tpacket3_hdr *ppd)
1043{
1044 Packet *p = PacketGetFromQueueOrAlloc();
1045 if (p == NULL) {
9efa4ace 1046 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1047 }
1048 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
1049 if (ptv->flags & AFP_BYPASS) {
1050 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585 1051#ifdef HAVE_PACKET_EBPF
6062c27e
EL
1052 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1053 p->afp_v.v6_map_fd = ptv->v6_map_fd;
d65f4585 1054#endif
e98b5e49 1055 } else if (ptv->flags & AFP_XDPBYPASS) {
8c880879 1056 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585 1057#ifdef HAVE_PACKET_EBPF
6062c27e
EL
1058 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1059 p->afp_v.v6_map_fd = ptv->v6_map_fd;
d65f4585 1060#endif
8c880879 1061 }
bae1b03c
EL
1062
1063 ptv->pkts++;
bae1b03c
EL
1064 p->livedev = ptv->livedev;
1065 p->datalink = ptv->datalink;
1066
e41a9d63
AG
1067 if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
1068 (ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
1069 p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
1070 p->vlan_idx = 1;
1071 p->vlanh[0] = NULL;
1072 }
1073
bae1b03c
EL
1074 if (ptv->flags & AFP_ZERO_COPY) {
1075 if (PacketSetData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1076 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1077 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c 1078 }
310b27a1 1079 p->afp_v.relptr = ppd;
bae1b03c
EL
1080 p->ReleasePacket = AFPReleasePacketV3;
1081 p->afp_v.mpeer = ptv->mpeer;
1082 AFPRefSocket(ptv->mpeer);
1083
1084 p->afp_v.copy_mode = ptv->copy_mode;
1085 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
1086 p->afp_v.peer = ptv->mpeer->peer;
1087 } else {
1088 p->afp_v.peer = NULL;
1089 }
1090 } else {
1091 if (PacketCopyData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1092 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1093 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1094 }
1095 }
1096 /* Timestamp */
1097 p->ts.tv_sec = ppd->tp_sec;
1098 p->ts.tv_usec = ppd->tp_nsec/1000;
1099 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
1100 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
1101
1102 /* We only check for checksum disable */
1103 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
1104 p->flags |= PKT_IGNORE_CHECKSUM;
1105 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
1106 if (ptv->livedev->ignore_checksum) {
1107 p->flags |= PKT_IGNORE_CHECKSUM;
1108 } else if (ChecksumAutoModeCheck(ptv->pkts,
1109 SC_ATOMIC_GET(ptv->livedev->pkts),
1110 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
1111 ptv->livedev->ignore_checksum = 1;
1112 p->flags |= PKT_IGNORE_CHECKSUM;
1113 }
1114 } else {
1115 if (ppd->tp_status & TP_STATUS_CSUMNOTREADY) {
1116 p->flags |= PKT_IGNORE_CHECKSUM;
1117 }
1118 }
1119
1120 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
bae1b03c 1121 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1122 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1123 }
1124
1125 SCReturnInt(AFP_READ_OK);
1126}
1127
1128static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
1129{
1130 int num_pkts = pbd->hdr.bh1.num_pkts, i;
1131 uint8_t *ppd;
9efa4ace 1132 int ret = 0;
bae1b03c
EL
1133
1134 ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
1135 for (i = 0; i < num_pkts; ++i) {
9efa4ace
EL
1136 ret = AFPParsePacketV3(ptv, pbd,
1137 (struct tpacket3_hdr *)ppd);
1138 switch (ret) {
1139 case AFP_READ_OK:
1140 break;
1141 case AFP_SURI_FAILURE:
1142 /* Internal error but let's just continue and
1143 * treat thenext packet */
1144 break;
1145 case AFP_READ_FAILURE:
1146 SCReturnInt(AFP_READ_FAILURE);
1147 default:
1148 SCReturnInt(ret);
5f84b55d 1149 }
bae1b03c
EL
1150 ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset;
1151 }
1152
1153 SCReturnInt(AFP_READ_OK);
1154}
f947539d 1155#endif /* HAVE_TPACKET_V3 */
bae1b03c
EL
1156
1157/**
1158 * \brief AF packet read function for ring
1159 *
1160 * This function fills
1161 * From here the packets are picked up by the DecodeAFP thread.
1162 *
1163 * \param user pointer to AFPThreadVars
1164 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1165 */
ab1200fb 1166static int AFPReadFromRingV3(AFPThreadVars *ptv)
bae1b03c 1167{
c2d0d938 1168#ifdef HAVE_TPACKET_V3
bae1b03c 1169 struct tpacket_block_desc *pbd;
9efa4ace 1170 int ret = 0;
bae1b03c
EL
1171
1172 /* Loop till we have packets available */
1173 while (1) {
1174 if (unlikely(suricata_ctl_flags != 0)) {
1175 SCLogInfo("Exiting AFP V3 read loop");
1176 break;
1177 }
1178
b797fd92 1179 pbd = (struct tpacket_block_desc *) ptv->ring_v3[ptv->frame_offset].iov_base;
bae1b03c
EL
1180
1181 /* block is not ready to be read */
1182 if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
1183 SCReturnInt(AFP_READ_OK);
1184 }
1185
9efa4ace
EL
1186 ret = AFPWalkBlock(ptv, pbd);
1187 if (unlikely(ret != AFP_READ_OK)) {
bae1b03c 1188 AFPFlushBlock(pbd);
9efa4ace 1189 SCReturnInt(ret);
bae1b03c
EL
1190 }
1191
1192 AFPFlushBlock(pbd);
1193 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req3.tp_block_nr;
1194 /* return to maintenance task after one loop on the ring */
1195 if (ptv->frame_offset == 0) {
1196 SCReturnInt(AFP_READ_OK);
1197 }
1198 }
c2d0d938 1199#endif
bae1b03c
EL
1200 SCReturnInt(AFP_READ_OK);
1201}
1202
13f13b6d
EL
1203/**
1204 * \brief Reference socket
1205 *
1206 * \retval O in case of failure, 1 in case of success
1207 */
1208static int AFPRefSocket(AFPPeer* peer)
1209{
1210 if (unlikely(peer == NULL))
1211 return 0;
1212
1213 (void)SC_ATOMIC_ADD(peer->sock_usage, 1);
1214 return 1;
1215}
1216
1217
1218/**
1219 * \brief Dereference socket
1220 *
1221 * \retval 1 if socket is still alive, 0 if not
1222 */
1223static int AFPDerefSocket(AFPPeer* peer)
1224{
4424f5a2
EL
1225 if (peer == NULL)
1226 return 1;
1227
13f13b6d
EL
1228 if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 0) {
1229 if (SC_ATOMIC_GET(peer->state) == AFP_STATE_DOWN) {
1230 SCLogInfo("Cleaning socket connected to '%s'", peer->iface);
1231 close(SC_ATOMIC_GET(peer->socket));
1232 return 0;
1233 }
1234 }
1235 return 1;
1236}
1237
ab1200fb 1238static void AFPSwitchState(AFPThreadVars *ptv, int state)
13f13b6d
EL
1239{
1240 ptv->afp_state = state;
1241 ptv->down_count = 0;
49b7b00f 1242
13f13b6d
EL
1243 AFPPeerUpdate(ptv);
1244
1245 /* Do cleaning if switching to down state */
1246 if (state == AFP_STATE_DOWN) {
5f84b55d
EL
1247#ifdef HAVE_TPACKET_V3
1248 if (ptv->flags & AFP_TPACKET_V3) {
1249 if (!ptv->ring_v3) {
1250 SCFree(ptv->ring_v3);
1251 ptv->ring_v3 = NULL;
1252 }
1253 } else {
1254#endif
1255 if (ptv->ring_v2) {
1256 /* only used in reading phase, we can free it */
1257 SCFree(ptv->ring_v2);
1258 ptv->ring_v2 = NULL;
1259 }
1260#ifdef HAVE_TPACKET_V3
13f13b6d 1261 }
5f84b55d 1262#endif
13f13b6d
EL
1263 if (ptv->socket != -1) {
1264 /* we need to wait for all packets to return data */
1265 if (SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1) == 0) {
b86e1762 1266 SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
cba41207 1267 munmap(ptv->ring_buf, ptv->ring_buflen);
13f13b6d
EL
1268 close(ptv->socket);
1269 ptv->socket = -1;
1270 }
1271 }
1272 }
1273 if (state == AFP_STATE_UP) {
1274 (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
1275 }
1276}
49b7b00f 1277
7fea0ec6
EL
1278static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
1279 uint64_t *discarded_pkts)
919377d4
EL
1280{
1281 struct sockaddr_ll from;
1282 struct iovec iov;
1283 struct msghdr msg;
1284 struct timeval ts;
1285 union {
1286 struct cmsghdr cmsg;
1287 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
1288 } cmsg_buf;
1289
1290
1291 if (unlikely(suricata_ctl_flags != 0)) {
1292 return 1;
1293 }
1294
1295 msg.msg_name = &from;
1296 msg.msg_namelen = sizeof(from);
1297 msg.msg_iov = &iov;
1298 msg.msg_iovlen = 1;
1299 msg.msg_control = &cmsg_buf;
1300 msg.msg_controllen = sizeof(cmsg_buf);
1301 msg.msg_flags = 0;
1302
1303 iov.iov_len = ptv->datalen;
1304 iov.iov_base = ptv->data;
1305
339f0665 1306 (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
919377d4
EL
1307
1308 if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
1309 /* FIXME */
1310 return -1;
1311 }
1312
1313 if ((ts.tv_sec > synctv->tv_sec) ||
1314 (ts.tv_sec >= synctv->tv_sec &&
1315 ts.tv_usec > synctv->tv_usec)) {
1316 return 1;
1317 }
1318 return 0;
1319}
1320
7fea0ec6
EL
1321static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
1322 uint64_t *discarded_pkts)
919377d4
EL
1323{
1324 union thdr h;
1325
1326 if (unlikely(suricata_ctl_flags != 0)) {
1327 return 1;
1328 }
1329
f947539d 1330#ifdef HAVE_TPACKET_V3
bae1b03c 1331 if (ptv->flags & AFP_TPACKET_V3) {
cebbe06f 1332 int ret = 0;
7fea0ec6
EL
1333 struct tpacket_block_desc *pbd;
1334 pbd = (struct tpacket_block_desc *) ptv->ring_v3[ptv->frame_offset].iov_base;
1335 *discarded_pkts += pbd->hdr.bh1.num_pkts;
cebbe06f
VJ
1336 struct tpacket3_hdr *ppd =
1337 (struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
1338 if (((time_t)ppd->tp_sec > synctv->tv_sec) ||
1339 ((time_t)ppd->tp_sec == synctv->tv_sec &&
1340 (suseconds_t) (ppd->tp_nsec / 1000) > (suseconds_t)synctv->tv_usec)) {
1341 ret = 1;
1342 }
7fea0ec6
EL
1343 AFPFlushBlock(pbd);
1344 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req3.tp_block_nr;
cebbe06f 1345 return ret;
f947539d
VJ
1346
1347 } else
1348#endif
1349 {
7fea0ec6
EL
1350 /* Read packet from ring */
1351 h.raw = (((union thdr **)ptv->ring_v2)[ptv->frame_offset]);
1352 if (h.raw == NULL) {
1353 return -1;
1354 }
1355 (*discarded_pkts)++;
1356 if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
1357 ((time_t)h.h2->tp_sec == synctv->tv_sec &&
1358 (suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
1359 return 1;
1360 }
919377d4 1361
7fea0ec6
EL
1362 h.h2->tp_status = TP_STATUS_KERNEL;
1363 if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
1364 ptv->frame_offset = 0;
1365 }
919377d4
EL
1366 }
1367
1368
1369 return 0;
1370}
1371
806844d8
VJ
1372/** \brief wait for all afpacket threads to fully init
1373 *
1374 * Discard packets before all threads are ready, as the cluster
1375 * setup is not complete yet.
1376 *
1377 * if AFPPeersListStarted() returns true init is complete
1378 *
1379 * \retval r 1 = happy, otherwise unhappy
1380 */
7fea0ec6 1381static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
919377d4
EL
1382{
1383 int r;
1384 struct timeval synctv;
806844d8
VJ
1385 struct pollfd fds;
1386
1387 fds.fd = ptv->socket;
1388 fds.events = POLLIN;
919377d4
EL
1389
1390 /* Set timeval to end of the world */
1391 synctv.tv_sec = 0xffffffff;
1392 synctv.tv_usec = 0xffffffff;
1393
1394 while (1) {
806844d8
VJ
1395 r = poll(&fds, 1, POLL_TIMEOUT);
1396 if (r > 0 &&
1397 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1398 SCLogWarning(SC_ERR_AFP_READ, "poll failed %02x",
1399 fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL));
1400 return 0;
1401 } else if (r > 0) {
1402 if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
1403 gettimeofday(&synctv, NULL);
1404 }
1405 if (ptv->flags & AFP_RING_MODE) {
7fea0ec6 1406 r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
806844d8 1407 } else {
7fea0ec6 1408 r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
806844d8
VJ
1409 }
1410 SCLogDebug("Discarding on %s", ptv->tv->name);
1411 switch (r) {
1412 case 1:
9f7ba071 1413 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8
VJ
1414 return 1;
1415 case -1:
1416 return r;
1417 }
1418 /* no packets */
1419 } else if (r == 0 && AFPPeersListStarted()) {
86a3f064 1420 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8 1421 return 1;
43b6cbd4 1422 } else if (r < 0) { /* only exit on error */
806844d8
VJ
1423 SCLogWarning(SC_ERR_AFP_READ, "poll failed with retval %d", r);
1424 return 0;
919377d4
EL
1425 }
1426 }
1427 return 1;
1428}
1429
13f13b6d
EL
1430/**
1431 * \brief Try to reopen socket
1432 *
1433 * \retval 0 in case of success, negative if error occurs or a condition
1434 * is not met.
1435 */
c45d8985
EL
1436static int AFPTryReopen(AFPThreadVars *ptv)
1437{
1438 int afp_activate_r;
1439
13f13b6d
EL
1440 ptv->down_count++;
1441
13f13b6d
EL
1442 /* Don't reconnect till we have packet that did not release data */
1443 if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
1444 return -1;
1445 }
c45d8985 1446
e80b30c0 1447 afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
c45d8985 1448 if (afp_activate_r != 0) {
13f13b6d
EL
1449 if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
1450 SCLogWarning(SC_ERR_AFP_CREATE, "Can not open iface '%s'",
1451 ptv->iface);
1452 }
c45d8985
EL
1453 return afp_activate_r;
1454 }
1455
3bea3b39 1456 SCLogInfo("Interface '%s' is back", ptv->iface);
c45d8985
EL
1457 return 0;
1458}
1459
e80b30c0
EL
1460/**
1461 * \brief Main AF_PACKET reading Loop function
1462 */
1463TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
1464{
34581ce9
AS
1465 SCEnter();
1466
e80b30c0 1467 AFPThreadVars *ptv = (AFPThreadVars *)data;
e80b30c0
EL
1468 struct pollfd fds;
1469 int r;
34581ce9 1470 TmSlot *s = (TmSlot *)slot;
e8a4a4c4 1471 time_t last_dump = 0;
49612128 1472 time_t current_time;
5f400785 1473 int (*AFPReadFunc) (AFPThreadVars *);
7fea0ec6 1474 uint64_t discarded_pkts = 0;
e80b30c0 1475
34581ce9 1476 ptv->slot = s->slot_next;
e80b30c0 1477
5f400785 1478 if (ptv->flags & AFP_RING_MODE) {
bae1b03c
EL
1479 if (ptv->flags & AFP_TPACKET_V3) {
1480 AFPReadFunc = AFPReadFromRingV3;
1481 } else {
1482 AFPReadFunc = AFPReadFromRing;
1483 }
5f400785
EL
1484 } else {
1485 AFPReadFunc = AFPRead;
1486 }
1487
60400163
EL
1488 if (ptv->afp_state == AFP_STATE_DOWN) {
1489 /* Wait for our turn, threads before us must have opened the socket */
1490 while (AFPPeersListWaitTurn(ptv->mpeer)) {
1491 usleep(1000);
1992a227
EL
1492 if (suricata_ctl_flags != 0) {
1493 break;
1494 }
60400163
EL
1495 }
1496 r = AFPCreateSocket(ptv, ptv->iface, 1);
1497 if (r < 0) {
1992a227
EL
1498 switch (-r) {
1499 case AFP_FATAL_ERROR:
1500 SCLogError(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, fatal error");
1992a227
EL
1501 SCReturnInt(TM_ECODE_FAILED);
1502 case AFP_RECOVERABLE_ERROR:
1503 SCLogWarning(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, retrying soon");
1504 }
60400163
EL
1505 }
1506 AFPPeersListReachedInc();
1507 }
1508 if (ptv->afp_state == AFP_STATE_UP) {
86a3f064 1509 SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
c99dc5a7 1510 AFPSynchronizeStart(ptv, &discarded_pkts);
7fea0ec6
EL
1511 /* let's reset counter as we will start the capture at the
1512 * next function call */
1513#ifdef PACKET_STATISTICS
1514 struct tpacket_stats kstats;
1515 socklen_t len = sizeof (struct tpacket_stats);
1516 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
1517 &kstats, &len) > -1) {
1518 uint64_t pkts = 0;
1519 SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
1520 ", dropped %" PRIu32 "",
1521 ptv->tv->name,
1522 kstats.tp_packets, kstats.tp_drops);
1523 pkts = kstats.tp_packets - discarded_pkts - kstats.tp_drops;
1524 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, pkts);
1525 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, pkts);
1526 }
1527#endif
60400163
EL
1528 }
1529
e80b30c0
EL
1530 fds.fd = ptv->socket;
1531 fds.events = POLLIN;
1532
1533 while (1) {
1534 /* Start by checking the state of our interface */
1535 if (unlikely(ptv->afp_state == AFP_STATE_DOWN)) {
1536 int dbreak = 0;
662dccd8 1537
e80b30c0
EL
1538 do {
1539 usleep(AFP_RECONNECT_TIMEOUT);
1540 if (suricata_ctl_flags != 0) {
1541 dbreak = 1;
1542 break;
1543 }
1544 r = AFPTryReopen(ptv);
09e709d1 1545 fds.fd = ptv->socket;
e80b30c0
EL
1546 } while (r < 0);
1547 if (dbreak == 1)
1548 break;
1549 }
1550
1551 /* make sure we have at least one packet in the packet pool, to prevent
1552 * us from alloc'ing packets at line rate */
3c6e01f6 1553 PacketPoolWait();
e80b30c0
EL
1554
1555 r = poll(&fds, 1, POLL_TIMEOUT);
1556
1557 if (suricata_ctl_flags != 0) {
1558 break;
1559 }
1560
1561 if (r > 0 &&
1562 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1563 if (fds.revents & (POLLHUP | POLLRDHUP)) {
13f13b6d 1564 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1565 continue;
ff6365dd 1566 } else if (fds.revents & POLLERR) {
e80b30c0
EL
1567 char c;
1568 /* Do a recv to get errno */
1569 if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
1570 continue; /* what, no error? */
3bea3b39 1571 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1572 "Error reading data from iface '%s': (%d) %s",
3bea3b39 1573 ptv->iface, errno, strerror(errno));
13f13b6d 1574 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1575 continue;
ff6365dd 1576 } else if (fds.revents & POLLNVAL) {
e80b30c0 1577 SCLogError(SC_ERR_AFP_READ, "Invalid polling request");
13f13b6d 1578 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1579 continue;
1580 }
1581 } else if (r > 0) {
5f400785 1582 r = AFPReadFunc(ptv);
62e63e3f 1583 switch (r) {
27adbfa8
EL
1584 case AFP_READ_OK:
1585 /* Trigger one dump of stats every second */
49612128
EL
1586 current_time = time(NULL);
1587 if (current_time != last_dump) {
27adbfa8 1588 AFPDumpCounters(ptv);
49612128 1589 last_dump = current_time;
27adbfa8
EL
1590 }
1591 break;
62e63e3f
EL
1592 case AFP_READ_FAILURE:
1593 /* AFPRead in error: best to reset the socket */
3bea3b39 1594 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1595 "AFPRead error reading data from iface '%s': (%d) %s",
3bea3b39 1596 ptv->iface, errno, strerror(errno));
13f13b6d 1597 AFPSwitchState(ptv, AFP_STATE_DOWN);
62e63e3f 1598 continue;
9efa4ace
EL
1599 case AFP_SURI_FAILURE:
1600 StatsIncr(ptv->tv, ptv->capture_errors);
62e63e3f 1601 break;
27b5136b 1602 case AFP_KERNEL_DROP:
e8a4a4c4 1603 AFPDumpCounters(ptv);
27b5136b 1604 break;
e80b30c0 1605 }
11099cfa 1606 } else if (unlikely(r == 0)) {
f53e687b
EL
1607 /* Trigger one dump of stats every second */
1608 current_time = time(NULL);
1609 if (current_time != last_dump) {
1610 AFPDumpCounters(ptv);
1611 last_dump = current_time;
1612 }
11099cfa
VJ
1613 /* poll timed out, lets see if we need to inject a fake packet */
1614 TmThreadsCaptureInjectPacket(tv, ptv->slot, NULL);
1615
e80b30c0 1616 } else if ((r < 0) && (errno != EINTR)) {
efbb5ce0 1617 SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d) %s",
3bea3b39 1618 ptv->iface,
e80b30c0 1619 errno, strerror(errno));
13f13b6d 1620 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1621 continue;
1622 }
752f03e7 1623 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1624 }
1625
4e561d6b 1626 AFPDumpCounters(ptv);
752f03e7 1627 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1628 SCReturnInt(TM_ECODE_OK);
1629}
1630
13f13b6d
EL
1631static int AFPGetDevFlags(int fd, const char *ifname)
1632{
1633 struct ifreq ifr;
1634
1635 memset(&ifr, 0, sizeof(ifr));
1636 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1637
1638 if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) {
1639 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1640 ifname, strerror(errno));
1641 return -1;
1642 }
1643
1644 return ifr.ifr_flags;
1645}
1646
1647
e80b30c0 1648static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose)
c45d8985
EL
1649{
1650 struct ifreq ifr;
1651
1652 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1653 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1654
1655 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
cba41207
AG
1656 if (verbose)
1657 SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
1658 ifname, strerror(errno));
c45d8985
EL
1659 return -1;
1660 }
1661
1662 return ifr.ifr_ifindex;
1663}
1664
e80b30c0 1665static int AFPGetDevLinktype(int fd, const char *ifname)
c45d8985
EL
1666{
1667 struct ifreq ifr;
1668
1669 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1670 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1671
1672 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1673 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1674 ifname, strerror(errno));
1675 return -1;
1676 }
1677
e80b30c0
EL
1678 switch (ifr.ifr_hwaddr.sa_family) {
1679 case ARPHRD_LOOPBACK:
1680 return LINKTYPE_ETHERNET;
1681 case ARPHRD_PPP:
11eb1d7c 1682 case ARPHRD_NONE:
e80b30c0
EL
1683 return LINKTYPE_RAW;
1684 default:
1685 return ifr.ifr_hwaddr.sa_family;
1686 }
c45d8985
EL
1687}
1688
b7bf299e
EL
1689int AFPGetLinkType(const char *ifname)
1690{
1691 int ltype;
1692
1693 int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1694 if (fd == -1) {
1695 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
1696 return LINKTYPE_RAW;
1697 }
1698
1699 ltype = AFPGetDevLinktype(fd, ifname);
1700 close(fd);
1701
1702 return ltype;
1703}
1704
49b7b00f
EL
1705static int AFPComputeRingParams(AFPThreadVars *ptv, int order)
1706{
1707 /* Compute structure:
1708 Target is to store all pending packets
1709 with a size equal to MTU + auxdata
1710 And we keep a decent number of block
1711
1712 To do so:
1713 Compute frame_size (aligned to be able to fit in block
1714 Check which block size we need. Blocksize is a 2^n * pagesize
1715 We then need to get order, big enough to have
1716 frame_size < block size
1717 Find number of frame per block (divide)
1718 Fill in packet_req
1719
1720 Compute frame size:
1721 described in packet_mmap.txt
1722 dependant on snaplen (need to use a variable ?)
1723snaplen: MTU ?
1724tp_hdrlen determine_version in daq_afpacket
1725in V1: sizeof(struct tpacket_hdr);
1726in V2: val in getsockopt(instance->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len)
1727frame size: TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1728
1729 */
1730 int tp_hdrlen = sizeof(struct tpacket_hdr);
1731 int snaplen = default_packet_size;
1732
03032457
EL
1733 if (snaplen == 0) {
1734 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1735 if (snaplen <= 0) {
1736 SCLogWarning(SC_ERR_INVALID_VALUE,
1737 "Unable to get MTU, setting snaplen to sane default of 1514");
1738 snaplen = 1514;
1739 }
1740 }
1741
49b7b00f
EL
1742 ptv->req.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1743 ptv->req.tp_block_size = getpagesize() << order;
1744 int frames_per_block = ptv->req.tp_block_size / ptv->req.tp_frame_size;
1745 if (frames_per_block == 0) {
bae1b03c 1746 SCLogError(SC_ERR_INVALID_VALUE, "Frame size bigger than block size");
49b7b00f
EL
1747 return -1;
1748 }
8879df80 1749 ptv->req.tp_frame_nr = ptv->ring_size;
d8d9b098 1750 ptv->req.tp_block_nr = ptv->req.tp_frame_nr / frames_per_block + 1;
49b7b00f
EL
1751 /* exact division */
1752 ptv->req.tp_frame_nr = ptv->req.tp_block_nr * frames_per_block;
b3bf7a57 1753 SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
49b7b00f
EL
1754 ptv->req.tp_block_size, ptv->req.tp_block_nr,
1755 ptv->req.tp_frame_size, ptv->req.tp_frame_nr);
1756 return 1;
1757}
1758
c2d0d938 1759#ifdef HAVE_TPACKET_V3
bae1b03c
EL
1760static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
1761{
fa902abe 1762 ptv->req3.tp_block_size = ptv->block_size;
bae1b03c
EL
1763 ptv->req3.tp_frame_size = 2048;
1764 int frames_per_block = 0;
1765 int tp_hdrlen = sizeof(struct tpacket3_hdr);
1766 int snaplen = default_packet_size;
1767
1768 if (snaplen == 0) {
1769 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1770 if (snaplen <= 0) {
1771 SCLogWarning(SC_ERR_INVALID_VALUE,
1772 "Unable to get MTU, setting snaplen to sane default of 1514");
1773 snaplen = 1514;
1774 }
1775 }
1776
1777 ptv->req.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1778 frames_per_block = ptv->req3.tp_block_size / ptv->req3.tp_frame_size;
1779
1780 if (frames_per_block == 0) {
1781 SCLogError(SC_ERR_INVALID_VALUE,
1782 "Block size is too small, it should be at least %d",
1783 ptv->req3.tp_frame_size);
1784 return -1;
1785 }
1786 ptv->req3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
1787 /* exact division */
1788 ptv->req3.tp_frame_nr = ptv->req3.tp_block_nr * frames_per_block;
234aefdf 1789 ptv->req3.tp_retire_blk_tov = ptv->block_timeout;
bae1b03c 1790 ptv->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
b3bf7a57 1791 SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
bae1b03c
EL
1792 ptv->req3.tp_block_size, ptv->req3.tp_block_nr,
1793 ptv->req3.tp_frame_size, ptv->req3.tp_frame_nr,
1794 ptv->req3.tp_block_size * ptv->req3.tp_block_nr
1795 );
1796 return 1;
1797}
c2d0d938 1798#endif
bae1b03c 1799
c7bde9df
EL
1800static int AFPSetupRing(AFPThreadVars *ptv, char *devname)
1801{
1802 int val;
1803 unsigned int len = sizeof(val), i;
c7bde9df 1804 int order;
f5c20191 1805 int r, mmap_flag;
c7bde9df 1806
c2d0d938 1807#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1808 if (ptv->flags & AFP_TPACKET_V3) {
1809 val = TPACKET_V3;
f947539d 1810 } else
c2d0d938 1811#endif
f947539d 1812 {
c7bde9df
EL
1813 val = TPACKET_V2;
1814 }
1815 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
1816 if (errno == ENOPROTOOPT) {
1817 if (ptv->flags & AFP_TPACKET_V3) {
1818 SCLogError(SC_ERR_AFP_CREATE,
1819 "Too old kernel giving up (need 3.2 for TPACKET_V3)");
1820 } else {
1821 SCLogError(SC_ERR_AFP_CREATE,
1822 "Too old kernel giving up (need 2.6.27 at least)");
1823 }
1824 }
1825 SCLogError(SC_ERR_AFP_CREATE, "Error when retrieving packet header len");
1826 return AFP_FATAL_ERROR;
1827 }
1828
f947539d
VJ
1829 val = TPACKET_V2;
1830#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1831 if (ptv->flags & AFP_TPACKET_V3) {
1832 val = TPACKET_V3;
c7bde9df 1833 }
f947539d 1834#endif
c7bde9df
EL
1835 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_VERSION, &val,
1836 sizeof(val)) < 0) {
1837 SCLogError(SC_ERR_AFP_CREATE,
1838 "Can't activate TPACKET_V2/TPACKET_V3 on packet socket: %s",
1839 strerror(errno));
1840 return AFP_FATAL_ERROR;
1841 }
1842
a40f08a2
EL
1843#ifdef HAVE_HW_TIMESTAMPING
1844 int req = SOF_TIMESTAMPING_RAW_HARDWARE;
1845 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req,
1846 sizeof(req)) < 0) {
1847 SCLogWarning(SC_ERR_AFP_CREATE,
1848 "Can't activate hardware timestamping on packet socket: %s",
1849 strerror(errno));
1850 }
1851#endif
1852
ecf59be4
EL
1853 /* Let's reserve head room so we can add the VLAN header in IPS
1854 * or TAP mode before write the packet */
1855 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
1856 /* Only one vlan is extracted from AFP header so
1857 * one VLAN header length is enough. */
1858 int reserve = VLAN_HEADER_LEN;
1859 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
1860 sizeof(reserve)) < 0) {
1861 SCLogError(SC_ERR_AFP_CREATE,
1862 "Can't activate reserve on packet socket: %s",
1863 strerror(errno));
1864 return AFP_FATAL_ERROR;
1865 }
1866 }
1867
c7bde9df 1868 /* Allocate RX ring */
c2d0d938 1869#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1870 if (ptv->flags & AFP_TPACKET_V3) {
1871 if (AFPComputeRingParamsV3(ptv) != 1) {
1872 return AFP_FATAL_ERROR;
1873 }
1874 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1875 (void *) &ptv->req3, sizeof(ptv->req3));
1876 if (r < 0) {
1877 SCLogError(SC_ERR_MEM_ALLOC,
1878 "Unable to allocate RX Ring for iface %s: (%d) %s",
1879 devname,
1880 errno,
1881 strerror(errno));
1882 return AFP_FATAL_ERROR;
1883 }
1884 } else {
c2d0d938 1885#endif
fa902abe 1886 for (order = AFP_BLOCK_SIZE_DEFAULT_ORDER; order >= 0; order--) {
c7bde9df
EL
1887 if (AFPComputeRingParams(ptv, order) != 1) {
1888 SCLogInfo("Ring parameter are incorrect. Please correct the devel");
1889 return AFP_FATAL_ERROR;
1890 }
1891
1892 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1893 (void *) &ptv->req, sizeof(ptv->req));
1894
1895 if (r < 0) {
1896 if (errno == ENOMEM) {
1897 SCLogInfo("Memory issue with ring parameters. Retrying.");
1898 continue;
1899 }
1900 SCLogError(SC_ERR_MEM_ALLOC,
1901 "Unable to allocate RX Ring for iface %s: (%d) %s",
1902 devname,
1903 errno,
1904 strerror(errno));
1905 return AFP_FATAL_ERROR;
1906 } else {
1907 break;
1908 }
1909 }
1910 if (order < 0) {
1911 SCLogError(SC_ERR_MEM_ALLOC,
1912 "Unable to allocate RX Ring for iface %s (order 0 failed)",
1913 devname);
1914 return AFP_FATAL_ERROR;
1915 }
c2d0d938 1916#ifdef HAVE_TPACKET_V3
c7bde9df 1917 }
c2d0d938 1918#endif
c7bde9df
EL
1919
1920 /* Allocate the Ring */
c2d0d938 1921#ifdef HAVE_TPACKET_V3
c7bde9df 1922 if (ptv->flags & AFP_TPACKET_V3) {
cba41207 1923 ptv->ring_buflen = ptv->req3.tp_block_nr * ptv->req3.tp_block_size;
c7bde9df 1924 } else {
c2d0d938 1925#endif
cba41207 1926 ptv->ring_buflen = ptv->req.tp_block_nr * ptv->req.tp_block_size;
c2d0d938 1927#ifdef HAVE_TPACKET_V3
c7bde9df 1928 }
c2d0d938 1929#endif
f5c20191
EL
1930 mmap_flag = MAP_SHARED;
1931 if (ptv->flags & AFP_MMAP_LOCKED)
1932 mmap_flag |= MAP_LOCKED;
cba41207 1933 ptv->ring_buf = mmap(0, ptv->ring_buflen, PROT_READ|PROT_WRITE,
f5c20191 1934 mmap_flag, ptv->socket, 0);
cba41207 1935 if (ptv->ring_buf == MAP_FAILED) {
88f5d7d1
EL
1936 SCLogError(SC_ERR_MEM_ALLOC, "Unable to mmap, error %s",
1937 strerror(errno));
c7bde9df
EL
1938 goto mmap_err;
1939 }
c2d0d938 1940#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1941 if (ptv->flags & AFP_TPACKET_V3) {
1942 ptv->ring_v3 = SCMalloc(ptv->req3.tp_block_nr * sizeof(*ptv->ring_v3));
1943 if (!ptv->ring_v3) {
1944 SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring_v3");
291af719 1945 goto postmmap_err;
c7bde9df
EL
1946 }
1947 for (i = 0; i < ptv->req3.tp_block_nr; ++i) {
cba41207 1948 ptv->ring_v3[i].iov_base = ptv->ring_buf + (i * ptv->req3.tp_block_size);
c7bde9df
EL
1949 ptv->ring_v3[i].iov_len = ptv->req3.tp_block_size;
1950 }
1951 } else {
c2d0d938 1952#endif
c7bde9df
EL
1953 /* allocate a ring for each frame header pointer*/
1954 ptv->ring_v2 = SCMalloc(ptv->req.tp_frame_nr * sizeof (union thdr *));
1955 if (ptv->ring_v2 == NULL) {
1956 SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate frame buf");
291af719 1957 goto postmmap_err;
c7bde9df
EL
1958 }
1959 memset(ptv->ring_v2, 0, ptv->req.tp_frame_nr * sizeof (union thdr *));
1960 /* fill the header ring with proper frame ptr*/
1961 ptv->frame_offset = 0;
1962 for (i = 0; i < ptv->req.tp_block_nr; ++i) {
cba41207 1963 void *base = &(ptv->ring_buf[i * ptv->req.tp_block_size]);
c7bde9df
EL
1964 unsigned int j;
1965 for (j = 0; j < ptv->req.tp_block_size / ptv->req.tp_frame_size; ++j, ++ptv->frame_offset) {
1966 (((union thdr **)ptv->ring_v2)[ptv->frame_offset]) = base;
1967 base += ptv->req.tp_frame_size;
1968 }
1969 }
1970 ptv->frame_offset = 0;
c2d0d938 1971#ifdef HAVE_TPACKET_V3
c7bde9df 1972 }
c2d0d938 1973#endif
c7bde9df
EL
1974
1975 return 0;
1976
291af719 1977postmmap_err:
cba41207 1978 munmap(ptv->ring_buf, ptv->ring_buflen);
291af719
EL
1979 if (ptv->ring_v2)
1980 SCFree(ptv->ring_v2);
1981 if (ptv->ring_v3)
1982 SCFree(ptv->ring_v3);
c7bde9df
EL
1983mmap_err:
1984 /* Packet mmap does the cleaning when socket is closed */
1985 return AFP_FATAL_ERROR;
1986}
1987
/**
 * \brief Test if we can use FANOUT.
 *
 * Older kernels like those in CentOS6 have HAVE_PACKET_FANOUT defined but
 * fail to work, so a compile-time check is not enough. This opens a
 * throw-away AF_PACKET socket and tries to set PACKET_FANOUT on it.
 *
 * \retval 1 the setsockopt() call succeeded
 * \retval 0 HAVE_PACKET_FANOUT is not defined, the probe socket could not
 *           be created, or the setsockopt() call failed
 */
int AFPIsFanoutSupported(void)
{
#ifdef HAVE_PACKET_FANOUT
    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0)
        return 0;

    uint16_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
    uint16_t id = 1;
    /* Widen before shifting: a uint16_t is promoted to (signed) int, and
     * shifting a value with bit 15 set left by 16 overflows that int. */
    uint32_t option = ((uint32_t)mode << 16) | (id & 0xffff);
    int r = setsockopt(fd, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
    /* Keep the setsockopt() error: close() is allowed to overwrite errno. */
    int saved_errno = errno;
    close(fd);

    if (r < 0) {
        SCLogPerf("fanout not supported by kernel: %s", strerror(saved_errno));
        return 0;
    }
    return 1;
#else
    return 0;
#endif
}
2013
91e1256b
EL
2014#ifdef HAVE_PACKET_EBPF
2015
2016static int SockFanoutSeteBPF(AFPThreadVars *ptv)
2017{
2018 int pfd = ptv->ebpf_lb_fd;
2019 if (pfd == -1) {
2020 SCLogError(SC_ERR_INVALID_VALUE,
2021 "Fanout file descriptor is invalid");
2022 return -1;
2023 }
2024
2025 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
2026 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf");
2027 return -1;
2028 }
2029 SCLogInfo("Activated eBPF on socket");
2030
2031 return 0;
2032}
2033
2034static int SetEbpfFilter(AFPThreadVars *ptv)
2035{
2036 int pfd = ptv->ebpf_filter_fd;
2037 if (pfd == -1) {
2038 SCLogError(SC_ERR_INVALID_VALUE,
2039 "Filter file descriptor is invalid");
2040 return -1;
2041 }
2042
2043 if (setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &pfd, sizeof(pfd))) {
2044 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
2045 return -1;
2046 }
2047 SCLogInfo("Activated eBPF filter on socket");
2048
2049 return 0;
2050}
2051#endif
2052
e80b30c0 2053static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
c45d8985
EL
2054{
2055 int r;
1992a227 2056 int ret = AFP_FATAL_ERROR;
c45d8985
EL
2057 struct packet_mreq sock_params;
2058 struct sockaddr_ll bind_address;
662dccd8 2059 int if_idx;
49b7b00f 2060
c45d8985
EL
2061 /* open socket */
2062 ptv->socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
2063 if (ptv->socket == -1) {
e80b30c0 2064 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
13f13b6d 2065 goto error;
c45d8985 2066 }
cba41207 2067
662dccd8 2068 if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);
cba41207
AG
2069
2070 if (if_idx == -1) {
fcd5e138 2071 goto socket_err;
cba41207
AG
2072 }
2073
c45d8985
EL
2074 /* bind socket */
2075 memset(&bind_address, 0, sizeof(bind_address));
2076 bind_address.sll_family = AF_PACKET;
2077 bind_address.sll_protocol = htons(ETH_P_ALL);
662dccd8 2078 bind_address.sll_ifindex = if_idx;
c45d8985
EL
2079 if (bind_address.sll_ifindex == -1) {
2080 if (verbose)
e80b30c0 2081 SCLogError(SC_ERR_AFP_CREATE, "Couldn't find iface %s", devname);
1992a227 2082 ret = AFP_RECOVERABLE_ERROR;
13f13b6d
EL
2083 goto socket_err;
2084 }
2085
cba41207
AG
2086 int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
2087 if (if_flags == -1) {
2088 if (verbose) {
2089 SCLogError(SC_ERR_AFP_READ,
2090 "Couldn't get flags for interface '%s'",
2091 ptv->iface);
2092 }
2093 ret = AFP_RECOVERABLE_ERROR;
2094 goto socket_err;
2095 } else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
2096 if (verbose) {
2097 SCLogError(SC_ERR_AFP_READ,
2098 "Interface '%s' is down",
2099 ptv->iface);
2100 }
2101 ret = AFP_RECOVERABLE_ERROR;
2102 goto socket_err;
2103 }
2104
13f13b6d
EL
2105 if (ptv->promisc != 0) {
2106 /* Force promiscuous mode */
2107 memset(&sock_params, 0, sizeof(sock_params));
2108 sock_params.mr_type = PACKET_MR_PROMISC;
2109 sock_params.mr_ifindex = bind_address.sll_ifindex;
2110 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP,(void *)&sock_params, sizeof(sock_params));
2111 if (r < 0) {
2112 SCLogError(SC_ERR_AFP_CREATE,
2113 "Couldn't switch iface %s to promiscuous, error %s",
2114 devname, strerror(errno));
c7bde9df 2115 goto socket_err;
13f13b6d
EL
2116 }
2117 }
2118
2119 if (ptv->checksum_mode == CHECKSUM_VALIDATION_KERNEL) {
2120 int val = 1;
2121 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_AUXDATA, &val,
2122 sizeof(val)) == -1 && errno != ENOPROTOOPT) {
2123 SCLogWarning(SC_ERR_NO_AF_PACKET,
4111331a 2124 "'kernel' checksum mode not supported, falling back to full mode.");
13f13b6d
EL
2125 ptv->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
2126 }
2127 }
2128
2129 /* set socket recv buffer size */
2130 if (ptv->buffer_size != 0) {
2131 /*
2132 * Set the socket buffer size to the specified value.
2133 */
b3bf7a57 2134 SCLogPerf("Setting AF_PACKET socket buffer to %d", ptv->buffer_size);
13f13b6d
EL
2135 if (setsockopt(ptv->socket, SOL_SOCKET, SO_RCVBUF,
2136 &ptv->buffer_size,
2137 sizeof(ptv->buffer_size)) == -1) {
2138 SCLogError(SC_ERR_AFP_CREATE,
2139 "Couldn't set buffer size to %d on iface %s, error %s",
2140 ptv->buffer_size, devname, strerror(errno));
c7bde9df 2141 goto socket_err;
13f13b6d
EL
2142 }
2143 }
2144
2145 r = bind(ptv->socket, (struct sockaddr *)&bind_address, sizeof(bind_address));
2146 if (r < 0) {
2147 if (verbose) {
2148 if (errno == ENETDOWN) {
2149 SCLogError(SC_ERR_AFP_CREATE,
2150 "Couldn't bind AF_PACKET socket, iface %s is down",
2151 devname);
2152 } else {
2153 SCLogError(SC_ERR_AFP_CREATE,
2154 "Couldn't bind AF_PACKET socket to iface %s, error %s",
2155 devname, strerror(errno));
2156 }
2157 }
1992a227 2158 ret = AFP_RECOVERABLE_ERROR;
c7bde9df 2159 goto socket_err;
13f13b6d
EL
2160 }
2161
91e1256b 2162
238ff231
EL
2163#ifdef HAVE_PACKET_FANOUT
2164 /* add binded socket to fanout group */
2165 if (ptv->threads > 1) {
238ff231
EL
2166 uint16_t mode = ptv->cluster_type;
2167 uint16_t id = ptv->cluster_id;
4111331a 2168 uint32_t option = (mode << 16) | (id & 0xffff);
238ff231
EL
2169 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
2170 if (r < 0) {
2171 SCLogError(SC_ERR_AFP_CREATE,
4111331a 2172 "Couldn't set fanout mode, error %s",
238ff231 2173 strerror(errno));
c7bde9df 2174 goto socket_err;
238ff231
EL
2175 }
2176 }
2177#endif
2178
91e1256b
EL
2179#ifdef HAVE_PACKET_EBPF
2180 if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
2181 r = SockFanoutSeteBPF(ptv);
2182 if (r < 0) {
2183 SCLogError(SC_ERR_AFP_CREATE,
2184 "Coudn't set EBPF, error %s",
2185 strerror(errno));
2186 goto socket_err;
2187 }
2188 }
2189#endif
2190
49b7b00f 2191 if (ptv->flags & AFP_RING_MODE) {
c7bde9df
EL
2192 ret = AFPSetupRing(ptv, devname);
2193 if (ret != 0)
13f13b6d 2194 goto socket_err;
49b7b00f
EL
2195 }
2196
86a3f064 2197 SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
c45d8985 2198
c85ee1e3
EL
2199 ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
2200 switch (ptv->datalink) {
2201 case ARPHRD_PPP:
2202 case ARPHRD_ATM:
2203 ptv->cooked = 1;
619414c5 2204 break;
c85ee1e3
EL
2205 }
2206
f47df5a6 2207 TmEcode rc = AFPSetBPFFilter(ptv);
f2a6fb8a 2208 if (rc == TM_ECODE_FAILED) {
39807b47
AG
2209 ret = AFP_FATAL_ERROR;
2210 goto socket_err;
f2a6fb8a
EL
2211 }
2212
49b7b00f 2213 /* Init is ok */
13f13b6d 2214 AFPSwitchState(ptv, AFP_STATE_UP);
c45d8985 2215 return 0;
13f13b6d 2216
13f13b6d
EL
2217socket_err:
2218 close(ptv->socket);
2219 ptv->socket = -1;
f47df5a6
VJ
2220 if (ptv->flags & AFP_TPACKET_V3) {
2221 if (ptv->ring_v3) {
2222 SCFree(ptv->ring_v3);
2223 ptv->ring_v3 = NULL;
2224 }
2225 } else {
2226 if (ptv->ring_v2) {
2227 SCFree(ptv->ring_v2);
2228 ptv->ring_v2 = NULL;
2229 }
2230 }
2231
13f13b6d 2232error:
1992a227 2233 return -ret;
c45d8985
EL
2234}
2235
f2a6fb8a
EL
2236TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
2237{
2238 struct bpf_program filter;
2239 struct sock_fprog fcode;
2240 int rc;
2241
91e1256b
EL
2242#ifdef HAVE_PACKET_EBPF
2243 if (ptv->ebpf_filter_fd != -1) {
2244 return SetEbpfFilter(ptv);
2245 }
2246#endif
2247
f2a6fb8a
EL
2248 if (!ptv->bpf_filter)
2249 return TM_ECODE_OK;
2250
f2a6fb8a
EL
2251 SCLogInfo("Using BPF '%s' on iface '%s'",
2252 ptv->bpf_filter,
2253 ptv->iface);
28e9e4c8
EL
2254
2255 char errbuf[PCAP_ERRBUF_SIZE];
2256 if (SCBPFCompile(default_packet_size, /* snaplen_arg */
f2a6fb8a
EL
2257 ptv->datalink, /* linktype_arg */
2258 &filter, /* program */
2259 ptv->bpf_filter, /* const char *buf */
cc82ef06 2260 1, /* optimize */
28e9e4c8
EL
2261 0, /* mask */
2262 errbuf,
2263 sizeof(errbuf)) == -1) {
2264 SCLogError(SC_ERR_AFP_CREATE, "Failed to compile BPF \"%s\": %s",
2265 ptv->bpf_filter,
2266 errbuf);
f2a6fb8a
EL
2267 return TM_ECODE_FAILED;
2268 }
2269
2270 fcode.len = filter.bf_len;
2271 fcode.filter = (struct sock_filter*)filter.bf_insns;
2272
2273 rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
2274
28e9e4c8 2275 SCBPFFree(&filter);
f2a6fb8a
EL
2276 if(rc == -1) {
2277 SCLogError(SC_ERR_AFP_CREATE, "Failed to attach filter: %s", strerror(errno));
2278 return TM_ECODE_FAILED;
2279 }
2280
f2a6fb8a
EL
2281 return TM_ECODE_OK;
2282}
2283
06173267
EL
2284#ifdef HAVE_PACKET_EBPF
2285/**
2286 * Insert a half flow in the kernel bypass table
2287 *
2288 * \param mapfd file descriptor of the protocol bypass table
2289 * \param key data to use as key in the table
2290 * \param inittime time of creation of the entry (in monotonic clock)
2598078e 2291 * \return 0 in case of error, 1 if success
06173267
EL
2292 */
2293static int AFPInsertHalfFlow(int mapd, void *key, uint64_t inittime)
2294{
17a32bda
EL
2295 struct pair value[nr_cpus];
2296 unsigned int i;
1e729f05
EL
2297
2298 if (mapd == -1) {
2299 return 0;
2300 }
2301
94a622cb
EL
2302 /* We use a per CPU structure so we have to set an array of values as the kernel
2303 * is not duplicating the data on each CPU by itself. */
17a32bda
EL
2304 for (i = 0; i < nr_cpus; i++) {
2305 value[i].time = inittime;
2306 value[i].packets = 0;
2307 value[i].bytes = 0;
2308 }
2309 SCLogDebug("Inserting element in eBPF mapping: %lu", inittime);
2310 if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) != 0) {
2311 switch (errno) {
3379311e 2312 /* no more place in the hash */
17a32bda 2313 case E2BIG:
17a32bda 2314 return 0;
3379311e
EL
2315 /* if we already have the key then bypass is a success */
2316 case EEXIST:
2317 return 1;
2318 /* Not supposed to be there so issue a error */
17a32bda
EL
2319 default:
2320 SCLogError(SC_ERR_BPF, "Can't update eBPF map: %s (%d)",
2321 strerror(errno),
2322 errno);
2323 return 0;
06173267 2324 }
17a32bda
EL
2325 }
2326 return 1;
06173267
EL
2327}
2328#endif
2329
2598078e 2330/**
94a622cb
EL
2331 * Bypass function for AF_PACKET capture in eBPF mode
2332 *
2333 * This function creates two half flows in the map shared with the kernel
2334 * to trigger bypass.
2335 *
2336 * The implementation of bypass is done via an IPv4 and an IPv6 flow table.
2337 * This table contains the list of half flows to bypass. The in-kernel filter
2338 * will skip/drop the packet if they belong to a flow in one of the flows
2339 * table.
2340 *
2341 * \param p the packet belonging to the flow to bypass
2342 * \return 0 if unable to bypass, 1 if success
2598078e 2343 */
06173267
EL
2344static int AFPBypassCallback(Packet *p)
2345{
2346#ifdef HAVE_PACKET_EBPF
2347 SCLogDebug("Calling af_packet callback function");
2348 /* Only bypass TCP and UDP */
2349 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2350 return 0;
2351 }
2352
2353 /* Bypassing tunneled packets is currently not supported
2354 * because we can't discard the inner packet only due to
2355 * primitive parsing in eBPF */
2356 if (IS_TUNNEL_PKT(p)) {
2357 return 0;
2358 }
2359 struct timespec curtime;
2360 uint64_t inittime = 0;
2361 /* In eBPF, the function that we have use to get time return the
2362 * monotonic clock (the time since start of the computer). So we
2363 * can't use the timestamp of the packet. */
2364 if (clock_gettime(CLOCK_MONOTONIC, &curtime) == 0) {
2365 inittime = curtime.tv_sec * 1000000000;
2366 }
2367 if (PKT_IS_IPV4(p)) {
d65f4585 2368 SCLogDebug("add an IPv4");
eff10fce
EL
2369 if (p->afp_v.v4_map_fd == -1) {
2370 return 0;
2371 }
06173267
EL
2372 struct flowv4_keys key = {};
2373 key.src = htonl(GET_IPV4_SRC_ADDR_U32(p));
2374 key.dst = htonl(GET_IPV4_DST_ADDR_U32(p));
2375 key.port16[0] = GET_TCP_SRC_PORT(p);
2376 key.port16[1] = GET_TCP_DST_PORT(p);
8c880879 2377
06173267 2378 key.ip_proto = IPV4_GET_IPPROTO(p);
d65f4585 2379 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
06173267
EL
2380 return 0;
2381 }
2382 key.src = htonl(GET_IPV4_DST_ADDR_U32(p));
2383 key.dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
2384 key.port16[0] = GET_TCP_DST_PORT(p);
2385 key.port16[1] = GET_TCP_SRC_PORT(p);
d65f4585 2386 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
06173267
EL
2387 return 0;
2388 }
f0439103 2389 EBPFUpdateFlow(p->flow, p);
06173267
EL
2390 return 1;
2391 }
2392 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2393 if (PKT_IS_IPV6(p) &&
06173267 2394 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
d65f4585 2395 int i;
eff10fce
EL
2396 if (p->afp_v.v6_map_fd == -1) {
2397 return 0;
2398 }
06173267 2399 SCLogDebug("add an IPv6");
06173267
EL
2400 struct flowv6_keys key = {};
2401 for (i = 0; i < 4; i++) {
2402 key.src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2403 key.dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2404 }
2405 key.port16[0] = GET_TCP_SRC_PORT(p);
2406 key.port16[1] = GET_TCP_DST_PORT(p);
2407 key.ip_proto = IPV6_GET_NH(p);
d65f4585 2408 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
06173267
EL
2409 return 0;
2410 }
2411 for (i = 0; i < 4; i++) {
2412 key.src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2413 key.dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2414 }
2415 key.port16[0] = GET_TCP_DST_PORT(p);
2416 key.port16[1] = GET_TCP_SRC_PORT(p);
d65f4585 2417 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
06173267
EL
2418 return 0;
2419 }
f0439103 2420 EBPFUpdateFlow(p->flow, p);
06173267
EL
2421 return 1;
2422 }
2423#endif
2424 return 0;
2425}
2426
94a622cb
EL
2427/**
2428 * Bypass function for AF_PACKET capture in XDP mode
2429 *
2430 * This function creates two half flows in the map shared with the kernel
2431 * to trigger bypass. This function is similar to AFPBypassCallback() but
2432 * the bytes order is changed for some data due to the way we get the data
2433 * in the XDP case.
2434 *
2435 * \param p the packet belonging to the flow to bypass
2436 * \return 0 if unable to bypass, 1 if success
2437 */
8c880879
EL
2438static int AFPXDPBypassCallback(Packet *p)
2439{
2440#ifdef HAVE_PACKET_XDP
2441 SCLogDebug("Calling af_packet callback function");
2442 /* Only bypass TCP and UDP */
2443 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2444 return 0;
2445 }
2446
2447 /* Bypassing tunneled packets is currently not supported
2448 * because we can't discard the inner packet only due to
2449 * primitive parsing in eBPF */
2450 if (IS_TUNNEL_PKT(p)) {
2451 return 0;
2452 }
2453 struct timespec curtime;
2454 uint64_t inittime = 0;
94a622cb
EL
2455 /* In eBPF, the function that we have use to get time return the
2456 * monotonic clock (the time since start of the computer). So we
2457 * can't use the timestamp of the packet. */
8c880879
EL
2458 if (clock_gettime(CLOCK_MONOTONIC, &curtime) == 0) {
2459 inittime = curtime.tv_sec * 1000000000;
2460 }
2461 if (PKT_IS_IPV4(p)) {
8c880879 2462 struct flowv4_keys key = {};
eff10fce
EL
2463 if (p->afp_v.v4_map_fd == -1) {
2464 return 0;
2465 }
8c880879
EL
2466 key.src = GET_IPV4_SRC_ADDR_U32(p);
2467 key.dst = GET_IPV4_DST_ADDR_U32(p);
94a622cb 2468 /* In the XDP filter we get port from parsing of packet and not from skb
6062c27e 2469 * (as in eBPF filter) so we need to pass from host to network order */
8c880879
EL
2470 key.port16[0] = htons(GET_TCP_SRC_PORT(p));
2471 key.port16[1] = htons(GET_TCP_DST_PORT(p));
2472 key.ip_proto = IPV4_GET_IPPROTO(p);
d65f4585 2473 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
8c880879
EL
2474 return 0;
2475 }
2476 key.src = GET_IPV4_DST_ADDR_U32(p);
2477 key.dst = GET_IPV4_SRC_ADDR_U32(p);
2478 key.port16[0] = htons(GET_TCP_DST_PORT(p));
2479 key.port16[1] = htons(GET_TCP_SRC_PORT(p));
d65f4585 2480 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
8c880879
EL
2481 return 0;
2482 }
2483 return 1;
2484 }
2485 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2486 if (PKT_IS_IPV6(p) &&
8c880879 2487 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
8c880879 2488 SCLogDebug("add an IPv6");
eff10fce
EL
2489 if (p->afp_v.v6_map_fd == -1) {
2490 return 0;
2491 }
d65f4585 2492 int i;
8c880879
EL
2493 struct flowv6_keys key = {};
2494 for (i = 0; i < 4; i++) {
2495 key.src[i] = GET_IPV6_SRC_ADDR(p)[i];
2496 key.dst[i] = GET_IPV6_DST_ADDR(p)[i];
2497 }
2498 key.port16[0] = htons(GET_TCP_SRC_PORT(p));
2499 key.port16[1] = htons(GET_TCP_DST_PORT(p));
2500 key.ip_proto = IPV6_GET_NH(p);
d65f4585 2501 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
8c880879
EL
2502 return 0;
2503 }
2504 for (i = 0; i < 4; i++) {
2505 key.src[i] = GET_IPV6_DST_ADDR(p)[i];
2506 key.dst[i] = GET_IPV6_SRC_ADDR(p)[i];
2507 }
2508 key.port16[0] = htons(GET_TCP_DST_PORT(p));
2509 key.port16[1] = htons(GET_TCP_SRC_PORT(p));
d65f4585 2510 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
8c880879
EL
2511 return 0;
2512 }
2513 return 1;
2514 }
2515#endif
2516 return 0;
2517}
2518
c45d8985
EL
2519/**
2520 * \brief Init function for ReceiveAFP.
2521 *
2522 * \param tv pointer to ThreadVars
2523 * \param initdata pointer to the interface passed from the user
2524 * \param data pointer gets populated with AFPThreadVars
2525 *
2526 * \todo Create a general AFP setup function.
2527 */
ab1200fb 2528TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
8f1d7503 2529{
c45d8985 2530 SCEnter();
ab1200fb 2531 AFPIfaceConfig *afpconfig = (AFPIfaceConfig *)initdata;
c45d8985 2532
c45d8985
EL
2533 if (initdata == NULL) {
2534 SCLogError(SC_ERR_INVALID_ARGUMENT, "initdata == NULL");
2535 SCReturnInt(TM_ECODE_FAILED);
2536 }
2537
2538 AFPThreadVars *ptv = SCMalloc(sizeof(AFPThreadVars));
e176be6f 2539 if (unlikely(ptv == NULL)) {
45d5c3ca 2540 afpconfig->DerefFunc(afpconfig);
c45d8985 2541 SCReturnInt(TM_ECODE_FAILED);
45d5c3ca 2542 }
c45d8985
EL
2543 memset(ptv, 0, sizeof(AFPThreadVars));
2544
2545 ptv->tv = tv;
2546 ptv->cooked = 0;
2547
fbca1a4e 2548 strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
c45d8985
EL
2549 ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2550
51eb9605
EL
2551 ptv->livedev = LiveGetDevice(ptv->iface);
2552 if (ptv->livedev == NULL) {
2553 SCLogError(SC_ERR_INVALID_VALUE, "Unable to find Live device");
11bdf483 2554 SCFree(ptv);
51eb9605
EL
2555 SCReturnInt(TM_ECODE_FAILED);
2556 }
2557
fbca1a4e 2558 ptv->buffer_size = afpconfig->buffer_size;
8879df80 2559 ptv->ring_size = afpconfig->ring_size;
fa902abe 2560 ptv->block_size = afpconfig->block_size;
e80b30c0 2561
df7dbe36 2562 ptv->promisc = afpconfig->promisc;
6062e00c 2563 ptv->checksum_mode = afpconfig->checksum_mode;
6efd37a3 2564 ptv->bpf_filter = NULL;
df7dbe36 2565
fbca1a4e 2566 ptv->threads = 1;
e80b30c0
EL
2567#ifdef HAVE_PACKET_FANOUT
2568 ptv->cluster_type = PACKET_FANOUT_LB;
2569 ptv->cluster_id = 1;
2570 /* We only set cluster info if the number of reader threads is greater than 1 */
fbca1a4e 2571 if (afpconfig->threads > 1) {
9d882116
VJ
2572 ptv->cluster_id = afpconfig->cluster_id;
2573 ptv->cluster_type = afpconfig->cluster_type;
2574 ptv->threads = afpconfig->threads;
e80b30c0
EL
2575 }
2576#endif
49b7b00f 2577 ptv->flags = afpconfig->flags;
e80b30c0 2578
f2a6fb8a
EL
2579 if (afpconfig->bpf_filter) {
2580 ptv->bpf_filter = afpconfig->bpf_filter;
2581 }
91e1256b
EL
2582 ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
2583 ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
8c880879 2584 ptv->xdp_mode = afpconfig->xdp_mode;
f2a6fb8a 2585
d65f4585
EL
2586#ifdef HAVE_PACKET_EBPF
2587 if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
126488f7 2588 ptv->v4_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v4");
d65f4585
EL
2589 if (ptv->v4_map_fd == -1) {
2590 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v4");
2591 }
126488f7 2592 ptv->v6_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v6");
d65f4585
EL
2593 if (ptv->v6_map_fd == -1) {
2594 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v6");
2595 }
2596 }
2597#endif
2598
6efd37a3 2599#ifdef PACKET_STATISTICS
1ef786e7
VJ
2600 ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
2601 ptv->tv);
2602 ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
2603 ptv->tv);
9efa4ace
EL
2604 ptv->capture_errors = StatsRegisterCounter("capture.errors",
2605 ptv->tv);
6efd37a3
EL
2606#endif
2607
662dccd8
EL
2608 ptv->copy_mode = afpconfig->copy_mode;
2609 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
2610 strlcpy(ptv->out_iface, afpconfig->out_iface, AFP_IFACE_NAME_LENGTH);
2611 ptv->out_iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
b7e78d33
EL
2612 /* Warn about BPF filter consequence */
2613 if (ptv->bpf_filter) {
2614 SCLogWarning(SC_WARN_UNCOMMON, "Enabling a BPF filter in IPS mode result"
2615 " in dropping all non matching packets.");
2616 }
662dccd8 2617 }
c85ee1e3 2618
b7e78d33 2619
0581a23f
EL
2620 if (AFPPeersListAdd(ptv) == TM_ECODE_FAILED) {
2621 SCFree(ptv);
2622 afpconfig->DerefFunc(afpconfig);
2623 SCReturnInt(TM_ECODE_FAILED);
2624 }
2625
e80b30c0
EL
2626#define T_DATA_SIZE 70000
2627 ptv->data = SCMalloc(T_DATA_SIZE);
2628 if (ptv->data == NULL) {
45d5c3ca 2629 afpconfig->DerefFunc(afpconfig);
6019ae3d 2630 SCFree(ptv);
e80b30c0 2631 SCReturnInt(TM_ECODE_FAILED);
c45d8985 2632 }
e80b30c0
EL
2633 ptv->datalen = T_DATA_SIZE;
2634#undef T_DATA_SIZE
2635
c45d8985 2636 *data = (void *)ptv;
fbca1a4e 2637
45d5c3ca 2638 afpconfig->DerefFunc(afpconfig);
71e47868
EL
2639
2640 /* A bit strange to have this here but we only have vlan information
2641 * during reading so we need to know if we want to keep vlan during
2642 * the capture phase */
2643 int vlanbool = 0;
2644 if ((ConfGetBool("vlan.use-for-tracking", &vlanbool)) == 1 && vlanbool == 0) {
9500d12c 2645 ptv->flags |= AFP_VLAN_DISABLED;
71e47868
EL
2646 }
2647
2cd6e128
EL
2648 /* If kernel is older than 3.0, VLAN is not stripped so we don't
2649 * get the info from packet extended header but we will use a standard
2650 * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
2651 if (! SCKernelVersionIsAtLeast(3, 0)) {
9500d12c 2652 ptv->flags |= AFP_VLAN_DISABLED;
2cd6e128
EL
2653 }
2654
c45d8985
EL
2655 SCReturnInt(TM_ECODE_OK);
2656}
2657
2658/**
2659 * \brief This function prints stats to the screen at exit.
2660 * \param tv pointer to ThreadVars
2661 * \param data pointer that gets cast into AFPThreadVars for ptv
2662 */
8f1d7503
KS
2663void ReceiveAFPThreadExitStats(ThreadVars *tv, void *data)
2664{
c45d8985
EL
2665 SCEnter();
2666 AFPThreadVars *ptv = (AFPThreadVars *)data;
9549faae
EL
2667
2668#ifdef PACKET_STATISTICS
e8a4a4c4 2669 AFPDumpCounters(ptv);
b3bf7a57 2670 SCLogPerf("(%s) Kernel: Packets %" PRIu64 ", dropped %" PRIu64 "",
6efd37a3 2671 tv->name,
752f03e7
VJ
2672 StatsGetLocalCounterValue(tv, ptv->capture_kernel_packets),
2673 StatsGetLocalCounterValue(tv, ptv->capture_kernel_drops));
9549faae 2674#endif
c45d8985
EL
2675}
2676
2677/**
2678 * \brief DeInit function closes af packet socket at exit.
2679 * \param tv pointer to ThreadVars
2680 * \param data pointer that gets cast into AFPThreadVars for ptv
2681 */
8f1d7503
KS
2682TmEcode ReceiveAFPThreadDeinit(ThreadVars *tv, void *data)
2683{
c45d8985
EL
2684 AFPThreadVars *ptv = (AFPThreadVars *)data;
2685
13f13b6d
EL
2686 AFPSwitchState(ptv, AFP_STATE_DOWN);
2687
8c880879
EL
2688#ifdef HAVE_PACKET_XDP
2689 EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
2690#endif
e80b30c0
EL
2691 if (ptv->data != NULL) {
2692 SCFree(ptv->data);
2693 ptv->data = NULL;
2694 }
2695 ptv->datalen = 0;
2696
f2a6fb8a 2697 ptv->bpf_filter = NULL;
ce59ec5d
EL
2698 if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring_v3) {
2699 SCFree(ptv->ring_v3);
2700 } else {
2701 if (ptv->ring_v2)
2702 SCFree(ptv->ring_v2);
2703 }
f2a6fb8a 2704
7127ae2b 2705 SCFree(ptv);
c45d8985
EL
2706 SCReturnInt(TM_ECODE_OK);
2707}
2708
2709/**
2710 * \brief This function passes off to link type decoders.
2711 *
2712 * DecodeAFP reads packets from the PacketQueue and passes
2713 * them off to the proper link type decoder.
2714 *
2715 * \param t pointer to ThreadVars
2716 * \param p pointer to the current packet
2717 * \param data pointer that gets cast into AFPThreadVars for ptv
2718 * \param pq pointer to the current PacketQueue
2719 */
2720TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *postpq)
2721{
2722 SCEnter();
2723 DecodeThreadVars *dtv = (DecodeThreadVars *)data;
2724
f7b1aefa
VJ
2725 /* XXX HACK: flow timeout can call us for injected pseudo packets
2726 * see bug: https://redmine.openinfosecfoundation.org/issues/1107 */
2727 if (p->flags & PKT_PSEUDO_STREAM_END)
2728 return TM_ECODE_OK;
2729
c45d8985 2730 /* update counters */
14466a80 2731 DecodeUpdatePacketCounters(tv, dtv, p);
c45d8985 2732
1fb7c0dd
EL
2733 /* If suri has set vlan during reading, we increase vlan counter */
2734 if (p->vlan_idx) {
1c0b4ee0 2735 StatsIncr(tv, dtv->counter_vlan);
1fb7c0dd
EL
2736 }
2737
c45d8985 2738 /* call the decoder */
49dbb455 2739 switch (p->datalink) {
c45d8985
EL
2740 case LINKTYPE_ETHERNET:
2741 DecodeEthernet(tv, dtv, p,GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2742 break;
49dbb455
VJ
2743 case LINKTYPE_LINUX_SLL:
2744 DecodeSll(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2745 break;
c45d8985
EL
2746 case LINKTYPE_PPP:
2747 DecodePPP(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2748 break;
2749 case LINKTYPE_RAW:
f67aa5de 2750 case LINKTYPE_GRE_OVER_IP:
c45d8985
EL
2751 DecodeRaw(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2752 break;
49dbb455
VJ
2753 case LINKTYPE_NULL:
2754 DecodeNull(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2755 break;
c45d8985
EL
2756 default:
2757 SCLogError(SC_ERR_DATALINK_UNIMPLEMENTED, "Error: datalink type %" PRId32 " not yet supported in module DecodeAFP", p->datalink);
2758 break;
2759 }
2760
3088b6ac 2761 PacketDecodeFinalize(tv, dtv, p);
e7f09f24 2762
c45d8985
EL
2763 SCReturnInt(TM_ECODE_OK);
2764}
2765
ab1200fb 2766TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
c45d8985
EL
2767{
2768 SCEnter();
2769 DecodeThreadVars *dtv = NULL;
2770
5f307aca 2771 dtv = DecodeThreadVarsAlloc(tv);
c45d8985
EL
2772
2773 if (dtv == NULL)
2774 SCReturnInt(TM_ECODE_FAILED);
2775
2776 DecodeRegisterPerfCounters(dtv, tv);
2777
2778 *data = (void *)dtv;
2779
2780 SCReturnInt(TM_ECODE_OK);
2781}
2782
2864f9ee
VJ
2783TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data)
2784{
2785 if (data != NULL)
98c88d51 2786 DecodeThreadVarsFree(tv, data);
2864f9ee
VJ
2787 SCReturnInt(TM_ECODE_OK);
2788}
2789
e80b30c0 2790#endif /* HAVE_AF_PACKET */
c45d8985 2791/* eof */
a6457262
EL
2792/**
2793 * @}
2794 */