]> git.ipfire.org Git - people/ms/suricata.git/blame - src/source-af-packet.c
suricata: Check if default log dir is writable
[people/ms/suricata.git] / src / source-af-packet.c
CommitLineData
cde438f6 1/* Copyright (C) 2011-2018 Open Information Security Foundation
c45d8985
EL
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
a6457262
EL
18/**
19 * \defgroup afppacket AF_PACKET running mode
20 *
21 * @{
22 */
23
c45d8985
EL
24/**
25 * \file
26 *
27 * \author Eric Leblond <eric@regit.org>
28 *
29 * AF_PACKET socket acquisition support
30 *
c45d8985
EL
31 */
32
91e1256b
EL
33#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34#define SC_PCAP_DONT_INCLUDE_PCAP_H 1
c45d8985 35#include "suricata-common.h"
e80b30c0 36#include "config.h"
c45d8985
EL
37#include "suricata.h"
38#include "decode.h"
39#include "packet-queue.h"
40#include "threads.h"
41#include "threadvars.h"
42#include "tm-queuehandlers.h"
43#include "tm-modules.h"
44#include "tm-threads.h"
45#include "tm-threads-common.h"
46#include "conf.h"
17a32bda 47#include "util-cpu.h"
c45d8985 48#include "util-debug.h"
51eb9605 49#include "util-device.h"
d65f4585 50#include "util-ebpf.h"
c45d8985
EL
51#include "util-error.h"
52#include "util-privs.h"
e80b30c0 53#include "util-optimize.h"
51eb9605 54#include "util-checksum.h"
ac56b1bf 55#include "util-ioctl.h"
2cd6e128 56#include "util-host-info.h"
c45d8985
EL
57#include "tmqh-packetpool.h"
58#include "source-af-packet.h"
34b3f194 59#include "runmodes.h"
b07bda7a 60#include "flow-storage.h"
c45d8985 61
e80b30c0 62#ifdef HAVE_AF_PACKET
472e061c
VJ
63
64#if HAVE_SYS_IOCTL_H
2bc0be6e 65#include <sys/ioctl.h>
472e061c
VJ
66#endif
67
b37554e0
EL
68#if HAVE_LINUX_SOCKIOS_H
69#include <linux/sockios.h>
70#endif
71
06173267
EL
72#ifdef HAVE_PACKET_EBPF
73#include "util-ebpf.h"
74#include <bpf/libbpf.h>
75#include <bpf/bpf.h>
76#endif
77
91e1256b
EL
/* Compiled BPF filter: an instruction count plus the instruction array.
 * NOTE(review): presumably mirrors the libpcap definition, declared locally
 * because PCAP_DONT_INCLUDE_PCAP_BPF_H is defined in this file -- confirm. */
struct bpf_program {
    unsigned int bf_len;       /* presumably the number of entries in bf_insns */
    struct bpf_insn *bf_insns; /* filter instructions */
};
82
83#ifdef HAVE_PCAP_H
84#include <pcap.h>
85#endif
86
87#ifdef HAVE_PCAP_PCAP_H
88#include <pcap/pcap.h>
89#endif
90
28e9e4c8
EL
91#include "util-bpf.h"
92
472e061c 93#if HAVE_LINUX_IF_ETHER_H
c45d8985 94#include <linux/if_ether.h>
472e061c
VJ
95#endif
96
97#if HAVE_LINUX_IF_PACKET_H
c45d8985 98#include <linux/if_packet.h>
472e061c
VJ
99#endif
100
101#if HAVE_LINUX_IF_ARP_H
c45d8985 102#include <linux/if_arp.h>
472e061c 103#endif
f2a6fb8a 104
472e061c 105#if HAVE_LINUX_FILTER_H
f2a6fb8a 106#include <linux/filter.h>
e80b30c0 107#endif
c45d8985 108
472e061c 109#if HAVE_SYS_MMAN_H
49b7b00f 110#include <sys/mman.h>
472e061c
VJ
111#endif
112
a40f08a2
EL
113#ifdef HAVE_HW_TIMESTAMPING
114#include <linux/net_tstamp.h>
115#endif
116
472e061c 117#endif /* HAVE_AF_PACKET */
49b7b00f 118
c45d8985
EL
119extern int max_pending_packets;
120
e80b30c0
EL
121#ifndef HAVE_AF_PACKET
122
ab1200fb 123TmEcode NoAFPSupportExit(ThreadVars *, const void *, void **);
e80b30c0 124
8f1d7503
KS
125void TmModuleReceiveAFPRegister (void)
126{
e80b30c0
EL
127 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
128 tmm_modules[TMM_RECEIVEAFP].ThreadInit = NoAFPSupportExit;
129 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
130 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
131 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
132 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
133 tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
3f1c4efc 134 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
e80b30c0
EL
135}
136
137/**
138 * \brief Registration Function for DecodeAFP.
e80b30c0 139 */
8f1d7503
KS
140void TmModuleDecodeAFPRegister (void)
141{
e80b30c0
EL
142 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
143 tmm_modules[TMM_DECODEAFP].ThreadInit = NoAFPSupportExit;
144 tmm_modules[TMM_DECODEAFP].Func = NULL;
145 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
146 tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
147 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
148 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 149 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
e80b30c0
EL
150}
151
152/**
153 * \brief this function prints an error message and exits.
154 */
ab1200fb 155TmEcode NoAFPSupportExit(ThreadVars *tv, const void *initdata, void **data)
e80b30c0
EL
156{
157 SCLogError(SC_ERR_NO_AF_PACKET,"Error creating thread %s: you do not have "
158 "support for AF_PACKET enabled, on Linux host please recompile "
159 "with --enable-af-packet", tv->name);
160 exit(EXIT_FAILURE);
161}
162
163#else /* We have AF_PACKET support */
164
c45d8985
EL
/* Size of the buffers holding an interface name (see AFPThreadVars) */
#define AFP_IFACE_NAME_LENGTH 48

/* Values stored in AFPThreadVars::afp_state and in the peer state */
#define AFP_STATE_DOWN 0
#define AFP_STATE_UP 1

/* NOTE(review): presumably a delay in microseconds -- confirm at use site */
#define AFP_RECONNECT_TIMEOUT 500000
/* NOTE(review): presumably counted against AFPThreadVars::down_count -- confirm */
#define AFP_DOWN_COUNTER_INTERVAL 40

/* NOTE(review): presumably a poll() timeout in milliseconds -- confirm */
#define POLL_TIMEOUT 100
174
4a1a0080
EL
#ifndef TP_STATUS_USER_BUSY
/* for new use latest bit available in tp_status.
 * The literal is unsigned: shifting a signed 1 into bit 31 of a 32-bit int
 * is undefined behaviour and yields a negative constant. */
#define TP_STATUS_USER_BUSY (1U << 31)
#endif
179
b603ad62
EL
#ifndef TP_STATUS_VLAN_VALID
/* Fallback for headers that do not define the flag: bit 4 of tp_status */
#define TP_STATUS_VLAN_VALID (1 << 4)
#endif
183
62e63e3f
EL
/* Result codes returned by the AF_PACKET read functions */
enum {
    /** Read went fine */
    AFP_READ_OK = 0,
    /** Reading from the socket or ring failed */
    AFP_READ_FAILURE,
    /** Error during treatment by other functions of Suricata */
    AFP_SURI_FAILURE,
    /** Returned by the ring reader when frames were flushed in emergency mode */
    AFP_KERNEL_DROP,
};
191
1992a227
EL
/* Error classes; numbering starts at 1 so that 0 is never an error class */
enum {
    AFP_FATAL_ERROR = 1,
    AFP_RECOVERABLE_ERROR = 2,
};
196
49b7b00f
EL
/* One ring frame address, viewed either as a typed header or as a raw pointer */
union thdr {
    struct tpacket2_hdr *h2; /* TPACKET_V2 frame header */
#ifdef HAVE_TPACKET_V3
    struct tpacket3_hdr *h3; /* TPACKET_V3 frame header */
#endif
    void *raw;               /* untyped address of the frame */
};
204
06173267 205static int AFPBypassCallback(Packet *p);
8c880879 206static int AFPXDPBypassCallback(Packet *p);
06173267 207
91e1256b 208#define MAX_MAPS 32
c45d8985
EL
209/**
210 * \brief Structure to hold thread specific variables.
211 */
212typedef struct AFPThreadVars_
213{
69d0d484
VJ
214 union AFPRing {
215 char *v2;
216 struct iovec *v3;
217 } ring;
b797fd92 218
c45d8985 219 /* counters */
3ce39433 220 uint64_t pkts;
c45d8985 221
ff6365dd
EL
222 ThreadVars *tv;
223 TmSlot *slot;
9500d12c
EL
224 LiveDevice *livedev;
225 /* data link type for the thread */
b797fd92 226 uint32_t datalink;
9500d12c 227
d65f4585 228#ifdef HAVE_PACKET_EBPF
94a622cb 229 /* File descriptor of the IPv4 flow bypass table maps */
d65f4585 230 int v4_map_fd;
94a622cb 231 /* File descriptor of the IPv6 flow bypass table maps */
d65f4585
EL
232 int v6_map_fd;
233#endif
234
9500d12c 235 unsigned int frame_offset;
ff6365dd 236
9500d12c
EL
237 ChecksumValidationMode checksum_mode;
238
b797fd92 239 /* references to packet and drop counters */
9500d12c
EL
240 uint16_t capture_kernel_packets;
241 uint16_t capture_kernel_drops;
9efa4ace 242 uint16_t capture_errors;
9500d12c
EL
243
244 /* handle state */
245 uint8_t afp_state;
246 uint8_t copy_mode;
4bfa3aea 247 unsigned int flags;
9500d12c
EL
248
249 /* IPS peer */
250 AFPPeer *mpeer;
251
252 /* no mmap mode */
ff6365dd
EL
253 uint8_t *data; /** Per function and thread data */
254 int datalen; /** Length of per function and thread data */
9500d12c 255 int cooked;
ff6365dd 256
9500d12c
EL
257 /*
258 * Init related members
259 */
51eb9605 260
9500d12c
EL
261 /* thread specific socket */
262 int socket;
b797fd92
EL
263
264 int ring_size;
fa902abe 265 int block_size;
234aefdf 266 int block_timeout;
e80b30c0
EL
267 /* socket buffer size */
268 int buffer_size;
fa902abe 269 /* Filter */
ab1200fb 270 const char *bpf_filter;
91e1256b
EL
271 int ebpf_lb_fd;
272 int ebpf_filter_fd;
9500d12c 273
df7dbe36 274 int promisc;
e80b30c0 275
9500d12c 276 int down_count;
662dccd8 277
e80b30c0
EL
278 int cluster_id;
279 int cluster_type;
c45d8985 280
fbca1a4e
EL
281 int threads;
282
69d0d484
VJ
283 union AFPTpacketReq {
284 struct tpacket_req v2;
c2d0d938 285#ifdef HAVE_TPACKET_V3
69d0d484 286 struct tpacket_req3 v3;
c2d0d938 287#endif
69d0d484 288 } req;
b797fd92
EL
289
290 char iface[AFP_IFACE_NAME_LENGTH];
291 /* IPS output iface */
292 char out_iface[AFP_IFACE_NAME_LENGTH];
662dccd8 293
cba41207
AG
294 /* mmap'ed ring buffer */
295 unsigned int ring_buflen;
296 uint8_t *ring_buf;
91e1256b 297
8c880879
EL
298 uint8_t xdp_mode;
299
36838017 300#ifdef HAVE_PACKET_EBPF
4cf53100 301 struct ebpf_timeout_config ebpf_t_config;
36838017 302#endif
315c29a8 303
c45d8985
EL
304} AFPThreadVars;
305
306TmEcode ReceiveAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
ab1200fb 307TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
c45d8985
EL
308void ReceiveAFPThreadExitStats(ThreadVars *, void *);
309TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
e80b30c0 310TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
c45d8985 311
ab1200fb 312TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
2864f9ee 313TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
c45d8985
EL
314TmEcode DecodeAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
315
f2a6fb8a 316TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
662dccd8 317static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
13f13b6d
EL
318static int AFPGetDevFlags(int fd, const char *ifname);
319static int AFPDerefSocket(AFPPeer* peer);
320static int AFPRefSocket(AFPPeer* peer);
f2a6fb8a 321
19475165 322
c45d8985
EL
323/**
324 * \brief Registration Function for RecieveAFP.
325 * \todo Unit tests are needed for this module.
326 */
8f1d7503
KS
327void TmModuleReceiveAFPRegister (void)
328{
c45d8985
EL
329 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
330 tmm_modules[TMM_RECEIVEAFP].ThreadInit = ReceiveAFPThreadInit;
ff6365dd 331 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
e80b30c0 332 tmm_modules[TMM_RECEIVEAFP].PktAcqLoop = ReceiveAFPLoop;
57e0bd39 333 tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
c45d8985 334 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
7127ae2b 335 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = ReceiveAFPThreadDeinit;
c45d8985
EL
336 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
337 tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
cd4705e6 338 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
19475165 339
c45d8985
EL
340}
341
a6457262
EL
342
343/**
344 * \defgroup afppeers AFP peers list
345 *
 * AF_PACKET has an IPS mode where interfaces are peered: packets received
 * on one interface are sent to the peered interface and the other way
 * around. The ::AFPPeer list maintains the list of peers. Each ::AFPPeer
 * stores the information needed to send packets on the interface.
 * An element of the list must not be destroyed during the run of Suricata as
 * it is used by ::Packet and other threads.
352 *
353 * @{
354 */
355
662dccd8
EL
356typedef struct AFPPeersList_ {
357 TAILQ_HEAD(, AFPPeer_) peers; /**< Head of list of fragments. */
358 int cnt;
359 int peered;
60400163
EL
360 int turn; /**< Next value for initialisation order */
361 SC_ATOMIC_DECLARE(int, reached); /**< Counter used to synchronize start */
662dccd8
EL
362} AFPPeersList;
363
364/**
a6457262
EL
365 * \brief Update the peer.
366 *
367 * Update the AFPPeer of a thread ie set new state, socket number
368 * or iface index.
369 *
662dccd8 370 */
ab1200fb 371static void AFPPeerUpdate(AFPThreadVars *ptv)
662dccd8
EL
372{
373 if (ptv->mpeer == NULL) {
374 return;
375 }
662dccd8
EL
376 (void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
377 (void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
378 (void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
379}
380
a6457262
EL
381/**
382 * \brief Clean and free ressource used by an ::AFPPeer
383 */
ab1200fb 384static void AFPPeerClean(AFPPeer *peer)
662dccd8
EL
385{
386 if (peer->flags & AFP_SOCK_PROTECT)
387 SCMutexDestroy(&peer->sock_protect);
388 SC_ATOMIC_DESTROY(peer->socket);
389 SC_ATOMIC_DESTROY(peer->if_idx);
390 SC_ATOMIC_DESTROY(peer->state);
391 SCFree(peer);
392}
393
394AFPPeersList peerslist;
395
396
a6457262
EL
397/**
398 * \brief Init the global list of ::AFPPeer
399 */
662dccd8
EL
400TmEcode AFPPeersListInit()
401{
402 SCEnter();
403 TAILQ_INIT(&peerslist.peers);
404 peerslist.peered = 0;
405 peerslist.cnt = 0;
60400163
EL
406 peerslist.turn = 0;
407 SC_ATOMIC_INIT(peerslist.reached);
408 (void) SC_ATOMIC_SET(peerslist.reached, 0);
662dccd8
EL
409 SCReturnInt(TM_ECODE_OK);
410}
411
a6457262
EL
412/**
413 * \brief Check that all ::AFPPeer got a peer
414 *
415 * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
416 */
662dccd8
EL
417TmEcode AFPPeersListCheck()
418{
419#define AFP_PEERS_MAX_TRY 4
420#define AFP_PEERS_WAIT 20000
421 int try = 0;
422 SCEnter();
423 while (try < AFP_PEERS_MAX_TRY) {
424 if (peerslist.cnt != peerslist.peered) {
425 usleep(AFP_PEERS_WAIT);
426 } else {
427 SCReturnInt(TM_ECODE_OK);
428 }
429 try++;
430 }
431 SCLogError(SC_ERR_AFP_CREATE, "Threads number not equals");
432 SCReturnInt(TM_ECODE_FAILED);
433}
434
a6457262
EL
435/**
436 * \brief Declare a new AFP thread to AFP peers list.
437 */
ab1200fb 438static TmEcode AFPPeersListAdd(AFPThreadVars *ptv)
662dccd8
EL
439{
440 SCEnter();
441 AFPPeer *peer = SCMalloc(sizeof(AFPPeer));
442 AFPPeer *pitem;
ac56b1bf 443 int mtu, out_mtu;
662dccd8 444
e176be6f 445 if (unlikely(peer == NULL)) {
662dccd8
EL
446 SCReturnInt(TM_ECODE_FAILED);
447 }
448 memset(peer, 0, sizeof(AFPPeer));
449 SC_ATOMIC_INIT(peer->socket);
13f13b6d 450 SC_ATOMIC_INIT(peer->sock_usage);
662dccd8
EL
451 SC_ATOMIC_INIT(peer->if_idx);
452 SC_ATOMIC_INIT(peer->state);
453 peer->flags = ptv->flags;
60400163 454 peer->turn = peerslist.turn++;
662dccd8
EL
455
456 if (peer->flags & AFP_SOCK_PROTECT) {
457 SCMutexInit(&peer->sock_protect, NULL);
458 }
459
13f13b6d 460 (void)SC_ATOMIC_SET(peer->sock_usage, 0);
662dccd8
EL
461 (void)SC_ATOMIC_SET(peer->state, AFP_STATE_DOWN);
462 strlcpy(peer->iface, ptv->iface, AFP_IFACE_NAME_LENGTH);
463 ptv->mpeer = peer;
464 /* add element to iface list */
465 TAILQ_INSERT_TAIL(&peerslist.peers, peer, next);
662dccd8 466
13f13b6d
EL
467 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
468 peerslist.cnt++;
469
470 /* Iter to find a peer */
471 TAILQ_FOREACH(pitem, &peerslist.peers, next) {
472 if (pitem->peer)
473 continue;
474 if (strcmp(pitem->iface, ptv->out_iface))
475 continue;
476 peer->peer = pitem;
477 pitem->peer = peer;
478 mtu = GetIfaceMTU(ptv->iface);
479 out_mtu = GetIfaceMTU(ptv->out_iface);
480 if (mtu != out_mtu) {
481 SCLogError(SC_ERR_AFP_CREATE,
482 "MTU on %s (%d) and %s (%d) are not equal, "
483 "transmission of packets bigger than %d will fail.",
484 ptv->iface, mtu,
485 ptv->out_iface, out_mtu,
486 (out_mtu > mtu) ? mtu : out_mtu);
487 }
488 peerslist.peered += 2;
489 break;
ac56b1bf 490 }
662dccd8
EL
491 }
492
493 AFPPeerUpdate(ptv);
494
495 SCReturnInt(TM_ECODE_OK);
496}
497
ab1200fb 498static int AFPPeersListWaitTurn(AFPPeer *peer)
60400163 499{
b2691cbe
EL
500 /* If turn is zero, we already have started threads once */
501 if (peerslist.turn == 0)
502 return 0;
503
60400163
EL
504 if (peer->turn == SC_ATOMIC_GET(peerslist.reached))
505 return 0;
506 return 1;
507}
508
ab1200fb 509static void AFPPeersListReachedInc(void)
60400163 510{
b2691cbe
EL
511 if (peerslist.turn == 0)
512 return;
513
514 if (SC_ATOMIC_ADD(peerslist.reached, 1) == peerslist.turn) {
515 SCLogInfo("All AFP capture threads are running.");
516 (void)SC_ATOMIC_SET(peerslist.reached, 0);
517 /* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
518 * restarted.
519 */
520 peerslist.turn = 0;
521 }
60400163
EL
522}
523
ab1200fb 524static int AFPPeersListStarted(void)
919377d4
EL
525{
526 return !peerslist.turn;
527}
528
a6457262
EL
529/**
530 * \brief Clean the global peers list.
531 */
662dccd8
EL
532void AFPPeersListClean()
533{
534 AFPPeer *pitem;
535
536 while ((pitem = TAILQ_FIRST(&peerslist.peers))) {
537 TAILQ_REMOVE(&peerslist.peers, pitem, next);
538 AFPPeerClean(pitem);
539 }
540}
541
a6457262
EL
542/**
543 * @}
544 */
545
c45d8985
EL
546/**
547 * \brief Registration Function for DecodeAFP.
548 * \todo Unit tests are needed for this module.
549 */
8f1d7503
KS
550void TmModuleDecodeAFPRegister (void)
551{
c45d8985
EL
552 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
553 tmm_modules[TMM_DECODEAFP].ThreadInit = DecodeAFPThreadInit;
554 tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
555 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
2864f9ee 556 tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
c45d8985
EL
557 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
558 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 559 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
c45d8985
EL
560}
561
662dccd8 562
e80b30c0
EL
563static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose);
564
e8a4a4c4 565static inline void AFPDumpCounters(AFPThreadVars *ptv)
6efd37a3 566{
6efd37a3 567#ifdef PACKET_STATISTICS
e8a4a4c4
EL
568 struct tpacket_stats kstats;
569 socklen_t len = sizeof (struct tpacket_stats);
570 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
571 &kstats, &len) > -1) {
572 SCLogDebug("(%s) Kernel: Packets %" PRIu32 ", dropped %" PRIu32 "",
573 ptv->tv->name,
574 kstats.tp_packets, kstats.tp_drops);
8992275b
VJ
575 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, kstats.tp_packets);
576 StatsAddUI64(ptv->tv, ptv->capture_kernel_drops, kstats.tp_drops);
3ce39433
EL
577 (void) SC_ATOMIC_ADD(ptv->livedev->drop, (uint64_t) kstats.tp_drops);
578 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, (uint64_t) kstats.tp_packets);
6efd37a3 579 }
e8a4a4c4 580#endif
6efd37a3 581}
c45d8985
EL
582
583/**
584 * \brief AF packet read function.
585 *
586 * This function fills
587 * From here the packets are picked up by the DecodeAFP thread.
588 *
589 * \param user pointer to AFPThreadVars
590 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
591 */
ab1200fb 592static int AFPRead(AFPThreadVars *ptv)
c45d8985
EL
593{
594 Packet *p = NULL;
595 /* XXX should try to use read that get directly to packet */
c45d8985
EL
596 int offset = 0;
597 int caplen;
598 struct sockaddr_ll from;
599 struct iovec iov;
600 struct msghdr msg;
c45d8985
EL
601 struct cmsghdr *cmsg;
602 union {
603 struct cmsghdr cmsg;
604 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
605 } cmsg_buf;
6efd37a3 606 unsigned char aux_checksum = 0;
c45d8985
EL
607
608 msg.msg_name = &from;
609 msg.msg_namelen = sizeof(from);
610 msg.msg_iov = &iov;
611 msg.msg_iovlen = 1;
c45d8985
EL
612 msg.msg_control = &cmsg_buf;
613 msg.msg_controllen = sizeof(cmsg_buf);
c45d8985
EL
614 msg.msg_flags = 0;
615
616 if (ptv->cooked)
617 offset = SLL_HEADER_LEN;
618 else
619 offset = 0;
e80b30c0
EL
620 iov.iov_len = ptv->datalen - offset;
621 iov.iov_base = ptv->data + offset;
c45d8985
EL
622
623 caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
624
625 if (caplen < 0) {
626 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
627 errno);
62e63e3f 628 SCReturnInt(AFP_READ_FAILURE);
c45d8985 629 }
ff6365dd
EL
630
631 p = PacketGetFromQueueOrAlloc();
c45d8985 632 if (p == NULL) {
9efa4ace 633 SCReturnInt(AFP_SURI_FAILURE);
c45d8985 634 }
b33986c8 635 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
636 if (ptv->flags & AFP_BYPASS) {
637 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585
EL
638#ifdef HAVE_PACKET_EBPF
639 p->afp_v.v4_map_fd = ptv->v4_map_fd;
640 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 641 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 642#endif
06173267 643 }
8c880879
EL
644 if (ptv->flags & AFP_XDPBYPASS) {
645 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585
EL
646#ifdef HAVE_PACKET_EBPF
647 p->afp_v.v4_map_fd = ptv->v4_map_fd;
648 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 649 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 650#endif
8c880879 651 }
c45d8985
EL
652
653 /* get timestamp of packet via ioctl */
654 if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
655 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
656 errno);
657 TmqhOutputPacketpool(ptv->tv, p);
62e63e3f 658 SCReturnInt(AFP_READ_FAILURE);
c45d8985
EL
659 }
660
661 ptv->pkts++;
51eb9605 662 p->livedev = ptv->livedev;
c45d8985
EL
663
664 /* add forged header */
665 if (ptv->cooked) {
e80b30c0 666 SllHdr * hdrp = (SllHdr *)ptv->data;
c45d8985
EL
667 /* XXX this is minimalist, but this seems enough */
668 hdrp->sll_protocol = from.sll_protocol;
669 }
670
671 p->datalink = ptv->datalink;
672 SET_PKT_LEN(p, caplen + offset);
e80b30c0 673 if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
c45d8985 674 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 675 SCReturnInt(AFP_SURI_FAILURE);
c45d8985 676 }
e80b30c0
EL
677 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
678 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
679
6062e00c
EL
680 /* We only check for checksum disable */
681 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
51eb9605
EL
682 p->flags |= PKT_IGNORE_CHECKSUM;
683 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
684 if (ptv->livedev->ignore_checksum) {
685 p->flags |= PKT_IGNORE_CHECKSUM;
a565148f 686 } else if (ChecksumAutoModeCheck(ptv->pkts,
51eb9605
EL
687 SC_ATOMIC_GET(ptv->livedev->pkts),
688 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
689 ptv->livedev->ignore_checksum = 1;
6062e00c 690 p->flags |= PKT_IGNORE_CHECKSUM;
51eb9605 691 }
6062e00c 692 } else {
6efd37a3
EL
693 aux_checksum = 1;
694 }
6062e00c 695
6efd37a3
EL
696 /* List is NULL if we don't have activated auxiliary data */
697 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
698 struct tpacket_auxdata *aux;
f6ddaf33 699
6efd37a3
EL
700 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
701 cmsg->cmsg_level != SOL_PACKET ||
702 cmsg->cmsg_type != PACKET_AUXDATA)
703 continue;
f6ddaf33 704
6efd37a3
EL
705 aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
706
707 if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
708 p->flags |= PKT_IGNORE_CHECKSUM;
f6ddaf33 709 }
6efd37a3 710 break;
f6ddaf33
EL
711 }
712
c469824b
EL
713 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
714 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 715 SCReturnInt(AFP_SURI_FAILURE);
c469824b 716 }
62e63e3f 717 SCReturnInt(AFP_READ_OK);
c45d8985
EL
718}
719
ecf59be4
EL
720/**
721 * \brief AF packet write function.
722 *
723 * This function has to be called before the memory
724 * related to Packet in ring buffer is released.
725 *
726 * \param pointer to Packet
727 * \param version of capture: TPACKET_V2 or TPACKET_V3
728 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
729 *
730 */
731static TmEcode AFPWritePacket(Packet *p, int version)
662dccd8
EL
732{
733 struct sockaddr_ll socket_address;
734 int socket;
ecf59be4
EL
735 uint8_t *pstart;
736 size_t plen;
ee7e689b
AG
737 union thdr h;
738 uint16_t vlan_tci = 0;
662dccd8
EL
739
740 if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
3f107fa1 741 if (PACKET_TEST_ACTION(p, ACTION_DROP)) {
662dccd8
EL
742 return TM_ECODE_OK;
743 }
744 }
745
746 if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
747 return TM_ECODE_OK;
748
749 if (p->ethh == NULL) {
750 SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
751 return TM_ECODE_FAILED;
752 }
753 /* Index of the network device */
754 socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
755 /* Address length*/
756 socket_address.sll_halen = ETH_ALEN;
757 /* Destination MAC */
758 memcpy(socket_address.sll_addr, p->ethh, 6);
759
760 /* Send packet, locking the socket if necessary */
761 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
762 SCMutexLock(&p->afp_v.peer->sock_protect);
763 socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
ecf59be4 764
ee7e689b
AG
765 h.raw = p->afp_v.relptr;
766
ecf59be4 767 if (version == TPACKET_V2) {
ecf59be4
EL
768 /* Copy VLAN header from ring memory. For post june 2011 kernel we test
769 * the flag. It is not defined for older kernel so we go best effort
770 * and test for non zero value of the TCI header. */
771 if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
ee7e689b
AG
772 vlan_tci = h.h2->tp_vlan_tci;
773 }
774 } else {
775#ifdef HAVE_TPACKET_V3
776 if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
777 vlan_tci = h.h3->hv1.tp_vlan_tci;
ecf59be4 778 }
ee7e689b
AG
779#else
780 /* Should not get here */
781 BUG_ON(1);
782#endif
783 }
784
785 if (vlan_tci != 0) {
786 pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
787 plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
788 /* move ethernet addresses */
789 memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
790 /* write vlan info */
791 *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
792 *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
ecf59be4
EL
793 } else {
794 pstart = GET_PKT_DATA(p);
795 plen = GET_PKT_LEN(p);
796 }
797
798 if (sendto(socket, pstart, plen, 0,
662dccd8
EL
799 (struct sockaddr*) &socket_address,
800 sizeof(struct sockaddr_ll)) < 0) {
801 SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
802 socket,
803 strerror(errno));
804 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
805 SCMutexUnlock(&p->afp_v.peer->sock_protect);
806 return TM_ECODE_FAILED;
807 }
808 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
809 SCMutexUnlock(&p->afp_v.peer->sock_protect);
810
811 return TM_ECODE_OK;
812}
813
ab1200fb 814static void AFPReleaseDataFromRing(Packet *p)
2011a3f8 815{
662dccd8
EL
816 /* Need to be in copy mode and need to detect early release
817 where Ethernet header could not be set (and pseudo packet) */
818 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
ecf59be4 819 AFPWritePacket(p, TPACKET_V2);
662dccd8 820 }
13f13b6d
EL
821
822 if (AFPDerefSocket(p->afp_v.mpeer) == 0)
680e941a 823 goto cleanup;
13f13b6d 824
2011a3f8
EL
825 if (p->afp_v.relptr) {
826 union thdr h;
827 h.raw = p->afp_v.relptr;
828 h.h2->tp_status = TP_STATUS_KERNEL;
2011a3f8 829 }
680e941a
EL
830
831cleanup:
832 AFPV_CLEANUP(&p->afp_v);
b076a26c
KS
833}
834
#ifdef HAVE_TPACKET_V3
/**
 * \brief Release function for packets read in TPACKET_V3 mode.
 *
 * In copy mode the packet is written to the peer interface before the
 * Packet is freed or returned to its pool.
 *
 * \param p packet to release
 */
static void AFPReleasePacketV3(Packet *p)
{
    /* Need to be in copy mode and need to detect early release
       where Ethernet header could not be set (and pseudo packet) */
    const int forward = (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) &&
                        !PKT_IS_PSEUDOPKT(p);
    if (forward) {
        AFPWritePacket(p, TPACKET_V3);
    }
    PacketFreeOrRelease(p);
}
#endif
bae1b03c 846
ab1200fb 847static void AFPReleasePacket(Packet *p)
b076a26c
KS
848{
849 AFPReleaseDataFromRing(p);
850 PacketFreeOrRelease(p);
2011a3f8
EL
851}
852
49b7b00f
EL
853/**
854 * \brief AF packet read function for ring
855 *
856 * This function fills
857 * From here the packets are picked up by the DecodeAFP thread.
858 *
859 * \param user pointer to AFPThreadVars
860 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
861 */
ab1200fb 862static int AFPReadFromRing(AFPThreadVars *ptv)
49b7b00f
EL
863{
864 Packet *p = NULL;
865 union thdr h;
27b5136b 866 uint8_t emergency_flush = 0;
4d8f70c6 867 int read_pkts = 0;
b26ec603 868 int loop_start = -1;
4d8f70c6 869
49b7b00f 870
a369f8c3
EL
871 /* Loop till we have packets available */
872 while (1) {
53c02334
AS
873 if (unlikely(suricata_ctl_flags != 0)) {
874 break;
875 }
876
a369f8c3 877 /* Read packet from ring */
69d0d484 878 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
9efa4ace
EL
879 if (unlikely(h.raw == NULL)) {
880 /* Impossible we reach this point in normal condition, so trigger
881 * a failure in reading */
882 SCReturnInt(AFP_READ_FAILURE);
34b3f194 883 }
662dccd8 884
82a2dd85 885 if ((! h.h2->tp_status) || (h.h2->tp_status & TP_STATUS_USER_BUSY)) {
4d8f70c6 886 if (read_pkts == 0) {
b26ec603
EL
887 if (loop_start == -1) {
888 loop_start = ptv->frame_offset;
889 } else if (unlikely(loop_start == (int)ptv->frame_offset)) {
890 SCReturnInt(AFP_READ_OK);
891 }
69d0d484 892 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
b26ec603
EL
893 ptv->frame_offset = 0;
894 }
895 continue;
4d8f70c6 896 }
27b5136b
EL
897 if ((emergency_flush) && (ptv->flags & AFP_EMERGENCY_MODE)) {
898 SCReturnInt(AFP_KERNEL_DROP);
899 } else {
900 SCReturnInt(AFP_READ_OK);
901 }
902 }
4d8f70c6
EL
903
904 read_pkts++;
b26ec603 905 loop_start = -1;
4d8f70c6 906
4a1a0080
EL
907 /* Our packet is still used by suricata, we exit read loop to
908 * gain some time */
909 if (h.h2->tp_status & TP_STATUS_USER_BUSY) {
910 SCReturnInt(AFP_READ_OK);
911 }
912
27b5136b
EL
913 if ((ptv->flags & AFP_EMERGENCY_MODE) && (emergency_flush == 1)) {
914 h.h2->tp_status = TP_STATUS_KERNEL;
915 goto next_frame;
a369f8c3
EL
916 }
917
918 p = PacketGetFromQueueOrAlloc();
919 if (p == NULL) {
9efa4ace 920 SCReturnInt(AFP_SURI_FAILURE);
34b3f194 921 }
b33986c8 922 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
923 if (ptv->flags & AFP_BYPASS) {
924 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585 925#ifdef HAVE_PACKET_EBPF
6062c27e
EL
926 p->afp_v.v4_map_fd = ptv->v4_map_fd;
927 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 928 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 929#endif
06173267 930 }
8c880879
EL
931 if (ptv->flags & AFP_XDPBYPASS) {
932 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585 933#ifdef HAVE_PACKET_EBPF
6062c27e
EL
934 p->afp_v.v4_map_fd = ptv->v4_map_fd;
935 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 936 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 937#endif
8c880879 938 }
49b7b00f 939
4a1a0080
EL
940 /* Suricata will treat packet so telling it is busy, this
941 * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
942 * function. */
943 h.h2->tp_status |= TP_STATUS_USER_BUSY;
944
a369f8c3 945 ptv->pkts++;
a369f8c3 946 p->livedev = ptv->livedev;
a369f8c3 947 p->datalink = ptv->datalink;
d0940396 948
a369f8c3
EL
949 if (h.h2->tp_len > h.h2->tp_snaplen) {
950 SCLogDebug("Packet length (%d) > snaplen (%d), truncating",
951 h.h2->tp_len, h.h2->tp_snaplen);
952 }
71e47868
EL
953
954 /* get vlan id from header */
bcc03f17 955 if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
e871f713 956 (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
01a8cc4e 957 p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
71e47868 958 p->vlan_idx = 1;
71e47868
EL
959 }
960
a369f8c3
EL
961 if (ptv->flags & AFP_ZERO_COPY) {
962 if (PacketSetData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
963 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 964 SCReturnInt(AFP_SURI_FAILURE);
662dccd8 965 } else {
0f2b3406 966 p->afp_v.relptr = h.raw;
b076a26c 967 p->ReleasePacket = AFPReleasePacket;
5f12b234
EL
968 p->afp_v.mpeer = ptv->mpeer;
969 AFPRefSocket(ptv->mpeer);
0f2b3406
EL
970
971 p->afp_v.copy_mode = ptv->copy_mode;
972 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
973 p->afp_v.peer = ptv->mpeer->peer;
974 } else {
975 p->afp_v.peer = NULL;
662dccd8 976 }
a369f8c3
EL
977 }
978 } else {
979 if (PacketCopyData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
9efa4ace
EL
980 /* As we can possibly fail to copy the data due to invalid data, let's
981 * skip this packet and switch to the next one.
982 */
983 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 984 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
9efa4ace
EL
985 ptv->frame_offset = 0;
986 }
a369f8c3 987 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 988 SCReturnInt(AFP_SURI_FAILURE);
a369f8c3
EL
989 }
990 }
d65f4585 991
a369f8c3
EL
992 /* Timestamp */
993 p->ts.tv_sec = h.h2->tp_sec;
994 p->ts.tv_usec = h.h2->tp_nsec/1000;
995 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
996 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
997
998 /* We only check for checksum disable */
999 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
1000 p->flags |= PKT_IGNORE_CHECKSUM;
1001 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
1002 if (ptv->livedev->ignore_checksum) {
1003 p->flags |= PKT_IGNORE_CHECKSUM;
1004 } else if (ChecksumAutoModeCheck(ptv->pkts,
1005 SC_ATOMIC_GET(ptv->livedev->pkts),
1006 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
1007 ptv->livedev->ignore_checksum = 1;
1008 p->flags |= PKT_IGNORE_CHECKSUM;
1009 }
1010 } else {
1011 if (h.h2->tp_status & TP_STATUS_CSUMNOTREADY) {
49b7b00f 1012 p->flags |= PKT_IGNORE_CHECKSUM;
a369f8c3 1013 }
ee6ba099
EL
1014 }
1015 if (h.h2->tp_status & TP_STATUS_LOSING) {
1016 emergency_flush = 1;
e8a4a4c4 1017 AFPDumpCounters(ptv);
a369f8c3
EL
1018 }
1019
5f12b234
EL
1020 /* release frame if not in zero copy mode */
1021 if (!(ptv->flags & AFP_ZERO_COPY)) {
1022 h.h2->tp_status = TP_STATUS_KERNEL;
1023 }
1024
a369f8c3
EL
1025 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
1026 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 1027 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
a369f8c3
EL
1028 ptv->frame_offset = 0;
1029 }
1030 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1031 SCReturnInt(AFP_SURI_FAILURE);
49b7b00f 1032 }
49b7b00f 1033
27b5136b 1034next_frame:
69d0d484 1035 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
34b3f194 1036 ptv->frame_offset = 0;
350d7619
EL
1037 /* Get out of loop to be sure we will reach maintenance tasks */
1038 SCReturnInt(AFP_READ_OK);
34b3f194 1039 }
34b3f194
EL
1040 }
1041
49b7b00f
EL
1042 SCReturnInt(AFP_READ_OK);
1043}
1044
f947539d 1045#ifdef HAVE_TPACKET_V3
bae1b03c
EL
1046static inline void AFPFlushBlock(struct tpacket_block_desc *pbd)
1047{
1048 pbd->hdr.bh1.block_status = TP_STATUS_KERNEL;
1049}
1050
1051static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc *pbd, struct tpacket3_hdr *ppd)
1052{
1053 Packet *p = PacketGetFromQueueOrAlloc();
1054 if (p == NULL) {
9efa4ace 1055 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1056 }
1057 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
1058 if (ptv->flags & AFP_BYPASS) {
1059 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585 1060#ifdef HAVE_PACKET_EBPF
6062c27e
EL
1061 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1062 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 1063 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 1064#endif
e98b5e49 1065 } else if (ptv->flags & AFP_XDPBYPASS) {
8c880879 1066 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585 1067#ifdef HAVE_PACKET_EBPF
6062c27e
EL
1068 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1069 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 1070 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 1071#endif
8c880879 1072 }
bae1b03c
EL
1073
1074 ptv->pkts++;
bae1b03c
EL
1075 p->livedev = ptv->livedev;
1076 p->datalink = ptv->datalink;
1077
bcc03f17 1078 if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
e41a9d63
AG
1079 (ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
1080 p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
1081 p->vlan_idx = 1;
e41a9d63
AG
1082 }
1083
bae1b03c
EL
1084 if (ptv->flags & AFP_ZERO_COPY) {
1085 if (PacketSetData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1086 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1087 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c 1088 }
310b27a1 1089 p->afp_v.relptr = ppd;
bae1b03c
EL
1090 p->ReleasePacket = AFPReleasePacketV3;
1091 p->afp_v.mpeer = ptv->mpeer;
1092 AFPRefSocket(ptv->mpeer);
1093
1094 p->afp_v.copy_mode = ptv->copy_mode;
1095 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
1096 p->afp_v.peer = ptv->mpeer->peer;
1097 } else {
1098 p->afp_v.peer = NULL;
1099 }
1100 } else {
1101 if (PacketCopyData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1102 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1103 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1104 }
1105 }
1106 /* Timestamp */
1107 p->ts.tv_sec = ppd->tp_sec;
1108 p->ts.tv_usec = ppd->tp_nsec/1000;
1109 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
1110 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
1111
1112 /* We only check for checksum disable */
1113 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
1114 p->flags |= PKT_IGNORE_CHECKSUM;
1115 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
1116 if (ptv->livedev->ignore_checksum) {
1117 p->flags |= PKT_IGNORE_CHECKSUM;
1118 } else if (ChecksumAutoModeCheck(ptv->pkts,
1119 SC_ATOMIC_GET(ptv->livedev->pkts),
1120 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
1121 ptv->livedev->ignore_checksum = 1;
1122 p->flags |= PKT_IGNORE_CHECKSUM;
1123 }
1124 } else {
1125 if (ppd->tp_status & TP_STATUS_CSUMNOTREADY) {
1126 p->flags |= PKT_IGNORE_CHECKSUM;
1127 }
1128 }
1129
1130 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
bae1b03c 1131 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1132 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1133 }
1134
1135 SCReturnInt(AFP_READ_OK);
1136}
1137
1138static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
1139{
1140 int num_pkts = pbd->hdr.bh1.num_pkts, i;
1141 uint8_t *ppd;
9efa4ace 1142 int ret = 0;
bae1b03c
EL
1143
1144 ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
1145 for (i = 0; i < num_pkts; ++i) {
9efa4ace
EL
1146 ret = AFPParsePacketV3(ptv, pbd,
1147 (struct tpacket3_hdr *)ppd);
1148 switch (ret) {
1149 case AFP_READ_OK:
1150 break;
1151 case AFP_SURI_FAILURE:
1152 /* Internal error but let's just continue and
1153 * treat thenext packet */
1154 break;
1155 case AFP_READ_FAILURE:
1156 SCReturnInt(AFP_READ_FAILURE);
1157 default:
1158 SCReturnInt(ret);
5f84b55d 1159 }
bae1b03c
EL
1160 ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset;
1161 }
1162
1163 SCReturnInt(AFP_READ_OK);
1164}
f947539d 1165#endif /* HAVE_TPACKET_V3 */
bae1b03c
EL
1166
1167/**
1168 * \brief AF packet read function for ring
1169 *
1170 * This function fills
1171 * From here the packets are picked up by the DecodeAFP thread.
1172 *
1173 * \param user pointer to AFPThreadVars
1174 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1175 */
ab1200fb 1176static int AFPReadFromRingV3(AFPThreadVars *ptv)
bae1b03c 1177{
c2d0d938 1178#ifdef HAVE_TPACKET_V3
bae1b03c 1179 struct tpacket_block_desc *pbd;
9efa4ace 1180 int ret = 0;
bae1b03c
EL
1181
1182 /* Loop till we have packets available */
1183 while (1) {
1184 if (unlikely(suricata_ctl_flags != 0)) {
1185 SCLogInfo("Exiting AFP V3 read loop");
1186 break;
1187 }
1188
69d0d484 1189 pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
bae1b03c
EL
1190
1191 /* block is not ready to be read */
1192 if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
1193 SCReturnInt(AFP_READ_OK);
1194 }
1195
9efa4ace
EL
1196 ret = AFPWalkBlock(ptv, pbd);
1197 if (unlikely(ret != AFP_READ_OK)) {
bae1b03c 1198 AFPFlushBlock(pbd);
9efa4ace 1199 SCReturnInt(ret);
bae1b03c
EL
1200 }
1201
1202 AFPFlushBlock(pbd);
69d0d484 1203 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
bae1b03c
EL
1204 /* return to maintenance task after one loop on the ring */
1205 if (ptv->frame_offset == 0) {
1206 SCReturnInt(AFP_READ_OK);
1207 }
1208 }
c2d0d938 1209#endif
bae1b03c
EL
1210 SCReturnInt(AFP_READ_OK);
1211}
1212
13f13b6d
EL
1213/**
1214 * \brief Reference socket
1215 *
1216 * \retval O in case of failure, 1 in case of success
1217 */
1218static int AFPRefSocket(AFPPeer* peer)
1219{
1220 if (unlikely(peer == NULL))
1221 return 0;
1222
1223 (void)SC_ATOMIC_ADD(peer->sock_usage, 1);
1224 return 1;
1225}
1226
1227
1228/**
1229 * \brief Dereference socket
1230 *
1231 * \retval 1 if socket is still alive, 0 if not
1232 */
1233static int AFPDerefSocket(AFPPeer* peer)
1234{
4424f5a2
EL
1235 if (peer == NULL)
1236 return 1;
1237
13f13b6d
EL
1238 if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 0) {
1239 if (SC_ATOMIC_GET(peer->state) == AFP_STATE_DOWN) {
1240 SCLogInfo("Cleaning socket connected to '%s'", peer->iface);
1241 close(SC_ATOMIC_GET(peer->socket));
1242 return 0;
1243 }
1244 }
1245 return 1;
1246}
1247
ab1200fb 1248static void AFPSwitchState(AFPThreadVars *ptv, int state)
13f13b6d
EL
1249{
1250 ptv->afp_state = state;
1251 ptv->down_count = 0;
49b7b00f 1252
13f13b6d
EL
1253 AFPPeerUpdate(ptv);
1254
1255 /* Do cleaning if switching to down state */
1256 if (state == AFP_STATE_DOWN) {
5f84b55d
EL
1257#ifdef HAVE_TPACKET_V3
1258 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
1259 if (!ptv->ring.v3) {
1260 SCFree(ptv->ring.v3);
1261 ptv->ring.v3 = NULL;
5f84b55d
EL
1262 }
1263 } else {
1264#endif
69d0d484 1265 if (ptv->ring.v2) {
5f84b55d 1266 /* only used in reading phase, we can free it */
69d0d484
VJ
1267 SCFree(ptv->ring.v2);
1268 ptv->ring.v2 = NULL;
5f84b55d
EL
1269 }
1270#ifdef HAVE_TPACKET_V3
13f13b6d 1271 }
5f84b55d 1272#endif
13f13b6d
EL
1273 if (ptv->socket != -1) {
1274 /* we need to wait for all packets to return data */
1275 if (SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1) == 0) {
b86e1762 1276 SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
cba41207 1277 munmap(ptv->ring_buf, ptv->ring_buflen);
13f13b6d
EL
1278 close(ptv->socket);
1279 ptv->socket = -1;
1280 }
1281 }
1282 }
1283 if (state == AFP_STATE_UP) {
1284 (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
1285 }
1286}
49b7b00f 1287
7fea0ec6
EL
1288static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
1289 uint64_t *discarded_pkts)
919377d4
EL
1290{
1291 struct sockaddr_ll from;
1292 struct iovec iov;
1293 struct msghdr msg;
1294 struct timeval ts;
1295 union {
1296 struct cmsghdr cmsg;
1297 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
1298 } cmsg_buf;
1299
1300
1301 if (unlikely(suricata_ctl_flags != 0)) {
1302 return 1;
1303 }
1304
1305 msg.msg_name = &from;
1306 msg.msg_namelen = sizeof(from);
1307 msg.msg_iov = &iov;
1308 msg.msg_iovlen = 1;
1309 msg.msg_control = &cmsg_buf;
1310 msg.msg_controllen = sizeof(cmsg_buf);
1311 msg.msg_flags = 0;
1312
1313 iov.iov_len = ptv->datalen;
1314 iov.iov_base = ptv->data;
1315
339f0665 1316 (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
919377d4
EL
1317
1318 if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
1319 /* FIXME */
1320 return -1;
1321 }
1322
1323 if ((ts.tv_sec > synctv->tv_sec) ||
1324 (ts.tv_sec >= synctv->tv_sec &&
1325 ts.tv_usec > synctv->tv_usec)) {
1326 return 1;
1327 }
1328 return 0;
1329}
1330
7fea0ec6
EL
1331static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
1332 uint64_t *discarded_pkts)
919377d4
EL
1333{
1334 union thdr h;
1335
1336 if (unlikely(suricata_ctl_flags != 0)) {
1337 return 1;
1338 }
1339
f947539d 1340#ifdef HAVE_TPACKET_V3
bae1b03c 1341 if (ptv->flags & AFP_TPACKET_V3) {
cebbe06f 1342 int ret = 0;
7fea0ec6 1343 struct tpacket_block_desc *pbd;
69d0d484 1344 pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
7fea0ec6 1345 *discarded_pkts += pbd->hdr.bh1.num_pkts;
cebbe06f
VJ
1346 struct tpacket3_hdr *ppd =
1347 (struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
1348 if (((time_t)ppd->tp_sec > synctv->tv_sec) ||
1349 ((time_t)ppd->tp_sec == synctv->tv_sec &&
1350 (suseconds_t) (ppd->tp_nsec / 1000) > (suseconds_t)synctv->tv_usec)) {
1351 ret = 1;
1352 }
7fea0ec6 1353 AFPFlushBlock(pbd);
69d0d484 1354 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
cebbe06f 1355 return ret;
f947539d
VJ
1356
1357 } else
1358#endif
1359 {
7fea0ec6 1360 /* Read packet from ring */
69d0d484 1361 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
7fea0ec6
EL
1362 if (h.raw == NULL) {
1363 return -1;
1364 }
1365 (*discarded_pkts)++;
1366 if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
1367 ((time_t)h.h2->tp_sec == synctv->tv_sec &&
1368 (suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
1369 return 1;
1370 }
919377d4 1371
7fea0ec6 1372 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 1373 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
7fea0ec6
EL
1374 ptv->frame_offset = 0;
1375 }
919377d4
EL
1376 }
1377
1378
1379 return 0;
1380}
1381
806844d8
VJ
1382/** \brief wait for all afpacket threads to fully init
1383 *
1384 * Discard packets before all threads are ready, as the cluster
1385 * setup is not complete yet.
1386 *
1387 * if AFPPeersListStarted() returns true init is complete
1388 *
1389 * \retval r 1 = happy, otherwise unhappy
1390 */
7fea0ec6 1391static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
919377d4 1392{
919377d4 1393 struct timeval synctv;
806844d8
VJ
1394 struct pollfd fds;
1395
1396 fds.fd = ptv->socket;
1397 fds.events = POLLIN;
919377d4
EL
1398
1399 /* Set timeval to end of the world */
1400 synctv.tv_sec = 0xffffffff;
1401 synctv.tv_usec = 0xffffffff;
1402
1403 while (1) {
8709a20d 1404 int r = poll(&fds, 1, POLL_TIMEOUT);
806844d8
VJ
1405 if (r > 0 &&
1406 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1407 SCLogWarning(SC_ERR_AFP_READ, "poll failed %02x",
1408 fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL));
1409 return 0;
1410 } else if (r > 0) {
1411 if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
1412 gettimeofday(&synctv, NULL);
1413 }
1414 if (ptv->flags & AFP_RING_MODE) {
7fea0ec6 1415 r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
806844d8 1416 } else {
7fea0ec6 1417 r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
806844d8
VJ
1418 }
1419 SCLogDebug("Discarding on %s", ptv->tv->name);
1420 switch (r) {
1421 case 1:
9f7ba071 1422 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8
VJ
1423 return 1;
1424 case -1:
1425 return r;
1426 }
1427 /* no packets */
1428 } else if (r == 0 && AFPPeersListStarted()) {
86a3f064 1429 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8 1430 return 1;
43b6cbd4 1431 } else if (r < 0) { /* only exit on error */
806844d8
VJ
1432 SCLogWarning(SC_ERR_AFP_READ, "poll failed with retval %d", r);
1433 return 0;
919377d4
EL
1434 }
1435 }
1436 return 1;
1437}
1438
13f13b6d
EL
1439/**
1440 * \brief Try to reopen socket
1441 *
1442 * \retval 0 in case of success, negative if error occurs or a condition
1443 * is not met.
1444 */
c45d8985
EL
1445static int AFPTryReopen(AFPThreadVars *ptv)
1446{
13f13b6d
EL
1447 ptv->down_count++;
1448
13f13b6d
EL
1449 /* Don't reconnect till we have packet that did not release data */
1450 if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
1451 return -1;
1452 }
c45d8985 1453
8709a20d 1454 int afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
c45d8985 1455 if (afp_activate_r != 0) {
13f13b6d
EL
1456 if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
1457 SCLogWarning(SC_ERR_AFP_CREATE, "Can not open iface '%s'",
1458 ptv->iface);
1459 }
c45d8985
EL
1460 return afp_activate_r;
1461 }
1462
3bea3b39 1463 SCLogInfo("Interface '%s' is back", ptv->iface);
c45d8985
EL
1464 return 0;
1465}
1466
e80b30c0
EL
1467/**
1468 * \brief Main AF_PACKET reading Loop function
1469 */
1470TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
1471{
34581ce9
AS
1472 SCEnter();
1473
e80b30c0 1474 AFPThreadVars *ptv = (AFPThreadVars *)data;
e80b30c0
EL
1475 struct pollfd fds;
1476 int r;
34581ce9 1477 TmSlot *s = (TmSlot *)slot;
e8a4a4c4 1478 time_t last_dump = 0;
49612128 1479 time_t current_time;
5f400785 1480 int (*AFPReadFunc) (AFPThreadVars *);
7fea0ec6 1481 uint64_t discarded_pkts = 0;
e80b30c0 1482
34581ce9 1483 ptv->slot = s->slot_next;
e80b30c0 1484
5f400785 1485 if (ptv->flags & AFP_RING_MODE) {
bae1b03c
EL
1486 if (ptv->flags & AFP_TPACKET_V3) {
1487 AFPReadFunc = AFPReadFromRingV3;
1488 } else {
1489 AFPReadFunc = AFPReadFromRing;
1490 }
5f400785
EL
1491 } else {
1492 AFPReadFunc = AFPRead;
1493 }
1494
60400163
EL
1495 if (ptv->afp_state == AFP_STATE_DOWN) {
1496 /* Wait for our turn, threads before us must have opened the socket */
1497 while (AFPPeersListWaitTurn(ptv->mpeer)) {
1498 usleep(1000);
1992a227
EL
1499 if (suricata_ctl_flags != 0) {
1500 break;
1501 }
60400163
EL
1502 }
1503 r = AFPCreateSocket(ptv, ptv->iface, 1);
1504 if (r < 0) {
1992a227
EL
1505 switch (-r) {
1506 case AFP_FATAL_ERROR:
1507 SCLogError(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, fatal error");
1992a227
EL
1508 SCReturnInt(TM_ECODE_FAILED);
1509 case AFP_RECOVERABLE_ERROR:
1510 SCLogWarning(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, retrying soon");
1511 }
60400163
EL
1512 }
1513 AFPPeersListReachedInc();
1514 }
1515 if (ptv->afp_state == AFP_STATE_UP) {
86a3f064 1516 SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
c99dc5a7 1517 AFPSynchronizeStart(ptv, &discarded_pkts);
7fea0ec6
EL
1518 /* let's reset counter as we will start the capture at the
1519 * next function call */
1520#ifdef PACKET_STATISTICS
1521 struct tpacket_stats kstats;
1522 socklen_t len = sizeof (struct tpacket_stats);
1523 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
1524 &kstats, &len) > -1) {
1525 uint64_t pkts = 0;
1526 SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
1527 ", dropped %" PRIu32 "",
1528 ptv->tv->name,
1529 kstats.tp_packets, kstats.tp_drops);
1530 pkts = kstats.tp_packets - discarded_pkts - kstats.tp_drops;
1531 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, pkts);
1532 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, pkts);
1533 }
1534#endif
60400163
EL
1535 }
1536
e80b30c0
EL
1537 fds.fd = ptv->socket;
1538 fds.events = POLLIN;
1539
1540 while (1) {
1541 /* Start by checking the state of our interface */
1542 if (unlikely(ptv->afp_state == AFP_STATE_DOWN)) {
1543 int dbreak = 0;
662dccd8 1544
e80b30c0
EL
1545 do {
1546 usleep(AFP_RECONNECT_TIMEOUT);
1547 if (suricata_ctl_flags != 0) {
1548 dbreak = 1;
1549 break;
1550 }
1551 r = AFPTryReopen(ptv);
09e709d1 1552 fds.fd = ptv->socket;
e80b30c0
EL
1553 } while (r < 0);
1554 if (dbreak == 1)
1555 break;
1556 }
1557
1558 /* make sure we have at least one packet in the packet pool, to prevent
1559 * us from alloc'ing packets at line rate */
3c6e01f6 1560 PacketPoolWait();
e80b30c0
EL
1561
1562 r = poll(&fds, 1, POLL_TIMEOUT);
1563
1564 if (suricata_ctl_flags != 0) {
1565 break;
1566 }
1567
1568 if (r > 0 &&
1569 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1570 if (fds.revents & (POLLHUP | POLLRDHUP)) {
13f13b6d 1571 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1572 continue;
ff6365dd 1573 } else if (fds.revents & POLLERR) {
e80b30c0
EL
1574 char c;
1575 /* Do a recv to get errno */
1576 if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
1577 continue; /* what, no error? */
3bea3b39 1578 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1579 "Error reading data from iface '%s': (%d) %s",
3bea3b39 1580 ptv->iface, errno, strerror(errno));
13f13b6d 1581 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1582 continue;
ff6365dd 1583 } else if (fds.revents & POLLNVAL) {
e80b30c0 1584 SCLogError(SC_ERR_AFP_READ, "Invalid polling request");
13f13b6d 1585 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1586 continue;
1587 }
1588 } else if (r > 0) {
5f400785 1589 r = AFPReadFunc(ptv);
62e63e3f 1590 switch (r) {
27adbfa8
EL
1591 case AFP_READ_OK:
1592 /* Trigger one dump of stats every second */
49612128
EL
1593 current_time = time(NULL);
1594 if (current_time != last_dump) {
27adbfa8 1595 AFPDumpCounters(ptv);
49612128 1596 last_dump = current_time;
27adbfa8
EL
1597 }
1598 break;
62e63e3f
EL
1599 case AFP_READ_FAILURE:
1600 /* AFPRead in error: best to reset the socket */
3bea3b39 1601 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1602 "AFPRead error reading data from iface '%s': (%d) %s",
3bea3b39 1603 ptv->iface, errno, strerror(errno));
13f13b6d 1604 AFPSwitchState(ptv, AFP_STATE_DOWN);
62e63e3f 1605 continue;
9efa4ace
EL
1606 case AFP_SURI_FAILURE:
1607 StatsIncr(ptv->tv, ptv->capture_errors);
62e63e3f 1608 break;
27b5136b 1609 case AFP_KERNEL_DROP:
e8a4a4c4 1610 AFPDumpCounters(ptv);
27b5136b 1611 break;
e80b30c0 1612 }
11099cfa 1613 } else if (unlikely(r == 0)) {
f53e687b
EL
1614 /* Trigger one dump of stats every second */
1615 current_time = time(NULL);
1616 if (current_time != last_dump) {
1617 AFPDumpCounters(ptv);
1618 last_dump = current_time;
1619 }
ce71bf1f
VJ
1620 /* poll timed out, lets see handle our timeout path */
1621 TmThreadsCaptureHandleTimeout(tv, ptv->slot, NULL);
11099cfa 1622
e80b30c0 1623 } else if ((r < 0) && (errno != EINTR)) {
efbb5ce0 1624 SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d) %s",
3bea3b39 1625 ptv->iface,
e80b30c0 1626 errno, strerror(errno));
13f13b6d 1627 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1628 continue;
1629 }
752f03e7 1630 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1631 }
1632
4e561d6b 1633 AFPDumpCounters(ptv);
752f03e7 1634 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1635 SCReturnInt(TM_ECODE_OK);
1636}
1637
13f13b6d
EL
1638static int AFPGetDevFlags(int fd, const char *ifname)
1639{
1640 struct ifreq ifr;
1641
1642 memset(&ifr, 0, sizeof(ifr));
1643 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1644
1645 if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) {
1646 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1647 ifname, strerror(errno));
1648 return -1;
1649 }
1650
1651 return ifr.ifr_flags;
1652}
1653
1654
e80b30c0 1655static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose)
c45d8985
EL
1656{
1657 struct ifreq ifr;
1658
1659 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1660 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1661
1662 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
cba41207
AG
1663 if (verbose)
1664 SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
1665 ifname, strerror(errno));
c45d8985
EL
1666 return -1;
1667 }
1668
1669 return ifr.ifr_ifindex;
1670}
1671
e80b30c0 1672static int AFPGetDevLinktype(int fd, const char *ifname)
c45d8985
EL
1673{
1674 struct ifreq ifr;
1675
1676 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1677 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1678
1679 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1680 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1681 ifname, strerror(errno));
1682 return -1;
1683 }
1684
e80b30c0
EL
1685 switch (ifr.ifr_hwaddr.sa_family) {
1686 case ARPHRD_LOOPBACK:
1687 return LINKTYPE_ETHERNET;
1688 case ARPHRD_PPP:
11eb1d7c 1689 case ARPHRD_NONE:
e80b30c0
EL
1690 return LINKTYPE_RAW;
1691 default:
1692 return ifr.ifr_hwaddr.sa_family;
1693 }
c45d8985
EL
1694}
1695
b7bf299e
EL
1696int AFPGetLinkType(const char *ifname)
1697{
1698 int ltype;
1699
1700 int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1701 if (fd == -1) {
1702 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
1703 return LINKTYPE_RAW;
1704 }
1705
1706 ltype = AFPGetDevLinktype(fd, ifname);
1707 close(fd);
1708
1709 return ltype;
1710}
1711
49b7b00f
EL
1712static int AFPComputeRingParams(AFPThreadVars *ptv, int order)
1713{
1714 /* Compute structure:
1715 Target is to store all pending packets
1716 with a size equal to MTU + auxdata
1717 And we keep a decent number of block
1718
1719 To do so:
1720 Compute frame_size (aligned to be able to fit in block
1721 Check which block size we need. Blocksize is a 2^n * pagesize
1722 We then need to get order, big enough to have
1723 frame_size < block size
1724 Find number of frame per block (divide)
1725 Fill in packet_req
1726
1727 Compute frame size:
1728 described in packet_mmap.txt
1729 dependant on snaplen (need to use a variable ?)
1730snaplen: MTU ?
1731tp_hdrlen determine_version in daq_afpacket
1732in V1: sizeof(struct tpacket_hdr);
1733in V2: val in getsockopt(instance->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len)
1734frame size: TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1735
1736 */
1737 int tp_hdrlen = sizeof(struct tpacket_hdr);
1738 int snaplen = default_packet_size;
1739
03032457
EL
1740 if (snaplen == 0) {
1741 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1742 if (snaplen <= 0) {
1743 SCLogWarning(SC_ERR_INVALID_VALUE,
1744 "Unable to get MTU, setting snaplen to sane default of 1514");
1745 snaplen = 1514;
1746 }
1747 }
1748
69d0d484
VJ
1749 ptv->req.v2.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1750 ptv->req.v2.tp_block_size = getpagesize() << order;
1751 int frames_per_block = ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size;
49b7b00f 1752 if (frames_per_block == 0) {
bae1b03c 1753 SCLogError(SC_ERR_INVALID_VALUE, "Frame size bigger than block size");
49b7b00f
EL
1754 return -1;
1755 }
69d0d484
VJ
1756 ptv->req.v2.tp_frame_nr = ptv->ring_size;
1757 ptv->req.v2.tp_block_nr = ptv->req.v2.tp_frame_nr / frames_per_block + 1;
49b7b00f 1758 /* exact division */
69d0d484 1759 ptv->req.v2.tp_frame_nr = ptv->req.v2.tp_block_nr * frames_per_block;
b3bf7a57 1760 SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
69d0d484
VJ
1761 ptv->req.v2.tp_block_size, ptv->req.v2.tp_block_nr,
1762 ptv->req.v2.tp_frame_size, ptv->req.v2.tp_frame_nr);
49b7b00f
EL
1763 return 1;
1764}
1765
c2d0d938 1766#ifdef HAVE_TPACKET_V3
bae1b03c
EL
1767static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
1768{
69d0d484
VJ
1769 ptv->req.v3.tp_block_size = ptv->block_size;
1770 ptv->req.v3.tp_frame_size = 2048;
bae1b03c
EL
1771 int frames_per_block = 0;
1772 int tp_hdrlen = sizeof(struct tpacket3_hdr);
1773 int snaplen = default_packet_size;
1774
1775 if (snaplen == 0) {
1776 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1777 if (snaplen <= 0) {
1778 SCLogWarning(SC_ERR_INVALID_VALUE,
1779 "Unable to get MTU, setting snaplen to sane default of 1514");
1780 snaplen = 1514;
1781 }
1782 }
1783
69d0d484
VJ
1784 ptv->req.v3.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1785 frames_per_block = ptv->req.v3.tp_block_size / ptv->req.v3.tp_frame_size;
bae1b03c
EL
1786
1787 if (frames_per_block == 0) {
1788 SCLogError(SC_ERR_INVALID_VALUE,
1789 "Block size is too small, it should be at least %d",
69d0d484 1790 ptv->req.v3.tp_frame_size);
bae1b03c
EL
1791 return -1;
1792 }
69d0d484 1793 ptv->req.v3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
bae1b03c 1794 /* exact division */
69d0d484
VJ
1795 ptv->req.v3.tp_frame_nr = ptv->req.v3.tp_block_nr * frames_per_block;
1796 ptv->req.v3.tp_retire_blk_tov = ptv->block_timeout;
1797 ptv->req.v3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
b3bf7a57 1798 SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
69d0d484
VJ
1799 ptv->req.v3.tp_block_size, ptv->req.v3.tp_block_nr,
1800 ptv->req.v3.tp_frame_size, ptv->req.v3.tp_frame_nr,
1801 ptv->req.v3.tp_block_size * ptv->req.v3.tp_block_nr
bae1b03c
EL
1802 );
1803 return 1;
1804}
c2d0d938 1805#endif
bae1b03c 1806
c7bde9df
EL
1807static int AFPSetupRing(AFPThreadVars *ptv, char *devname)
1808{
1809 int val;
1810 unsigned int len = sizeof(val), i;
c7bde9df 1811 int order;
f5c20191 1812 int r, mmap_flag;
c7bde9df 1813
c2d0d938 1814#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1815 if (ptv->flags & AFP_TPACKET_V3) {
1816 val = TPACKET_V3;
f947539d 1817 } else
c2d0d938 1818#endif
f947539d 1819 {
c7bde9df
EL
1820 val = TPACKET_V2;
1821 }
1822 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
1823 if (errno == ENOPROTOOPT) {
1824 if (ptv->flags & AFP_TPACKET_V3) {
1825 SCLogError(SC_ERR_AFP_CREATE,
1826 "Too old kernel giving up (need 3.2 for TPACKET_V3)");
1827 } else {
1828 SCLogError(SC_ERR_AFP_CREATE,
1829 "Too old kernel giving up (need 2.6.27 at least)");
1830 }
1831 }
1832 SCLogError(SC_ERR_AFP_CREATE, "Error when retrieving packet header len");
1833 return AFP_FATAL_ERROR;
1834 }
1835
f947539d
VJ
1836 val = TPACKET_V2;
1837#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1838 if (ptv->flags & AFP_TPACKET_V3) {
1839 val = TPACKET_V3;
c7bde9df 1840 }
f947539d 1841#endif
c7bde9df
EL
1842 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_VERSION, &val,
1843 sizeof(val)) < 0) {
1844 SCLogError(SC_ERR_AFP_CREATE,
1845 "Can't activate TPACKET_V2/TPACKET_V3 on packet socket: %s",
1846 strerror(errno));
1847 return AFP_FATAL_ERROR;
1848 }
1849
a40f08a2
EL
1850#ifdef HAVE_HW_TIMESTAMPING
1851 int req = SOF_TIMESTAMPING_RAW_HARDWARE;
1852 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req,
1853 sizeof(req)) < 0) {
1854 SCLogWarning(SC_ERR_AFP_CREATE,
1855 "Can't activate hardware timestamping on packet socket: %s",
1856 strerror(errno));
1857 }
1858#endif
1859
ecf59be4
EL
1860 /* Let's reserve head room so we can add the VLAN header in IPS
1861 * or TAP mode before write the packet */
1862 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
1863 /* Only one vlan is extracted from AFP header so
1864 * one VLAN header length is enough. */
1865 int reserve = VLAN_HEADER_LEN;
1866 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
1867 sizeof(reserve)) < 0) {
1868 SCLogError(SC_ERR_AFP_CREATE,
1869 "Can't activate reserve on packet socket: %s",
1870 strerror(errno));
1871 return AFP_FATAL_ERROR;
1872 }
1873 }
1874
c7bde9df 1875 /* Allocate RX ring */
c2d0d938 1876#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1877 if (ptv->flags & AFP_TPACKET_V3) {
1878 if (AFPComputeRingParamsV3(ptv) != 1) {
1879 return AFP_FATAL_ERROR;
1880 }
1881 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
69d0d484 1882 (void *) &ptv->req.v3, sizeof(ptv->req.v3));
c7bde9df
EL
1883 if (r < 0) {
1884 SCLogError(SC_ERR_MEM_ALLOC,
1885 "Unable to allocate RX Ring for iface %s: (%d) %s",
1886 devname,
1887 errno,
1888 strerror(errno));
1889 return AFP_FATAL_ERROR;
1890 }
1891 } else {
c2d0d938 1892#endif
fa902abe 1893 for (order = AFP_BLOCK_SIZE_DEFAULT_ORDER; order >= 0; order--) {
c7bde9df
EL
1894 if (AFPComputeRingParams(ptv, order) != 1) {
1895 SCLogInfo("Ring parameter are incorrect. Please correct the devel");
1896 return AFP_FATAL_ERROR;
1897 }
1898
1899 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1900 (void *) &ptv->req, sizeof(ptv->req));
1901
1902 if (r < 0) {
1903 if (errno == ENOMEM) {
1904 SCLogInfo("Memory issue with ring parameters. Retrying.");
1905 continue;
1906 }
1907 SCLogError(SC_ERR_MEM_ALLOC,
1908 "Unable to allocate RX Ring for iface %s: (%d) %s",
1909 devname,
1910 errno,
1911 strerror(errno));
1912 return AFP_FATAL_ERROR;
1913 } else {
1914 break;
1915 }
1916 }
1917 if (order < 0) {
1918 SCLogError(SC_ERR_MEM_ALLOC,
1919 "Unable to allocate RX Ring for iface %s (order 0 failed)",
1920 devname);
1921 return AFP_FATAL_ERROR;
1922 }
c2d0d938 1923#ifdef HAVE_TPACKET_V3
c7bde9df 1924 }
c2d0d938 1925#endif
c7bde9df
EL
1926
1927 /* Allocate the Ring */
c2d0d938 1928#ifdef HAVE_TPACKET_V3
c7bde9df 1929 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484 1930 ptv->ring_buflen = ptv->req.v3.tp_block_nr * ptv->req.v3.tp_block_size;
c7bde9df 1931 } else {
c2d0d938 1932#endif
69d0d484 1933 ptv->ring_buflen = ptv->req.v2.tp_block_nr * ptv->req.v2.tp_block_size;
c2d0d938 1934#ifdef HAVE_TPACKET_V3
c7bde9df 1935 }
c2d0d938 1936#endif
f5c20191
EL
1937 mmap_flag = MAP_SHARED;
1938 if (ptv->flags & AFP_MMAP_LOCKED)
1939 mmap_flag |= MAP_LOCKED;
cba41207 1940 ptv->ring_buf = mmap(0, ptv->ring_buflen, PROT_READ|PROT_WRITE,
f5c20191 1941 mmap_flag, ptv->socket, 0);
cba41207 1942 if (ptv->ring_buf == MAP_FAILED) {
88f5d7d1
EL
1943 SCLogError(SC_ERR_MEM_ALLOC, "Unable to mmap, error %s",
1944 strerror(errno));
c7bde9df
EL
1945 goto mmap_err;
1946 }
c2d0d938 1947#ifdef HAVE_TPACKET_V3
c7bde9df 1948 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
1949 ptv->ring.v3 = SCMalloc(ptv->req.v3.tp_block_nr * sizeof(*ptv->ring.v3));
1950 if (!ptv->ring.v3) {
1951 SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring.v3");
291af719 1952 goto postmmap_err;
c7bde9df 1953 }
69d0d484
VJ
1954 for (i = 0; i < ptv->req.v3.tp_block_nr; ++i) {
1955 ptv->ring.v3[i].iov_base = ptv->ring_buf + (i * ptv->req.v3.tp_block_size);
1956 ptv->ring.v3[i].iov_len = ptv->req.v3.tp_block_size;
c7bde9df
EL
1957 }
1958 } else {
c2d0d938 1959#endif
c7bde9df 1960 /* allocate a ring for each frame header pointer*/
69d0d484
VJ
1961 ptv->ring.v2 = SCMalloc(ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
1962 if (ptv->ring.v2 == NULL) {
c7bde9df 1963 SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate frame buf");
291af719 1964 goto postmmap_err;
c7bde9df 1965 }
69d0d484 1966 memset(ptv->ring.v2, 0, ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
c7bde9df
EL
1967 /* fill the header ring with proper frame ptr*/
1968 ptv->frame_offset = 0;
69d0d484
VJ
1969 for (i = 0; i < ptv->req.v2.tp_block_nr; ++i) {
1970 void *base = &(ptv->ring_buf[i * ptv->req.v2.tp_block_size]);
c7bde9df 1971 unsigned int j;
69d0d484
VJ
1972 for (j = 0; j < ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size; ++j, ++ptv->frame_offset) {
1973 (((union thdr **)ptv->ring.v2)[ptv->frame_offset]) = base;
1974 base += ptv->req.v2.tp_frame_size;
c7bde9df
EL
1975 }
1976 }
1977 ptv->frame_offset = 0;
c2d0d938 1978#ifdef HAVE_TPACKET_V3
c7bde9df 1979 }
c2d0d938 1980#endif
c7bde9df
EL
1981
1982 return 0;
1983
291af719 1984postmmap_err:
cba41207 1985 munmap(ptv->ring_buf, ptv->ring_buflen);
69d0d484
VJ
1986 if (ptv->ring.v2)
1987 SCFree(ptv->ring.v2);
1988 if (ptv->ring.v3)
1989 SCFree(ptv->ring.v3);
c7bde9df
EL
1990mmap_err:
1991 /* Packet mmap does the cleaning when socket is closed */
1992 return AFP_FATAL_ERROR;
1993}
1994
402bdf9b
VJ
/**
 * \brief test if we can use FANOUT. Older kernels like those in
 *        CentOS6 have HAVE_PACKET_FANOUT defined but fail to work
 *
 * Opens a throw-away AF_PACKET socket and tries to join fanout group 1 with
 * mode PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG. The socket is always
 * closed before returning.
 *
 * \retval 1 the PACKET_FANOUT setsockopt() call succeeded
 * \retval 0 fanout support is not compiled in, the probe socket could not
 *           be created, or the kernel rejected the option
 */
int AFPIsFanoutSupported(void)
{
#ifdef HAVE_PACKET_FANOUT
    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0)
        return 0;

    /* Keep the mode in an unsigned 32 bit variable: a uint16_t would be
     * promoted to (signed) int before the shift, and moving a set top bit
     * (the flag part of the mode) into the sign bit is undefined behavior. */
    uint32_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
    uint16_t id = 1;
    uint32_t option = (mode << 16) | (id & 0xffff);
    int r = setsockopt(fd, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
    /* close() is allowed to change errno, so remember the setsockopt() one */
    int saved_errno = errno;
    close(fd);

    if (r < 0) {
        SCLogPerf("fanout not supported by kernel: %s", strerror(saved_errno));
        return 0;
    }
    return 1;
#else
    return 0;
#endif
}
2020
91e1256b
EL
2021#ifdef HAVE_PACKET_EBPF
2022
2023static int SockFanoutSeteBPF(AFPThreadVars *ptv)
2024{
2025 int pfd = ptv->ebpf_lb_fd;
2026 if (pfd == -1) {
2027 SCLogError(SC_ERR_INVALID_VALUE,
2028 "Fanout file descriptor is invalid");
2029 return -1;
2030 }
2031
2032 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
2033 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf");
2034 return -1;
2035 }
2036 SCLogInfo("Activated eBPF on socket");
2037
2038 return 0;
2039}
2040
2041static int SetEbpfFilter(AFPThreadVars *ptv)
2042{
2043 int pfd = ptv->ebpf_filter_fd;
2044 if (pfd == -1) {
2045 SCLogError(SC_ERR_INVALID_VALUE,
2046 "Filter file descriptor is invalid");
2047 return -1;
2048 }
2049
2050 if (setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &pfd, sizeof(pfd))) {
2051 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
2052 return -1;
2053 }
2054 SCLogInfo("Activated eBPF filter on socket");
2055
2056 return 0;
2057}
2058#endif
2059
e80b30c0 2060static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
c45d8985
EL
2061{
2062 int r;
1992a227 2063 int ret = AFP_FATAL_ERROR;
c45d8985
EL
2064 struct packet_mreq sock_params;
2065 struct sockaddr_ll bind_address;
662dccd8 2066 int if_idx;
49b7b00f 2067
c45d8985
EL
2068 /* open socket */
2069 ptv->socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
2070 if (ptv->socket == -1) {
e80b30c0 2071 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
13f13b6d 2072 goto error;
c45d8985 2073 }
cba41207 2074
662dccd8 2075 if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);
cba41207
AG
2076
2077 if (if_idx == -1) {
fcd5e138 2078 goto socket_err;
cba41207
AG
2079 }
2080
c45d8985
EL
2081 /* bind socket */
2082 memset(&bind_address, 0, sizeof(bind_address));
2083 bind_address.sll_family = AF_PACKET;
2084 bind_address.sll_protocol = htons(ETH_P_ALL);
662dccd8 2085 bind_address.sll_ifindex = if_idx;
c45d8985
EL
2086 if (bind_address.sll_ifindex == -1) {
2087 if (verbose)
e80b30c0 2088 SCLogError(SC_ERR_AFP_CREATE, "Couldn't find iface %s", devname);
1992a227 2089 ret = AFP_RECOVERABLE_ERROR;
13f13b6d
EL
2090 goto socket_err;
2091 }
2092
cba41207
AG
2093 int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
2094 if (if_flags == -1) {
2095 if (verbose) {
2096 SCLogError(SC_ERR_AFP_READ,
2097 "Couldn't get flags for interface '%s'",
2098 ptv->iface);
2099 }
2100 ret = AFP_RECOVERABLE_ERROR;
2101 goto socket_err;
2102 } else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
2103 if (verbose) {
2104 SCLogError(SC_ERR_AFP_READ,
2105 "Interface '%s' is down",
2106 ptv->iface);
2107 }
2108 ret = AFP_RECOVERABLE_ERROR;
2109 goto socket_err;
2110 }
2111
13f13b6d
EL
2112 if (ptv->promisc != 0) {
2113 /* Force promiscuous mode */
2114 memset(&sock_params, 0, sizeof(sock_params));
2115 sock_params.mr_type = PACKET_MR_PROMISC;
2116 sock_params.mr_ifindex = bind_address.sll_ifindex;
2117 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP,(void *)&sock_params, sizeof(sock_params));
2118 if (r < 0) {
2119 SCLogError(SC_ERR_AFP_CREATE,
2120 "Couldn't switch iface %s to promiscuous, error %s",
2121 devname, strerror(errno));
c7bde9df 2122 goto socket_err;
13f13b6d
EL
2123 }
2124 }
2125
2126 if (ptv->checksum_mode == CHECKSUM_VALIDATION_KERNEL) {
2127 int val = 1;
2128 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_AUXDATA, &val,
2129 sizeof(val)) == -1 && errno != ENOPROTOOPT) {
2130 SCLogWarning(SC_ERR_NO_AF_PACKET,
4111331a 2131 "'kernel' checksum mode not supported, falling back to full mode.");
13f13b6d
EL
2132 ptv->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
2133 }
2134 }
2135
2136 /* set socket recv buffer size */
2137 if (ptv->buffer_size != 0) {
2138 /*
2139 * Set the socket buffer size to the specified value.
2140 */
b3bf7a57 2141 SCLogPerf("Setting AF_PACKET socket buffer to %d", ptv->buffer_size);
13f13b6d
EL
2142 if (setsockopt(ptv->socket, SOL_SOCKET, SO_RCVBUF,
2143 &ptv->buffer_size,
2144 sizeof(ptv->buffer_size)) == -1) {
2145 SCLogError(SC_ERR_AFP_CREATE,
2146 "Couldn't set buffer size to %d on iface %s, error %s",
2147 ptv->buffer_size, devname, strerror(errno));
c7bde9df 2148 goto socket_err;
13f13b6d
EL
2149 }
2150 }
2151
2152 r = bind(ptv->socket, (struct sockaddr *)&bind_address, sizeof(bind_address));
2153 if (r < 0) {
2154 if (verbose) {
2155 if (errno == ENETDOWN) {
2156 SCLogError(SC_ERR_AFP_CREATE,
2157 "Couldn't bind AF_PACKET socket, iface %s is down",
2158 devname);
2159 } else {
2160 SCLogError(SC_ERR_AFP_CREATE,
2161 "Couldn't bind AF_PACKET socket to iface %s, error %s",
2162 devname, strerror(errno));
2163 }
2164 }
1992a227 2165 ret = AFP_RECOVERABLE_ERROR;
c7bde9df 2166 goto socket_err;
13f13b6d
EL
2167 }
2168
91e1256b 2169
238ff231
EL
2170#ifdef HAVE_PACKET_FANOUT
2171 /* add binded socket to fanout group */
2172 if (ptv->threads > 1) {
238ff231
EL
2173 uint16_t mode = ptv->cluster_type;
2174 uint16_t id = ptv->cluster_id;
4111331a 2175 uint32_t option = (mode << 16) | (id & 0xffff);
238ff231
EL
2176 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
2177 if (r < 0) {
2178 SCLogError(SC_ERR_AFP_CREATE,
4111331a 2179 "Couldn't set fanout mode, error %s",
238ff231 2180 strerror(errno));
c7bde9df 2181 goto socket_err;
238ff231
EL
2182 }
2183 }
2184#endif
2185
91e1256b
EL
2186#ifdef HAVE_PACKET_EBPF
2187 if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
2188 r = SockFanoutSeteBPF(ptv);
2189 if (r < 0) {
2190 SCLogError(SC_ERR_AFP_CREATE,
2191 "Coudn't set EBPF, error %s",
2192 strerror(errno));
2193 goto socket_err;
2194 }
2195 }
2196#endif
2197
49b7b00f 2198 if (ptv->flags & AFP_RING_MODE) {
c7bde9df
EL
2199 ret = AFPSetupRing(ptv, devname);
2200 if (ret != 0)
13f13b6d 2201 goto socket_err;
49b7b00f
EL
2202 }
2203
86a3f064 2204 SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
c45d8985 2205
c85ee1e3
EL
2206 ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
2207 switch (ptv->datalink) {
2208 case ARPHRD_PPP:
2209 case ARPHRD_ATM:
2210 ptv->cooked = 1;
619414c5 2211 break;
c85ee1e3
EL
2212 }
2213
f47df5a6 2214 TmEcode rc = AFPSetBPFFilter(ptv);
f2a6fb8a 2215 if (rc == TM_ECODE_FAILED) {
39807b47
AG
2216 ret = AFP_FATAL_ERROR;
2217 goto socket_err;
f2a6fb8a
EL
2218 }
2219
49b7b00f 2220 /* Init is ok */
13f13b6d 2221 AFPSwitchState(ptv, AFP_STATE_UP);
c45d8985 2222 return 0;
13f13b6d 2223
13f13b6d
EL
2224socket_err:
2225 close(ptv->socket);
2226 ptv->socket = -1;
f47df5a6 2227 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
2228 if (ptv->ring.v3) {
2229 SCFree(ptv->ring.v3);
2230 ptv->ring.v3 = NULL;
f47df5a6
VJ
2231 }
2232 } else {
69d0d484
VJ
2233 if (ptv->ring.v2) {
2234 SCFree(ptv->ring.v2);
2235 ptv->ring.v2 = NULL;
f47df5a6
VJ
2236 }
2237 }
2238
13f13b6d 2239error:
1992a227 2240 return -ret;
c45d8985
EL
2241}
2242
f2a6fb8a
EL
2243TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
2244{
2245 struct bpf_program filter;
2246 struct sock_fprog fcode;
2247 int rc;
2248
91e1256b
EL
2249#ifdef HAVE_PACKET_EBPF
2250 if (ptv->ebpf_filter_fd != -1) {
2251 return SetEbpfFilter(ptv);
2252 }
2253#endif
2254
f2a6fb8a
EL
2255 if (!ptv->bpf_filter)
2256 return TM_ECODE_OK;
2257
f2a6fb8a
EL
2258 SCLogInfo("Using BPF '%s' on iface '%s'",
2259 ptv->bpf_filter,
2260 ptv->iface);
28e9e4c8
EL
2261
2262 char errbuf[PCAP_ERRBUF_SIZE];
2263 if (SCBPFCompile(default_packet_size, /* snaplen_arg */
f2a6fb8a
EL
2264 ptv->datalink, /* linktype_arg */
2265 &filter, /* program */
2266 ptv->bpf_filter, /* const char *buf */
cc82ef06 2267 1, /* optimize */
28e9e4c8
EL
2268 0, /* mask */
2269 errbuf,
2270 sizeof(errbuf)) == -1) {
2271 SCLogError(SC_ERR_AFP_CREATE, "Failed to compile BPF \"%s\": %s",
2272 ptv->bpf_filter,
2273 errbuf);
f2a6fb8a
EL
2274 return TM_ECODE_FAILED;
2275 }
2276
2277 fcode.len = filter.bf_len;
2278 fcode.filter = (struct sock_filter*)filter.bf_insns;
2279
2280 rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
2281
28e9e4c8 2282 SCBPFFree(&filter);
f2a6fb8a
EL
2283 if(rc == -1) {
2284 SCLogError(SC_ERR_AFP_CREATE, "Failed to attach filter: %s", strerror(errno));
2285 return TM_ECODE_FAILED;
2286 }
2287
f2a6fb8a
EL
2288 return TM_ECODE_OK;
2289}
2290
06173267
EL
2291#ifdef HAVE_PACKET_EBPF
2292/**
2293 * Insert a half flow in the kernel bypass table
2294 *
2295 * \param mapfd file descriptor of the protocol bypass table
2296 * \param key data to use as key in the table
2598078e 2297 * \return 0 in case of error, 1 if success
06173267 2298 */
69d2c8eb 2299static int AFPInsertHalfFlow(int mapd, void *key, unsigned int nr_cpus)
06173267 2300{
651a27e4 2301 BPF_DECLARE_PERCPU(struct pair, value, nr_cpus);
17a32bda 2302 unsigned int i;
1e729f05
EL
2303
2304 if (mapd == -1) {
2305 return 0;
2306 }
2307
94a622cb 2308 /* We use a per CPU structure so we have to set an array of values as the kernel
6ab1cbcb
EL
2309 * is not duplicating the data on each CPU by itself. */
2310 for (i = 0; i < nr_cpus; i++) {
651a27e4
EL
2311 BPF_PERCPU(value, i).packets = 0;
2312 BPF_PERCPU(value, i).bytes = 0;
17a32bda 2313 }
17a32bda
EL
2314 if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) != 0) {
2315 switch (errno) {
3379311e 2316 /* no more place in the hash */
17a32bda 2317 case E2BIG:
17a32bda 2318 return 0;
fcae1c18
EL
2319 /* no more place in the hash for some hardware bypass */
2320 case EAGAIN:
2321 return 0;
3379311e
EL
2322 /* if we already have the key then bypass is a success */
2323 case EEXIST:
2324 return 1;
2325 /* Not supposed to be there so issue a error */
17a32bda
EL
2326 default:
2327 SCLogError(SC_ERR_BPF, "Can't update eBPF map: %s (%d)",
2328 strerror(errno),
2329 errno);
2330 return 0;
06173267 2331 }
17a32bda
EL
2332 }
2333 return 1;
06173267 2334}
b07bda7a 2335
9206b30f
EL
2336static int AFPSetFlowStorage(Packet *p, int map_fd, void *key0, void* key1,
2337 int family)
b07bda7a
EL
2338{
2339 FlowBypassInfo *fc = FlowGetStorageById(p->flow, GetFlowBypassInfoID());
2340 if (fc) {
2341 EBPFBypassData *eb = SCCalloc(1, sizeof(EBPFBypassData));
2342 if (eb == NULL) {
9206b30f
EL
2343 EBPFDeleteKey(map_fd, key0);
2344 EBPFDeleteKey(map_fd, key1);
2345 LiveDevAddBypassFail(p->livedev, 1, family);
b07bda7a
EL
2346 SCFree(key0);
2347 SCFree(key1);
2348 return 0;
2349 }
2350 eb->key[0] = key0;
2351 eb->key[1] = key1;
2352 eb->mapfd = map_fd;
2353 eb->cpus_count = p->afp_v.nr_cpus;
2354 fc->BypassUpdate = EBPFBypassUpdate;
2355 fc->BypassFree = EBPFBypassFree;
2356 fc->bypass_data = eb;
9206b30f
EL
2357 } else {
2358 EBPFDeleteKey(map_fd, key0);
2359 EBPFDeleteKey(map_fd, key1);
2360 LiveDevAddBypassFail(p->livedev, 1, family);
2361 SCFree(key0);
2362 SCFree(key1);
2363 return 0;
b07bda7a 2364 }
9206b30f
EL
2365
2366 LiveDevAddBypassStats(p->livedev, 1, family);
6126f105 2367 LiveDevAddBypassSuccess(p->livedev, 1, family);
b07bda7a
EL
2368 return 1;
2369}
2370
06173267
EL
2371#endif
2372
2598078e 2373/**
94a622cb
EL
2374 * Bypass function for AF_PACKET capture in eBPF mode
2375 *
2376 * This function creates two half flows in the map shared with the kernel
2377 * to trigger bypass.
2378 *
2379 * The implementation of bypass is done via an IPv4 and an IPv6 flow table.
2380 * This table contains the list of half flows to bypass. The in-kernel filter
2381 * will skip/drop the packet if they belong to a flow in one of the flows
2382 * table.
2383 *
2384 * \param p the packet belonging to the flow to bypass
2385 * \return 0 if unable to bypass, 1 if success
2598078e 2386 */
06173267
EL
2387static int AFPBypassCallback(Packet *p)
2388{
2389#ifdef HAVE_PACKET_EBPF
2390 SCLogDebug("Calling af_packet callback function");
2391 /* Only bypass TCP and UDP */
2392 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2393 return 0;
2394 }
2395
fc2f2fa7
EL
2396 /* If we don't have a flow attached to packet the eBPF map entries
2397 * will be destroyed at first flow bypass manager pass as we won't
2398 * find any associated entry */
2399 if (p->flow == NULL) {
2400 return 0;
2401 }
06173267
EL
2402 /* Bypassing tunneled packets is currently not supported
2403 * because we can't discard the inner packet only due to
2404 * primitive parsing in eBPF */
2405 if (IS_TUNNEL_PKT(p)) {
2406 return 0;
2407 }
06173267 2408 if (PKT_IS_IPV4(p)) {
d65f4585 2409 SCLogDebug("add an IPv4");
eff10fce
EL
2410 if (p->afp_v.v4_map_fd == -1) {
2411 return 0;
2412 }
b07bda7a
EL
2413 struct flowv4_keys *keys[2];
2414 keys[0] = SCCalloc(1, sizeof(struct flowv4_keys));
2415 if (keys[0] == NULL) {
2416 return 0;
2417 }
2418 keys[0]->src = htonl(GET_IPV4_SRC_ADDR_U32(p));
2419 keys[0]->dst = htonl(GET_IPV4_DST_ADDR_U32(p));
2420 keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
2421 keys[0]->port16[1] = GET_TCP_DST_PORT(p);
d119845d
EL
2422 keys[0]->vlan0 = p->vlan_id[0];
2423 keys[0]->vlan1 = p->vlan_id[1];
8c880879 2424
d119845d
EL
2425 if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
2426 keys[0]->ip_proto = 1;
2427 } else {
2428 keys[0]->ip_proto = 0;
2429 }
69d2c8eb 2430 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
fc2f2fa7 2431 p->afp_v.nr_cpus) == 0) {
9206b30f 2432 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2433 SCFree(keys[0]);
2434 return 0;
2435 }
2436 keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
2437 if (keys[1] == NULL) {
9206b30f
EL
2438 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2439 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a 2440 SCFree(keys[0]);
06173267
EL
2441 return 0;
2442 }
b07bda7a
EL
2443 keys[1]->src = htonl(GET_IPV4_DST_ADDR_U32(p));
2444 keys[1]->dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
2445 keys[1]->port16[0] = GET_TCP_DST_PORT(p);
2446 keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
d119845d
EL
2447 keys[1]->vlan0 = p->vlan_id[0];
2448 keys[1]->vlan1 = p->vlan_id[1];
b07bda7a 2449
d119845d 2450 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2451 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
fc2f2fa7 2452 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2453 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2454 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2455 SCFree(keys[0]);
2456 SCFree(keys[1]);
06173267
EL
2457 return 0;
2458 }
315c29a8 2459 EBPFUpdateFlow(p->flow, p, NULL);
9206b30f 2460 return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
06173267
EL
2461 }
2462 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2463 if (PKT_IS_IPV6(p) &&
06173267 2464 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
d65f4585 2465 int i;
eff10fce
EL
2466 if (p->afp_v.v6_map_fd == -1) {
2467 return 0;
2468 }
06173267 2469 SCLogDebug("add an IPv6");
b07bda7a
EL
2470 struct flowv6_keys *keys[2];
2471 keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
2472 if (keys[0] == NULL) {
9206b30f 2473 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2474 return 0;
2475 }
06173267 2476 for (i = 0; i < 4; i++) {
b07bda7a
EL
2477 keys[0]->src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2478 keys[0]->dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2479 }
2480 keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
2481 keys[0]->port16[1] = GET_TCP_DST_PORT(p);
d119845d
EL
2482 keys[0]->vlan0 = p->vlan_id[0];
2483 keys[0]->vlan1 = p->vlan_id[1];
2484
2485 if (IPV6_GET_NH(p) == IPPROTO_TCP) {
2486 keys[0]->ip_proto = 1;
2487 } else {
2488 keys[0]->ip_proto = 0;
2489 }
69d2c8eb 2490 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
fc2f2fa7 2491 p->afp_v.nr_cpus) == 0) {
9206b30f 2492 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a 2493 SCFree(keys[0]);
06173267
EL
2494 return 0;
2495 }
b07bda7a
EL
2496 keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
2497 if (keys[1] == NULL) {
9206b30f
EL
2498 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2499 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2500 SCFree(keys[0]);
2501 return 0;
06173267 2502 }
b07bda7a
EL
2503 for (i = 0; i < 4; i++) {
2504 keys[1]->src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2505 keys[1]->dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2506 }
2507 keys[1]->port16[0] = GET_TCP_DST_PORT(p);
2508 keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
d119845d
EL
2509 keys[1]->vlan0 = p->vlan_id[0];
2510 keys[1]->vlan1 = p->vlan_id[1];
2511
2512 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2513 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
fc2f2fa7 2514 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2515 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2516 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2517 SCFree(keys[0]);
2518 SCFree(keys[1]);
06173267
EL
2519 return 0;
2520 }
fc2f2fa7
EL
2521 if (p->flow)
2522 EBPFUpdateFlow(p->flow, p, NULL);
9206b30f 2523 return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
06173267
EL
2524 }
2525#endif
2526 return 0;
2527}
2528
94a622cb
EL
2529/**
2530 * Bypass function for AF_PACKET capture in XDP mode
2531 *
2532 * This function creates two half flows in the map shared with the kernel
2533 * to trigger bypass. This function is similar to AFPBypassCallback() but
2534 * the bytes order is changed for some data due to the way we get the data
2535 * in the XDP case.
2536 *
2537 * \param p the packet belonging to the flow to bypass
2538 * \return 0 if unable to bypass, 1 if success
2539 */
8c880879
EL
2540static int AFPXDPBypassCallback(Packet *p)
2541{
2542#ifdef HAVE_PACKET_XDP
2543 SCLogDebug("Calling af_packet callback function");
2544 /* Only bypass TCP and UDP */
2545 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2546 return 0;
2547 }
2548
fc2f2fa7
EL
2549 /* If we don't have a flow attached to packet the eBPF map entries
2550 * will be destroyed at first flow bypass manager pass as we won't
2551 * find any associated entry */
2552 if (p->flow == NULL) {
2553 return 0;
2554 }
8c880879
EL
2555 /* Bypassing tunneled packets is currently not supported
2556 * because we can't discard the inner packet only due to
2557 * primitive parsing in eBPF */
2558 if (IS_TUNNEL_PKT(p)) {
2559 return 0;
2560 }
8c880879 2561 if (PKT_IS_IPV4(p)) {
b07bda7a
EL
2562 struct flowv4_keys *keys[2];
2563 keys[0]= SCCalloc(1, sizeof(struct flowv4_keys));
2564 if (keys[0] == NULL) {
9206b30f 2565 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2566 return 0;
2567 }
eff10fce 2568 if (p->afp_v.v4_map_fd == -1) {
b07bda7a 2569 SCFree(keys[0]);
eff10fce
EL
2570 return 0;
2571 }
b07bda7a
EL
2572 keys[0]->src = p->src.addr_data32[0];
2573 keys[0]->dst = p->dst.addr_data32[0];
94a622cb 2574 /* In the XDP filter we get port from parsing of packet and not from skb
6062c27e 2575 * (as in eBPF filter) so we need to pass from host to network order */
b07bda7a
EL
2576 keys[0]->port16[0] = htons(p->sp);
2577 keys[0]->port16[1] = htons(p->dp);
d119845d
EL
2578 keys[0]->vlan0 = p->vlan_id[0];
2579 keys[0]->vlan1 = p->vlan_id[1];
2580 if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
2581 keys[0]->ip_proto = 1;
2582 } else {
2583 keys[0]->ip_proto = 0;
2584 }
69d2c8eb 2585 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
fc2f2fa7 2586 p->afp_v.nr_cpus) == 0) {
9206b30f 2587 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2588 SCFree(keys[0]);
2589 return 0;
2590 }
2591 keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
2592 if (keys[1] == NULL) {
9206b30f
EL
2593 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2594 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a 2595 SCFree(keys[0]);
8c880879
EL
2596 return 0;
2597 }
b07bda7a
EL
2598 keys[1]->src = p->dst.addr_data32[0];
2599 keys[1]->dst = p->src.addr_data32[0];
2600 keys[1]->port16[0] = htons(p->dp);
2601 keys[1]->port16[1] = htons(p->sp);
d119845d
EL
2602 keys[1]->vlan0 = p->vlan_id[0];
2603 keys[1]->vlan1 = p->vlan_id[1];
2604 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2605 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
fc2f2fa7 2606 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2607 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2608 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2609 SCFree(keys[0]);
2610 SCFree(keys[1]);
8c880879
EL
2611 return 0;
2612 }
9206b30f 2613 return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
8c880879
EL
2614 }
2615 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2616 if (PKT_IS_IPV6(p) &&
8c880879 2617 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
8c880879 2618 SCLogDebug("add an IPv6");
eff10fce
EL
2619 if (p->afp_v.v6_map_fd == -1) {
2620 return 0;
2621 }
d65f4585 2622 int i;
b07bda7a
EL
2623 struct flowv6_keys *keys[2];
2624 keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
2625 if (keys[0] == NULL) {
2626 return 0;
2627 }
2628
8c880879 2629 for (i = 0; i < 4; i++) {
b07bda7a
EL
2630 keys[0]->src[i] = GET_IPV6_SRC_ADDR(p)[i];
2631 keys[0]->dst[i] = GET_IPV6_DST_ADDR(p)[i];
2632 }
2633 keys[0]->port16[0] = htons(GET_TCP_SRC_PORT(p));
2634 keys[0]->port16[1] = htons(GET_TCP_DST_PORT(p));
d119845d
EL
2635 keys[0]->vlan0 = p->vlan_id[0];
2636 keys[0]->vlan1 = p->vlan_id[1];
2637 if (IPV6_GET_NH(p) == IPPROTO_TCP) {
2638 keys[0]->ip_proto = 1;
2639 } else {
2640 keys[0]->ip_proto = 0;
2641 }
69d2c8eb 2642 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
fc2f2fa7 2643 p->afp_v.nr_cpus) == 0) {
9206b30f 2644 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a 2645 SCFree(keys[0]);
8c880879
EL
2646 return 0;
2647 }
b07bda7a
EL
2648 keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
2649 if (keys[1] == NULL) {
9206b30f
EL
2650 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2651 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2652 SCFree(keys[0]);
2653 return 0;
8c880879 2654 }
b07bda7a
EL
2655 for (i = 0; i < 4; i++) {
2656 keys[1]->src[i] = GET_IPV6_DST_ADDR(p)[i];
2657 keys[1]->dst[i] = GET_IPV6_SRC_ADDR(p)[i];
2658 }
2659 keys[1]->port16[0] = htons(GET_TCP_DST_PORT(p));
2660 keys[1]->port16[1] = htons(GET_TCP_SRC_PORT(p));
d119845d
EL
2661 keys[1]->vlan0 = p->vlan_id[0];
2662 keys[1]->vlan1 = p->vlan_id[1];
2663 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2664 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
fc2f2fa7 2665 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2666 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2667 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2668 SCFree(keys[0]);
2669 SCFree(keys[1]);
8c880879
EL
2670 return 0;
2671 }
9206b30f 2672 return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
8c880879
EL
2673 }
2674#endif
2675 return 0;
2676}
2677
5e62ae6d
EL
2678
2679bool g_flowv4_ok = true;
2680bool g_flowv6_ok = true;
2681
c45d8985
EL
2682/**
2683 * \brief Init function for ReceiveAFP.
2684 *
2685 * \param tv pointer to ThreadVars
2686 * \param initdata pointer to the interface passed from the user
2687 * \param data pointer gets populated with AFPThreadVars
2688 *
2689 * \todo Create a general AFP setup function.
2690 */
ab1200fb 2691TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
8f1d7503 2692{
c45d8985 2693 SCEnter();
ab1200fb 2694 AFPIfaceConfig *afpconfig = (AFPIfaceConfig *)initdata;
c45d8985 2695
c45d8985
EL
2696 if (initdata == NULL) {
2697 SCLogError(SC_ERR_INVALID_ARGUMENT, "initdata == NULL");
2698 SCReturnInt(TM_ECODE_FAILED);
2699 }
2700
2701 AFPThreadVars *ptv = SCMalloc(sizeof(AFPThreadVars));
e176be6f 2702 if (unlikely(ptv == NULL)) {
45d5c3ca 2703 afpconfig->DerefFunc(afpconfig);
c45d8985 2704 SCReturnInt(TM_ECODE_FAILED);
45d5c3ca 2705 }
c45d8985
EL
2706 memset(ptv, 0, sizeof(AFPThreadVars));
2707
2708 ptv->tv = tv;
2709 ptv->cooked = 0;
2710
fbca1a4e 2711 strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
c45d8985
EL
2712 ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2713
51eb9605
EL
2714 ptv->livedev = LiveGetDevice(ptv->iface);
2715 if (ptv->livedev == NULL) {
2716 SCLogError(SC_ERR_INVALID_VALUE, "Unable to find Live device");
11bdf483 2717 SCFree(ptv);
51eb9605
EL
2718 SCReturnInt(TM_ECODE_FAILED);
2719 }
2720
fbca1a4e 2721 ptv->buffer_size = afpconfig->buffer_size;
8879df80 2722 ptv->ring_size = afpconfig->ring_size;
fa902abe 2723 ptv->block_size = afpconfig->block_size;
8baf64f5 2724 ptv->block_timeout = afpconfig->block_timeout;
e80b30c0 2725
df7dbe36 2726 ptv->promisc = afpconfig->promisc;
6062e00c 2727 ptv->checksum_mode = afpconfig->checksum_mode;
6efd37a3 2728 ptv->bpf_filter = NULL;
df7dbe36 2729
fbca1a4e 2730 ptv->threads = 1;
e80b30c0
EL
2731#ifdef HAVE_PACKET_FANOUT
2732 ptv->cluster_type = PACKET_FANOUT_LB;
2733 ptv->cluster_id = 1;
2734 /* We only set cluster info if the number of reader threads is greater than 1 */
fbca1a4e 2735 if (afpconfig->threads > 1) {
9d882116
VJ
2736 ptv->cluster_id = afpconfig->cluster_id;
2737 ptv->cluster_type = afpconfig->cluster_type;
2738 ptv->threads = afpconfig->threads;
e80b30c0
EL
2739 }
2740#endif
49b7b00f 2741 ptv->flags = afpconfig->flags;
e80b30c0 2742
f2a6fb8a
EL
2743 if (afpconfig->bpf_filter) {
2744 ptv->bpf_filter = afpconfig->bpf_filter;
2745 }
91e1256b
EL
2746 ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
2747 ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
8c880879 2748 ptv->xdp_mode = afpconfig->xdp_mode;
36838017 2749#ifdef HAVE_PACKET_EBPF
4cf53100 2750 ptv->ebpf_t_config.cpus_count = UtilCpuGetNumProcessorsConfigured();
f2a6fb8a 2751
d65f4585 2752 if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
126488f7 2753 ptv->v4_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v4");
d65f4585 2754 if (ptv->v4_map_fd == -1) {
5e62ae6d
EL
2755 if (g_flowv4_ok == false) {
2756 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'",
2757 "flow_table_v4");
2758 g_flowv4_ok = true;
2759 }
d65f4585 2760 }
126488f7 2761 ptv->v6_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v6");
d65f4585 2762 if (ptv->v6_map_fd == -1) {
5e62ae6d
EL
2763 if (g_flowv6_ok) {
2764 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'",
2765 "flow_table_v6");
2766 g_flowv6_ok = false;
2767 }
d65f4585
EL
2768 }
2769 }
4cf53100 2770 ptv->ebpf_t_config = afpconfig->ebpf_t_config;
d65f4585
EL
2771#endif
2772
6efd37a3 2773#ifdef PACKET_STATISTICS
1ef786e7
VJ
2774 ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
2775 ptv->tv);
2776 ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
2777 ptv->tv);
9efa4ace
EL
2778 ptv->capture_errors = StatsRegisterCounter("capture.errors",
2779 ptv->tv);
6efd37a3
EL
2780#endif
2781
662dccd8
EL
2782 ptv->copy_mode = afpconfig->copy_mode;
2783 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
2784 strlcpy(ptv->out_iface, afpconfig->out_iface, AFP_IFACE_NAME_LENGTH);
2785 ptv->out_iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
b7e78d33
EL
2786 /* Warn about BPF filter consequence */
2787 if (ptv->bpf_filter) {
2788 SCLogWarning(SC_WARN_UNCOMMON, "Enabling a BPF filter in IPS mode result"
2789 " in dropping all non matching packets.");
2790 }
662dccd8 2791 }
c85ee1e3 2792
b7e78d33 2793
0581a23f
EL
2794 if (AFPPeersListAdd(ptv) == TM_ECODE_FAILED) {
2795 SCFree(ptv);
2796 afpconfig->DerefFunc(afpconfig);
2797 SCReturnInt(TM_ECODE_FAILED);
2798 }
2799
e80b30c0
EL
2800#define T_DATA_SIZE 70000
2801 ptv->data = SCMalloc(T_DATA_SIZE);
2802 if (ptv->data == NULL) {
45d5c3ca 2803 afpconfig->DerefFunc(afpconfig);
6019ae3d 2804 SCFree(ptv);
e80b30c0 2805 SCReturnInt(TM_ECODE_FAILED);
c45d8985 2806 }
e80b30c0
EL
2807 ptv->datalen = T_DATA_SIZE;
2808#undef T_DATA_SIZE
2809
c45d8985 2810 *data = (void *)ptv;
fbca1a4e 2811
45d5c3ca 2812 afpconfig->DerefFunc(afpconfig);
71e47868 2813
2cd6e128
EL
2814 /* If kernel is older than 3.0, VLAN is not stripped so we don't
2815 * get the info from packet extended header but we will use a standard
2816 * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
bcc03f17
MF
2817 if (SCKernelVersionIsAtLeast(3, 0)) {
2818 ptv->flags |= AFP_VLAN_IN_HEADER;
2cd6e128
EL
2819 }
2820
c45d8985
EL
2821 SCReturnInt(TM_ECODE_OK);
2822}
2823
2824/**
2825 * \brief This function prints stats to the screen at exit.
2826 * \param tv pointer to ThreadVars
2827 * \param data pointer that gets cast into AFPThreadVars for ptv
2828 */
8f1d7503
KS
2829void ReceiveAFPThreadExitStats(ThreadVars *tv, void *data)
2830{
c45d8985
EL
2831 SCEnter();
2832 AFPThreadVars *ptv = (AFPThreadVars *)data;
9549faae
EL
2833
2834#ifdef PACKET_STATISTICS
e8a4a4c4 2835 AFPDumpCounters(ptv);
b3bf7a57 2836 SCLogPerf("(%s) Kernel: Packets %" PRIu64 ", dropped %" PRIu64 "",
6efd37a3 2837 tv->name,
752f03e7
VJ
2838 StatsGetLocalCounterValue(tv, ptv->capture_kernel_packets),
2839 StatsGetLocalCounterValue(tv, ptv->capture_kernel_drops));
9549faae 2840#endif
c45d8985
EL
2841}
2842
2843/**
2844 * \brief DeInit function closes af packet socket at exit.
2845 * \param tv pointer to ThreadVars
2846 * \param data pointer that gets cast into AFPThreadVars for ptv
2847 */
8f1d7503
KS
2848TmEcode ReceiveAFPThreadDeinit(ThreadVars *tv, void *data)
2849{
c45d8985
EL
2850 AFPThreadVars *ptv = (AFPThreadVars *)data;
2851
13f13b6d
EL
2852 AFPSwitchState(ptv, AFP_STATE_DOWN);
2853
8c880879 2854#ifdef HAVE_PACKET_XDP
4cf53100
EL
2855 if ((ptv->ebpf_t_config.flags & EBPF_XDP_CODE) &&
2856 (!(ptv->ebpf_t_config.flags & EBPF_PINNED_MAPS))) {
2857 EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
2858 }
8c880879 2859#endif
e80b30c0
EL
2860 if (ptv->data != NULL) {
2861 SCFree(ptv->data);
2862 ptv->data = NULL;
2863 }
2864 ptv->datalen = 0;
2865
f2a6fb8a 2866 ptv->bpf_filter = NULL;
69d0d484
VJ
2867 if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring.v3) {
2868 SCFree(ptv->ring.v3);
ce59ec5d 2869 } else {
69d0d484
VJ
2870 if (ptv->ring.v2)
2871 SCFree(ptv->ring.v2);
ce59ec5d 2872 }
f2a6fb8a 2873
7127ae2b 2874 SCFree(ptv);
c45d8985
EL
2875 SCReturnInt(TM_ECODE_OK);
2876}
2877
2878/**
2879 * \brief This function passes off to link type decoders.
2880 *
2881 * DecodeAFP reads packets from the PacketQueue and passes
2882 * them off to the proper link type decoder.
2883 *
2884 * \param t pointer to ThreadVars
2885 * \param p pointer to the current packet
2886 * \param data pointer that gets cast into AFPThreadVars for ptv
2887 * \param pq pointer to the current PacketQueue
2888 */
2889TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *postpq)
2890{
2891 SCEnter();
2892 DecodeThreadVars *dtv = (DecodeThreadVars *)data;
2893
f7b1aefa
VJ
2894 /* XXX HACK: flow timeout can call us for injected pseudo packets
2895 * see bug: https://redmine.openinfosecfoundation.org/issues/1107 */
2896 if (p->flags & PKT_PSEUDO_STREAM_END)
2897 return TM_ECODE_OK;
2898
c45d8985 2899 /* update counters */
14466a80 2900 DecodeUpdatePacketCounters(tv, dtv, p);
c45d8985 2901
1fb7c0dd
EL
2902 /* If suri has set vlan during reading, we increase vlan counter */
2903 if (p->vlan_idx) {
1c0b4ee0 2904 StatsIncr(tv, dtv->counter_vlan);
1fb7c0dd
EL
2905 }
2906
c45d8985 2907 /* call the decoder */
49dbb455 2908 switch (p->datalink) {
c45d8985
EL
2909 case LINKTYPE_ETHERNET:
2910 DecodeEthernet(tv, dtv, p,GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2911 break;
49dbb455
VJ
2912 case LINKTYPE_LINUX_SLL:
2913 DecodeSll(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2914 break;
c45d8985
EL
2915 case LINKTYPE_PPP:
2916 DecodePPP(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2917 break;
2918 case LINKTYPE_RAW:
f67aa5de 2919 case LINKTYPE_GRE_OVER_IP:
c45d8985
EL
2920 DecodeRaw(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2921 break;
49dbb455
VJ
2922 case LINKTYPE_NULL:
2923 DecodeNull(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2924 break;
c45d8985
EL
2925 default:
2926 SCLogError(SC_ERR_DATALINK_UNIMPLEMENTED, "Error: datalink type %" PRId32 " not yet supported in module DecodeAFP", p->datalink);
2927 break;
2928 }
2929
3088b6ac 2930 PacketDecodeFinalize(tv, dtv, p);
e7f09f24 2931
c45d8985
EL
2932 SCReturnInt(TM_ECODE_OK);
2933}
2934
ab1200fb 2935TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
c45d8985
EL
2936{
2937 SCEnter();
2938 DecodeThreadVars *dtv = NULL;
2939
5f307aca 2940 dtv = DecodeThreadVarsAlloc(tv);
c45d8985
EL
2941
2942 if (dtv == NULL)
2943 SCReturnInt(TM_ECODE_FAILED);
2944
2945 DecodeRegisterPerfCounters(dtv, tv);
2946
2947 *data = (void *)dtv;
2948
2949 SCReturnInt(TM_ECODE_OK);
2950}
2951
2864f9ee
VJ
2952TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data)
2953{
2954 if (data != NULL)
98c88d51 2955 DecodeThreadVarsFree(tv, data);
2864f9ee
VJ
2956 SCReturnInt(TM_ECODE_OK);
2957}
2958
e80b30c0 2959#endif /* HAVE_AF_PACKET */
c45d8985 2960/* eof */
a6457262
EL
2961/**
2962 * @}
2963 */