]> git.ipfire.org Git - people/ms/suricata.git/blame - src/source-af-packet.c
decode/ieee8021ah: fix possible packet truncation
[people/ms/suricata.git] / src / source-af-packet.c
CommitLineData
cde438f6 1/* Copyright (C) 2011-2018 Open Information Security Foundation
c45d8985
EL
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
a6457262
EL
18/**
19 * \defgroup afppacket AF_PACKET running mode
20 *
21 * @{
22 */
23
c45d8985
EL
24/**
25 * \file
26 *
27 * \author Eric Leblond <eric@regit.org>
28 *
29 * AF_PACKET socket acquisition support
30 *
c45d8985
EL
31 */
32
91e1256b
EL
33#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34#define SC_PCAP_DONT_INCLUDE_PCAP_H 1
c45d8985 35#include "suricata-common.h"
e80b30c0 36#include "config.h"
c45d8985
EL
37#include "suricata.h"
38#include "decode.h"
39#include "packet-queue.h"
40#include "threads.h"
41#include "threadvars.h"
42#include "tm-queuehandlers.h"
43#include "tm-modules.h"
44#include "tm-threads.h"
45#include "tm-threads-common.h"
46#include "conf.h"
17a32bda 47#include "util-cpu.h"
c45d8985 48#include "util-debug.h"
51eb9605 49#include "util-device.h"
d65f4585 50#include "util-ebpf.h"
c45d8985
EL
51#include "util-error.h"
52#include "util-privs.h"
e80b30c0 53#include "util-optimize.h"
51eb9605 54#include "util-checksum.h"
ac56b1bf 55#include "util-ioctl.h"
2cd6e128 56#include "util-host-info.h"
c45d8985
EL
57#include "tmqh-packetpool.h"
58#include "source-af-packet.h"
34b3f194 59#include "runmodes.h"
b07bda7a 60#include "flow-storage.h"
c45d8985 61
e80b30c0 62#ifdef HAVE_AF_PACKET
472e061c
VJ
63
64#if HAVE_SYS_IOCTL_H
2bc0be6e 65#include <sys/ioctl.h>
472e061c
VJ
66#endif
67
b37554e0
EL
68#if HAVE_LINUX_SOCKIOS_H
69#include <linux/sockios.h>
70#endif
71
06173267
EL
72#ifdef HAVE_PACKET_EBPF
73#include "util-ebpf.h"
74#include <bpf/libbpf.h>
75#include <bpf/bpf.h>
76#endif
77
91e1256b
EL
/*
 * Local declaration of the BPF program descriptor.
 * NOTE(review): presumably declared here because PCAP_DONT_INCLUDE_PCAP_BPF_H
 * (defined above) keeps pcap's own BPF header out of this file - confirm.
 */
struct bpf_program {
    unsigned int bf_len;       /* instruction count (assumed from the name - TODO confirm) */
    struct bpf_insn *bf_insns; /* instruction array (assumed from the name - TODO confirm) */
};
82
83#ifdef HAVE_PCAP_H
84#include <pcap.h>
85#endif
86
87#ifdef HAVE_PCAP_PCAP_H
88#include <pcap/pcap.h>
89#endif
90
28e9e4c8
EL
91#include "util-bpf.h"
92
472e061c 93#if HAVE_LINUX_IF_ETHER_H
c45d8985 94#include <linux/if_ether.h>
472e061c
VJ
95#endif
96
97#if HAVE_LINUX_IF_PACKET_H
c45d8985 98#include <linux/if_packet.h>
472e061c
VJ
99#endif
100
101#if HAVE_LINUX_IF_ARP_H
c45d8985 102#include <linux/if_arp.h>
472e061c 103#endif
f2a6fb8a 104
472e061c 105#if HAVE_LINUX_FILTER_H
f2a6fb8a 106#include <linux/filter.h>
e80b30c0 107#endif
c45d8985 108
472e061c 109#if HAVE_SYS_MMAN_H
49b7b00f 110#include <sys/mman.h>
472e061c
VJ
111#endif
112
a40f08a2
EL
113#ifdef HAVE_HW_TIMESTAMPING
114#include <linux/net_tstamp.h>
115#endif
116
472e061c 117#endif /* HAVE_AF_PACKET */
49b7b00f 118
c45d8985
EL
119extern int max_pending_packets;
120
e80b30c0
EL
121#ifndef HAVE_AF_PACKET
122
ab1200fb 123TmEcode NoAFPSupportExit(ThreadVars *, const void *, void **);
e80b30c0 124
8f1d7503
KS
125void TmModuleReceiveAFPRegister (void)
126{
e80b30c0
EL
127 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
128 tmm_modules[TMM_RECEIVEAFP].ThreadInit = NoAFPSupportExit;
129 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
130 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
131 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
132 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
133 tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
3f1c4efc 134 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
e80b30c0
EL
135}
136
137/**
138 * \brief Registration Function for DecodeAFP.
e80b30c0 139 */
8f1d7503
KS
140void TmModuleDecodeAFPRegister (void)
141{
e80b30c0
EL
142 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
143 tmm_modules[TMM_DECODEAFP].ThreadInit = NoAFPSupportExit;
144 tmm_modules[TMM_DECODEAFP].Func = NULL;
145 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
146 tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
147 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
148 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 149 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
e80b30c0
EL
150}
151
152/**
153 * \brief this function prints an error message and exits.
154 */
ab1200fb 155TmEcode NoAFPSupportExit(ThreadVars *tv, const void *initdata, void **data)
e80b30c0
EL
156{
157 SCLogError(SC_ERR_NO_AF_PACKET,"Error creating thread %s: you do not have "
158 "support for AF_PACKET enabled, on Linux host please recompile "
159 "with --enable-af-packet", tv->name);
160 exit(EXIT_FAILURE);
161}
162
163#else /* We have AF_PACKET support */
164
c45d8985
EL
165#define AFP_IFACE_NAME_LENGTH 48
166
167#define AFP_STATE_DOWN 0
168#define AFP_STATE_UP 1
169
170#define AFP_RECONNECT_TIMEOUT 500000
13f13b6d 171#define AFP_DOWN_COUNTER_INTERVAL 40
c45d8985
EL
172
173#define POLL_TIMEOUT 100
174
4a1a0080
EL
175#ifndef TP_STATUS_USER_BUSY
176/* for new use latest bit available in tp_status */
177#define TP_STATUS_USER_BUSY (1 << 31)
178#endif
179
b603ad62
EL
180#ifndef TP_STATUS_VLAN_VALID
181#define TP_STATUS_VLAN_VALID (1 << 4)
182#endif
183
62e63e3f
EL
/* Status codes returned by the packet read functions (AFPRead, AFPReadFromRing). */
enum {
    /** Read pass finished without error */
    AFP_READ_OK = 0,
    /** Reading from the socket or the ring failed */
    AFP_READ_FAILURE = 1,
    /** Error during treatment by other functions of Suricata */
    AFP_SURI_FAILURE = 2,
    /** Returned by the ring reader after an emergency flush in emergency mode */
    AFP_KERNEL_DROP = 3,
};
191
1992a227
EL
/*
 * Error classes.
 * NOTE(review): the users of these values are outside this part of the file;
 * the names suggest they separate unrecoverable from retryable failures - confirm.
 */
enum {
    AFP_FATAL_ERROR = 1,
    AFP_RECOVERABLE_ERROR = 2,
};
196
49b7b00f
EL
/*
 * Typed views on one ring frame header: the same address can be accessed as
 * a TPACKET_V2 header, a TPACKET_V3 header (when built in) or a raw pointer.
 */
union thdr {
    struct tpacket2_hdr *h2; /* TPACKET_V2 view */
#ifdef HAVE_TPACKET_V3
    struct tpacket3_hdr *h3; /* TPACKET_V3 view */
#endif
    void *raw;               /* untyped view, used to assign and NULL-check */
};
204
06173267 205static int AFPBypassCallback(Packet *p);
8c880879 206static int AFPXDPBypassCallback(Packet *p);
06173267 207
91e1256b 208#define MAX_MAPS 32
c45d8985
EL
209/**
210 * \brief Structure to hold thread specific variables.
211 */
212typedef struct AFPThreadVars_
213{
69d0d484
VJ
214 union AFPRing {
215 char *v2;
216 struct iovec *v3;
217 } ring;
b797fd92 218
c45d8985 219 /* counters */
3ce39433 220 uint64_t pkts;
c45d8985 221
ff6365dd
EL
222 ThreadVars *tv;
223 TmSlot *slot;
9500d12c
EL
224 LiveDevice *livedev;
225 /* data link type for the thread */
b797fd92 226 uint32_t datalink;
9500d12c 227
d65f4585 228#ifdef HAVE_PACKET_EBPF
94a622cb 229 /* File descriptor of the IPv4 flow bypass table maps */
d65f4585 230 int v4_map_fd;
94a622cb 231 /* File descriptor of the IPv6 flow bypass table maps */
d65f4585
EL
232 int v6_map_fd;
233#endif
234
9500d12c 235 unsigned int frame_offset;
ff6365dd 236
9500d12c
EL
237 ChecksumValidationMode checksum_mode;
238
b797fd92 239 /* references to packet and drop counters */
9500d12c
EL
240 uint16_t capture_kernel_packets;
241 uint16_t capture_kernel_drops;
9efa4ace 242 uint16_t capture_errors;
9500d12c
EL
243
244 /* handle state */
245 uint8_t afp_state;
246 uint8_t copy_mode;
4bfa3aea 247 unsigned int flags;
9500d12c
EL
248
249 /* IPS peer */
250 AFPPeer *mpeer;
251
252 /* no mmap mode */
ff6365dd
EL
253 uint8_t *data; /** Per function and thread data */
254 int datalen; /** Length of per function and thread data */
9500d12c 255 int cooked;
ff6365dd 256
9500d12c
EL
257 /*
258 * Init related members
259 */
51eb9605 260
9500d12c
EL
261 /* thread specific socket */
262 int socket;
b797fd92
EL
263
264 int ring_size;
fa902abe 265 int block_size;
234aefdf 266 int block_timeout;
e80b30c0
EL
267 /* socket buffer size */
268 int buffer_size;
fa902abe 269 /* Filter */
ab1200fb 270 const char *bpf_filter;
91e1256b
EL
271 int ebpf_lb_fd;
272 int ebpf_filter_fd;
9500d12c 273
df7dbe36 274 int promisc;
e80b30c0 275
9500d12c 276 int down_count;
662dccd8 277
e80b30c0
EL
278 int cluster_id;
279 int cluster_type;
c45d8985 280
fbca1a4e
EL
281 int threads;
282
69d0d484
VJ
283 union AFPTpacketReq {
284 struct tpacket_req v2;
c2d0d938 285#ifdef HAVE_TPACKET_V3
69d0d484 286 struct tpacket_req3 v3;
c2d0d938 287#endif
69d0d484 288 } req;
b797fd92
EL
289
290 char iface[AFP_IFACE_NAME_LENGTH];
291 /* IPS output iface */
292 char out_iface[AFP_IFACE_NAME_LENGTH];
662dccd8 293
cba41207
AG
294 /* mmap'ed ring buffer */
295 unsigned int ring_buflen;
296 uint8_t *ring_buf;
91e1256b 297
8c880879
EL
298 uint8_t xdp_mode;
299
36838017 300#ifdef HAVE_PACKET_EBPF
4cf53100 301 struct ebpf_timeout_config ebpf_t_config;
36838017 302#endif
315c29a8 303
c45d8985
EL
304} AFPThreadVars;
305
15e3bdb7
VJ
306static TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
307static void ReceiveAFPThreadExitStats(ThreadVars *, void *);
308static TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
309static TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
c45d8985 310
15e3bdb7
VJ
311static TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
312static TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
f8aed4ce 313static TmEcode DecodeAFP(ThreadVars *, Packet *, void *);
c45d8985 314
15e3bdb7 315static TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
662dccd8 316static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
13f13b6d
EL
317static int AFPGetDevFlags(int fd, const char *ifname);
318static int AFPDerefSocket(AFPPeer* peer);
319static int AFPRefSocket(AFPPeer* peer);
f2a6fb8a 320
19475165 321
c45d8985
EL
322/**
323 * \brief Registration Function for RecieveAFP.
324 * \todo Unit tests are needed for this module.
325 */
8f1d7503
KS
326void TmModuleReceiveAFPRegister (void)
327{
c45d8985
EL
328 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
329 tmm_modules[TMM_RECEIVEAFP].ThreadInit = ReceiveAFPThreadInit;
ff6365dd 330 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
e80b30c0 331 tmm_modules[TMM_RECEIVEAFP].PktAcqLoop = ReceiveAFPLoop;
57e0bd39 332 tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
c45d8985 333 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
7127ae2b 334 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = ReceiveAFPThreadDeinit;
c45d8985
EL
335 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
336 tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
cd4705e6 337 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
19475165 338
c45d8985
EL
339}
340
a6457262
EL
341
342/**
343 * \defgroup afppeers AFP peers list
344 *
345 * AF_PACKET has an IPS mode where interfaces are peered: packets from
346 * one interface are sent to the peered interface and vice versa. The ::AFPPeer
347 * list maintains the list of peers. Each ::AFPPeer stores the information
348 * needed to send packets on its interface.
349 * An element of the list must not be destroyed during the run of Suricata as it
350 * is used by ::Packet and other threads.
351 *
352 * @{
353 */
354
662dccd8
EL
355typedef struct AFPPeersList_ {
356 TAILQ_HEAD(, AFPPeer_) peers; /**< Head of list of fragments. */
357 int cnt;
358 int peered;
60400163
EL
359 int turn; /**< Next value for initialisation order */
360 SC_ATOMIC_DECLARE(int, reached); /**< Counter used to synchronize start */
662dccd8
EL
361} AFPPeersList;
362
363/**
a6457262
EL
364 * \brief Update the peer.
365 *
366 * Update the AFPPeer of a thread ie set new state, socket number
367 * or iface index.
368 *
662dccd8 369 */
ab1200fb 370static void AFPPeerUpdate(AFPThreadVars *ptv)
662dccd8
EL
371{
372 if (ptv->mpeer == NULL) {
373 return;
374 }
662dccd8
EL
375 (void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
376 (void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
377 (void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
378}
379
a6457262
EL
380/**
381 * \brief Clean and free ressource used by an ::AFPPeer
382 */
ab1200fb 383static void AFPPeerClean(AFPPeer *peer)
662dccd8
EL
384{
385 if (peer->flags & AFP_SOCK_PROTECT)
386 SCMutexDestroy(&peer->sock_protect);
387 SC_ATOMIC_DESTROY(peer->socket);
388 SC_ATOMIC_DESTROY(peer->if_idx);
389 SC_ATOMIC_DESTROY(peer->state);
390 SCFree(peer);
391}
392
393AFPPeersList peerslist;
394
395
a6457262
EL
396/**
397 * \brief Init the global list of ::AFPPeer
398 */
662dccd8
EL
399TmEcode AFPPeersListInit()
400{
401 SCEnter();
402 TAILQ_INIT(&peerslist.peers);
403 peerslist.peered = 0;
404 peerslist.cnt = 0;
60400163
EL
405 peerslist.turn = 0;
406 SC_ATOMIC_INIT(peerslist.reached);
407 (void) SC_ATOMIC_SET(peerslist.reached, 0);
662dccd8
EL
408 SCReturnInt(TM_ECODE_OK);
409}
410
a6457262
EL
411/**
412 * \brief Check that all ::AFPPeer got a peer
413 *
414 * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
415 */
662dccd8
EL
416TmEcode AFPPeersListCheck()
417{
418#define AFP_PEERS_MAX_TRY 4
419#define AFP_PEERS_WAIT 20000
420 int try = 0;
421 SCEnter();
422 while (try < AFP_PEERS_MAX_TRY) {
423 if (peerslist.cnt != peerslist.peered) {
424 usleep(AFP_PEERS_WAIT);
425 } else {
426 SCReturnInt(TM_ECODE_OK);
427 }
428 try++;
429 }
430 SCLogError(SC_ERR_AFP_CREATE, "Threads number not equals");
431 SCReturnInt(TM_ECODE_FAILED);
432}
433
a6457262
EL
434/**
435 * \brief Declare a new AFP thread to AFP peers list.
436 */
ab1200fb 437static TmEcode AFPPeersListAdd(AFPThreadVars *ptv)
662dccd8
EL
438{
439 SCEnter();
440 AFPPeer *peer = SCMalloc(sizeof(AFPPeer));
441 AFPPeer *pitem;
ac56b1bf 442 int mtu, out_mtu;
662dccd8 443
e176be6f 444 if (unlikely(peer == NULL)) {
662dccd8
EL
445 SCReturnInt(TM_ECODE_FAILED);
446 }
447 memset(peer, 0, sizeof(AFPPeer));
448 SC_ATOMIC_INIT(peer->socket);
13f13b6d 449 SC_ATOMIC_INIT(peer->sock_usage);
662dccd8
EL
450 SC_ATOMIC_INIT(peer->if_idx);
451 SC_ATOMIC_INIT(peer->state);
452 peer->flags = ptv->flags;
60400163 453 peer->turn = peerslist.turn++;
662dccd8
EL
454
455 if (peer->flags & AFP_SOCK_PROTECT) {
456 SCMutexInit(&peer->sock_protect, NULL);
457 }
458
13f13b6d 459 (void)SC_ATOMIC_SET(peer->sock_usage, 0);
662dccd8
EL
460 (void)SC_ATOMIC_SET(peer->state, AFP_STATE_DOWN);
461 strlcpy(peer->iface, ptv->iface, AFP_IFACE_NAME_LENGTH);
462 ptv->mpeer = peer;
463 /* add element to iface list */
464 TAILQ_INSERT_TAIL(&peerslist.peers, peer, next);
662dccd8 465
13f13b6d
EL
466 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
467 peerslist.cnt++;
468
469 /* Iter to find a peer */
470 TAILQ_FOREACH(pitem, &peerslist.peers, next) {
471 if (pitem->peer)
472 continue;
473 if (strcmp(pitem->iface, ptv->out_iface))
474 continue;
475 peer->peer = pitem;
476 pitem->peer = peer;
477 mtu = GetIfaceMTU(ptv->iface);
478 out_mtu = GetIfaceMTU(ptv->out_iface);
479 if (mtu != out_mtu) {
480 SCLogError(SC_ERR_AFP_CREATE,
481 "MTU on %s (%d) and %s (%d) are not equal, "
482 "transmission of packets bigger than %d will fail.",
483 ptv->iface, mtu,
484 ptv->out_iface, out_mtu,
485 (out_mtu > mtu) ? mtu : out_mtu);
486 }
487 peerslist.peered += 2;
488 break;
ac56b1bf 489 }
662dccd8
EL
490 }
491
492 AFPPeerUpdate(ptv);
493
494 SCReturnInt(TM_ECODE_OK);
495}
496
ab1200fb 497static int AFPPeersListWaitTurn(AFPPeer *peer)
60400163 498{
b2691cbe
EL
499 /* If turn is zero, we already have started threads once */
500 if (peerslist.turn == 0)
501 return 0;
502
60400163
EL
503 if (peer->turn == SC_ATOMIC_GET(peerslist.reached))
504 return 0;
505 return 1;
506}
507
ab1200fb 508static void AFPPeersListReachedInc(void)
60400163 509{
b2691cbe
EL
510 if (peerslist.turn == 0)
511 return;
512
513 if (SC_ATOMIC_ADD(peerslist.reached, 1) == peerslist.turn) {
514 SCLogInfo("All AFP capture threads are running.");
515 (void)SC_ATOMIC_SET(peerslist.reached, 0);
516 /* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
517 * restarted.
518 */
519 peerslist.turn = 0;
520 }
60400163
EL
521}
522
ab1200fb 523static int AFPPeersListStarted(void)
919377d4
EL
524{
525 return !peerslist.turn;
526}
527
a6457262
EL
528/**
529 * \brief Clean the global peers list.
530 */
662dccd8
EL
531void AFPPeersListClean()
532{
533 AFPPeer *pitem;
534
535 while ((pitem = TAILQ_FIRST(&peerslist.peers))) {
536 TAILQ_REMOVE(&peerslist.peers, pitem, next);
537 AFPPeerClean(pitem);
538 }
539}
540
a6457262
EL
541/**
542 * @}
543 */
544
c45d8985
EL
545/**
546 * \brief Registration Function for DecodeAFP.
547 * \todo Unit tests are needed for this module.
548 */
8f1d7503
KS
549void TmModuleDecodeAFPRegister (void)
550{
c45d8985
EL
551 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
552 tmm_modules[TMM_DECODEAFP].ThreadInit = DecodeAFPThreadInit;
553 tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
554 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
2864f9ee 555 tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
c45d8985
EL
556 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
557 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 558 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
c45d8985
EL
559}
560
662dccd8 561
e80b30c0
EL
562static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose);
563
e8a4a4c4 564static inline void AFPDumpCounters(AFPThreadVars *ptv)
6efd37a3 565{
6efd37a3 566#ifdef PACKET_STATISTICS
e8a4a4c4
EL
567 struct tpacket_stats kstats;
568 socklen_t len = sizeof (struct tpacket_stats);
569 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
570 &kstats, &len) > -1) {
571 SCLogDebug("(%s) Kernel: Packets %" PRIu32 ", dropped %" PRIu32 "",
572 ptv->tv->name,
573 kstats.tp_packets, kstats.tp_drops);
8992275b
VJ
574 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, kstats.tp_packets);
575 StatsAddUI64(ptv->tv, ptv->capture_kernel_drops, kstats.tp_drops);
3ce39433
EL
576 (void) SC_ATOMIC_ADD(ptv->livedev->drop, (uint64_t) kstats.tp_drops);
577 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, (uint64_t) kstats.tp_packets);
6efd37a3 578 }
e8a4a4c4 579#endif
6efd37a3 580}
c45d8985
EL
581
582/**
583 * \brief AF packet read function.
584 *
585 * This function fills
586 * From here the packets are picked up by the DecodeAFP thread.
587 *
588 * \param user pointer to AFPThreadVars
589 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
590 */
ab1200fb 591static int AFPRead(AFPThreadVars *ptv)
c45d8985
EL
592{
593 Packet *p = NULL;
594 /* XXX should try to use read that get directly to packet */
c45d8985
EL
595 int offset = 0;
596 int caplen;
597 struct sockaddr_ll from;
598 struct iovec iov;
599 struct msghdr msg;
c45d8985
EL
600 struct cmsghdr *cmsg;
601 union {
602 struct cmsghdr cmsg;
603 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
604 } cmsg_buf;
6efd37a3 605 unsigned char aux_checksum = 0;
c45d8985
EL
606
607 msg.msg_name = &from;
608 msg.msg_namelen = sizeof(from);
609 msg.msg_iov = &iov;
610 msg.msg_iovlen = 1;
c45d8985
EL
611 msg.msg_control = &cmsg_buf;
612 msg.msg_controllen = sizeof(cmsg_buf);
c45d8985
EL
613 msg.msg_flags = 0;
614
615 if (ptv->cooked)
616 offset = SLL_HEADER_LEN;
617 else
618 offset = 0;
e80b30c0
EL
619 iov.iov_len = ptv->datalen - offset;
620 iov.iov_base = ptv->data + offset;
c45d8985
EL
621
622 caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
623
624 if (caplen < 0) {
625 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
626 errno);
62e63e3f 627 SCReturnInt(AFP_READ_FAILURE);
c45d8985 628 }
ff6365dd
EL
629
630 p = PacketGetFromQueueOrAlloc();
c45d8985 631 if (p == NULL) {
9efa4ace 632 SCReturnInt(AFP_SURI_FAILURE);
c45d8985 633 }
b33986c8 634 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
635 if (ptv->flags & AFP_BYPASS) {
636 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585
EL
637#ifdef HAVE_PACKET_EBPF
638 p->afp_v.v4_map_fd = ptv->v4_map_fd;
639 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 640 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 641#endif
06173267 642 }
8c880879
EL
643 if (ptv->flags & AFP_XDPBYPASS) {
644 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585
EL
645#ifdef HAVE_PACKET_EBPF
646 p->afp_v.v4_map_fd = ptv->v4_map_fd;
647 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 648 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 649#endif
8c880879 650 }
c45d8985
EL
651
652 /* get timestamp of packet via ioctl */
653 if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
654 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
655 errno);
656 TmqhOutputPacketpool(ptv->tv, p);
62e63e3f 657 SCReturnInt(AFP_READ_FAILURE);
c45d8985
EL
658 }
659
660 ptv->pkts++;
51eb9605 661 p->livedev = ptv->livedev;
c45d8985
EL
662
663 /* add forged header */
664 if (ptv->cooked) {
e80b30c0 665 SllHdr * hdrp = (SllHdr *)ptv->data;
c45d8985
EL
666 /* XXX this is minimalist, but this seems enough */
667 hdrp->sll_protocol = from.sll_protocol;
668 }
669
670 p->datalink = ptv->datalink;
671 SET_PKT_LEN(p, caplen + offset);
e80b30c0 672 if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
c45d8985 673 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 674 SCReturnInt(AFP_SURI_FAILURE);
c45d8985 675 }
e80b30c0
EL
676 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
677 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
678
6062e00c
EL
679 /* We only check for checksum disable */
680 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
51eb9605
EL
681 p->flags |= PKT_IGNORE_CHECKSUM;
682 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
683 if (ptv->livedev->ignore_checksum) {
684 p->flags |= PKT_IGNORE_CHECKSUM;
a565148f 685 } else if (ChecksumAutoModeCheck(ptv->pkts,
51eb9605
EL
686 SC_ATOMIC_GET(ptv->livedev->pkts),
687 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
688 ptv->livedev->ignore_checksum = 1;
6062e00c 689 p->flags |= PKT_IGNORE_CHECKSUM;
51eb9605 690 }
6062e00c 691 } else {
6efd37a3
EL
692 aux_checksum = 1;
693 }
6062e00c 694
6efd37a3
EL
695 /* List is NULL if we don't have activated auxiliary data */
696 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
697 struct tpacket_auxdata *aux;
f6ddaf33 698
6efd37a3
EL
699 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
700 cmsg->cmsg_level != SOL_PACKET ||
701 cmsg->cmsg_type != PACKET_AUXDATA)
702 continue;
f6ddaf33 703
6efd37a3
EL
704 aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
705
706 if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
707 p->flags |= PKT_IGNORE_CHECKSUM;
f6ddaf33 708 }
6efd37a3 709 break;
f6ddaf33
EL
710 }
711
c469824b 712 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
9efa4ace 713 SCReturnInt(AFP_SURI_FAILURE);
c469824b 714 }
62e63e3f 715 SCReturnInt(AFP_READ_OK);
c45d8985
EL
716}
717
ecf59be4
EL
718/**
719 * \brief AF packet write function.
720 *
721 * This function has to be called before the memory
722 * related to Packet in ring buffer is released.
723 *
724 * \param pointer to Packet
725 * \param version of capture: TPACKET_V2 or TPACKET_V3
726 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
727 *
728 */
729static TmEcode AFPWritePacket(Packet *p, int version)
662dccd8
EL
730{
731 struct sockaddr_ll socket_address;
732 int socket;
ecf59be4
EL
733 uint8_t *pstart;
734 size_t plen;
ee7e689b
AG
735 union thdr h;
736 uint16_t vlan_tci = 0;
662dccd8
EL
737
738 if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
3f107fa1 739 if (PACKET_TEST_ACTION(p, ACTION_DROP)) {
662dccd8
EL
740 return TM_ECODE_OK;
741 }
742 }
743
744 if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
745 return TM_ECODE_OK;
746
747 if (p->ethh == NULL) {
748 SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
749 return TM_ECODE_FAILED;
750 }
751 /* Index of the network device */
752 socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
753 /* Address length*/
754 socket_address.sll_halen = ETH_ALEN;
755 /* Destination MAC */
756 memcpy(socket_address.sll_addr, p->ethh, 6);
757
758 /* Send packet, locking the socket if necessary */
759 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
760 SCMutexLock(&p->afp_v.peer->sock_protect);
761 socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
ecf59be4 762
ee7e689b
AG
763 h.raw = p->afp_v.relptr;
764
ecf59be4 765 if (version == TPACKET_V2) {
ecf59be4
EL
766 /* Copy VLAN header from ring memory. For post june 2011 kernel we test
767 * the flag. It is not defined for older kernel so we go best effort
768 * and test for non zero value of the TCI header. */
769 if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
ee7e689b
AG
770 vlan_tci = h.h2->tp_vlan_tci;
771 }
772 } else {
773#ifdef HAVE_TPACKET_V3
774 if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
775 vlan_tci = h.h3->hv1.tp_vlan_tci;
ecf59be4 776 }
ee7e689b
AG
777#else
778 /* Should not get here */
779 BUG_ON(1);
780#endif
781 }
782
783 if (vlan_tci != 0) {
784 pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
785 plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
786 /* move ethernet addresses */
787 memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
788 /* write vlan info */
789 *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
790 *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
ecf59be4
EL
791 } else {
792 pstart = GET_PKT_DATA(p);
793 plen = GET_PKT_LEN(p);
794 }
795
796 if (sendto(socket, pstart, plen, 0,
662dccd8
EL
797 (struct sockaddr*) &socket_address,
798 sizeof(struct sockaddr_ll)) < 0) {
799 SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
800 socket,
801 strerror(errno));
802 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
803 SCMutexUnlock(&p->afp_v.peer->sock_protect);
804 return TM_ECODE_FAILED;
805 }
806 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
807 SCMutexUnlock(&p->afp_v.peer->sock_protect);
808
809 return TM_ECODE_OK;
810}
811
ab1200fb 812static void AFPReleaseDataFromRing(Packet *p)
2011a3f8 813{
662dccd8
EL
814 /* Need to be in copy mode and need to detect early release
815 where Ethernet header could not be set (and pseudo packet) */
816 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
ecf59be4 817 AFPWritePacket(p, TPACKET_V2);
662dccd8 818 }
13f13b6d
EL
819
820 if (AFPDerefSocket(p->afp_v.mpeer) == 0)
680e941a 821 goto cleanup;
13f13b6d 822
2011a3f8
EL
823 if (p->afp_v.relptr) {
824 union thdr h;
825 h.raw = p->afp_v.relptr;
826 h.h2->tp_status = TP_STATUS_KERNEL;
2011a3f8 827 }
680e941a
EL
828
829cleanup:
830 AFPV_CLEANUP(&p->afp_v);
b076a26c
KS
831}
832
ecf59be4 833#ifdef HAVE_TPACKET_V3
ab1200fb 834static void AFPReleasePacketV3(Packet *p)
bae1b03c
EL
835{
836 /* Need to be in copy mode and need to detect early release
837 where Ethernet header could not be set (and pseudo packet) */
838 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
ecf59be4 839 AFPWritePacket(p, TPACKET_V3);
bae1b03c
EL
840 }
841 PacketFreeOrRelease(p);
842}
ecf59be4 843#endif
bae1b03c 844
ab1200fb 845static void AFPReleasePacket(Packet *p)
b076a26c
KS
846{
847 AFPReleaseDataFromRing(p);
848 PacketFreeOrRelease(p);
2011a3f8
EL
849}
850
49b7b00f
EL
851/**
852 * \brief AF packet read function for ring
853 *
854 * This function fills
855 * From here the packets are picked up by the DecodeAFP thread.
856 *
857 * \param user pointer to AFPThreadVars
858 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
859 */
ab1200fb 860static int AFPReadFromRing(AFPThreadVars *ptv)
49b7b00f
EL
861{
862 Packet *p = NULL;
863 union thdr h;
27b5136b 864 uint8_t emergency_flush = 0;
4d8f70c6 865 int read_pkts = 0;
b26ec603 866 int loop_start = -1;
4d8f70c6 867
49b7b00f 868
a369f8c3
EL
869 /* Loop till we have packets available */
870 while (1) {
53c02334
AS
871 if (unlikely(suricata_ctl_flags != 0)) {
872 break;
873 }
874
a369f8c3 875 /* Read packet from ring */
69d0d484 876 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
9efa4ace
EL
877 if (unlikely(h.raw == NULL)) {
878 /* Impossible we reach this point in normal condition, so trigger
879 * a failure in reading */
880 SCReturnInt(AFP_READ_FAILURE);
34b3f194 881 }
662dccd8 882
82a2dd85 883 if ((! h.h2->tp_status) || (h.h2->tp_status & TP_STATUS_USER_BUSY)) {
4d8f70c6 884 if (read_pkts == 0) {
b26ec603
EL
885 if (loop_start == -1) {
886 loop_start = ptv->frame_offset;
887 } else if (unlikely(loop_start == (int)ptv->frame_offset)) {
888 SCReturnInt(AFP_READ_OK);
889 }
69d0d484 890 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
b26ec603
EL
891 ptv->frame_offset = 0;
892 }
893 continue;
4d8f70c6 894 }
27b5136b
EL
895 if ((emergency_flush) && (ptv->flags & AFP_EMERGENCY_MODE)) {
896 SCReturnInt(AFP_KERNEL_DROP);
897 } else {
898 SCReturnInt(AFP_READ_OK);
899 }
900 }
4d8f70c6
EL
901
902 read_pkts++;
b26ec603 903 loop_start = -1;
4d8f70c6 904
4a1a0080
EL
905 /* Our packet is still used by suricata, we exit read loop to
906 * gain some time */
907 if (h.h2->tp_status & TP_STATUS_USER_BUSY) {
908 SCReturnInt(AFP_READ_OK);
909 }
910
27b5136b
EL
911 if ((ptv->flags & AFP_EMERGENCY_MODE) && (emergency_flush == 1)) {
912 h.h2->tp_status = TP_STATUS_KERNEL;
913 goto next_frame;
a369f8c3
EL
914 }
915
916 p = PacketGetFromQueueOrAlloc();
917 if (p == NULL) {
9efa4ace 918 SCReturnInt(AFP_SURI_FAILURE);
34b3f194 919 }
b33986c8 920 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
921 if (ptv->flags & AFP_BYPASS) {
922 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585 923#ifdef HAVE_PACKET_EBPF
6062c27e
EL
924 p->afp_v.v4_map_fd = ptv->v4_map_fd;
925 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 926 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 927#endif
06173267 928 }
8c880879
EL
929 if (ptv->flags & AFP_XDPBYPASS) {
930 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585 931#ifdef HAVE_PACKET_EBPF
6062c27e
EL
932 p->afp_v.v4_map_fd = ptv->v4_map_fd;
933 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 934 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 935#endif
8c880879 936 }
49b7b00f 937
4a1a0080
EL
938 /* Suricata will treat packet so telling it is busy, this
939 * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
940 * function. */
941 h.h2->tp_status |= TP_STATUS_USER_BUSY;
942
a369f8c3 943 ptv->pkts++;
a369f8c3 944 p->livedev = ptv->livedev;
a369f8c3 945 p->datalink = ptv->datalink;
d0940396 946
a369f8c3
EL
947 if (h.h2->tp_len > h.h2->tp_snaplen) {
948 SCLogDebug("Packet length (%d) > snaplen (%d), truncating",
949 h.h2->tp_len, h.h2->tp_snaplen);
950 }
71e47868
EL
951
952 /* get vlan id from header */
bcc03f17 953 if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
e871f713 954 (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
01a8cc4e 955 p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
71e47868 956 p->vlan_idx = 1;
71e47868
EL
957 }
958
a369f8c3
EL
959 if (ptv->flags & AFP_ZERO_COPY) {
960 if (PacketSetData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
961 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 962 SCReturnInt(AFP_SURI_FAILURE);
662dccd8 963 } else {
0f2b3406 964 p->afp_v.relptr = h.raw;
b076a26c 965 p->ReleasePacket = AFPReleasePacket;
5f12b234
EL
966 p->afp_v.mpeer = ptv->mpeer;
967 AFPRefSocket(ptv->mpeer);
0f2b3406
EL
968
969 p->afp_v.copy_mode = ptv->copy_mode;
970 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
971 p->afp_v.peer = ptv->mpeer->peer;
972 } else {
973 p->afp_v.peer = NULL;
662dccd8 974 }
a369f8c3
EL
975 }
976 } else {
977 if (PacketCopyData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
9efa4ace
EL
978 /* As we can possibly fail to copy the data due to invalid data, let's
979 * skip this packet and switch to the next one.
980 */
981 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 982 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
9efa4ace
EL
983 ptv->frame_offset = 0;
984 }
a369f8c3 985 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 986 SCReturnInt(AFP_SURI_FAILURE);
a369f8c3
EL
987 }
988 }
d65f4585 989
a369f8c3
EL
990 /* Timestamp */
991 p->ts.tv_sec = h.h2->tp_sec;
992 p->ts.tv_usec = h.h2->tp_nsec/1000;
993 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
994 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
995
996 /* We only check for checksum disable */
997 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
998 p->flags |= PKT_IGNORE_CHECKSUM;
999 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
1000 if (ptv->livedev->ignore_checksum) {
1001 p->flags |= PKT_IGNORE_CHECKSUM;
1002 } else if (ChecksumAutoModeCheck(ptv->pkts,
1003 SC_ATOMIC_GET(ptv->livedev->pkts),
1004 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
1005 ptv->livedev->ignore_checksum = 1;
1006 p->flags |= PKT_IGNORE_CHECKSUM;
1007 }
1008 } else {
1009 if (h.h2->tp_status & TP_STATUS_CSUMNOTREADY) {
49b7b00f 1010 p->flags |= PKT_IGNORE_CHECKSUM;
a369f8c3 1011 }
ee6ba099
EL
1012 }
1013 if (h.h2->tp_status & TP_STATUS_LOSING) {
1014 emergency_flush = 1;
e8a4a4c4 1015 AFPDumpCounters(ptv);
a369f8c3
EL
1016 }
1017
5f12b234
EL
1018 /* release frame if not in zero copy mode */
1019 if (!(ptv->flags & AFP_ZERO_COPY)) {
1020 h.h2->tp_status = TP_STATUS_KERNEL;
1021 }
1022
a369f8c3
EL
1023 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
1024 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 1025 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
a369f8c3
EL
1026 ptv->frame_offset = 0;
1027 }
9efa4ace 1028 SCReturnInt(AFP_SURI_FAILURE);
49b7b00f 1029 }
49b7b00f 1030
27b5136b 1031next_frame:
69d0d484 1032 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
34b3f194 1033 ptv->frame_offset = 0;
350d7619
EL
1034 /* Get out of loop to be sure we will reach maintenance tasks */
1035 SCReturnInt(AFP_READ_OK);
34b3f194 1036 }
34b3f194
EL
1037 }
1038
49b7b00f
EL
1039 SCReturnInt(AFP_READ_OK);
1040}
1041
f947539d 1042#ifdef HAVE_TPACKET_V3
bae1b03c
EL
1043static inline void AFPFlushBlock(struct tpacket_block_desc *pbd)
1044{
1045 pbd->hdr.bh1.block_status = TP_STATUS_KERNEL;
1046}
1047
1048static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc *pbd, struct tpacket3_hdr *ppd)
1049{
1050 Packet *p = PacketGetFromQueueOrAlloc();
1051 if (p == NULL) {
9efa4ace 1052 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1053 }
1054 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
1055 if (ptv->flags & AFP_BYPASS) {
1056 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585 1057#ifdef HAVE_PACKET_EBPF
6062c27e
EL
1058 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1059 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 1060 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 1061#endif
e98b5e49 1062 } else if (ptv->flags & AFP_XDPBYPASS) {
8c880879 1063 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585 1064#ifdef HAVE_PACKET_EBPF
6062c27e
EL
1065 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1066 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 1067 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 1068#endif
8c880879 1069 }
bae1b03c
EL
1070
1071 ptv->pkts++;
bae1b03c
EL
1072 p->livedev = ptv->livedev;
1073 p->datalink = ptv->datalink;
1074
bcc03f17 1075 if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
e41a9d63
AG
1076 (ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
1077 p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
1078 p->vlan_idx = 1;
e41a9d63
AG
1079 }
1080
bae1b03c
EL
1081 if (ptv->flags & AFP_ZERO_COPY) {
1082 if (PacketSetData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1083 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1084 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c 1085 }
310b27a1 1086 p->afp_v.relptr = ppd;
bae1b03c
EL
1087 p->ReleasePacket = AFPReleasePacketV3;
1088 p->afp_v.mpeer = ptv->mpeer;
1089 AFPRefSocket(ptv->mpeer);
1090
1091 p->afp_v.copy_mode = ptv->copy_mode;
1092 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
1093 p->afp_v.peer = ptv->mpeer->peer;
1094 } else {
1095 p->afp_v.peer = NULL;
1096 }
1097 } else {
1098 if (PacketCopyData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1099 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1100 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1101 }
1102 }
1103 /* Timestamp */
1104 p->ts.tv_sec = ppd->tp_sec;
1105 p->ts.tv_usec = ppd->tp_nsec/1000;
1106 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
1107 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
1108
1109 /* We only check for checksum disable */
1110 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
1111 p->flags |= PKT_IGNORE_CHECKSUM;
1112 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
1113 if (ptv->livedev->ignore_checksum) {
1114 p->flags |= PKT_IGNORE_CHECKSUM;
1115 } else if (ChecksumAutoModeCheck(ptv->pkts,
1116 SC_ATOMIC_GET(ptv->livedev->pkts),
1117 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
1118 ptv->livedev->ignore_checksum = 1;
1119 p->flags |= PKT_IGNORE_CHECKSUM;
1120 }
1121 } else {
1122 if (ppd->tp_status & TP_STATUS_CSUMNOTREADY) {
1123 p->flags |= PKT_IGNORE_CHECKSUM;
1124 }
1125 }
1126
1127 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
9efa4ace 1128 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1129 }
1130
1131 SCReturnInt(AFP_READ_OK);
1132}
1133
1134static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
1135{
1136 int num_pkts = pbd->hdr.bh1.num_pkts, i;
1137 uint8_t *ppd;
9efa4ace 1138 int ret = 0;
bae1b03c
EL
1139
1140 ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
1141 for (i = 0; i < num_pkts; ++i) {
9efa4ace
EL
1142 ret = AFPParsePacketV3(ptv, pbd,
1143 (struct tpacket3_hdr *)ppd);
1144 switch (ret) {
1145 case AFP_READ_OK:
1146 break;
1147 case AFP_SURI_FAILURE:
1148 /* Internal error but let's just continue and
1149 * treat thenext packet */
1150 break;
1151 case AFP_READ_FAILURE:
1152 SCReturnInt(AFP_READ_FAILURE);
1153 default:
1154 SCReturnInt(ret);
5f84b55d 1155 }
bae1b03c
EL
1156 ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset;
1157 }
1158
1159 SCReturnInt(AFP_READ_OK);
1160}
f947539d 1161#endif /* HAVE_TPACKET_V3 */
bae1b03c
EL
1162
1163/**
1164 * \brief AF packet read function for ring
1165 *
1166 * This function fills
1167 * From here the packets are picked up by the DecodeAFP thread.
1168 *
1169 * \param user pointer to AFPThreadVars
1170 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1171 */
ab1200fb 1172static int AFPReadFromRingV3(AFPThreadVars *ptv)
bae1b03c 1173{
c2d0d938 1174#ifdef HAVE_TPACKET_V3
bae1b03c 1175 struct tpacket_block_desc *pbd;
9efa4ace 1176 int ret = 0;
bae1b03c
EL
1177
1178 /* Loop till we have packets available */
1179 while (1) {
1180 if (unlikely(suricata_ctl_flags != 0)) {
1181 SCLogInfo("Exiting AFP V3 read loop");
1182 break;
1183 }
1184
69d0d484 1185 pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
bae1b03c
EL
1186
1187 /* block is not ready to be read */
1188 if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
1189 SCReturnInt(AFP_READ_OK);
1190 }
1191
9efa4ace
EL
1192 ret = AFPWalkBlock(ptv, pbd);
1193 if (unlikely(ret != AFP_READ_OK)) {
bae1b03c 1194 AFPFlushBlock(pbd);
9efa4ace 1195 SCReturnInt(ret);
bae1b03c
EL
1196 }
1197
1198 AFPFlushBlock(pbd);
69d0d484 1199 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
bae1b03c
EL
1200 /* return to maintenance task after one loop on the ring */
1201 if (ptv->frame_offset == 0) {
1202 SCReturnInt(AFP_READ_OK);
1203 }
1204 }
c2d0d938 1205#endif
bae1b03c
EL
1206 SCReturnInt(AFP_READ_OK);
1207}
1208
13f13b6d
EL
1209/**
1210 * \brief Reference socket
1211 *
1212 * \retval O in case of failure, 1 in case of success
1213 */
1214static int AFPRefSocket(AFPPeer* peer)
1215{
1216 if (unlikely(peer == NULL))
1217 return 0;
1218
1219 (void)SC_ATOMIC_ADD(peer->sock_usage, 1);
1220 return 1;
1221}
1222
1223
1224/**
1225 * \brief Dereference socket
1226 *
1227 * \retval 1 if socket is still alive, 0 if not
1228 */
1229static int AFPDerefSocket(AFPPeer* peer)
1230{
4424f5a2
EL
1231 if (peer == NULL)
1232 return 1;
1233
13f13b6d
EL
1234 if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 0) {
1235 if (SC_ATOMIC_GET(peer->state) == AFP_STATE_DOWN) {
1236 SCLogInfo("Cleaning socket connected to '%s'", peer->iface);
1237 close(SC_ATOMIC_GET(peer->socket));
1238 return 0;
1239 }
1240 }
1241 return 1;
1242}
1243
ab1200fb 1244static void AFPSwitchState(AFPThreadVars *ptv, int state)
13f13b6d
EL
1245{
1246 ptv->afp_state = state;
1247 ptv->down_count = 0;
49b7b00f 1248
13f13b6d
EL
1249 AFPPeerUpdate(ptv);
1250
1251 /* Do cleaning if switching to down state */
1252 if (state == AFP_STATE_DOWN) {
5f84b55d
EL
1253#ifdef HAVE_TPACKET_V3
1254 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
1255 if (!ptv->ring.v3) {
1256 SCFree(ptv->ring.v3);
1257 ptv->ring.v3 = NULL;
5f84b55d
EL
1258 }
1259 } else {
1260#endif
69d0d484 1261 if (ptv->ring.v2) {
5f84b55d 1262 /* only used in reading phase, we can free it */
69d0d484
VJ
1263 SCFree(ptv->ring.v2);
1264 ptv->ring.v2 = NULL;
5f84b55d
EL
1265 }
1266#ifdef HAVE_TPACKET_V3
13f13b6d 1267 }
5f84b55d 1268#endif
13f13b6d
EL
1269 if (ptv->socket != -1) {
1270 /* we need to wait for all packets to return data */
1271 if (SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1) == 0) {
b86e1762 1272 SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
cba41207 1273 munmap(ptv->ring_buf, ptv->ring_buflen);
13f13b6d
EL
1274 close(ptv->socket);
1275 ptv->socket = -1;
1276 }
1277 }
1278 }
1279 if (state == AFP_STATE_UP) {
1280 (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
1281 }
1282}
49b7b00f 1283
7fea0ec6
EL
1284static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
1285 uint64_t *discarded_pkts)
919377d4
EL
1286{
1287 struct sockaddr_ll from;
1288 struct iovec iov;
1289 struct msghdr msg;
1290 struct timeval ts;
1291 union {
1292 struct cmsghdr cmsg;
1293 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
1294 } cmsg_buf;
1295
1296
1297 if (unlikely(suricata_ctl_flags != 0)) {
1298 return 1;
1299 }
1300
1301 msg.msg_name = &from;
1302 msg.msg_namelen = sizeof(from);
1303 msg.msg_iov = &iov;
1304 msg.msg_iovlen = 1;
1305 msg.msg_control = &cmsg_buf;
1306 msg.msg_controllen = sizeof(cmsg_buf);
1307 msg.msg_flags = 0;
1308
1309 iov.iov_len = ptv->datalen;
1310 iov.iov_base = ptv->data;
1311
339f0665 1312 (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
919377d4
EL
1313
1314 if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
1315 /* FIXME */
1316 return -1;
1317 }
1318
1319 if ((ts.tv_sec > synctv->tv_sec) ||
1320 (ts.tv_sec >= synctv->tv_sec &&
1321 ts.tv_usec > synctv->tv_usec)) {
1322 return 1;
1323 }
1324 return 0;
1325}
1326
7fea0ec6
EL
1327static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
1328 uint64_t *discarded_pkts)
919377d4
EL
1329{
1330 union thdr h;
1331
1332 if (unlikely(suricata_ctl_flags != 0)) {
1333 return 1;
1334 }
1335
f947539d 1336#ifdef HAVE_TPACKET_V3
bae1b03c 1337 if (ptv->flags & AFP_TPACKET_V3) {
cebbe06f 1338 int ret = 0;
7fea0ec6 1339 struct tpacket_block_desc *pbd;
69d0d484 1340 pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
7fea0ec6 1341 *discarded_pkts += pbd->hdr.bh1.num_pkts;
cebbe06f
VJ
1342 struct tpacket3_hdr *ppd =
1343 (struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
1344 if (((time_t)ppd->tp_sec > synctv->tv_sec) ||
1345 ((time_t)ppd->tp_sec == synctv->tv_sec &&
1346 (suseconds_t) (ppd->tp_nsec / 1000) > (suseconds_t)synctv->tv_usec)) {
1347 ret = 1;
1348 }
7fea0ec6 1349 AFPFlushBlock(pbd);
69d0d484 1350 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
cebbe06f 1351 return ret;
f947539d
VJ
1352
1353 } else
1354#endif
1355 {
7fea0ec6 1356 /* Read packet from ring */
69d0d484 1357 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
7fea0ec6
EL
1358 if (h.raw == NULL) {
1359 return -1;
1360 }
1361 (*discarded_pkts)++;
1362 if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
1363 ((time_t)h.h2->tp_sec == synctv->tv_sec &&
1364 (suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
1365 return 1;
1366 }
919377d4 1367
7fea0ec6 1368 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 1369 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
7fea0ec6
EL
1370 ptv->frame_offset = 0;
1371 }
919377d4
EL
1372 }
1373
1374
1375 return 0;
1376}
1377
806844d8
VJ
1378/** \brief wait for all afpacket threads to fully init
1379 *
1380 * Discard packets before all threads are ready, as the cluster
1381 * setup is not complete yet.
1382 *
1383 * if AFPPeersListStarted() returns true init is complete
1384 *
1385 * \retval r 1 = happy, otherwise unhappy
1386 */
7fea0ec6 1387static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
919377d4 1388{
919377d4 1389 struct timeval synctv;
806844d8
VJ
1390 struct pollfd fds;
1391
1392 fds.fd = ptv->socket;
1393 fds.events = POLLIN;
919377d4
EL
1394
1395 /* Set timeval to end of the world */
1396 synctv.tv_sec = 0xffffffff;
1397 synctv.tv_usec = 0xffffffff;
1398
1399 while (1) {
8709a20d 1400 int r = poll(&fds, 1, POLL_TIMEOUT);
806844d8
VJ
1401 if (r > 0 &&
1402 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1403 SCLogWarning(SC_ERR_AFP_READ, "poll failed %02x",
1404 fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL));
1405 return 0;
1406 } else if (r > 0) {
1407 if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
1408 gettimeofday(&synctv, NULL);
1409 }
1410 if (ptv->flags & AFP_RING_MODE) {
7fea0ec6 1411 r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
806844d8 1412 } else {
7fea0ec6 1413 r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
806844d8
VJ
1414 }
1415 SCLogDebug("Discarding on %s", ptv->tv->name);
1416 switch (r) {
1417 case 1:
9f7ba071 1418 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8
VJ
1419 return 1;
1420 case -1:
1421 return r;
1422 }
1423 /* no packets */
1424 } else if (r == 0 && AFPPeersListStarted()) {
86a3f064 1425 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8 1426 return 1;
43b6cbd4 1427 } else if (r < 0) { /* only exit on error */
806844d8
VJ
1428 SCLogWarning(SC_ERR_AFP_READ, "poll failed with retval %d", r);
1429 return 0;
919377d4
EL
1430 }
1431 }
1432 return 1;
1433}
1434
13f13b6d
EL
1435/**
1436 * \brief Try to reopen socket
1437 *
1438 * \retval 0 in case of success, negative if error occurs or a condition
1439 * is not met.
1440 */
c45d8985
EL
1441static int AFPTryReopen(AFPThreadVars *ptv)
1442{
13f13b6d
EL
1443 ptv->down_count++;
1444
13f13b6d
EL
1445 /* Don't reconnect till we have packet that did not release data */
1446 if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
1447 return -1;
1448 }
c45d8985 1449
8709a20d 1450 int afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
c45d8985 1451 if (afp_activate_r != 0) {
13f13b6d
EL
1452 if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
1453 SCLogWarning(SC_ERR_AFP_CREATE, "Can not open iface '%s'",
1454 ptv->iface);
1455 }
c45d8985
EL
1456 return afp_activate_r;
1457 }
1458
3bea3b39 1459 SCLogInfo("Interface '%s' is back", ptv->iface);
c45d8985
EL
1460 return 0;
1461}
1462
e80b30c0
EL
1463/**
1464 * \brief Main AF_PACKET reading Loop function
1465 */
1466TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
1467{
34581ce9
AS
1468 SCEnter();
1469
e80b30c0 1470 AFPThreadVars *ptv = (AFPThreadVars *)data;
e80b30c0
EL
1471 struct pollfd fds;
1472 int r;
34581ce9 1473 TmSlot *s = (TmSlot *)slot;
e8a4a4c4 1474 time_t last_dump = 0;
49612128 1475 time_t current_time;
5f400785 1476 int (*AFPReadFunc) (AFPThreadVars *);
7fea0ec6 1477 uint64_t discarded_pkts = 0;
e80b30c0 1478
34581ce9 1479 ptv->slot = s->slot_next;
e80b30c0 1480
5f400785 1481 if (ptv->flags & AFP_RING_MODE) {
bae1b03c
EL
1482 if (ptv->flags & AFP_TPACKET_V3) {
1483 AFPReadFunc = AFPReadFromRingV3;
1484 } else {
1485 AFPReadFunc = AFPReadFromRing;
1486 }
5f400785
EL
1487 } else {
1488 AFPReadFunc = AFPRead;
1489 }
1490
60400163
EL
1491 if (ptv->afp_state == AFP_STATE_DOWN) {
1492 /* Wait for our turn, threads before us must have opened the socket */
1493 while (AFPPeersListWaitTurn(ptv->mpeer)) {
1494 usleep(1000);
1992a227
EL
1495 if (suricata_ctl_flags != 0) {
1496 break;
1497 }
60400163
EL
1498 }
1499 r = AFPCreateSocket(ptv, ptv->iface, 1);
1500 if (r < 0) {
1992a227
EL
1501 switch (-r) {
1502 case AFP_FATAL_ERROR:
1503 SCLogError(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, fatal error");
1992a227
EL
1504 SCReturnInt(TM_ECODE_FAILED);
1505 case AFP_RECOVERABLE_ERROR:
1506 SCLogWarning(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, retrying soon");
1507 }
60400163
EL
1508 }
1509 AFPPeersListReachedInc();
1510 }
1511 if (ptv->afp_state == AFP_STATE_UP) {
86a3f064 1512 SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
c99dc5a7 1513 AFPSynchronizeStart(ptv, &discarded_pkts);
7fea0ec6
EL
1514 /* let's reset counter as we will start the capture at the
1515 * next function call */
1516#ifdef PACKET_STATISTICS
1517 struct tpacket_stats kstats;
1518 socklen_t len = sizeof (struct tpacket_stats);
1519 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
1520 &kstats, &len) > -1) {
1521 uint64_t pkts = 0;
1522 SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
1523 ", dropped %" PRIu32 "",
1524 ptv->tv->name,
1525 kstats.tp_packets, kstats.tp_drops);
1526 pkts = kstats.tp_packets - discarded_pkts - kstats.tp_drops;
1527 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, pkts);
1528 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, pkts);
1529 }
1530#endif
60400163
EL
1531 }
1532
e80b30c0
EL
1533 fds.fd = ptv->socket;
1534 fds.events = POLLIN;
1535
1536 while (1) {
1537 /* Start by checking the state of our interface */
1538 if (unlikely(ptv->afp_state == AFP_STATE_DOWN)) {
1539 int dbreak = 0;
662dccd8 1540
e80b30c0
EL
1541 do {
1542 usleep(AFP_RECONNECT_TIMEOUT);
1543 if (suricata_ctl_flags != 0) {
1544 dbreak = 1;
1545 break;
1546 }
1547 r = AFPTryReopen(ptv);
09e709d1 1548 fds.fd = ptv->socket;
e80b30c0
EL
1549 } while (r < 0);
1550 if (dbreak == 1)
1551 break;
1552 }
1553
1554 /* make sure we have at least one packet in the packet pool, to prevent
1555 * us from alloc'ing packets at line rate */
3c6e01f6 1556 PacketPoolWait();
e80b30c0
EL
1557
1558 r = poll(&fds, 1, POLL_TIMEOUT);
1559
1560 if (suricata_ctl_flags != 0) {
1561 break;
1562 }
1563
1564 if (r > 0 &&
1565 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1566 if (fds.revents & (POLLHUP | POLLRDHUP)) {
13f13b6d 1567 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1568 continue;
ff6365dd 1569 } else if (fds.revents & POLLERR) {
e80b30c0
EL
1570 char c;
1571 /* Do a recv to get errno */
1572 if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
1573 continue; /* what, no error? */
3bea3b39 1574 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1575 "Error reading data from iface '%s': (%d) %s",
3bea3b39 1576 ptv->iface, errno, strerror(errno));
13f13b6d 1577 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1578 continue;
ff6365dd 1579 } else if (fds.revents & POLLNVAL) {
e80b30c0 1580 SCLogError(SC_ERR_AFP_READ, "Invalid polling request");
13f13b6d 1581 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1582 continue;
1583 }
1584 } else if (r > 0) {
5f400785 1585 r = AFPReadFunc(ptv);
62e63e3f 1586 switch (r) {
27adbfa8
EL
1587 case AFP_READ_OK:
1588 /* Trigger one dump of stats every second */
49612128
EL
1589 current_time = time(NULL);
1590 if (current_time != last_dump) {
27adbfa8 1591 AFPDumpCounters(ptv);
49612128 1592 last_dump = current_time;
27adbfa8
EL
1593 }
1594 break;
62e63e3f
EL
1595 case AFP_READ_FAILURE:
1596 /* AFPRead in error: best to reset the socket */
3bea3b39 1597 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1598 "AFPRead error reading data from iface '%s': (%d) %s",
3bea3b39 1599 ptv->iface, errno, strerror(errno));
13f13b6d 1600 AFPSwitchState(ptv, AFP_STATE_DOWN);
62e63e3f 1601 continue;
9efa4ace
EL
1602 case AFP_SURI_FAILURE:
1603 StatsIncr(ptv->tv, ptv->capture_errors);
62e63e3f 1604 break;
27b5136b 1605 case AFP_KERNEL_DROP:
e8a4a4c4 1606 AFPDumpCounters(ptv);
27b5136b 1607 break;
e80b30c0 1608 }
11099cfa 1609 } else if (unlikely(r == 0)) {
f53e687b
EL
1610 /* Trigger one dump of stats every second */
1611 current_time = time(NULL);
1612 if (current_time != last_dump) {
1613 AFPDumpCounters(ptv);
1614 last_dump = current_time;
1615 }
ce71bf1f 1616 /* poll timed out, lets see handle our timeout path */
49599dfe 1617 TmThreadsCaptureHandleTimeout(tv, NULL);
11099cfa 1618
e80b30c0 1619 } else if ((r < 0) && (errno != EINTR)) {
efbb5ce0 1620 SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d) %s",
3bea3b39 1621 ptv->iface,
e80b30c0 1622 errno, strerror(errno));
13f13b6d 1623 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1624 continue;
1625 }
752f03e7 1626 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1627 }
1628
4e561d6b 1629 AFPDumpCounters(ptv);
752f03e7 1630 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1631 SCReturnInt(TM_ECODE_OK);
1632}
1633
13f13b6d
EL
1634static int AFPGetDevFlags(int fd, const char *ifname)
1635{
1636 struct ifreq ifr;
1637
1638 memset(&ifr, 0, sizeof(ifr));
1639 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1640
1641 if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) {
1642 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1643 ifname, strerror(errno));
1644 return -1;
1645 }
1646
1647 return ifr.ifr_flags;
1648}
1649
1650
e80b30c0 1651static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose)
c45d8985
EL
1652{
1653 struct ifreq ifr;
1654
1655 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1656 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1657
1658 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
cba41207
AG
1659 if (verbose)
1660 SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
1661 ifname, strerror(errno));
c45d8985
EL
1662 return -1;
1663 }
1664
1665 return ifr.ifr_ifindex;
1666}
1667
e80b30c0 1668static int AFPGetDevLinktype(int fd, const char *ifname)
c45d8985
EL
1669{
1670 struct ifreq ifr;
1671
1672 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1673 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1674
1675 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1676 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1677 ifname, strerror(errno));
1678 return -1;
1679 }
1680
e80b30c0
EL
1681 switch (ifr.ifr_hwaddr.sa_family) {
1682 case ARPHRD_LOOPBACK:
1683 return LINKTYPE_ETHERNET;
1684 case ARPHRD_PPP:
11eb1d7c 1685 case ARPHRD_NONE:
e80b30c0
EL
1686 return LINKTYPE_RAW;
1687 default:
1688 return ifr.ifr_hwaddr.sa_family;
1689 }
c45d8985
EL
1690}
1691
b7bf299e
EL
1692int AFPGetLinkType(const char *ifname)
1693{
1694 int ltype;
1695
1696 int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1697 if (fd == -1) {
1698 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
1699 return LINKTYPE_RAW;
1700 }
1701
1702 ltype = AFPGetDevLinktype(fd, ifname);
1703 close(fd);
1704
1705 return ltype;
1706}
1707
49b7b00f
EL
1708static int AFPComputeRingParams(AFPThreadVars *ptv, int order)
1709{
1710 /* Compute structure:
1711 Target is to store all pending packets
1712 with a size equal to MTU + auxdata
1713 And we keep a decent number of block
1714
1715 To do so:
1716 Compute frame_size (aligned to be able to fit in block
1717 Check which block size we need. Blocksize is a 2^n * pagesize
1718 We then need to get order, big enough to have
1719 frame_size < block size
1720 Find number of frame per block (divide)
1721 Fill in packet_req
1722
1723 Compute frame size:
1724 described in packet_mmap.txt
1725 dependant on snaplen (need to use a variable ?)
1726snaplen: MTU ?
1727tp_hdrlen determine_version in daq_afpacket
1728in V1: sizeof(struct tpacket_hdr);
1729in V2: val in getsockopt(instance->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len)
1730frame size: TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1731
1732 */
1733 int tp_hdrlen = sizeof(struct tpacket_hdr);
1734 int snaplen = default_packet_size;
1735
03032457
EL
1736 if (snaplen == 0) {
1737 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1738 if (snaplen <= 0) {
1739 SCLogWarning(SC_ERR_INVALID_VALUE,
1740 "Unable to get MTU, setting snaplen to sane default of 1514");
1741 snaplen = 1514;
1742 }
1743 }
1744
69d0d484
VJ
1745 ptv->req.v2.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1746 ptv->req.v2.tp_block_size = getpagesize() << order;
1747 int frames_per_block = ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size;
49b7b00f 1748 if (frames_per_block == 0) {
bae1b03c 1749 SCLogError(SC_ERR_INVALID_VALUE, "Frame size bigger than block size");
49b7b00f
EL
1750 return -1;
1751 }
69d0d484
VJ
1752 ptv->req.v2.tp_frame_nr = ptv->ring_size;
1753 ptv->req.v2.tp_block_nr = ptv->req.v2.tp_frame_nr / frames_per_block + 1;
49b7b00f 1754 /* exact division */
69d0d484 1755 ptv->req.v2.tp_frame_nr = ptv->req.v2.tp_block_nr * frames_per_block;
b3bf7a57 1756 SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
69d0d484
VJ
1757 ptv->req.v2.tp_block_size, ptv->req.v2.tp_block_nr,
1758 ptv->req.v2.tp_frame_size, ptv->req.v2.tp_frame_nr);
49b7b00f
EL
1759 return 1;
1760}
1761
c2d0d938 1762#ifdef HAVE_TPACKET_V3
bae1b03c
EL
1763static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
1764{
69d0d484
VJ
1765 ptv->req.v3.tp_block_size = ptv->block_size;
1766 ptv->req.v3.tp_frame_size = 2048;
bae1b03c
EL
1767 int frames_per_block = 0;
1768 int tp_hdrlen = sizeof(struct tpacket3_hdr);
1769 int snaplen = default_packet_size;
1770
1771 if (snaplen == 0) {
1772 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1773 if (snaplen <= 0) {
1774 SCLogWarning(SC_ERR_INVALID_VALUE,
1775 "Unable to get MTU, setting snaplen to sane default of 1514");
1776 snaplen = 1514;
1777 }
1778 }
1779
69d0d484
VJ
1780 ptv->req.v3.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1781 frames_per_block = ptv->req.v3.tp_block_size / ptv->req.v3.tp_frame_size;
bae1b03c
EL
1782
1783 if (frames_per_block == 0) {
1784 SCLogError(SC_ERR_INVALID_VALUE,
1785 "Block size is too small, it should be at least %d",
69d0d484 1786 ptv->req.v3.tp_frame_size);
bae1b03c
EL
1787 return -1;
1788 }
69d0d484 1789 ptv->req.v3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
bae1b03c 1790 /* exact division */
69d0d484
VJ
1791 ptv->req.v3.tp_frame_nr = ptv->req.v3.tp_block_nr * frames_per_block;
1792 ptv->req.v3.tp_retire_blk_tov = ptv->block_timeout;
1793 ptv->req.v3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
b3bf7a57 1794 SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
69d0d484
VJ
1795 ptv->req.v3.tp_block_size, ptv->req.v3.tp_block_nr,
1796 ptv->req.v3.tp_frame_size, ptv->req.v3.tp_frame_nr,
1797 ptv->req.v3.tp_block_size * ptv->req.v3.tp_block_nr
bae1b03c
EL
1798 );
1799 return 1;
1800}
c2d0d938 1801#endif
bae1b03c 1802
c7bde9df
EL
1803static int AFPSetupRing(AFPThreadVars *ptv, char *devname)
1804{
1805 int val;
1806 unsigned int len = sizeof(val), i;
c7bde9df 1807 int order;
f5c20191 1808 int r, mmap_flag;
c7bde9df 1809
c2d0d938 1810#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1811 if (ptv->flags & AFP_TPACKET_V3) {
1812 val = TPACKET_V3;
f947539d 1813 } else
c2d0d938 1814#endif
f947539d 1815 {
c7bde9df
EL
1816 val = TPACKET_V2;
1817 }
1818 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
1819 if (errno == ENOPROTOOPT) {
1820 if (ptv->flags & AFP_TPACKET_V3) {
1821 SCLogError(SC_ERR_AFP_CREATE,
1822 "Too old kernel giving up (need 3.2 for TPACKET_V3)");
1823 } else {
1824 SCLogError(SC_ERR_AFP_CREATE,
1825 "Too old kernel giving up (need 2.6.27 at least)");
1826 }
1827 }
1828 SCLogError(SC_ERR_AFP_CREATE, "Error when retrieving packet header len");
1829 return AFP_FATAL_ERROR;
1830 }
1831
f947539d
VJ
1832 val = TPACKET_V2;
1833#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1834 if (ptv->flags & AFP_TPACKET_V3) {
1835 val = TPACKET_V3;
c7bde9df 1836 }
f947539d 1837#endif
c7bde9df
EL
1838 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_VERSION, &val,
1839 sizeof(val)) < 0) {
1840 SCLogError(SC_ERR_AFP_CREATE,
1841 "Can't activate TPACKET_V2/TPACKET_V3 on packet socket: %s",
1842 strerror(errno));
1843 return AFP_FATAL_ERROR;
1844 }
1845
a40f08a2
EL
1846#ifdef HAVE_HW_TIMESTAMPING
1847 int req = SOF_TIMESTAMPING_RAW_HARDWARE;
1848 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req,
1849 sizeof(req)) < 0) {
1850 SCLogWarning(SC_ERR_AFP_CREATE,
1851 "Can't activate hardware timestamping on packet socket: %s",
1852 strerror(errno));
1853 }
1854#endif
1855
ecf59be4
EL
1856 /* Let's reserve head room so we can add the VLAN header in IPS
1857 * or TAP mode before write the packet */
1858 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
1859 /* Only one vlan is extracted from AFP header so
1860 * one VLAN header length is enough. */
1861 int reserve = VLAN_HEADER_LEN;
1862 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
1863 sizeof(reserve)) < 0) {
1864 SCLogError(SC_ERR_AFP_CREATE,
1865 "Can't activate reserve on packet socket: %s",
1866 strerror(errno));
1867 return AFP_FATAL_ERROR;
1868 }
1869 }
1870
c7bde9df 1871 /* Allocate RX ring */
c2d0d938 1872#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1873 if (ptv->flags & AFP_TPACKET_V3) {
1874 if (AFPComputeRingParamsV3(ptv) != 1) {
1875 return AFP_FATAL_ERROR;
1876 }
1877 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
69d0d484 1878 (void *) &ptv->req.v3, sizeof(ptv->req.v3));
c7bde9df
EL
1879 if (r < 0) {
1880 SCLogError(SC_ERR_MEM_ALLOC,
1881 "Unable to allocate RX Ring for iface %s: (%d) %s",
1882 devname,
1883 errno,
1884 strerror(errno));
1885 return AFP_FATAL_ERROR;
1886 }
1887 } else {
c2d0d938 1888#endif
fa902abe 1889 for (order = AFP_BLOCK_SIZE_DEFAULT_ORDER; order >= 0; order--) {
c7bde9df
EL
1890 if (AFPComputeRingParams(ptv, order) != 1) {
1891 SCLogInfo("Ring parameter are incorrect. Please correct the devel");
1892 return AFP_FATAL_ERROR;
1893 }
1894
1895 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1896 (void *) &ptv->req, sizeof(ptv->req));
1897
1898 if (r < 0) {
1899 if (errno == ENOMEM) {
1900 SCLogInfo("Memory issue with ring parameters. Retrying.");
1901 continue;
1902 }
1903 SCLogError(SC_ERR_MEM_ALLOC,
1904 "Unable to allocate RX Ring for iface %s: (%d) %s",
1905 devname,
1906 errno,
1907 strerror(errno));
1908 return AFP_FATAL_ERROR;
1909 } else {
1910 break;
1911 }
1912 }
1913 if (order < 0) {
1914 SCLogError(SC_ERR_MEM_ALLOC,
1915 "Unable to allocate RX Ring for iface %s (order 0 failed)",
1916 devname);
1917 return AFP_FATAL_ERROR;
1918 }
c2d0d938 1919#ifdef HAVE_TPACKET_V3
c7bde9df 1920 }
c2d0d938 1921#endif
c7bde9df
EL
1922
1923 /* Allocate the Ring */
c2d0d938 1924#ifdef HAVE_TPACKET_V3
c7bde9df 1925 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484 1926 ptv->ring_buflen = ptv->req.v3.tp_block_nr * ptv->req.v3.tp_block_size;
c7bde9df 1927 } else {
c2d0d938 1928#endif
69d0d484 1929 ptv->ring_buflen = ptv->req.v2.tp_block_nr * ptv->req.v2.tp_block_size;
c2d0d938 1930#ifdef HAVE_TPACKET_V3
c7bde9df 1931 }
c2d0d938 1932#endif
f5c20191
EL
1933 mmap_flag = MAP_SHARED;
1934 if (ptv->flags & AFP_MMAP_LOCKED)
1935 mmap_flag |= MAP_LOCKED;
cba41207 1936 ptv->ring_buf = mmap(0, ptv->ring_buflen, PROT_READ|PROT_WRITE,
f5c20191 1937 mmap_flag, ptv->socket, 0);
cba41207 1938 if (ptv->ring_buf == MAP_FAILED) {
88f5d7d1
EL
1939 SCLogError(SC_ERR_MEM_ALLOC, "Unable to mmap, error %s",
1940 strerror(errno));
c7bde9df
EL
1941 goto mmap_err;
1942 }
c2d0d938 1943#ifdef HAVE_TPACKET_V3
c7bde9df 1944 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
1945 ptv->ring.v3 = SCMalloc(ptv->req.v3.tp_block_nr * sizeof(*ptv->ring.v3));
1946 if (!ptv->ring.v3) {
1947 SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring.v3");
291af719 1948 goto postmmap_err;
c7bde9df 1949 }
69d0d484
VJ
1950 for (i = 0; i < ptv->req.v3.tp_block_nr; ++i) {
1951 ptv->ring.v3[i].iov_base = ptv->ring_buf + (i * ptv->req.v3.tp_block_size);
1952 ptv->ring.v3[i].iov_len = ptv->req.v3.tp_block_size;
c7bde9df
EL
1953 }
1954 } else {
c2d0d938 1955#endif
c7bde9df 1956 /* allocate a ring for each frame header pointer*/
69d0d484
VJ
1957 ptv->ring.v2 = SCMalloc(ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
1958 if (ptv->ring.v2 == NULL) {
c7bde9df 1959 SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate frame buf");
291af719 1960 goto postmmap_err;
c7bde9df 1961 }
69d0d484 1962 memset(ptv->ring.v2, 0, ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
c7bde9df
EL
1963 /* fill the header ring with proper frame ptr*/
1964 ptv->frame_offset = 0;
69d0d484
VJ
1965 for (i = 0; i < ptv->req.v2.tp_block_nr; ++i) {
1966 void *base = &(ptv->ring_buf[i * ptv->req.v2.tp_block_size]);
c7bde9df 1967 unsigned int j;
69d0d484
VJ
1968 for (j = 0; j < ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size; ++j, ++ptv->frame_offset) {
1969 (((union thdr **)ptv->ring.v2)[ptv->frame_offset]) = base;
1970 base += ptv->req.v2.tp_frame_size;
c7bde9df
EL
1971 }
1972 }
1973 ptv->frame_offset = 0;
c2d0d938 1974#ifdef HAVE_TPACKET_V3
c7bde9df 1975 }
c2d0d938 1976#endif
c7bde9df
EL
1977
1978 return 0;
1979
291af719 1980postmmap_err:
cba41207 1981 munmap(ptv->ring_buf, ptv->ring_buflen);
69d0d484
VJ
1982 if (ptv->ring.v2)
1983 SCFree(ptv->ring.v2);
1984 if (ptv->ring.v3)
1985 SCFree(ptv->ring.v3);
c7bde9df
EL
1986mmap_err:
1987 /* Packet mmap does the cleaning when socket is closed */
1988 return AFP_FATAL_ERROR;
1989}
1990
402bdf9b
VJ
/**
 * \brief Test if we can use FANOUT.
 *
 * Older kernels like those in CentOS6 have HAVE_PACKET_FANOUT defined but
 * fail to work, so support is probed at runtime: a throwaway AF_PACKET
 * socket is opened and asked to join fanout group \a cluster_id.
 *
 * \param cluster_id fanout group id to probe (only the low 16 bits are used)
 *
 * \retval 1 the PACKET_FANOUT socket option was accepted
 * \retval 0 the probe failed, or fanout support was not compiled in
 */
int AFPIsFanoutSupported(int cluster_id)
{
#ifdef HAVE_PACKET_FANOUT
    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0)
        return 0;

    /* Build the option in 32 bit unsigned arithmetic: with a 16 bit 'mode'
     * the shift below would be done on a promoted signed int and overflow
     * as soon as a high fanout flag such as PACKET_FANOUT_FLAG_DEFRAG is
     * set. */
    uint32_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
    /* probe the id the caller asked about, so the error below is accurate */
    uint32_t id = (uint32_t)cluster_id & 0xffff;
    uint32_t option = (mode << 16) | id;
    int r = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, (void *)&option, sizeof(option));
    close(fd);

    if (r < 0) {
        SCLogError(SC_ERR_INVALID_VALUE, "fanout not supported by kernel: "
                "Kernel too old or cluster-id %d already in use.", cluster_id);
        return 0;
    }
    return 1;
#else
    (void)cluster_id;
    return 0;
#endif
}
2017
91e1256b
EL
2018#ifdef HAVE_PACKET_EBPF
2019
2020static int SockFanoutSeteBPF(AFPThreadVars *ptv)
2021{
2022 int pfd = ptv->ebpf_lb_fd;
2023 if (pfd == -1) {
2024 SCLogError(SC_ERR_INVALID_VALUE,
2025 "Fanout file descriptor is invalid");
2026 return -1;
2027 }
2028
2029 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
2030 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf");
2031 return -1;
2032 }
2033 SCLogInfo("Activated eBPF on socket");
2034
2035 return 0;
2036}
2037
2038static int SetEbpfFilter(AFPThreadVars *ptv)
2039{
2040 int pfd = ptv->ebpf_filter_fd;
2041 if (pfd == -1) {
2042 SCLogError(SC_ERR_INVALID_VALUE,
2043 "Filter file descriptor is invalid");
2044 return -1;
2045 }
2046
2047 if (setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &pfd, sizeof(pfd))) {
2048 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
2049 return -1;
2050 }
2051 SCLogInfo("Activated eBPF filter on socket");
2052
2053 return 0;
2054}
2055#endif
2056
e80b30c0 2057static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
c45d8985
EL
2058{
2059 int r;
1992a227 2060 int ret = AFP_FATAL_ERROR;
c45d8985
EL
2061 struct packet_mreq sock_params;
2062 struct sockaddr_ll bind_address;
662dccd8 2063 int if_idx;
49b7b00f 2064
c45d8985
EL
2065 /* open socket */
2066 ptv->socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
2067 if (ptv->socket == -1) {
e80b30c0 2068 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
13f13b6d 2069 goto error;
c45d8985 2070 }
cba41207 2071
662dccd8 2072 if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);
cba41207
AG
2073
2074 if (if_idx == -1) {
fcd5e138 2075 goto socket_err;
cba41207
AG
2076 }
2077
c45d8985
EL
2078 /* bind socket */
2079 memset(&bind_address, 0, sizeof(bind_address));
2080 bind_address.sll_family = AF_PACKET;
2081 bind_address.sll_protocol = htons(ETH_P_ALL);
662dccd8 2082 bind_address.sll_ifindex = if_idx;
c45d8985
EL
2083 if (bind_address.sll_ifindex == -1) {
2084 if (verbose)
e80b30c0 2085 SCLogError(SC_ERR_AFP_CREATE, "Couldn't find iface %s", devname);
1992a227 2086 ret = AFP_RECOVERABLE_ERROR;
13f13b6d
EL
2087 goto socket_err;
2088 }
2089
cba41207
AG
2090 int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
2091 if (if_flags == -1) {
2092 if (verbose) {
2093 SCLogError(SC_ERR_AFP_READ,
2094 "Couldn't get flags for interface '%s'",
2095 ptv->iface);
2096 }
2097 ret = AFP_RECOVERABLE_ERROR;
2098 goto socket_err;
2099 } else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
2100 if (verbose) {
2101 SCLogError(SC_ERR_AFP_READ,
2102 "Interface '%s' is down",
2103 ptv->iface);
2104 }
2105 ret = AFP_RECOVERABLE_ERROR;
2106 goto socket_err;
2107 }
2108
13f13b6d
EL
2109 if (ptv->promisc != 0) {
2110 /* Force promiscuous mode */
2111 memset(&sock_params, 0, sizeof(sock_params));
2112 sock_params.mr_type = PACKET_MR_PROMISC;
2113 sock_params.mr_ifindex = bind_address.sll_ifindex;
2114 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP,(void *)&sock_params, sizeof(sock_params));
2115 if (r < 0) {
2116 SCLogError(SC_ERR_AFP_CREATE,
2117 "Couldn't switch iface %s to promiscuous, error %s",
2118 devname, strerror(errno));
c7bde9df 2119 goto socket_err;
13f13b6d
EL
2120 }
2121 }
2122
2123 if (ptv->checksum_mode == CHECKSUM_VALIDATION_KERNEL) {
2124 int val = 1;
2125 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_AUXDATA, &val,
2126 sizeof(val)) == -1 && errno != ENOPROTOOPT) {
2127 SCLogWarning(SC_ERR_NO_AF_PACKET,
4111331a 2128 "'kernel' checksum mode not supported, falling back to full mode.");
13f13b6d
EL
2129 ptv->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
2130 }
2131 }
2132
2133 /* set socket recv buffer size */
2134 if (ptv->buffer_size != 0) {
2135 /*
2136 * Set the socket buffer size to the specified value.
2137 */
b3bf7a57 2138 SCLogPerf("Setting AF_PACKET socket buffer to %d", ptv->buffer_size);
13f13b6d
EL
2139 if (setsockopt(ptv->socket, SOL_SOCKET, SO_RCVBUF,
2140 &ptv->buffer_size,
2141 sizeof(ptv->buffer_size)) == -1) {
2142 SCLogError(SC_ERR_AFP_CREATE,
2143 "Couldn't set buffer size to %d on iface %s, error %s",
2144 ptv->buffer_size, devname, strerror(errno));
c7bde9df 2145 goto socket_err;
13f13b6d
EL
2146 }
2147 }
2148
2149 r = bind(ptv->socket, (struct sockaddr *)&bind_address, sizeof(bind_address));
2150 if (r < 0) {
2151 if (verbose) {
2152 if (errno == ENETDOWN) {
2153 SCLogError(SC_ERR_AFP_CREATE,
2154 "Couldn't bind AF_PACKET socket, iface %s is down",
2155 devname);
2156 } else {
2157 SCLogError(SC_ERR_AFP_CREATE,
2158 "Couldn't bind AF_PACKET socket to iface %s, error %s",
2159 devname, strerror(errno));
2160 }
2161 }
1992a227 2162 ret = AFP_RECOVERABLE_ERROR;
c7bde9df 2163 goto socket_err;
13f13b6d
EL
2164 }
2165
91e1256b 2166
238ff231
EL
2167#ifdef HAVE_PACKET_FANOUT
2168 /* add binded socket to fanout group */
2169 if (ptv->threads > 1) {
238ff231
EL
2170 uint16_t mode = ptv->cluster_type;
2171 uint16_t id = ptv->cluster_id;
4111331a 2172 uint32_t option = (mode << 16) | (id & 0xffff);
238ff231
EL
2173 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
2174 if (r < 0) {
2175 SCLogError(SC_ERR_AFP_CREATE,
4111331a 2176 "Couldn't set fanout mode, error %s",
238ff231 2177 strerror(errno));
c7bde9df 2178 goto socket_err;
238ff231
EL
2179 }
2180 }
2181#endif
2182
91e1256b
EL
2183#ifdef HAVE_PACKET_EBPF
2184 if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
2185 r = SockFanoutSeteBPF(ptv);
2186 if (r < 0) {
2187 SCLogError(SC_ERR_AFP_CREATE,
2188 "Coudn't set EBPF, error %s",
2189 strerror(errno));
2190 goto socket_err;
2191 }
2192 }
2193#endif
2194
49b7b00f 2195 if (ptv->flags & AFP_RING_MODE) {
c7bde9df
EL
2196 ret = AFPSetupRing(ptv, devname);
2197 if (ret != 0)
13f13b6d 2198 goto socket_err;
49b7b00f
EL
2199 }
2200
86a3f064 2201 SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
c45d8985 2202
c85ee1e3
EL
2203 ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
2204 switch (ptv->datalink) {
2205 case ARPHRD_PPP:
2206 case ARPHRD_ATM:
2207 ptv->cooked = 1;
619414c5 2208 break;
c85ee1e3
EL
2209 }
2210
f47df5a6 2211 TmEcode rc = AFPSetBPFFilter(ptv);
f2a6fb8a 2212 if (rc == TM_ECODE_FAILED) {
39807b47
AG
2213 ret = AFP_FATAL_ERROR;
2214 goto socket_err;
f2a6fb8a
EL
2215 }
2216
49b7b00f 2217 /* Init is ok */
13f13b6d 2218 AFPSwitchState(ptv, AFP_STATE_UP);
c45d8985 2219 return 0;
13f13b6d 2220
13f13b6d
EL
2221socket_err:
2222 close(ptv->socket);
2223 ptv->socket = -1;
f47df5a6 2224 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
2225 if (ptv->ring.v3) {
2226 SCFree(ptv->ring.v3);
2227 ptv->ring.v3 = NULL;
f47df5a6
VJ
2228 }
2229 } else {
69d0d484
VJ
2230 if (ptv->ring.v2) {
2231 SCFree(ptv->ring.v2);
2232 ptv->ring.v2 = NULL;
f47df5a6
VJ
2233 }
2234 }
2235
13f13b6d 2236error:
1992a227 2237 return -ret;
c45d8985
EL
2238}
2239
f2a6fb8a
EL
2240TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
2241{
2242 struct bpf_program filter;
2243 struct sock_fprog fcode;
2244 int rc;
2245
91e1256b
EL
2246#ifdef HAVE_PACKET_EBPF
2247 if (ptv->ebpf_filter_fd != -1) {
2248 return SetEbpfFilter(ptv);
2249 }
2250#endif
2251
f2a6fb8a
EL
2252 if (!ptv->bpf_filter)
2253 return TM_ECODE_OK;
2254
f2a6fb8a
EL
2255 SCLogInfo("Using BPF '%s' on iface '%s'",
2256 ptv->bpf_filter,
2257 ptv->iface);
28e9e4c8
EL
2258
2259 char errbuf[PCAP_ERRBUF_SIZE];
2260 if (SCBPFCompile(default_packet_size, /* snaplen_arg */
f2a6fb8a
EL
2261 ptv->datalink, /* linktype_arg */
2262 &filter, /* program */
2263 ptv->bpf_filter, /* const char *buf */
cc82ef06 2264 1, /* optimize */
28e9e4c8
EL
2265 0, /* mask */
2266 errbuf,
2267 sizeof(errbuf)) == -1) {
2268 SCLogError(SC_ERR_AFP_CREATE, "Failed to compile BPF \"%s\": %s",
2269 ptv->bpf_filter,
2270 errbuf);
f2a6fb8a
EL
2271 return TM_ECODE_FAILED;
2272 }
2273
2274 fcode.len = filter.bf_len;
2275 fcode.filter = (struct sock_filter*)filter.bf_insns;
2276
2277 rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
2278
28e9e4c8 2279 SCBPFFree(&filter);
f2a6fb8a
EL
2280 if(rc == -1) {
2281 SCLogError(SC_ERR_AFP_CREATE, "Failed to attach filter: %s", strerror(errno));
2282 return TM_ECODE_FAILED;
2283 }
2284
f2a6fb8a
EL
2285 return TM_ECODE_OK;
2286}
2287
06173267
EL
2288#ifdef HAVE_PACKET_EBPF
2289/**
2290 * Insert a half flow in the kernel bypass table
2291 *
2292 * \param mapfd file descriptor of the protocol bypass table
2293 * \param key data to use as key in the table
2598078e 2294 * \return 0 in case of error, 1 if success
06173267 2295 */
69d2c8eb 2296static int AFPInsertHalfFlow(int mapd, void *key, unsigned int nr_cpus)
06173267 2297{
651a27e4 2298 BPF_DECLARE_PERCPU(struct pair, value, nr_cpus);
17a32bda 2299 unsigned int i;
1e729f05
EL
2300
2301 if (mapd == -1) {
2302 return 0;
2303 }
2304
94a622cb 2305 /* We use a per CPU structure so we have to set an array of values as the kernel
6ab1cbcb
EL
2306 * is not duplicating the data on each CPU by itself. */
2307 for (i = 0; i < nr_cpus; i++) {
651a27e4
EL
2308 BPF_PERCPU(value, i).packets = 0;
2309 BPF_PERCPU(value, i).bytes = 0;
17a32bda 2310 }
17a32bda
EL
2311 if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) != 0) {
2312 switch (errno) {
3379311e 2313 /* no more place in the hash */
17a32bda 2314 case E2BIG:
17a32bda 2315 return 0;
fcae1c18
EL
2316 /* no more place in the hash for some hardware bypass */
2317 case EAGAIN:
2318 return 0;
3379311e
EL
2319 /* if we already have the key then bypass is a success */
2320 case EEXIST:
2321 return 1;
2322 /* Not supposed to be there so issue a error */
17a32bda
EL
2323 default:
2324 SCLogError(SC_ERR_BPF, "Can't update eBPF map: %s (%d)",
2325 strerror(errno),
2326 errno);
2327 return 0;
06173267 2328 }
17a32bda
EL
2329 }
2330 return 1;
06173267 2331}
b07bda7a 2332
9206b30f
EL
2333static int AFPSetFlowStorage(Packet *p, int map_fd, void *key0, void* key1,
2334 int family)
b07bda7a
EL
2335{
2336 FlowBypassInfo *fc = FlowGetStorageById(p->flow, GetFlowBypassInfoID());
2337 if (fc) {
2338 EBPFBypassData *eb = SCCalloc(1, sizeof(EBPFBypassData));
2339 if (eb == NULL) {
9206b30f
EL
2340 EBPFDeleteKey(map_fd, key0);
2341 EBPFDeleteKey(map_fd, key1);
2342 LiveDevAddBypassFail(p->livedev, 1, family);
b07bda7a
EL
2343 SCFree(key0);
2344 SCFree(key1);
2345 return 0;
2346 }
2347 eb->key[0] = key0;
2348 eb->key[1] = key1;
2349 eb->mapfd = map_fd;
2350 eb->cpus_count = p->afp_v.nr_cpus;
2351 fc->BypassUpdate = EBPFBypassUpdate;
2352 fc->BypassFree = EBPFBypassFree;
2353 fc->bypass_data = eb;
9206b30f
EL
2354 } else {
2355 EBPFDeleteKey(map_fd, key0);
2356 EBPFDeleteKey(map_fd, key1);
2357 LiveDevAddBypassFail(p->livedev, 1, family);
2358 SCFree(key0);
2359 SCFree(key1);
2360 return 0;
b07bda7a 2361 }
9206b30f
EL
2362
2363 LiveDevAddBypassStats(p->livedev, 1, family);
6126f105 2364 LiveDevAddBypassSuccess(p->livedev, 1, family);
b07bda7a
EL
2365 return 1;
2366}
2367
06173267
EL
2368#endif
2369
2598078e 2370/**
94a622cb
EL
2371 * Bypass function for AF_PACKET capture in eBPF mode
2372 *
2373 * This function creates two half flows in the map shared with the kernel
2374 * to trigger bypass.
2375 *
2376 * The implementation of bypass is done via an IPv4 and an IPv6 flow table.
2377 * This table contains the list of half flows to bypass. The in-kernel filter
2378 * will skip/drop the packet if they belong to a flow in one of the flows
2379 * table.
2380 *
2381 * \param p the packet belonging to the flow to bypass
2382 * \return 0 if unable to bypass, 1 if success
2598078e 2383 */
06173267
EL
2384static int AFPBypassCallback(Packet *p)
2385{
2386#ifdef HAVE_PACKET_EBPF
2387 SCLogDebug("Calling af_packet callback function");
2388 /* Only bypass TCP and UDP */
2389 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2390 return 0;
2391 }
2392
fc2f2fa7
EL
2393 /* If we don't have a flow attached to packet the eBPF map entries
2394 * will be destroyed at first flow bypass manager pass as we won't
2395 * find any associated entry */
2396 if (p->flow == NULL) {
2397 return 0;
2398 }
06173267
EL
2399 /* Bypassing tunneled packets is currently not supported
2400 * because we can't discard the inner packet only due to
2401 * primitive parsing in eBPF */
2402 if (IS_TUNNEL_PKT(p)) {
2403 return 0;
2404 }
06173267 2405 if (PKT_IS_IPV4(p)) {
d65f4585 2406 SCLogDebug("add an IPv4");
eff10fce
EL
2407 if (p->afp_v.v4_map_fd == -1) {
2408 return 0;
2409 }
b07bda7a
EL
2410 struct flowv4_keys *keys[2];
2411 keys[0] = SCCalloc(1, sizeof(struct flowv4_keys));
2412 if (keys[0] == NULL) {
2413 return 0;
2414 }
2415 keys[0]->src = htonl(GET_IPV4_SRC_ADDR_U32(p));
2416 keys[0]->dst = htonl(GET_IPV4_DST_ADDR_U32(p));
2417 keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
2418 keys[0]->port16[1] = GET_TCP_DST_PORT(p);
d119845d
EL
2419 keys[0]->vlan0 = p->vlan_id[0];
2420 keys[0]->vlan1 = p->vlan_id[1];
8c880879 2421
d119845d
EL
2422 if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
2423 keys[0]->ip_proto = 1;
2424 } else {
2425 keys[0]->ip_proto = 0;
2426 }
69d2c8eb 2427 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
fc2f2fa7 2428 p->afp_v.nr_cpus) == 0) {
9206b30f 2429 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2430 SCFree(keys[0]);
2431 return 0;
2432 }
2433 keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
2434 if (keys[1] == NULL) {
9206b30f
EL
2435 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2436 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a 2437 SCFree(keys[0]);
06173267
EL
2438 return 0;
2439 }
b07bda7a
EL
2440 keys[1]->src = htonl(GET_IPV4_DST_ADDR_U32(p));
2441 keys[1]->dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
2442 keys[1]->port16[0] = GET_TCP_DST_PORT(p);
2443 keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
d119845d
EL
2444 keys[1]->vlan0 = p->vlan_id[0];
2445 keys[1]->vlan1 = p->vlan_id[1];
b07bda7a 2446
d119845d 2447 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2448 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
fc2f2fa7 2449 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2450 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2451 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2452 SCFree(keys[0]);
2453 SCFree(keys[1]);
06173267
EL
2454 return 0;
2455 }
315c29a8 2456 EBPFUpdateFlow(p->flow, p, NULL);
9206b30f 2457 return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
06173267
EL
2458 }
2459 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2460 if (PKT_IS_IPV6(p) &&
06173267 2461 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
d65f4585 2462 int i;
eff10fce
EL
2463 if (p->afp_v.v6_map_fd == -1) {
2464 return 0;
2465 }
06173267 2466 SCLogDebug("add an IPv6");
b07bda7a
EL
2467 struct flowv6_keys *keys[2];
2468 keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
2469 if (keys[0] == NULL) {
9206b30f 2470 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2471 return 0;
2472 }
06173267 2473 for (i = 0; i < 4; i++) {
b07bda7a
EL
2474 keys[0]->src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2475 keys[0]->dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2476 }
2477 keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
2478 keys[0]->port16[1] = GET_TCP_DST_PORT(p);
d119845d
EL
2479 keys[0]->vlan0 = p->vlan_id[0];
2480 keys[0]->vlan1 = p->vlan_id[1];
2481
2482 if (IPV6_GET_NH(p) == IPPROTO_TCP) {
2483 keys[0]->ip_proto = 1;
2484 } else {
2485 keys[0]->ip_proto = 0;
2486 }
69d2c8eb 2487 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
fc2f2fa7 2488 p->afp_v.nr_cpus) == 0) {
9206b30f 2489 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a 2490 SCFree(keys[0]);
06173267
EL
2491 return 0;
2492 }
b07bda7a
EL
2493 keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
2494 if (keys[1] == NULL) {
9206b30f
EL
2495 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2496 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2497 SCFree(keys[0]);
2498 return 0;
06173267 2499 }
b07bda7a
EL
2500 for (i = 0; i < 4; i++) {
2501 keys[1]->src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2502 keys[1]->dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2503 }
2504 keys[1]->port16[0] = GET_TCP_DST_PORT(p);
2505 keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
d119845d
EL
2506 keys[1]->vlan0 = p->vlan_id[0];
2507 keys[1]->vlan1 = p->vlan_id[1];
2508
2509 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2510 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
fc2f2fa7 2511 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2512 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2513 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2514 SCFree(keys[0]);
2515 SCFree(keys[1]);
06173267
EL
2516 return 0;
2517 }
fc2f2fa7
EL
2518 if (p->flow)
2519 EBPFUpdateFlow(p->flow, p, NULL);
9206b30f 2520 return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
06173267
EL
2521 }
2522#endif
2523 return 0;
2524}
2525
94a622cb
EL
2526/**
2527 * Bypass function for AF_PACKET capture in XDP mode
2528 *
2529 * This function creates two half flows in the map shared with the kernel
2530 * to trigger bypass. This function is similar to AFPBypassCallback() but
2531 * the bytes order is changed for some data due to the way we get the data
2532 * in the XDP case.
2533 *
2534 * \param p the packet belonging to the flow to bypass
2535 * \return 0 if unable to bypass, 1 if success
2536 */
8c880879
EL
2537static int AFPXDPBypassCallback(Packet *p)
2538{
2539#ifdef HAVE_PACKET_XDP
2540 SCLogDebug("Calling af_packet callback function");
2541 /* Only bypass TCP and UDP */
2542 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2543 return 0;
2544 }
2545
fc2f2fa7
EL
2546 /* If we don't have a flow attached to packet the eBPF map entries
2547 * will be destroyed at first flow bypass manager pass as we won't
2548 * find any associated entry */
2549 if (p->flow == NULL) {
2550 return 0;
2551 }
8c880879
EL
2552 /* Bypassing tunneled packets is currently not supported
2553 * because we can't discard the inner packet only due to
2554 * primitive parsing in eBPF */
2555 if (IS_TUNNEL_PKT(p)) {
2556 return 0;
2557 }
8c880879 2558 if (PKT_IS_IPV4(p)) {
b07bda7a
EL
2559 struct flowv4_keys *keys[2];
2560 keys[0]= SCCalloc(1, sizeof(struct flowv4_keys));
2561 if (keys[0] == NULL) {
9206b30f 2562 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2563 return 0;
2564 }
eff10fce 2565 if (p->afp_v.v4_map_fd == -1) {
b07bda7a 2566 SCFree(keys[0]);
eff10fce
EL
2567 return 0;
2568 }
b07bda7a
EL
2569 keys[0]->src = p->src.addr_data32[0];
2570 keys[0]->dst = p->dst.addr_data32[0];
94a622cb 2571 /* In the XDP filter we get port from parsing of packet and not from skb
6062c27e 2572 * (as in eBPF filter) so we need to pass from host to network order */
b07bda7a
EL
2573 keys[0]->port16[0] = htons(p->sp);
2574 keys[0]->port16[1] = htons(p->dp);
d119845d
EL
2575 keys[0]->vlan0 = p->vlan_id[0];
2576 keys[0]->vlan1 = p->vlan_id[1];
2577 if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
2578 keys[0]->ip_proto = 1;
2579 } else {
2580 keys[0]->ip_proto = 0;
2581 }
69d2c8eb 2582 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
fc2f2fa7 2583 p->afp_v.nr_cpus) == 0) {
9206b30f 2584 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2585 SCFree(keys[0]);
2586 return 0;
2587 }
2588 keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
2589 if (keys[1] == NULL) {
9206b30f
EL
2590 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2591 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a 2592 SCFree(keys[0]);
8c880879
EL
2593 return 0;
2594 }
b07bda7a
EL
2595 keys[1]->src = p->dst.addr_data32[0];
2596 keys[1]->dst = p->src.addr_data32[0];
2597 keys[1]->port16[0] = htons(p->dp);
2598 keys[1]->port16[1] = htons(p->sp);
d119845d
EL
2599 keys[1]->vlan0 = p->vlan_id[0];
2600 keys[1]->vlan1 = p->vlan_id[1];
2601 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2602 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
fc2f2fa7 2603 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2604 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2605 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2606 SCFree(keys[0]);
2607 SCFree(keys[1]);
8c880879
EL
2608 return 0;
2609 }
9206b30f 2610 return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
8c880879
EL
2611 }
2612 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2613 if (PKT_IS_IPV6(p) &&
8c880879 2614 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
8c880879 2615 SCLogDebug("add an IPv6");
eff10fce
EL
2616 if (p->afp_v.v6_map_fd == -1) {
2617 return 0;
2618 }
d65f4585 2619 int i;
b07bda7a
EL
2620 struct flowv6_keys *keys[2];
2621 keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
2622 if (keys[0] == NULL) {
2623 return 0;
2624 }
2625
8c880879 2626 for (i = 0; i < 4; i++) {
b07bda7a
EL
2627 keys[0]->src[i] = GET_IPV6_SRC_ADDR(p)[i];
2628 keys[0]->dst[i] = GET_IPV6_DST_ADDR(p)[i];
2629 }
2630 keys[0]->port16[0] = htons(GET_TCP_SRC_PORT(p));
2631 keys[0]->port16[1] = htons(GET_TCP_DST_PORT(p));
d119845d
EL
2632 keys[0]->vlan0 = p->vlan_id[0];
2633 keys[0]->vlan1 = p->vlan_id[1];
2634 if (IPV6_GET_NH(p) == IPPROTO_TCP) {
2635 keys[0]->ip_proto = 1;
2636 } else {
2637 keys[0]->ip_proto = 0;
2638 }
69d2c8eb 2639 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
fc2f2fa7 2640 p->afp_v.nr_cpus) == 0) {
9206b30f 2641 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a 2642 SCFree(keys[0]);
8c880879
EL
2643 return 0;
2644 }
b07bda7a
EL
2645 keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
2646 if (keys[1] == NULL) {
9206b30f
EL
2647 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2648 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2649 SCFree(keys[0]);
2650 return 0;
8c880879 2651 }
b07bda7a
EL
2652 for (i = 0; i < 4; i++) {
2653 keys[1]->src[i] = GET_IPV6_DST_ADDR(p)[i];
2654 keys[1]->dst[i] = GET_IPV6_SRC_ADDR(p)[i];
2655 }
2656 keys[1]->port16[0] = htons(GET_TCP_DST_PORT(p));
2657 keys[1]->port16[1] = htons(GET_TCP_SRC_PORT(p));
d119845d
EL
2658 keys[1]->vlan0 = p->vlan_id[0];
2659 keys[1]->vlan1 = p->vlan_id[1];
2660 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2661 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
fc2f2fa7 2662 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2663 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2664 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2665 SCFree(keys[0]);
2666 SCFree(keys[1]);
8c880879
EL
2667 return 0;
2668 }
9206b30f 2669 return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
8c880879
EL
2670 }
2671#endif
2672 return 0;
2673}
2674
5e62ae6d
EL
2675
/* Flags consulted by ReceiveAFPThreadInit() before it logs that the
 * "flow_table_v4" / "flow_table_v6" eBPF map could not be found. */
bool g_flowv4_ok = true;
bool g_flowv6_ok = true;
2678
c45d8985
EL
2679/**
2680 * \brief Init function for ReceiveAFP.
2681 *
2682 * \param tv pointer to ThreadVars
2683 * \param initdata pointer to the interface passed from the user
2684 * \param data pointer gets populated with AFPThreadVars
2685 *
2686 * \todo Create a general AFP setup function.
2687 */
ab1200fb 2688TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
8f1d7503 2689{
c45d8985 2690 SCEnter();
ab1200fb 2691 AFPIfaceConfig *afpconfig = (AFPIfaceConfig *)initdata;
c45d8985 2692
c45d8985
EL
2693 if (initdata == NULL) {
2694 SCLogError(SC_ERR_INVALID_ARGUMENT, "initdata == NULL");
2695 SCReturnInt(TM_ECODE_FAILED);
2696 }
2697
2698 AFPThreadVars *ptv = SCMalloc(sizeof(AFPThreadVars));
e176be6f 2699 if (unlikely(ptv == NULL)) {
45d5c3ca 2700 afpconfig->DerefFunc(afpconfig);
c45d8985 2701 SCReturnInt(TM_ECODE_FAILED);
45d5c3ca 2702 }
c45d8985
EL
2703 memset(ptv, 0, sizeof(AFPThreadVars));
2704
2705 ptv->tv = tv;
2706 ptv->cooked = 0;
2707
fbca1a4e 2708 strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
c45d8985
EL
2709 ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2710
51eb9605
EL
2711 ptv->livedev = LiveGetDevice(ptv->iface);
2712 if (ptv->livedev == NULL) {
2713 SCLogError(SC_ERR_INVALID_VALUE, "Unable to find Live device");
11bdf483 2714 SCFree(ptv);
51eb9605
EL
2715 SCReturnInt(TM_ECODE_FAILED);
2716 }
2717
fbca1a4e 2718 ptv->buffer_size = afpconfig->buffer_size;
8879df80 2719 ptv->ring_size = afpconfig->ring_size;
fa902abe 2720 ptv->block_size = afpconfig->block_size;
8baf64f5 2721 ptv->block_timeout = afpconfig->block_timeout;
e80b30c0 2722
df7dbe36 2723 ptv->promisc = afpconfig->promisc;
6062e00c 2724 ptv->checksum_mode = afpconfig->checksum_mode;
6efd37a3 2725 ptv->bpf_filter = NULL;
df7dbe36 2726
fbca1a4e 2727 ptv->threads = 1;
e80b30c0
EL
2728#ifdef HAVE_PACKET_FANOUT
2729 ptv->cluster_type = PACKET_FANOUT_LB;
2730 ptv->cluster_id = 1;
2731 /* We only set cluster info if the number of reader threads is greater than 1 */
fbca1a4e 2732 if (afpconfig->threads > 1) {
9d882116
VJ
2733 ptv->cluster_id = afpconfig->cluster_id;
2734 ptv->cluster_type = afpconfig->cluster_type;
2735 ptv->threads = afpconfig->threads;
e80b30c0
EL
2736 }
2737#endif
49b7b00f 2738 ptv->flags = afpconfig->flags;
e80b30c0 2739
f2a6fb8a
EL
2740 if (afpconfig->bpf_filter) {
2741 ptv->bpf_filter = afpconfig->bpf_filter;
2742 }
91e1256b
EL
2743 ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
2744 ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
8c880879 2745 ptv->xdp_mode = afpconfig->xdp_mode;
36838017 2746#ifdef HAVE_PACKET_EBPF
4cf53100 2747 ptv->ebpf_t_config.cpus_count = UtilCpuGetNumProcessorsConfigured();
f2a6fb8a 2748
d65f4585 2749 if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
126488f7 2750 ptv->v4_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v4");
d65f4585 2751 if (ptv->v4_map_fd == -1) {
5e62ae6d
EL
2752 if (g_flowv4_ok == false) {
2753 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'",
2754 "flow_table_v4");
2755 g_flowv4_ok = true;
2756 }
d65f4585 2757 }
126488f7 2758 ptv->v6_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v6");
d65f4585 2759 if (ptv->v6_map_fd == -1) {
5e62ae6d
EL
2760 if (g_flowv6_ok) {
2761 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'",
2762 "flow_table_v6");
2763 g_flowv6_ok = false;
2764 }
d65f4585
EL
2765 }
2766 }
4cf53100 2767 ptv->ebpf_t_config = afpconfig->ebpf_t_config;
d65f4585
EL
2768#endif
2769
6efd37a3 2770#ifdef PACKET_STATISTICS
1ef786e7
VJ
2771 ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
2772 ptv->tv);
2773 ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
2774 ptv->tv);
9efa4ace
EL
2775 ptv->capture_errors = StatsRegisterCounter("capture.errors",
2776 ptv->tv);
6efd37a3
EL
2777#endif
2778
662dccd8
EL
2779 ptv->copy_mode = afpconfig->copy_mode;
2780 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
2781 strlcpy(ptv->out_iface, afpconfig->out_iface, AFP_IFACE_NAME_LENGTH);
2782 ptv->out_iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
b7e78d33
EL
2783 /* Warn about BPF filter consequence */
2784 if (ptv->bpf_filter) {
2785 SCLogWarning(SC_WARN_UNCOMMON, "Enabling a BPF filter in IPS mode result"
2786 " in dropping all non matching packets.");
2787 }
662dccd8 2788 }
c85ee1e3 2789
b7e78d33 2790
0581a23f
EL
2791 if (AFPPeersListAdd(ptv) == TM_ECODE_FAILED) {
2792 SCFree(ptv);
2793 afpconfig->DerefFunc(afpconfig);
2794 SCReturnInt(TM_ECODE_FAILED);
2795 }
2796
e80b30c0
EL
2797#define T_DATA_SIZE 70000
2798 ptv->data = SCMalloc(T_DATA_SIZE);
2799 if (ptv->data == NULL) {
45d5c3ca 2800 afpconfig->DerefFunc(afpconfig);
6019ae3d 2801 SCFree(ptv);
e80b30c0 2802 SCReturnInt(TM_ECODE_FAILED);
c45d8985 2803 }
e80b30c0
EL
2804 ptv->datalen = T_DATA_SIZE;
2805#undef T_DATA_SIZE
2806
c45d8985 2807 *data = (void *)ptv;
fbca1a4e 2808
45d5c3ca 2809 afpconfig->DerefFunc(afpconfig);
71e47868 2810
2cd6e128
EL
2811 /* If kernel is older than 3.0, VLAN is not stripped so we don't
2812 * get the info from packet extended header but we will use a standard
2813 * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
bcc03f17
MF
2814 if (SCKernelVersionIsAtLeast(3, 0)) {
2815 ptv->flags |= AFP_VLAN_IN_HEADER;
2cd6e128
EL
2816 }
2817
c45d8985
EL
2818 SCReturnInt(TM_ECODE_OK);
2819}
2820
2821/**
2822 * \brief This function prints stats to the screen at exit.
2823 * \param tv pointer to ThreadVars
2824 * \param data pointer that gets cast into AFPThreadVars for ptv
2825 */
8f1d7503
KS
2826void ReceiveAFPThreadExitStats(ThreadVars *tv, void *data)
2827{
c45d8985
EL
2828 SCEnter();
2829 AFPThreadVars *ptv = (AFPThreadVars *)data;
9549faae
EL
2830
2831#ifdef PACKET_STATISTICS
e8a4a4c4 2832 AFPDumpCounters(ptv);
b3bf7a57 2833 SCLogPerf("(%s) Kernel: Packets %" PRIu64 ", dropped %" PRIu64 "",
6efd37a3 2834 tv->name,
752f03e7
VJ
2835 StatsGetLocalCounterValue(tv, ptv->capture_kernel_packets),
2836 StatsGetLocalCounterValue(tv, ptv->capture_kernel_drops));
9549faae 2837#endif
c45d8985
EL
2838}
2839
2840/**
2841 * \brief DeInit function closes af packet socket at exit.
2842 * \param tv pointer to ThreadVars
2843 * \param data pointer that gets cast into AFPThreadVars for ptv
2844 */
8f1d7503
KS
2845TmEcode ReceiveAFPThreadDeinit(ThreadVars *tv, void *data)
2846{
c45d8985
EL
2847 AFPThreadVars *ptv = (AFPThreadVars *)data;
2848
13f13b6d
EL
2849 AFPSwitchState(ptv, AFP_STATE_DOWN);
2850
8c880879 2851#ifdef HAVE_PACKET_XDP
4cf53100
EL
2852 if ((ptv->ebpf_t_config.flags & EBPF_XDP_CODE) &&
2853 (!(ptv->ebpf_t_config.flags & EBPF_PINNED_MAPS))) {
2854 EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
2855 }
8c880879 2856#endif
e80b30c0
EL
2857 if (ptv->data != NULL) {
2858 SCFree(ptv->data);
2859 ptv->data = NULL;
2860 }
2861 ptv->datalen = 0;
2862
f2a6fb8a 2863 ptv->bpf_filter = NULL;
69d0d484
VJ
2864 if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring.v3) {
2865 SCFree(ptv->ring.v3);
ce59ec5d 2866 } else {
69d0d484
VJ
2867 if (ptv->ring.v2)
2868 SCFree(ptv->ring.v2);
ce59ec5d 2869 }
f2a6fb8a 2870
7127ae2b 2871 SCFree(ptv);
c45d8985
EL
2872 SCReturnInt(TM_ECODE_OK);
2873}
2874
2875/**
2876 * \brief This function passes off to link type decoders.
2877 *
f8aed4ce 2878 * DecodeAFP decodes packets from AF_PACKET and passes
c45d8985
EL
2879 * them off to the proper link type decoder.
 * The decoder is selected by p->datalink. An unsupported datalink type is
 * only logged as an error; the packet is still finalized afterwards.
 * Pseudo packets must not be passed in (enforced with BUG_ON).
2880 *
2881 * \param tv pointer to ThreadVars
2882 * \param p pointer to the current packet
2883 * \param data pointer that gets cast into DecodeThreadVars for dtv
 * \retval TM_ECODE_OK always, including for unsupported datalink types
c45d8985 2884 */
f8aed4ce 2885TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data)
c45d8985
EL
2886{
2887 SCEnter();
2888 DecodeThreadVars *dtv = (DecodeThreadVars *)data;
2889
7810f224 2890 BUG_ON(PKT_IS_PSEUDOPKT(p));
f7b1aefa 2891
c45d8985 2892 /* update counters */
14466a80 2893 DecodeUpdatePacketCounters(tv, dtv, p);
c45d8985 2894
1fb7c0dd
EL
2895 /* If Suricata has set a vlan while reading the packet, we increase the vlan counter */
2896 if (p->vlan_idx) {
1c0b4ee0 2897 StatsIncr(tv, dtv->counter_vlan);
1fb7c0dd
EL
2898 }
2899
c45d8985 2900 /* call the decoder matching the packet's datalink type */
49dbb455 2901 switch (p->datalink) {
c45d8985 2902 case LINKTYPE_ETHERNET:
f8aed4ce 2903 DecodeEthernet(tv, dtv, p,GET_PKT_DATA(p), GET_PKT_LEN(p));
c45d8985 2904 break;
49dbb455 2905 case LINKTYPE_LINUX_SLL:
f8aed4ce 2906 DecodeSll(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
49dbb455 2907 break;
c45d8985 2908 case LINKTYPE_PPP:
f8aed4ce 2909 DecodePPP(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
c45d8985
EL
2910 break;
/* raw and GRE-over-IP link types share the raw decoder */
2911 case LINKTYPE_RAW:
f67aa5de 2912 case LINKTYPE_GRE_OVER_IP:
f8aed4ce 2913 DecodeRaw(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
c45d8985 2914 break;
49dbb455 2915 case LINKTYPE_NULL:
f8aed4ce 2916 DecodeNull(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
49dbb455 2917 break;
c45d8985
EL
2918 default:
/* no decoder is run for this packet; execution continues to finalize below */
2919 SCLogError(SC_ERR_DATALINK_UNIMPLEMENTED, "Error: datalink type %" PRId32 " not yet supported in module DecodeAFP", p->datalink);
2920 break;
2921 }
2922
3088b6ac 2923 PacketDecodeFinalize(tv, dtv, p);
e7f09f24 2924
c45d8985
EL
2925 SCReturnInt(TM_ECODE_OK);
2926}
2927
/**
 * \brief Thread init for the AF_PACKET decoder: allocates the DecodeThreadVars.
 *
 * \param tv pointer to ThreadVars
 * \param initdata not used by this function
 * \param data output; on success receives the newly allocated
 *             DecodeThreadVars, on failure it is left untouched
 * \retval TM_ECODE_OK on success
 * \retval TM_ECODE_FAILED if DecodeThreadVarsAlloc() returned NULL
 */
ab1200fb 2928TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
c45d8985
EL
2929{
2930 SCEnter();
2931 DecodeThreadVars *dtv = NULL;
2932
5f307aca 2933 dtv = DecodeThreadVarsAlloc(tv);
c45d8985
EL
2934
2935 if (dtv == NULL)
2936 SCReturnInt(TM_ECODE_FAILED);
2937
2938 DecodeRegisterPerfCounters(dtv, tv);
2939
/* hand the decode context back to the caller through the output pointer */
2940 *data = (void *)dtv;
2941
2942 SCReturnInt(TM_ECODE_OK);
2943}
2944
2864f9ee
VJ
/**
 * \brief Thread deinit for the AF_PACKET decoder: frees the DecodeThreadVars.
 *
 * \param tv pointer to ThreadVars
 * \param data pointer handed to DecodeThreadVarsFree(); may be NULL, in
 *             which case nothing is freed
 * \retval TM_ECODE_OK always
 */
2945TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data)
2946{
/* NOTE(review): SCReturnInt() is used below without a matching SCEnter() -- confirm. */
2947 if (data != NULL)
98c88d51 2948 DecodeThreadVarsFree(tv, data);
2864f9ee
VJ
2949 SCReturnInt(TM_ECODE_OK);
2950}
2951
e80b30c0 2952#endif /* HAVE_AF_PACKET */
c45d8985 2953/* eof */
a6457262
EL
2954/**
2955 * @}
2956 */