]> git.ipfire.org Git - people/ms/suricata.git/blame - src/source-af-packet.c
af-packet: PacketSetData can't fail; remove check
[people/ms/suricata.git] / src / source-af-packet.c
CommitLineData
8b08b034 1/* Copyright (C) 2011-2021 Open Information Security Foundation
c45d8985
EL
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
a6457262
EL
18/**
19 * \defgroup afppacket AF_PACKET running mode
20 *
21 * @{
22 */
23
c45d8985
EL
24/**
25 * \file
26 *
27 * \author Eric Leblond <eric@regit.org>
28 *
29 * AF_PACKET socket acquisition support
30 *
c45d8985
EL
31 */
32
91e1256b
EL
33#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34#define SC_PCAP_DONT_INCLUDE_PCAP_H 1
c45d8985
EL
35#include "suricata-common.h"
36#include "suricata.h"
37#include "decode.h"
38#include "packet-queue.h"
39#include "threads.h"
40#include "threadvars.h"
41#include "tm-queuehandlers.h"
42#include "tm-modules.h"
43#include "tm-threads.h"
44#include "tm-threads-common.h"
45#include "conf.h"
17a32bda 46#include "util-cpu.h"
c45d8985 47#include "util-debug.h"
51eb9605 48#include "util-device.h"
d65f4585 49#include "util-ebpf.h"
c45d8985
EL
50#include "util-error.h"
51#include "util-privs.h"
e80b30c0 52#include "util-optimize.h"
51eb9605 53#include "util-checksum.h"
ac56b1bf 54#include "util-ioctl.h"
2cd6e128 55#include "util-host-info.h"
c45d8985
EL
56#include "tmqh-packetpool.h"
57#include "source-af-packet.h"
34b3f194 58#include "runmodes.h"
b07bda7a 59#include "flow-storage.h"
3f8e15f7 60#include "util-validate.h"
c45d8985 61
e80b30c0 62#ifdef HAVE_AF_PACKET
472e061c
VJ
63
64#if HAVE_SYS_IOCTL_H
2bc0be6e 65#include <sys/ioctl.h>
472e061c
VJ
66#endif
67
b37554e0
EL
68#if HAVE_LINUX_SOCKIOS_H
69#include <linux/sockios.h>
70#endif
71
06173267
EL
72#ifdef HAVE_PACKET_EBPF
73#include "util-ebpf.h"
74#include <bpf/libbpf.h>
75#include <bpf/bpf.h>
76#endif
77
91e1256b
EL
/* Local declaration of the BPF program descriptor. PCAP_DONT_INCLUDE_PCAP_BPF_H
 * is defined above, so the layout is spelled out here. */
struct bpf_program {
    unsigned int bf_len;       /* presumably the number of entries in bf_insns */
    struct bpf_insn *bf_insns; /* instruction array */
};
82
83#ifdef HAVE_PCAP_H
84#include <pcap.h>
85#endif
86
87#ifdef HAVE_PCAP_PCAP_H
88#include <pcap/pcap.h>
89#endif
90
28e9e4c8
EL
91#include "util-bpf.h"
92
472e061c 93#if HAVE_LINUX_IF_ETHER_H
c45d8985 94#include <linux/if_ether.h>
472e061c
VJ
95#endif
96
97#if HAVE_LINUX_IF_PACKET_H
c45d8985 98#include <linux/if_packet.h>
472e061c
VJ
99#endif
100
101#if HAVE_LINUX_IF_ARP_H
c45d8985 102#include <linux/if_arp.h>
472e061c 103#endif
f2a6fb8a 104
472e061c 105#if HAVE_LINUX_FILTER_H
f2a6fb8a 106#include <linux/filter.h>
e80b30c0 107#endif
c45d8985 108
472e061c 109#if HAVE_SYS_MMAN_H
49b7b00f 110#include <sys/mman.h>
472e061c
VJ
111#endif
112
a40f08a2
EL
113#ifdef HAVE_HW_TIMESTAMPING
114#include <linux/net_tstamp.h>
115#endif
116
472e061c 117#endif /* HAVE_AF_PACKET */
49b7b00f 118
c45d8985
EL
119extern int max_pending_packets;
120
e80b30c0
EL
121#ifndef HAVE_AF_PACKET
122
ab1200fb 123TmEcode NoAFPSupportExit(ThreadVars *, const void *, void **);
e80b30c0 124
8f1d7503
KS
125void TmModuleReceiveAFPRegister (void)
126{
e80b30c0
EL
127 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
128 tmm_modules[TMM_RECEIVEAFP].ThreadInit = NoAFPSupportExit;
129 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
130 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
131 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
e80b30c0 132 tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
3f1c4efc 133 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
e80b30c0
EL
134}
135
136/**
137 * \brief Registration Function for DecodeAFP.
e80b30c0 138 */
8f1d7503
KS
139void TmModuleDecodeAFPRegister (void)
140{
e80b30c0
EL
141 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
142 tmm_modules[TMM_DECODEAFP].ThreadInit = NoAFPSupportExit;
143 tmm_modules[TMM_DECODEAFP].Func = NULL;
144 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
145 tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
e80b30c0 146 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 147 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
e80b30c0
EL
148}
149
150/**
151 * \brief this function prints an error message and exits.
152 */
ab1200fb 153TmEcode NoAFPSupportExit(ThreadVars *tv, const void *initdata, void **data)
e80b30c0
EL
154{
155 SCLogError(SC_ERR_NO_AF_PACKET,"Error creating thread %s: you do not have "
156 "support for AF_PACKET enabled, on Linux host please recompile "
157 "with --enable-af-packet", tv->name);
158 exit(EXIT_FAILURE);
159}
160
161#else /* We have AF_PACKET support */
162
c45d8985
EL
163#define AFP_IFACE_NAME_LENGTH 48
164
165#define AFP_STATE_DOWN 0
166#define AFP_STATE_UP 1
167
168#define AFP_RECONNECT_TIMEOUT 500000
13f13b6d 169#define AFP_DOWN_COUNTER_INTERVAL 40
c45d8985
EL
170
171#define POLL_TIMEOUT 100
172
8b08b034
VJ
173/* kernel flags defined for RX ring tp_status */
174#ifndef TP_STATUS_KERNEL
175#define TP_STATUS_KERNEL 0
176#endif
177#ifndef TP_STATUS_USER
178#define TP_STATUS_USER BIT_U32(0)
179#endif
180#ifndef TP_STATUS_COPY
181#define TP_STATUS_COPY BIT_U32(1)
182#endif
183#ifndef TP_STATUS_LOSING
184#define TP_STATUS_LOSING BIT_U32(2)
185#endif
186#ifndef TP_STATUS_CSUMNOTREADY
187#define TP_STATUS_CSUMNOTREADY BIT_U32(3)
188#endif
189#ifndef TP_STATUS_VLAN_VALID
190#define TP_STATUS_VLAN_VALID BIT_U32(4)
191#endif
192#ifndef TP_STATUS_BLK_TMO
193#define TP_STATUS_BLK_TMO BIT_U32(5)
194#endif
195#ifndef TP_STATUS_VLAN_TPID_VALID
196#define TP_STATUS_VLAN_TPID_VALID BIT_U32(6)
197#endif
198#ifndef TP_STATUS_CSUM_VALID
199#define TP_STATUS_CSUM_VALID BIT_U32(7)
200#endif
201
202#ifndef TP_STATUS_TS_SOFTWARE
203#define TP_STATUS_TS_SOFTWARE BIT_U32(29)
204#endif
205#ifndef TP_STATUS_TS_SYS_HARDWARE
206#define TP_STATUS_TS_SYS_HARDWARE BIT_U32(30) /* kernel comment says: "deprecated, never set" */
207#endif
208#ifndef TP_STATUS_TS_RAW_HARDWARE
209#define TP_STATUS_TS_RAW_HARDWARE BIT_U32(31)
210#endif
211
4a1a0080 212#ifndef TP_STATUS_USER_BUSY
ad862fff
VJ
213/* HACK special setting in the tp_status field for frames we are
214 * still working on. This can happen in autofp mode where the
215 * capture thread goes around the ring and finds a frame that still
216 * hasn't been released by a worker thread.
217 *
218 * We use bits 29, 30, 31. 29 and 31 are software and hardware
219 * timestamps. 30 should not be set by the kernel at all. Combined
220 * they should never be set on the rx-ring together.
221 *
222 * The excessive casting is for handling the fact that the kernel
223 * defines almost all of these as int flags, not unsigned ints. */
224#define TP_STATUS_USER_BUSY \
225 (uint32_t)((uint32_t)TP_STATUS_TS_SOFTWARE | (uint32_t)TP_STATUS_TS_SYS_HARDWARE | \
226 (uint32_t)TP_STATUS_TS_RAW_HARDWARE)
4a1a0080 227#endif
ad862fff
VJ
228#define FRAME_BUSY(tp_status) \
229 (((uint32_t)(tp_status) & (uint32_t)TP_STATUS_USER_BUSY) == (uint32_t)TP_STATUS_USER_BUSY)
4a1a0080 230
62e63e3f
EL
/* Result codes returned by the AF_PACKET read functions in this file. */
enum {
    /* first enumerator, value 0 */
    AFP_READ_OK = 0,
    AFP_READ_FAILURE,
    /** Error during treatment by other functions of Suricata */
    AFP_SURI_FAILURE,
    AFP_KERNEL_DROP,
};
238
1992a227
EL
/* Error severity classes; numbering starts at 1, so 0 is not one of them. */
enum {
    AFP_FATAL_ERROR = 1,
    AFP_RECOVERABLE_ERROR = 2,
};
243
49b7b00f
EL
/* Typed views of a ring frame header pointer: the same address can be read as
 * a TPACKET_V2 header, a TPACKET_V3 header (when built in) or a raw pointer. */
union thdr {
    struct tpacket2_hdr *h2;
#ifdef HAVE_TPACKET_V3
    struct tpacket3_hdr *h3;
#endif
    void *raw;
};
251
5e05fedc 252#ifdef HAVE_PACKET_EBPF
06173267 253static int AFPBypassCallback(Packet *p);
8c880879 254static int AFPXDPBypassCallback(Packet *p);
5e05fedc 255#endif
06173267 256
91e1256b 257#define MAX_MAPS 32
c45d8985
EL
258/**
259 * \brief Structure to hold thread specific variables.
260 */
261typedef struct AFPThreadVars_
262{
69d0d484
VJ
263 union AFPRing {
264 char *v2;
265 struct iovec *v3;
266 } ring;
b797fd92 267
c45d8985 268 /* counters */
3ce39433 269 uint64_t pkts;
c45d8985 270
ff6365dd
EL
271 ThreadVars *tv;
272 TmSlot *slot;
9500d12c
EL
273 LiveDevice *livedev;
274 /* data link type for the thread */
b797fd92 275 uint32_t datalink;
9500d12c 276
d65f4585 277#ifdef HAVE_PACKET_EBPF
94a622cb 278 /* File descriptor of the IPv4 flow bypass table maps */
d65f4585 279 int v4_map_fd;
94a622cb 280 /* File descriptor of the IPv6 flow bypass table maps */
d65f4585
EL
281 int v6_map_fd;
282#endif
283
9500d12c 284 unsigned int frame_offset;
ff6365dd 285
9500d12c
EL
286 ChecksumValidationMode checksum_mode;
287
b797fd92 288 /* references to packet and drop counters */
9500d12c
EL
289 uint16_t capture_kernel_packets;
290 uint16_t capture_kernel_drops;
9efa4ace 291 uint16_t capture_errors;
a022648b 292 uint16_t afpacket_spin;
9500d12c
EL
293
294 /* handle state */
295 uint8_t afp_state;
296 uint8_t copy_mode;
4bfa3aea 297 unsigned int flags;
9500d12c
EL
298
299 /* IPS peer */
300 AFPPeer *mpeer;
301
302 /* no mmap mode */
ff6365dd
EL
303 uint8_t *data; /** Per function and thread data */
304 int datalen; /** Length of per function and thread data */
9500d12c 305 int cooked;
ff6365dd 306
9500d12c
EL
307 /*
308 * Init related members
309 */
51eb9605 310
9500d12c
EL
311 /* thread specific socket */
312 int socket;
b797fd92
EL
313
314 int ring_size;
fa902abe 315 int block_size;
234aefdf 316 int block_timeout;
e80b30c0
EL
317 /* socket buffer size */
318 int buffer_size;
fa902abe 319 /* Filter */
ab1200fb 320 const char *bpf_filter;
9500d12c 321
df7dbe36 322 int promisc;
e80b30c0 323
9500d12c 324 int down_count;
662dccd8 325
df0ed6fd 326 uint16_t cluster_id;
e80b30c0 327 int cluster_type;
c45d8985 328
fbca1a4e
EL
329 int threads;
330
69d0d484
VJ
331 union AFPTpacketReq {
332 struct tpacket_req v2;
c2d0d938 333#ifdef HAVE_TPACKET_V3
69d0d484 334 struct tpacket_req3 v3;
c2d0d938 335#endif
69d0d484 336 } req;
b797fd92
EL
337
338 char iface[AFP_IFACE_NAME_LENGTH];
339 /* IPS output iface */
340 char out_iface[AFP_IFACE_NAME_LENGTH];
662dccd8 341
cba41207
AG
342 /* mmap'ed ring buffer */
343 unsigned int ring_buflen;
344 uint8_t *ring_buf;
91e1256b 345
36838017 346#ifdef HAVE_PACKET_EBPF
5e05fedc
VJ
347 uint8_t xdp_mode;
348 int ebpf_lb_fd;
349 int ebpf_filter_fd;
4cf53100 350 struct ebpf_timeout_config ebpf_t_config;
36838017 351#endif
315c29a8 352
c45d8985
EL
353} AFPThreadVars;
354
15e3bdb7
VJ
355static TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
356static void ReceiveAFPThreadExitStats(ThreadVars *, void *);
357static TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
358static TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
c45d8985 359
15e3bdb7
VJ
360static TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
361static TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
f8aed4ce 362static TmEcode DecodeAFP(ThreadVars *, Packet *, void *);
c45d8985 363
15e3bdb7 364static TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
662dccd8 365static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
13f13b6d
EL
366static int AFPGetDevFlags(int fd, const char *ifname);
367static int AFPDerefSocket(AFPPeer* peer);
368static int AFPRefSocket(AFPPeer* peer);
f2a6fb8a 369
19475165 370
c45d8985
EL
371/**
372 * \brief Registration Function for RecieveAFP.
373 * \todo Unit tests are needed for this module.
374 */
8f1d7503
KS
375void TmModuleReceiveAFPRegister (void)
376{
c45d8985
EL
377 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
378 tmm_modules[TMM_RECEIVEAFP].ThreadInit = ReceiveAFPThreadInit;
ff6365dd 379 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
e80b30c0 380 tmm_modules[TMM_RECEIVEAFP].PktAcqLoop = ReceiveAFPLoop;
57e0bd39 381 tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
c45d8985 382 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
7127ae2b 383 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = ReceiveAFPThreadDeinit;
c45d8985 384 tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
cd4705e6 385 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
19475165 386
c45d8985
EL
387}
388
a6457262
EL
389/**
390 * \defgroup afppeers AFP peers list
391 *
392 * AF_PACKET has an IPS mode where interfaces are peered: packets from
393 * one interface are sent to the peered interface and the other way around. The ::AFPPeer
394 * list maintains the list of peers. Each ::AFPPeer stores the needed
395 * information to be able to send packets on the interface.
396 * An element of the list must not be destroyed during the run of Suricata as it
397 * is used by ::Packet and other threads.
398 *
399 * @{
400 */
401
662dccd8
EL
402typedef struct AFPPeersList_ {
403 TAILQ_HEAD(, AFPPeer_) peers; /**< Head of list of fragments. */
404 int cnt;
405 int peered;
60400163
EL
406 int turn; /**< Next value for initialisation order */
407 SC_ATOMIC_DECLARE(int, reached); /**< Counter used to synchronize start */
662dccd8
EL
408} AFPPeersList;
409
410/**
a6457262
EL
411 * \brief Update the peer.
412 *
413 * Update the AFPPeer of a thread ie set new state, socket number
414 * or iface index.
415 *
662dccd8 416 */
ab1200fb 417static void AFPPeerUpdate(AFPThreadVars *ptv)
662dccd8
EL
418{
419 if (ptv->mpeer == NULL) {
420 return;
421 }
662dccd8
EL
422 (void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
423 (void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
424 (void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
425}
426
a6457262
EL
427/**
428 * \brief Clean and free ressource used by an ::AFPPeer
429 */
ab1200fb 430static void AFPPeerClean(AFPPeer *peer)
662dccd8
EL
431{
432 if (peer->flags & AFP_SOCK_PROTECT)
433 SCMutexDestroy(&peer->sock_protect);
662dccd8
EL
434 SCFree(peer);
435}
436
437AFPPeersList peerslist;
438
439
a6457262
EL
440/**
441 * \brief Init the global list of ::AFPPeer
442 */
662dccd8
EL
443TmEcode AFPPeersListInit()
444{
445 SCEnter();
446 TAILQ_INIT(&peerslist.peers);
447 peerslist.peered = 0;
448 peerslist.cnt = 0;
60400163
EL
449 peerslist.turn = 0;
450 SC_ATOMIC_INIT(peerslist.reached);
451 (void) SC_ATOMIC_SET(peerslist.reached, 0);
662dccd8
EL
452 SCReturnInt(TM_ECODE_OK);
453}
454
a6457262
EL
455/**
456 * \brief Check that all ::AFPPeer got a peer
457 *
458 * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
459 */
662dccd8
EL
460TmEcode AFPPeersListCheck()
461{
462#define AFP_PEERS_MAX_TRY 4
463#define AFP_PEERS_WAIT 20000
464 int try = 0;
465 SCEnter();
466 while (try < AFP_PEERS_MAX_TRY) {
467 if (peerslist.cnt != peerslist.peered) {
468 usleep(AFP_PEERS_WAIT);
469 } else {
470 SCReturnInt(TM_ECODE_OK);
471 }
472 try++;
473 }
474 SCLogError(SC_ERR_AFP_CREATE, "Threads number not equals");
475 SCReturnInt(TM_ECODE_FAILED);
476}
477
a6457262
EL
478/**
479 * \brief Declare a new AFP thread to AFP peers list.
480 */
ab1200fb 481static TmEcode AFPPeersListAdd(AFPThreadVars *ptv)
662dccd8
EL
482{
483 SCEnter();
484 AFPPeer *peer = SCMalloc(sizeof(AFPPeer));
485 AFPPeer *pitem;
ac56b1bf 486 int mtu, out_mtu;
662dccd8 487
e176be6f 488 if (unlikely(peer == NULL)) {
662dccd8
EL
489 SCReturnInt(TM_ECODE_FAILED);
490 }
491 memset(peer, 0, sizeof(AFPPeer));
492 SC_ATOMIC_INIT(peer->socket);
13f13b6d 493 SC_ATOMIC_INIT(peer->sock_usage);
662dccd8
EL
494 SC_ATOMIC_INIT(peer->if_idx);
495 SC_ATOMIC_INIT(peer->state);
496 peer->flags = ptv->flags;
60400163 497 peer->turn = peerslist.turn++;
662dccd8
EL
498
499 if (peer->flags & AFP_SOCK_PROTECT) {
500 SCMutexInit(&peer->sock_protect, NULL);
501 }
502
13f13b6d 503 (void)SC_ATOMIC_SET(peer->sock_usage, 0);
662dccd8
EL
504 (void)SC_ATOMIC_SET(peer->state, AFP_STATE_DOWN);
505 strlcpy(peer->iface, ptv->iface, AFP_IFACE_NAME_LENGTH);
506 ptv->mpeer = peer;
507 /* add element to iface list */
508 TAILQ_INSERT_TAIL(&peerslist.peers, peer, next);
662dccd8 509
13f13b6d
EL
510 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
511 peerslist.cnt++;
512
513 /* Iter to find a peer */
514 TAILQ_FOREACH(pitem, &peerslist.peers, next) {
515 if (pitem->peer)
516 continue;
517 if (strcmp(pitem->iface, ptv->out_iface))
518 continue;
519 peer->peer = pitem;
520 pitem->peer = peer;
521 mtu = GetIfaceMTU(ptv->iface);
522 out_mtu = GetIfaceMTU(ptv->out_iface);
523 if (mtu != out_mtu) {
524 SCLogError(SC_ERR_AFP_CREATE,
525 "MTU on %s (%d) and %s (%d) are not equal, "
526 "transmission of packets bigger than %d will fail.",
527 ptv->iface, mtu,
528 ptv->out_iface, out_mtu,
529 (out_mtu > mtu) ? mtu : out_mtu);
530 }
531 peerslist.peered += 2;
532 break;
ac56b1bf 533 }
662dccd8
EL
534 }
535
536 AFPPeerUpdate(ptv);
537
538 SCReturnInt(TM_ECODE_OK);
539}
540
ab1200fb 541static int AFPPeersListWaitTurn(AFPPeer *peer)
60400163 542{
b2691cbe
EL
543 /* If turn is zero, we already have started threads once */
544 if (peerslist.turn == 0)
545 return 0;
546
60400163
EL
547 if (peer->turn == SC_ATOMIC_GET(peerslist.reached))
548 return 0;
549 return 1;
550}
551
ab1200fb 552static void AFPPeersListReachedInc(void)
60400163 553{
b2691cbe
EL
554 if (peerslist.turn == 0)
555 return;
556
531ff3dd 557 if ((SC_ATOMIC_ADD(peerslist.reached, 1) + 1) == peerslist.turn) {
b2691cbe
EL
558 SCLogInfo("All AFP capture threads are running.");
559 (void)SC_ATOMIC_SET(peerslist.reached, 0);
560 /* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
561 * restarted.
562 */
563 peerslist.turn = 0;
564 }
60400163
EL
565}
566
ab1200fb 567static int AFPPeersListStarted(void)
919377d4
EL
568{
569 return !peerslist.turn;
570}
571
a6457262
EL
572/**
573 * \brief Clean the global peers list.
574 */
662dccd8
EL
575void AFPPeersListClean()
576{
577 AFPPeer *pitem;
578
579 while ((pitem = TAILQ_FIRST(&peerslist.peers))) {
580 TAILQ_REMOVE(&peerslist.peers, pitem, next);
581 AFPPeerClean(pitem);
582 }
583}
584
a6457262
EL
585/**
586 * @}
587 */
588
c45d8985
EL
589/**
590 * \brief Registration Function for DecodeAFP.
591 * \todo Unit tests are needed for this module.
592 */
8f1d7503
KS
593void TmModuleDecodeAFPRegister (void)
594{
c45d8985
EL
595 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
596 tmm_modules[TMM_DECODEAFP].ThreadInit = DecodeAFPThreadInit;
597 tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
598 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
2864f9ee 599 tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
c45d8985 600 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 601 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
c45d8985
EL
602}
603
662dccd8 604
e80b30c0
EL
605static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose);
606
e8a4a4c4 607static inline void AFPDumpCounters(AFPThreadVars *ptv)
6efd37a3 608{
6efd37a3 609#ifdef PACKET_STATISTICS
e8a4a4c4
EL
610 struct tpacket_stats kstats;
611 socklen_t len = sizeof (struct tpacket_stats);
612 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
613 &kstats, &len) > -1) {
614 SCLogDebug("(%s) Kernel: Packets %" PRIu32 ", dropped %" PRIu32 "",
615 ptv->tv->name,
616 kstats.tp_packets, kstats.tp_drops);
8992275b
VJ
617 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, kstats.tp_packets);
618 StatsAddUI64(ptv->tv, ptv->capture_kernel_drops, kstats.tp_drops);
3ce39433
EL
619 (void) SC_ATOMIC_ADD(ptv->livedev->drop, (uint64_t) kstats.tp_drops);
620 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, (uint64_t) kstats.tp_packets);
6efd37a3 621 }
e8a4a4c4 622#endif
6efd37a3 623}
c45d8985
EL
624
625/**
626 * \brief AF packet read function.
627 *
628 * This function fills
629 * From here the packets are picked up by the DecodeAFP thread.
630 *
631 * \param user pointer to AFPThreadVars
632 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
633 */
ab1200fb 634static int AFPRead(AFPThreadVars *ptv)
c45d8985
EL
635{
636 Packet *p = NULL;
637 /* XXX should try to use read that get directly to packet */
c45d8985
EL
638 int offset = 0;
639 int caplen;
640 struct sockaddr_ll from;
641 struct iovec iov;
642 struct msghdr msg;
c45d8985
EL
643 struct cmsghdr *cmsg;
644 union {
645 struct cmsghdr cmsg;
646 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
647 } cmsg_buf;
6efd37a3 648 unsigned char aux_checksum = 0;
c45d8985
EL
649
650 msg.msg_name = &from;
651 msg.msg_namelen = sizeof(from);
652 msg.msg_iov = &iov;
653 msg.msg_iovlen = 1;
c45d8985
EL
654 msg.msg_control = &cmsg_buf;
655 msg.msg_controllen = sizeof(cmsg_buf);
c45d8985
EL
656 msg.msg_flags = 0;
657
658 if (ptv->cooked)
659 offset = SLL_HEADER_LEN;
660 else
661 offset = 0;
e80b30c0
EL
662 iov.iov_len = ptv->datalen - offset;
663 iov.iov_base = ptv->data + offset;
c45d8985
EL
664
665 caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
666
667 if (caplen < 0) {
668 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
669 errno);
62e63e3f 670 SCReturnInt(AFP_READ_FAILURE);
c45d8985 671 }
ff6365dd
EL
672
673 p = PacketGetFromQueueOrAlloc();
c45d8985 674 if (p == NULL) {
9efa4ace 675 SCReturnInt(AFP_SURI_FAILURE);
c45d8985 676 }
b33986c8 677 PKT_SET_SRC(p, PKT_SRC_WIRE);
5e05fedc 678#ifdef HAVE_PACKET_EBPF
06173267
EL
679 if (ptv->flags & AFP_BYPASS) {
680 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585
EL
681 p->afp_v.v4_map_fd = ptv->v4_map_fd;
682 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 683 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
06173267 684 }
8c880879
EL
685 if (ptv->flags & AFP_XDPBYPASS) {
686 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585
EL
687 p->afp_v.v4_map_fd = ptv->v4_map_fd;
688 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 689 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
8c880879 690 }
5e05fedc 691#endif
c45d8985
EL
692
693 /* get timestamp of packet via ioctl */
694 if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
695 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
696 errno);
697 TmqhOutputPacketpool(ptv->tv, p);
62e63e3f 698 SCReturnInt(AFP_READ_FAILURE);
c45d8985
EL
699 }
700
701 ptv->pkts++;
51eb9605 702 p->livedev = ptv->livedev;
c45d8985
EL
703
704 /* add forged header */
705 if (ptv->cooked) {
e80b30c0 706 SllHdr * hdrp = (SllHdr *)ptv->data;
c45d8985
EL
707 /* XXX this is minimalist, but this seems enough */
708 hdrp->sll_protocol = from.sll_protocol;
709 }
710
711 p->datalink = ptv->datalink;
712 SET_PKT_LEN(p, caplen + offset);
e80b30c0 713 if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
c45d8985 714 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 715 SCReturnInt(AFP_SURI_FAILURE);
c45d8985 716 }
e80b30c0
EL
717 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
718 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
719
6062e00c
EL
720 /* We only check for checksum disable */
721 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
51eb9605
EL
722 p->flags |= PKT_IGNORE_CHECKSUM;
723 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
39577507 724 if (ChecksumAutoModeCheck(ptv->pkts,
51eb9605
EL
725 SC_ATOMIC_GET(ptv->livedev->pkts),
726 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
39577507 727 ptv->checksum_mode = CHECKSUM_VALIDATION_DISABLE;
6062e00c 728 p->flags |= PKT_IGNORE_CHECKSUM;
51eb9605 729 }
6062e00c 730 } else {
6efd37a3
EL
731 aux_checksum = 1;
732 }
6062e00c 733
6efd37a3
EL
734 /* List is NULL if we don't have activated auxiliary data */
735 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
736 struct tpacket_auxdata *aux;
f6ddaf33 737
6efd37a3
EL
738 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
739 cmsg->cmsg_level != SOL_PACKET ||
740 cmsg->cmsg_type != PACKET_AUXDATA)
741 continue;
f6ddaf33 742
6efd37a3
EL
743 aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
744
745 if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
746 p->flags |= PKT_IGNORE_CHECKSUM;
f6ddaf33 747 }
6efd37a3 748 break;
f6ddaf33
EL
749 }
750
c469824b 751 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
9efa4ace 752 SCReturnInt(AFP_SURI_FAILURE);
c469824b 753 }
62e63e3f 754 SCReturnInt(AFP_READ_OK);
c45d8985
EL
755}
756
ecf59be4
EL
757/**
758 * \brief AF packet write function.
759 *
760 * This function has to be called before the memory
761 * related to Packet in ring buffer is released.
762 *
763 * \param pointer to Packet
764 * \param version of capture: TPACKET_V2 or TPACKET_V3
765 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
766 *
767 */
768static TmEcode AFPWritePacket(Packet *p, int version)
662dccd8
EL
769{
770 struct sockaddr_ll socket_address;
771 int socket;
ecf59be4
EL
772 uint8_t *pstart;
773 size_t plen;
ee7e689b
AG
774 union thdr h;
775 uint16_t vlan_tci = 0;
662dccd8
EL
776
777 if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
4c7eb644 778 if (PacketTestAction(p, ACTION_DROP)) {
662dccd8
EL
779 return TM_ECODE_OK;
780 }
781 }
782
783 if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
784 return TM_ECODE_OK;
785
786 if (p->ethh == NULL) {
787 SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
788 return TM_ECODE_FAILED;
789 }
790 /* Index of the network device */
791 socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
792 /* Address length*/
793 socket_address.sll_halen = ETH_ALEN;
794 /* Destination MAC */
795 memcpy(socket_address.sll_addr, p->ethh, 6);
796
797 /* Send packet, locking the socket if necessary */
798 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
799 SCMutexLock(&p->afp_v.peer->sock_protect);
800 socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
ecf59be4 801
ee7e689b
AG
802 h.raw = p->afp_v.relptr;
803
ecf59be4 804 if (version == TPACKET_V2) {
ecf59be4
EL
805 /* Copy VLAN header from ring memory. For post june 2011 kernel we test
806 * the flag. It is not defined for older kernel so we go best effort
807 * and test for non zero value of the TCI header. */
808 if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
ee7e689b
AG
809 vlan_tci = h.h2->tp_vlan_tci;
810 }
811 } else {
812#ifdef HAVE_TPACKET_V3
813 if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
814 vlan_tci = h.h3->hv1.tp_vlan_tci;
ecf59be4 815 }
ee7e689b
AG
816#else
817 /* Should not get here */
818 BUG_ON(1);
819#endif
820 }
821
822 if (vlan_tci != 0) {
823 pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
824 plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
825 /* move ethernet addresses */
826 memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
827 /* write vlan info */
828 *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
829 *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
ecf59be4
EL
830 } else {
831 pstart = GET_PKT_DATA(p);
832 plen = GET_PKT_LEN(p);
833 }
834
835 if (sendto(socket, pstart, plen, 0,
662dccd8
EL
836 (struct sockaddr*) &socket_address,
837 sizeof(struct sockaddr_ll)) < 0) {
838 SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
839 socket,
840 strerror(errno));
841 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
842 SCMutexUnlock(&p->afp_v.peer->sock_protect);
843 return TM_ECODE_FAILED;
844 }
845 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
846 SCMutexUnlock(&p->afp_v.peer->sock_protect);
847
848 return TM_ECODE_OK;
849}
850
ab1200fb 851static void AFPReleaseDataFromRing(Packet *p)
2011a3f8 852{
3f8e15f7
VJ
853 DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
854
662dccd8
EL
855 /* Need to be in copy mode and need to detect early release
856 where Ethernet header could not be set (and pseudo packet) */
3f8e15f7 857 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
ecf59be4 858 AFPWritePacket(p, TPACKET_V2);
662dccd8 859 }
13f13b6d 860
2011a3f8
EL
861 if (p->afp_v.relptr) {
862 union thdr h;
863 h.raw = p->afp_v.relptr;
864 h.h2->tp_status = TP_STATUS_KERNEL;
2011a3f8 865 }
680e941a 866
12252ba7
VJ
867 (void)AFPDerefSocket(p->afp_v.mpeer);
868
680e941a 869 AFPV_CLEANUP(&p->afp_v);
b076a26c
KS
870}
871
ecf59be4 872#ifdef HAVE_TPACKET_V3
ab1200fb 873static void AFPReleasePacketV3(Packet *p)
bae1b03c 874{
3f8e15f7
VJ
875 DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
876
bae1b03c
EL
877 /* Need to be in copy mode and need to detect early release
878 where Ethernet header could not be set (and pseudo packet) */
3f8e15f7 879 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
ecf59be4 880 AFPWritePacket(p, TPACKET_V3);
bae1b03c
EL
881 }
882 PacketFreeOrRelease(p);
883}
ecf59be4 884#endif
bae1b03c 885
ab1200fb 886static void AFPReleasePacket(Packet *p)
b076a26c
KS
887{
888 AFPReleaseDataFromRing(p);
889 PacketFreeOrRelease(p);
2011a3f8
EL
890}
891
a022648b
VJ
892/** \internal
893 * \brief recoverable error - release packet and
894 * return AFP_SURI_FAILURE
895 */
896static inline int AFPSuriFailure(AFPThreadVars *ptv, union thdr h)
897{
898 h.h2->tp_status = TP_STATUS_KERNEL;
899 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
900 ptv->frame_offset = 0;
901 }
902 SCReturnInt(AFP_SURI_FAILURE);
903}
904
905static inline void AFPReadApplyBypass(const AFPThreadVars *ptv, Packet *p)
906{
5e05fedc 907#ifdef HAVE_PACKET_EBPF
a022648b
VJ
908 if (ptv->flags & AFP_BYPASS) {
909 p->BypassPacketsFlow = AFPBypassCallback;
a022648b
VJ
910 p->afp_v.v4_map_fd = ptv->v4_map_fd;
911 p->afp_v.v6_map_fd = ptv->v6_map_fd;
912 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
a022648b
VJ
913 }
914 if (ptv->flags & AFP_XDPBYPASS) {
915 p->BypassPacketsFlow = AFPXDPBypassCallback;
a022648b
VJ
916 p->afp_v.v4_map_fd = ptv->v4_map_fd;
917 p->afp_v.v6_map_fd = ptv->v6_map_fd;
918 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
a022648b 919 }
5e05fedc 920#endif
a022648b
VJ
921}
922
923/** \internal
924 * \brief setup packet for AFPReadFromRing
925 */
926static bool AFPReadFromRingSetupPacket(
927 AFPThreadVars *ptv, union thdr h, const unsigned int tp_status, Packet *p)
928{
929 PKT_SET_SRC(p, PKT_SRC_WIRE);
930
ad862fff
VJ
931 /* flag the packet as TP_STATUS_USER_BUSY, which is ignore by the kernel, but
932 * acts as an indicator that we've reached a frame that is not yet released by
933 * us in autofp mode. It will be cleared when the frame gets released to the kernel. */
a022648b
VJ
934 h.h2->tp_status |= TP_STATUS_USER_BUSY;
935 p->livedev = ptv->livedev;
936 p->datalink = ptv->datalink;
937 ptv->pkts++;
938
939 AFPReadApplyBypass(ptv, p);
940
941 if (h.h2->tp_len > h.h2->tp_snaplen) {
942 SCLogDebug("Packet length (%d) > snaplen (%d), truncating", h.h2->tp_len, h.h2->tp_snaplen);
943 }
944
945 /* get vlan id from header */
946 if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
947 (tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
948 p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
949 p->vlan_idx = 1;
950 }
951
2cbfcce0 952 (void)PacketSetData(p, (unsigned char *)h.raw + h.h2->tp_mac, h.h2->tp_snaplen);
a022648b 953
558930a1
VJ
954 p->afp_v.relptr = h.raw;
955 p->ReleasePacket = AFPReleasePacket;
956 p->afp_v.mpeer = ptv->mpeer;
957 AFPRefSocket(ptv->mpeer);
a022648b 958
558930a1
VJ
959 p->afp_v.copy_mode = ptv->copy_mode;
960 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
961 p->afp_v.peer = ptv->mpeer->peer;
a022648b 962 } else {
558930a1 963 p->afp_v.peer = NULL;
a022648b 964 }
558930a1 965
a022648b
VJ
966 /* Timestamp */
967 p->ts.tv_sec = h.h2->tp_sec;
968 p->ts.tv_usec = h.h2->tp_nsec / 1000;
969 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)", GET_PKT_LEN(p), p, GET_PKT_DATA(p));
970
971 /* We only check for checksum disable */
972 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
973 p->flags |= PKT_IGNORE_CHECKSUM;
974 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
975 if (ChecksumAutoModeCheck(ptv->pkts, SC_ATOMIC_GET(ptv->livedev->pkts),
976 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
977 ptv->checksum_mode = CHECKSUM_VALIDATION_DISABLE;
978 p->flags |= PKT_IGNORE_CHECKSUM;
979 }
980 } else {
981 if (tp_status & TP_STATUS_CSUMNOTREADY) {
982 p->flags |= PKT_IGNORE_CHECKSUM;
983 }
984 }
985 return true;
986}
987
988static inline int AFPReadFromRingWaitForPacket(AFPThreadVars *ptv)
989{
990 union thdr h;
991 struct timeval start_time;
992 gettimeofday(&start_time, NULL);
993 uint64_t busy_loop_iter = 0;
994
995 /* busy wait loop until we have packets available */
996 while (1) {
997 if (unlikely(suricata_ctl_flags != 0)) {
998 break;
999 }
1000 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
1001 if (unlikely(h.raw == NULL)) {
1002 return AFP_READ_FAILURE;
1003 }
1004 const unsigned int tp_status = h.h2->tp_status;
1005 if (tp_status == TP_STATUS_KERNEL) {
1006 busy_loop_iter++;
1007
1008 struct timeval cur_time;
1009 memset(&cur_time, 0, sizeof(cur_time));
1010 uint64_t milliseconds =
1011 ((cur_time.tv_sec - start_time.tv_sec) * 1000) +
1012 (((1000000 + cur_time.tv_usec - start_time.tv_usec) / 1000) - 1000);
1013 if (milliseconds > 1000) {
1014 break;
1015 }
1016 continue;
1017 }
1018 break;
1019 }
1020 if (busy_loop_iter) {
1021 StatsAddUI64(ptv->tv, ptv->afpacket_spin, busy_loop_iter);
1022 }
1023 return AFP_READ_OK;
1024}
1025
49b7b00f
EL
1026/**
1027 * \brief AF packet read function for ring
1028 *
1029 * This function fills
1030 * From here the packets are picked up by the DecodeAFP thread.
1031 *
1032 * \param user pointer to AFPThreadVars
1033 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1034 */
ab1200fb 1035static int AFPReadFromRing(AFPThreadVars *ptv)
49b7b00f 1036{
49b7b00f 1037 union thdr h;
a022648b
VJ
1038 bool emergency_flush = false;
1039 const unsigned int start_pos = ptv->frame_offset;
4d8f70c6 1040
a022648b
VJ
1041 /* poll() told us there are frames, so lets wait for at least
1042 * one frame to become available. */
1043 if (AFPReadFromRingWaitForPacket(ptv) != AFP_READ_OK)
1044 return AFP_READ_FAILURE;
49b7b00f 1045
a022648b 1046 /* process the frames in the ring */
a369f8c3 1047 while (1) {
53c02334
AS
1048 if (unlikely(suricata_ctl_flags != 0)) {
1049 break;
1050 }
69d0d484 1051 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
9efa4ace 1052 if (unlikely(h.raw == NULL)) {
a022648b 1053 return AFP_READ_FAILURE;
34b3f194 1054 }
a022648b
VJ
1055 const unsigned int tp_status = h.h2->tp_status;
1056 /* if we find a kernel frame we are done */
1057 if (unlikely(tp_status == TP_STATUS_KERNEL)) {
1058 break;
27b5136b 1059 }
a022648b 1060 /* if in autofp mode the frame is still busy, return to poll */
ad862fff 1061 if (unlikely(FRAME_BUSY(tp_status))) {
a022648b 1062 break;
4a1a0080 1063 }
a022648b 1064 emergency_flush |= ((tp_status & TP_STATUS_LOSING) != 0);
4a1a0080 1065
a022648b 1066 if ((ptv->flags & AFP_EMERGENCY_MODE) && emergency_flush) {
27b5136b
EL
1067 h.h2->tp_status = TP_STATUS_KERNEL;
1068 goto next_frame;
a369f8c3
EL
1069 }
1070
a022648b 1071 Packet *p = PacketGetFromQueueOrAlloc();
a369f8c3 1072 if (p == NULL) {
a022648b 1073 return AFPSuriFailure(ptv, h);
8c880879 1074 }
a022648b
VJ
1075 if (AFPReadFromRingSetupPacket(ptv, h, tp_status, p) == false) {
1076 TmqhOutputPacketpool(ptv->tv, p);
1077 return AFPSuriFailure(ptv, h);
a369f8c3 1078 }
5f12b234 1079
a369f8c3 1080 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
a022648b 1081 return AFPSuriFailure(ptv, h);
49b7b00f 1082 }
27b5136b 1083next_frame:
69d0d484 1084 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
34b3f194 1085 ptv->frame_offset = 0;
350d7619 1086 /* Get out of loop to be sure we will reach maintenance tasks */
a022648b
VJ
1087 if (ptv->frame_offset == start_pos)
1088 break;
34b3f194 1089 }
34b3f194 1090 }
a022648b
VJ
1091 if (emergency_flush) {
1092 AFPDumpCounters(ptv);
1093 }
49b7b00f
EL
1094 SCReturnInt(AFP_READ_OK);
1095}
1096
f947539d 1097#ifdef HAVE_TPACKET_V3
bae1b03c
EL
/**
 * \brief Set the status of a TPACKET_V3 block to TP_STATUS_KERNEL
 *
 * \param pbd block descriptor to update
 */
static inline void AFPFlushBlock(struct tpacket_block_desc *pbd)
{
    struct tpacket_hdr_v1 *block_hdr = &pbd->hdr.bh1;

    block_hdr->block_status = TP_STATUS_KERNEL;
}
1102
1103static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc *pbd, struct tpacket3_hdr *ppd)
1104{
1105 Packet *p = PacketGetFromQueueOrAlloc();
1106 if (p == NULL) {
9efa4ace 1107 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1108 }
1109 PKT_SET_SRC(p, PKT_SRC_WIRE);
a022648b
VJ
1110
1111 AFPReadApplyBypass(ptv, p);
bae1b03c
EL
1112
1113 ptv->pkts++;
bae1b03c
EL
1114 p->livedev = ptv->livedev;
1115 p->datalink = ptv->datalink;
1116
bcc03f17 1117 if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
e41a9d63
AG
1118 (ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
1119 p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
1120 p->vlan_idx = 1;
e41a9d63
AG
1121 }
1122
2cbfcce0
VJ
1123 (void)PacketSetData(p, (unsigned char *)ppd + ppd->tp_mac, ppd->tp_snaplen);
1124
558930a1
VJ
1125 p->afp_v.relptr = ppd;
1126 p->ReleasePacket = AFPReleasePacketV3;
1127 p->afp_v.mpeer = ptv->mpeer;
558930a1
VJ
1128
1129 p->afp_v.copy_mode = ptv->copy_mode;
1130 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
1131 p->afp_v.peer = ptv->mpeer->peer;
bae1b03c 1132 } else {
558930a1 1133 p->afp_v.peer = NULL;
bae1b03c 1134 }
558930a1 1135
bae1b03c
EL
1136 /* Timestamp */
1137 p->ts.tv_sec = ppd->tp_sec;
1138 p->ts.tv_usec = ppd->tp_nsec/1000;
1139 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
1140 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
1141
1142 /* We only check for checksum disable */
1143 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
1144 p->flags |= PKT_IGNORE_CHECKSUM;
1145 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
39577507 1146 if (ChecksumAutoModeCheck(ptv->pkts,
bae1b03c
EL
1147 SC_ATOMIC_GET(ptv->livedev->pkts),
1148 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
39577507 1149 ptv->checksum_mode = CHECKSUM_VALIDATION_DISABLE;
bae1b03c
EL
1150 p->flags |= PKT_IGNORE_CHECKSUM;
1151 }
1152 } else {
1153 if (ppd->tp_status & TP_STATUS_CSUMNOTREADY) {
1154 p->flags |= PKT_IGNORE_CHECKSUM;
1155 }
1156 }
1157
1158 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
9efa4ace 1159 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1160 }
1161
1162 SCReturnInt(AFP_READ_OK);
1163}
1164
1165static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
1166{
e63db9d1
VJ
1167 const int num_pkts = pbd->hdr.bh1.num_pkts;
1168 uint8_t *ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
1169
1170 for (int i = 0; i < num_pkts; ++i) {
1171 int ret = AFPParsePacketV3(ptv, pbd, (struct tpacket3_hdr *)ppd);
9efa4ace
EL
1172 switch (ret) {
1173 case AFP_READ_OK:
1174 break;
1175 case AFP_SURI_FAILURE:
1176 /* Internal error but let's just continue and
1177 * treat thenext packet */
1178 break;
1179 case AFP_READ_FAILURE:
1180 SCReturnInt(AFP_READ_FAILURE);
1181 default:
1182 SCReturnInt(ret);
5f84b55d 1183 }
bae1b03c
EL
1184 ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset;
1185 }
1186
1187 SCReturnInt(AFP_READ_OK);
1188}
f947539d 1189#endif /* HAVE_TPACKET_V3 */
bae1b03c
EL
1190
1191/**
1192 * \brief AF packet read function for ring
1193 *
1194 * This function fills
1195 * From here the packets are picked up by the DecodeAFP thread.
1196 *
1197 * \param user pointer to AFPThreadVars
1198 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1199 */
ab1200fb 1200static int AFPReadFromRingV3(AFPThreadVars *ptv)
bae1b03c 1201{
c2d0d938 1202#ifdef HAVE_TPACKET_V3
bae1b03c
EL
1203 /* Loop till we have packets available */
1204 while (1) {
1205 if (unlikely(suricata_ctl_flags != 0)) {
1206 SCLogInfo("Exiting AFP V3 read loop");
1207 break;
1208 }
1209
e63db9d1
VJ
1210 struct tpacket_block_desc *pbd =
1211 (struct tpacket_block_desc *)ptv->ring.v3[ptv->frame_offset].iov_base;
bae1b03c
EL
1212
1213 /* block is not ready to be read */
1214 if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
1215 SCReturnInt(AFP_READ_OK);
1216 }
1217
e63db9d1 1218 int ret = AFPWalkBlock(ptv, pbd);
9efa4ace 1219 if (unlikely(ret != AFP_READ_OK)) {
bae1b03c 1220 AFPFlushBlock(pbd);
9efa4ace 1221 SCReturnInt(ret);
bae1b03c
EL
1222 }
1223
1224 AFPFlushBlock(pbd);
69d0d484 1225 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
bae1b03c
EL
1226 /* return to maintenance task after one loop on the ring */
1227 if (ptv->frame_offset == 0) {
1228 SCReturnInt(AFP_READ_OK);
1229 }
1230 }
c2d0d938 1231#endif
bae1b03c
EL
1232 SCReturnInt(AFP_READ_OK);
1233}
1234
13f13b6d
EL
1235/**
1236 * \brief Reference socket
1237 *
1238 * \retval O in case of failure, 1 in case of success
1239 */
1240static int AFPRefSocket(AFPPeer* peer)
1241{
1242 if (unlikely(peer == NULL))
1243 return 0;
1244
1245 (void)SC_ATOMIC_ADD(peer->sock_usage, 1);
1246 return 1;
1247}
1248
1249
1250/**
1251 * \brief Dereference socket
1252 *
1253 * \retval 1 if socket is still alive, 0 if not
1254 */
1255static int AFPDerefSocket(AFPPeer* peer)
1256{
4424f5a2
EL
1257 if (peer == NULL)
1258 return 1;
1259
531ff3dd 1260 if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 1) {
12252ba7 1261 return 0;
13f13b6d
EL
1262 }
1263 return 1;
1264}
1265
12252ba7 1266static void AFPCloseSocket(AFPThreadVars *ptv)
13f13b6d 1267{
12252ba7
VJ
1268 if (ptv->mpeer != NULL)
1269 BUG_ON(SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0);
49b7b00f 1270
12252ba7 1271 if (ptv->flags & AFP_TPACKET_V3) {
5f84b55d 1272#ifdef HAVE_TPACKET_V3
12252ba7
VJ
1273 if (ptv->ring.v3) {
1274 SCFree(ptv->ring.v3);
1275 ptv->ring.v3 = NULL;
13f13b6d 1276 }
5f84b55d 1277#endif
12252ba7
VJ
1278 } else {
1279 if (ptv->ring.v2) {
1280 /* only used in reading phase, we can free it */
1281 SCFree(ptv->ring.v2);
1282 ptv->ring.v2 = NULL;
13f13b6d
EL
1283 }
1284 }
12252ba7
VJ
1285 if (ptv->socket != -1) {
1286 SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
1287 munmap(ptv->ring_buf, ptv->ring_buflen);
1288 close(ptv->socket);
1289 ptv->socket = -1;
1290 }
1291}
1292
1293static void AFPSwitchState(AFPThreadVars *ptv, int state)
1294{
1295 ptv->afp_state = state;
1296 ptv->down_count = 0;
1297
1298 if (state == AFP_STATE_DOWN) {
1299 /* cleanup is done on thread cleanup or try reopen
1300 * as there may still be packets in autofp that
1301 * are referencing us */
1302 (void)SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1);
1303 }
13f13b6d 1304 if (state == AFP_STATE_UP) {
12252ba7
VJ
1305 AFPPeerUpdate(ptv);
1306 (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
13f13b6d
EL
1307 }
1308}
49b7b00f 1309
7fea0ec6
EL
1310static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
1311 uint64_t *discarded_pkts)
919377d4
EL
1312{
1313 struct sockaddr_ll from;
1314 struct iovec iov;
1315 struct msghdr msg;
1316 struct timeval ts;
1317 union {
1318 struct cmsghdr cmsg;
1319 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
1320 } cmsg_buf;
1321
1322
1323 if (unlikely(suricata_ctl_flags != 0)) {
1324 return 1;
1325 }
1326
1327 msg.msg_name = &from;
1328 msg.msg_namelen = sizeof(from);
1329 msg.msg_iov = &iov;
1330 msg.msg_iovlen = 1;
1331 msg.msg_control = &cmsg_buf;
1332 msg.msg_controllen = sizeof(cmsg_buf);
1333 msg.msg_flags = 0;
1334
1335 iov.iov_len = ptv->datalen;
1336 iov.iov_base = ptv->data;
1337
339f0665 1338 (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
919377d4
EL
1339
1340 if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
1341 /* FIXME */
1342 return -1;
1343 }
1344
1345 if ((ts.tv_sec > synctv->tv_sec) ||
1346 (ts.tv_sec >= synctv->tv_sec &&
1347 ts.tv_usec > synctv->tv_usec)) {
1348 return 1;
1349 }
1350 return 0;
1351}
1352
7fea0ec6
EL
1353static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
1354 uint64_t *discarded_pkts)
919377d4 1355{
919377d4
EL
1356 if (unlikely(suricata_ctl_flags != 0)) {
1357 return 1;
1358 }
1359
f947539d 1360#ifdef HAVE_TPACKET_V3
bae1b03c 1361 if (ptv->flags & AFP_TPACKET_V3) {
cebbe06f 1362 int ret = 0;
e63db9d1
VJ
1363 struct tpacket_block_desc *pbd =
1364 (struct tpacket_block_desc *)ptv->ring.v3[ptv->frame_offset].iov_base;
7fea0ec6 1365 *discarded_pkts += pbd->hdr.bh1.num_pkts;
cebbe06f
VJ
1366 struct tpacket3_hdr *ppd =
1367 (struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
1368 if (((time_t)ppd->tp_sec > synctv->tv_sec) ||
1369 ((time_t)ppd->tp_sec == synctv->tv_sec &&
1370 (suseconds_t) (ppd->tp_nsec / 1000) > (suseconds_t)synctv->tv_usec)) {
1371 ret = 1;
1372 }
7fea0ec6 1373 AFPFlushBlock(pbd);
69d0d484 1374 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
cebbe06f 1375 return ret;
f947539d
VJ
1376
1377 } else
1378#endif
1379 {
7fea0ec6 1380 /* Read packet from ring */
e63db9d1 1381 union thdr h;
69d0d484 1382 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
7fea0ec6
EL
1383 if (h.raw == NULL) {
1384 return -1;
1385 }
a022648b
VJ
1386 if (h.h2->tp_status == TP_STATUS_KERNEL)
1387 return 0;
1388
7fea0ec6
EL
1389 if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
1390 ((time_t)h.h2->tp_sec == synctv->tv_sec &&
1391 (suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
1392 return 1;
1393 }
919377d4 1394
a022648b 1395 (*discarded_pkts)++;
7fea0ec6 1396 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 1397 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
7fea0ec6
EL
1398 ptv->frame_offset = 0;
1399 }
919377d4
EL
1400 }
1401
919377d4
EL
1402 return 0;
1403}
1404
806844d8
VJ
1405/** \brief wait for all afpacket threads to fully init
1406 *
1407 * Discard packets before all threads are ready, as the cluster
1408 * setup is not complete yet.
1409 *
1410 * if AFPPeersListStarted() returns true init is complete
1411 *
1412 * \retval r 1 = happy, otherwise unhappy
1413 */
7fea0ec6 1414static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
919377d4 1415{
919377d4 1416 struct timeval synctv;
806844d8
VJ
1417 struct pollfd fds;
1418
1419 fds.fd = ptv->socket;
1420 fds.events = POLLIN;
919377d4
EL
1421
1422 /* Set timeval to end of the world */
1423 synctv.tv_sec = 0xffffffff;
1424 synctv.tv_usec = 0xffffffff;
1425
1426 while (1) {
8709a20d 1427 int r = poll(&fds, 1, POLL_TIMEOUT);
806844d8
VJ
1428 if (r > 0 &&
1429 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1430 SCLogWarning(SC_ERR_AFP_READ, "poll failed %02x",
1431 fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL));
1432 return 0;
1433 } else if (r > 0) {
1434 if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
1435 gettimeofday(&synctv, NULL);
1436 }
1437 if (ptv->flags & AFP_RING_MODE) {
7fea0ec6 1438 r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
806844d8 1439 } else {
7fea0ec6 1440 r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
806844d8
VJ
1441 }
1442 SCLogDebug("Discarding on %s", ptv->tv->name);
1443 switch (r) {
1444 case 1:
9f7ba071 1445 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8
VJ
1446 return 1;
1447 case -1:
1448 return r;
1449 }
1450 /* no packets */
1451 } else if (r == 0 && AFPPeersListStarted()) {
86a3f064 1452 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8 1453 return 1;
43b6cbd4 1454 } else if (r < 0) { /* only exit on error */
806844d8
VJ
1455 SCLogWarning(SC_ERR_AFP_READ, "poll failed with retval %d", r);
1456 return 0;
919377d4
EL
1457 }
1458 }
1459 return 1;
1460}
1461
13f13b6d
EL
1462/**
1463 * \brief Try to reopen socket
1464 *
1465 * \retval 0 in case of success, negative if error occurs or a condition
1466 * is not met.
1467 */
c45d8985
EL
1468static int AFPTryReopen(AFPThreadVars *ptv)
1469{
13f13b6d
EL
1470 ptv->down_count++;
1471
13f13b6d
EL
1472 /* Don't reconnect till we have packet that did not release data */
1473 if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
1474 return -1;
1475 }
c45d8985 1476
12252ba7
VJ
1477 /* ref cnt 0, we can close the old socket */
1478 AFPCloseSocket(ptv);
1479
8709a20d 1480 int afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
c45d8985 1481 if (afp_activate_r != 0) {
13f13b6d
EL
1482 if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
1483 SCLogWarning(SC_ERR_AFP_CREATE, "Can not open iface '%s'",
1484 ptv->iface);
1485 }
c45d8985
EL
1486 return afp_activate_r;
1487 }
1488
3bea3b39 1489 SCLogInfo("Interface '%s' is back", ptv->iface);
c45d8985
EL
1490 return 0;
1491}
1492
e80b30c0
EL
1493/**
1494 * \brief Main AF_PACKET reading Loop function
1495 */
1496TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
1497{
34581ce9
AS
1498 SCEnter();
1499
e80b30c0 1500 AFPThreadVars *ptv = (AFPThreadVars *)data;
e80b30c0
EL
1501 struct pollfd fds;
1502 int r;
34581ce9 1503 TmSlot *s = (TmSlot *)slot;
e8a4a4c4 1504 time_t last_dump = 0;
49612128 1505 time_t current_time;
5f400785 1506 int (*AFPReadFunc) (AFPThreadVars *);
7fea0ec6 1507 uint64_t discarded_pkts = 0;
e80b30c0 1508
34581ce9 1509 ptv->slot = s->slot_next;
e80b30c0 1510
5f400785 1511 if (ptv->flags & AFP_RING_MODE) {
bae1b03c
EL
1512 if (ptv->flags & AFP_TPACKET_V3) {
1513 AFPReadFunc = AFPReadFromRingV3;
1514 } else {
1515 AFPReadFunc = AFPReadFromRing;
1516 }
5f400785
EL
1517 } else {
1518 AFPReadFunc = AFPRead;
1519 }
1520
60400163
EL
1521 if (ptv->afp_state == AFP_STATE_DOWN) {
1522 /* Wait for our turn, threads before us must have opened the socket */
1523 while (AFPPeersListWaitTurn(ptv->mpeer)) {
1524 usleep(1000);
1992a227
EL
1525 if (suricata_ctl_flags != 0) {
1526 break;
1527 }
60400163
EL
1528 }
1529 r = AFPCreateSocket(ptv, ptv->iface, 1);
1530 if (r < 0) {
1992a227
EL
1531 switch (-r) {
1532 case AFP_FATAL_ERROR:
1533 SCLogError(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, fatal error");
1992a227
EL
1534 SCReturnInt(TM_ECODE_FAILED);
1535 case AFP_RECOVERABLE_ERROR:
1536 SCLogWarning(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, retrying soon");
1537 }
60400163
EL
1538 }
1539 AFPPeersListReachedInc();
1540 }
1541 if (ptv->afp_state == AFP_STATE_UP) {
86a3f064 1542 SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
c99dc5a7 1543 AFPSynchronizeStart(ptv, &discarded_pkts);
7fea0ec6
EL
1544 /* let's reset counter as we will start the capture at the
1545 * next function call */
1546#ifdef PACKET_STATISTICS
1547 struct tpacket_stats kstats;
1548 socklen_t len = sizeof (struct tpacket_stats);
1549 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
1550 &kstats, &len) > -1) {
1551 uint64_t pkts = 0;
1552 SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
1553 ", dropped %" PRIu32 "",
1554 ptv->tv->name,
1555 kstats.tp_packets, kstats.tp_drops);
1556 pkts = kstats.tp_packets - discarded_pkts - kstats.tp_drops;
1557 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, pkts);
1558 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, pkts);
1559 }
1560#endif
60400163
EL
1561 }
1562
e80b30c0
EL
1563 fds.fd = ptv->socket;
1564 fds.events = POLLIN;
1565
1566 while (1) {
1567 /* Start by checking the state of our interface */
1568 if (unlikely(ptv->afp_state == AFP_STATE_DOWN)) {
1569 int dbreak = 0;
662dccd8 1570
e80b30c0
EL
1571 do {
1572 usleep(AFP_RECONNECT_TIMEOUT);
1573 if (suricata_ctl_flags != 0) {
1574 dbreak = 1;
1575 break;
1576 }
1577 r = AFPTryReopen(ptv);
09e709d1 1578 fds.fd = ptv->socket;
e80b30c0
EL
1579 } while (r < 0);
1580 if (dbreak == 1)
1581 break;
1582 }
1583
1584 /* make sure we have at least one packet in the packet pool, to prevent
1585 * us from alloc'ing packets at line rate */
3c6e01f6 1586 PacketPoolWait();
e80b30c0
EL
1587
1588 r = poll(&fds, 1, POLL_TIMEOUT);
1589
1590 if (suricata_ctl_flags != 0) {
1591 break;
1592 }
1593
1594 if (r > 0 &&
1595 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1596 if (fds.revents & (POLLHUP | POLLRDHUP)) {
13f13b6d 1597 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1598 continue;
ff6365dd 1599 } else if (fds.revents & POLLERR) {
e80b30c0
EL
1600 char c;
1601 /* Do a recv to get errno */
1602 if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
1603 continue; /* what, no error? */
3bea3b39 1604 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1605 "Error reading data from iface '%s': (%d) %s",
3bea3b39 1606 ptv->iface, errno, strerror(errno));
13f13b6d 1607 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1608 continue;
ff6365dd 1609 } else if (fds.revents & POLLNVAL) {
e80b30c0 1610 SCLogError(SC_ERR_AFP_READ, "Invalid polling request");
13f13b6d 1611 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1612 continue;
1613 }
1614 } else if (r > 0) {
5f400785 1615 r = AFPReadFunc(ptv);
62e63e3f 1616 switch (r) {
27adbfa8
EL
1617 case AFP_READ_OK:
1618 /* Trigger one dump of stats every second */
49612128
EL
1619 current_time = time(NULL);
1620 if (current_time != last_dump) {
27adbfa8 1621 AFPDumpCounters(ptv);
49612128 1622 last_dump = current_time;
27adbfa8
EL
1623 }
1624 break;
62e63e3f
EL
1625 case AFP_READ_FAILURE:
1626 /* AFPRead in error: best to reset the socket */
3bea3b39 1627 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1628 "AFPRead error reading data from iface '%s': (%d) %s",
3bea3b39 1629 ptv->iface, errno, strerror(errno));
13f13b6d 1630 AFPSwitchState(ptv, AFP_STATE_DOWN);
62e63e3f 1631 continue;
9efa4ace
EL
1632 case AFP_SURI_FAILURE:
1633 StatsIncr(ptv->tv, ptv->capture_errors);
62e63e3f 1634 break;
27b5136b 1635 case AFP_KERNEL_DROP:
e8a4a4c4 1636 AFPDumpCounters(ptv);
27b5136b 1637 break;
e80b30c0 1638 }
11099cfa 1639 } else if (unlikely(r == 0)) {
f53e687b
EL
1640 /* Trigger one dump of stats every second */
1641 current_time = time(NULL);
1642 if (current_time != last_dump) {
1643 AFPDumpCounters(ptv);
1644 last_dump = current_time;
1645 }
ce71bf1f 1646 /* poll timed out, lets see handle our timeout path */
49599dfe 1647 TmThreadsCaptureHandleTimeout(tv, NULL);
11099cfa 1648
e80b30c0 1649 } else if ((r < 0) && (errno != EINTR)) {
efbb5ce0 1650 SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d) %s",
3bea3b39 1651 ptv->iface,
e80b30c0 1652 errno, strerror(errno));
13f13b6d 1653 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1654 continue;
1655 }
752f03e7 1656 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1657 }
1658
4e561d6b 1659 AFPDumpCounters(ptv);
752f03e7 1660 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1661 SCReturnInt(TM_ECODE_OK);
1662}
1663
13f13b6d
EL
1664static int AFPGetDevFlags(int fd, const char *ifname)
1665{
1666 struct ifreq ifr;
1667
1668 memset(&ifr, 0, sizeof(ifr));
1669 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1670
1671 if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) {
1672 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1673 ifname, strerror(errno));
1674 return -1;
1675 }
1676
1677 return ifr.ifr_flags;
1678}
1679
1680
e80b30c0 1681static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose)
c45d8985
EL
1682{
1683 struct ifreq ifr;
1684
1685 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1686 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1687
1688 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
cba41207
AG
1689 if (verbose)
1690 SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
1691 ifname, strerror(errno));
c45d8985
EL
1692 return -1;
1693 }
1694
1695 return ifr.ifr_ifindex;
1696}
1697
e80b30c0 1698static int AFPGetDevLinktype(int fd, const char *ifname)
c45d8985
EL
1699{
1700 struct ifreq ifr;
1701
1702 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1703 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1704
1705 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1706 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1707 ifname, strerror(errno));
1708 return -1;
1709 }
1710
e80b30c0
EL
1711 switch (ifr.ifr_hwaddr.sa_family) {
1712 case ARPHRD_LOOPBACK:
1713 return LINKTYPE_ETHERNET;
1714 case ARPHRD_PPP:
11eb1d7c 1715 case ARPHRD_NONE:
e80b30c0
EL
1716 return LINKTYPE_RAW;
1717 default:
1718 return ifr.ifr_hwaddr.sa_family;
1719 }
c45d8985
EL
1720}
1721
b7bf299e
EL
1722int AFPGetLinkType(const char *ifname)
1723{
1724 int ltype;
1725
1726 int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1727 if (fd == -1) {
1728 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
1729 return LINKTYPE_RAW;
1730 }
1731
1732 ltype = AFPGetDevLinktype(fd, ifname);
1733 close(fd);
1734
1735 return ltype;
1736}
1737
49b7b00f
EL
1738static int AFPComputeRingParams(AFPThreadVars *ptv, int order)
1739{
1740 /* Compute structure:
1741 Target is to store all pending packets
1742 with a size equal to MTU + auxdata
1743 And we keep a decent number of block
1744
1745 To do so:
1746 Compute frame_size (aligned to be able to fit in block
1747 Check which block size we need. Blocksize is a 2^n * pagesize
1748 We then need to get order, big enough to have
1749 frame_size < block size
1750 Find number of frame per block (divide)
1751 Fill in packet_req
1752
1753 Compute frame size:
1754 described in packet_mmap.txt
1755 dependant on snaplen (need to use a variable ?)
1756snaplen: MTU ?
1757tp_hdrlen determine_version in daq_afpacket
1758in V1: sizeof(struct tpacket_hdr);
1759in V2: val in getsockopt(instance->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len)
1760frame size: TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1761
1762 */
1763 int tp_hdrlen = sizeof(struct tpacket_hdr);
1764 int snaplen = default_packet_size;
1765
03032457
EL
1766 if (snaplen == 0) {
1767 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1768 if (snaplen <= 0) {
1769 SCLogWarning(SC_ERR_INVALID_VALUE,
1770 "Unable to get MTU, setting snaplen to sane default of 1514");
1771 snaplen = 1514;
1772 }
1773 }
1774
69d0d484
VJ
1775 ptv->req.v2.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1776 ptv->req.v2.tp_block_size = getpagesize() << order;
1777 int frames_per_block = ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size;
49b7b00f 1778 if (frames_per_block == 0) {
bae1b03c 1779 SCLogError(SC_ERR_INVALID_VALUE, "Frame size bigger than block size");
49b7b00f
EL
1780 return -1;
1781 }
69d0d484
VJ
1782 ptv->req.v2.tp_frame_nr = ptv->ring_size;
1783 ptv->req.v2.tp_block_nr = ptv->req.v2.tp_frame_nr / frames_per_block + 1;
49b7b00f 1784 /* exact division */
69d0d484 1785 ptv->req.v2.tp_frame_nr = ptv->req.v2.tp_block_nr * frames_per_block;
b3bf7a57 1786 SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
69d0d484
VJ
1787 ptv->req.v2.tp_block_size, ptv->req.v2.tp_block_nr,
1788 ptv->req.v2.tp_frame_size, ptv->req.v2.tp_frame_nr);
49b7b00f
EL
1789 return 1;
1790}
1791
c2d0d938 1792#ifdef HAVE_TPACKET_V3
bae1b03c
EL
1793static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
1794{
69d0d484
VJ
1795 ptv->req.v3.tp_block_size = ptv->block_size;
1796 ptv->req.v3.tp_frame_size = 2048;
bae1b03c
EL
1797 int frames_per_block = 0;
1798 int tp_hdrlen = sizeof(struct tpacket3_hdr);
1799 int snaplen = default_packet_size;
1800
1801 if (snaplen == 0) {
1802 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1803 if (snaplen <= 0) {
1804 SCLogWarning(SC_ERR_INVALID_VALUE,
1805 "Unable to get MTU, setting snaplen to sane default of 1514");
1806 snaplen = 1514;
1807 }
1808 }
1809
69d0d484
VJ
1810 ptv->req.v3.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1811 frames_per_block = ptv->req.v3.tp_block_size / ptv->req.v3.tp_frame_size;
bae1b03c
EL
1812
1813 if (frames_per_block == 0) {
1814 SCLogError(SC_ERR_INVALID_VALUE,
1815 "Block size is too small, it should be at least %d",
69d0d484 1816 ptv->req.v3.tp_frame_size);
bae1b03c
EL
1817 return -1;
1818 }
69d0d484 1819 ptv->req.v3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
bae1b03c 1820 /* exact division */
69d0d484
VJ
1821 ptv->req.v3.tp_frame_nr = ptv->req.v3.tp_block_nr * frames_per_block;
1822 ptv->req.v3.tp_retire_blk_tov = ptv->block_timeout;
1823 ptv->req.v3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
b3bf7a57 1824 SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
69d0d484
VJ
1825 ptv->req.v3.tp_block_size, ptv->req.v3.tp_block_nr,
1826 ptv->req.v3.tp_frame_size, ptv->req.v3.tp_frame_nr,
1827 ptv->req.v3.tp_block_size * ptv->req.v3.tp_block_nr
bae1b03c
EL
1828 );
1829 return 1;
1830}
c2d0d938 1831#endif
bae1b03c 1832
c7bde9df
EL
1833static int AFPSetupRing(AFPThreadVars *ptv, char *devname)
1834{
1835 int val;
1836 unsigned int len = sizeof(val), i;
c7bde9df 1837 int order;
f5c20191 1838 int r, mmap_flag;
c7bde9df 1839
c2d0d938 1840#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1841 if (ptv->flags & AFP_TPACKET_V3) {
1842 val = TPACKET_V3;
f947539d 1843 } else
c2d0d938 1844#endif
f947539d 1845 {
c7bde9df
EL
1846 val = TPACKET_V2;
1847 }
1848 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
1849 if (errno == ENOPROTOOPT) {
1850 if (ptv->flags & AFP_TPACKET_V3) {
1851 SCLogError(SC_ERR_AFP_CREATE,
1852 "Too old kernel giving up (need 3.2 for TPACKET_V3)");
1853 } else {
1854 SCLogError(SC_ERR_AFP_CREATE,
1855 "Too old kernel giving up (need 2.6.27 at least)");
1856 }
1857 }
1858 SCLogError(SC_ERR_AFP_CREATE, "Error when retrieving packet header len");
1859 return AFP_FATAL_ERROR;
1860 }
1861
f947539d
VJ
1862 val = TPACKET_V2;
1863#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1864 if (ptv->flags & AFP_TPACKET_V3) {
1865 val = TPACKET_V3;
c7bde9df 1866 }
f947539d 1867#endif
c7bde9df
EL
1868 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_VERSION, &val,
1869 sizeof(val)) < 0) {
1870 SCLogError(SC_ERR_AFP_CREATE,
1871 "Can't activate TPACKET_V2/TPACKET_V3 on packet socket: %s",
1872 strerror(errno));
1873 return AFP_FATAL_ERROR;
1874 }
1875
a40f08a2
EL
1876#ifdef HAVE_HW_TIMESTAMPING
1877 int req = SOF_TIMESTAMPING_RAW_HARDWARE;
1878 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req,
1879 sizeof(req)) < 0) {
1880 SCLogWarning(SC_ERR_AFP_CREATE,
1881 "Can't activate hardware timestamping on packet socket: %s",
1882 strerror(errno));
1883 }
1884#endif
1885
ecf59be4
EL
1886 /* Let's reserve head room so we can add the VLAN header in IPS
1887 * or TAP mode before write the packet */
1888 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
1889 /* Only one vlan is extracted from AFP header so
1890 * one VLAN header length is enough. */
1891 int reserve = VLAN_HEADER_LEN;
1892 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
1893 sizeof(reserve)) < 0) {
1894 SCLogError(SC_ERR_AFP_CREATE,
1895 "Can't activate reserve on packet socket: %s",
1896 strerror(errno));
1897 return AFP_FATAL_ERROR;
1898 }
1899 }
1900
c7bde9df 1901 /* Allocate RX ring */
c2d0d938 1902#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1903 if (ptv->flags & AFP_TPACKET_V3) {
1904 if (AFPComputeRingParamsV3(ptv) != 1) {
1905 return AFP_FATAL_ERROR;
1906 }
1907 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
69d0d484 1908 (void *) &ptv->req.v3, sizeof(ptv->req.v3));
c7bde9df
EL
1909 if (r < 0) {
1910 SCLogError(SC_ERR_MEM_ALLOC,
1911 "Unable to allocate RX Ring for iface %s: (%d) %s",
1912 devname,
1913 errno,
1914 strerror(errno));
1915 return AFP_FATAL_ERROR;
1916 }
1917 } else {
c2d0d938 1918#endif
fa902abe 1919 for (order = AFP_BLOCK_SIZE_DEFAULT_ORDER; order >= 0; order--) {
c7bde9df
EL
1920 if (AFPComputeRingParams(ptv, order) != 1) {
1921 SCLogInfo("Ring parameter are incorrect. Please correct the devel");
1922 return AFP_FATAL_ERROR;
1923 }
1924
1925 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1926 (void *) &ptv->req, sizeof(ptv->req));
1927
1928 if (r < 0) {
1929 if (errno == ENOMEM) {
1930 SCLogInfo("Memory issue with ring parameters. Retrying.");
1931 continue;
1932 }
1933 SCLogError(SC_ERR_MEM_ALLOC,
1934 "Unable to allocate RX Ring for iface %s: (%d) %s",
1935 devname,
1936 errno,
1937 strerror(errno));
1938 return AFP_FATAL_ERROR;
1939 } else {
1940 break;
1941 }
1942 }
1943 if (order < 0) {
1944 SCLogError(SC_ERR_MEM_ALLOC,
1945 "Unable to allocate RX Ring for iface %s (order 0 failed)",
1946 devname);
1947 return AFP_FATAL_ERROR;
1948 }
c2d0d938 1949#ifdef HAVE_TPACKET_V3
c7bde9df 1950 }
c2d0d938 1951#endif
c7bde9df
EL
1952
1953 /* Allocate the Ring */
c2d0d938 1954#ifdef HAVE_TPACKET_V3
c7bde9df 1955 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484 1956 ptv->ring_buflen = ptv->req.v3.tp_block_nr * ptv->req.v3.tp_block_size;
c7bde9df 1957 } else {
c2d0d938 1958#endif
69d0d484 1959 ptv->ring_buflen = ptv->req.v2.tp_block_nr * ptv->req.v2.tp_block_size;
c2d0d938 1960#ifdef HAVE_TPACKET_V3
c7bde9df 1961 }
c2d0d938 1962#endif
f5c20191
EL
1963 mmap_flag = MAP_SHARED;
1964 if (ptv->flags & AFP_MMAP_LOCKED)
1965 mmap_flag |= MAP_LOCKED;
cba41207 1966 ptv->ring_buf = mmap(0, ptv->ring_buflen, PROT_READ|PROT_WRITE,
f5c20191 1967 mmap_flag, ptv->socket, 0);
cba41207 1968 if (ptv->ring_buf == MAP_FAILED) {
88f5d7d1
EL
1969 SCLogError(SC_ERR_MEM_ALLOC, "Unable to mmap, error %s",
1970 strerror(errno));
c7bde9df
EL
1971 goto mmap_err;
1972 }
c2d0d938 1973#ifdef HAVE_TPACKET_V3
c7bde9df 1974 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
1975 ptv->ring.v3 = SCMalloc(ptv->req.v3.tp_block_nr * sizeof(*ptv->ring.v3));
1976 if (!ptv->ring.v3) {
1977 SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring.v3");
291af719 1978 goto postmmap_err;
c7bde9df 1979 }
69d0d484
VJ
1980 for (i = 0; i < ptv->req.v3.tp_block_nr; ++i) {
1981 ptv->ring.v3[i].iov_base = ptv->ring_buf + (i * ptv->req.v3.tp_block_size);
1982 ptv->ring.v3[i].iov_len = ptv->req.v3.tp_block_size;
c7bde9df
EL
1983 }
1984 } else {
c2d0d938 1985#endif
c7bde9df 1986 /* allocate a ring for each frame header pointer*/
69d0d484
VJ
1987 ptv->ring.v2 = SCMalloc(ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
1988 if (ptv->ring.v2 == NULL) {
c7bde9df 1989 SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate frame buf");
291af719 1990 goto postmmap_err;
c7bde9df 1991 }
69d0d484 1992 memset(ptv->ring.v2, 0, ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
c7bde9df
EL
1993 /* fill the header ring with proper frame ptr*/
1994 ptv->frame_offset = 0;
69d0d484
VJ
1995 for (i = 0; i < ptv->req.v2.tp_block_nr; ++i) {
1996 void *base = &(ptv->ring_buf[i * ptv->req.v2.tp_block_size]);
c7bde9df 1997 unsigned int j;
69d0d484
VJ
1998 for (j = 0; j < ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size; ++j, ++ptv->frame_offset) {
1999 (((union thdr **)ptv->ring.v2)[ptv->frame_offset]) = base;
2000 base += ptv->req.v2.tp_frame_size;
c7bde9df
EL
2001 }
2002 }
2003 ptv->frame_offset = 0;
c2d0d938 2004#ifdef HAVE_TPACKET_V3
c7bde9df 2005 }
c2d0d938 2006#endif
c7bde9df
EL
2007
2008 return 0;
2009
291af719 2010postmmap_err:
cba41207 2011 munmap(ptv->ring_buf, ptv->ring_buflen);
69d0d484
VJ
2012 if (ptv->ring.v2)
2013 SCFree(ptv->ring.v2);
2014 if (ptv->ring.v3)
2015 SCFree(ptv->ring.v3);
c7bde9df
EL
2016mmap_err:
2017 /* Packet mmap does the cleaning when socket is closed */
2018 return AFP_FATAL_ERROR;
2019}
2020
402bdf9b
VJ
/**
 * \brief Probe whether the running kernel accepts PACKET_FANOUT.
 *
 * A throw-away AF_PACKET socket is opened and asked to join a
 * PACKET_FANOUT_HASH group (with defrag flag) using the given cluster id.
 * Older kernels like those in CentOS6 have HAVE_PACKET_FANOUT defined but
 * fail to work, hence the runtime probe.
 *
 * \param cluster_id fanout group id to try
 * \retval 1 the setsockopt call succeeded
 * \retval 0 fanout is unavailable (not compiled in, socket creation failed,
 *           or the kernel refused the option)
 */
int AFPIsFanoutSupported(uint16_t cluster_id)
{
#ifdef HAVE_PACKET_FANOUT
    const uint32_t fanout_mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
    uint32_t fanout_arg = (fanout_mode << 16) | cluster_id;

    int probe_fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (probe_fd < 0) {
        return 0;
    }

    int rc = setsockopt(probe_fd, SOL_PACKET, PACKET_FANOUT, (void *)&fanout_arg,
            sizeof(fanout_arg));
    /* the probe socket is no longer needed whatever the outcome */
    close(probe_fd);

    if (rc >= 0) {
        return 1;
    }

    SCLogError(SC_ERR_INVALID_VALUE, "fanout not supported by kernel: "
            "Kernel too old or cluster-id %d already in use.", cluster_id);
    return 0;
#else
    return 0;
#endif
}
2046
91e1256b
EL
2047#ifdef HAVE_PACKET_EBPF
2048
2049static int SockFanoutSeteBPF(AFPThreadVars *ptv)
2050{
2051 int pfd = ptv->ebpf_lb_fd;
2052 if (pfd == -1) {
2053 SCLogError(SC_ERR_INVALID_VALUE,
2054 "Fanout file descriptor is invalid");
2055 return -1;
2056 }
2057
2058 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
2059 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf");
2060 return -1;
2061 }
2062 SCLogInfo("Activated eBPF on socket");
2063
2064 return 0;
2065}
2066
2067static int SetEbpfFilter(AFPThreadVars *ptv)
2068{
2069 int pfd = ptv->ebpf_filter_fd;
2070 if (pfd == -1) {
2071 SCLogError(SC_ERR_INVALID_VALUE,
2072 "Filter file descriptor is invalid");
2073 return -1;
2074 }
2075
2076 if (setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &pfd, sizeof(pfd))) {
2077 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
2078 return -1;
2079 }
2080 SCLogInfo("Activated eBPF filter on socket");
2081
2082 return 0;
2083}
2084#endif
2085
e80b30c0 2086static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
c45d8985
EL
2087{
2088 int r;
1992a227 2089 int ret = AFP_FATAL_ERROR;
c45d8985
EL
2090 struct packet_mreq sock_params;
2091 struct sockaddr_ll bind_address;
662dccd8 2092 int if_idx;
49b7b00f 2093
c45d8985
EL
2094 /* open socket */
2095 ptv->socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
2096 if (ptv->socket == -1) {
e80b30c0 2097 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
13f13b6d 2098 goto error;
c45d8985 2099 }
cba41207 2100
662dccd8 2101 if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);
cba41207
AG
2102
2103 if (if_idx == -1) {
fcd5e138 2104 goto socket_err;
cba41207
AG
2105 }
2106
c45d8985
EL
2107 /* bind socket */
2108 memset(&bind_address, 0, sizeof(bind_address));
2109 bind_address.sll_family = AF_PACKET;
2110 bind_address.sll_protocol = htons(ETH_P_ALL);
662dccd8 2111 bind_address.sll_ifindex = if_idx;
c45d8985
EL
2112 if (bind_address.sll_ifindex == -1) {
2113 if (verbose)
e80b30c0 2114 SCLogError(SC_ERR_AFP_CREATE, "Couldn't find iface %s", devname);
1992a227 2115 ret = AFP_RECOVERABLE_ERROR;
13f13b6d
EL
2116 goto socket_err;
2117 }
2118
cba41207
AG
2119 int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
2120 if (if_flags == -1) {
2121 if (verbose) {
2122 SCLogError(SC_ERR_AFP_READ,
2123 "Couldn't get flags for interface '%s'",
2124 ptv->iface);
2125 }
2126 ret = AFP_RECOVERABLE_ERROR;
2127 goto socket_err;
2128 } else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
2129 if (verbose) {
2130 SCLogError(SC_ERR_AFP_READ,
2131 "Interface '%s' is down",
2132 ptv->iface);
2133 }
2134 ret = AFP_RECOVERABLE_ERROR;
2135 goto socket_err;
2136 }
2137
13f13b6d
EL
2138 if (ptv->promisc != 0) {
2139 /* Force promiscuous mode */
2140 memset(&sock_params, 0, sizeof(sock_params));
2141 sock_params.mr_type = PACKET_MR_PROMISC;
2142 sock_params.mr_ifindex = bind_address.sll_ifindex;
2143 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP,(void *)&sock_params, sizeof(sock_params));
2144 if (r < 0) {
2145 SCLogError(SC_ERR_AFP_CREATE,
2146 "Couldn't switch iface %s to promiscuous, error %s",
2147 devname, strerror(errno));
c7bde9df 2148 goto socket_err;
13f13b6d
EL
2149 }
2150 }
2151
2152 if (ptv->checksum_mode == CHECKSUM_VALIDATION_KERNEL) {
2153 int val = 1;
2154 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_AUXDATA, &val,
2155 sizeof(val)) == -1 && errno != ENOPROTOOPT) {
2156 SCLogWarning(SC_ERR_NO_AF_PACKET,
4111331a 2157 "'kernel' checksum mode not supported, falling back to full mode.");
13f13b6d
EL
2158 ptv->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
2159 }
2160 }
2161
2162 /* set socket recv buffer size */
2163 if (ptv->buffer_size != 0) {
2164 /*
2165 * Set the socket buffer size to the specified value.
2166 */
b3bf7a57 2167 SCLogPerf("Setting AF_PACKET socket buffer to %d", ptv->buffer_size);
13f13b6d
EL
2168 if (setsockopt(ptv->socket, SOL_SOCKET, SO_RCVBUF,
2169 &ptv->buffer_size,
2170 sizeof(ptv->buffer_size)) == -1) {
2171 SCLogError(SC_ERR_AFP_CREATE,
2172 "Couldn't set buffer size to %d on iface %s, error %s",
2173 ptv->buffer_size, devname, strerror(errno));
c7bde9df 2174 goto socket_err;
13f13b6d
EL
2175 }
2176 }
2177
2178 r = bind(ptv->socket, (struct sockaddr *)&bind_address, sizeof(bind_address));
2179 if (r < 0) {
2180 if (verbose) {
2181 if (errno == ENETDOWN) {
2182 SCLogError(SC_ERR_AFP_CREATE,
2183 "Couldn't bind AF_PACKET socket, iface %s is down",
2184 devname);
2185 } else {
2186 SCLogError(SC_ERR_AFP_CREATE,
2187 "Couldn't bind AF_PACKET socket to iface %s, error %s",
2188 devname, strerror(errno));
2189 }
2190 }
1992a227 2191 ret = AFP_RECOVERABLE_ERROR;
c7bde9df 2192 goto socket_err;
13f13b6d
EL
2193 }
2194
91e1256b 2195
238ff231
EL
2196#ifdef HAVE_PACKET_FANOUT
2197 /* add binded socket to fanout group */
2198 if (ptv->threads > 1) {
d8c82d4f 2199 uint32_t mode = ptv->cluster_type;
238ff231 2200 uint16_t id = ptv->cluster_id;
4111331a 2201 uint32_t option = (mode << 16) | (id & 0xffff);
238ff231
EL
2202 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
2203 if (r < 0) {
2204 SCLogError(SC_ERR_AFP_CREATE,
4111331a 2205 "Couldn't set fanout mode, error %s",
238ff231 2206 strerror(errno));
c7bde9df 2207 goto socket_err;
238ff231
EL
2208 }
2209 }
2210#endif
2211
91e1256b
EL
2212#ifdef HAVE_PACKET_EBPF
2213 if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
2214 r = SockFanoutSeteBPF(ptv);
2215 if (r < 0) {
2216 SCLogError(SC_ERR_AFP_CREATE,
2217 "Coudn't set EBPF, error %s",
2218 strerror(errno));
2219 goto socket_err;
2220 }
2221 }
2222#endif
2223
49b7b00f 2224 if (ptv->flags & AFP_RING_MODE) {
c7bde9df
EL
2225 ret = AFPSetupRing(ptv, devname);
2226 if (ret != 0)
13f13b6d 2227 goto socket_err;
49b7b00f
EL
2228 }
2229
86a3f064 2230 SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
c45d8985 2231
c85ee1e3
EL
2232 ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
2233 switch (ptv->datalink) {
2234 case ARPHRD_PPP:
2235 case ARPHRD_ATM:
2236 ptv->cooked = 1;
619414c5 2237 break;
c85ee1e3
EL
2238 }
2239
f47df5a6 2240 TmEcode rc = AFPSetBPFFilter(ptv);
f2a6fb8a 2241 if (rc == TM_ECODE_FAILED) {
39807b47
AG
2242 ret = AFP_FATAL_ERROR;
2243 goto socket_err;
f2a6fb8a
EL
2244 }
2245
49b7b00f 2246 /* Init is ok */
13f13b6d 2247 AFPSwitchState(ptv, AFP_STATE_UP);
c45d8985 2248 return 0;
13f13b6d 2249
13f13b6d
EL
2250socket_err:
2251 close(ptv->socket);
2252 ptv->socket = -1;
f47df5a6 2253 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
2254 if (ptv->ring.v3) {
2255 SCFree(ptv->ring.v3);
2256 ptv->ring.v3 = NULL;
f47df5a6
VJ
2257 }
2258 } else {
69d0d484
VJ
2259 if (ptv->ring.v2) {
2260 SCFree(ptv->ring.v2);
2261 ptv->ring.v2 = NULL;
f47df5a6
VJ
2262 }
2263 }
2264
13f13b6d 2265error:
1992a227 2266 return -ret;
c45d8985
EL
2267}
2268
f2a6fb8a
EL
2269TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
2270{
2271 struct bpf_program filter;
2272 struct sock_fprog fcode;
2273 int rc;
2274
91e1256b
EL
2275#ifdef HAVE_PACKET_EBPF
2276 if (ptv->ebpf_filter_fd != -1) {
2277 return SetEbpfFilter(ptv);
2278 }
2279#endif
2280
f2a6fb8a
EL
2281 if (!ptv->bpf_filter)
2282 return TM_ECODE_OK;
2283
f2a6fb8a
EL
2284 SCLogInfo("Using BPF '%s' on iface '%s'",
2285 ptv->bpf_filter,
2286 ptv->iface);
28e9e4c8
EL
2287
2288 char errbuf[PCAP_ERRBUF_SIZE];
2289 if (SCBPFCompile(default_packet_size, /* snaplen_arg */
f2a6fb8a
EL
2290 ptv->datalink, /* linktype_arg */
2291 &filter, /* program */
2292 ptv->bpf_filter, /* const char *buf */
cc82ef06 2293 1, /* optimize */
28e9e4c8
EL
2294 0, /* mask */
2295 errbuf,
2296 sizeof(errbuf)) == -1) {
2297 SCLogError(SC_ERR_AFP_CREATE, "Failed to compile BPF \"%s\": %s",
2298 ptv->bpf_filter,
2299 errbuf);
f2a6fb8a
EL
2300 return TM_ECODE_FAILED;
2301 }
2302
2303 fcode.len = filter.bf_len;
2304 fcode.filter = (struct sock_filter*)filter.bf_insns;
2305
2306 rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
2307
28e9e4c8 2308 SCBPFFree(&filter);
f2a6fb8a
EL
2309 if(rc == -1) {
2310 SCLogError(SC_ERR_AFP_CREATE, "Failed to attach filter: %s", strerror(errno));
2311 return TM_ECODE_FAILED;
2312 }
2313
f2a6fb8a
EL
2314 return TM_ECODE_OK;
2315}
2316
06173267
EL
2317#ifdef HAVE_PACKET_EBPF
2318/**
2319 * Insert a half flow in the kernel bypass table
2320 *
2321 * \param mapfd file descriptor of the protocol bypass table
2322 * \param key data to use as key in the table
2598078e 2323 * \return 0 in case of error, 1 if success
06173267 2324 */
69d2c8eb 2325static int AFPInsertHalfFlow(int mapd, void *key, unsigned int nr_cpus)
06173267 2326{
651a27e4 2327 BPF_DECLARE_PERCPU(struct pair, value, nr_cpus);
17a32bda 2328 unsigned int i;
1e729f05
EL
2329
2330 if (mapd == -1) {
2331 return 0;
2332 }
2333
94a622cb 2334 /* We use a per CPU structure so we have to set an array of values as the kernel
6ab1cbcb
EL
2335 * is not duplicating the data on each CPU by itself. */
2336 for (i = 0; i < nr_cpus; i++) {
651a27e4
EL
2337 BPF_PERCPU(value, i).packets = 0;
2338 BPF_PERCPU(value, i).bytes = 0;
17a32bda 2339 }
17a32bda
EL
2340 if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) != 0) {
2341 switch (errno) {
3379311e 2342 /* no more place in the hash */
17a32bda 2343 case E2BIG:
17a32bda 2344 return 0;
fcae1c18
EL
2345 /* no more place in the hash for some hardware bypass */
2346 case EAGAIN:
2347 return 0;
3379311e
EL
2348 /* if we already have the key then bypass is a success */
2349 case EEXIST:
2350 return 1;
2351 /* Not supposed to be there so issue a error */
17a32bda
EL
2352 default:
2353 SCLogError(SC_ERR_BPF, "Can't update eBPF map: %s (%d)",
2354 strerror(errno),
2355 errno);
2356 return 0;
06173267 2357 }
17a32bda
EL
2358 }
2359 return 1;
06173267 2360}
b07bda7a 2361
9206b30f
EL
2362static int AFPSetFlowStorage(Packet *p, int map_fd, void *key0, void* key1,
2363 int family)
b07bda7a
EL
2364{
2365 FlowBypassInfo *fc = FlowGetStorageById(p->flow, GetFlowBypassInfoID());
2366 if (fc) {
2367 EBPFBypassData *eb = SCCalloc(1, sizeof(EBPFBypassData));
2368 if (eb == NULL) {
9206b30f
EL
2369 EBPFDeleteKey(map_fd, key0);
2370 EBPFDeleteKey(map_fd, key1);
2371 LiveDevAddBypassFail(p->livedev, 1, family);
b07bda7a
EL
2372 SCFree(key0);
2373 SCFree(key1);
2374 return 0;
2375 }
2376 eb->key[0] = key0;
2377 eb->key[1] = key1;
2378 eb->mapfd = map_fd;
2379 eb->cpus_count = p->afp_v.nr_cpus;
2380 fc->BypassUpdate = EBPFBypassUpdate;
2381 fc->BypassFree = EBPFBypassFree;
2382 fc->bypass_data = eb;
9206b30f
EL
2383 } else {
2384 EBPFDeleteKey(map_fd, key0);
2385 EBPFDeleteKey(map_fd, key1);
2386 LiveDevAddBypassFail(p->livedev, 1, family);
2387 SCFree(key0);
2388 SCFree(key1);
2389 return 0;
b07bda7a 2390 }
9206b30f
EL
2391
2392 LiveDevAddBypassStats(p->livedev, 1, family);
6126f105 2393 LiveDevAddBypassSuccess(p->livedev, 1, family);
b07bda7a
EL
2394 return 1;
2395}
2396
2598078e 2397/**
94a622cb
EL
2398 * Bypass function for AF_PACKET capture in eBPF mode
2399 *
2400 * This function creates two half flows in the map shared with the kernel
2401 * to trigger bypass.
2402 *
2403 * The implementation of bypass is done via an IPv4 and an IPv6 flow table.
2404 * This table contains the list of half flows to bypass. The in-kernel filter
2405 * will skip/drop the packet if they belong to a flow in one of the flows
2406 * table.
2407 *
2408 * \param p the packet belonging to the flow to bypass
2409 * \return 0 if unable to bypass, 1 if success
2598078e 2410 */
06173267
EL
2411static int AFPBypassCallback(Packet *p)
2412{
06173267
EL
2413 SCLogDebug("Calling af_packet callback function");
2414 /* Only bypass TCP and UDP */
2415 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2416 return 0;
2417 }
2418
fc2f2fa7
EL
2419 /* If we don't have a flow attached to packet the eBPF map entries
2420 * will be destroyed at first flow bypass manager pass as we won't
2421 * find any associated entry */
2422 if (p->flow == NULL) {
2423 return 0;
2424 }
06173267
EL
2425 /* Bypassing tunneled packets is currently not supported
2426 * because we can't discard the inner packet only due to
2427 * primitive parsing in eBPF */
2428 if (IS_TUNNEL_PKT(p)) {
2429 return 0;
2430 }
06173267 2431 if (PKT_IS_IPV4(p)) {
d65f4585 2432 SCLogDebug("add an IPv4");
eff10fce
EL
2433 if (p->afp_v.v4_map_fd == -1) {
2434 return 0;
2435 }
b07bda7a
EL
2436 struct flowv4_keys *keys[2];
2437 keys[0] = SCCalloc(1, sizeof(struct flowv4_keys));
2438 if (keys[0] == NULL) {
2439 return 0;
2440 }
2441 keys[0]->src = htonl(GET_IPV4_SRC_ADDR_U32(p));
2442 keys[0]->dst = htonl(GET_IPV4_DST_ADDR_U32(p));
2443 keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
2444 keys[0]->port16[1] = GET_TCP_DST_PORT(p);
d119845d
EL
2445 keys[0]->vlan0 = p->vlan_id[0];
2446 keys[0]->vlan1 = p->vlan_id[1];
8c880879 2447
d119845d
EL
2448 if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
2449 keys[0]->ip_proto = 1;
2450 } else {
2451 keys[0]->ip_proto = 0;
2452 }
69d2c8eb 2453 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
fc2f2fa7 2454 p->afp_v.nr_cpus) == 0) {
9206b30f 2455 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2456 SCFree(keys[0]);
2457 return 0;
2458 }
2459 keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
2460 if (keys[1] == NULL) {
9206b30f
EL
2461 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2462 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a 2463 SCFree(keys[0]);
06173267
EL
2464 return 0;
2465 }
b07bda7a
EL
2466 keys[1]->src = htonl(GET_IPV4_DST_ADDR_U32(p));
2467 keys[1]->dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
2468 keys[1]->port16[0] = GET_TCP_DST_PORT(p);
2469 keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
d119845d
EL
2470 keys[1]->vlan0 = p->vlan_id[0];
2471 keys[1]->vlan1 = p->vlan_id[1];
b07bda7a 2472
d119845d 2473 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2474 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
fc2f2fa7 2475 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2476 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2477 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2478 SCFree(keys[0]);
2479 SCFree(keys[1]);
06173267
EL
2480 return 0;
2481 }
315c29a8 2482 EBPFUpdateFlow(p->flow, p, NULL);
9206b30f 2483 return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
06173267
EL
2484 }
2485 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2486 if (PKT_IS_IPV6(p) &&
06173267 2487 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
d65f4585 2488 int i;
eff10fce
EL
2489 if (p->afp_v.v6_map_fd == -1) {
2490 return 0;
2491 }
06173267 2492 SCLogDebug("add an IPv6");
b07bda7a
EL
2493 struct flowv6_keys *keys[2];
2494 keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
2495 if (keys[0] == NULL) {
9206b30f 2496 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2497 return 0;
2498 }
06173267 2499 for (i = 0; i < 4; i++) {
b07bda7a
EL
2500 keys[0]->src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2501 keys[0]->dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2502 }
2503 keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
2504 keys[0]->port16[1] = GET_TCP_DST_PORT(p);
d119845d
EL
2505 keys[0]->vlan0 = p->vlan_id[0];
2506 keys[0]->vlan1 = p->vlan_id[1];
2507
2508 if (IPV6_GET_NH(p) == IPPROTO_TCP) {
2509 keys[0]->ip_proto = 1;
2510 } else {
2511 keys[0]->ip_proto = 0;
2512 }
69d2c8eb 2513 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
fc2f2fa7 2514 p->afp_v.nr_cpus) == 0) {
9206b30f 2515 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a 2516 SCFree(keys[0]);
06173267
EL
2517 return 0;
2518 }
b07bda7a
EL
2519 keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
2520 if (keys[1] == NULL) {
9206b30f
EL
2521 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2522 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2523 SCFree(keys[0]);
2524 return 0;
06173267 2525 }
b07bda7a
EL
2526 for (i = 0; i < 4; i++) {
2527 keys[1]->src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2528 keys[1]->dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2529 }
2530 keys[1]->port16[0] = GET_TCP_DST_PORT(p);
2531 keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
d119845d
EL
2532 keys[1]->vlan0 = p->vlan_id[0];
2533 keys[1]->vlan1 = p->vlan_id[1];
2534
2535 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2536 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
fc2f2fa7 2537 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2538 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2539 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2540 SCFree(keys[0]);
2541 SCFree(keys[1]);
06173267
EL
2542 return 0;
2543 }
fc2f2fa7
EL
2544 if (p->flow)
2545 EBPFUpdateFlow(p->flow, p, NULL);
9206b30f 2546 return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
06173267 2547 }
06173267
EL
2548 return 0;
2549}
2550
94a622cb
EL
2551/**
2552 * Bypass function for AF_PACKET capture in XDP mode
2553 *
2554 * This function creates two half flows in the map shared with the kernel
2555 * to trigger bypass. This function is similar to AFPBypassCallback() but
2556 * the bytes order is changed for some data due to the way we get the data
2557 * in the XDP case.
2558 *
2559 * \param p the packet belonging to the flow to bypass
2560 * \return 0 if unable to bypass, 1 if success
2561 */
8c880879
EL
2562static int AFPXDPBypassCallback(Packet *p)
2563{
8c880879
EL
2564 SCLogDebug("Calling af_packet callback function");
2565 /* Only bypass TCP and UDP */
2566 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2567 return 0;
2568 }
2569
fc2f2fa7
EL
2570 /* If we don't have a flow attached to packet the eBPF map entries
2571 * will be destroyed at first flow bypass manager pass as we won't
2572 * find any associated entry */
2573 if (p->flow == NULL) {
2574 return 0;
2575 }
8c880879
EL
2576 /* Bypassing tunneled packets is currently not supported
2577 * because we can't discard the inner packet only due to
2578 * primitive parsing in eBPF */
2579 if (IS_TUNNEL_PKT(p)) {
2580 return 0;
2581 }
8c880879 2582 if (PKT_IS_IPV4(p)) {
b07bda7a
EL
2583 struct flowv4_keys *keys[2];
2584 keys[0]= SCCalloc(1, sizeof(struct flowv4_keys));
2585 if (keys[0] == NULL) {
9206b30f 2586 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2587 return 0;
2588 }
eff10fce 2589 if (p->afp_v.v4_map_fd == -1) {
b07bda7a 2590 SCFree(keys[0]);
eff10fce
EL
2591 return 0;
2592 }
b07bda7a
EL
2593 keys[0]->src = p->src.addr_data32[0];
2594 keys[0]->dst = p->dst.addr_data32[0];
94a622cb 2595 /* In the XDP filter we get port from parsing of packet and not from skb
6062c27e 2596 * (as in eBPF filter) so we need to pass from host to network order */
b07bda7a
EL
2597 keys[0]->port16[0] = htons(p->sp);
2598 keys[0]->port16[1] = htons(p->dp);
d119845d
EL
2599 keys[0]->vlan0 = p->vlan_id[0];
2600 keys[0]->vlan1 = p->vlan_id[1];
2601 if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
2602 keys[0]->ip_proto = 1;
2603 } else {
2604 keys[0]->ip_proto = 0;
2605 }
69d2c8eb 2606 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
fc2f2fa7 2607 p->afp_v.nr_cpus) == 0) {
9206b30f 2608 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2609 SCFree(keys[0]);
2610 return 0;
2611 }
2612 keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
2613 if (keys[1] == NULL) {
9206b30f
EL
2614 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2615 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a 2616 SCFree(keys[0]);
8c880879
EL
2617 return 0;
2618 }
b07bda7a
EL
2619 keys[1]->src = p->dst.addr_data32[0];
2620 keys[1]->dst = p->src.addr_data32[0];
2621 keys[1]->port16[0] = htons(p->dp);
2622 keys[1]->port16[1] = htons(p->sp);
d119845d
EL
2623 keys[1]->vlan0 = p->vlan_id[0];
2624 keys[1]->vlan1 = p->vlan_id[1];
2625 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2626 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
fc2f2fa7 2627 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2628 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2629 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2630 SCFree(keys[0]);
2631 SCFree(keys[1]);
8c880879
EL
2632 return 0;
2633 }
9206b30f 2634 return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
8c880879
EL
2635 }
2636 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2637 if (PKT_IS_IPV6(p) &&
8c880879 2638 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
8c880879 2639 SCLogDebug("add an IPv6");
eff10fce
EL
2640 if (p->afp_v.v6_map_fd == -1) {
2641 return 0;
2642 }
d65f4585 2643 int i;
b07bda7a
EL
2644 struct flowv6_keys *keys[2];
2645 keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
2646 if (keys[0] == NULL) {
2647 return 0;
2648 }
2649
8c880879 2650 for (i = 0; i < 4; i++) {
b07bda7a
EL
2651 keys[0]->src[i] = GET_IPV6_SRC_ADDR(p)[i];
2652 keys[0]->dst[i] = GET_IPV6_DST_ADDR(p)[i];
2653 }
2654 keys[0]->port16[0] = htons(GET_TCP_SRC_PORT(p));
2655 keys[0]->port16[1] = htons(GET_TCP_DST_PORT(p));
d119845d
EL
2656 keys[0]->vlan0 = p->vlan_id[0];
2657 keys[0]->vlan1 = p->vlan_id[1];
2658 if (IPV6_GET_NH(p) == IPPROTO_TCP) {
2659 keys[0]->ip_proto = 1;
2660 } else {
2661 keys[0]->ip_proto = 0;
2662 }
69d2c8eb 2663 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
fc2f2fa7 2664 p->afp_v.nr_cpus) == 0) {
9206b30f 2665 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a 2666 SCFree(keys[0]);
8c880879
EL
2667 return 0;
2668 }
b07bda7a
EL
2669 keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
2670 if (keys[1] == NULL) {
9206b30f
EL
2671 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2672 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2673 SCFree(keys[0]);
2674 return 0;
8c880879 2675 }
b07bda7a
EL
2676 for (i = 0; i < 4; i++) {
2677 keys[1]->src[i] = GET_IPV6_DST_ADDR(p)[i];
2678 keys[1]->dst[i] = GET_IPV6_SRC_ADDR(p)[i];
2679 }
2680 keys[1]->port16[0] = htons(GET_TCP_DST_PORT(p));
2681 keys[1]->port16[1] = htons(GET_TCP_SRC_PORT(p));
d119845d
EL
2682 keys[1]->vlan0 = p->vlan_id[0];
2683 keys[1]->vlan1 = p->vlan_id[1];
2684 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2685 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
fc2f2fa7 2686 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2687 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2688 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2689 SCFree(keys[0]);
2690 SCFree(keys[1]);
8c880879
EL
2691 return 0;
2692 }
9206b30f 2693 return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
8c880879 2694 }
8c880879
EL
2695 return 0;
2696}
2697
5e62ae6d
EL
/** Flag consulted by ReceiveAFPThreadInit() when the "flow_table_v4" eBPF
 *  map can't be found, to decide whether the error gets logged. */
bool g_flowv4_ok = true;
/** Flag consulted by ReceiveAFPThreadInit() when the "flow_table_v6" eBPF
 *  map can't be found, to decide whether the error gets logged. */
bool g_flowv6_ok = true;
2700
5e05fedc
VJ
2701#endif /* HAVE_PACKET_EBPF */
2702
c45d8985
EL
2703/**
2704 * \brief Init function for ReceiveAFP.
2705 *
2706 * \param tv pointer to ThreadVars
2707 * \param initdata pointer to the interface passed from the user
2708 * \param data pointer gets populated with AFPThreadVars
2709 *
2710 * \todo Create a general AFP setup function.
2711 */
ab1200fb 2712TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
8f1d7503 2713{
c45d8985 2714 SCEnter();
ab1200fb 2715 AFPIfaceConfig *afpconfig = (AFPIfaceConfig *)initdata;
c45d8985 2716
c45d8985
EL
2717 if (initdata == NULL) {
2718 SCLogError(SC_ERR_INVALID_ARGUMENT, "initdata == NULL");
2719 SCReturnInt(TM_ECODE_FAILED);
2720 }
2721
2722 AFPThreadVars *ptv = SCMalloc(sizeof(AFPThreadVars));
e176be6f 2723 if (unlikely(ptv == NULL)) {
45d5c3ca 2724 afpconfig->DerefFunc(afpconfig);
c45d8985 2725 SCReturnInt(TM_ECODE_FAILED);
45d5c3ca 2726 }
c45d8985
EL
2727 memset(ptv, 0, sizeof(AFPThreadVars));
2728
2729 ptv->tv = tv;
2730 ptv->cooked = 0;
2731
fbca1a4e 2732 strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
c45d8985
EL
2733 ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2734
51eb9605
EL
2735 ptv->livedev = LiveGetDevice(ptv->iface);
2736 if (ptv->livedev == NULL) {
2737 SCLogError(SC_ERR_INVALID_VALUE, "Unable to find Live device");
11bdf483 2738 SCFree(ptv);
51eb9605
EL
2739 SCReturnInt(TM_ECODE_FAILED);
2740 }
2741
fbca1a4e 2742 ptv->buffer_size = afpconfig->buffer_size;
8879df80 2743 ptv->ring_size = afpconfig->ring_size;
fa902abe 2744 ptv->block_size = afpconfig->block_size;
8baf64f5 2745 ptv->block_timeout = afpconfig->block_timeout;
e80b30c0 2746
df7dbe36 2747 ptv->promisc = afpconfig->promisc;
6062e00c 2748 ptv->checksum_mode = afpconfig->checksum_mode;
6efd37a3 2749 ptv->bpf_filter = NULL;
df7dbe36 2750
fbca1a4e 2751 ptv->threads = 1;
e80b30c0
EL
2752#ifdef HAVE_PACKET_FANOUT
2753 ptv->cluster_type = PACKET_FANOUT_LB;
2754 ptv->cluster_id = 1;
2755 /* We only set cluster info if the number of reader threads is greater than 1 */
fbca1a4e 2756 if (afpconfig->threads > 1) {
9d882116
VJ
2757 ptv->cluster_id = afpconfig->cluster_id;
2758 ptv->cluster_type = afpconfig->cluster_type;
2759 ptv->threads = afpconfig->threads;
e80b30c0
EL
2760 }
2761#endif
49b7b00f 2762 ptv->flags = afpconfig->flags;
e80b30c0 2763
f2a6fb8a
EL
2764 if (afpconfig->bpf_filter) {
2765 ptv->bpf_filter = afpconfig->bpf_filter;
2766 }
5e05fedc 2767#ifdef HAVE_PACKET_EBPF
91e1256b
EL
2768 ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
2769 ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
8c880879 2770 ptv->xdp_mode = afpconfig->xdp_mode;
4cf53100 2771 ptv->ebpf_t_config.cpus_count = UtilCpuGetNumProcessorsConfigured();
f2a6fb8a 2772
d65f4585 2773 if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
126488f7 2774 ptv->v4_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v4");
d65f4585 2775 if (ptv->v4_map_fd == -1) {
5e62ae6d
EL
2776 if (g_flowv4_ok == false) {
2777 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'",
2778 "flow_table_v4");
2779 g_flowv4_ok = true;
2780 }
d65f4585 2781 }
126488f7 2782 ptv->v6_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v6");
d65f4585 2783 if (ptv->v6_map_fd == -1) {
5e62ae6d
EL
2784 if (g_flowv6_ok) {
2785 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'",
2786 "flow_table_v6");
2787 g_flowv6_ok = false;
2788 }
d65f4585
EL
2789 }
2790 }
4cf53100 2791 ptv->ebpf_t_config = afpconfig->ebpf_t_config;
d65f4585
EL
2792#endif
2793
6efd37a3 2794#ifdef PACKET_STATISTICS
1ef786e7
VJ
2795 ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
2796 ptv->tv);
2797 ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
2798 ptv->tv);
9efa4ace
EL
2799 ptv->capture_errors = StatsRegisterCounter("capture.errors",
2800 ptv->tv);
a022648b 2801 ptv->afpacket_spin = StatsRegisterAvgCounter("afpacket.busy_loop_avg", ptv->tv);
6efd37a3
EL
2802#endif
2803
662dccd8
EL
2804 ptv->copy_mode = afpconfig->copy_mode;
2805 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
2806 strlcpy(ptv->out_iface, afpconfig->out_iface, AFP_IFACE_NAME_LENGTH);
2807 ptv->out_iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
b7e78d33
EL
2808 /* Warn about BPF filter consequence */
2809 if (ptv->bpf_filter) {
2810 SCLogWarning(SC_WARN_UNCOMMON, "Enabling a BPF filter in IPS mode result"
2811 " in dropping all non matching packets.");
2812 }
662dccd8 2813 }
c85ee1e3 2814
b7e78d33 2815
0581a23f
EL
2816 if (AFPPeersListAdd(ptv) == TM_ECODE_FAILED) {
2817 SCFree(ptv);
2818 afpconfig->DerefFunc(afpconfig);
2819 SCReturnInt(TM_ECODE_FAILED);
2820 }
2821
e80b30c0
EL
2822#define T_DATA_SIZE 70000
2823 ptv->data = SCMalloc(T_DATA_SIZE);
2824 if (ptv->data == NULL) {
45d5c3ca 2825 afpconfig->DerefFunc(afpconfig);
6019ae3d 2826 SCFree(ptv);
e80b30c0 2827 SCReturnInt(TM_ECODE_FAILED);
c45d8985 2828 }
e80b30c0
EL
2829 ptv->datalen = T_DATA_SIZE;
2830#undef T_DATA_SIZE
2831
c45d8985 2832 *data = (void *)ptv;
fbca1a4e 2833
45d5c3ca 2834 afpconfig->DerefFunc(afpconfig);
71e47868 2835
2cd6e128
EL
2836 /* If kernel is older than 3.0, VLAN is not stripped so we don't
2837 * get the info from packet extended header but we will use a standard
2838 * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
bcc03f17
MF
2839 if (SCKernelVersionIsAtLeast(3, 0)) {
2840 ptv->flags |= AFP_VLAN_IN_HEADER;
2cd6e128
EL
2841 }
2842
c45d8985
EL
2843 SCReturnInt(TM_ECODE_OK);
2844}
2845
2846/**
2847 * \brief This function prints stats to the screen at exit.
2848 * \param tv pointer to ThreadVars
2849 * \param data pointer that gets cast into AFPThreadVars for ptv
2850 */
8f1d7503
KS
2851void ReceiveAFPThreadExitStats(ThreadVars *tv, void *data)
2852{
c45d8985
EL
2853 SCEnter();
2854 AFPThreadVars *ptv = (AFPThreadVars *)data;
9549faae
EL
2855
2856#ifdef PACKET_STATISTICS
e8a4a4c4 2857 AFPDumpCounters(ptv);
b3bf7a57 2858 SCLogPerf("(%s) Kernel: Packets %" PRIu64 ", dropped %" PRIu64 "",
6efd37a3 2859 tv->name,
752f03e7
VJ
2860 StatsGetLocalCounterValue(tv, ptv->capture_kernel_packets),
2861 StatsGetLocalCounterValue(tv, ptv->capture_kernel_drops));
9549faae 2862#endif
c45d8985
EL
2863}
2864
2865/**
2866 * \brief DeInit function closes af packet socket at exit.
2867 * \param tv pointer to ThreadVars
2868 * \param data pointer that gets cast into AFPThreadVars for ptv
2869 */
8f1d7503
KS
2870TmEcode ReceiveAFPThreadDeinit(ThreadVars *tv, void *data)
2871{
c45d8985
EL
2872 AFPThreadVars *ptv = (AFPThreadVars *)data;
2873
13f13b6d
EL
2874 AFPSwitchState(ptv, AFP_STATE_DOWN);
2875
8c880879 2876#ifdef HAVE_PACKET_XDP
4cf53100
EL
2877 if ((ptv->ebpf_t_config.flags & EBPF_XDP_CODE) &&
2878 (!(ptv->ebpf_t_config.flags & EBPF_PINNED_MAPS))) {
2879 EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
2880 }
8c880879 2881#endif
e80b30c0
EL
2882 if (ptv->data != NULL) {
2883 SCFree(ptv->data);
2884 ptv->data = NULL;
2885 }
2886 ptv->datalen = 0;
2887
f2a6fb8a 2888 ptv->bpf_filter = NULL;
69d0d484
VJ
2889 if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring.v3) {
2890 SCFree(ptv->ring.v3);
ce59ec5d 2891 } else {
69d0d484
VJ
2892 if (ptv->ring.v2)
2893 SCFree(ptv->ring.v2);
ce59ec5d 2894 }
f2a6fb8a 2895
7127ae2b 2896 SCFree(ptv);
c45d8985
EL
2897 SCReturnInt(TM_ECODE_OK);
2898}
2899
2900/**
2901 * \brief This function passes off to link type decoders.
2902 *
f8aed4ce 2903 * DecodeAFP decodes packets from AF_PACKET and passes
c45d8985
EL
2904 * them off to the proper link type decoder.
2905 *
2906 * \param t pointer to ThreadVars
2907 * \param p pointer to the current packet
2908 * \param data pointer that gets cast into AFPThreadVars for ptv
c45d8985 2909 */
f8aed4ce 2910TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data)
c45d8985
EL
2911{
2912 SCEnter();
2913 DecodeThreadVars *dtv = (DecodeThreadVars *)data;
2914
3f8e15f7 2915 DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
f7b1aefa 2916
c45d8985 2917 /* update counters */
14466a80 2918 DecodeUpdatePacketCounters(tv, dtv, p);
c45d8985 2919
1fb7c0dd
EL
2920 /* If suri has set vlan during reading, we increase vlan counter */
2921 if (p->vlan_idx) {
1c0b4ee0 2922 StatsIncr(tv, dtv->counter_vlan);
1fb7c0dd
EL
2923 }
2924
c45d8985 2925 /* call the decoder */
88bccfb8 2926 DecodeLinkLayer(tv, dtv, p->datalink, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
c45d8985 2927
3088b6ac 2928 PacketDecodeFinalize(tv, dtv, p);
e7f09f24 2929
c45d8985
EL
2930 SCReturnInt(TM_ECODE_OK);
2931}
2932
ab1200fb 2933TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
c45d8985
EL
2934{
2935 SCEnter();
e63db9d1 2936 DecodeThreadVars *dtv = DecodeThreadVarsAlloc(tv);
c45d8985
EL
2937 if (dtv == NULL)
2938 SCReturnInt(TM_ECODE_FAILED);
2939
2940 DecodeRegisterPerfCounters(dtv, tv);
2941
2942 *data = (void *)dtv;
2943
2944 SCReturnInt(TM_ECODE_OK);
2945}
2946
2864f9ee
VJ
2947TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data)
2948{
2949 if (data != NULL)
98c88d51 2950 DecodeThreadVarsFree(tv, data);
2864f9ee
VJ
2951 SCReturnInt(TM_ECODE_OK);
2952}
2953
e80b30c0 2954#endif /* HAVE_AF_PACKET */
c45d8985 2955/* eof */
a6457262
EL
2956/**
2957 * @}
2958 */