]> git.ipfire.org Git - people/ms/suricata.git/blob - src/source-af-packet.c
capture: check for flow packets on capture timeout
[people/ms/suricata.git] / src / source-af-packet.c
1 /* Copyright (C) 2011-2018 Open Information Security Foundation
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18 /**
19 * \defgroup afppacket AF_PACKET running mode
20 *
21 * @{
22 */
23
24 /**
25 * \file
26 *
27 * \author Eric Leblond <eric@regit.org>
28 *
29 * AF_PACKET socket acquisition support
30 *
31 */
32
33 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1
35 #include "suricata-common.h"
36 #include "config.h"
37 #include "suricata.h"
38 #include "decode.h"
39 #include "packet-queue.h"
40 #include "threads.h"
41 #include "threadvars.h"
42 #include "tm-queuehandlers.h"
43 #include "tm-modules.h"
44 #include "tm-threads.h"
45 #include "tm-threads-common.h"
46 #include "conf.h"
47 #include "util-cpu.h"
48 #include "util-debug.h"
49 #include "util-device.h"
50 #include "util-ebpf.h"
51 #include "util-error.h"
52 #include "util-privs.h"
53 #include "util-optimize.h"
54 #include "util-checksum.h"
55 #include "util-ioctl.h"
56 #include "util-host-info.h"
57 #include "tmqh-packetpool.h"
58 #include "source-af-packet.h"
59 #include "runmodes.h"
60
61 #ifdef HAVE_AF_PACKET
62
63 #if HAVE_SYS_IOCTL_H
64 #include <sys/ioctl.h>
65 #endif
66
67 #ifdef HAVE_PACKET_EBPF
68 #include "util-ebpf.h"
69 #include <bpf/libbpf.h>
70 #include <bpf/bpf.h>
71 #endif
72
/* Local definition of a BPF program descriptor.
 * NOTE(review): presumably provided here because the pcap BPF header is
 * excluded through PCAP_DONT_INCLUDE_PCAP_BPF_H above -- confirm. */
struct bpf_program {
    unsigned int bf_len;       /* presumably the number of entries in bf_insns -- confirm */
    struct bpf_insn *bf_insns; /* pointer to the BPF instructions */
};
77
78 #ifdef HAVE_PCAP_H
79 #include <pcap.h>
80 #endif
81
82 #ifdef HAVE_PCAP_PCAP_H
83 #include <pcap/pcap.h>
84 #endif
85
86 #include "util-bpf.h"
87
88 #if HAVE_LINUX_IF_ETHER_H
89 #include <linux/if_ether.h>
90 #endif
91
92 #if HAVE_LINUX_IF_PACKET_H
93 #include <linux/if_packet.h>
94 #endif
95
96 #if HAVE_LINUX_IF_ARP_H
97 #include <linux/if_arp.h>
98 #endif
99
100 #if HAVE_LINUX_FILTER_H
101 #include <linux/filter.h>
102 #endif
103
104 #if HAVE_SYS_MMAN_H
105 #include <sys/mman.h>
106 #endif
107
108 #ifdef HAVE_HW_TIMESTAMPING
109 #include <linux/net_tstamp.h>
110 #endif
111
112 #endif /* HAVE_AF_PACKET */
113
114 extern int max_pending_packets;
115
116 #ifndef HAVE_AF_PACKET
117
/* ThreadInit stand-in used by both module registrations below when
 * AF_PACKET support is not compiled in. */
TmEcode NoAFPSupportExit(ThreadVars *, const void *, void **);
119
120 void TmModuleReceiveAFPRegister (void)
121 {
122 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
123 tmm_modules[TMM_RECEIVEAFP].ThreadInit = NoAFPSupportExit;
124 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
125 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
126 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
127 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
128 tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
129 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
130 }
131
132 /**
133 * \brief Registration Function for DecodeAFP.
134 */
135 void TmModuleDecodeAFPRegister (void)
136 {
137 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
138 tmm_modules[TMM_DECODEAFP].ThreadInit = NoAFPSupportExit;
139 tmm_modules[TMM_DECODEAFP].Func = NULL;
140 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
141 tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
142 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
143 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
144 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
145 }
146
147 /**
148 * \brief this function prints an error message and exits.
149 */
150 TmEcode NoAFPSupportExit(ThreadVars *tv, const void *initdata, void **data)
151 {
152 SCLogError(SC_ERR_NO_AF_PACKET,"Error creating thread %s: you do not have "
153 "support for AF_PACKET enabled, on Linux host please recompile "
154 "with --enable-af-packet", tv->name);
155 exit(EXIT_FAILURE);
156 }
157
158 #else /* We have AF_PACKET support */
159
/* Size of the buffers that hold interface names (see iface/out_iface in
 * AFPThreadVars). */
#define AFP_IFACE_NAME_LENGTH 48

/* Values stored in AFPThreadVars::afp_state and in the peer state. */
#define AFP_STATE_DOWN 0
#define AFP_STATE_UP 1

/* NOTE(review): presumably a delay in microseconds between reconnection
 * attempts -- confirm at the use site. */
#define AFP_RECONNECT_TIMEOUT 500000
/* NOTE(review): presumably a number of failed attempts between two
 * "interface down" reports -- confirm at the use site. */
#define AFP_DOWN_COUNTER_INTERVAL 40

/* NOTE(review): presumably the poll() timeout in milliseconds -- confirm at
 * the use site. */
#define POLL_TIMEOUT 100

#ifndef TP_STATUS_USER_BUSY
/* for new use latest bit available in tp_status */
#define TP_STATUS_USER_BUSY (1 << 31)
#endif

/* Fallback definition for headers that do not provide the flag. */
#ifndef TP_STATUS_VLAN_VALID
#define TP_STATUS_VLAN_VALID (1 << 4)
#endif
178
/* Result codes returned by the AFP read functions of this file. */
enum {
    /** Read pass finished without error */
    AFP_READ_OK,
    /** Reading from the socket or ring failed */
    AFP_READ_FAILURE,
    /** Error during treatment by other functions of Suricata */
    AFP_SURI_FAILURE,
    /** Returned by the ring reader when it flushed frames after the kernel
     *  flagged TP_STATUS_LOSING and emergency mode is enabled */
    AFP_KERNEL_DROP,
};
186
/* Error classes; numbering starts at 1.
 * NOTE(review): not used in the visible part of the file -- presumably
 * returned by the socket setup code further down; confirm. */
enum {
    AFP_FATAL_ERROR = 1,
    AFP_RECOVERABLE_ERROR,
};
191
/* Different typed views of the same ring frame header pointer. */
union thdr {
    struct tpacket2_hdr *h2;    /* view as a TPACKET_V2 frame header */
#ifdef HAVE_TPACKET_V3
    struct tpacket3_hdr *h3;    /* view as a TPACKET_V3 packet header */
#endif
    void *raw;                  /* untyped pointer, used to set/compare the address */
};
199
/* Flow bypass callbacks; the read functions install one of them in
 * Packet::BypassPacketsFlow depending on AFP_BYPASS / AFP_XDPBYPASS. */
static int AFPBypassCallback(Packet *p);
static int AFPXDPBypassCallback(Packet *p);
202
203 #define MAX_MAPS 32
/**
 * \brief Structure to hold thread specific variables.
 *
 * One instance is handed to every read/write helper of this file as "ptv".
 */
typedef struct AFPThreadVars_
{
    /* Ring access: v2 is indexed per frame, v3 per block (iovec array) */
    union AFPRing {
        char *v2;
        struct iovec *v3;
    } ring;

    /* counters */
    uint64_t pkts;  /* incremented by the read functions for each packet */

    ThreadVars *tv;
    TmSlot *slot;   /* slot passed to TmThreadsSlotProcessPkt() */
    LiveDevice *livedev;
    /* data link type for the thread */
    uint32_t datalink;

#ifdef HAVE_PACKET_EBPF
    /* File descriptor of the IPv4 flow bypass table maps */
    int v4_map_fd;
    /* File descriptor of the IPv6 flow bypass table maps */
    int v6_map_fd;
#endif

    /* Index of the next frame (V2) or block (V3) to read in the ring */
    unsigned int frame_offset;

    ChecksumValidationMode checksum_mode;

    /* references to packet and drop counters */
    uint16_t capture_kernel_packets;
    uint16_t capture_kernel_drops;
    uint16_t capture_errors;

    /* handle state */
    uint8_t afp_state;
    uint8_t copy_mode;
    unsigned int flags; /* AFP_* flag bits (AFP_ZERO_COPY, AFP_BYPASS, ...) */

    /* IPS peer */
    AFPPeer *mpeer;

    /* no mmap mode */
    uint8_t *data; /** Per function and thread data */
    int datalen; /** Length of per function and thread data */
    int cooked; /* non-zero: AFPRead() reserves room for and forges an SLL header */

    /*
     * Init related members
     */

    /* thread specific socket */
    int socket;

    int ring_size;
    int block_size;
    int block_timeout;
    /* socket buffer size */
    int buffer_size;
    /* Filter */
    const char *bpf_filter;
    int ebpf_lb_fd;
    int ebpf_filter_fd;

    int promisc;

    int down_count;

    int cluster_id;
    int cluster_type;

    int threads;

    /* Ring request; v2.tp_frame_nr bounds frame_offset in AFPReadFromRing() */
    union AFPTpacketReq {
        struct tpacket_req v2;
#ifdef HAVE_TPACKET_V3
        struct tpacket_req3 v3;
#endif
    } req;

    char iface[AFP_IFACE_NAME_LENGTH];
    /* IPS output iface */
    char out_iface[AFP_IFACE_NAME_LENGTH];

    /* mmap'ed ring buffer */
    unsigned int ring_buflen;
    uint8_t *ring_buf;

    uint8_t xdp_mode;

} AFPThreadVars;
296
/* Receive module entry points (registered in TmModuleReceiveAFPRegister) */
TmEcode ReceiveAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
void ReceiveAFPThreadExitStats(ThreadVars *, void *);
TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);

/* Decode module entry points (registered in TmModuleDecodeAFPRegister) */
TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
TmEcode DecodeAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);

/* Helpers defined later in this file */
TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
static int AFPGetDevFlags(int fd, const char *ifname);
static int AFPDerefSocket(AFPPeer* peer);
static int AFPRefSocket(AFPPeer* peer);


/* Set once in TmModuleReceiveAFPRegister() from
 * UtilCpuGetNumProcessorsConfigured() */
static unsigned int nr_cpus;
315
316 /**
317 * \brief Registration Function for RecieveAFP.
318 * \todo Unit tests are needed for this module.
319 */
320 void TmModuleReceiveAFPRegister (void)
321 {
322 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
323 tmm_modules[TMM_RECEIVEAFP].ThreadInit = ReceiveAFPThreadInit;
324 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
325 tmm_modules[TMM_RECEIVEAFP].PktAcqLoop = ReceiveAFPLoop;
326 tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
327 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
328 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = ReceiveAFPThreadDeinit;
329 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
330 tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
331 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
332
333 nr_cpus = UtilCpuGetNumProcessorsConfigured();
334 }
335
336
337 /**
338 * \defgroup afppeers AFP peers list
339 *
 * AF_PACKET has an IPS mode where interfaces are peered: packets received
 * on one interface are sent to the peered interface, and the other way
 * around. The ::AFPPeer list maintains the list of peers. Each ::AFPPeer
 * stores the information needed to send packets on its interface.
 * An element of the list must not be destroyed during the run of Suricata
 * as it is used by ::Packet and other threads.
346 *
347 * @{
348 */
349
/* Global bookkeeping for the AFP peers. */
typedef struct AFPPeersList_ {
    TAILQ_HEAD(, AFPPeer_) peers; /**< Head of the list of peers. */
    int cnt; /**< Number of threads added with a copy mode other than AFP_COPY_MODE_NONE */
    int peered; /**< Number of peers that have been paired (grows by 2 per pair) */
    int turn; /**< Next value for initialisation order */
    SC_ATOMIC_DECLARE(int, reached); /**< Counter used to synchronize start */
} AFPPeersList;
357
358 /**
359 * \brief Update the peer.
360 *
361 * Update the AFPPeer of a thread ie set new state, socket number
362 * or iface index.
363 *
364 */
365 static void AFPPeerUpdate(AFPThreadVars *ptv)
366 {
367 if (ptv->mpeer == NULL) {
368 return;
369 }
370 (void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
371 (void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
372 (void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
373 }
374
375 /**
376 * \brief Clean and free ressource used by an ::AFPPeer
377 */
378 static void AFPPeerClean(AFPPeer *peer)
379 {
380 if (peer->flags & AFP_SOCK_PROTECT)
381 SCMutexDestroy(&peer->sock_protect);
382 SC_ATOMIC_DESTROY(peer->socket);
383 SC_ATOMIC_DESTROY(peer->if_idx);
384 SC_ATOMIC_DESTROY(peer->state);
385 SCFree(peer);
386 }
387
/* The single, file-global peers list used by the AFPPeersList* functions. */
AFPPeersList peerslist;
389
390
391 /**
392 * \brief Init the global list of ::AFPPeer
393 */
394 TmEcode AFPPeersListInit()
395 {
396 SCEnter();
397 TAILQ_INIT(&peerslist.peers);
398 peerslist.peered = 0;
399 peerslist.cnt = 0;
400 peerslist.turn = 0;
401 SC_ATOMIC_INIT(peerslist.reached);
402 (void) SC_ATOMIC_SET(peerslist.reached, 0);
403 SCReturnInt(TM_ECODE_OK);
404 }
405
406 /**
407 * \brief Check that all ::AFPPeer got a peer
408 *
409 * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
410 */
411 TmEcode AFPPeersListCheck()
412 {
413 #define AFP_PEERS_MAX_TRY 4
414 #define AFP_PEERS_WAIT 20000
415 int try = 0;
416 SCEnter();
417 while (try < AFP_PEERS_MAX_TRY) {
418 if (peerslist.cnt != peerslist.peered) {
419 usleep(AFP_PEERS_WAIT);
420 } else {
421 SCReturnInt(TM_ECODE_OK);
422 }
423 try++;
424 }
425 SCLogError(SC_ERR_AFP_CREATE, "Threads number not equals");
426 SCReturnInt(TM_ECODE_FAILED);
427 }
428
429 /**
430 * \brief Declare a new AFP thread to AFP peers list.
431 */
432 static TmEcode AFPPeersListAdd(AFPThreadVars *ptv)
433 {
434 SCEnter();
435 AFPPeer *peer = SCMalloc(sizeof(AFPPeer));
436 AFPPeer *pitem;
437 int mtu, out_mtu;
438
439 if (unlikely(peer == NULL)) {
440 SCReturnInt(TM_ECODE_FAILED);
441 }
442 memset(peer, 0, sizeof(AFPPeer));
443 SC_ATOMIC_INIT(peer->socket);
444 SC_ATOMIC_INIT(peer->sock_usage);
445 SC_ATOMIC_INIT(peer->if_idx);
446 SC_ATOMIC_INIT(peer->state);
447 peer->flags = ptv->flags;
448 peer->turn = peerslist.turn++;
449
450 if (peer->flags & AFP_SOCK_PROTECT) {
451 SCMutexInit(&peer->sock_protect, NULL);
452 }
453
454 (void)SC_ATOMIC_SET(peer->sock_usage, 0);
455 (void)SC_ATOMIC_SET(peer->state, AFP_STATE_DOWN);
456 strlcpy(peer->iface, ptv->iface, AFP_IFACE_NAME_LENGTH);
457 ptv->mpeer = peer;
458 /* add element to iface list */
459 TAILQ_INSERT_TAIL(&peerslist.peers, peer, next);
460
461 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
462 peerslist.cnt++;
463
464 /* Iter to find a peer */
465 TAILQ_FOREACH(pitem, &peerslist.peers, next) {
466 if (pitem->peer)
467 continue;
468 if (strcmp(pitem->iface, ptv->out_iface))
469 continue;
470 peer->peer = pitem;
471 pitem->peer = peer;
472 mtu = GetIfaceMTU(ptv->iface);
473 out_mtu = GetIfaceMTU(ptv->out_iface);
474 if (mtu != out_mtu) {
475 SCLogError(SC_ERR_AFP_CREATE,
476 "MTU on %s (%d) and %s (%d) are not equal, "
477 "transmission of packets bigger than %d will fail.",
478 ptv->iface, mtu,
479 ptv->out_iface, out_mtu,
480 (out_mtu > mtu) ? mtu : out_mtu);
481 }
482 peerslist.peered += 2;
483 break;
484 }
485 }
486
487 AFPPeerUpdate(ptv);
488
489 SCReturnInt(TM_ECODE_OK);
490 }
491
492 static int AFPPeersListWaitTurn(AFPPeer *peer)
493 {
494 /* If turn is zero, we already have started threads once */
495 if (peerslist.turn == 0)
496 return 0;
497
498 if (peer->turn == SC_ATOMIC_GET(peerslist.reached))
499 return 0;
500 return 1;
501 }
502
503 static void AFPPeersListReachedInc(void)
504 {
505 if (peerslist.turn == 0)
506 return;
507
508 if (SC_ATOMIC_ADD(peerslist.reached, 1) == peerslist.turn) {
509 SCLogInfo("All AFP capture threads are running.");
510 (void)SC_ATOMIC_SET(peerslist.reached, 0);
511 /* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
512 * restarted.
513 */
514 peerslist.turn = 0;
515 }
516 }
517
518 static int AFPPeersListStarted(void)
519 {
520 return !peerslist.turn;
521 }
522
523 /**
524 * \brief Clean the global peers list.
525 */
526 void AFPPeersListClean()
527 {
528 AFPPeer *pitem;
529
530 while ((pitem = TAILQ_FIRST(&peerslist.peers))) {
531 TAILQ_REMOVE(&peerslist.peers, pitem, next);
532 AFPPeerClean(pitem);
533 }
534 }
535
536 /**
537 * @}
538 */
539
540 /**
541 * \brief Registration Function for DecodeAFP.
542 * \todo Unit tests are needed for this module.
543 */
544 void TmModuleDecodeAFPRegister (void)
545 {
546 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
547 tmm_modules[TMM_DECODEAFP].ThreadInit = DecodeAFPThreadInit;
548 tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
549 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
550 tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
551 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
552 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
553 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
554 }
555
556
/* Forward declaration; the definition is further down in this file. */
static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose);
558
559 static inline void AFPDumpCounters(AFPThreadVars *ptv)
560 {
561 #ifdef PACKET_STATISTICS
562 struct tpacket_stats kstats;
563 socklen_t len = sizeof (struct tpacket_stats);
564 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
565 &kstats, &len) > -1) {
566 SCLogDebug("(%s) Kernel: Packets %" PRIu32 ", dropped %" PRIu32 "",
567 ptv->tv->name,
568 kstats.tp_packets, kstats.tp_drops);
569 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, kstats.tp_packets);
570 StatsAddUI64(ptv->tv, ptv->capture_kernel_drops, kstats.tp_drops);
571 (void) SC_ATOMIC_ADD(ptv->livedev->drop, (uint64_t) kstats.tp_drops);
572 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, (uint64_t) kstats.tp_packets);
573 }
574 #endif
575 }
576
577 /**
578 * \brief AF packet read function.
579 *
580 * This function fills
581 * From here the packets are picked up by the DecodeAFP thread.
582 *
583 * \param user pointer to AFPThreadVars
584 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
585 */
586 static int AFPRead(AFPThreadVars *ptv)
587 {
588 Packet *p = NULL;
589 /* XXX should try to use read that get directly to packet */
590 int offset = 0;
591 int caplen;
592 struct sockaddr_ll from;
593 struct iovec iov;
594 struct msghdr msg;
595 struct cmsghdr *cmsg;
596 union {
597 struct cmsghdr cmsg;
598 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
599 } cmsg_buf;
600 unsigned char aux_checksum = 0;
601
602 msg.msg_name = &from;
603 msg.msg_namelen = sizeof(from);
604 msg.msg_iov = &iov;
605 msg.msg_iovlen = 1;
606 msg.msg_control = &cmsg_buf;
607 msg.msg_controllen = sizeof(cmsg_buf);
608 msg.msg_flags = 0;
609
610 if (ptv->cooked)
611 offset = SLL_HEADER_LEN;
612 else
613 offset = 0;
614 iov.iov_len = ptv->datalen - offset;
615 iov.iov_base = ptv->data + offset;
616
617 caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
618
619 if (caplen < 0) {
620 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
621 errno);
622 SCReturnInt(AFP_READ_FAILURE);
623 }
624
625 p = PacketGetFromQueueOrAlloc();
626 if (p == NULL) {
627 SCReturnInt(AFP_SURI_FAILURE);
628 }
629 PKT_SET_SRC(p, PKT_SRC_WIRE);
630 if (ptv->flags & AFP_BYPASS) {
631 p->BypassPacketsFlow = AFPBypassCallback;
632 #ifdef HAVE_PACKET_EBPF
633 p->afp_v.v4_map_fd = ptv->v4_map_fd;
634 p->afp_v.v6_map_fd = ptv->v6_map_fd;
635 #endif
636 }
637 if (ptv->flags & AFP_XDPBYPASS) {
638 p->BypassPacketsFlow = AFPXDPBypassCallback;
639 #ifdef HAVE_PACKET_EBPF
640 p->afp_v.v4_map_fd = ptv->v4_map_fd;
641 p->afp_v.v6_map_fd = ptv->v6_map_fd;
642 #endif
643 }
644
645 /* get timestamp of packet via ioctl */
646 if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
647 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
648 errno);
649 TmqhOutputPacketpool(ptv->tv, p);
650 SCReturnInt(AFP_READ_FAILURE);
651 }
652
653 ptv->pkts++;
654 p->livedev = ptv->livedev;
655
656 /* add forged header */
657 if (ptv->cooked) {
658 SllHdr * hdrp = (SllHdr *)ptv->data;
659 /* XXX this is minimalist, but this seems enough */
660 hdrp->sll_protocol = from.sll_protocol;
661 }
662
663 p->datalink = ptv->datalink;
664 SET_PKT_LEN(p, caplen + offset);
665 if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
666 TmqhOutputPacketpool(ptv->tv, p);
667 SCReturnInt(AFP_SURI_FAILURE);
668 }
669 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
670 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
671
672 /* We only check for checksum disable */
673 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
674 p->flags |= PKT_IGNORE_CHECKSUM;
675 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
676 if (ptv->livedev->ignore_checksum) {
677 p->flags |= PKT_IGNORE_CHECKSUM;
678 } else if (ChecksumAutoModeCheck(ptv->pkts,
679 SC_ATOMIC_GET(ptv->livedev->pkts),
680 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
681 ptv->livedev->ignore_checksum = 1;
682 p->flags |= PKT_IGNORE_CHECKSUM;
683 }
684 } else {
685 aux_checksum = 1;
686 }
687
688 /* List is NULL if we don't have activated auxiliary data */
689 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
690 struct tpacket_auxdata *aux;
691
692 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
693 cmsg->cmsg_level != SOL_PACKET ||
694 cmsg->cmsg_type != PACKET_AUXDATA)
695 continue;
696
697 aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
698
699 if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
700 p->flags |= PKT_IGNORE_CHECKSUM;
701 }
702 break;
703 }
704
705 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
706 TmqhOutputPacketpool(ptv->tv, p);
707 SCReturnInt(AFP_SURI_FAILURE);
708 }
709 SCReturnInt(AFP_READ_OK);
710 }
711
712 /**
713 * \brief AF packet write function.
714 *
715 * This function has to be called before the memory
716 * related to Packet in ring buffer is released.
717 *
718 * \param pointer to Packet
719 * \param version of capture: TPACKET_V2 or TPACKET_V3
720 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
721 *
722 */
723 static TmEcode AFPWritePacket(Packet *p, int version)
724 {
725 struct sockaddr_ll socket_address;
726 int socket;
727 uint8_t *pstart;
728 size_t plen;
729 union thdr h;
730 uint16_t vlan_tci = 0;
731
732 if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
733 if (PACKET_TEST_ACTION(p, ACTION_DROP)) {
734 return TM_ECODE_OK;
735 }
736 }
737
738 if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
739 return TM_ECODE_OK;
740
741 if (p->ethh == NULL) {
742 SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
743 return TM_ECODE_FAILED;
744 }
745 /* Index of the network device */
746 socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
747 /* Address length*/
748 socket_address.sll_halen = ETH_ALEN;
749 /* Destination MAC */
750 memcpy(socket_address.sll_addr, p->ethh, 6);
751
752 /* Send packet, locking the socket if necessary */
753 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
754 SCMutexLock(&p->afp_v.peer->sock_protect);
755 socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
756
757 h.raw = p->afp_v.relptr;
758
759 if (version == TPACKET_V2) {
760 /* Copy VLAN header from ring memory. For post june 2011 kernel we test
761 * the flag. It is not defined for older kernel so we go best effort
762 * and test for non zero value of the TCI header. */
763 if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
764 vlan_tci = h.h2->tp_vlan_tci;
765 }
766 } else {
767 #ifdef HAVE_TPACKET_V3
768 if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
769 vlan_tci = h.h3->hv1.tp_vlan_tci;
770 }
771 #else
772 /* Should not get here */
773 BUG_ON(1);
774 #endif
775 }
776
777 if (vlan_tci != 0) {
778 pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
779 plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
780 /* move ethernet addresses */
781 memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
782 /* write vlan info */
783 *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
784 *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
785 } else {
786 pstart = GET_PKT_DATA(p);
787 plen = GET_PKT_LEN(p);
788 }
789
790 if (sendto(socket, pstart, plen, 0,
791 (struct sockaddr*) &socket_address,
792 sizeof(struct sockaddr_ll)) < 0) {
793 SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
794 socket,
795 strerror(errno));
796 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
797 SCMutexUnlock(&p->afp_v.peer->sock_protect);
798 return TM_ECODE_FAILED;
799 }
800 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
801 SCMutexUnlock(&p->afp_v.peer->sock_protect);
802
803 return TM_ECODE_OK;
804 }
805
806 static void AFPReleaseDataFromRing(Packet *p)
807 {
808 /* Need to be in copy mode and need to detect early release
809 where Ethernet header could not be set (and pseudo packet) */
810 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
811 AFPWritePacket(p, TPACKET_V2);
812 }
813
814 if (AFPDerefSocket(p->afp_v.mpeer) == 0)
815 goto cleanup;
816
817 if (p->afp_v.relptr) {
818 union thdr h;
819 h.raw = p->afp_v.relptr;
820 h.h2->tp_status = TP_STATUS_KERNEL;
821 }
822
823 cleanup:
824 AFPV_CLEANUP(&p->afp_v);
825 }
826
827 #ifdef HAVE_TPACKET_V3
828 static void AFPReleasePacketV3(Packet *p)
829 {
830 /* Need to be in copy mode and need to detect early release
831 where Ethernet header could not be set (and pseudo packet) */
832 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
833 AFPWritePacket(p, TPACKET_V3);
834 }
835 PacketFreeOrRelease(p);
836 }
837 #endif
838
/**
 * \brief Release callback installed by AFPReadFromRing() on zero copy
 *        packets.
 *
 * Releases the ring frame first, then frees the Packet or returns it to its
 * pool.
 */
static void AFPReleasePacket(Packet *p)
{
    AFPReleaseDataFromRing(p);
    PacketFreeOrRelease(p);
}
844
/**
 * \brief AF_PACKET read function for the TPACKET_V2 ring.
 *
 * Walks the ring starting at ptv->frame_offset. Each frame that is ready is
 * turned into a Packet (referencing the frame in zero copy mode, copying it
 * otherwise) and handed to the next slot of the thread. The walk stops at
 * the first frame that is not ready once at least one packet was read, when
 * the end of the ring is reached, or when suricata_ctl_flags is set.
 *
 * \param ptv pointer to AFPThreadVars
 *
 * \retval AFP_READ_OK      the read pass ended without error
 * \retval AFP_READ_FAILURE the ring entry at frame_offset is NULL
 * \retval AFP_SURI_FAILURE no Packet could be acquired, setting or copying
 *                          the data failed, or the slot processing failed
 * \retval AFP_KERNEL_DROP  frames were flushed after a TP_STATUS_LOSING
 *                          frame while AFP_EMERGENCY_MODE is set
 */
static int AFPReadFromRing(AFPThreadVars *ptv)
{
    Packet *p = NULL;
    union thdr h;
    /* set to 1 once a frame flagged TP_STATUS_LOSING has been seen */
    uint8_t emergency_flush = 0;
    /* number of ready frames handled in this call */
    int read_pkts = 0;
    /* frame offset where the search for a ready frame started, -1 if unset */
    int loop_start = -1;


    /* Loop till we have packets available */
    while (1) {
        if (unlikely(suricata_ctl_flags != 0)) {
            break;
        }

        /* Read packet from ring */
        h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
        if (unlikely(h.raw == NULL)) {
            /* Impossible we reach this point in normal condition, so trigger
             * a failure in reading */
            SCReturnInt(AFP_READ_FAILURE);
        }

        /* Frame is owned by the kernel (status 0) or still used by Suricata */
        if ((! h.h2->tp_status) || (h.h2->tp_status & TP_STATUS_USER_BUSY)) {
            if (read_pkts == 0) {
                /* Nothing read yet: look at the following frames, but stop
                 * after a full turn of the ring */
                if (loop_start == -1) {
                    loop_start = ptv->frame_offset;
                } else if (unlikely(loop_start == (int)ptv->frame_offset)) {
                    SCReturnInt(AFP_READ_OK);
                }
                if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
                    ptv->frame_offset = 0;
                }
                continue;
            }
            if ((emergency_flush) && (ptv->flags & AFP_EMERGENCY_MODE)) {
                SCReturnInt(AFP_KERNEL_DROP);
            } else {
                SCReturnInt(AFP_READ_OK);
            }
        }

        read_pkts++;
        loop_start = -1;

        /* Our packet is still used by suricata, we exit read loop to
         * gain some time */
        /* NOTE(review): the check above already leaves this iteration when
         * TP_STATUS_USER_BUSY is set, so this branch looks unreachable --
         * confirm before relying on it. */
        if (h.h2->tp_status & TP_STATUS_USER_BUSY) {
            SCReturnInt(AFP_READ_OK);
        }

        /* Emergency mode: give the frame straight back to the kernel */
        if ((ptv->flags & AFP_EMERGENCY_MODE) && (emergency_flush == 1)) {
            h.h2->tp_status = TP_STATUS_KERNEL;
            goto next_frame;
        }

        p = PacketGetFromQueueOrAlloc();
        if (p == NULL) {
            SCReturnInt(AFP_SURI_FAILURE);
        }
        PKT_SET_SRC(p, PKT_SRC_WIRE);
        if (ptv->flags & AFP_BYPASS) {
            p->BypassPacketsFlow = AFPBypassCallback;
#ifdef HAVE_PACKET_EBPF
            p->afp_v.v4_map_fd = ptv->v4_map_fd;
            p->afp_v.v6_map_fd = ptv->v6_map_fd;
#endif
        }
        if (ptv->flags & AFP_XDPBYPASS) {
            p->BypassPacketsFlow = AFPXDPBypassCallback;
#ifdef HAVE_PACKET_EBPF
            p->afp_v.v4_map_fd = ptv->v4_map_fd;
            p->afp_v.v6_map_fd = ptv->v6_map_fd;
#endif
        }

        /* Suricata will treat packet so telling it is busy, this
         * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
         * function. */
        h.h2->tp_status |= TP_STATUS_USER_BUSY;

        ptv->pkts++;
        p->livedev = ptv->livedev;
        p->datalink = ptv->datalink;

        /* Only logged: the packet data is limited to tp_snaplen below */
        if (h.h2->tp_len > h.h2->tp_snaplen) {
            SCLogDebug("Packet length (%d) > snaplen (%d), truncating",
                    h.h2->tp_len, h.h2->tp_snaplen);
        }

        /* get vlan id from header */
        if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
            (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
            p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
            p->vlan_idx = 1;
            p->vlanh[0] = NULL;
        }

        if (ptv->flags & AFP_ZERO_COPY) {
            /* Zero copy: the Packet points into the ring frame */
            if (PacketSetData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
                TmqhOutputPacketpool(ptv->tv, p);
                SCReturnInt(AFP_SURI_FAILURE);
            } else {
                /* Remember the frame so that the release callback can hand
                 * it back to the kernel */
                p->afp_v.relptr = h.raw;
                p->ReleasePacket = AFPReleasePacket;
                p->afp_v.mpeer = ptv->mpeer;
                AFPRefSocket(ptv->mpeer);

                p->afp_v.copy_mode = ptv->copy_mode;
                if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
                    p->afp_v.peer = ptv->mpeer->peer;
                } else {
                    p->afp_v.peer = NULL;
                }
            }
        } else {
            if (PacketCopyData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
                /* As we can possibly fail to copy the data due to invalid data, let's
                 * skip this packet and switch to the next one.
                 */
                h.h2->tp_status = TP_STATUS_KERNEL;
                if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
                    ptv->frame_offset = 0;
                }
                TmqhOutputPacketpool(ptv->tv, p);
                SCReturnInt(AFP_SURI_FAILURE);
            }
        }

        /* Timestamp */
        p->ts.tv_sec = h.h2->tp_sec;
        p->ts.tv_usec = h.h2->tp_nsec/1000;
        SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
                GET_PKT_LEN(p), p, GET_PKT_DATA(p));

        /* We only check for checksum disable */
        if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
            p->flags |= PKT_IGNORE_CHECKSUM;
        } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
            if (ptv->livedev->ignore_checksum) {
                p->flags |= PKT_IGNORE_CHECKSUM;
            } else if (ChecksumAutoModeCheck(ptv->pkts,
                        SC_ATOMIC_GET(ptv->livedev->pkts),
                        SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
                ptv->livedev->ignore_checksum = 1;
                p->flags |= PKT_IGNORE_CHECKSUM;
            }
        } else {
            if (h.h2->tp_status & TP_STATUS_CSUMNOTREADY) {
                p->flags |= PKT_IGNORE_CHECKSUM;
            }
        }
        /* The kernel reports losses: remember it and refresh the counters */
        if (h.h2->tp_status & TP_STATUS_LOSING) {
            emergency_flush = 1;
            AFPDumpCounters(ptv);
        }

        /* release frame if not in zero copy mode */
        if (!(ptv->flags & AFP_ZERO_COPY)) {
            h.h2->tp_status = TP_STATUS_KERNEL;
        }

        if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
            /* Give the frame back and move on before reporting the failure */
            h.h2->tp_status = TP_STATUS_KERNEL;
            if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
                ptv->frame_offset = 0;
            }
            TmqhOutputPacketpool(ptv->tv, p);
            SCReturnInt(AFP_SURI_FAILURE);
        }

next_frame:
        if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
            ptv->frame_offset = 0;
            /* Get out of loop to be sure we will reach maintenance tasks */
            SCReturnInt(AFP_READ_OK);
        }
    }

    SCReturnInt(AFP_READ_OK);
}
1035
1036 #ifdef HAVE_TPACKET_V3
/**
 * \brief Mark a TPACKET_V3 block as TP_STATUS_KERNEL, i.e. hand it back to
 *        the kernel.
 *
 * \param pbd descriptor of the block
 */
static inline void AFPFlushBlock(struct tpacket_block_desc *pbd)
{
    pbd->hdr.bh1.block_status = TP_STATUS_KERNEL;
}
1041
1042 static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc *pbd, struct tpacket3_hdr *ppd)
1043 {
1044 Packet *p = PacketGetFromQueueOrAlloc();
1045 if (p == NULL) {
1046 SCReturnInt(AFP_SURI_FAILURE);
1047 }
1048 PKT_SET_SRC(p, PKT_SRC_WIRE);
1049 if (ptv->flags & AFP_BYPASS) {
1050 p->BypassPacketsFlow = AFPBypassCallback;
1051 #ifdef HAVE_PACKET_EBPF
1052 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1053 p->afp_v.v6_map_fd = ptv->v6_map_fd;
1054 #endif
1055 } else if (ptv->flags & AFP_XDPBYPASS) {
1056 p->BypassPacketsFlow = AFPXDPBypassCallback;
1057 #ifdef HAVE_PACKET_EBPF
1058 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1059 p->afp_v.v6_map_fd = ptv->v6_map_fd;
1060 #endif
1061 }
1062
1063 ptv->pkts++;
1064 p->livedev = ptv->livedev;
1065 p->datalink = ptv->datalink;
1066
1067 if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
1068 (ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
1069 p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
1070 p->vlan_idx = 1;
1071 p->vlanh[0] = NULL;
1072 }
1073
1074 if (ptv->flags & AFP_ZERO_COPY) {
1075 if (PacketSetData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1076 TmqhOutputPacketpool(ptv->tv, p);
1077 SCReturnInt(AFP_SURI_FAILURE);
1078 }
1079 p->afp_v.relptr = ppd;
1080 p->ReleasePacket = AFPReleasePacketV3;
1081 p->afp_v.mpeer = ptv->mpeer;
1082 AFPRefSocket(ptv->mpeer);
1083
1084 p->afp_v.copy_mode = ptv->copy_mode;
1085 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
1086 p->afp_v.peer = ptv->mpeer->peer;
1087 } else {
1088 p->afp_v.peer = NULL;
1089 }
1090 } else {
1091 if (PacketCopyData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1092 TmqhOutputPacketpool(ptv->tv, p);
1093 SCReturnInt(AFP_SURI_FAILURE);
1094 }
1095 }
1096 /* Timestamp */
1097 p->ts.tv_sec = ppd->tp_sec;
1098 p->ts.tv_usec = ppd->tp_nsec/1000;
1099 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
1100 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
1101
1102 /* We only check for checksum disable */
1103 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
1104 p->flags |= PKT_IGNORE_CHECKSUM;
1105 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
1106 if (ptv->livedev->ignore_checksum) {
1107 p->flags |= PKT_IGNORE_CHECKSUM;
1108 } else if (ChecksumAutoModeCheck(ptv->pkts,
1109 SC_ATOMIC_GET(ptv->livedev->pkts),
1110 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
1111 ptv->livedev->ignore_checksum = 1;
1112 p->flags |= PKT_IGNORE_CHECKSUM;
1113 }
1114 } else {
1115 if (ppd->tp_status & TP_STATUS_CSUMNOTREADY) {
1116 p->flags |= PKT_IGNORE_CHECKSUM;
1117 }
1118 }
1119
1120 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
1121 TmqhOutputPacketpool(ptv->tv, p);
1122 SCReturnInt(AFP_SURI_FAILURE);
1123 }
1124
1125 SCReturnInt(AFP_READ_OK);
1126 }
1127
1128 static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
1129 {
1130 int num_pkts = pbd->hdr.bh1.num_pkts, i;
1131 uint8_t *ppd;
1132 int ret = 0;
1133
1134 ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
1135 for (i = 0; i < num_pkts; ++i) {
1136 ret = AFPParsePacketV3(ptv, pbd,
1137 (struct tpacket3_hdr *)ppd);
1138 switch (ret) {
1139 case AFP_READ_OK:
1140 break;
1141 case AFP_SURI_FAILURE:
1142 /* Internal error but let's just continue and
1143 * treat thenext packet */
1144 break;
1145 case AFP_READ_FAILURE:
1146 SCReturnInt(AFP_READ_FAILURE);
1147 default:
1148 SCReturnInt(ret);
1149 }
1150 ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset;
1151 }
1152
1153 SCReturnInt(AFP_READ_OK);
1154 }
1155 #endif /* HAVE_TPACKET_V3 */
1156
1157 /**
1158 * \brief AF packet read function for ring
1159 *
1160 * This function fills
1161 * From here the packets are picked up by the DecodeAFP thread.
1162 *
1163 * \param user pointer to AFPThreadVars
1164 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1165 */
1166 static int AFPReadFromRingV3(AFPThreadVars *ptv)
1167 {
1168 #ifdef HAVE_TPACKET_V3
1169 struct tpacket_block_desc *pbd;
1170 int ret = 0;
1171
1172 /* Loop till we have packets available */
1173 while (1) {
1174 if (unlikely(suricata_ctl_flags != 0)) {
1175 SCLogInfo("Exiting AFP V3 read loop");
1176 break;
1177 }
1178
1179 pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
1180
1181 /* block is not ready to be read */
1182 if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
1183 SCReturnInt(AFP_READ_OK);
1184 }
1185
1186 ret = AFPWalkBlock(ptv, pbd);
1187 if (unlikely(ret != AFP_READ_OK)) {
1188 AFPFlushBlock(pbd);
1189 SCReturnInt(ret);
1190 }
1191
1192 AFPFlushBlock(pbd);
1193 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
1194 /* return to maintenance task after one loop on the ring */
1195 if (ptv->frame_offset == 0) {
1196 SCReturnInt(AFP_READ_OK);
1197 }
1198 }
1199 #endif
1200 SCReturnInt(AFP_READ_OK);
1201 }
1202
1203 /**
1204 * \brief Reference socket
1205 *
1206 * \retval O in case of failure, 1 in case of success
1207 */
1208 static int AFPRefSocket(AFPPeer* peer)
1209 {
1210 if (unlikely(peer == NULL))
1211 return 0;
1212
1213 (void)SC_ATOMIC_ADD(peer->sock_usage, 1);
1214 return 1;
1215 }
1216
1217
1218 /**
1219 * \brief Dereference socket
1220 *
1221 * \retval 1 if socket is still alive, 0 if not
1222 */
1223 static int AFPDerefSocket(AFPPeer* peer)
1224 {
1225 if (peer == NULL)
1226 return 1;
1227
1228 if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 0) {
1229 if (SC_ATOMIC_GET(peer->state) == AFP_STATE_DOWN) {
1230 SCLogInfo("Cleaning socket connected to '%s'", peer->iface);
1231 close(SC_ATOMIC_GET(peer->socket));
1232 return 0;
1233 }
1234 }
1235 return 1;
1236 }
1237
1238 static void AFPSwitchState(AFPThreadVars *ptv, int state)
1239 {
1240 ptv->afp_state = state;
1241 ptv->down_count = 0;
1242
1243 AFPPeerUpdate(ptv);
1244
1245 /* Do cleaning if switching to down state */
1246 if (state == AFP_STATE_DOWN) {
1247 #ifdef HAVE_TPACKET_V3
1248 if (ptv->flags & AFP_TPACKET_V3) {
1249 if (!ptv->ring.v3) {
1250 SCFree(ptv->ring.v3);
1251 ptv->ring.v3 = NULL;
1252 }
1253 } else {
1254 #endif
1255 if (ptv->ring.v2) {
1256 /* only used in reading phase, we can free it */
1257 SCFree(ptv->ring.v2);
1258 ptv->ring.v2 = NULL;
1259 }
1260 #ifdef HAVE_TPACKET_V3
1261 }
1262 #endif
1263 if (ptv->socket != -1) {
1264 /* we need to wait for all packets to return data */
1265 if (SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1) == 0) {
1266 SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
1267 munmap(ptv->ring_buf, ptv->ring_buflen);
1268 close(ptv->socket);
1269 ptv->socket = -1;
1270 }
1271 }
1272 }
1273 if (state == AFP_STATE_UP) {
1274 (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
1275 }
1276 }
1277
1278 static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
1279 uint64_t *discarded_pkts)
1280 {
1281 struct sockaddr_ll from;
1282 struct iovec iov;
1283 struct msghdr msg;
1284 struct timeval ts;
1285 union {
1286 struct cmsghdr cmsg;
1287 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
1288 } cmsg_buf;
1289
1290
1291 if (unlikely(suricata_ctl_flags != 0)) {
1292 return 1;
1293 }
1294
1295 msg.msg_name = &from;
1296 msg.msg_namelen = sizeof(from);
1297 msg.msg_iov = &iov;
1298 msg.msg_iovlen = 1;
1299 msg.msg_control = &cmsg_buf;
1300 msg.msg_controllen = sizeof(cmsg_buf);
1301 msg.msg_flags = 0;
1302
1303 iov.iov_len = ptv->datalen;
1304 iov.iov_base = ptv->data;
1305
1306 (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
1307
1308 if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
1309 /* FIXME */
1310 return -1;
1311 }
1312
1313 if ((ts.tv_sec > synctv->tv_sec) ||
1314 (ts.tv_sec >= synctv->tv_sec &&
1315 ts.tv_usec > synctv->tv_usec)) {
1316 return 1;
1317 }
1318 return 0;
1319 }
1320
1321 static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
1322 uint64_t *discarded_pkts)
1323 {
1324 union thdr h;
1325
1326 if (unlikely(suricata_ctl_flags != 0)) {
1327 return 1;
1328 }
1329
1330 #ifdef HAVE_TPACKET_V3
1331 if (ptv->flags & AFP_TPACKET_V3) {
1332 int ret = 0;
1333 struct tpacket_block_desc *pbd;
1334 pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
1335 *discarded_pkts += pbd->hdr.bh1.num_pkts;
1336 struct tpacket3_hdr *ppd =
1337 (struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
1338 if (((time_t)ppd->tp_sec > synctv->tv_sec) ||
1339 ((time_t)ppd->tp_sec == synctv->tv_sec &&
1340 (suseconds_t) (ppd->tp_nsec / 1000) > (suseconds_t)synctv->tv_usec)) {
1341 ret = 1;
1342 }
1343 AFPFlushBlock(pbd);
1344 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
1345 return ret;
1346
1347 } else
1348 #endif
1349 {
1350 /* Read packet from ring */
1351 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
1352 if (h.raw == NULL) {
1353 return -1;
1354 }
1355 (*discarded_pkts)++;
1356 if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
1357 ((time_t)h.h2->tp_sec == synctv->tv_sec &&
1358 (suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
1359 return 1;
1360 }
1361
1362 h.h2->tp_status = TP_STATUS_KERNEL;
1363 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
1364 ptv->frame_offset = 0;
1365 }
1366 }
1367
1368
1369 return 0;
1370 }
1371
1372 /** \brief wait for all afpacket threads to fully init
1373 *
1374 * Discard packets before all threads are ready, as the cluster
1375 * setup is not complete yet.
1376 *
1377 * if AFPPeersListStarted() returns true init is complete
1378 *
1379 * \retval r 1 = happy, otherwise unhappy
1380 */
1381 static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
1382 {
1383 struct timeval synctv;
1384 struct pollfd fds;
1385
1386 fds.fd = ptv->socket;
1387 fds.events = POLLIN;
1388
1389 /* Set timeval to end of the world */
1390 synctv.tv_sec = 0xffffffff;
1391 synctv.tv_usec = 0xffffffff;
1392
1393 while (1) {
1394 int r = poll(&fds, 1, POLL_TIMEOUT);
1395 if (r > 0 &&
1396 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1397 SCLogWarning(SC_ERR_AFP_READ, "poll failed %02x",
1398 fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL));
1399 return 0;
1400 } else if (r > 0) {
1401 if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
1402 gettimeofday(&synctv, NULL);
1403 }
1404 if (ptv->flags & AFP_RING_MODE) {
1405 r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
1406 } else {
1407 r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
1408 }
1409 SCLogDebug("Discarding on %s", ptv->tv->name);
1410 switch (r) {
1411 case 1:
1412 SCLogDebug("Starting to read on %s", ptv->tv->name);
1413 return 1;
1414 case -1:
1415 return r;
1416 }
1417 /* no packets */
1418 } else if (r == 0 && AFPPeersListStarted()) {
1419 SCLogDebug("Starting to read on %s", ptv->tv->name);
1420 return 1;
1421 } else if (r < 0) { /* only exit on error */
1422 SCLogWarning(SC_ERR_AFP_READ, "poll failed with retval %d", r);
1423 return 0;
1424 }
1425 }
1426 return 1;
1427 }
1428
1429 /**
1430 * \brief Try to reopen socket
1431 *
1432 * \retval 0 in case of success, negative if error occurs or a condition
1433 * is not met.
1434 */
1435 static int AFPTryReopen(AFPThreadVars *ptv)
1436 {
1437 ptv->down_count++;
1438
1439 /* Don't reconnect till we have packet that did not release data */
1440 if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
1441 return -1;
1442 }
1443
1444 int afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
1445 if (afp_activate_r != 0) {
1446 if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
1447 SCLogWarning(SC_ERR_AFP_CREATE, "Can not open iface '%s'",
1448 ptv->iface);
1449 }
1450 return afp_activate_r;
1451 }
1452
1453 SCLogInfo("Interface '%s' is back", ptv->iface);
1454 return 0;
1455 }
1456
1457 /**
1458 * \brief Main AF_PACKET reading Loop function
1459 */
1460 TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
1461 {
1462 SCEnter();
1463
1464 AFPThreadVars *ptv = (AFPThreadVars *)data;
1465 struct pollfd fds;
1466 int r;
1467 TmSlot *s = (TmSlot *)slot;
1468 time_t last_dump = 0;
1469 time_t current_time;
1470 int (*AFPReadFunc) (AFPThreadVars *);
1471 uint64_t discarded_pkts = 0;
1472
1473 ptv->slot = s->slot_next;
1474
1475 if (ptv->flags & AFP_RING_MODE) {
1476 if (ptv->flags & AFP_TPACKET_V3) {
1477 AFPReadFunc = AFPReadFromRingV3;
1478 } else {
1479 AFPReadFunc = AFPReadFromRing;
1480 }
1481 } else {
1482 AFPReadFunc = AFPRead;
1483 }
1484
1485 if (ptv->afp_state == AFP_STATE_DOWN) {
1486 /* Wait for our turn, threads before us must have opened the socket */
1487 while (AFPPeersListWaitTurn(ptv->mpeer)) {
1488 usleep(1000);
1489 if (suricata_ctl_flags != 0) {
1490 break;
1491 }
1492 }
1493 r = AFPCreateSocket(ptv, ptv->iface, 1);
1494 if (r < 0) {
1495 switch (-r) {
1496 case AFP_FATAL_ERROR:
1497 SCLogError(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, fatal error");
1498 SCReturnInt(TM_ECODE_FAILED);
1499 case AFP_RECOVERABLE_ERROR:
1500 SCLogWarning(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, retrying soon");
1501 }
1502 }
1503 AFPPeersListReachedInc();
1504 }
1505 if (ptv->afp_state == AFP_STATE_UP) {
1506 SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
1507 AFPSynchronizeStart(ptv, &discarded_pkts);
1508 /* let's reset counter as we will start the capture at the
1509 * next function call */
1510 #ifdef PACKET_STATISTICS
1511 struct tpacket_stats kstats;
1512 socklen_t len = sizeof (struct tpacket_stats);
1513 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
1514 &kstats, &len) > -1) {
1515 uint64_t pkts = 0;
1516 SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
1517 ", dropped %" PRIu32 "",
1518 ptv->tv->name,
1519 kstats.tp_packets, kstats.tp_drops);
1520 pkts = kstats.tp_packets - discarded_pkts - kstats.tp_drops;
1521 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, pkts);
1522 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, pkts);
1523 }
1524 #endif
1525 }
1526
1527 fds.fd = ptv->socket;
1528 fds.events = POLLIN;
1529
1530 while (1) {
1531 /* Start by checking the state of our interface */
1532 if (unlikely(ptv->afp_state == AFP_STATE_DOWN)) {
1533 int dbreak = 0;
1534
1535 do {
1536 usleep(AFP_RECONNECT_TIMEOUT);
1537 if (suricata_ctl_flags != 0) {
1538 dbreak = 1;
1539 break;
1540 }
1541 r = AFPTryReopen(ptv);
1542 fds.fd = ptv->socket;
1543 } while (r < 0);
1544 if (dbreak == 1)
1545 break;
1546 }
1547
1548 /* make sure we have at least one packet in the packet pool, to prevent
1549 * us from alloc'ing packets at line rate */
1550 PacketPoolWait();
1551
1552 r = poll(&fds, 1, POLL_TIMEOUT);
1553
1554 if (suricata_ctl_flags != 0) {
1555 break;
1556 }
1557
1558 if (r > 0 &&
1559 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1560 if (fds.revents & (POLLHUP | POLLRDHUP)) {
1561 AFPSwitchState(ptv, AFP_STATE_DOWN);
1562 continue;
1563 } else if (fds.revents & POLLERR) {
1564 char c;
1565 /* Do a recv to get errno */
1566 if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
1567 continue; /* what, no error? */
1568 SCLogError(SC_ERR_AFP_READ,
1569 "Error reading data from iface '%s': (%d) %s",
1570 ptv->iface, errno, strerror(errno));
1571 AFPSwitchState(ptv, AFP_STATE_DOWN);
1572 continue;
1573 } else if (fds.revents & POLLNVAL) {
1574 SCLogError(SC_ERR_AFP_READ, "Invalid polling request");
1575 AFPSwitchState(ptv, AFP_STATE_DOWN);
1576 continue;
1577 }
1578 } else if (r > 0) {
1579 r = AFPReadFunc(ptv);
1580 switch (r) {
1581 case AFP_READ_OK:
1582 /* Trigger one dump of stats every second */
1583 current_time = time(NULL);
1584 if (current_time != last_dump) {
1585 AFPDumpCounters(ptv);
1586 last_dump = current_time;
1587 }
1588 break;
1589 case AFP_READ_FAILURE:
1590 /* AFPRead in error: best to reset the socket */
1591 SCLogError(SC_ERR_AFP_READ,
1592 "AFPRead error reading data from iface '%s': (%d) %s",
1593 ptv->iface, errno, strerror(errno));
1594 AFPSwitchState(ptv, AFP_STATE_DOWN);
1595 continue;
1596 case AFP_SURI_FAILURE:
1597 StatsIncr(ptv->tv, ptv->capture_errors);
1598 break;
1599 case AFP_KERNEL_DROP:
1600 AFPDumpCounters(ptv);
1601 break;
1602 }
1603 } else if (unlikely(r == 0)) {
1604 /* Trigger one dump of stats every second */
1605 current_time = time(NULL);
1606 if (current_time != last_dump) {
1607 AFPDumpCounters(ptv);
1608 last_dump = current_time;
1609 }
1610 /* poll timed out, lets see handle our timeout path */
1611 TmThreadsCaptureHandleTimeout(tv, ptv->slot, NULL);
1612
1613 } else if ((r < 0) && (errno != EINTR)) {
1614 SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d) %s",
1615 ptv->iface,
1616 errno, strerror(errno));
1617 AFPSwitchState(ptv, AFP_STATE_DOWN);
1618 continue;
1619 }
1620 StatsSyncCountersIfSignalled(tv);
1621 }
1622
1623 AFPDumpCounters(ptv);
1624 StatsSyncCountersIfSignalled(tv);
1625 SCReturnInt(TM_ECODE_OK);
1626 }
1627
1628 static int AFPGetDevFlags(int fd, const char *ifname)
1629 {
1630 struct ifreq ifr;
1631
1632 memset(&ifr, 0, sizeof(ifr));
1633 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1634
1635 if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) {
1636 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1637 ifname, strerror(errno));
1638 return -1;
1639 }
1640
1641 return ifr.ifr_flags;
1642 }
1643
1644
1645 static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose)
1646 {
1647 struct ifreq ifr;
1648
1649 memset(&ifr, 0, sizeof(ifr));
1650 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1651
1652 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
1653 if (verbose)
1654 SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
1655 ifname, strerror(errno));
1656 return -1;
1657 }
1658
1659 return ifr.ifr_ifindex;
1660 }
1661
1662 static int AFPGetDevLinktype(int fd, const char *ifname)
1663 {
1664 struct ifreq ifr;
1665
1666 memset(&ifr, 0, sizeof(ifr));
1667 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1668
1669 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1670 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1671 ifname, strerror(errno));
1672 return -1;
1673 }
1674
1675 switch (ifr.ifr_hwaddr.sa_family) {
1676 case ARPHRD_LOOPBACK:
1677 return LINKTYPE_ETHERNET;
1678 case ARPHRD_PPP:
1679 case ARPHRD_NONE:
1680 return LINKTYPE_RAW;
1681 default:
1682 return ifr.ifr_hwaddr.sa_family;
1683 }
1684 }
1685
1686 int AFPGetLinkType(const char *ifname)
1687 {
1688 int ltype;
1689
1690 int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1691 if (fd == -1) {
1692 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
1693 return LINKTYPE_RAW;
1694 }
1695
1696 ltype = AFPGetDevLinktype(fd, ifname);
1697 close(fd);
1698
1699 return ltype;
1700 }
1701
1702 static int AFPComputeRingParams(AFPThreadVars *ptv, int order)
1703 {
1704 /* Compute structure:
1705 Target is to store all pending packets
1706 with a size equal to MTU + auxdata
1707 And we keep a decent number of block
1708
1709 To do so:
1710 Compute frame_size (aligned to be able to fit in block
1711 Check which block size we need. Blocksize is a 2^n * pagesize
1712 We then need to get order, big enough to have
1713 frame_size < block size
1714 Find number of frame per block (divide)
1715 Fill in packet_req
1716
1717 Compute frame size:
1718 described in packet_mmap.txt
1719 dependant on snaplen (need to use a variable ?)
1720 snaplen: MTU ?
1721 tp_hdrlen determine_version in daq_afpacket
1722 in V1: sizeof(struct tpacket_hdr);
1723 in V2: val in getsockopt(instance->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len)
1724 frame size: TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1725
1726 */
1727 int tp_hdrlen = sizeof(struct tpacket_hdr);
1728 int snaplen = default_packet_size;
1729
1730 if (snaplen == 0) {
1731 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1732 if (snaplen <= 0) {
1733 SCLogWarning(SC_ERR_INVALID_VALUE,
1734 "Unable to get MTU, setting snaplen to sane default of 1514");
1735 snaplen = 1514;
1736 }
1737 }
1738
1739 ptv->req.v2.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1740 ptv->req.v2.tp_block_size = getpagesize() << order;
1741 int frames_per_block = ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size;
1742 if (frames_per_block == 0) {
1743 SCLogError(SC_ERR_INVALID_VALUE, "Frame size bigger than block size");
1744 return -1;
1745 }
1746 ptv->req.v2.tp_frame_nr = ptv->ring_size;
1747 ptv->req.v2.tp_block_nr = ptv->req.v2.tp_frame_nr / frames_per_block + 1;
1748 /* exact division */
1749 ptv->req.v2.tp_frame_nr = ptv->req.v2.tp_block_nr * frames_per_block;
1750 SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
1751 ptv->req.v2.tp_block_size, ptv->req.v2.tp_block_nr,
1752 ptv->req.v2.tp_frame_size, ptv->req.v2.tp_frame_nr);
1753 return 1;
1754 }
1755
1756 #ifdef HAVE_TPACKET_V3
1757 static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
1758 {
1759 ptv->req.v3.tp_block_size = ptv->block_size;
1760 ptv->req.v3.tp_frame_size = 2048;
1761 int frames_per_block = 0;
1762 int tp_hdrlen = sizeof(struct tpacket3_hdr);
1763 int snaplen = default_packet_size;
1764
1765 if (snaplen == 0) {
1766 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1767 if (snaplen <= 0) {
1768 SCLogWarning(SC_ERR_INVALID_VALUE,
1769 "Unable to get MTU, setting snaplen to sane default of 1514");
1770 snaplen = 1514;
1771 }
1772 }
1773
1774 ptv->req.v3.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1775 frames_per_block = ptv->req.v3.tp_block_size / ptv->req.v3.tp_frame_size;
1776
1777 if (frames_per_block == 0) {
1778 SCLogError(SC_ERR_INVALID_VALUE,
1779 "Block size is too small, it should be at least %d",
1780 ptv->req.v3.tp_frame_size);
1781 return -1;
1782 }
1783 ptv->req.v3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
1784 /* exact division */
1785 ptv->req.v3.tp_frame_nr = ptv->req.v3.tp_block_nr * frames_per_block;
1786 ptv->req.v3.tp_retire_blk_tov = ptv->block_timeout;
1787 ptv->req.v3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
1788 SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
1789 ptv->req.v3.tp_block_size, ptv->req.v3.tp_block_nr,
1790 ptv->req.v3.tp_frame_size, ptv->req.v3.tp_frame_nr,
1791 ptv->req.v3.tp_block_size * ptv->req.v3.tp_block_nr
1792 );
1793 return 1;
1794 }
1795 #endif
1796
1797 static int AFPSetupRing(AFPThreadVars *ptv, char *devname)
1798 {
1799 int val;
1800 unsigned int len = sizeof(val), i;
1801 int order;
1802 int r, mmap_flag;
1803
1804 #ifdef HAVE_TPACKET_V3
1805 if (ptv->flags & AFP_TPACKET_V3) {
1806 val = TPACKET_V3;
1807 } else
1808 #endif
1809 {
1810 val = TPACKET_V2;
1811 }
1812 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
1813 if (errno == ENOPROTOOPT) {
1814 if (ptv->flags & AFP_TPACKET_V3) {
1815 SCLogError(SC_ERR_AFP_CREATE,
1816 "Too old kernel giving up (need 3.2 for TPACKET_V3)");
1817 } else {
1818 SCLogError(SC_ERR_AFP_CREATE,
1819 "Too old kernel giving up (need 2.6.27 at least)");
1820 }
1821 }
1822 SCLogError(SC_ERR_AFP_CREATE, "Error when retrieving packet header len");
1823 return AFP_FATAL_ERROR;
1824 }
1825
1826 val = TPACKET_V2;
1827 #ifdef HAVE_TPACKET_V3
1828 if (ptv->flags & AFP_TPACKET_V3) {
1829 val = TPACKET_V3;
1830 }
1831 #endif
1832 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_VERSION, &val,
1833 sizeof(val)) < 0) {
1834 SCLogError(SC_ERR_AFP_CREATE,
1835 "Can't activate TPACKET_V2/TPACKET_V3 on packet socket: %s",
1836 strerror(errno));
1837 return AFP_FATAL_ERROR;
1838 }
1839
1840 #ifdef HAVE_HW_TIMESTAMPING
1841 int req = SOF_TIMESTAMPING_RAW_HARDWARE;
1842 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req,
1843 sizeof(req)) < 0) {
1844 SCLogWarning(SC_ERR_AFP_CREATE,
1845 "Can't activate hardware timestamping on packet socket: %s",
1846 strerror(errno));
1847 }
1848 #endif
1849
1850 /* Let's reserve head room so we can add the VLAN header in IPS
1851 * or TAP mode before write the packet */
1852 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
1853 /* Only one vlan is extracted from AFP header so
1854 * one VLAN header length is enough. */
1855 int reserve = VLAN_HEADER_LEN;
1856 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
1857 sizeof(reserve)) < 0) {
1858 SCLogError(SC_ERR_AFP_CREATE,
1859 "Can't activate reserve on packet socket: %s",
1860 strerror(errno));
1861 return AFP_FATAL_ERROR;
1862 }
1863 }
1864
1865 /* Allocate RX ring */
1866 #ifdef HAVE_TPACKET_V3
1867 if (ptv->flags & AFP_TPACKET_V3) {
1868 if (AFPComputeRingParamsV3(ptv) != 1) {
1869 return AFP_FATAL_ERROR;
1870 }
1871 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1872 (void *) &ptv->req.v3, sizeof(ptv->req.v3));
1873 if (r < 0) {
1874 SCLogError(SC_ERR_MEM_ALLOC,
1875 "Unable to allocate RX Ring for iface %s: (%d) %s",
1876 devname,
1877 errno,
1878 strerror(errno));
1879 return AFP_FATAL_ERROR;
1880 }
1881 } else {
1882 #endif
1883 for (order = AFP_BLOCK_SIZE_DEFAULT_ORDER; order >= 0; order--) {
1884 if (AFPComputeRingParams(ptv, order) != 1) {
1885 SCLogInfo("Ring parameter are incorrect. Please correct the devel");
1886 return AFP_FATAL_ERROR;
1887 }
1888
1889 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1890 (void *) &ptv->req, sizeof(ptv->req));
1891
1892 if (r < 0) {
1893 if (errno == ENOMEM) {
1894 SCLogInfo("Memory issue with ring parameters. Retrying.");
1895 continue;
1896 }
1897 SCLogError(SC_ERR_MEM_ALLOC,
1898 "Unable to allocate RX Ring for iface %s: (%d) %s",
1899 devname,
1900 errno,
1901 strerror(errno));
1902 return AFP_FATAL_ERROR;
1903 } else {
1904 break;
1905 }
1906 }
1907 if (order < 0) {
1908 SCLogError(SC_ERR_MEM_ALLOC,
1909 "Unable to allocate RX Ring for iface %s (order 0 failed)",
1910 devname);
1911 return AFP_FATAL_ERROR;
1912 }
1913 #ifdef HAVE_TPACKET_V3
1914 }
1915 #endif
1916
1917 /* Allocate the Ring */
1918 #ifdef HAVE_TPACKET_V3
1919 if (ptv->flags & AFP_TPACKET_V3) {
1920 ptv->ring_buflen = ptv->req.v3.tp_block_nr * ptv->req.v3.tp_block_size;
1921 } else {
1922 #endif
1923 ptv->ring_buflen = ptv->req.v2.tp_block_nr * ptv->req.v2.tp_block_size;
1924 #ifdef HAVE_TPACKET_V3
1925 }
1926 #endif
1927 mmap_flag = MAP_SHARED;
1928 if (ptv->flags & AFP_MMAP_LOCKED)
1929 mmap_flag |= MAP_LOCKED;
1930 ptv->ring_buf = mmap(0, ptv->ring_buflen, PROT_READ|PROT_WRITE,
1931 mmap_flag, ptv->socket, 0);
1932 if (ptv->ring_buf == MAP_FAILED) {
1933 SCLogError(SC_ERR_MEM_ALLOC, "Unable to mmap, error %s",
1934 strerror(errno));
1935 goto mmap_err;
1936 }
1937 #ifdef HAVE_TPACKET_V3
1938 if (ptv->flags & AFP_TPACKET_V3) {
1939 ptv->ring.v3 = SCMalloc(ptv->req.v3.tp_block_nr * sizeof(*ptv->ring.v3));
1940 if (!ptv->ring.v3) {
1941 SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring.v3");
1942 goto postmmap_err;
1943 }
1944 for (i = 0; i < ptv->req.v3.tp_block_nr; ++i) {
1945 ptv->ring.v3[i].iov_base = ptv->ring_buf + (i * ptv->req.v3.tp_block_size);
1946 ptv->ring.v3[i].iov_len = ptv->req.v3.tp_block_size;
1947 }
1948 } else {
1949 #endif
1950 /* allocate a ring for each frame header pointer*/
1951 ptv->ring.v2 = SCMalloc(ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
1952 if (ptv->ring.v2 == NULL) {
1953 SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate frame buf");
1954 goto postmmap_err;
1955 }
1956 memset(ptv->ring.v2, 0, ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
1957 /* fill the header ring with proper frame ptr*/
1958 ptv->frame_offset = 0;
1959 for (i = 0; i < ptv->req.v2.tp_block_nr; ++i) {
1960 void *base = &(ptv->ring_buf[i * ptv->req.v2.tp_block_size]);
1961 unsigned int j;
1962 for (j = 0; j < ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size; ++j, ++ptv->frame_offset) {
1963 (((union thdr **)ptv->ring.v2)[ptv->frame_offset]) = base;
1964 base += ptv->req.v2.tp_frame_size;
1965 }
1966 }
1967 ptv->frame_offset = 0;
1968 #ifdef HAVE_TPACKET_V3
1969 }
1970 #endif
1971
1972 return 0;
1973
1974 postmmap_err:
1975 munmap(ptv->ring_buf, ptv->ring_buflen);
1976 if (ptv->ring.v2)
1977 SCFree(ptv->ring.v2);
1978 if (ptv->ring.v3)
1979 SCFree(ptv->ring.v3);
1980 mmap_err:
1981 /* Packet mmap does the cleaning when socket is closed */
1982 return AFP_FATAL_ERROR;
1983 }
1984
/** \brief test if we can use FANOUT. Older kernels like those in
 *         CentOS6 have HAVE_PACKET_FANOUT defined but fail to work
 *
 *  The check is done by setting a fanout option on a temporary socket.
 *
 *  \retval 1 when the fanout option could be set, 0 otherwise (always 0
 *          when built without HAVE_PACKET_FANOUT)
 */
int AFPIsFanoutSupported(void)
{
#ifdef HAVE_PACKET_FANOUT
    int supported = 0;
    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

    if (fd >= 0) {
        uint16_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
        uint16_t id = 1;
        uint32_t option = (mode << 16) | (id & 0xffff);
        int r = setsockopt(fd, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
        close(fd);

        if (r < 0) {
            SCLogPerf("fanout not supported by kernel: %s", strerror(errno));
        } else {
            supported = 1;
        }
    }
    return supported;
#else
    return 0;
#endif
}
2010
2011 #ifdef HAVE_PACKET_EBPF
2012
2013 static int SockFanoutSeteBPF(AFPThreadVars *ptv)
2014 {
2015 int pfd = ptv->ebpf_lb_fd;
2016 if (pfd == -1) {
2017 SCLogError(SC_ERR_INVALID_VALUE,
2018 "Fanout file descriptor is invalid");
2019 return -1;
2020 }
2021
2022 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
2023 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf");
2024 return -1;
2025 }
2026 SCLogInfo("Activated eBPF on socket");
2027
2028 return 0;
2029 }
2030
2031 static int SetEbpfFilter(AFPThreadVars *ptv)
2032 {
2033 int pfd = ptv->ebpf_filter_fd;
2034 if (pfd == -1) {
2035 SCLogError(SC_ERR_INVALID_VALUE,
2036 "Filter file descriptor is invalid");
2037 return -1;
2038 }
2039
2040 if (setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &pfd, sizeof(pfd))) {
2041 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
2042 return -1;
2043 }
2044 SCLogInfo("Activated eBPF filter on socket");
2045
2046 return 0;
2047 }
2048 #endif
2049
2050 static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
2051 {
2052 int r;
2053 int ret = AFP_FATAL_ERROR;
2054 struct packet_mreq sock_params;
2055 struct sockaddr_ll bind_address;
2056 int if_idx;
2057
2058 /* open socket */
2059 ptv->socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
2060 if (ptv->socket == -1) {
2061 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
2062 goto error;
2063 }
2064
2065 if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);
2066
2067 if (if_idx == -1) {
2068 goto socket_err;
2069 }
2070
2071 /* bind socket */
2072 memset(&bind_address, 0, sizeof(bind_address));
2073 bind_address.sll_family = AF_PACKET;
2074 bind_address.sll_protocol = htons(ETH_P_ALL);
2075 bind_address.sll_ifindex = if_idx;
2076 if (bind_address.sll_ifindex == -1) {
2077 if (verbose)
2078 SCLogError(SC_ERR_AFP_CREATE, "Couldn't find iface %s", devname);
2079 ret = AFP_RECOVERABLE_ERROR;
2080 goto socket_err;
2081 }
2082
2083 int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
2084 if (if_flags == -1) {
2085 if (verbose) {
2086 SCLogError(SC_ERR_AFP_READ,
2087 "Couldn't get flags for interface '%s'",
2088 ptv->iface);
2089 }
2090 ret = AFP_RECOVERABLE_ERROR;
2091 goto socket_err;
2092 } else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
2093 if (verbose) {
2094 SCLogError(SC_ERR_AFP_READ,
2095 "Interface '%s' is down",
2096 ptv->iface);
2097 }
2098 ret = AFP_RECOVERABLE_ERROR;
2099 goto socket_err;
2100 }
2101
2102 if (ptv->promisc != 0) {
2103 /* Force promiscuous mode */
2104 memset(&sock_params, 0, sizeof(sock_params));
2105 sock_params.mr_type = PACKET_MR_PROMISC;
2106 sock_params.mr_ifindex = bind_address.sll_ifindex;
2107 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP,(void *)&sock_params, sizeof(sock_params));
2108 if (r < 0) {
2109 SCLogError(SC_ERR_AFP_CREATE,
2110 "Couldn't switch iface %s to promiscuous, error %s",
2111 devname, strerror(errno));
2112 goto socket_err;
2113 }
2114 }
2115
2116 if (ptv->checksum_mode == CHECKSUM_VALIDATION_KERNEL) {
2117 int val = 1;
2118 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_AUXDATA, &val,
2119 sizeof(val)) == -1 && errno != ENOPROTOOPT) {
2120 SCLogWarning(SC_ERR_NO_AF_PACKET,
2121 "'kernel' checksum mode not supported, falling back to full mode.");
2122 ptv->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
2123 }
2124 }
2125
2126 /* set socket recv buffer size */
2127 if (ptv->buffer_size != 0) {
2128 /*
2129 * Set the socket buffer size to the specified value.
2130 */
2131 SCLogPerf("Setting AF_PACKET socket buffer to %d", ptv->buffer_size);
2132 if (setsockopt(ptv->socket, SOL_SOCKET, SO_RCVBUF,
2133 &ptv->buffer_size,
2134 sizeof(ptv->buffer_size)) == -1) {
2135 SCLogError(SC_ERR_AFP_CREATE,
2136 "Couldn't set buffer size to %d on iface %s, error %s",
2137 ptv->buffer_size, devname, strerror(errno));
2138 goto socket_err;
2139 }
2140 }
2141
2142 r = bind(ptv->socket, (struct sockaddr *)&bind_address, sizeof(bind_address));
2143 if (r < 0) {
2144 if (verbose) {
2145 if (errno == ENETDOWN) {
2146 SCLogError(SC_ERR_AFP_CREATE,
2147 "Couldn't bind AF_PACKET socket, iface %s is down",
2148 devname);
2149 } else {
2150 SCLogError(SC_ERR_AFP_CREATE,
2151 "Couldn't bind AF_PACKET socket to iface %s, error %s",
2152 devname, strerror(errno));
2153 }
2154 }
2155 ret = AFP_RECOVERABLE_ERROR;
2156 goto socket_err;
2157 }
2158
2159
2160 #ifdef HAVE_PACKET_FANOUT
2161 /* add binded socket to fanout group */
2162 if (ptv->threads > 1) {
2163 uint16_t mode = ptv->cluster_type;
2164 uint16_t id = ptv->cluster_id;
2165 uint32_t option = (mode << 16) | (id & 0xffff);
2166 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
2167 if (r < 0) {
2168 SCLogError(SC_ERR_AFP_CREATE,
2169 "Couldn't set fanout mode, error %s",
2170 strerror(errno));
2171 goto socket_err;
2172 }
2173 }
2174 #endif
2175
2176 #ifdef HAVE_PACKET_EBPF
2177 if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
2178 r = SockFanoutSeteBPF(ptv);
2179 if (r < 0) {
2180 SCLogError(SC_ERR_AFP_CREATE,
2181 "Coudn't set EBPF, error %s",
2182 strerror(errno));
2183 goto socket_err;
2184 }
2185 }
2186 #endif
2187
2188 if (ptv->flags & AFP_RING_MODE) {
2189 ret = AFPSetupRing(ptv, devname);
2190 if (ret != 0)
2191 goto socket_err;
2192 }
2193
2194 SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
2195
2196 ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
2197 switch (ptv->datalink) {
2198 case ARPHRD_PPP:
2199 case ARPHRD_ATM:
2200 ptv->cooked = 1;
2201 break;
2202 }
2203
2204 TmEcode rc = AFPSetBPFFilter(ptv);
2205 if (rc == TM_ECODE_FAILED) {
2206 ret = AFP_FATAL_ERROR;
2207 goto socket_err;
2208 }
2209
2210 /* Init is ok */
2211 AFPSwitchState(ptv, AFP_STATE_UP);
2212 return 0;
2213
2214 socket_err:
2215 close(ptv->socket);
2216 ptv->socket = -1;
2217 if (ptv->flags & AFP_TPACKET_V3) {
2218 if (ptv->ring.v3) {
2219 SCFree(ptv->ring.v3);
2220 ptv->ring.v3 = NULL;
2221 }
2222 } else {
2223 if (ptv->ring.v2) {
2224 SCFree(ptv->ring.v2);
2225 ptv->ring.v2 = NULL;
2226 }
2227 }
2228
2229 error:
2230 return -ret;
2231 }
2232
2233 TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
2234 {
2235 struct bpf_program filter;
2236 struct sock_fprog fcode;
2237 int rc;
2238
2239 #ifdef HAVE_PACKET_EBPF
2240 if (ptv->ebpf_filter_fd != -1) {
2241 return SetEbpfFilter(ptv);
2242 }
2243 #endif
2244
2245 if (!ptv->bpf_filter)
2246 return TM_ECODE_OK;
2247
2248 SCLogInfo("Using BPF '%s' on iface '%s'",
2249 ptv->bpf_filter,
2250 ptv->iface);
2251
2252 char errbuf[PCAP_ERRBUF_SIZE];
2253 if (SCBPFCompile(default_packet_size, /* snaplen_arg */
2254 ptv->datalink, /* linktype_arg */
2255 &filter, /* program */
2256 ptv->bpf_filter, /* const char *buf */
2257 1, /* optimize */
2258 0, /* mask */
2259 errbuf,
2260 sizeof(errbuf)) == -1) {
2261 SCLogError(SC_ERR_AFP_CREATE, "Failed to compile BPF \"%s\": %s",
2262 ptv->bpf_filter,
2263 errbuf);
2264 return TM_ECODE_FAILED;
2265 }
2266
2267 fcode.len = filter.bf_len;
2268 fcode.filter = (struct sock_filter*)filter.bf_insns;
2269
2270 rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
2271
2272 SCBPFFree(&filter);
2273 if(rc == -1) {
2274 SCLogError(SC_ERR_AFP_CREATE, "Failed to attach filter: %s", strerror(errno));
2275 return TM_ECODE_FAILED;
2276 }
2277
2278 return TM_ECODE_OK;
2279 }
2280
2281 #ifdef HAVE_PACKET_EBPF
2282 /**
2283 * Insert a half flow in the kernel bypass table
2284 *
2285 * \param mapfd file descriptor of the protocol bypass table
2286 * \param key data to use as key in the table
2287 * \param inittime time of creation of the entry (in monotonic clock)
2288 * \return 0 in case of error, 1 if success
2289 */
2290 static int AFPInsertHalfFlow(int mapd, void *key, uint64_t inittime)
2291 {
2292 struct pair value[nr_cpus];
2293 unsigned int i;
2294
2295 if (mapd == -1) {
2296 return 0;
2297 }
2298
2299 /* We use a per CPU structure so we have to set an array of values as the kernel
2300 * is not duplicating the data on each CPU by itself. */
2301 for (i = 0; i < nr_cpus; i++) {
2302 value[i].time = inittime;
2303 value[i].packets = 0;
2304 value[i].bytes = 0;
2305 }
2306 SCLogDebug("Inserting element in eBPF mapping: %lu", inittime);
2307 if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) != 0) {
2308 switch (errno) {
2309 /* no more place in the hash */
2310 case E2BIG:
2311 return 0;
2312 /* if we already have the key then bypass is a success */
2313 case EEXIST:
2314 return 1;
2315 /* Not supposed to be there so issue a error */
2316 default:
2317 SCLogError(SC_ERR_BPF, "Can't update eBPF map: %s (%d)",
2318 strerror(errno),
2319 errno);
2320 return 0;
2321 }
2322 }
2323 return 1;
2324 }
2325 #endif
2326
2327 /**
2328 * Bypass function for AF_PACKET capture in eBPF mode
2329 *
2330 * This function creates two half flows in the map shared with the kernel
2331 * to trigger bypass.
2332 *
2333 * The implementation of bypass is done via an IPv4 and an IPv6 flow table.
2334 * This table contains the list of half flows to bypass. The in-kernel filter
2335 * will skip/drop the packet if they belong to a flow in one of the flows
2336 * table.
2337 *
2338 * \param p the packet belonging to the flow to bypass
2339 * \return 0 if unable to bypass, 1 if success
2340 */
2341 static int AFPBypassCallback(Packet *p)
2342 {
2343 #ifdef HAVE_PACKET_EBPF
2344 SCLogDebug("Calling af_packet callback function");
2345 /* Only bypass TCP and UDP */
2346 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2347 return 0;
2348 }
2349
2350 /* Bypassing tunneled packets is currently not supported
2351 * because we can't discard the inner packet only due to
2352 * primitive parsing in eBPF */
2353 if (IS_TUNNEL_PKT(p)) {
2354 return 0;
2355 }
2356 struct timespec curtime;
2357 uint64_t inittime = 0;
2358 /* In eBPF, the function that we have use to get time return the
2359 * monotonic clock (the time since start of the computer). So we
2360 * can't use the timestamp of the packet. */
2361 if (clock_gettime(CLOCK_MONOTONIC, &curtime) == 0) {
2362 inittime = curtime.tv_sec * 1000000000;
2363 }
2364 if (PKT_IS_IPV4(p)) {
2365 SCLogDebug("add an IPv4");
2366 if (p->afp_v.v4_map_fd == -1) {
2367 return 0;
2368 }
2369 struct flowv4_keys key = {};
2370 key.src = htonl(GET_IPV4_SRC_ADDR_U32(p));
2371 key.dst = htonl(GET_IPV4_DST_ADDR_U32(p));
2372 key.port16[0] = GET_TCP_SRC_PORT(p);
2373 key.port16[1] = GET_TCP_DST_PORT(p);
2374
2375 key.ip_proto = IPV4_GET_IPPROTO(p);
2376 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
2377 return 0;
2378 }
2379 key.src = htonl(GET_IPV4_DST_ADDR_U32(p));
2380 key.dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
2381 key.port16[0] = GET_TCP_DST_PORT(p);
2382 key.port16[1] = GET_TCP_SRC_PORT(p);
2383 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
2384 return 0;
2385 }
2386 EBPFUpdateFlow(p->flow, p);
2387 return 1;
2388 }
2389 /* For IPv6 case we don't handle extended header in eBPF */
2390 if (PKT_IS_IPV6(p) &&
2391 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
2392 int i;
2393 if (p->afp_v.v6_map_fd == -1) {
2394 return 0;
2395 }
2396 SCLogDebug("add an IPv6");
2397 struct flowv6_keys key = {};
2398 for (i = 0; i < 4; i++) {
2399 key.src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2400 key.dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2401 }
2402 key.port16[0] = GET_TCP_SRC_PORT(p);
2403 key.port16[1] = GET_TCP_DST_PORT(p);
2404 key.ip_proto = IPV6_GET_NH(p);
2405 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
2406 return 0;
2407 }
2408 for (i = 0; i < 4; i++) {
2409 key.src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2410 key.dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2411 }
2412 key.port16[0] = GET_TCP_DST_PORT(p);
2413 key.port16[1] = GET_TCP_SRC_PORT(p);
2414 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
2415 return 0;
2416 }
2417 EBPFUpdateFlow(p->flow, p);
2418 return 1;
2419 }
2420 #endif
2421 return 0;
2422 }
2423
2424 /**
2425 * Bypass function for AF_PACKET capture in XDP mode
2426 *
2427 * This function creates two half flows in the map shared with the kernel
2428 * to trigger bypass. This function is similar to AFPBypassCallback() but
2429 * the bytes order is changed for some data due to the way we get the data
2430 * in the XDP case.
2431 *
2432 * \param p the packet belonging to the flow to bypass
2433 * \return 0 if unable to bypass, 1 if success
2434 */
2435 static int AFPXDPBypassCallback(Packet *p)
2436 {
2437 #ifdef HAVE_PACKET_XDP
2438 SCLogDebug("Calling af_packet callback function");
2439 /* Only bypass TCP and UDP */
2440 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2441 return 0;
2442 }
2443
2444 /* Bypassing tunneled packets is currently not supported
2445 * because we can't discard the inner packet only due to
2446 * primitive parsing in eBPF */
2447 if (IS_TUNNEL_PKT(p)) {
2448 return 0;
2449 }
2450 struct timespec curtime;
2451 uint64_t inittime = 0;
2452 /* In eBPF, the function that we have use to get time return the
2453 * monotonic clock (the time since start of the computer). So we
2454 * can't use the timestamp of the packet. */
2455 if (clock_gettime(CLOCK_MONOTONIC, &curtime) == 0) {
2456 inittime = curtime.tv_sec * 1000000000;
2457 }
2458 if (PKT_IS_IPV4(p)) {
2459 struct flowv4_keys key = {};
2460 if (p->afp_v.v4_map_fd == -1) {
2461 return 0;
2462 }
2463 key.src = GET_IPV4_SRC_ADDR_U32(p);
2464 key.dst = GET_IPV4_DST_ADDR_U32(p);
2465 /* In the XDP filter we get port from parsing of packet and not from skb
2466 * (as in eBPF filter) so we need to pass from host to network order */
2467 key.port16[0] = htons(GET_TCP_SRC_PORT(p));
2468 key.port16[1] = htons(GET_TCP_DST_PORT(p));
2469 key.ip_proto = IPV4_GET_IPPROTO(p);
2470 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
2471 return 0;
2472 }
2473 key.src = GET_IPV4_DST_ADDR_U32(p);
2474 key.dst = GET_IPV4_SRC_ADDR_U32(p);
2475 key.port16[0] = htons(GET_TCP_DST_PORT(p));
2476 key.port16[1] = htons(GET_TCP_SRC_PORT(p));
2477 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
2478 return 0;
2479 }
2480 return 1;
2481 }
2482 /* For IPv6 case we don't handle extended header in eBPF */
2483 if (PKT_IS_IPV6(p) &&
2484 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
2485 SCLogDebug("add an IPv6");
2486 if (p->afp_v.v6_map_fd == -1) {
2487 return 0;
2488 }
2489 int i;
2490 struct flowv6_keys key = {};
2491 for (i = 0; i < 4; i++) {
2492 key.src[i] = GET_IPV6_SRC_ADDR(p)[i];
2493 key.dst[i] = GET_IPV6_DST_ADDR(p)[i];
2494 }
2495 key.port16[0] = htons(GET_TCP_SRC_PORT(p));
2496 key.port16[1] = htons(GET_TCP_DST_PORT(p));
2497 key.ip_proto = IPV6_GET_NH(p);
2498 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
2499 return 0;
2500 }
2501 for (i = 0; i < 4; i++) {
2502 key.src[i] = GET_IPV6_DST_ADDR(p)[i];
2503 key.dst[i] = GET_IPV6_SRC_ADDR(p)[i];
2504 }
2505 key.port16[0] = htons(GET_TCP_DST_PORT(p));
2506 key.port16[1] = htons(GET_TCP_SRC_PORT(p));
2507 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
2508 return 0;
2509 }
2510 return 1;
2511 }
2512 #endif
2513 return 0;
2514 }
2515
2516 /**
2517 * \brief Init function for ReceiveAFP.
2518 *
2519 * \param tv pointer to ThreadVars
2520 * \param initdata pointer to the interface passed from the user
2521 * \param data pointer gets populated with AFPThreadVars
2522 *
2523 * \todo Create a general AFP setup function.
2524 */
2525 TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
2526 {
2527 SCEnter();
2528 AFPIfaceConfig *afpconfig = (AFPIfaceConfig *)initdata;
2529
2530 if (initdata == NULL) {
2531 SCLogError(SC_ERR_INVALID_ARGUMENT, "initdata == NULL");
2532 SCReturnInt(TM_ECODE_FAILED);
2533 }
2534
2535 AFPThreadVars *ptv = SCMalloc(sizeof(AFPThreadVars));
2536 if (unlikely(ptv == NULL)) {
2537 afpconfig->DerefFunc(afpconfig);
2538 SCReturnInt(TM_ECODE_FAILED);
2539 }
2540 memset(ptv, 0, sizeof(AFPThreadVars));
2541
2542 ptv->tv = tv;
2543 ptv->cooked = 0;
2544
2545 strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
2546 ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2547
2548 ptv->livedev = LiveGetDevice(ptv->iface);
2549 if (ptv->livedev == NULL) {
2550 SCLogError(SC_ERR_INVALID_VALUE, "Unable to find Live device");
2551 SCFree(ptv);
2552 SCReturnInt(TM_ECODE_FAILED);
2553 }
2554
2555 ptv->buffer_size = afpconfig->buffer_size;
2556 ptv->ring_size = afpconfig->ring_size;
2557 ptv->block_size = afpconfig->block_size;
2558 ptv->block_timeout = afpconfig->block_timeout;
2559
2560 ptv->promisc = afpconfig->promisc;
2561 ptv->checksum_mode = afpconfig->checksum_mode;
2562 ptv->bpf_filter = NULL;
2563
2564 ptv->threads = 1;
2565 #ifdef HAVE_PACKET_FANOUT
2566 ptv->cluster_type = PACKET_FANOUT_LB;
2567 ptv->cluster_id = 1;
2568 /* We only set cluster info if the number of reader threads is greater than 1 */
2569 if (afpconfig->threads > 1) {
2570 ptv->cluster_id = afpconfig->cluster_id;
2571 ptv->cluster_type = afpconfig->cluster_type;
2572 ptv->threads = afpconfig->threads;
2573 }
2574 #endif
2575 ptv->flags = afpconfig->flags;
2576
2577 if (afpconfig->bpf_filter) {
2578 ptv->bpf_filter = afpconfig->bpf_filter;
2579 }
2580 ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
2581 ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
2582 ptv->xdp_mode = afpconfig->xdp_mode;
2583
2584 #ifdef HAVE_PACKET_EBPF
2585 if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
2586 ptv->v4_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v4");
2587 if (ptv->v4_map_fd == -1) {
2588 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v4");
2589 }
2590 ptv->v6_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v6");
2591 if (ptv->v6_map_fd == -1) {
2592 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v6");
2593 }
2594 }
2595 #endif
2596
2597 #ifdef PACKET_STATISTICS
2598 ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
2599 ptv->tv);
2600 ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
2601 ptv->tv);
2602 ptv->capture_errors = StatsRegisterCounter("capture.errors",
2603 ptv->tv);
2604 #endif
2605
2606 ptv->copy_mode = afpconfig->copy_mode;
2607 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
2608 strlcpy(ptv->out_iface, afpconfig->out_iface, AFP_IFACE_NAME_LENGTH);
2609 ptv->out_iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2610 /* Warn about BPF filter consequence */
2611 if (ptv->bpf_filter) {
2612 SCLogWarning(SC_WARN_UNCOMMON, "Enabling a BPF filter in IPS mode result"
2613 " in dropping all non matching packets.");
2614 }
2615 }
2616
2617
2618 if (AFPPeersListAdd(ptv) == TM_ECODE_FAILED) {
2619 SCFree(ptv);
2620 afpconfig->DerefFunc(afpconfig);
2621 SCReturnInt(TM_ECODE_FAILED);
2622 }
2623
2624 #define T_DATA_SIZE 70000
2625 ptv->data = SCMalloc(T_DATA_SIZE);
2626 if (ptv->data == NULL) {
2627 afpconfig->DerefFunc(afpconfig);
2628 SCFree(ptv);
2629 SCReturnInt(TM_ECODE_FAILED);
2630 }
2631 ptv->datalen = T_DATA_SIZE;
2632 #undef T_DATA_SIZE
2633
2634 *data = (void *)ptv;
2635
2636 afpconfig->DerefFunc(afpconfig);
2637
2638 /* A bit strange to have this here but we only have vlan information
2639 * during reading so we need to know if we want to keep vlan during
2640 * the capture phase */
2641 int vlanbool = 0;
2642 if ((ConfGetBool("vlan.use-for-tracking", &vlanbool)) == 1 && vlanbool == 0) {
2643 ptv->flags |= AFP_VLAN_DISABLED;
2644 }
2645
2646 /* If kernel is older than 3.0, VLAN is not stripped so we don't
2647 * get the info from packet extended header but we will use a standard
2648 * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
2649 if (! SCKernelVersionIsAtLeast(3, 0)) {
2650 ptv->flags |= AFP_VLAN_DISABLED;
2651 }
2652
2653 SCReturnInt(TM_ECODE_OK);
2654 }
2655
2656 /**
2657 * \brief This function prints stats to the screen at exit.
2658 * \param tv pointer to ThreadVars
2659 * \param data pointer that gets cast into AFPThreadVars for ptv
2660 */
2661 void ReceiveAFPThreadExitStats(ThreadVars *tv, void *data)
2662 {
2663 SCEnter();
2664 AFPThreadVars *ptv = (AFPThreadVars *)data;
2665
2666 #ifdef PACKET_STATISTICS
2667 AFPDumpCounters(ptv);
2668 SCLogPerf("(%s) Kernel: Packets %" PRIu64 ", dropped %" PRIu64 "",
2669 tv->name,
2670 StatsGetLocalCounterValue(tv, ptv->capture_kernel_packets),
2671 StatsGetLocalCounterValue(tv, ptv->capture_kernel_drops));
2672 #endif
2673 }
2674
2675 /**
2676 * \brief DeInit function closes af packet socket at exit.
2677 * \param tv pointer to ThreadVars
2678 * \param data pointer that gets cast into AFPThreadVars for ptv
2679 */
2680 TmEcode ReceiveAFPThreadDeinit(ThreadVars *tv, void *data)
2681 {
2682 AFPThreadVars *ptv = (AFPThreadVars *)data;
2683
2684 AFPSwitchState(ptv, AFP_STATE_DOWN);
2685
2686 #ifdef HAVE_PACKET_XDP
2687 EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
2688 #endif
2689 if (ptv->data != NULL) {
2690 SCFree(ptv->data);
2691 ptv->data = NULL;
2692 }
2693 ptv->datalen = 0;
2694
2695 ptv->bpf_filter = NULL;
2696 if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring.v3) {
2697 SCFree(ptv->ring.v3);
2698 } else {
2699 if (ptv->ring.v2)
2700 SCFree(ptv->ring.v2);
2701 }
2702
2703 SCFree(ptv);
2704 SCReturnInt(TM_ECODE_OK);
2705 }
2706
2707 /**
2708 * \brief This function passes off to link type decoders.
2709 *
2710 * DecodeAFP reads packets from the PacketQueue and passes
2711 * them off to the proper link type decoder.
2712 *
2713 * \param t pointer to ThreadVars
2714 * \param p pointer to the current packet
2715 * \param data pointer that gets cast into AFPThreadVars for ptv
2716 * \param pq pointer to the current PacketQueue
2717 */
2718 TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *postpq)
2719 {
2720 SCEnter();
2721 DecodeThreadVars *dtv = (DecodeThreadVars *)data;
2722
2723 /* XXX HACK: flow timeout can call us for injected pseudo packets
2724 * see bug: https://redmine.openinfosecfoundation.org/issues/1107 */
2725 if (p->flags & PKT_PSEUDO_STREAM_END)
2726 return TM_ECODE_OK;
2727
2728 /* update counters */
2729 DecodeUpdatePacketCounters(tv, dtv, p);
2730
2731 /* If suri has set vlan during reading, we increase vlan counter */
2732 if (p->vlan_idx) {
2733 StatsIncr(tv, dtv->counter_vlan);
2734 }
2735
2736 /* call the decoder */
2737 switch (p->datalink) {
2738 case LINKTYPE_ETHERNET:
2739 DecodeEthernet(tv, dtv, p,GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2740 break;
2741 case LINKTYPE_LINUX_SLL:
2742 DecodeSll(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2743 break;
2744 case LINKTYPE_PPP:
2745 DecodePPP(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2746 break;
2747 case LINKTYPE_RAW:
2748 case LINKTYPE_GRE_OVER_IP:
2749 DecodeRaw(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2750 break;
2751 case LINKTYPE_NULL:
2752 DecodeNull(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2753 break;
2754 default:
2755 SCLogError(SC_ERR_DATALINK_UNIMPLEMENTED, "Error: datalink type %" PRId32 " not yet supported in module DecodeAFP", p->datalink);
2756 break;
2757 }
2758
2759 PacketDecodeFinalize(tv, dtv, p);
2760
2761 SCReturnInt(TM_ECODE_OK);
2762 }
2763
2764 TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
2765 {
2766 SCEnter();
2767 DecodeThreadVars *dtv = NULL;
2768
2769 dtv = DecodeThreadVarsAlloc(tv);
2770
2771 if (dtv == NULL)
2772 SCReturnInt(TM_ECODE_FAILED);
2773
2774 DecodeRegisterPerfCounters(dtv, tv);
2775
2776 *data = (void *)dtv;
2777
2778 SCReturnInt(TM_ECODE_OK);
2779 }
2780
2781 TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data)
2782 {
2783 if (data != NULL)
2784 DecodeThreadVarsFree(tv, data);
2785 SCReturnInt(TM_ECODE_OK);
2786 }
2787
2788 #endif /* HAVE_AF_PACKET */
2789 /* eof */
2790 /**
2791 * @}
2792 */