]> git.ipfire.org Git - people/ms/suricata.git/blob - src/source-af-packet.c
af-packet: end of map factoring
[people/ms/suricata.git] / src / source-af-packet.c
1 /* Copyright (C) 2011-2017 Open Information Security Foundation
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18 /**
19 * \defgroup afppacket AF_PACKET running mode
20 *
21 * @{
22 */
23
24 /**
25 * \file
26 *
27 * \author Eric Leblond <eric@regit.org>
28 *
29 * AF_PACKET socket acquisition support
30 *
31 * \todo watch other interface event to detect suppression of the monitored
32 * interface
33 */
34
35 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
36 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1
37 #include "suricata-common.h"
38 #include "config.h"
39 #include "suricata.h"
40 #include "decode.h"
41 #include "packet-queue.h"
42 #include "threads.h"
43 #include "threadvars.h"
44 #include "tm-queuehandlers.h"
45 #include "tm-modules.h"
46 #include "tm-threads.h"
47 #include "tm-threads-common.h"
48 #include "conf.h"
49 #include "util-cpu.h"
50 #include "util-debug.h"
51 #include "util-device.h"
52 #include "util-ebpf.h"
53 #include "util-error.h"
54 #include "util-privs.h"
55 #include "util-optimize.h"
56 #include "util-checksum.h"
57 #include "util-ioctl.h"
58 #include "util-host-info.h"
59 #include "tmqh-packetpool.h"
60 #include "source-af-packet.h"
61 #include "runmodes.h"
62
63 #ifdef HAVE_AF_PACKET
64
65 #if HAVE_SYS_IOCTL_H
66 #include <sys/ioctl.h>
67 #endif
68
69 #ifdef HAVE_PACKET_EBPF
70 #include "util-ebpf.h"
71 #include <bpf/libbpf.h>
72 #include <bpf/bpf.h>
73 #endif
74
/* Local declaration of the BPF program descriptor. It presumably mirrors
 * libpcap's struct bpf_program, whose header is kept out of this file
 * through PCAP_DONT_INCLUDE_PCAP_BPF_H -- TODO confirm against pcap. */
struct bpf_program {
    unsigned int bf_len;       /* presumably the instruction count */
    struct bpf_insn *bf_insns; /* presumably the instruction array */
};
79
80 #ifdef HAVE_PCAP_H
81 #include <pcap.h>
82 #endif
83
84 #ifdef HAVE_PCAP_PCAP_H
85 #include <pcap/pcap.h>
86 #endif
87
88 #if HAVE_LINUX_IF_ETHER_H
89 #include <linux/if_ether.h>
90 #endif
91
92 #if HAVE_LINUX_IF_PACKET_H
93 #include <linux/if_packet.h>
94 #endif
95
96 #if HAVE_LINUX_IF_ARP_H
97 #include <linux/if_arp.h>
98 #endif
99
100 #if HAVE_LINUX_FILTER_H
101 #include <linux/filter.h>
102 #endif
103
104 #if HAVE_SYS_MMAN_H
105 #include <sys/mman.h>
106 #endif
107
108 #ifdef HAVE_HW_TIMESTAMPING
109 #include <linux/net_tstamp.h>
110 #endif
111
112 #endif /* HAVE_AF_PACKET */
113
114 extern int max_pending_packets;
115
116 #ifndef HAVE_AF_PACKET
117
118 TmEcode NoAFPSupportExit(ThreadVars *, const void *, void **);
119
120 void TmModuleReceiveAFPRegister (void)
121 {
122 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
123 tmm_modules[TMM_RECEIVEAFP].ThreadInit = NoAFPSupportExit;
124 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
125 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
126 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
127 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
128 tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
129 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
130 }
131
132 /**
133 * \brief Registration Function for DecodeAFP.
134 * \todo Unit tests are needed for this module.
135 */
136 void TmModuleDecodeAFPRegister (void)
137 {
138 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
139 tmm_modules[TMM_DECODEAFP].ThreadInit = NoAFPSupportExit;
140 tmm_modules[TMM_DECODEAFP].Func = NULL;
141 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
142 tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
143 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
144 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
145 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
146 }
147
148 /**
149 * \brief this function prints an error message and exits.
150 */
151 TmEcode NoAFPSupportExit(ThreadVars *tv, const void *initdata, void **data)
152 {
153 SCLogError(SC_ERR_NO_AF_PACKET,"Error creating thread %s: you do not have "
154 "support for AF_PACKET enabled, on Linux host please recompile "
155 "with --enable-af-packet", tv->name);
156 exit(EXIT_FAILURE);
157 }
158
159 #else /* We have AF_PACKET support */
160
/* Size of the buffers that hold interface names. */
#define AFP_IFACE_NAME_LENGTH 48

/* Values of the capture handle state. */
#define AFP_STATE_DOWN 0
#define AFP_STATE_UP 1

/* NOTE(review): units are not visible here; AFP_RECONNECT_TIMEOUT is
 * presumably in microseconds and POLL_TIMEOUT in milliseconds -- confirm
 * against the call sites. */
#define AFP_RECONNECT_TIMEOUT 500000
#define AFP_DOWN_COUNTER_INTERVAL 40

#define POLL_TIMEOUT 100
170
#ifndef TP_STATUS_USER_BUSY
/* Marker set in tp_status by this file while a ring frame is still being
 * processed. It uses the highest bit of the status word. The constant is
 * unsigned because shifting a signed 1 into bit 31 is undefined behaviour. */
#define TP_STATUS_USER_BUSY (1U << 31)
#endif

#ifndef TP_STATUS_VLAN_VALID
/* Fallback for system headers that do not define the flag. */
#define TP_STATUS_VLAN_VALID (1 << 4)
#endif
179
180 /** protect the BPF filter setup of AF_PACKET sockets, as it is not thread safe */
181 static SCMutex afpacket_bpf_set_filter_lock = SCMUTEX_INITIALIZER;
182
/* Result codes returned by the AF_PACKET read functions. */
enum {
    AFP_READ_OK = 0,      /* read completed, or nothing left to read */
    AFP_READ_FAILURE = 1, /* reading from the socket or a block failed */
    AFP_FAILURE = 2,      /* packet allocation, copy or processing failed */
    AFP_KERNEL_DROP = 3,  /* ring reader stopped after the kernel reported losses */
};
189
/* Error classes; numbering starts at 1 so that 0 is not an error class. */
enum {
    AFP_FATAL_ERROR = 1,
    AFP_RECOVERABLE_ERROR = 2,
};
194
/* Typed views on the address of one ring frame header: the same pointer
 * can be read as a TPACKET_V2 header, a TPACKET_V3 header (when
 * available) or an untyped address. */
union thdr {
    struct tpacket2_hdr *h2; /* TPACKET_V2 view */
#ifdef HAVE_TPACKET_V3
    struct tpacket3_hdr *h3; /* TPACKET_V3 view */
#endif
    void *raw;               /* untyped view */
};
202
203 static int AFPBypassCallback(Packet *p);
204 static int AFPXDPBypassCallback(Packet *p);
205
206 #define MAX_MAPS 32
207 /**
208 * \brief Structure to hold thread specific variables.
209 */
210 typedef struct AFPThreadVars_
211 {
212 union {
213 char *ring_v2;
214 struct iovec *ring_v3;
215 };
216
217 /* counters */
218 uint64_t pkts;
219
220 ThreadVars *tv;
221 TmSlot *slot;
222 LiveDevice *livedev;
223 /* data link type for the thread */
224 uint32_t datalink;
225
226 #ifdef HAVE_PACKET_EBPF
227 int v4_map_fd;
228 int v6_map_fd;
229 #endif
230
231 unsigned int frame_offset;
232
233 ChecksumValidationMode checksum_mode;
234
235 /* references to packet and drop counters */
236 uint16_t capture_kernel_packets;
237 uint16_t capture_kernel_drops;
238
239 /* handle state */
240 uint8_t afp_state;
241 uint8_t copy_mode;
242 unsigned int flags;
243
244 /* IPS peer */
245 AFPPeer *mpeer;
246
247 /* no mmap mode */
248 uint8_t *data; /** Per function and thread data */
249 int datalen; /** Length of per function and thread data */
250 int cooked;
251
252 /*
253 * Init related members
254 */
255
256 /* thread specific socket */
257 int socket;
258
259 int ring_size;
260 int block_size;
261 int block_timeout;
262 /* socket buffer size */
263 int buffer_size;
264 /* Filter */
265 const char *bpf_filter;
266 int ebpf_lb_fd;
267 int ebpf_filter_fd;
268
269 int promisc;
270
271 int down_count;
272
273 int cluster_id;
274 int cluster_type;
275
276 int threads;
277
278 union {
279 struct tpacket_req req;
280 #ifdef HAVE_TPACKET_V3
281 struct tpacket_req3 req3;
282 #endif
283 };
284
285 char iface[AFP_IFACE_NAME_LENGTH];
286 /* IPS output iface */
287 char out_iface[AFP_IFACE_NAME_LENGTH];
288
289 /* mmap'ed ring buffer */
290 unsigned int ring_buflen;
291 uint8_t *ring_buf;
292
293 uint8_t xdp_mode;
294
295 int map_fd[MAX_MAPS];
296
297 } AFPThreadVars;
298
299 TmEcode ReceiveAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
300 TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
301 void ReceiveAFPThreadExitStats(ThreadVars *, void *);
302 TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
303 TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
304
305 TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
306 TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
307 TmEcode DecodeAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
308
309 TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
310 static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
311 static int AFPGetDevFlags(int fd, const char *ifname);
312 static int AFPDerefSocket(AFPPeer* peer);
313 static int AFPRefSocket(AFPPeer* peer);
314
315 /**
316 * \brief Registration Function for RecieveAFP.
317 * \todo Unit tests are needed for this module.
318 */
319 void TmModuleReceiveAFPRegister (void)
320 {
321 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
322 tmm_modules[TMM_RECEIVEAFP].ThreadInit = ReceiveAFPThreadInit;
323 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
324 tmm_modules[TMM_RECEIVEAFP].PktAcqLoop = ReceiveAFPLoop;
325 tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
326 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
327 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = ReceiveAFPThreadDeinit;
328 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
329 tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
330 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
331 }
332
333
334 /**
335 * \defgroup afppeers AFP peers list
336 *
337 * AF_PACKET has an IPS mode where interfaces are peered: packets from
338 * one interface are sent to the peered interface and the other way around. The ::AFPPeer
339 * list maintains the list of peers. Each ::AFPPeer stores the needed
340 * information to be able to send packets on the interface.
341 * An element of the list must not be destroyed during the run of Suricata as it
342 * is used by ::Packet and other threads.
343 *
344 * @{
345 */
346
347 typedef struct AFPPeersList_ {
348 TAILQ_HEAD(, AFPPeer_) peers; /**< Head of list of fragments. */
349 int cnt;
350 int peered;
351 int turn; /**< Next value for initialisation order */
352 SC_ATOMIC_DECLARE(int, reached); /**< Counter used to synchronize start */
353 } AFPPeersList;
354
355 /**
356 * \brief Update the peer.
357 *
358 * Update the AFPPeer of a thread ie set new state, socket number
359 * or iface index.
360 *
361 */
362 static void AFPPeerUpdate(AFPThreadVars *ptv)
363 {
364 if (ptv->mpeer == NULL) {
365 return;
366 }
367 (void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
368 (void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
369 (void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
370 }
371
372 /**
373 * \brief Clean and free ressource used by an ::AFPPeer
374 */
375 static void AFPPeerClean(AFPPeer *peer)
376 {
377 if (peer->flags & AFP_SOCK_PROTECT)
378 SCMutexDestroy(&peer->sock_protect);
379 SC_ATOMIC_DESTROY(peer->socket);
380 SC_ATOMIC_DESTROY(peer->if_idx);
381 SC_ATOMIC_DESTROY(peer->state);
382 SCFree(peer);
383 }
384
385 AFPPeersList peerslist;
386
387
388 /**
389 * \brief Init the global list of ::AFPPeer
390 */
391 TmEcode AFPPeersListInit()
392 {
393 SCEnter();
394 TAILQ_INIT(&peerslist.peers);
395 peerslist.peered = 0;
396 peerslist.cnt = 0;
397 peerslist.turn = 0;
398 SC_ATOMIC_INIT(peerslist.reached);
399 (void) SC_ATOMIC_SET(peerslist.reached, 0);
400 SCReturnInt(TM_ECODE_OK);
401 }
402
403 /**
404 * \brief Check that all ::AFPPeer got a peer
405 *
406 * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
407 */
408 TmEcode AFPPeersListCheck()
409 {
410 #define AFP_PEERS_MAX_TRY 4
411 #define AFP_PEERS_WAIT 20000
412 int try = 0;
413 SCEnter();
414 while (try < AFP_PEERS_MAX_TRY) {
415 if (peerslist.cnt != peerslist.peered) {
416 usleep(AFP_PEERS_WAIT);
417 } else {
418 SCReturnInt(TM_ECODE_OK);
419 }
420 try++;
421 }
422 SCLogError(SC_ERR_AFP_CREATE, "Threads number not equals");
423 SCReturnInt(TM_ECODE_FAILED);
424 }
425
426 /**
427 * \brief Declare a new AFP thread to AFP peers list.
428 */
429 static TmEcode AFPPeersListAdd(AFPThreadVars *ptv)
430 {
431 SCEnter();
432 AFPPeer *peer = SCMalloc(sizeof(AFPPeer));
433 AFPPeer *pitem;
434 int mtu, out_mtu;
435
436 if (unlikely(peer == NULL)) {
437 SCReturnInt(TM_ECODE_FAILED);
438 }
439 memset(peer, 0, sizeof(AFPPeer));
440 SC_ATOMIC_INIT(peer->socket);
441 SC_ATOMIC_INIT(peer->sock_usage);
442 SC_ATOMIC_INIT(peer->if_idx);
443 SC_ATOMIC_INIT(peer->state);
444 peer->flags = ptv->flags;
445 peer->turn = peerslist.turn++;
446
447 if (peer->flags & AFP_SOCK_PROTECT) {
448 SCMutexInit(&peer->sock_protect, NULL);
449 }
450
451 (void)SC_ATOMIC_SET(peer->sock_usage, 0);
452 (void)SC_ATOMIC_SET(peer->state, AFP_STATE_DOWN);
453 strlcpy(peer->iface, ptv->iface, AFP_IFACE_NAME_LENGTH);
454 ptv->mpeer = peer;
455 /* add element to iface list */
456 TAILQ_INSERT_TAIL(&peerslist.peers, peer, next);
457
458 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
459 peerslist.cnt++;
460
461 /* Iter to find a peer */
462 TAILQ_FOREACH(pitem, &peerslist.peers, next) {
463 if (pitem->peer)
464 continue;
465 if (strcmp(pitem->iface, ptv->out_iface))
466 continue;
467 peer->peer = pitem;
468 pitem->peer = peer;
469 mtu = GetIfaceMTU(ptv->iface);
470 out_mtu = GetIfaceMTU(ptv->out_iface);
471 if (mtu != out_mtu) {
472 SCLogError(SC_ERR_AFP_CREATE,
473 "MTU on %s (%d) and %s (%d) are not equal, "
474 "transmission of packets bigger than %d will fail.",
475 ptv->iface, mtu,
476 ptv->out_iface, out_mtu,
477 (out_mtu > mtu) ? mtu : out_mtu);
478 }
479 peerslist.peered += 2;
480 break;
481 }
482 }
483
484 AFPPeerUpdate(ptv);
485
486 SCReturnInt(TM_ECODE_OK);
487 }
488
489 static int AFPPeersListWaitTurn(AFPPeer *peer)
490 {
491 /* If turn is zero, we already have started threads once */
492 if (peerslist.turn == 0)
493 return 0;
494
495 if (peer->turn == SC_ATOMIC_GET(peerslist.reached))
496 return 0;
497 return 1;
498 }
499
500 static void AFPPeersListReachedInc(void)
501 {
502 if (peerslist.turn == 0)
503 return;
504
505 if (SC_ATOMIC_ADD(peerslist.reached, 1) == peerslist.turn) {
506 SCLogInfo("All AFP capture threads are running.");
507 (void)SC_ATOMIC_SET(peerslist.reached, 0);
508 /* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
509 * restarted.
510 */
511 peerslist.turn = 0;
512 }
513 }
514
515 static int AFPPeersListStarted(void)
516 {
517 return !peerslist.turn;
518 }
519
520 /**
521 * \brief Clean the global peers list.
522 */
523 void AFPPeersListClean()
524 {
525 AFPPeer *pitem;
526
527 while ((pitem = TAILQ_FIRST(&peerslist.peers))) {
528 TAILQ_REMOVE(&peerslist.peers, pitem, next);
529 AFPPeerClean(pitem);
530 }
531 }
532
533 /**
534 * @}
535 */
536
537 /**
538 * \brief Registration Function for DecodeAFP.
539 * \todo Unit tests are needed for this module.
540 */
541 void TmModuleDecodeAFPRegister (void)
542 {
543 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
544 tmm_modules[TMM_DECODEAFP].ThreadInit = DecodeAFPThreadInit;
545 tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
546 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
547 tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
548 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
549 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
550 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
551 }
552
553
554 static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose);
555
556 static inline void AFPDumpCounters(AFPThreadVars *ptv)
557 {
558 #ifdef PACKET_STATISTICS
559 struct tpacket_stats kstats;
560 socklen_t len = sizeof (struct tpacket_stats);
561 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
562 &kstats, &len) > -1) {
563 SCLogDebug("(%s) Kernel: Packets %" PRIu32 ", dropped %" PRIu32 "",
564 ptv->tv->name,
565 kstats.tp_packets, kstats.tp_drops);
566 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, kstats.tp_packets);
567 StatsAddUI64(ptv->tv, ptv->capture_kernel_drops, kstats.tp_drops);
568 (void) SC_ATOMIC_ADD(ptv->livedev->drop, (uint64_t) kstats.tp_drops);
569 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, (uint64_t) kstats.tp_packets);
570 }
571 #endif
572 }
573
574 /**
575 * \brief AF packet read function.
576 *
577 * This function fills
578 * From here the packets are picked up by the DecodeAFP thread.
579 *
580 * \param user pointer to AFPThreadVars
581 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
582 */
583 static int AFPRead(AFPThreadVars *ptv)
584 {
585 Packet *p = NULL;
586 /* XXX should try to use read that get directly to packet */
587 int offset = 0;
588 int caplen;
589 struct sockaddr_ll from;
590 struct iovec iov;
591 struct msghdr msg;
592 struct cmsghdr *cmsg;
593 union {
594 struct cmsghdr cmsg;
595 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
596 } cmsg_buf;
597 unsigned char aux_checksum = 0;
598
599 msg.msg_name = &from;
600 msg.msg_namelen = sizeof(from);
601 msg.msg_iov = &iov;
602 msg.msg_iovlen = 1;
603 msg.msg_control = &cmsg_buf;
604 msg.msg_controllen = sizeof(cmsg_buf);
605 msg.msg_flags = 0;
606
607 if (ptv->cooked)
608 offset = SLL_HEADER_LEN;
609 else
610 offset = 0;
611 iov.iov_len = ptv->datalen - offset;
612 iov.iov_base = ptv->data + offset;
613
614 caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
615
616 if (caplen < 0) {
617 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
618 errno);
619 SCReturnInt(AFP_READ_FAILURE);
620 }
621
622 p = PacketGetFromQueueOrAlloc();
623 if (p == NULL) {
624 SCReturnInt(AFP_FAILURE);
625 }
626 PKT_SET_SRC(p, PKT_SRC_WIRE);
627 if (ptv->flags & AFP_BYPASS) {
628 p->BypassPacketsFlow = AFPBypassCallback;
629 #ifdef HAVE_PACKET_EBPF
630 p->afp_v.v4_map_fd = ptv->v4_map_fd;
631 p->afp_v.v6_map_fd = ptv->v6_map_fd;
632 #endif
633 }
634 if (ptv->flags & AFP_XDPBYPASS) {
635 p->BypassPacketsFlow = AFPXDPBypassCallback;
636 #ifdef HAVE_PACKET_EBPF
637 p->afp_v.v4_map_fd = ptv->v4_map_fd;
638 p->afp_v.v6_map_fd = ptv->v6_map_fd;
639 #endif
640 }
641
642 /* get timestamp of packet via ioctl */
643 if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
644 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
645 errno);
646 TmqhOutputPacketpool(ptv->tv, p);
647 SCReturnInt(AFP_READ_FAILURE);
648 }
649
650 ptv->pkts++;
651 p->livedev = ptv->livedev;
652
653 /* add forged header */
654 if (ptv->cooked) {
655 SllHdr * hdrp = (SllHdr *)ptv->data;
656 /* XXX this is minimalist, but this seems enough */
657 hdrp->sll_protocol = from.sll_protocol;
658 }
659
660 p->datalink = ptv->datalink;
661 SET_PKT_LEN(p, caplen + offset);
662 if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
663 TmqhOutputPacketpool(ptv->tv, p);
664 SCReturnInt(AFP_FAILURE);
665 }
666 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
667 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
668
669 /* We only check for checksum disable */
670 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
671 p->flags |= PKT_IGNORE_CHECKSUM;
672 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
673 if (ptv->livedev->ignore_checksum) {
674 p->flags |= PKT_IGNORE_CHECKSUM;
675 } else if (ChecksumAutoModeCheck(ptv->pkts,
676 SC_ATOMIC_GET(ptv->livedev->pkts),
677 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
678 ptv->livedev->ignore_checksum = 1;
679 p->flags |= PKT_IGNORE_CHECKSUM;
680 }
681 } else {
682 aux_checksum = 1;
683 }
684
685 /* List is NULL if we don't have activated auxiliary data */
686 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
687 struct tpacket_auxdata *aux;
688
689 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
690 cmsg->cmsg_level != SOL_PACKET ||
691 cmsg->cmsg_type != PACKET_AUXDATA)
692 continue;
693
694 aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
695
696 if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
697 p->flags |= PKT_IGNORE_CHECKSUM;
698 }
699 break;
700 }
701
702 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
703 TmqhOutputPacketpool(ptv->tv, p);
704 SCReturnInt(AFP_FAILURE);
705 }
706 SCReturnInt(AFP_READ_OK);
707 }
708
709 /**
710 * \brief AF packet write function.
711 *
712 * This function has to be called before the memory
713 * related to Packet in ring buffer is released.
714 *
715 * \param pointer to Packet
716 * \param version of capture: TPACKET_V2 or TPACKET_V3
717 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
718 *
719 */
720 static TmEcode AFPWritePacket(Packet *p, int version)
721 {
722 struct sockaddr_ll socket_address;
723 int socket;
724 uint8_t *pstart;
725 size_t plen;
726 union thdr h;
727 uint16_t vlan_tci = 0;
728
729 if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
730 if (PACKET_TEST_ACTION(p, ACTION_DROP)) {
731 return TM_ECODE_OK;
732 }
733 }
734
735 if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
736 return TM_ECODE_OK;
737
738 if (p->ethh == NULL) {
739 SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
740 return TM_ECODE_FAILED;
741 }
742 /* Index of the network device */
743 socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
744 /* Address length*/
745 socket_address.sll_halen = ETH_ALEN;
746 /* Destination MAC */
747 memcpy(socket_address.sll_addr, p->ethh, 6);
748
749 /* Send packet, locking the socket if necessary */
750 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
751 SCMutexLock(&p->afp_v.peer->sock_protect);
752 socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
753
754 h.raw = p->afp_v.relptr;
755
756 if (version == TPACKET_V2) {
757 /* Copy VLAN header from ring memory. For post june 2011 kernel we test
758 * the flag. It is not defined for older kernel so we go best effort
759 * and test for non zero value of the TCI header. */
760 if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
761 vlan_tci = h.h2->tp_vlan_tci;
762 }
763 } else {
764 #ifdef HAVE_TPACKET_V3
765 if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
766 vlan_tci = h.h3->hv1.tp_vlan_tci;
767 }
768 #else
769 /* Should not get here */
770 BUG_ON(1);
771 #endif
772 }
773
774 if (vlan_tci != 0) {
775 pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
776 plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
777 /* move ethernet addresses */
778 memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
779 /* write vlan info */
780 *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
781 *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
782 } else {
783 pstart = GET_PKT_DATA(p);
784 plen = GET_PKT_LEN(p);
785 }
786
787 if (sendto(socket, pstart, plen, 0,
788 (struct sockaddr*) &socket_address,
789 sizeof(struct sockaddr_ll)) < 0) {
790 SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
791 socket,
792 strerror(errno));
793 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
794 SCMutexUnlock(&p->afp_v.peer->sock_protect);
795 return TM_ECODE_FAILED;
796 }
797 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
798 SCMutexUnlock(&p->afp_v.peer->sock_protect);
799
800 return TM_ECODE_OK;
801 }
802
803 static void AFPReleaseDataFromRing(Packet *p)
804 {
805 /* Need to be in copy mode and need to detect early release
806 where Ethernet header could not be set (and pseudo packet) */
807 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
808 AFPWritePacket(p, TPACKET_V2);
809 }
810
811 if (AFPDerefSocket(p->afp_v.mpeer) == 0)
812 goto cleanup;
813
814 if (p->afp_v.relptr) {
815 union thdr h;
816 h.raw = p->afp_v.relptr;
817 h.h2->tp_status = TP_STATUS_KERNEL;
818 }
819
820 cleanup:
821 AFPV_CLEANUP(&p->afp_v);
822 }
823
824 #ifdef HAVE_TPACKET_V3
825 static void AFPReleasePacketV3(Packet *p)
826 {
827 /* Need to be in copy mode and need to detect early release
828 where Ethernet header could not be set (and pseudo packet) */
829 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
830 AFPWritePacket(p, TPACKET_V3);
831 }
832 PacketFreeOrRelease(p);
833 }
834 #endif
835
836 static void AFPReleasePacket(Packet *p)
837 {
838 AFPReleaseDataFromRing(p);
839 PacketFreeOrRelease(p);
840 }
841
842 /**
843 * \brief AF packet read function for ring
844 *
845 * This function fills
846 * From here the packets are picked up by the DecodeAFP thread.
847 *
848 * \param user pointer to AFPThreadVars
849 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
850 */
851 static int AFPReadFromRing(AFPThreadVars *ptv)
852 {
853 Packet *p = NULL;
854 union thdr h;
855 uint8_t emergency_flush = 0;
856 int read_pkts = 0;
857 int loop_start = -1;
858
859
860 /* Loop till we have packets available */
861 while (1) {
862 if (unlikely(suricata_ctl_flags != 0)) {
863 break;
864 }
865
866 /* Read packet from ring */
867 h.raw = (((union thdr **)ptv->ring_v2)[ptv->frame_offset]);
868 if (h.raw == NULL) {
869 SCReturnInt(AFP_FAILURE);
870 }
871
872 if ((! h.h2->tp_status) || (h.h2->tp_status & TP_STATUS_USER_BUSY)) {
873 if (read_pkts == 0) {
874 if (loop_start == -1) {
875 loop_start = ptv->frame_offset;
876 } else if (unlikely(loop_start == (int)ptv->frame_offset)) {
877 SCReturnInt(AFP_READ_OK);
878 }
879 if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
880 ptv->frame_offset = 0;
881 }
882 continue;
883 }
884 if ((emergency_flush) && (ptv->flags & AFP_EMERGENCY_MODE)) {
885 SCReturnInt(AFP_KERNEL_DROP);
886 } else {
887 SCReturnInt(AFP_READ_OK);
888 }
889 }
890
891 read_pkts++;
892 loop_start = -1;
893
894 /* Our packet is still used by suricata, we exit read loop to
895 * gain some time */
896 if (h.h2->tp_status & TP_STATUS_USER_BUSY) {
897 SCReturnInt(AFP_READ_OK);
898 }
899
900 if ((ptv->flags & AFP_EMERGENCY_MODE) && (emergency_flush == 1)) {
901 h.h2->tp_status = TP_STATUS_KERNEL;
902 goto next_frame;
903 }
904
905 p = PacketGetFromQueueOrAlloc();
906 if (p == NULL) {
907 SCReturnInt(AFP_FAILURE);
908 }
909 PKT_SET_SRC(p, PKT_SRC_WIRE);
910 if (ptv->flags & AFP_BYPASS) {
911 p->BypassPacketsFlow = AFPBypassCallback;
912 #ifdef HAVE_PACKET_EBPF
913 p->afp_v.v4_map_fd = ptv->v4_map_fd;
914 p->afp_v.v6_map_fd = ptv->v6_map_fd;
915 #endif
916 }
917 if (ptv->flags & AFP_XDPBYPASS) {
918 p->BypassPacketsFlow = AFPXDPBypassCallback;
919 #ifdef HAVE_PACKET_EBPF
920 p->afp_v.v4_map_fd = ptv->v4_map_fd;
921 p->afp_v.v6_map_fd = ptv->v6_map_fd;
922 #endif
923 }
924
925 /* Suricata will treat packet so telling it is busy, this
926 * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
927 * function. */
928 h.h2->tp_status |= TP_STATUS_USER_BUSY;
929
930 ptv->pkts++;
931 p->livedev = ptv->livedev;
932 p->datalink = ptv->datalink;
933
934 if (h.h2->tp_len > h.h2->tp_snaplen) {
935 SCLogDebug("Packet length (%d) > snaplen (%d), truncating",
936 h.h2->tp_len, h.h2->tp_snaplen);
937 }
938
939 /* get vlan id from header */
940 if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
941 (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
942 p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
943 p->vlan_idx = 1;
944 p->vlanh[0] = NULL;
945 }
946
947 if (ptv->flags & AFP_ZERO_COPY) {
948 if (PacketSetData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
949 TmqhOutputPacketpool(ptv->tv, p);
950 SCReturnInt(AFP_FAILURE);
951 } else {
952 p->afp_v.relptr = h.raw;
953 p->ReleasePacket = AFPReleasePacket;
954 p->afp_v.mpeer = ptv->mpeer;
955 AFPRefSocket(ptv->mpeer);
956
957 p->afp_v.copy_mode = ptv->copy_mode;
958 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
959 p->afp_v.peer = ptv->mpeer->peer;
960 } else {
961 p->afp_v.peer = NULL;
962 }
963 }
964 } else {
965 if (PacketCopyData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
966 TmqhOutputPacketpool(ptv->tv, p);
967 SCReturnInt(AFP_FAILURE);
968 }
969 }
970
971 /* Timestamp */
972 p->ts.tv_sec = h.h2->tp_sec;
973 p->ts.tv_usec = h.h2->tp_nsec/1000;
974 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
975 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
976
977 /* We only check for checksum disable */
978 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
979 p->flags |= PKT_IGNORE_CHECKSUM;
980 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
981 if (ptv->livedev->ignore_checksum) {
982 p->flags |= PKT_IGNORE_CHECKSUM;
983 } else if (ChecksumAutoModeCheck(ptv->pkts,
984 SC_ATOMIC_GET(ptv->livedev->pkts),
985 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
986 ptv->livedev->ignore_checksum = 1;
987 p->flags |= PKT_IGNORE_CHECKSUM;
988 }
989 } else {
990 if (h.h2->tp_status & TP_STATUS_CSUMNOTREADY) {
991 p->flags |= PKT_IGNORE_CHECKSUM;
992 }
993 }
994 if (h.h2->tp_status & TP_STATUS_LOSING) {
995 emergency_flush = 1;
996 AFPDumpCounters(ptv);
997 }
998
999 /* release frame if not in zero copy mode */
1000 if (!(ptv->flags & AFP_ZERO_COPY)) {
1001 h.h2->tp_status = TP_STATUS_KERNEL;
1002 }
1003
1004 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
1005 h.h2->tp_status = TP_STATUS_KERNEL;
1006 if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
1007 ptv->frame_offset = 0;
1008 }
1009 TmqhOutputPacketpool(ptv->tv, p);
1010 SCReturnInt(AFP_FAILURE);
1011 }
1012
1013 next_frame:
1014 if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
1015 ptv->frame_offset = 0;
1016 /* Get out of loop to be sure we will reach maintenance tasks */
1017 SCReturnInt(AFP_READ_OK);
1018 }
1019 }
1020
1021 SCReturnInt(AFP_READ_OK);
1022 }
1023
1024 #ifdef HAVE_TPACKET_V3
1025 static inline void AFPFlushBlock(struct tpacket_block_desc *pbd)
1026 {
1027 pbd->hdr.bh1.block_status = TP_STATUS_KERNEL;
1028 }
1029
1030 static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc *pbd, struct tpacket3_hdr *ppd)
1031 {
1032 Packet *p = PacketGetFromQueueOrAlloc();
1033 if (p == NULL) {
1034 SCReturnInt(AFP_FAILURE);
1035 }
1036 PKT_SET_SRC(p, PKT_SRC_WIRE);
1037 if (ptv->flags & AFP_BYPASS) {
1038 p->BypassPacketsFlow = AFPBypassCallback;
1039 #ifdef HAVE_PACKET_EBPF
1040 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1041 p->afp_v.v6_map_fd = ptv->v6_map_fd;
1042 #endif
1043 }
1044 if (ptv->flags & AFP_XDPBYPASS) {
1045 p->BypassPacketsFlow = AFPXDPBypassCallback;
1046 #ifdef HAVE_PACKET_EBPF
1047 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1048 p->afp_v.v6_map_fd = ptv->v6_map_fd;
1049 #endif
1050 }
1051
1052 ptv->pkts++;
1053 p->livedev = ptv->livedev;
1054 p->datalink = ptv->datalink;
1055
1056 if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
1057 (ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
1058 p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
1059 p->vlan_idx = 1;
1060 p->vlanh[0] = NULL;
1061 }
1062
1063 if (ptv->flags & AFP_ZERO_COPY) {
1064 if (PacketSetData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1065 TmqhOutputPacketpool(ptv->tv, p);
1066 SCReturnInt(AFP_FAILURE);
1067 }
1068 p->afp_v.relptr = ppd;
1069 p->ReleasePacket = AFPReleasePacketV3;
1070 p->afp_v.mpeer = ptv->mpeer;
1071 AFPRefSocket(ptv->mpeer);
1072
1073 p->afp_v.copy_mode = ptv->copy_mode;
1074 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
1075 p->afp_v.peer = ptv->mpeer->peer;
1076 } else {
1077 p->afp_v.peer = NULL;
1078 }
1079 } else {
1080 if (PacketCopyData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1081 TmqhOutputPacketpool(ptv->tv, p);
1082 SCReturnInt(AFP_FAILURE);
1083 }
1084 }
1085 /* Timestamp */
1086 p->ts.tv_sec = ppd->tp_sec;
1087 p->ts.tv_usec = ppd->tp_nsec/1000;
1088 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
1089 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
1090
1091 /* We only check for checksum disable */
1092 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
1093 p->flags |= PKT_IGNORE_CHECKSUM;
1094 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
1095 if (ptv->livedev->ignore_checksum) {
1096 p->flags |= PKT_IGNORE_CHECKSUM;
1097 } else if (ChecksumAutoModeCheck(ptv->pkts,
1098 SC_ATOMIC_GET(ptv->livedev->pkts),
1099 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
1100 ptv->livedev->ignore_checksum = 1;
1101 p->flags |= PKT_IGNORE_CHECKSUM;
1102 }
1103 } else {
1104 if (ppd->tp_status & TP_STATUS_CSUMNOTREADY) {
1105 p->flags |= PKT_IGNORE_CHECKSUM;
1106 }
1107 }
1108
1109 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
1110 TmqhOutputPacketpool(ptv->tv, p);
1111 SCReturnInt(AFP_FAILURE);
1112 }
1113
1114 SCReturnInt(AFP_READ_OK);
1115 }
1116
1117 static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
1118 {
1119 int num_pkts = pbd->hdr.bh1.num_pkts, i;
1120 uint8_t *ppd;
1121
1122 ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
1123 for (i = 0; i < num_pkts; ++i) {
1124 if (unlikely(AFPParsePacketV3(ptv, pbd,
1125 (struct tpacket3_hdr *)ppd) == AFP_FAILURE)) {
1126 SCReturnInt(AFP_READ_FAILURE);
1127 }
1128 ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset;
1129 }
1130
1131 SCReturnInt(AFP_READ_OK);
1132 }
1133 #endif /* HAVE_TPACKET_V3 */
1134
1135 /**
1136 * \brief AF packet read function for ring
1137 *
1138 * This function fills
1139 * From here the packets are picked up by the DecodeAFP thread.
1140 *
1141 * \param user pointer to AFPThreadVars
1142 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1143 */
1144 static int AFPReadFromRingV3(AFPThreadVars *ptv)
1145 {
1146 #ifdef HAVE_TPACKET_V3
1147 struct tpacket_block_desc *pbd;
1148
1149 /* Loop till we have packets available */
1150 while (1) {
1151 if (unlikely(suricata_ctl_flags != 0)) {
1152 SCLogInfo("Exiting AFP V3 read loop");
1153 break;
1154 }
1155
1156 pbd = (struct tpacket_block_desc *) ptv->ring_v3[ptv->frame_offset].iov_base;
1157
1158 /* block is not ready to be read */
1159 if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
1160 SCReturnInt(AFP_READ_OK);
1161 }
1162
1163 if (unlikely(AFPWalkBlock(ptv, pbd) != AFP_READ_OK)) {
1164 AFPFlushBlock(pbd);
1165 SCReturnInt(AFP_READ_FAILURE);
1166 }
1167
1168 AFPFlushBlock(pbd);
1169 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req3.tp_block_nr;
1170 /* return to maintenance task after one loop on the ring */
1171 if (ptv->frame_offset == 0) {
1172 SCReturnInt(AFP_READ_OK);
1173 }
1174 }
1175 #endif
1176 SCReturnInt(AFP_READ_OK);
1177 }
1178
1179 /**
1180 * \brief Reference socket
1181 *
1182 * \retval O in case of failure, 1 in case of success
1183 */
1184 static int AFPRefSocket(AFPPeer* peer)
1185 {
1186 if (unlikely(peer == NULL))
1187 return 0;
1188
1189 (void)SC_ATOMIC_ADD(peer->sock_usage, 1);
1190 return 1;
1191 }
1192
1193
1194 /**
1195 * \brief Dereference socket
1196 *
1197 * \retval 1 if socket is still alive, 0 if not
1198 */
1199 static int AFPDerefSocket(AFPPeer* peer)
1200 {
1201 if (peer == NULL)
1202 return 1;
1203
1204 if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 0) {
1205 if (SC_ATOMIC_GET(peer->state) == AFP_STATE_DOWN) {
1206 SCLogInfo("Cleaning socket connected to '%s'", peer->iface);
1207 close(SC_ATOMIC_GET(peer->socket));
1208 return 0;
1209 }
1210 }
1211 return 1;
1212 }
1213
1214 static void AFPSwitchState(AFPThreadVars *ptv, int state)
1215 {
1216 ptv->afp_state = state;
1217 ptv->down_count = 0;
1218
1219 AFPPeerUpdate(ptv);
1220
1221 /* Do cleaning if switching to down state */
1222 if (state == AFP_STATE_DOWN) {
1223 #ifdef HAVE_TPACKET_V3
1224 if (ptv->flags & AFP_TPACKET_V3) {
1225 if (!ptv->ring_v3) {
1226 SCFree(ptv->ring_v3);
1227 ptv->ring_v3 = NULL;
1228 }
1229 } else {
1230 #endif
1231 if (ptv->ring_v2) {
1232 /* only used in reading phase, we can free it */
1233 SCFree(ptv->ring_v2);
1234 ptv->ring_v2 = NULL;
1235 }
1236 #ifdef HAVE_TPACKET_V3
1237 }
1238 #endif
1239 if (ptv->socket != -1) {
1240 /* we need to wait for all packets to return data */
1241 if (SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1) == 0) {
1242 SCLogInfo("Cleaning socket connected to '%s'", ptv->iface);
1243 munmap(ptv->ring_buf, ptv->ring_buflen);
1244 close(ptv->socket);
1245 ptv->socket = -1;
1246 }
1247 }
1248 }
1249 if (state == AFP_STATE_UP) {
1250 (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
1251 }
1252 }
1253
1254 static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
1255 uint64_t *discarded_pkts)
1256 {
1257 struct sockaddr_ll from;
1258 struct iovec iov;
1259 struct msghdr msg;
1260 struct timeval ts;
1261 union {
1262 struct cmsghdr cmsg;
1263 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
1264 } cmsg_buf;
1265
1266
1267 if (unlikely(suricata_ctl_flags != 0)) {
1268 return 1;
1269 }
1270
1271 msg.msg_name = &from;
1272 msg.msg_namelen = sizeof(from);
1273 msg.msg_iov = &iov;
1274 msg.msg_iovlen = 1;
1275 msg.msg_control = &cmsg_buf;
1276 msg.msg_controllen = sizeof(cmsg_buf);
1277 msg.msg_flags = 0;
1278
1279 iov.iov_len = ptv->datalen;
1280 iov.iov_base = ptv->data;
1281
1282 (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
1283
1284 if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
1285 /* FIXME */
1286 return -1;
1287 }
1288
1289 if ((ts.tv_sec > synctv->tv_sec) ||
1290 (ts.tv_sec >= synctv->tv_sec &&
1291 ts.tv_usec > synctv->tv_usec)) {
1292 return 1;
1293 }
1294 return 0;
1295 }
1296
1297 static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
1298 uint64_t *discarded_pkts)
1299 {
1300 union thdr h;
1301
1302 if (unlikely(suricata_ctl_flags != 0)) {
1303 return 1;
1304 }
1305
1306 #ifdef HAVE_TPACKET_V3
1307 if (ptv->flags & AFP_TPACKET_V3) {
1308 struct tpacket_block_desc *pbd;
1309 pbd = (struct tpacket_block_desc *) ptv->ring_v3[ptv->frame_offset].iov_base;
1310 *discarded_pkts += pbd->hdr.bh1.num_pkts;
1311 AFPFlushBlock(pbd);
1312 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req3.tp_block_nr;
1313 return 1;
1314
1315 } else
1316 #endif
1317 {
1318 /* Read packet from ring */
1319 h.raw = (((union thdr **)ptv->ring_v2)[ptv->frame_offset]);
1320 if (h.raw == NULL) {
1321 return -1;
1322 }
1323 (*discarded_pkts)++;
1324 if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
1325 ((time_t)h.h2->tp_sec == synctv->tv_sec &&
1326 (suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
1327 return 1;
1328 }
1329
1330 h.h2->tp_status = TP_STATUS_KERNEL;
1331 if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
1332 ptv->frame_offset = 0;
1333 }
1334 }
1335
1336
1337 return 0;
1338 }
1339
1340 /** \brief wait for all afpacket threads to fully init
1341 *
1342 * Discard packets before all threads are ready, as the cluster
1343 * setup is not complete yet.
1344 *
1345 * if AFPPeersListStarted() returns true init is complete
1346 *
1347 * \retval r 1 = happy, otherwise unhappy
1348 */
1349 static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
1350 {
1351 int r;
1352 struct timeval synctv;
1353 struct pollfd fds;
1354
1355 fds.fd = ptv->socket;
1356 fds.events = POLLIN;
1357
1358 /* Set timeval to end of the world */
1359 synctv.tv_sec = 0xffffffff;
1360 synctv.tv_usec = 0xffffffff;
1361
1362 while (1) {
1363 r = poll(&fds, 1, POLL_TIMEOUT);
1364 if (r > 0 &&
1365 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1366 SCLogWarning(SC_ERR_AFP_READ, "poll failed %02x",
1367 fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL));
1368 return 0;
1369 } else if (r > 0) {
1370 if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
1371 gettimeofday(&synctv, NULL);
1372 }
1373 if (ptv->flags & AFP_RING_MODE) {
1374 r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
1375 } else {
1376 r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
1377 }
1378 SCLogDebug("Discarding on %s", ptv->tv->name);
1379 switch (r) {
1380 case 1:
1381 SCLogDebug("Starting to read on %s", ptv->tv->name);
1382 return 1;
1383 case -1:
1384 return r;
1385 }
1386 /* no packets */
1387 } else if (r == 0 && AFPPeersListStarted()) {
1388 SCLogDebug("Starting to read on %s", ptv->tv->name);
1389 return 1;
1390 } else if (r < 0) { /* only exit on error */
1391 SCLogWarning(SC_ERR_AFP_READ, "poll failed with retval %d", r);
1392 return 0;
1393 }
1394 }
1395 return 1;
1396 }
1397
1398 /**
1399 * \brief Try to reopen socket
1400 *
1401 * \retval 0 in case of success, negative if error occurs or a condition
1402 * is not met.
1403 */
1404 static int AFPTryReopen(AFPThreadVars *ptv)
1405 {
1406 int afp_activate_r;
1407
1408 ptv->down_count++;
1409
1410 /* Don't reconnect till we have packet that did not release data */
1411 if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
1412 return -1;
1413 }
1414
1415 afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
1416 if (afp_activate_r != 0) {
1417 if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
1418 SCLogWarning(SC_ERR_AFP_CREATE, "Can not open iface '%s'",
1419 ptv->iface);
1420 }
1421 return afp_activate_r;
1422 }
1423
1424 SCLogInfo("Interface '%s' is back", ptv->iface);
1425 return 0;
1426 }
1427
1428 /**
1429 * \brief Main AF_PACKET reading Loop function
1430 */
1431 TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
1432 {
1433 SCEnter();
1434
1435 AFPThreadVars *ptv = (AFPThreadVars *)data;
1436 struct pollfd fds;
1437 int r;
1438 TmSlot *s = (TmSlot *)slot;
1439 time_t last_dump = 0;
1440 time_t current_time;
1441 int (*AFPReadFunc) (AFPThreadVars *);
1442 uint64_t discarded_pkts = 0;
1443
1444 ptv->slot = s->slot_next;
1445
1446 if (ptv->flags & AFP_RING_MODE) {
1447 if (ptv->flags & AFP_TPACKET_V3) {
1448 AFPReadFunc = AFPReadFromRingV3;
1449 } else {
1450 AFPReadFunc = AFPReadFromRing;
1451 }
1452 } else {
1453 AFPReadFunc = AFPRead;
1454 }
1455
1456 if (ptv->afp_state == AFP_STATE_DOWN) {
1457 /* Wait for our turn, threads before us must have opened the socket */
1458 while (AFPPeersListWaitTurn(ptv->mpeer)) {
1459 usleep(1000);
1460 if (suricata_ctl_flags != 0) {
1461 break;
1462 }
1463 }
1464 r = AFPCreateSocket(ptv, ptv->iface, 1);
1465 if (r < 0) {
1466 switch (-r) {
1467 case AFP_FATAL_ERROR:
1468 SCLogError(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, fatal error");
1469 SCReturnInt(TM_ECODE_FAILED);
1470 case AFP_RECOVERABLE_ERROR:
1471 SCLogWarning(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, retrying soon");
1472 }
1473 }
1474 AFPPeersListReachedInc();
1475 }
1476 if (ptv->afp_state == AFP_STATE_UP) {
1477 SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
1478 if ((ptv->flags & AFP_TPACKET_V3) != 0) {
1479 AFPSynchronizeStart(ptv, &discarded_pkts);
1480 }
1481 /* let's reset counter as we will start the capture at the
1482 * next function call */
1483 #ifdef PACKET_STATISTICS
1484 struct tpacket_stats kstats;
1485 socklen_t len = sizeof (struct tpacket_stats);
1486 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
1487 &kstats, &len) > -1) {
1488 uint64_t pkts = 0;
1489 SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
1490 ", dropped %" PRIu32 "",
1491 ptv->tv->name,
1492 kstats.tp_packets, kstats.tp_drops);
1493 pkts = kstats.tp_packets - discarded_pkts - kstats.tp_drops;
1494 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, pkts);
1495 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, pkts);
1496 }
1497 #endif
1498 }
1499
1500 fds.fd = ptv->socket;
1501 fds.events = POLLIN;
1502
1503 while (1) {
1504 /* Start by checking the state of our interface */
1505 if (unlikely(ptv->afp_state == AFP_STATE_DOWN)) {
1506 int dbreak = 0;
1507
1508 do {
1509 usleep(AFP_RECONNECT_TIMEOUT);
1510 if (suricata_ctl_flags != 0) {
1511 dbreak = 1;
1512 break;
1513 }
1514 r = AFPTryReopen(ptv);
1515 fds.fd = ptv->socket;
1516 } while (r < 0);
1517 if (dbreak == 1)
1518 break;
1519 }
1520
1521 /* make sure we have at least one packet in the packet pool, to prevent
1522 * us from alloc'ing packets at line rate */
1523 PacketPoolWait();
1524
1525 r = poll(&fds, 1, POLL_TIMEOUT);
1526
1527 if (suricata_ctl_flags != 0) {
1528 break;
1529 }
1530
1531 if (r > 0 &&
1532 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1533 if (fds.revents & (POLLHUP | POLLRDHUP)) {
1534 AFPSwitchState(ptv, AFP_STATE_DOWN);
1535 continue;
1536 } else if (fds.revents & POLLERR) {
1537 char c;
1538 /* Do a recv to get errno */
1539 if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
1540 continue; /* what, no error? */
1541 SCLogError(SC_ERR_AFP_READ,
1542 "Error reading data from iface '%s': (%d" PRIu32 ") %s",
1543 ptv->iface, errno, strerror(errno));
1544 AFPSwitchState(ptv, AFP_STATE_DOWN);
1545 continue;
1546 } else if (fds.revents & POLLNVAL) {
1547 SCLogError(SC_ERR_AFP_READ, "Invalid polling request");
1548 AFPSwitchState(ptv, AFP_STATE_DOWN);
1549 continue;
1550 }
1551 } else if (r > 0) {
1552 r = AFPReadFunc(ptv);
1553 switch (r) {
1554 case AFP_READ_OK:
1555 /* Trigger one dump of stats every second */
1556 current_time = time(NULL);
1557 if (current_time != last_dump) {
1558 AFPDumpCounters(ptv);
1559 last_dump = current_time;
1560 }
1561 break;
1562 case AFP_READ_FAILURE:
1563 /* AFPRead in error: best to reset the socket */
1564 SCLogError(SC_ERR_AFP_READ,
1565 "AFPRead error reading data from iface '%s': (%d" PRIu32 ") %s",
1566 ptv->iface, errno, strerror(errno));
1567 AFPSwitchState(ptv, AFP_STATE_DOWN);
1568 continue;
1569 case AFP_FAILURE:
1570 AFPSwitchState(ptv, AFP_STATE_DOWN);
1571 SCReturnInt(TM_ECODE_FAILED);
1572 break;
1573 case AFP_KERNEL_DROP:
1574 AFPDumpCounters(ptv);
1575 break;
1576 }
1577 } else if (unlikely(r == 0)) {
1578 /* poll timed out, lets see if we need to inject a fake packet */
1579 TmThreadsCaptureInjectPacket(tv, ptv->slot, NULL);
1580
1581 } else if ((r < 0) && (errno != EINTR)) {
1582 SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d" PRIu32 ") %s",
1583 ptv->iface,
1584 errno, strerror(errno));
1585 AFPSwitchState(ptv, AFP_STATE_DOWN);
1586 continue;
1587 }
1588 StatsSyncCountersIfSignalled(tv);
1589 }
1590
1591 AFPDumpCounters(ptv);
1592 StatsSyncCountersIfSignalled(tv);
1593 SCReturnInt(TM_ECODE_OK);
1594 }
1595
1596 static int AFPGetDevFlags(int fd, const char *ifname)
1597 {
1598 struct ifreq ifr;
1599
1600 memset(&ifr, 0, sizeof(ifr));
1601 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1602
1603 if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) {
1604 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1605 ifname, strerror(errno));
1606 return -1;
1607 }
1608
1609 return ifr.ifr_flags;
1610 }
1611
1612
1613 static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose)
1614 {
1615 struct ifreq ifr;
1616
1617 memset(&ifr, 0, sizeof(ifr));
1618 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1619
1620 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
1621 if (verbose)
1622 SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
1623 ifname, strerror(errno));
1624 return -1;
1625 }
1626
1627 return ifr.ifr_ifindex;
1628 }
1629
1630 static int AFPGetDevLinktype(int fd, const char *ifname)
1631 {
1632 struct ifreq ifr;
1633
1634 memset(&ifr, 0, sizeof(ifr));
1635 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1636
1637 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1638 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1639 ifname, strerror(errno));
1640 return -1;
1641 }
1642
1643 switch (ifr.ifr_hwaddr.sa_family) {
1644 case ARPHRD_LOOPBACK:
1645 return LINKTYPE_ETHERNET;
1646 case ARPHRD_PPP:
1647 case ARPHRD_NONE:
1648 return LINKTYPE_RAW;
1649 default:
1650 return ifr.ifr_hwaddr.sa_family;
1651 }
1652 }
1653
1654 int AFPGetLinkType(const char *ifname)
1655 {
1656 int ltype;
1657
1658 int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1659 if (fd == -1) {
1660 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
1661 return LINKTYPE_RAW;
1662 }
1663
1664 ltype = AFPGetDevLinktype(fd, ifname);
1665 close(fd);
1666
1667 return ltype;
1668 }
1669
1670 static int AFPComputeRingParams(AFPThreadVars *ptv, int order)
1671 {
1672 /* Compute structure:
1673 Target is to store all pending packets
1674 with a size equal to MTU + auxdata
1675 And we keep a decent number of block
1676
1677 To do so:
1678 Compute frame_size (aligned to be able to fit in block
1679 Check which block size we need. Blocksize is a 2^n * pagesize
1680 We then need to get order, big enough to have
1681 frame_size < block size
1682 Find number of frame per block (divide)
1683 Fill in packet_req
1684
1685 Compute frame size:
1686 described in packet_mmap.txt
1687 dependant on snaplen (need to use a variable ?)
1688 snaplen: MTU ?
1689 tp_hdrlen determine_version in daq_afpacket
1690 in V1: sizeof(struct tpacket_hdr);
1691 in V2: val in getsockopt(instance->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len)
1692 frame size: TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1693
1694 */
1695 int tp_hdrlen = sizeof(struct tpacket_hdr);
1696 int snaplen = default_packet_size;
1697
1698 if (snaplen == 0) {
1699 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1700 if (snaplen <= 0) {
1701 SCLogWarning(SC_ERR_INVALID_VALUE,
1702 "Unable to get MTU, setting snaplen to sane default of 1514");
1703 snaplen = 1514;
1704 }
1705 }
1706
1707 ptv->req.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1708 ptv->req.tp_block_size = getpagesize() << order;
1709 int frames_per_block = ptv->req.tp_block_size / ptv->req.tp_frame_size;
1710 if (frames_per_block == 0) {
1711 SCLogError(SC_ERR_INVALID_VALUE, "Frame size bigger than block size");
1712 return -1;
1713 }
1714 ptv->req.tp_frame_nr = ptv->ring_size;
1715 ptv->req.tp_block_nr = ptv->req.tp_frame_nr / frames_per_block + 1;
1716 /* exact division */
1717 ptv->req.tp_frame_nr = ptv->req.tp_block_nr * frames_per_block;
1718 SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
1719 ptv->req.tp_block_size, ptv->req.tp_block_nr,
1720 ptv->req.tp_frame_size, ptv->req.tp_frame_nr);
1721 return 1;
1722 }
1723
1724 #ifdef HAVE_TPACKET_V3
1725 static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
1726 {
1727 ptv->req3.tp_block_size = ptv->block_size;
1728 ptv->req3.tp_frame_size = 2048;
1729 int frames_per_block = 0;
1730 int tp_hdrlen = sizeof(struct tpacket3_hdr);
1731 int snaplen = default_packet_size;
1732
1733 if (snaplen == 0) {
1734 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1735 if (snaplen <= 0) {
1736 SCLogWarning(SC_ERR_INVALID_VALUE,
1737 "Unable to get MTU, setting snaplen to sane default of 1514");
1738 snaplen = 1514;
1739 }
1740 }
1741
1742 ptv->req.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1743 frames_per_block = ptv->req3.tp_block_size / ptv->req3.tp_frame_size;
1744
1745 if (frames_per_block == 0) {
1746 SCLogError(SC_ERR_INVALID_VALUE,
1747 "Block size is too small, it should be at least %d",
1748 ptv->req3.tp_frame_size);
1749 return -1;
1750 }
1751 ptv->req3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
1752 /* exact division */
1753 ptv->req3.tp_frame_nr = ptv->req3.tp_block_nr * frames_per_block;
1754 ptv->req3.tp_retire_blk_tov = ptv->block_timeout;
1755 ptv->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
1756 SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
1757 ptv->req3.tp_block_size, ptv->req3.tp_block_nr,
1758 ptv->req3.tp_frame_size, ptv->req3.tp_frame_nr,
1759 ptv->req3.tp_block_size * ptv->req3.tp_block_nr
1760 );
1761 return 1;
1762 }
1763 #endif
1764
1765 static int AFPSetupRing(AFPThreadVars *ptv, char *devname)
1766 {
1767 int val;
1768 unsigned int len = sizeof(val), i;
1769 int order;
1770 int r, mmap_flag;
1771
1772 #ifdef HAVE_TPACKET_V3
1773 if (ptv->flags & AFP_TPACKET_V3) {
1774 val = TPACKET_V3;
1775 } else
1776 #endif
1777 {
1778 val = TPACKET_V2;
1779 }
1780 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
1781 if (errno == ENOPROTOOPT) {
1782 if (ptv->flags & AFP_TPACKET_V3) {
1783 SCLogError(SC_ERR_AFP_CREATE,
1784 "Too old kernel giving up (need 3.2 for TPACKET_V3)");
1785 } else {
1786 SCLogError(SC_ERR_AFP_CREATE,
1787 "Too old kernel giving up (need 2.6.27 at least)");
1788 }
1789 }
1790 SCLogError(SC_ERR_AFP_CREATE, "Error when retrieving packet header len");
1791 return AFP_FATAL_ERROR;
1792 }
1793
1794 val = TPACKET_V2;
1795 #ifdef HAVE_TPACKET_V3
1796 if (ptv->flags & AFP_TPACKET_V3) {
1797 val = TPACKET_V3;
1798 }
1799 #endif
1800 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_VERSION, &val,
1801 sizeof(val)) < 0) {
1802 SCLogError(SC_ERR_AFP_CREATE,
1803 "Can't activate TPACKET_V2/TPACKET_V3 on packet socket: %s",
1804 strerror(errno));
1805 return AFP_FATAL_ERROR;
1806 }
1807
1808 #ifdef HAVE_HW_TIMESTAMPING
1809 int req = SOF_TIMESTAMPING_RAW_HARDWARE;
1810 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req,
1811 sizeof(req)) < 0) {
1812 SCLogWarning(SC_ERR_AFP_CREATE,
1813 "Can't activate hardware timestamping on packet socket: %s",
1814 strerror(errno));
1815 }
1816 #endif
1817
1818 /* Let's reserve head room so we can add the VLAN header in IPS
1819 * or TAP mode before write the packet */
1820 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
1821 /* Only one vlan is extracted from AFP header so
1822 * one VLAN header length is enough. */
1823 int reserve = VLAN_HEADER_LEN;
1824 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
1825 sizeof(reserve)) < 0) {
1826 SCLogError(SC_ERR_AFP_CREATE,
1827 "Can't activate reserve on packet socket: %s",
1828 strerror(errno));
1829 return AFP_FATAL_ERROR;
1830 }
1831 }
1832
1833 /* Allocate RX ring */
1834 #ifdef HAVE_TPACKET_V3
1835 if (ptv->flags & AFP_TPACKET_V3) {
1836 if (AFPComputeRingParamsV3(ptv) != 1) {
1837 return AFP_FATAL_ERROR;
1838 }
1839 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1840 (void *) &ptv->req3, sizeof(ptv->req3));
1841 if (r < 0) {
1842 SCLogError(SC_ERR_MEM_ALLOC,
1843 "Unable to allocate RX Ring for iface %s: (%d) %s",
1844 devname,
1845 errno,
1846 strerror(errno));
1847 return AFP_FATAL_ERROR;
1848 }
1849 } else {
1850 #endif
1851 for (order = AFP_BLOCK_SIZE_DEFAULT_ORDER; order >= 0; order--) {
1852 if (AFPComputeRingParams(ptv, order) != 1) {
1853 SCLogInfo("Ring parameter are incorrect. Please correct the devel");
1854 return AFP_FATAL_ERROR;
1855 }
1856
1857 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1858 (void *) &ptv->req, sizeof(ptv->req));
1859
1860 if (r < 0) {
1861 if (errno == ENOMEM) {
1862 SCLogInfo("Memory issue with ring parameters. Retrying.");
1863 continue;
1864 }
1865 SCLogError(SC_ERR_MEM_ALLOC,
1866 "Unable to allocate RX Ring for iface %s: (%d) %s",
1867 devname,
1868 errno,
1869 strerror(errno));
1870 return AFP_FATAL_ERROR;
1871 } else {
1872 break;
1873 }
1874 }
1875 if (order < 0) {
1876 SCLogError(SC_ERR_MEM_ALLOC,
1877 "Unable to allocate RX Ring for iface %s (order 0 failed)",
1878 devname);
1879 return AFP_FATAL_ERROR;
1880 }
1881 #ifdef HAVE_TPACKET_V3
1882 }
1883 #endif
1884
1885 /* Allocate the Ring */
1886 #ifdef HAVE_TPACKET_V3
1887 if (ptv->flags & AFP_TPACKET_V3) {
1888 ptv->ring_buflen = ptv->req3.tp_block_nr * ptv->req3.tp_block_size;
1889 } else {
1890 #endif
1891 ptv->ring_buflen = ptv->req.tp_block_nr * ptv->req.tp_block_size;
1892 #ifdef HAVE_TPACKET_V3
1893 }
1894 #endif
1895 mmap_flag = MAP_SHARED;
1896 if (ptv->flags & AFP_MMAP_LOCKED)
1897 mmap_flag |= MAP_LOCKED;
1898 ptv->ring_buf = mmap(0, ptv->ring_buflen, PROT_READ|PROT_WRITE,
1899 mmap_flag, ptv->socket, 0);
1900 if (ptv->ring_buf == MAP_FAILED) {
1901 SCLogError(SC_ERR_MEM_ALLOC, "Unable to mmap, error %s",
1902 strerror(errno));
1903 goto mmap_err;
1904 }
1905 #ifdef HAVE_TPACKET_V3
1906 if (ptv->flags & AFP_TPACKET_V3) {
1907 ptv->ring_v3 = SCMalloc(ptv->req3.tp_block_nr * sizeof(*ptv->ring_v3));
1908 if (!ptv->ring_v3) {
1909 SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring_v3");
1910 goto postmmap_err;
1911 }
1912 for (i = 0; i < ptv->req3.tp_block_nr; ++i) {
1913 ptv->ring_v3[i].iov_base = ptv->ring_buf + (i * ptv->req3.tp_block_size);
1914 ptv->ring_v3[i].iov_len = ptv->req3.tp_block_size;
1915 }
1916 } else {
1917 #endif
1918 /* allocate a ring for each frame header pointer*/
1919 ptv->ring_v2 = SCMalloc(ptv->req.tp_frame_nr * sizeof (union thdr *));
1920 if (ptv->ring_v2 == NULL) {
1921 SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate frame buf");
1922 goto postmmap_err;
1923 }
1924 memset(ptv->ring_v2, 0, ptv->req.tp_frame_nr * sizeof (union thdr *));
1925 /* fill the header ring with proper frame ptr*/
1926 ptv->frame_offset = 0;
1927 for (i = 0; i < ptv->req.tp_block_nr; ++i) {
1928 void *base = &(ptv->ring_buf[i * ptv->req.tp_block_size]);
1929 unsigned int j;
1930 for (j = 0; j < ptv->req.tp_block_size / ptv->req.tp_frame_size; ++j, ++ptv->frame_offset) {
1931 (((union thdr **)ptv->ring_v2)[ptv->frame_offset]) = base;
1932 base += ptv->req.tp_frame_size;
1933 }
1934 }
1935 ptv->frame_offset = 0;
1936 #ifdef HAVE_TPACKET_V3
1937 }
1938 #endif
1939
1940 return 0;
1941
1942 postmmap_err:
1943 munmap(ptv->ring_buf, ptv->ring_buflen);
1944 if (ptv->ring_v2)
1945 SCFree(ptv->ring_v2);
1946 if (ptv->ring_v3)
1947 SCFree(ptv->ring_v3);
1948 mmap_err:
1949 /* Packet mmap does the cleaning when socket is closed */
1950 return AFP_FATAL_ERROR;
1951 }
1952
/** \brief test if we can use FANOUT. Older kernels like those in
 *         CentOS6 have HAVE_PACKET_FANOUT defined but fail to work
 *
 *  A temporary AF_PACKET socket is created and PACKET_FANOUT is set on
 *  it with the hash mode and the defrag flag.
 *
 *  \retval 1 if the fanout option was accepted
 *  \retval 0 if it was refused, if the socket can not be created or if
 *          HAVE_PACKET_FANOUT is not defined
 */
int AFPIsFanoutSupported(void)
{
#ifdef HAVE_PACKET_FANOUT
    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0)
        return 0;

    /* 32 bits wide: a 16 bits value is promoted to int and the defrag
     * flag would be shifted in the sign bit, which is undefined */
    uint32_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
    uint16_t id = 1;
    uint32_t option = (mode << 16) | (id & 0xffff);
    int r = setsockopt(fd, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
    /* keep the error of setsockopt, close() may change errno */
    const int setsockopt_errno = errno;
    close(fd);

    if (r < 0) {
        SCLogPerf("fanout not supported by kernel: %s", strerror(setsockopt_errno));
        return 0;
    }
    return 1;
#else
    return 0;
#endif
}
1978
1979 #ifdef HAVE_PACKET_EBPF
1980
1981 static int SockFanoutSeteBPF(AFPThreadVars *ptv)
1982 {
1983 int pfd = ptv->ebpf_lb_fd;
1984 if (pfd == -1) {
1985 SCLogError(SC_ERR_INVALID_VALUE,
1986 "Fanout file descriptor is invalid");
1987 return -1;
1988 }
1989
1990 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
1991 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf");
1992 return -1;
1993 }
1994 SCLogInfo("Activated eBPF on socket");
1995
1996 return 0;
1997 }
1998
1999 static int SetEbpfFilter(AFPThreadVars *ptv)
2000 {
2001 int pfd = ptv->ebpf_filter_fd;
2002 if (pfd == -1) {
2003 SCLogError(SC_ERR_INVALID_VALUE,
2004 "Filter file descriptor is invalid");
2005 return -1;
2006 }
2007
2008 if (setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &pfd, sizeof(pfd))) {
2009 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
2010 return -1;
2011 }
2012 SCLogInfo("Activated eBPF filter on socket");
2013
2014 return 0;
2015 }
2016 #endif
2017
2018 static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
2019 {
2020 int r;
2021 int ret = AFP_FATAL_ERROR;
2022 struct packet_mreq sock_params;
2023 struct sockaddr_ll bind_address;
2024 int if_idx;
2025
2026 /* open socket */
2027 ptv->socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
2028 if (ptv->socket == -1) {
2029 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
2030 goto error;
2031 }
2032
2033 if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);
2034
2035 if (if_idx == -1) {
2036 goto error;
2037 }
2038
2039 /* bind socket */
2040 memset(&bind_address, 0, sizeof(bind_address));
2041 bind_address.sll_family = AF_PACKET;
2042 bind_address.sll_protocol = htons(ETH_P_ALL);
2043 bind_address.sll_ifindex = if_idx;
2044 if (bind_address.sll_ifindex == -1) {
2045 if (verbose)
2046 SCLogError(SC_ERR_AFP_CREATE, "Couldn't find iface %s", devname);
2047 ret = AFP_RECOVERABLE_ERROR;
2048 goto socket_err;
2049 }
2050
2051 int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
2052 if (if_flags == -1) {
2053 if (verbose) {
2054 SCLogError(SC_ERR_AFP_READ,
2055 "Couldn't get flags for interface '%s'",
2056 ptv->iface);
2057 }
2058 ret = AFP_RECOVERABLE_ERROR;
2059 goto socket_err;
2060 } else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
2061 if (verbose) {
2062 SCLogError(SC_ERR_AFP_READ,
2063 "Interface '%s' is down",
2064 ptv->iface);
2065 }
2066 ret = AFP_RECOVERABLE_ERROR;
2067 goto socket_err;
2068 }
2069
2070 if (ptv->promisc != 0) {
2071 /* Force promiscuous mode */
2072 memset(&sock_params, 0, sizeof(sock_params));
2073 sock_params.mr_type = PACKET_MR_PROMISC;
2074 sock_params.mr_ifindex = bind_address.sll_ifindex;
2075 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP,(void *)&sock_params, sizeof(sock_params));
2076 if (r < 0) {
2077 SCLogError(SC_ERR_AFP_CREATE,
2078 "Couldn't switch iface %s to promiscuous, error %s",
2079 devname, strerror(errno));
2080 goto socket_err;
2081 }
2082 }
2083
2084 if (ptv->checksum_mode == CHECKSUM_VALIDATION_KERNEL) {
2085 int val = 1;
2086 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_AUXDATA, &val,
2087 sizeof(val)) == -1 && errno != ENOPROTOOPT) {
2088 SCLogWarning(SC_ERR_NO_AF_PACKET,
2089 "'kernel' checksum mode not supported, falling back to full mode.");
2090 ptv->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
2091 }
2092 }
2093
2094 /* set socket recv buffer size */
2095 if (ptv->buffer_size != 0) {
2096 /*
2097 * Set the socket buffer size to the specified value.
2098 */
2099 SCLogPerf("Setting AF_PACKET socket buffer to %d", ptv->buffer_size);
2100 if (setsockopt(ptv->socket, SOL_SOCKET, SO_RCVBUF,
2101 &ptv->buffer_size,
2102 sizeof(ptv->buffer_size)) == -1) {
2103 SCLogError(SC_ERR_AFP_CREATE,
2104 "Couldn't set buffer size to %d on iface %s, error %s",
2105 ptv->buffer_size, devname, strerror(errno));
2106 goto socket_err;
2107 }
2108 }
2109
2110 r = bind(ptv->socket, (struct sockaddr *)&bind_address, sizeof(bind_address));
2111 if (r < 0) {
2112 if (verbose) {
2113 if (errno == ENETDOWN) {
2114 SCLogError(SC_ERR_AFP_CREATE,
2115 "Couldn't bind AF_PACKET socket, iface %s is down",
2116 devname);
2117 } else {
2118 SCLogError(SC_ERR_AFP_CREATE,
2119 "Couldn't bind AF_PACKET socket to iface %s, error %s",
2120 devname, strerror(errno));
2121 }
2122 }
2123 ret = AFP_RECOVERABLE_ERROR;
2124 goto socket_err;
2125 }
2126
2127
2128 #ifdef HAVE_PACKET_FANOUT
2129 /* add binded socket to fanout group */
2130 if (ptv->threads > 1) {
2131 uint16_t mode = ptv->cluster_type;
2132 uint16_t id = ptv->cluster_id;
2133 uint32_t option = (mode << 16) | (id & 0xffff);
2134 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
2135 if (r < 0) {
2136 SCLogError(SC_ERR_AFP_CREATE,
2137 "Couldn't set fanout mode, error %s",
2138 strerror(errno));
2139 goto socket_err;
2140 }
2141 }
2142 #endif
2143
2144 #ifdef HAVE_PACKET_EBPF
2145 if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
2146 r = SockFanoutSeteBPF(ptv);
2147 if (r < 0) {
2148 SCLogError(SC_ERR_AFP_CREATE,
2149 "Coudn't set EBPF, error %s",
2150 strerror(errno));
2151 goto socket_err;
2152 }
2153 }
2154 #endif
2155
2156 if (ptv->flags & AFP_RING_MODE) {
2157 ret = AFPSetupRing(ptv, devname);
2158 if (ret != 0)
2159 goto socket_err;
2160 }
2161
2162 SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
2163
2164 ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
2165 switch (ptv->datalink) {
2166 case ARPHRD_PPP:
2167 case ARPHRD_ATM:
2168 ptv->cooked = 1;
2169 break;
2170 }
2171
2172 TmEcode rc = AFPSetBPFFilter(ptv);
2173 if (rc == TM_ECODE_FAILED) {
2174 SCLogError(SC_ERR_AFP_CREATE, "Set AF_PACKET bpf filter \"%s\" failed.", ptv->bpf_filter);
2175 ret = AFP_FATAL_ERROR;
2176 goto socket_err;
2177 }
2178
2179 /* Init is ok */
2180 AFPSwitchState(ptv, AFP_STATE_UP);
2181 return 0;
2182
2183 socket_err:
2184 close(ptv->socket);
2185 ptv->socket = -1;
2186 if (ptv->flags & AFP_TPACKET_V3) {
2187 if (ptv->ring_v3) {
2188 SCFree(ptv->ring_v3);
2189 ptv->ring_v3 = NULL;
2190 }
2191 } else {
2192 if (ptv->ring_v2) {
2193 SCFree(ptv->ring_v2);
2194 ptv->ring_v2 = NULL;
2195 }
2196 }
2197
2198 error:
2199 return -ret;
2200 }
2201
2202 TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
2203 {
2204 struct bpf_program filter;
2205 struct sock_fprog fcode;
2206 int rc;
2207
2208 #ifdef HAVE_PACKET_EBPF
2209 if (ptv->ebpf_filter_fd != -1) {
2210 return SetEbpfFilter(ptv);
2211 }
2212 #endif
2213
2214 if (!ptv->bpf_filter)
2215 return TM_ECODE_OK;
2216
2217 SCMutexLock(&afpacket_bpf_set_filter_lock);
2218
2219 SCLogInfo("Using BPF '%s' on iface '%s'",
2220 ptv->bpf_filter,
2221 ptv->iface);
2222 if (pcap_compile_nopcap(default_packet_size, /* snaplen_arg */
2223 ptv->datalink, /* linktype_arg */
2224 &filter, /* program */
2225 ptv->bpf_filter, /* const char *buf */
2226 1, /* optimize */
2227 0 /* mask */
2228 ) == -1) {
2229 SCLogError(SC_ERR_AFP_CREATE, "Filter compilation failed.");
2230 SCMutexUnlock(&afpacket_bpf_set_filter_lock);
2231 return TM_ECODE_FAILED;
2232 }
2233 SCMutexUnlock(&afpacket_bpf_set_filter_lock);
2234
2235 if (filter.bf_insns == NULL) {
2236 SCLogError(SC_ERR_AFP_CREATE, "Filter badly setup.");
2237 pcap_freecode(&filter);
2238 return TM_ECODE_FAILED;
2239 }
2240
2241 fcode.len = filter.bf_len;
2242 fcode.filter = (struct sock_filter*)filter.bf_insns;
2243
2244 rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
2245
2246 pcap_freecode(&filter);
2247 if(rc == -1) {
2248 SCLogError(SC_ERR_AFP_CREATE, "Failed to attach filter: %s", strerror(errno));
2249 return TM_ECODE_FAILED;
2250 }
2251
2252 return TM_ECODE_OK;
2253 }
2254
#ifdef HAVE_PACKET_EBPF
/**
 * Insert a half flow in the kernel bypass table
 *
 * The value written is an array holding one struct pair per configured CPU,
 * each initialized with the creation time and zeroed packet and byte
 * counters. The entry is only created if the key is not already present
 * (BPF_NOEXIST).
 *
 * \param mapd file descriptor of the protocol bypass table
 * \param key data to use as key in the table
 * \param inittime time of creation of the entry (in monotonic clock)
 *
 * \retval 1 the entry has been inserted
 * \retval 0 the entry has not been inserted (CPU count unavailable, entry
 *         already present, table full or any other update error)
 */
static int AFPInsertHalfFlow(int mapd, void *key, uint64_t inittime)
{
    unsigned int nr_cpus = UtilCpuGetNumProcessorsConfigured();
    /* A zero length variable length array is undefined behavior, so refuse
     * to build the value if the CPU count could not be obtained. */
    if (nr_cpus == 0) {
        SCLogError(SC_ERR_BPF, "Can't update eBPF map: unable to get number of CPUs");
        return 0;
    }

    struct pair value[nr_cpus];
    unsigned int i;
    for (i = 0; i < nr_cpus; i++) {
        value[i].time = inittime;
        value[i].packets = 0;
        value[i].bytes = 0;
    }
    SCLogDebug("Inserting element in eBPF mapping: %" PRIu64, inittime);
    if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) != 0) {
        /* Keep the error code of the failed update: later calls may
         * change errno before it is reported. */
        const int update_errno = errno;
        switch (update_errno) {
            /* Table full or entry already there: not worth an error
             * message, the flow is simply not bypassed. */
            case E2BIG:
            case EEXIST:
                return 0;
            default:
                SCLogError(SC_ERR_BPF, "Can't update eBPF map: %s (%d)",
                        strerror(update_errno),
                        update_errno);
                return 0;
        }
    }
    return 1;
}
#endif
2290
2291 static int AFPBypassCallback(Packet *p)
2292 {
2293 #ifdef HAVE_PACKET_EBPF
2294 SCLogDebug("Calling af_packet callback function");
2295 /* Only bypass TCP and UDP */
2296 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2297 return 0;
2298 }
2299
2300 /* Bypassing tunneled packets is currently not supported
2301 * because we can't discard the inner packet only due to
2302 * primitive parsing in eBPF */
2303 if (IS_TUNNEL_PKT(p)) {
2304 return 0;
2305 }
2306 struct timespec curtime;
2307 uint64_t inittime = 0;
2308 /* In eBPF, the function that we have use to get time return the
2309 * monotonic clock (the time since start of the computer). So we
2310 * can't use the timestamp of the packet. */
2311 if (clock_gettime(CLOCK_MONOTONIC, &curtime) == 0) {
2312 inittime = curtime.tv_sec * 1000000000;
2313 }
2314 if (PKT_IS_IPV4(p)) {
2315 SCLogDebug("add an IPv4");
2316 if (p->afp_v.v4_map_fd == -1) {
2317 return 0;
2318 }
2319 struct flowv4_keys key = {};
2320 key.src = htonl(GET_IPV4_SRC_ADDR_U32(p));
2321 key.dst = htonl(GET_IPV4_DST_ADDR_U32(p));
2322 key.port16[0] = GET_TCP_SRC_PORT(p);
2323 key.port16[1] = GET_TCP_DST_PORT(p);
2324
2325 key.ip_proto = IPV4_GET_IPPROTO(p);
2326 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
2327 return 0;
2328 }
2329 key.src = htonl(GET_IPV4_DST_ADDR_U32(p));
2330 key.dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
2331 key.port16[0] = GET_TCP_DST_PORT(p);
2332 key.port16[1] = GET_TCP_SRC_PORT(p);
2333 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
2334 return 0;
2335 }
2336 return 1;
2337 }
2338 /* For IPv6 case we don't handle extended header in eBPF */
2339 if (PKT_IS_IPV6(p) &&
2340 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
2341 int i;
2342 if (p->afp_v.v6_map_fd == -1) {
2343 return 0;
2344 }
2345 SCLogDebug("add an IPv6");
2346 struct flowv6_keys key = {};
2347 for (i = 0; i < 4; i++) {
2348 key.src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2349 key.dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2350 }
2351 key.port16[0] = GET_TCP_SRC_PORT(p);
2352 key.port16[1] = GET_TCP_DST_PORT(p);
2353 key.ip_proto = IPV6_GET_NH(p);
2354 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
2355 return 0;
2356 }
2357 for (i = 0; i < 4; i++) {
2358 key.src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2359 key.dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2360 }
2361 key.port16[0] = GET_TCP_DST_PORT(p);
2362 key.port16[1] = GET_TCP_SRC_PORT(p);
2363 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
2364 return 0;
2365 }
2366 return 1;
2367 }
2368 #endif
2369 return 0;
2370 }
2371
2372 static int AFPXDPBypassCallback(Packet *p)
2373 {
2374 #ifdef HAVE_PACKET_XDP
2375 SCLogDebug("Calling af_packet callback function");
2376 /* Only bypass TCP and UDP */
2377 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2378 return 0;
2379 }
2380
2381 /* Bypassing tunneled packets is currently not supported
2382 * because we can't discard the inner packet only due to
2383 * primitive parsing in eBPF */
2384 if (IS_TUNNEL_PKT(p)) {
2385 return 0;
2386 }
2387 struct timespec curtime;
2388 uint64_t inittime = 0;
2389 if (clock_gettime(CLOCK_MONOTONIC, &curtime) == 0) {
2390 inittime = curtime.tv_sec * 1000000000;
2391 }
2392 if (PKT_IS_IPV4(p)) {
2393 struct flowv4_keys key = {};
2394 if (p->afp_v.v4_map_fd == -1) {
2395 return 0;
2396 }
2397 key.src = GET_IPV4_SRC_ADDR_U32(p);
2398 key.dst = GET_IPV4_DST_ADDR_U32(p);
2399 /* FIXME htons or not depending of XDP and af_packet eBPF */
2400 key.port16[0] = htons(GET_TCP_SRC_PORT(p));
2401 key.port16[1] = htons(GET_TCP_DST_PORT(p));
2402 key.ip_proto = IPV4_GET_IPPROTO(p);
2403 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
2404 return 0;
2405 }
2406 key.src = GET_IPV4_DST_ADDR_U32(p);
2407 key.dst = GET_IPV4_SRC_ADDR_U32(p);
2408 key.port16[0] = htons(GET_TCP_DST_PORT(p));
2409 key.port16[1] = htons(GET_TCP_SRC_PORT(p));
2410 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, inittime) == 0) {
2411 return 0;
2412 }
2413 return 1;
2414 }
2415 /* For IPv6 case we don't handle extended header in eBPF */
2416 if (PKT_IS_IPV6(p) &&
2417 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
2418 SCLogDebug("add an IPv6");
2419 if (p->afp_v.v6_map_fd == -1) {
2420 return 0;
2421 }
2422 int i;
2423 struct flowv6_keys key = {};
2424 for (i = 0; i < 4; i++) {
2425 key.src[i] = GET_IPV6_SRC_ADDR(p)[i];
2426 key.dst[i] = GET_IPV6_DST_ADDR(p)[i];
2427 }
2428 key.port16[0] = htons(GET_TCP_SRC_PORT(p));
2429 key.port16[1] = htons(GET_TCP_DST_PORT(p));
2430 key.ip_proto = IPV6_GET_NH(p);
2431 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
2432 return 0;
2433 }
2434 for (i = 0; i < 4; i++) {
2435 key.src[i] = GET_IPV6_DST_ADDR(p)[i];
2436 key.dst[i] = GET_IPV6_SRC_ADDR(p)[i];
2437 }
2438 key.port16[0] = htons(GET_TCP_DST_PORT(p));
2439 key.port16[1] = htons(GET_TCP_SRC_PORT(p));
2440 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, inittime) == 0) {
2441 return 0;
2442 }
2443 return 1;
2444 }
2445 #endif
2446 return 0;
2447 }
2448
2449 /**
2450 * \brief Init function for ReceiveAFP.
2451 *
2452 * \param tv pointer to ThreadVars
2453 * \param initdata pointer to the interface passed from the user
2454 * \param data pointer gets populated with AFPThreadVars
2455 *
2456 * \todo Create a general AFP setup function.
2457 */
2458 TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
2459 {
2460 SCEnter();
2461 AFPIfaceConfig *afpconfig = (AFPIfaceConfig *)initdata;
2462
2463 if (initdata == NULL) {
2464 SCLogError(SC_ERR_INVALID_ARGUMENT, "initdata == NULL");
2465 SCReturnInt(TM_ECODE_FAILED);
2466 }
2467
2468 AFPThreadVars *ptv = SCMalloc(sizeof(AFPThreadVars));
2469 if (unlikely(ptv == NULL)) {
2470 afpconfig->DerefFunc(afpconfig);
2471 SCReturnInt(TM_ECODE_FAILED);
2472 }
2473 memset(ptv, 0, sizeof(AFPThreadVars));
2474
2475 ptv->tv = tv;
2476 ptv->cooked = 0;
2477
2478 strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
2479 ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2480
2481 ptv->livedev = LiveGetDevice(ptv->iface);
2482 if (ptv->livedev == NULL) {
2483 SCLogError(SC_ERR_INVALID_VALUE, "Unable to find Live device");
2484 SCFree(ptv);
2485 SCReturnInt(TM_ECODE_FAILED);
2486 }
2487
2488 ptv->buffer_size = afpconfig->buffer_size;
2489 ptv->ring_size = afpconfig->ring_size;
2490 ptv->block_size = afpconfig->block_size;
2491
2492 ptv->promisc = afpconfig->promisc;
2493 ptv->checksum_mode = afpconfig->checksum_mode;
2494 ptv->bpf_filter = NULL;
2495
2496 ptv->threads = 1;
2497 #ifdef HAVE_PACKET_FANOUT
2498 ptv->cluster_type = PACKET_FANOUT_LB;
2499 ptv->cluster_id = 1;
2500 /* We only set cluster info if the number of reader threads is greater than 1 */
2501 if (afpconfig->threads > 1) {
2502 ptv->cluster_id = afpconfig->cluster_id;
2503 ptv->cluster_type = afpconfig->cluster_type;
2504 ptv->threads = afpconfig->threads;
2505 }
2506 #endif
2507 ptv->flags = afpconfig->flags;
2508
2509 if (afpconfig->bpf_filter) {
2510 ptv->bpf_filter = afpconfig->bpf_filter;
2511 }
2512 ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
2513 ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
2514 ptv->xdp_mode = afpconfig->xdp_mode;
2515
2516 #ifdef HAVE_PACKET_EBPF
2517 if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
2518 ptv->v4_map_fd = EBPFGetMapFDByName("flow_table_v4");
2519 if (ptv->v4_map_fd == -1) {
2520 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v4");
2521 }
2522 ptv->v6_map_fd = EBPFGetMapFDByName("flow_table_v6");
2523 if (ptv->v6_map_fd == -1) {
2524 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v6");
2525 }
2526 }
2527 #endif
2528
2529 #ifdef PACKET_STATISTICS
2530 ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
2531 ptv->tv);
2532 ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
2533 ptv->tv);
2534 #endif
2535
2536 ptv->copy_mode = afpconfig->copy_mode;
2537 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
2538 strlcpy(ptv->out_iface, afpconfig->out_iface, AFP_IFACE_NAME_LENGTH);
2539 ptv->out_iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2540 /* Warn about BPF filter consequence */
2541 if (ptv->bpf_filter) {
2542 SCLogWarning(SC_WARN_UNCOMMON, "Enabling a BPF filter in IPS mode result"
2543 " in dropping all non matching packets.");
2544 }
2545 }
2546
2547
2548 if (AFPPeersListAdd(ptv) == TM_ECODE_FAILED) {
2549 SCFree(ptv);
2550 afpconfig->DerefFunc(afpconfig);
2551 SCReturnInt(TM_ECODE_FAILED);
2552 }
2553
2554 #define T_DATA_SIZE 70000
2555 ptv->data = SCMalloc(T_DATA_SIZE);
2556 if (ptv->data == NULL) {
2557 afpconfig->DerefFunc(afpconfig);
2558 SCFree(ptv);
2559 SCReturnInt(TM_ECODE_FAILED);
2560 }
2561 ptv->datalen = T_DATA_SIZE;
2562 #undef T_DATA_SIZE
2563
2564 *data = (void *)ptv;
2565
2566 afpconfig->DerefFunc(afpconfig);
2567
2568 /* A bit strange to have this here but we only have vlan information
2569 * during reading so we need to know if we want to keep vlan during
2570 * the capture phase */
2571 int vlanbool = 0;
2572 if ((ConfGetBool("vlan.use-for-tracking", &vlanbool)) == 1 && vlanbool == 0) {
2573 ptv->flags |= AFP_VLAN_DISABLED;
2574 }
2575
2576 /* If kernel is older than 3.0, VLAN is not stripped so we don't
2577 * get the info from packet extended header but we will use a standard
2578 * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
2579 if (! SCKernelVersionIsAtLeast(3, 0)) {
2580 ptv->flags |= AFP_VLAN_DISABLED;
2581 }
2582
2583 SCReturnInt(TM_ECODE_OK);
2584 }
2585
2586 /**
2587 * \brief This function prints stats to the screen at exit.
2588 * \param tv pointer to ThreadVars
2589 * \param data pointer that gets cast into AFPThreadVars for ptv
2590 */
2591 void ReceiveAFPThreadExitStats(ThreadVars *tv, void *data)
2592 {
2593 SCEnter();
2594 AFPThreadVars *ptv = (AFPThreadVars *)data;
2595
2596 #ifdef PACKET_STATISTICS
2597 AFPDumpCounters(ptv);
2598 SCLogPerf("(%s) Kernel: Packets %" PRIu64 ", dropped %" PRIu64 "",
2599 tv->name,
2600 StatsGetLocalCounterValue(tv, ptv->capture_kernel_packets),
2601 StatsGetLocalCounterValue(tv, ptv->capture_kernel_drops));
2602 #endif
2603 }
2604
2605 /**
2606 * \brief DeInit function closes af packet socket at exit.
2607 * \param tv pointer to ThreadVars
2608 * \param data pointer that gets cast into AFPThreadVars for ptv
2609 */
2610 TmEcode ReceiveAFPThreadDeinit(ThreadVars *tv, void *data)
2611 {
2612 AFPThreadVars *ptv = (AFPThreadVars *)data;
2613
2614 AFPSwitchState(ptv, AFP_STATE_DOWN);
2615
2616 #ifdef HAVE_PACKET_XDP
2617 EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
2618 #endif
2619 if (ptv->data != NULL) {
2620 SCFree(ptv->data);
2621 ptv->data = NULL;
2622 }
2623 ptv->datalen = 0;
2624
2625 ptv->bpf_filter = NULL;
2626 if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring_v3) {
2627 SCFree(ptv->ring_v3);
2628 } else {
2629 if (ptv->ring_v2)
2630 SCFree(ptv->ring_v2);
2631 }
2632
2633 SCFree(ptv);
2634 SCReturnInt(TM_ECODE_OK);
2635 }
2636
2637 /**
2638 * \brief This function passes off to link type decoders.
2639 *
2640 * DecodeAFP reads packets from the PacketQueue and passes
2641 * them off to the proper link type decoder.
2642 *
2643 * \param t pointer to ThreadVars
2644 * \param p pointer to the current packet
2645 * \param data pointer that gets cast into AFPThreadVars for ptv
2646 * \param pq pointer to the current PacketQueue
2647 */
2648 TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *postpq)
2649 {
2650 SCEnter();
2651 DecodeThreadVars *dtv = (DecodeThreadVars *)data;
2652
2653 /* XXX HACK: flow timeout can call us for injected pseudo packets
2654 * see bug: https://redmine.openinfosecfoundation.org/issues/1107 */
2655 if (p->flags & PKT_PSEUDO_STREAM_END)
2656 return TM_ECODE_OK;
2657
2658 /* update counters */
2659 DecodeUpdatePacketCounters(tv, dtv, p);
2660
2661 /* If suri has set vlan during reading, we increase vlan counter */
2662 if (p->vlan_idx) {
2663 StatsIncr(tv, dtv->counter_vlan);
2664 }
2665
2666 /* call the decoder */
2667 switch (p->datalink) {
2668 case LINKTYPE_ETHERNET:
2669 DecodeEthernet(tv, dtv, p,GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2670 break;
2671 case LINKTYPE_LINUX_SLL:
2672 DecodeSll(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2673 break;
2674 case LINKTYPE_PPP:
2675 DecodePPP(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2676 break;
2677 case LINKTYPE_RAW:
2678 DecodeRaw(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2679 break;
2680 case LINKTYPE_NULL:
2681 DecodeNull(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2682 break;
2683 default:
2684 SCLogError(SC_ERR_DATALINK_UNIMPLEMENTED, "Error: datalink type %" PRId32 " not yet supported in module DecodeAFP", p->datalink);
2685 break;
2686 }
2687
2688 PacketDecodeFinalize(tv, dtv, p);
2689
2690 SCReturnInt(TM_ECODE_OK);
2691 }
2692
2693 TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
2694 {
2695 SCEnter();
2696 DecodeThreadVars *dtv = NULL;
2697
2698 dtv = DecodeThreadVarsAlloc(tv);
2699
2700 if (dtv == NULL)
2701 SCReturnInt(TM_ECODE_FAILED);
2702
2703 DecodeRegisterPerfCounters(dtv, tv);
2704
2705 *data = (void *)dtv;
2706
2707 SCReturnInt(TM_ECODE_OK);
2708 }
2709
2710 TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data)
2711 {
2712 if (data != NULL)
2713 DecodeThreadVarsFree(tv, data);
2714 SCReturnInt(TM_ECODE_OK);
2715 }
2716
2717 #endif /* HAVE_AF_PACKET */
2718 /* eof */
2719 /**
2720 * @}
2721 */