]> git.ipfire.org Git - people/ms/suricata.git/blob - src/source-af-packet.c
threading: change local packet queue logic
[people/ms/suricata.git] / src / source-af-packet.c
1 /* Copyright (C) 2011-2018 Open Information Security Foundation
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18 /**
19 * \defgroup afppacket AF_PACKET running mode
20 *
21 * @{
22 */
23
24 /**
25 * \file
26 *
27 * \author Eric Leblond <eric@regit.org>
28 *
29 * AF_PACKET socket acquisition support
30 *
31 */
32
33 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1
35 #include "suricata-common.h"
36 #include "config.h"
37 #include "suricata.h"
38 #include "decode.h"
39 #include "packet-queue.h"
40 #include "threads.h"
41 #include "threadvars.h"
42 #include "tm-queuehandlers.h"
43 #include "tm-modules.h"
44 #include "tm-threads.h"
45 #include "tm-threads-common.h"
46 #include "conf.h"
47 #include "util-cpu.h"
48 #include "util-debug.h"
49 #include "util-device.h"
50 #include "util-ebpf.h"
51 #include "util-error.h"
52 #include "util-privs.h"
53 #include "util-optimize.h"
54 #include "util-checksum.h"
55 #include "util-ioctl.h"
56 #include "util-host-info.h"
57 #include "tmqh-packetpool.h"
58 #include "source-af-packet.h"
59 #include "runmodes.h"
60 #include "flow-storage.h"
61
62 #ifdef HAVE_AF_PACKET
63
64 #if HAVE_SYS_IOCTL_H
65 #include <sys/ioctl.h>
66 #endif
67
68 #if HAVE_LINUX_SOCKIOS_H
69 #include <linux/sockios.h>
70 #endif
71
72 #ifdef HAVE_PACKET_EBPF
73 #include "util-ebpf.h"
74 #include <bpf/libbpf.h>
75 #include <bpf/bpf.h>
76 #endif
77
/* Local declaration of a BPF program descriptor.
 * NOTE(review): presumably required because PCAP_DONT_INCLUDE_PCAP_BPF_H is
 * defined at the top of this file, so pcap does not provide it -- confirm. */
struct bpf_program {
    unsigned int bf_len;        /* presumably the number of entries in bf_insns -- confirm */
    struct bpf_insn *bf_insns;  /* pointer to the BPF instructions */
};
82
83 #ifdef HAVE_PCAP_H
84 #include <pcap.h>
85 #endif
86
87 #ifdef HAVE_PCAP_PCAP_H
88 #include <pcap/pcap.h>
89 #endif
90
91 #include "util-bpf.h"
92
93 #if HAVE_LINUX_IF_ETHER_H
94 #include <linux/if_ether.h>
95 #endif
96
97 #if HAVE_LINUX_IF_PACKET_H
98 #include <linux/if_packet.h>
99 #endif
100
101 #if HAVE_LINUX_IF_ARP_H
102 #include <linux/if_arp.h>
103 #endif
104
105 #if HAVE_LINUX_FILTER_H
106 #include <linux/filter.h>
107 #endif
108
109 #if HAVE_SYS_MMAN_H
110 #include <sys/mman.h>
111 #endif
112
113 #ifdef HAVE_HW_TIMESTAMPING
114 #include <linux/net_tstamp.h>
115 #endif
116
117 #endif /* HAVE_AF_PACKET */
118
119 extern int max_pending_packets;
120
121 #ifndef HAVE_AF_PACKET
122
123 TmEcode NoAFPSupportExit(ThreadVars *, const void *, void **);
124
125 void TmModuleReceiveAFPRegister (void)
126 {
127 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
128 tmm_modules[TMM_RECEIVEAFP].ThreadInit = NoAFPSupportExit;
129 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
130 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
131 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
132 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
133 tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
134 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
135 }
136
137 /**
138 * \brief Registration Function for DecodeAFP.
139 */
140 void TmModuleDecodeAFPRegister (void)
141 {
142 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
143 tmm_modules[TMM_DECODEAFP].ThreadInit = NoAFPSupportExit;
144 tmm_modules[TMM_DECODEAFP].Func = NULL;
145 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
146 tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
147 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
148 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
149 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
150 }
151
152 /**
153 * \brief this function prints an error message and exits.
154 */
155 TmEcode NoAFPSupportExit(ThreadVars *tv, const void *initdata, void **data)
156 {
157 SCLogError(SC_ERR_NO_AF_PACKET,"Error creating thread %s: you do not have "
158 "support for AF_PACKET enabled, on Linux host please recompile "
159 "with --enable-af-packet", tv->name);
160 exit(EXIT_FAILURE);
161 }
162
163 #else /* We have AF_PACKET support */
164
/* Size of the buffers holding interface names in this file */
#define AFP_IFACE_NAME_LENGTH 48

/* Values stored in AFPThreadVars::afp_state and in the peer state */
#define AFP_STATE_DOWN 0
#define AFP_STATE_UP 1

/* NOTE(review): units are not visible here; presumably microseconds -- confirm */
#define AFP_RECONNECT_TIMEOUT 500000
#define AFP_DOWN_COUNTER_INTERVAL 40

/* NOTE(review): presumably the poll() timeout in milliseconds -- confirm */
#define POLL_TIMEOUT 100

#ifndef TP_STATUS_USER_BUSY
/* for new use latest bit available in tp_status.
 * The constant is unsigned: shifting a signed 1 into bit 31 is undefined
 * behaviour and the result would sign-extend when widened. */
#define TP_STATUS_USER_BUSY (1U << 31)
#endif

#ifndef TP_STATUS_VLAN_VALID
/* fallback for headers that do not provide the flag */
#define TP_STATUS_VLAN_VALID (1 << 4)
#endif
183
/* Status codes returned by the AFP read functions of this file */
enum {
    /** Read went fine (also returned when there was nothing left to read) */
    AFP_READ_OK,
    /** Reading from the socket or from the ring failed */
    AFP_READ_FAILURE,
    /** Error during treatment by other functions of Suricata */
    AFP_SURI_FAILURE,
    /** Returned by the ring reader when flushing frames after the kernel
     *  signalled packet loss and AFP_EMERGENCY_MODE is set */
    AFP_KERNEL_DROP,
};
191
/* Error classes.
 * NOTE(review): not used in the visible part of this file; presumably returned
 * by the socket setup code to tell permanent from transient errors -- confirm */
enum {
    AFP_FATAL_ERROR = 1,
    AFP_RECOVERABLE_ERROR,
};
196
/* Typed views on the address of a ring frame header: the same pointer can be
 * read as a TPACKET_V2 header, a TPACKET_V3 header or as an untyped pointer */
union thdr {
    struct tpacket2_hdr *h2;
#ifdef HAVE_TPACKET_V3
    struct tpacket3_hdr *h3;
#endif
    void *raw;
};
204
/* Flow bypass callbacks, stored in Packet::BypassPacketsFlow by the read
 * functions when AFP_BYPASS or AFP_XDPBYPASS is set in the thread flags */
static int AFPBypassCallback(Packet *p);
static int AFPXDPBypassCallback(Packet *p);

/* NOTE(review): not referenced in the visible part of this file -- confirm usage */
#define MAX_MAPS 32
/**
 * \brief Structure to hold thread specific variables.
 *
 * Holds the capture state of one AF_PACKET thread: socket, ring buffer
 * bookkeeping, counters and the configuration used to set the socket up.
 */
typedef struct AFPThreadVars_
{
    /* ring access: v2 is read as an array of frame header pointers, v3 as an
     * array of iovec whose iov_base is a block; both are indexed by
     * frame_offset */
    union AFPRing {
        char *v2;
        struct iovec *v3;
    } ring;

    /* counters */
    uint64_t pkts; /* packets handled by this thread */

    ThreadVars *tv;
    TmSlot *slot; /* slot given to TmThreadsSlotProcessPkt() with each packet */
    LiveDevice *livedev;
    /* data link type for the thread */
    uint32_t datalink;

#ifdef HAVE_PACKET_EBPF
    /* File descriptor of the IPv4 flow bypass table maps */
    int v4_map_fd;
    /* File descriptor of the IPv6 flow bypass table maps */
    int v6_map_fd;
#endif

    /* index of the next frame (V2) or block (V3) to read in the ring */
    unsigned int frame_offset;

    ChecksumValidationMode checksum_mode;

    /* references to packet and drop counters */
    uint16_t capture_kernel_packets;
    uint16_t capture_kernel_drops;
    uint16_t capture_errors;

    /* handle state */
    uint8_t afp_state; /* AFP_STATE_DOWN or AFP_STATE_UP */
    uint8_t copy_mode; /* AFP_COPY_MODE_* value, AFP_COPY_MODE_NONE if packets are not forwarded */
    unsigned int flags; /* AFP_* option bits (AFP_ZERO_COPY, AFP_BYPASS, ...) */

    /* IPS peer */
    AFPPeer *mpeer;

    /* no mmap mode */
    uint8_t *data; /** Buffer recvmsg() reads a packet into */
    int datalen; /** Length of the data buffer */
    int cooked; /* non zero: a SLL header is forged in front of the data */

    /*
     * Init related members
     */

    /* thread specific socket */
    int socket;

    int ring_size;
    int block_size;
    int block_timeout;
    /* socket buffer size */
    int buffer_size;
    /* Filter */
    const char *bpf_filter;
    int ebpf_lb_fd;
    int ebpf_filter_fd;

    int promisc;

    int down_count;

    int cluster_id;
    int cluster_type;

    int threads;

    /* ring request; v2.tp_frame_nr bounds frame_offset in the V2 reader */
    union AFPTpacketReq {
        struct tpacket_req v2;
#ifdef HAVE_TPACKET_V3
        struct tpacket_req3 v3;
#endif
    } req;

    /* capture iface */
    char iface[AFP_IFACE_NAME_LENGTH];
    /* IPS output iface */
    char out_iface[AFP_IFACE_NAME_LENGTH];

    /* mmap'ed ring buffer */
    unsigned int ring_buflen;
    uint8_t *ring_buf;

    uint8_t xdp_mode;

#ifdef HAVE_PACKET_EBPF
    /* eBPF timeout configuration; cpus_count is copied to each Packet when
     * bypass is enabled */
    struct ebpf_timeout_config ebpf_t_config;
#endif

} AFPThreadVars;
305
/* Capture module callbacks, registered in TmModuleReceiveAFPRegister() */
static TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
static void ReceiveAFPThreadExitStats(ThreadVars *, void *);
static TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
static TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);

/* Decode module callbacks, registered in TmModuleDecodeAFPRegister() */
static TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
static TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
static TmEcode DecodeAFP(ThreadVars *, Packet *, void *);

/* Forward declarations of socket, interface and peer reference helpers */
static TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
static int AFPGetDevFlags(int fd, const char *ifname);
static int AFPDerefSocket(AFPPeer* peer);
static int AFPRefSocket(AFPPeer* peer);
320
321
322 /**
323 * \brief Registration Function for RecieveAFP.
324 * \todo Unit tests are needed for this module.
325 */
326 void TmModuleReceiveAFPRegister (void)
327 {
328 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
329 tmm_modules[TMM_RECEIVEAFP].ThreadInit = ReceiveAFPThreadInit;
330 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
331 tmm_modules[TMM_RECEIVEAFP].PktAcqLoop = ReceiveAFPLoop;
332 tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
333 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
334 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = ReceiveAFPThreadDeinit;
335 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
336 tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
337 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
338
339 }
340
341
342 /**
343 * \defgroup afppeers AFP peers list
344 *
 * AF_PACKET has an IPS mode where interfaces are peered: packets received on
 * one interface are sent to the peered interface and the other way around.
 * The ::AFPPeer list maintains the list of peers. Each ::AFPPeer stores the
 * information needed to send packets on its interface.
 * An element of the list must not be destroyed while Suricata is running, as
 * it is used by ::Packet and by other threads.
351 *
352 * @{
353 */
354
/* Global bookkeeping for the AFP peers: the list itself plus the counters
 * used to check peering and to order the start of the capture threads */
typedef struct AFPPeersList_ {
    TAILQ_HEAD(, AFPPeer_) peers; /**< Head of the list of peers. */
    int cnt; /**< Number of registered threads with a copy mode set */
    int peered; /**< Number of those threads that got a peer (grows by 2 per pair) */
    int turn; /**< Next value for initialisation order */
    SC_ATOMIC_DECLARE(int, reached); /**< Counter used to synchronize start */
} AFPPeersList;
362
363 /**
364 * \brief Update the peer.
365 *
366 * Update the AFPPeer of a thread ie set new state, socket number
367 * or iface index.
368 *
369 */
370 static void AFPPeerUpdate(AFPThreadVars *ptv)
371 {
372 if (ptv->mpeer == NULL) {
373 return;
374 }
375 (void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
376 (void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
377 (void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
378 }
379
380 /**
381 * \brief Clean and free ressource used by an ::AFPPeer
382 */
383 static void AFPPeerClean(AFPPeer *peer)
384 {
385 if (peer->flags & AFP_SOCK_PROTECT)
386 SCMutexDestroy(&peer->sock_protect);
387 SC_ATOMIC_DESTROY(peer->socket);
388 SC_ATOMIC_DESTROY(peer->if_idx);
389 SC_ATOMIC_DESTROY(peer->state);
390 SCFree(peer);
391 }
392
/* Global list of AFP peers, set up by AFPPeersListInit() and filled by
 * AFPPeersListAdd() */
AFPPeersList peerslist;
394
395
396 /**
397 * \brief Init the global list of ::AFPPeer
398 */
399 TmEcode AFPPeersListInit()
400 {
401 SCEnter();
402 TAILQ_INIT(&peerslist.peers);
403 peerslist.peered = 0;
404 peerslist.cnt = 0;
405 peerslist.turn = 0;
406 SC_ATOMIC_INIT(peerslist.reached);
407 (void) SC_ATOMIC_SET(peerslist.reached, 0);
408 SCReturnInt(TM_ECODE_OK);
409 }
410
411 /**
412 * \brief Check that all ::AFPPeer got a peer
413 *
414 * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
415 */
416 TmEcode AFPPeersListCheck()
417 {
418 #define AFP_PEERS_MAX_TRY 4
419 #define AFP_PEERS_WAIT 20000
420 int try = 0;
421 SCEnter();
422 while (try < AFP_PEERS_MAX_TRY) {
423 if (peerslist.cnt != peerslist.peered) {
424 usleep(AFP_PEERS_WAIT);
425 } else {
426 SCReturnInt(TM_ECODE_OK);
427 }
428 try++;
429 }
430 SCLogError(SC_ERR_AFP_CREATE, "Threads number not equals");
431 SCReturnInt(TM_ECODE_FAILED);
432 }
433
434 /**
435 * \brief Declare a new AFP thread to AFP peers list.
436 */
437 static TmEcode AFPPeersListAdd(AFPThreadVars *ptv)
438 {
439 SCEnter();
440 AFPPeer *peer = SCMalloc(sizeof(AFPPeer));
441 AFPPeer *pitem;
442 int mtu, out_mtu;
443
444 if (unlikely(peer == NULL)) {
445 SCReturnInt(TM_ECODE_FAILED);
446 }
447 memset(peer, 0, sizeof(AFPPeer));
448 SC_ATOMIC_INIT(peer->socket);
449 SC_ATOMIC_INIT(peer->sock_usage);
450 SC_ATOMIC_INIT(peer->if_idx);
451 SC_ATOMIC_INIT(peer->state);
452 peer->flags = ptv->flags;
453 peer->turn = peerslist.turn++;
454
455 if (peer->flags & AFP_SOCK_PROTECT) {
456 SCMutexInit(&peer->sock_protect, NULL);
457 }
458
459 (void)SC_ATOMIC_SET(peer->sock_usage, 0);
460 (void)SC_ATOMIC_SET(peer->state, AFP_STATE_DOWN);
461 strlcpy(peer->iface, ptv->iface, AFP_IFACE_NAME_LENGTH);
462 ptv->mpeer = peer;
463 /* add element to iface list */
464 TAILQ_INSERT_TAIL(&peerslist.peers, peer, next);
465
466 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
467 peerslist.cnt++;
468
469 /* Iter to find a peer */
470 TAILQ_FOREACH(pitem, &peerslist.peers, next) {
471 if (pitem->peer)
472 continue;
473 if (strcmp(pitem->iface, ptv->out_iface))
474 continue;
475 peer->peer = pitem;
476 pitem->peer = peer;
477 mtu = GetIfaceMTU(ptv->iface);
478 out_mtu = GetIfaceMTU(ptv->out_iface);
479 if (mtu != out_mtu) {
480 SCLogError(SC_ERR_AFP_CREATE,
481 "MTU on %s (%d) and %s (%d) are not equal, "
482 "transmission of packets bigger than %d will fail.",
483 ptv->iface, mtu,
484 ptv->out_iface, out_mtu,
485 (out_mtu > mtu) ? mtu : out_mtu);
486 }
487 peerslist.peered += 2;
488 break;
489 }
490 }
491
492 AFPPeerUpdate(ptv);
493
494 SCReturnInt(TM_ECODE_OK);
495 }
496
497 static int AFPPeersListWaitTurn(AFPPeer *peer)
498 {
499 /* If turn is zero, we already have started threads once */
500 if (peerslist.turn == 0)
501 return 0;
502
503 if (peer->turn == SC_ATOMIC_GET(peerslist.reached))
504 return 0;
505 return 1;
506 }
507
508 static void AFPPeersListReachedInc(void)
509 {
510 if (peerslist.turn == 0)
511 return;
512
513 if (SC_ATOMIC_ADD(peerslist.reached, 1) == peerslist.turn) {
514 SCLogInfo("All AFP capture threads are running.");
515 (void)SC_ATOMIC_SET(peerslist.reached, 0);
516 /* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
517 * restarted.
518 */
519 peerslist.turn = 0;
520 }
521 }
522
523 static int AFPPeersListStarted(void)
524 {
525 return !peerslist.turn;
526 }
527
528 /**
529 * \brief Clean the global peers list.
530 */
531 void AFPPeersListClean()
532 {
533 AFPPeer *pitem;
534
535 while ((pitem = TAILQ_FIRST(&peerslist.peers))) {
536 TAILQ_REMOVE(&peerslist.peers, pitem, next);
537 AFPPeerClean(pitem);
538 }
539 }
540
541 /**
542 * @}
543 */
544
545 /**
546 * \brief Registration Function for DecodeAFP.
547 * \todo Unit tests are needed for this module.
548 */
549 void TmModuleDecodeAFPRegister (void)
550 {
551 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
552 tmm_modules[TMM_DECODEAFP].ThreadInit = DecodeAFPThreadInit;
553 tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
554 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
555 tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
556 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
557 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
558 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
559 }
560
561
562 static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose);
563
564 static inline void AFPDumpCounters(AFPThreadVars *ptv)
565 {
566 #ifdef PACKET_STATISTICS
567 struct tpacket_stats kstats;
568 socklen_t len = sizeof (struct tpacket_stats);
569 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
570 &kstats, &len) > -1) {
571 SCLogDebug("(%s) Kernel: Packets %" PRIu32 ", dropped %" PRIu32 "",
572 ptv->tv->name,
573 kstats.tp_packets, kstats.tp_drops);
574 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, kstats.tp_packets);
575 StatsAddUI64(ptv->tv, ptv->capture_kernel_drops, kstats.tp_drops);
576 (void) SC_ATOMIC_ADD(ptv->livedev->drop, (uint64_t) kstats.tp_drops);
577 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, (uint64_t) kstats.tp_packets);
578 }
579 #endif
580 }
581
582 /**
583 * \brief AF packet read function.
584 *
585 * This function fills
586 * From here the packets are picked up by the DecodeAFP thread.
587 *
588 * \param user pointer to AFPThreadVars
589 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
590 */
591 static int AFPRead(AFPThreadVars *ptv)
592 {
593 Packet *p = NULL;
594 /* XXX should try to use read that get directly to packet */
595 int offset = 0;
596 int caplen;
597 struct sockaddr_ll from;
598 struct iovec iov;
599 struct msghdr msg;
600 struct cmsghdr *cmsg;
601 union {
602 struct cmsghdr cmsg;
603 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
604 } cmsg_buf;
605 unsigned char aux_checksum = 0;
606
607 msg.msg_name = &from;
608 msg.msg_namelen = sizeof(from);
609 msg.msg_iov = &iov;
610 msg.msg_iovlen = 1;
611 msg.msg_control = &cmsg_buf;
612 msg.msg_controllen = sizeof(cmsg_buf);
613 msg.msg_flags = 0;
614
615 if (ptv->cooked)
616 offset = SLL_HEADER_LEN;
617 else
618 offset = 0;
619 iov.iov_len = ptv->datalen - offset;
620 iov.iov_base = ptv->data + offset;
621
622 caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
623
624 if (caplen < 0) {
625 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
626 errno);
627 SCReturnInt(AFP_READ_FAILURE);
628 }
629
630 p = PacketGetFromQueueOrAlloc();
631 if (p == NULL) {
632 SCReturnInt(AFP_SURI_FAILURE);
633 }
634 PKT_SET_SRC(p, PKT_SRC_WIRE);
635 if (ptv->flags & AFP_BYPASS) {
636 p->BypassPacketsFlow = AFPBypassCallback;
637 #ifdef HAVE_PACKET_EBPF
638 p->afp_v.v4_map_fd = ptv->v4_map_fd;
639 p->afp_v.v6_map_fd = ptv->v6_map_fd;
640 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
641 #endif
642 }
643 if (ptv->flags & AFP_XDPBYPASS) {
644 p->BypassPacketsFlow = AFPXDPBypassCallback;
645 #ifdef HAVE_PACKET_EBPF
646 p->afp_v.v4_map_fd = ptv->v4_map_fd;
647 p->afp_v.v6_map_fd = ptv->v6_map_fd;
648 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
649 #endif
650 }
651
652 /* get timestamp of packet via ioctl */
653 if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
654 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
655 errno);
656 TmqhOutputPacketpool(ptv->tv, p);
657 SCReturnInt(AFP_READ_FAILURE);
658 }
659
660 ptv->pkts++;
661 p->livedev = ptv->livedev;
662
663 /* add forged header */
664 if (ptv->cooked) {
665 SllHdr * hdrp = (SllHdr *)ptv->data;
666 /* XXX this is minimalist, but this seems enough */
667 hdrp->sll_protocol = from.sll_protocol;
668 }
669
670 p->datalink = ptv->datalink;
671 SET_PKT_LEN(p, caplen + offset);
672 if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
673 TmqhOutputPacketpool(ptv->tv, p);
674 SCReturnInt(AFP_SURI_FAILURE);
675 }
676 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
677 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
678
679 /* We only check for checksum disable */
680 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
681 p->flags |= PKT_IGNORE_CHECKSUM;
682 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
683 if (ptv->livedev->ignore_checksum) {
684 p->flags |= PKT_IGNORE_CHECKSUM;
685 } else if (ChecksumAutoModeCheck(ptv->pkts,
686 SC_ATOMIC_GET(ptv->livedev->pkts),
687 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
688 ptv->livedev->ignore_checksum = 1;
689 p->flags |= PKT_IGNORE_CHECKSUM;
690 }
691 } else {
692 aux_checksum = 1;
693 }
694
695 /* List is NULL if we don't have activated auxiliary data */
696 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
697 struct tpacket_auxdata *aux;
698
699 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
700 cmsg->cmsg_level != SOL_PACKET ||
701 cmsg->cmsg_type != PACKET_AUXDATA)
702 continue;
703
704 aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
705
706 if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
707 p->flags |= PKT_IGNORE_CHECKSUM;
708 }
709 break;
710 }
711
712 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
713 SCReturnInt(AFP_SURI_FAILURE);
714 }
715 SCReturnInt(AFP_READ_OK);
716 }
717
718 /**
719 * \brief AF packet write function.
720 *
721 * This function has to be called before the memory
722 * related to Packet in ring buffer is released.
723 *
724 * \param pointer to Packet
725 * \param version of capture: TPACKET_V2 or TPACKET_V3
726 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
727 *
728 */
729 static TmEcode AFPWritePacket(Packet *p, int version)
730 {
731 struct sockaddr_ll socket_address;
732 int socket;
733 uint8_t *pstart;
734 size_t plen;
735 union thdr h;
736 uint16_t vlan_tci = 0;
737
738 if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
739 if (PACKET_TEST_ACTION(p, ACTION_DROP)) {
740 return TM_ECODE_OK;
741 }
742 }
743
744 if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
745 return TM_ECODE_OK;
746
747 if (p->ethh == NULL) {
748 SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
749 return TM_ECODE_FAILED;
750 }
751 /* Index of the network device */
752 socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
753 /* Address length*/
754 socket_address.sll_halen = ETH_ALEN;
755 /* Destination MAC */
756 memcpy(socket_address.sll_addr, p->ethh, 6);
757
758 /* Send packet, locking the socket if necessary */
759 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
760 SCMutexLock(&p->afp_v.peer->sock_protect);
761 socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
762
763 h.raw = p->afp_v.relptr;
764
765 if (version == TPACKET_V2) {
766 /* Copy VLAN header from ring memory. For post june 2011 kernel we test
767 * the flag. It is not defined for older kernel so we go best effort
768 * and test for non zero value of the TCI header. */
769 if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
770 vlan_tci = h.h2->tp_vlan_tci;
771 }
772 } else {
773 #ifdef HAVE_TPACKET_V3
774 if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
775 vlan_tci = h.h3->hv1.tp_vlan_tci;
776 }
777 #else
778 /* Should not get here */
779 BUG_ON(1);
780 #endif
781 }
782
783 if (vlan_tci != 0) {
784 pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
785 plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
786 /* move ethernet addresses */
787 memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
788 /* write vlan info */
789 *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
790 *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
791 } else {
792 pstart = GET_PKT_DATA(p);
793 plen = GET_PKT_LEN(p);
794 }
795
796 if (sendto(socket, pstart, plen, 0,
797 (struct sockaddr*) &socket_address,
798 sizeof(struct sockaddr_ll)) < 0) {
799 SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
800 socket,
801 strerror(errno));
802 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
803 SCMutexUnlock(&p->afp_v.peer->sock_protect);
804 return TM_ECODE_FAILED;
805 }
806 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
807 SCMutexUnlock(&p->afp_v.peer->sock_protect);
808
809 return TM_ECODE_OK;
810 }
811
812 static void AFPReleaseDataFromRing(Packet *p)
813 {
814 /* Need to be in copy mode and need to detect early release
815 where Ethernet header could not be set (and pseudo packet) */
816 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
817 AFPWritePacket(p, TPACKET_V2);
818 }
819
820 if (AFPDerefSocket(p->afp_v.mpeer) == 0)
821 goto cleanup;
822
823 if (p->afp_v.relptr) {
824 union thdr h;
825 h.raw = p->afp_v.relptr;
826 h.h2->tp_status = TP_STATUS_KERNEL;
827 }
828
829 cleanup:
830 AFPV_CLEANUP(&p->afp_v);
831 }
832
833 #ifdef HAVE_TPACKET_V3
834 static void AFPReleasePacketV3(Packet *p)
835 {
836 /* Need to be in copy mode and need to detect early release
837 where Ethernet header could not be set (and pseudo packet) */
838 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
839 AFPWritePacket(p, TPACKET_V3);
840 }
841 PacketFreeOrRelease(p);
842 }
843 #endif
844
/**
 * \brief Release function of packets read from a TPACKET_V2 ring.
 *
 * \param p packet to release
 */
static void AFPReleasePacket(Packet *p)
{
    /* first give the frame back to the ring, then release the Packet */
    AFPReleaseDataFromRing(p);
    PacketFreeOrRelease(p);
}
850
/**
 * \brief AF packet read function for ring
 *
 * Walks the frames of the ring starting at ptv->frame_offset. Each frame
 * that is ready is turned into a Packet which is given to
 * TmThreadsSlotProcessPkt().
 *
 * \param ptv pointer to AFPThreadVars
 * \retval AFP_READ_OK when there is nothing more to read, when the end of
 *         the ring was reached or when Suricata is asked to stop
 * \retval AFP_READ_FAILURE if a frame pointer of the ring is NULL
 * \retval AFP_SURI_FAILURE if no Packet could be obtained, the data could
 *         not be set or copied, or the packet processing failed
 * \retval AFP_KERNEL_DROP if the kernel signalled a loss and
 *         AFP_EMERGENCY_MODE is set
 */
static int AFPReadFromRing(AFPThreadVars *ptv)
{
    Packet *p = NULL;
    union thdr h;
    /* set once a frame carries TP_STATUS_LOSING */
    uint8_t emergency_flush = 0;
    /* number of ready frames seen during this call */
    int read_pkts = 0;
    /* frame index at which the search for a ready frame started, -1 if no
     * search is in progress */
    int loop_start = -1;


    /* Loop till we have packets available */
    while (1) {
        if (unlikely(suricata_ctl_flags != 0)) {
            break;
        }

        /* Read packet from ring */
        h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
        if (unlikely(h.raw == NULL)) {
            /* Impossible we reach this point in normal condition, so trigger
             * a failure in reading */
            SCReturnInt(AFP_READ_FAILURE);
        }

        /* frame not ready: empty status or still in use by Suricata */
        if ((! h.h2->tp_status) || (h.h2->tp_status & TP_STATUS_USER_BUSY)) {
            if (read_pkts == 0) {
                /* nothing read yet: look for a ready frame further in the
                 * ring, giving up after one full turn */
                if (loop_start == -1) {
                    loop_start = ptv->frame_offset;
                } else if (unlikely(loop_start == (int)ptv->frame_offset)) {
                    SCReturnInt(AFP_READ_OK);
                }
                if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
                    ptv->frame_offset = 0;
                }
                continue;
            }
            if ((emergency_flush) && (ptv->flags & AFP_EMERGENCY_MODE)) {
                SCReturnInt(AFP_KERNEL_DROP);
            } else {
                SCReturnInt(AFP_READ_OK);
            }
        }

        read_pkts++;
        loop_start = -1;

        /* Our packet is still used by suricata, we exit read loop to
         * gain some time.
         * NOTE(review): the branch above already leaves the iteration when
         * this bit is set, so this only triggers if the status changes
         * between the two reads -- confirm. */
        if (h.h2->tp_status & TP_STATUS_USER_BUSY) {
            SCReturnInt(AFP_READ_OK);
        }

        /* emergency flush: give the frame back without processing it */
        if ((ptv->flags & AFP_EMERGENCY_MODE) && (emergency_flush == 1)) {
            h.h2->tp_status = TP_STATUS_KERNEL;
            goto next_frame;
        }

        p = PacketGetFromQueueOrAlloc();
        if (p == NULL) {
            SCReturnInt(AFP_SURI_FAILURE);
        }
        PKT_SET_SRC(p, PKT_SRC_WIRE);
        if (ptv->flags & AFP_BYPASS) {
            p->BypassPacketsFlow = AFPBypassCallback;
#ifdef HAVE_PACKET_EBPF
            p->afp_v.v4_map_fd = ptv->v4_map_fd;
            p->afp_v.v6_map_fd = ptv->v6_map_fd;
            p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
#endif
        }
        if (ptv->flags & AFP_XDPBYPASS) {
            p->BypassPacketsFlow = AFPXDPBypassCallback;
#ifdef HAVE_PACKET_EBPF
            p->afp_v.v4_map_fd = ptv->v4_map_fd;
            p->afp_v.v6_map_fd = ptv->v6_map_fd;
            p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
#endif
        }

        /* Suricata will treat packet so telling it is busy, this
         * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
         * function. */
        h.h2->tp_status |= TP_STATUS_USER_BUSY;

        ptv->pkts++;
        p->livedev = ptv->livedev;
        p->datalink = ptv->datalink;

        if (h.h2->tp_len > h.h2->tp_snaplen) {
            SCLogDebug("Packet length (%d) > snaplen (%d), truncating",
                    h.h2->tp_len, h.h2->tp_snaplen);
        }

        /* get vlan id from header */
        if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
            (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
            p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
            p->vlan_idx = 1;
        }

        if (ptv->flags & AFP_ZERO_COPY) {
            /* zero copy: the Packet points into the ring, the frame is only
             * released through p->ReleasePacket */
            if (PacketSetData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
                TmqhOutputPacketpool(ptv->tv, p);
                SCReturnInt(AFP_SURI_FAILURE);
            } else {
                p->afp_v.relptr = h.raw;
                p->ReleasePacket = AFPReleasePacket;
                p->afp_v.mpeer = ptv->mpeer;
                AFPRefSocket(ptv->mpeer);

                p->afp_v.copy_mode = ptv->copy_mode;
                if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
                    p->afp_v.peer = ptv->mpeer->peer;
                } else {
                    p->afp_v.peer = NULL;
                }
            }
        } else {
            if (PacketCopyData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
                /* As we can possibly fail to copy the data due to invalid data, let's
                 * skip this packet and switch to the next one.
                 */
                h.h2->tp_status = TP_STATUS_KERNEL;
                if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
                    ptv->frame_offset = 0;
                }
                TmqhOutputPacketpool(ptv->tv, p);
                SCReturnInt(AFP_SURI_FAILURE);
            }
        }

        /* Timestamp: the frame header gives seconds and nanoseconds */
        p->ts.tv_sec = h.h2->tp_sec;
        p->ts.tv_usec = h.h2->tp_nsec/1000;
        SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
                GET_PKT_LEN(p), p, GET_PKT_DATA(p));

        /* We only check for checksum disable */
        if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
            p->flags |= PKT_IGNORE_CHECKSUM;
        } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
            if (ptv->livedev->ignore_checksum) {
                p->flags |= PKT_IGNORE_CHECKSUM;
            } else if (ChecksumAutoModeCheck(ptv->pkts,
                        SC_ATOMIC_GET(ptv->livedev->pkts),
                        SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
                ptv->livedev->ignore_checksum = 1;
                p->flags |= PKT_IGNORE_CHECKSUM;
            }
        } else {
            if (h.h2->tp_status & TP_STATUS_CSUMNOTREADY) {
                p->flags |= PKT_IGNORE_CHECKSUM;
            }
        }
        /* the kernel reports losses: remember it and refresh the counters */
        if (h.h2->tp_status & TP_STATUS_LOSING) {
            emergency_flush = 1;
            AFPDumpCounters(ptv);
        }

        /* release frame if not in zero copy mode */
        if (!(ptv->flags & AFP_ZERO_COPY)) {
            h.h2->tp_status = TP_STATUS_KERNEL;
        }

        if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
            /* give the frame back and move on before reporting the failure */
            h.h2->tp_status = TP_STATUS_KERNEL;
            if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
                ptv->frame_offset = 0;
            }
            SCReturnInt(AFP_SURI_FAILURE);
        }

next_frame:
        if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
            ptv->frame_offset = 0;
            /* Get out of loop to be sure we will reach maintenance tasks */
            SCReturnInt(AFP_READ_OK);
        }
    }

    SCReturnInt(AFP_READ_OK);
}
1041
1042 #ifdef HAVE_TPACKET_V3
/**
 * \brief Give a TPACKET_V3 block back to the kernel.
 *
 * \param pbd descriptor of the block, its status is set to TP_STATUS_KERNEL
 */
static inline void AFPFlushBlock(struct tpacket_block_desc *pbd)
{
    pbd->hdr.bh1.block_status = TP_STATUS_KERNEL;
}
1047
/**
 * \brief Build a Packet from one TPACKET_V3 packet header and process it.
 *
 * \param ptv pointer to AFPThreadVars
 * \param pbd descriptor of the block holding the packet (not used in the body)
 * \param ppd header of the packet inside the block
 * \retval AFP_READ_OK on success
 * \retval AFP_SURI_FAILURE if no Packet could be obtained, the data could
 *         not be set or copied, or the packet processing failed
 */
static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc *pbd, struct tpacket3_hdr *ppd)
{
    Packet *p = PacketGetFromQueueOrAlloc();
    if (p == NULL) {
        SCReturnInt(AFP_SURI_FAILURE);
    }
    PKT_SET_SRC(p, PKT_SRC_WIRE);
    /* Note: here AFP_BYPASS takes precedence over AFP_XDPBYPASS when both
     * flags are set */
    if (ptv->flags & AFP_BYPASS) {
        p->BypassPacketsFlow = AFPBypassCallback;
#ifdef HAVE_PACKET_EBPF
        p->afp_v.v4_map_fd = ptv->v4_map_fd;
        p->afp_v.v6_map_fd = ptv->v6_map_fd;
        p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
#endif
    } else if (ptv->flags & AFP_XDPBYPASS) {
        p->BypassPacketsFlow = AFPXDPBypassCallback;
#ifdef HAVE_PACKET_EBPF
        p->afp_v.v4_map_fd = ptv->v4_map_fd;
        p->afp_v.v6_map_fd = ptv->v6_map_fd;
        p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
#endif
    }

    ptv->pkts++;
    p->livedev = ptv->livedev;
    p->datalink = ptv->datalink;

    /* get vlan id from header */
    if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
        (ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
        p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
        p->vlan_idx = 1;
    }

    if (ptv->flags & AFP_ZERO_COPY) {
        /* zero copy: the Packet points into the block */
        if (PacketSetData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
            TmqhOutputPacketpool(ptv->tv, p);
            SCReturnInt(AFP_SURI_FAILURE);
        }
        p->afp_v.relptr = ppd;
        p->ReleasePacket = AFPReleasePacketV3;
        p->afp_v.mpeer = ptv->mpeer;
        AFPRefSocket(ptv->mpeer);

        p->afp_v.copy_mode = ptv->copy_mode;
        if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
            p->afp_v.peer = ptv->mpeer->peer;
        } else {
            p->afp_v.peer = NULL;
        }
    } else {
        if (PacketCopyData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
            TmqhOutputPacketpool(ptv->tv, p);
            SCReturnInt(AFP_SURI_FAILURE);
        }
    }
    /* Timestamp: the packet header gives seconds and nanoseconds */
    p->ts.tv_sec = ppd->tp_sec;
    p->ts.tv_usec = ppd->tp_nsec/1000;
    SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
            GET_PKT_LEN(p), p, GET_PKT_DATA(p));

    /* We only check for checksum disable */
    if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
        p->flags |= PKT_IGNORE_CHECKSUM;
    } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
        if (ptv->livedev->ignore_checksum) {
            p->flags |= PKT_IGNORE_CHECKSUM;
        } else if (ChecksumAutoModeCheck(ptv->pkts,
                    SC_ATOMIC_GET(ptv->livedev->pkts),
                    SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
            ptv->livedev->ignore_checksum = 1;
            p->flags |= PKT_IGNORE_CHECKSUM;
        }
    } else {
        if (ppd->tp_status & TP_STATUS_CSUMNOTREADY) {
            p->flags |= PKT_IGNORE_CHECKSUM;
        }
    }

    if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
        SCReturnInt(AFP_SURI_FAILURE);
    }

    SCReturnInt(AFP_READ_OK);
}
1133
1134 static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
1135 {
1136 int num_pkts = pbd->hdr.bh1.num_pkts, i;
1137 uint8_t *ppd;
1138 int ret = 0;
1139
1140 ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
1141 for (i = 0; i < num_pkts; ++i) {
1142 ret = AFPParsePacketV3(ptv, pbd,
1143 (struct tpacket3_hdr *)ppd);
1144 switch (ret) {
1145 case AFP_READ_OK:
1146 break;
1147 case AFP_SURI_FAILURE:
1148 /* Internal error but let's just continue and
1149 * treat thenext packet */
1150 break;
1151 case AFP_READ_FAILURE:
1152 SCReturnInt(AFP_READ_FAILURE);
1153 default:
1154 SCReturnInt(ret);
1155 }
1156 ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset;
1157 }
1158
1159 SCReturnInt(AFP_READ_OK);
1160 }
1161 #endif /* HAVE_TPACKET_V3 */
1162
1163 /**
1164 * \brief AF packet read function for ring
1165 *
1166 * This function fills
1167 * From here the packets are picked up by the DecodeAFP thread.
1168 *
1169 * \param user pointer to AFPThreadVars
1170 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1171 */
1172 static int AFPReadFromRingV3(AFPThreadVars *ptv)
1173 {
1174 #ifdef HAVE_TPACKET_V3
1175 struct tpacket_block_desc *pbd;
1176 int ret = 0;
1177
1178 /* Loop till we have packets available */
1179 while (1) {
1180 if (unlikely(suricata_ctl_flags != 0)) {
1181 SCLogInfo("Exiting AFP V3 read loop");
1182 break;
1183 }
1184
1185 pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
1186
1187 /* block is not ready to be read */
1188 if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
1189 SCReturnInt(AFP_READ_OK);
1190 }
1191
1192 ret = AFPWalkBlock(ptv, pbd);
1193 if (unlikely(ret != AFP_READ_OK)) {
1194 AFPFlushBlock(pbd);
1195 SCReturnInt(ret);
1196 }
1197
1198 AFPFlushBlock(pbd);
1199 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
1200 /* return to maintenance task after one loop on the ring */
1201 if (ptv->frame_offset == 0) {
1202 SCReturnInt(AFP_READ_OK);
1203 }
1204 }
1205 #endif
1206 SCReturnInt(AFP_READ_OK);
1207 }
1208
1209 /**
1210 * \brief Reference socket
1211 *
1212 * \retval O in case of failure, 1 in case of success
1213 */
1214 static int AFPRefSocket(AFPPeer* peer)
1215 {
1216 if (unlikely(peer == NULL))
1217 return 0;
1218
1219 (void)SC_ATOMIC_ADD(peer->sock_usage, 1);
1220 return 1;
1221 }
1222
1223
1224 /**
1225 * \brief Dereference socket
1226 *
1227 * \retval 1 if socket is still alive, 0 if not
1228 */
1229 static int AFPDerefSocket(AFPPeer* peer)
1230 {
1231 if (peer == NULL)
1232 return 1;
1233
1234 if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 0) {
1235 if (SC_ATOMIC_GET(peer->state) == AFP_STATE_DOWN) {
1236 SCLogInfo("Cleaning socket connected to '%s'", peer->iface);
1237 close(SC_ATOMIC_GET(peer->socket));
1238 return 0;
1239 }
1240 }
1241 return 1;
1242 }
1243
1244 static void AFPSwitchState(AFPThreadVars *ptv, int state)
1245 {
1246 ptv->afp_state = state;
1247 ptv->down_count = 0;
1248
1249 AFPPeerUpdate(ptv);
1250
1251 /* Do cleaning if switching to down state */
1252 if (state == AFP_STATE_DOWN) {
1253 #ifdef HAVE_TPACKET_V3
1254 if (ptv->flags & AFP_TPACKET_V3) {
1255 if (!ptv->ring.v3) {
1256 SCFree(ptv->ring.v3);
1257 ptv->ring.v3 = NULL;
1258 }
1259 } else {
1260 #endif
1261 if (ptv->ring.v2) {
1262 /* only used in reading phase, we can free it */
1263 SCFree(ptv->ring.v2);
1264 ptv->ring.v2 = NULL;
1265 }
1266 #ifdef HAVE_TPACKET_V3
1267 }
1268 #endif
1269 if (ptv->socket != -1) {
1270 /* we need to wait for all packets to return data */
1271 if (SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1) == 0) {
1272 SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
1273 munmap(ptv->ring_buf, ptv->ring_buflen);
1274 close(ptv->socket);
1275 ptv->socket = -1;
1276 }
1277 }
1278 }
1279 if (state == AFP_STATE_UP) {
1280 (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
1281 }
1282 }
1283
1284 static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
1285 uint64_t *discarded_pkts)
1286 {
1287 struct sockaddr_ll from;
1288 struct iovec iov;
1289 struct msghdr msg;
1290 struct timeval ts;
1291 union {
1292 struct cmsghdr cmsg;
1293 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
1294 } cmsg_buf;
1295
1296
1297 if (unlikely(suricata_ctl_flags != 0)) {
1298 return 1;
1299 }
1300
1301 msg.msg_name = &from;
1302 msg.msg_namelen = sizeof(from);
1303 msg.msg_iov = &iov;
1304 msg.msg_iovlen = 1;
1305 msg.msg_control = &cmsg_buf;
1306 msg.msg_controllen = sizeof(cmsg_buf);
1307 msg.msg_flags = 0;
1308
1309 iov.iov_len = ptv->datalen;
1310 iov.iov_base = ptv->data;
1311
1312 (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
1313
1314 if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
1315 /* FIXME */
1316 return -1;
1317 }
1318
1319 if ((ts.tv_sec > synctv->tv_sec) ||
1320 (ts.tv_sec >= synctv->tv_sec &&
1321 ts.tv_usec > synctv->tv_usec)) {
1322 return 1;
1323 }
1324 return 0;
1325 }
1326
1327 static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
1328 uint64_t *discarded_pkts)
1329 {
1330 union thdr h;
1331
1332 if (unlikely(suricata_ctl_flags != 0)) {
1333 return 1;
1334 }
1335
1336 #ifdef HAVE_TPACKET_V3
1337 if (ptv->flags & AFP_TPACKET_V3) {
1338 int ret = 0;
1339 struct tpacket_block_desc *pbd;
1340 pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
1341 *discarded_pkts += pbd->hdr.bh1.num_pkts;
1342 struct tpacket3_hdr *ppd =
1343 (struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
1344 if (((time_t)ppd->tp_sec > synctv->tv_sec) ||
1345 ((time_t)ppd->tp_sec == synctv->tv_sec &&
1346 (suseconds_t) (ppd->tp_nsec / 1000) > (suseconds_t)synctv->tv_usec)) {
1347 ret = 1;
1348 }
1349 AFPFlushBlock(pbd);
1350 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
1351 return ret;
1352
1353 } else
1354 #endif
1355 {
1356 /* Read packet from ring */
1357 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
1358 if (h.raw == NULL) {
1359 return -1;
1360 }
1361 (*discarded_pkts)++;
1362 if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
1363 ((time_t)h.h2->tp_sec == synctv->tv_sec &&
1364 (suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
1365 return 1;
1366 }
1367
1368 h.h2->tp_status = TP_STATUS_KERNEL;
1369 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
1370 ptv->frame_offset = 0;
1371 }
1372 }
1373
1374
1375 return 0;
1376 }
1377
1378 /** \brief wait for all afpacket threads to fully init
1379 *
1380 * Discard packets before all threads are ready, as the cluster
1381 * setup is not complete yet.
1382 *
1383 * if AFPPeersListStarted() returns true init is complete
1384 *
1385 * \retval r 1 = happy, otherwise unhappy
1386 */
1387 static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
1388 {
1389 struct timeval synctv;
1390 struct pollfd fds;
1391
1392 fds.fd = ptv->socket;
1393 fds.events = POLLIN;
1394
1395 /* Set timeval to end of the world */
1396 synctv.tv_sec = 0xffffffff;
1397 synctv.tv_usec = 0xffffffff;
1398
1399 while (1) {
1400 int r = poll(&fds, 1, POLL_TIMEOUT);
1401 if (r > 0 &&
1402 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1403 SCLogWarning(SC_ERR_AFP_READ, "poll failed %02x",
1404 fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL));
1405 return 0;
1406 } else if (r > 0) {
1407 if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
1408 gettimeofday(&synctv, NULL);
1409 }
1410 if (ptv->flags & AFP_RING_MODE) {
1411 r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
1412 } else {
1413 r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
1414 }
1415 SCLogDebug("Discarding on %s", ptv->tv->name);
1416 switch (r) {
1417 case 1:
1418 SCLogDebug("Starting to read on %s", ptv->tv->name);
1419 return 1;
1420 case -1:
1421 return r;
1422 }
1423 /* no packets */
1424 } else if (r == 0 && AFPPeersListStarted()) {
1425 SCLogDebug("Starting to read on %s", ptv->tv->name);
1426 return 1;
1427 } else if (r < 0) { /* only exit on error */
1428 SCLogWarning(SC_ERR_AFP_READ, "poll failed with retval %d", r);
1429 return 0;
1430 }
1431 }
1432 return 1;
1433 }
1434
1435 /**
1436 * \brief Try to reopen socket
1437 *
1438 * \retval 0 in case of success, negative if error occurs or a condition
1439 * is not met.
1440 */
1441 static int AFPTryReopen(AFPThreadVars *ptv)
1442 {
1443 ptv->down_count++;
1444
1445 /* Don't reconnect till we have packet that did not release data */
1446 if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
1447 return -1;
1448 }
1449
1450 int afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
1451 if (afp_activate_r != 0) {
1452 if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
1453 SCLogWarning(SC_ERR_AFP_CREATE, "Can not open iface '%s'",
1454 ptv->iface);
1455 }
1456 return afp_activate_r;
1457 }
1458
1459 SCLogInfo("Interface '%s' is back", ptv->iface);
1460 return 0;
1461 }
1462
/**
 *  \brief Main AF_PACKET reading Loop function
 *
 *  Selects the read function matching the capture mode (TPACKET_V3 ring,
 *  V2 ring or plain socket read), opens the socket if the thread is still
 *  in AFP_STATE_DOWN, synchronises the start with AFPSynchronizeStart()
 *  and then polls the socket until suricata_ctl_flags becomes non-zero.
 *  While the interface is down the loop keeps retrying AFPTryReopen().
 *
 *  \param tv   thread vars of the capture thread
 *  \param data pointer to the AFPThreadVars of this thread
 *  \param slot TmSlot of this module; its slot_next is stored in ptv->slot
 *  \retval TM_ECODE_FAILED if the initial socket creation reports
 *          AFP_FATAL_ERROR, TM_ECODE_OK when the loop is left
 */
TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
{
    SCEnter();

    AFPThreadVars *ptv = (AFPThreadVars *)data;
    struct pollfd fds;
    int r;
    TmSlot *s = (TmSlot *)slot;
    /* second at which the counters were last dumped */
    time_t last_dump = 0;
    time_t current_time;
    int (*AFPReadFunc) (AFPThreadVars *);
    /* packets thrown away by AFPSynchronizeStart() */
    uint64_t discarded_pkts = 0;

    ptv->slot = s->slot_next;

    /* pick the read function matching the capture mode */
    if (ptv->flags & AFP_RING_MODE) {
        if (ptv->flags & AFP_TPACKET_V3) {
            AFPReadFunc = AFPReadFromRingV3;
        } else {
            AFPReadFunc = AFPReadFromRing;
        }
    } else {
        AFPReadFunc = AFPRead;
    }

    if (ptv->afp_state == AFP_STATE_DOWN) {
        /* Wait for our turn, threads before us must have opened the socket */
        while (AFPPeersListWaitTurn(ptv->mpeer)) {
            usleep(1000);
            if (suricata_ctl_flags != 0) {
                break;
            }
        }
        r = AFPCreateSocket(ptv, ptv->iface, 1);
        if (r < 0) {
            /* AFPCreateSocket() returns the negated error code */
            switch (-r) {
                case AFP_FATAL_ERROR:
                    SCLogError(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, fatal error");
                    SCReturnInt(TM_ECODE_FAILED);
                case AFP_RECOVERABLE_ERROR:
                    SCLogWarning(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, retrying soon");
            }
        }
        AFPPeersListReachedInc();
    }
    if (ptv->afp_state == AFP_STATE_UP) {
        SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
        /* the return value of the synchronisation is not checked */
        AFPSynchronizeStart(ptv, &discarded_pkts);
        /* let's reset counter as we will start the capture at the
         * next function call */
#ifdef PACKET_STATISTICS
         struct tpacket_stats kstats;
         socklen_t len = sizeof (struct tpacket_stats);
         if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
                     &kstats, &len) > -1) {
             uint64_t pkts = 0;
             SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
                     ", dropped %" PRIu32 "",
                     ptv->tv->name,
                     kstats.tp_packets, kstats.tp_drops);
             /* packets discarded during synchronisation and kernel drops
              * are left out of the startup packet count */
             pkts = kstats.tp_packets - discarded_pkts - kstats.tp_drops;
             StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, pkts);
             (void) SC_ATOMIC_ADD(ptv->livedev->pkts, pkts);
         }
#endif
    }

    fds.fd = ptv->socket;
    fds.events = POLLIN;

    while (1) {
        /* Start by checking the state of our interface */
        if (unlikely(ptv->afp_state == AFP_STATE_DOWN)) {
            /* set when the retry loop was left because of a shutdown */
            int dbreak = 0;

            do {
                usleep(AFP_RECONNECT_TIMEOUT);
                if (suricata_ctl_flags != 0) {
                    dbreak = 1;
                    break;
                }
                r = AFPTryReopen(ptv);
                /* refresh the polled descriptor after every attempt */
                fds.fd = ptv->socket;
            } while (r < 0);
            if (dbreak == 1)
                break;
        }

        /* make sure we have at least one packet in the packet pool, to prevent
         * us from alloc'ing packets at line rate */
        PacketPoolWait();

        r = poll(&fds, 1, POLL_TIMEOUT);

        if (suricata_ctl_flags != 0) {
            break;
        }

        if (r > 0 &&
                (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
            /* every error condition ends with the state switched to down,
             * except POLLERR when recv() reports no error */
            if (fds.revents & (POLLHUP | POLLRDHUP)) {
                AFPSwitchState(ptv, AFP_STATE_DOWN);
                continue;
            } else if (fds.revents & POLLERR) {
                char c;
                /* Do a recv to get errno */
                if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
                    continue; /* what, no error? */
                SCLogError(SC_ERR_AFP_READ,
                           "Error reading data from iface '%s': (%d) %s",
                           ptv->iface, errno, strerror(errno));
                AFPSwitchState(ptv, AFP_STATE_DOWN);
                continue;
            } else if (fds.revents & POLLNVAL) {
                SCLogError(SC_ERR_AFP_READ, "Invalid polling request");
                AFPSwitchState(ptv, AFP_STATE_DOWN);
                continue;
            }
        } else if (r > 0) {
            /* data is available: run the read function selected above */
            r = AFPReadFunc(ptv);
            switch (r) {
                case AFP_READ_OK:
                    /* Trigger one dump of stats every second */
                    current_time = time(NULL);
                    if (current_time != last_dump) {
                        AFPDumpCounters(ptv);
                        last_dump = current_time;
                    }
                    break;
                case AFP_READ_FAILURE:
                    /* AFPRead in error: best to reset the socket */
                    SCLogError(SC_ERR_AFP_READ,
                           "AFPRead error reading data from iface '%s': (%d) %s",
                           ptv->iface, errno, strerror(errno));
                    AFPSwitchState(ptv, AFP_STATE_DOWN);
                    /* restarts the outer while loop */
                    continue;
                case AFP_SURI_FAILURE:
                    StatsIncr(ptv->tv, ptv->capture_errors);
                    break;
                case AFP_KERNEL_DROP:
                    AFPDumpCounters(ptv);
                    break;
            }
        } else if (unlikely(r == 0)) {
            /* Trigger one dump of stats every second */
            current_time = time(NULL);
            if (current_time != last_dump) {
                AFPDumpCounters(ptv);
                last_dump = current_time;
            }
            /* poll timed out, let's handle our timeout path */
            TmThreadsCaptureHandleTimeout(tv, NULL);

        } else if ((r < 0) && (errno != EINTR)) {
            /* poll() failed for a reason other than an interrupted call */
            SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d) %s",
                       ptv->iface,
                       errno, strerror(errno));
            AFPSwitchState(ptv, AFP_STATE_DOWN);
            continue;
        }
        StatsSyncCountersIfSignalled(tv);
    }

    /* final counter dump before leaving the thread function */
    AFPDumpCounters(ptv);
    StatsSyncCountersIfSignalled(tv);
    SCReturnInt(TM_ECODE_OK);
}
1633
1634 static int AFPGetDevFlags(int fd, const char *ifname)
1635 {
1636 struct ifreq ifr;
1637
1638 memset(&ifr, 0, sizeof(ifr));
1639 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1640
1641 if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) {
1642 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1643 ifname, strerror(errno));
1644 return -1;
1645 }
1646
1647 return ifr.ifr_flags;
1648 }
1649
1650
1651 static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose)
1652 {
1653 struct ifreq ifr;
1654
1655 memset(&ifr, 0, sizeof(ifr));
1656 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1657
1658 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
1659 if (verbose)
1660 SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
1661 ifname, strerror(errno));
1662 return -1;
1663 }
1664
1665 return ifr.ifr_ifindex;
1666 }
1667
1668 static int AFPGetDevLinktype(int fd, const char *ifname)
1669 {
1670 struct ifreq ifr;
1671
1672 memset(&ifr, 0, sizeof(ifr));
1673 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1674
1675 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1676 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1677 ifname, strerror(errno));
1678 return -1;
1679 }
1680
1681 switch (ifr.ifr_hwaddr.sa_family) {
1682 case ARPHRD_LOOPBACK:
1683 return LINKTYPE_ETHERNET;
1684 case ARPHRD_PPP:
1685 case ARPHRD_NONE:
1686 return LINKTYPE_RAW;
1687 default:
1688 return ifr.ifr_hwaddr.sa_family;
1689 }
1690 }
1691
1692 int AFPGetLinkType(const char *ifname)
1693 {
1694 int ltype;
1695
1696 int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1697 if (fd == -1) {
1698 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
1699 return LINKTYPE_RAW;
1700 }
1701
1702 ltype = AFPGetDevLinktype(fd, ifname);
1703 close(fd);
1704
1705 return ltype;
1706 }
1707
1708 static int AFPComputeRingParams(AFPThreadVars *ptv, int order)
1709 {
1710 /* Compute structure:
1711 Target is to store all pending packets
1712 with a size equal to MTU + auxdata
1713 And we keep a decent number of block
1714
1715 To do so:
1716 Compute frame_size (aligned to be able to fit in block
1717 Check which block size we need. Blocksize is a 2^n * pagesize
1718 We then need to get order, big enough to have
1719 frame_size < block size
1720 Find number of frame per block (divide)
1721 Fill in packet_req
1722
1723 Compute frame size:
1724 described in packet_mmap.txt
1725 dependant on snaplen (need to use a variable ?)
1726 snaplen: MTU ?
1727 tp_hdrlen determine_version in daq_afpacket
1728 in V1: sizeof(struct tpacket_hdr);
1729 in V2: val in getsockopt(instance->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len)
1730 frame size: TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1731
1732 */
1733 int tp_hdrlen = sizeof(struct tpacket_hdr);
1734 int snaplen = default_packet_size;
1735
1736 if (snaplen == 0) {
1737 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1738 if (snaplen <= 0) {
1739 SCLogWarning(SC_ERR_INVALID_VALUE,
1740 "Unable to get MTU, setting snaplen to sane default of 1514");
1741 snaplen = 1514;
1742 }
1743 }
1744
1745 ptv->req.v2.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1746 ptv->req.v2.tp_block_size = getpagesize() << order;
1747 int frames_per_block = ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size;
1748 if (frames_per_block == 0) {
1749 SCLogError(SC_ERR_INVALID_VALUE, "Frame size bigger than block size");
1750 return -1;
1751 }
1752 ptv->req.v2.tp_frame_nr = ptv->ring_size;
1753 ptv->req.v2.tp_block_nr = ptv->req.v2.tp_frame_nr / frames_per_block + 1;
1754 /* exact division */
1755 ptv->req.v2.tp_frame_nr = ptv->req.v2.tp_block_nr * frames_per_block;
1756 SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
1757 ptv->req.v2.tp_block_size, ptv->req.v2.tp_block_nr,
1758 ptv->req.v2.tp_frame_size, ptv->req.v2.tp_frame_nr);
1759 return 1;
1760 }
1761
1762 #ifdef HAVE_TPACKET_V3
1763 static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
1764 {
1765 ptv->req.v3.tp_block_size = ptv->block_size;
1766 ptv->req.v3.tp_frame_size = 2048;
1767 int frames_per_block = 0;
1768 int tp_hdrlen = sizeof(struct tpacket3_hdr);
1769 int snaplen = default_packet_size;
1770
1771 if (snaplen == 0) {
1772 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1773 if (snaplen <= 0) {
1774 SCLogWarning(SC_ERR_INVALID_VALUE,
1775 "Unable to get MTU, setting snaplen to sane default of 1514");
1776 snaplen = 1514;
1777 }
1778 }
1779
1780 ptv->req.v3.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1781 frames_per_block = ptv->req.v3.tp_block_size / ptv->req.v3.tp_frame_size;
1782
1783 if (frames_per_block == 0) {
1784 SCLogError(SC_ERR_INVALID_VALUE,
1785 "Block size is too small, it should be at least %d",
1786 ptv->req.v3.tp_frame_size);
1787 return -1;
1788 }
1789 ptv->req.v3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
1790 /* exact division */
1791 ptv->req.v3.tp_frame_nr = ptv->req.v3.tp_block_nr * frames_per_block;
1792 ptv->req.v3.tp_retire_blk_tov = ptv->block_timeout;
1793 ptv->req.v3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
1794 SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
1795 ptv->req.v3.tp_block_size, ptv->req.v3.tp_block_nr,
1796 ptv->req.v3.tp_frame_size, ptv->req.v3.tp_frame_nr,
1797 ptv->req.v3.tp_block_size * ptv->req.v3.tp_block_nr
1798 );
1799 return 1;
1800 }
1801 #endif
1802
/**
 * \brief Set up the mmap'ed RX ring on the socket of the thread
 *
 * Steps, in order: check that the kernel knows the wanted TPACKET version,
 * select that version on the socket, optionally enable hardware
 * timestamping, reserve head room for a VLAN header when a copy mode is
 * active, request the RX ring (PACKET_RX_RING), mmap it and build the
 * array of block (V3) or frame (V2) pointers into the mapping.
 *
 * \param ptv pointer to AFPThreadVars, ptv->socket must be open
 * \param devname interface name, only used in log messages
 * \retval 0 on success, AFP_FATAL_ERROR on any failure
 */
static int AFPSetupRing(AFPThreadVars *ptv, char *devname)
{
    int val;
    unsigned int len = sizeof(val), i;
    int order;
    int r, mmap_flag;

#ifdef HAVE_TPACKET_V3
    if (ptv->flags & AFP_TPACKET_V3) {
        val = TPACKET_V3;
    } else
#endif
    {
        val = TPACKET_V2;
    }
    /* val carries the version in; the value written back by the kernel is
     * not used, val is reassigned below */
    if (getsockopt(ptv->socket, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
        if (errno == ENOPROTOOPT) {
            if (ptv->flags & AFP_TPACKET_V3) {
                SCLogError(SC_ERR_AFP_CREATE,
                           "Too old kernel giving up (need 3.2 for TPACKET_V3)");
            } else {
                SCLogError(SC_ERR_AFP_CREATE,
                           "Too old kernel giving up (need 2.6.27 at least)");
            }
        }
        SCLogError(SC_ERR_AFP_CREATE, "Error when retrieving packet header len");
        return AFP_FATAL_ERROR;
    }

    /* select the TPACKET version on the socket */
    val = TPACKET_V2;
#ifdef HAVE_TPACKET_V3
    if (ptv->flags & AFP_TPACKET_V3) {
        val = TPACKET_V3;
    }
#endif
    if (setsockopt(ptv->socket, SOL_PACKET, PACKET_VERSION, &val,
                   sizeof(val)) < 0) {
        SCLogError(SC_ERR_AFP_CREATE,
                   "Can't activate TPACKET_V2/TPACKET_V3 on packet socket: %s",
                   strerror(errno));
        return AFP_FATAL_ERROR;
    }

#ifdef HAVE_HW_TIMESTAMPING
    /* failing to enable hardware timestamps only triggers a warning */
    int req = SOF_TIMESTAMPING_RAW_HARDWARE;
    if (setsockopt(ptv->socket, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req,
                   sizeof(req)) < 0) {
        SCLogWarning(SC_ERR_AFP_CREATE,
                     "Can't activate hardware timestamping on packet socket: %s",
                     strerror(errno));
    }
#endif

    /* Let's reserve head room so we can add the VLAN header in IPS
     * or TAP mode before write the packet */
    if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
        /* Only one vlan is extracted from AFP header so
         * one VLAN header length is enough. */
        int reserve = VLAN_HEADER_LEN;
        if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
                    sizeof(reserve)) < 0) {
            SCLogError(SC_ERR_AFP_CREATE,
                    "Can't activate reserve on packet socket: %s",
                    strerror(errno));
            return AFP_FATAL_ERROR;
        }
    }

    /* Allocate RX ring */
#ifdef HAVE_TPACKET_V3
    if (ptv->flags & AFP_TPACKET_V3) {
        if (AFPComputeRingParamsV3(ptv) != 1) {
            return AFP_FATAL_ERROR;
        }
        r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
                (void *) &ptv->req.v3, sizeof(ptv->req.v3));
        if (r < 0) {
            SCLogError(SC_ERR_MEM_ALLOC,
                    "Unable to allocate RX Ring for iface %s: (%d) %s",
                    devname,
                    errno,
                    strerror(errno));
            return AFP_FATAL_ERROR;
        }
    } else {
#endif
        /* V2: start with the default block order and retry with smaller
         * blocks as long as the kernel answers ENOMEM */
        for (order = AFP_BLOCK_SIZE_DEFAULT_ORDER; order >= 0; order--) {
            if (AFPComputeRingParams(ptv, order) != 1) {
                SCLogInfo("Ring parameter are incorrect. Please correct the devel");
                return AFP_FATAL_ERROR;
            }

            r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
                    (void *) &ptv->req, sizeof(ptv->req));

            if (r < 0) {
                if (errno == ENOMEM) {
                    SCLogInfo("Memory issue with ring parameters. Retrying.");
                    continue;
                }
                SCLogError(SC_ERR_MEM_ALLOC,
                        "Unable to allocate RX Ring for iface %s: (%d) %s",
                        devname,
                        errno,
                        strerror(errno));
                return AFP_FATAL_ERROR;
            } else {
                break;
            }
        }
        /* the loop ran out of orders without a successful request */
        if (order < 0) {
            SCLogError(SC_ERR_MEM_ALLOC,
                    "Unable to allocate RX Ring for iface %s (order 0 failed)",
                    devname);
            return AFP_FATAL_ERROR;
        }
#ifdef HAVE_TPACKET_V3
    }
#endif

    /* Allocate the Ring */
#ifdef HAVE_TPACKET_V3
    if (ptv->flags & AFP_TPACKET_V3) {
        ptv->ring_buflen = ptv->req.v3.tp_block_nr * ptv->req.v3.tp_block_size;
    } else {
#endif
        ptv->ring_buflen = ptv->req.v2.tp_block_nr * ptv->req.v2.tp_block_size;
#ifdef HAVE_TPACKET_V3
    }
#endif
    mmap_flag = MAP_SHARED;
    if (ptv->flags & AFP_MMAP_LOCKED)
        mmap_flag |= MAP_LOCKED;
    ptv->ring_buf = mmap(0, ptv->ring_buflen, PROT_READ|PROT_WRITE,
            mmap_flag, ptv->socket, 0);
    if (ptv->ring_buf == MAP_FAILED) {
        SCLogError(SC_ERR_MEM_ALLOC, "Unable to mmap, error %s",
                   strerror(errno));
        goto mmap_err;
    }
#ifdef HAVE_TPACKET_V3
    if (ptv->flags & AFP_TPACKET_V3) {
        /* one iovec per block, each pointing into the mapping */
        ptv->ring.v3 = SCMalloc(ptv->req.v3.tp_block_nr * sizeof(*ptv->ring.v3));
        if (!ptv->ring.v3) {
            SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring.v3");
            goto postmmap_err;
        }
        for (i = 0; i < ptv->req.v3.tp_block_nr; ++i) {
            ptv->ring.v3[i].iov_base = ptv->ring_buf + (i * ptv->req.v3.tp_block_size);
            ptv->ring.v3[i].iov_len = ptv->req.v3.tp_block_size;
        }
    } else {
#endif
        /* allocate a ring for each frame header pointer*/
        ptv->ring.v2 = SCMalloc(ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
        if (ptv->ring.v2 == NULL) {
            SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate frame buf");
            goto postmmap_err;
        }
        memset(ptv->ring.v2, 0, ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
        /* fill the header ring with proper frame ptr*/
        /* frame_offset doubles as the running frame index while filling */
        ptv->frame_offset = 0;
        for (i = 0; i < ptv->req.v2.tp_block_nr; ++i) {
            void *base = &(ptv->ring_buf[i * ptv->req.v2.tp_block_size]);
            unsigned int j;
            for (j = 0; j < ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size; ++j, ++ptv->frame_offset) {
                (((union thdr **)ptv->ring.v2)[ptv->frame_offset]) = base;
                base += ptv->req.v2.tp_frame_size;
            }
        }
        /* reading starts at the first frame */
        ptv->frame_offset = 0;
#ifdef HAVE_TPACKET_V3
    }
#endif

    return 0;

postmmap_err:
    /* only reached when one of the SCMalloc() calls above returned NULL */
    munmap(ptv->ring_buf, ptv->ring_buflen);
    /* NOTE(review): if ring.v2 and ring.v3 share storage, both tests look
     * at the same pointer - confirm against the AFPThreadVars definition */
    if (ptv->ring.v2)
        SCFree(ptv->ring.v2);
    if (ptv->ring.v3)
        SCFree(ptv->ring.v3);
mmap_err:
    /* Packet mmap does the cleaning when socket is closed */
    return AFP_FATAL_ERROR;
}
1990
/** \brief test if we can use FANOUT. Older kernels like those in
 *         CentOS6 have HAVE_PACKET_FANOUT defined but fail to work
 *
 *  A temporary AF_PACKET socket is opened and a PACKET_FANOUT option with
 *  mode PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG and group id 1 is
 *  set on it.
 *
 *  \param cluster_id cluster id, only used in the error message
 *  \retval 1 if the fanout option could be set, 0 otherwise (including
 *          builds without HAVE_PACKET_FANOUT)
 */
int AFPIsFanoutSupported(int cluster_id)
{
#ifdef HAVE_PACKET_FANOUT
    const int sock = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (sock < 0) {
        return 0;
    }

    const uint16_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
    const uint16_t id = 1;
    uint32_t option = (mode << 16) | (id & 0xffff);

    const int set_r = setsockopt(sock, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
    close(sock);

    if (set_r >= 0) {
        return 1;
    }

    SCLogError(SC_ERR_INVALID_VALUE, "fanout not supported by kernel: "
            "Kernel too old or cluster-id %d already in use.", cluster_id);
    return 0;
#else
    return 0;
#endif
}
2017
2018 #ifdef HAVE_PACKET_EBPF
2019
2020 static int SockFanoutSeteBPF(AFPThreadVars *ptv)
2021 {
2022 int pfd = ptv->ebpf_lb_fd;
2023 if (pfd == -1) {
2024 SCLogError(SC_ERR_INVALID_VALUE,
2025 "Fanout file descriptor is invalid");
2026 return -1;
2027 }
2028
2029 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
2030 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf");
2031 return -1;
2032 }
2033 SCLogInfo("Activated eBPF on socket");
2034
2035 return 0;
2036 }
2037
2038 static int SetEbpfFilter(AFPThreadVars *ptv)
2039 {
2040 int pfd = ptv->ebpf_filter_fd;
2041 if (pfd == -1) {
2042 SCLogError(SC_ERR_INVALID_VALUE,
2043 "Filter file descriptor is invalid");
2044 return -1;
2045 }
2046
2047 if (setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &pfd, sizeof(pfd))) {
2048 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
2049 return -1;
2050 }
2051 SCLogInfo("Activated eBPF filter on socket");
2052
2053 return 0;
2054 }
2055 #endif
2056
/**
 * \brief Open and configure the AF_PACKET socket of a capture thread
 *
 * Opens the socket, checks that the interface exists and is up, enables
 * promiscuous mode, kernel checksum auxdata and the receive buffer size
 * when configured, binds the socket, joins the fanout group, sets up the
 * ring in ring mode, stores the link type and attaches the BPF filter.
 * On success the thread is switched to AFP_STATE_UP.
 *
 * \param ptv pointer to AFPThreadVars
 * \param devname name of the interface to bind to
 * \param verbose when non-zero, log errors that are expected while an
 *        interface is down
 * \retval 0 on success, -AFP_FATAL_ERROR or -AFP_RECOVERABLE_ERROR on
 *         failure
 */
static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
{
    int r;
    /* error code returned (negated) by the failure paths; fatal unless a
     * path explicitly downgrades it */
    int ret = AFP_FATAL_ERROR;
    struct packet_mreq sock_params;
    struct sockaddr_ll bind_address;
    int if_idx;

    /* open socket */
    ptv->socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (ptv->socket == -1) {
        SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
        goto error;
    }

    if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);

    /* ret is still AFP_FATAL_ERROR here */
    if (if_idx == -1) {
        goto socket_err;
    }

    /* bind socket */
    memset(&bind_address, 0, sizeof(bind_address));
    bind_address.sll_family = AF_PACKET;
    bind_address.sll_protocol = htons(ETH_P_ALL);
    bind_address.sll_ifindex = if_idx;
    /* cannot be true: if_idx == -1 already jumped to socket_err above */
    if (bind_address.sll_ifindex == -1) {
        if (verbose)
            SCLogError(SC_ERR_AFP_CREATE, "Couldn't find iface %s", devname);
        ret = AFP_RECOVERABLE_ERROR;
        goto socket_err;
    }

    /* an interface that is down is a recoverable condition */
    int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
    if (if_flags == -1) {
        if (verbose) {
            SCLogError(SC_ERR_AFP_READ,
                    "Couldn't get flags for interface '%s'",
                    ptv->iface);
        }
        ret = AFP_RECOVERABLE_ERROR;
        goto socket_err;
    } else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
        if (verbose) {
            SCLogError(SC_ERR_AFP_READ,
                    "Interface '%s' is down",
                    ptv->iface);
        }
        ret = AFP_RECOVERABLE_ERROR;
        goto socket_err;
    }

    if (ptv->promisc != 0) {
        /* Force promiscuous mode */
        memset(&sock_params, 0, sizeof(sock_params));
        sock_params.mr_type = PACKET_MR_PROMISC;
        sock_params.mr_ifindex = bind_address.sll_ifindex;
        r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP,(void *)&sock_params, sizeof(sock_params));
        if (r < 0) {
            SCLogError(SC_ERR_AFP_CREATE,
                    "Couldn't switch iface %s to promiscuous, error %s",
                    devname, strerror(errno));
            goto socket_err;
        }
    }

    if (ptv->checksum_mode == CHECKSUM_VALIDATION_KERNEL) {
        int val = 1;
        /* a failure other than ENOPROTOOPT makes the thread fall back to
         * CHECKSUM_VALIDATION_ENABLE */
        if (setsockopt(ptv->socket, SOL_PACKET, PACKET_AUXDATA, &val,
                    sizeof(val)) == -1 && errno != ENOPROTOOPT) {
            SCLogWarning(SC_ERR_NO_AF_PACKET,
                         "'kernel' checksum mode not supported, falling back to full mode.");
            ptv->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
        }
    }

    /* set socket recv buffer size */
    if (ptv->buffer_size != 0) {
        /*
         * Set the socket buffer size to the specified value.
         */
        SCLogPerf("Setting AF_PACKET socket buffer to %d", ptv->buffer_size);
        if (setsockopt(ptv->socket, SOL_SOCKET, SO_RCVBUF,
                       &ptv->buffer_size,
                       sizeof(ptv->buffer_size)) == -1) {
            SCLogError(SC_ERR_AFP_CREATE,
                    "Couldn't set buffer size to %d on iface %s, error %s",
                    ptv->buffer_size, devname, strerror(errno));
            goto socket_err;
        }
    }

    /* a bind failure is treated as recoverable */
    r = bind(ptv->socket, (struct sockaddr *)&bind_address, sizeof(bind_address));
    if (r < 0) {
        if (verbose) {
            if (errno == ENETDOWN) {
                SCLogError(SC_ERR_AFP_CREATE,
                        "Couldn't bind AF_PACKET socket, iface %s is down",
                        devname);
            } else {
                SCLogError(SC_ERR_AFP_CREATE,
                        "Couldn't bind AF_PACKET socket to iface %s, error %s",
                        devname, strerror(errno));
            }
        }
        ret = AFP_RECOVERABLE_ERROR;
        goto socket_err;
    }


#ifdef HAVE_PACKET_FANOUT
    /* add bound socket to fanout group */
    if (ptv->threads > 1) {
        uint16_t mode = ptv->cluster_type;
        uint16_t id = ptv->cluster_id;
        /* fanout option: mode in the upper 16 bits, group id in the lower */
        uint32_t option = (mode << 16) | (id & 0xffff);
        r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
        if (r < 0) {
            SCLogError(SC_ERR_AFP_CREATE,
                       "Couldn't set fanout mode, error %s",
                       strerror(errno));
            goto socket_err;
        }
    }
#endif

#ifdef HAVE_PACKET_EBPF
    if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
        r = SockFanoutSeteBPF(ptv);
        if (r < 0) {
            SCLogError(SC_ERR_AFP_CREATE,
                       "Coudn't set EBPF, error %s",
                       strerror(errno));
            goto socket_err;
        }
    }
#endif

    if (ptv->flags & AFP_RING_MODE) {
        /* AFPSetupRing() returns 0 or a positive AFP_* error code, which
         * is negated at the error label */
        ret = AFPSetupRing(ptv, devname);
        if (ret != 0)
            goto socket_err;
    }

    SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);

    ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
    /* PPP and ATM link types are flagged as cooked */
    switch (ptv->datalink) {
        case ARPHRD_PPP:
        case ARPHRD_ATM:
            ptv->cooked = 1;
            break;
    }

    TmEcode rc = AFPSetBPFFilter(ptv);
    if (rc == TM_ECODE_FAILED) {
        ret = AFP_FATAL_ERROR;
        goto socket_err;
    }

    /* Init is ok */
    AFPSwitchState(ptv, AFP_STATE_UP);
    return 0;

socket_err:
    /* close the socket and free the ring pointer array */
    /* NOTE(review): ring_buf is not unmapped on this path - confirm
     * whether that is intended when AFPSetupRing() succeeded */
    close(ptv->socket);
    ptv->socket = -1;
    if (ptv->flags & AFP_TPACKET_V3) {
        if (ptv->ring.v3) {
            SCFree(ptv->ring.v3);
            ptv->ring.v3 = NULL;
        }
    } else {
        if (ptv->ring.v2) {
            SCFree(ptv->ring.v2);
            ptv->ring.v2 = NULL;
        }
    }

error:
    return -ret;
}
2239
2240 TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
2241 {
2242 struct bpf_program filter;
2243 struct sock_fprog fcode;
2244 int rc;
2245
2246 #ifdef HAVE_PACKET_EBPF
2247 if (ptv->ebpf_filter_fd != -1) {
2248 return SetEbpfFilter(ptv);
2249 }
2250 #endif
2251
2252 if (!ptv->bpf_filter)
2253 return TM_ECODE_OK;
2254
2255 SCLogInfo("Using BPF '%s' on iface '%s'",
2256 ptv->bpf_filter,
2257 ptv->iface);
2258
2259 char errbuf[PCAP_ERRBUF_SIZE];
2260 if (SCBPFCompile(default_packet_size, /* snaplen_arg */
2261 ptv->datalink, /* linktype_arg */
2262 &filter, /* program */
2263 ptv->bpf_filter, /* const char *buf */
2264 1, /* optimize */
2265 0, /* mask */
2266 errbuf,
2267 sizeof(errbuf)) == -1) {
2268 SCLogError(SC_ERR_AFP_CREATE, "Failed to compile BPF \"%s\": %s",
2269 ptv->bpf_filter,
2270 errbuf);
2271 return TM_ECODE_FAILED;
2272 }
2273
2274 fcode.len = filter.bf_len;
2275 fcode.filter = (struct sock_filter*)filter.bf_insns;
2276
2277 rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
2278
2279 SCBPFFree(&filter);
2280 if(rc == -1) {
2281 SCLogError(SC_ERR_AFP_CREATE, "Failed to attach filter: %s", strerror(errno));
2282 return TM_ECODE_FAILED;
2283 }
2284
2285 return TM_ECODE_OK;
2286 }
2287
2288 #ifdef HAVE_PACKET_EBPF
2289 /**
2290 * Insert a half flow in the kernel bypass table
2291 *
2292 * \param mapfd file descriptor of the protocol bypass table
2293 * \param key data to use as key in the table
2294 * \return 0 in case of error, 1 if success
2295 */
2296 static int AFPInsertHalfFlow(int mapd, void *key, unsigned int nr_cpus)
2297 {
2298 BPF_DECLARE_PERCPU(struct pair, value, nr_cpus);
2299 unsigned int i;
2300
2301 if (mapd == -1) {
2302 return 0;
2303 }
2304
2305 /* We use a per CPU structure so we have to set an array of values as the kernel
2306 * is not duplicating the data on each CPU by itself. */
2307 for (i = 0; i < nr_cpus; i++) {
2308 BPF_PERCPU(value, i).packets = 0;
2309 BPF_PERCPU(value, i).bytes = 0;
2310 }
2311 if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) != 0) {
2312 switch (errno) {
2313 /* no more place in the hash */
2314 case E2BIG:
2315 return 0;
2316 /* no more place in the hash for some hardware bypass */
2317 case EAGAIN:
2318 return 0;
2319 /* if we already have the key then bypass is a success */
2320 case EEXIST:
2321 return 1;
2322 /* Not supposed to be there so issue a error */
2323 default:
2324 SCLogError(SC_ERR_BPF, "Can't update eBPF map: %s (%d)",
2325 strerror(errno),
2326 errno);
2327 return 0;
2328 }
2329 }
2330 return 1;
2331 }
2332
2333 static int AFPSetFlowStorage(Packet *p, int map_fd, void *key0, void* key1,
2334 int family)
2335 {
2336 FlowBypassInfo *fc = FlowGetStorageById(p->flow, GetFlowBypassInfoID());
2337 if (fc) {
2338 EBPFBypassData *eb = SCCalloc(1, sizeof(EBPFBypassData));
2339 if (eb == NULL) {
2340 EBPFDeleteKey(map_fd, key0);
2341 EBPFDeleteKey(map_fd, key1);
2342 LiveDevAddBypassFail(p->livedev, 1, family);
2343 SCFree(key0);
2344 SCFree(key1);
2345 return 0;
2346 }
2347 eb->key[0] = key0;
2348 eb->key[1] = key1;
2349 eb->mapfd = map_fd;
2350 eb->cpus_count = p->afp_v.nr_cpus;
2351 fc->BypassUpdate = EBPFBypassUpdate;
2352 fc->BypassFree = EBPFBypassFree;
2353 fc->bypass_data = eb;
2354 } else {
2355 EBPFDeleteKey(map_fd, key0);
2356 EBPFDeleteKey(map_fd, key1);
2357 LiveDevAddBypassFail(p->livedev, 1, family);
2358 SCFree(key0);
2359 SCFree(key1);
2360 return 0;
2361 }
2362
2363 LiveDevAddBypassStats(p->livedev, 1, family);
2364 LiveDevAddBypassSuccess(p->livedev, 1, family);
2365 return 1;
2366 }
2367
2368 #endif
2369
2370 /**
2371 * Bypass function for AF_PACKET capture in eBPF mode
2372 *
2373 * This function creates two half flows in the map shared with the kernel
2374 * to trigger bypass.
2375 *
2376 * The implementation of bypass is done via an IPv4 and an IPv6 flow table.
2377 * This table contains the list of half flows to bypass. The in-kernel filter
2378 * will skip/drop the packet if they belong to a flow in one of the flows
2379 * table.
2380 *
2381 * \param p the packet belonging to the flow to bypass
2382 * \return 0 if unable to bypass, 1 if success
2383 */
2384 static int AFPBypassCallback(Packet *p)
2385 {
2386 #ifdef HAVE_PACKET_EBPF
2387 SCLogDebug("Calling af_packet callback function");
2388 /* Only bypass TCP and UDP */
2389 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2390 return 0;
2391 }
2392
2393 /* If we don't have a flow attached to packet the eBPF map entries
2394 * will be destroyed at first flow bypass manager pass as we won't
2395 * find any associated entry */
2396 if (p->flow == NULL) {
2397 return 0;
2398 }
2399 /* Bypassing tunneled packets is currently not supported
2400 * because we can't discard the inner packet only due to
2401 * primitive parsing in eBPF */
2402 if (IS_TUNNEL_PKT(p)) {
2403 return 0;
2404 }
2405 if (PKT_IS_IPV4(p)) {
2406 SCLogDebug("add an IPv4");
2407 if (p->afp_v.v4_map_fd == -1) {
2408 return 0;
2409 }
2410 struct flowv4_keys *keys[2];
2411 keys[0] = SCCalloc(1, sizeof(struct flowv4_keys));
2412 if (keys[0] == NULL) {
2413 return 0;
2414 }
2415 keys[0]->src = htonl(GET_IPV4_SRC_ADDR_U32(p));
2416 keys[0]->dst = htonl(GET_IPV4_DST_ADDR_U32(p));
2417 keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
2418 keys[0]->port16[1] = GET_TCP_DST_PORT(p);
2419 keys[0]->vlan0 = p->vlan_id[0];
2420 keys[0]->vlan1 = p->vlan_id[1];
2421
2422 if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
2423 keys[0]->ip_proto = 1;
2424 } else {
2425 keys[0]->ip_proto = 0;
2426 }
2427 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
2428 p->afp_v.nr_cpus) == 0) {
2429 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
2430 SCFree(keys[0]);
2431 return 0;
2432 }
2433 keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
2434 if (keys[1] == NULL) {
2435 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2436 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
2437 SCFree(keys[0]);
2438 return 0;
2439 }
2440 keys[1]->src = htonl(GET_IPV4_DST_ADDR_U32(p));
2441 keys[1]->dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
2442 keys[1]->port16[0] = GET_TCP_DST_PORT(p);
2443 keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
2444 keys[1]->vlan0 = p->vlan_id[0];
2445 keys[1]->vlan1 = p->vlan_id[1];
2446
2447 keys[1]->ip_proto = keys[0]->ip_proto;
2448 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
2449 p->afp_v.nr_cpus) == 0) {
2450 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2451 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
2452 SCFree(keys[0]);
2453 SCFree(keys[1]);
2454 return 0;
2455 }
2456 EBPFUpdateFlow(p->flow, p, NULL);
2457 return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
2458 }
2459 /* For IPv6 case we don't handle extended header in eBPF */
2460 if (PKT_IS_IPV6(p) &&
2461 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
2462 int i;
2463 if (p->afp_v.v6_map_fd == -1) {
2464 return 0;
2465 }
2466 SCLogDebug("add an IPv6");
2467 struct flowv6_keys *keys[2];
2468 keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
2469 if (keys[0] == NULL) {
2470 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
2471 return 0;
2472 }
2473 for (i = 0; i < 4; i++) {
2474 keys[0]->src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2475 keys[0]->dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2476 }
2477 keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
2478 keys[0]->port16[1] = GET_TCP_DST_PORT(p);
2479 keys[0]->vlan0 = p->vlan_id[0];
2480 keys[0]->vlan1 = p->vlan_id[1];
2481
2482 if (IPV6_GET_NH(p) == IPPROTO_TCP) {
2483 keys[0]->ip_proto = 1;
2484 } else {
2485 keys[0]->ip_proto = 0;
2486 }
2487 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
2488 p->afp_v.nr_cpus) == 0) {
2489 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
2490 SCFree(keys[0]);
2491 return 0;
2492 }
2493 keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
2494 if (keys[1] == NULL) {
2495 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2496 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
2497 SCFree(keys[0]);
2498 return 0;
2499 }
2500 for (i = 0; i < 4; i++) {
2501 keys[1]->src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2502 keys[1]->dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2503 }
2504 keys[1]->port16[0] = GET_TCP_DST_PORT(p);
2505 keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
2506 keys[1]->vlan0 = p->vlan_id[0];
2507 keys[1]->vlan1 = p->vlan_id[1];
2508
2509 keys[1]->ip_proto = keys[0]->ip_proto;
2510 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
2511 p->afp_v.nr_cpus) == 0) {
2512 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2513 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
2514 SCFree(keys[0]);
2515 SCFree(keys[1]);
2516 return 0;
2517 }
2518 if (p->flow)
2519 EBPFUpdateFlow(p->flow, p, NULL);
2520 return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
2521 }
2522 #endif
2523 return 0;
2524 }
2525
2526 /**
2527 * Bypass function for AF_PACKET capture in XDP mode
2528 *
2529 * This function creates two half flows in the map shared with the kernel
2530 * to trigger bypass. This function is similar to AFPBypassCallback() but
2531 * the bytes order is changed for some data due to the way we get the data
2532 * in the XDP case.
2533 *
2534 * \param p the packet belonging to the flow to bypass
2535 * \return 0 if unable to bypass, 1 if success
2536 */
2537 static int AFPXDPBypassCallback(Packet *p)
2538 {
2539 #ifdef HAVE_PACKET_XDP
2540 SCLogDebug("Calling af_packet callback function");
2541 /* Only bypass TCP and UDP */
2542 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2543 return 0;
2544 }
2545
2546 /* If we don't have a flow attached to packet the eBPF map entries
2547 * will be destroyed at first flow bypass manager pass as we won't
2548 * find any associated entry */
2549 if (p->flow == NULL) {
2550 return 0;
2551 }
2552 /* Bypassing tunneled packets is currently not supported
2553 * because we can't discard the inner packet only due to
2554 * primitive parsing in eBPF */
2555 if (IS_TUNNEL_PKT(p)) {
2556 return 0;
2557 }
2558 if (PKT_IS_IPV4(p)) {
2559 struct flowv4_keys *keys[2];
2560 keys[0]= SCCalloc(1, sizeof(struct flowv4_keys));
2561 if (keys[0] == NULL) {
2562 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
2563 return 0;
2564 }
2565 if (p->afp_v.v4_map_fd == -1) {
2566 SCFree(keys[0]);
2567 return 0;
2568 }
2569 keys[0]->src = p->src.addr_data32[0];
2570 keys[0]->dst = p->dst.addr_data32[0];
2571 /* In the XDP filter we get port from parsing of packet and not from skb
2572 * (as in eBPF filter) so we need to pass from host to network order */
2573 keys[0]->port16[0] = htons(p->sp);
2574 keys[0]->port16[1] = htons(p->dp);
2575 keys[0]->vlan0 = p->vlan_id[0];
2576 keys[0]->vlan1 = p->vlan_id[1];
2577 if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
2578 keys[0]->ip_proto = 1;
2579 } else {
2580 keys[0]->ip_proto = 0;
2581 }
2582 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
2583 p->afp_v.nr_cpus) == 0) {
2584 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
2585 SCFree(keys[0]);
2586 return 0;
2587 }
2588 keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
2589 if (keys[1] == NULL) {
2590 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2591 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
2592 SCFree(keys[0]);
2593 return 0;
2594 }
2595 keys[1]->src = p->dst.addr_data32[0];
2596 keys[1]->dst = p->src.addr_data32[0];
2597 keys[1]->port16[0] = htons(p->dp);
2598 keys[1]->port16[1] = htons(p->sp);
2599 keys[1]->vlan0 = p->vlan_id[0];
2600 keys[1]->vlan1 = p->vlan_id[1];
2601 keys[1]->ip_proto = keys[0]->ip_proto;
2602 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
2603 p->afp_v.nr_cpus) == 0) {
2604 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2605 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
2606 SCFree(keys[0]);
2607 SCFree(keys[1]);
2608 return 0;
2609 }
2610 return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
2611 }
2612 /* For IPv6 case we don't handle extended header in eBPF */
2613 if (PKT_IS_IPV6(p) &&
2614 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
2615 SCLogDebug("add an IPv6");
2616 if (p->afp_v.v6_map_fd == -1) {
2617 return 0;
2618 }
2619 int i;
2620 struct flowv6_keys *keys[2];
2621 keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
2622 if (keys[0] == NULL) {
2623 return 0;
2624 }
2625
2626 for (i = 0; i < 4; i++) {
2627 keys[0]->src[i] = GET_IPV6_SRC_ADDR(p)[i];
2628 keys[0]->dst[i] = GET_IPV6_DST_ADDR(p)[i];
2629 }
2630 keys[0]->port16[0] = htons(GET_TCP_SRC_PORT(p));
2631 keys[0]->port16[1] = htons(GET_TCP_DST_PORT(p));
2632 keys[0]->vlan0 = p->vlan_id[0];
2633 keys[0]->vlan1 = p->vlan_id[1];
2634 if (IPV6_GET_NH(p) == IPPROTO_TCP) {
2635 keys[0]->ip_proto = 1;
2636 } else {
2637 keys[0]->ip_proto = 0;
2638 }
2639 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
2640 p->afp_v.nr_cpus) == 0) {
2641 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
2642 SCFree(keys[0]);
2643 return 0;
2644 }
2645 keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
2646 if (keys[1] == NULL) {
2647 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2648 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
2649 SCFree(keys[0]);
2650 return 0;
2651 }
2652 for (i = 0; i < 4; i++) {
2653 keys[1]->src[i] = GET_IPV6_DST_ADDR(p)[i];
2654 keys[1]->dst[i] = GET_IPV6_SRC_ADDR(p)[i];
2655 }
2656 keys[1]->port16[0] = htons(GET_TCP_DST_PORT(p));
2657 keys[1]->port16[1] = htons(GET_TCP_SRC_PORT(p));
2658 keys[1]->vlan0 = p->vlan_id[0];
2659 keys[1]->vlan1 = p->vlan_id[1];
2660 keys[1]->ip_proto = keys[0]->ip_proto;
2661 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
2662 p->afp_v.nr_cpus) == 0) {
2663 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2664 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
2665 SCFree(keys[0]);
2666 SCFree(keys[1]);
2667 return 0;
2668 }
2669 return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
2670 }
2671 #endif
2672 return 0;
2673 }
2674
2675
2676 bool g_flowv4_ok = true;
2677 bool g_flowv6_ok = true;
2678
2679 /**
2680 * \brief Init function for ReceiveAFP.
2681 *
2682 * \param tv pointer to ThreadVars
2683 * \param initdata pointer to the interface passed from the user
2684 * \param data pointer gets populated with AFPThreadVars
2685 *
2686 * \todo Create a general AFP setup function.
2687 */
2688 TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
2689 {
2690 SCEnter();
2691 AFPIfaceConfig *afpconfig = (AFPIfaceConfig *)initdata;
2692
2693 if (initdata == NULL) {
2694 SCLogError(SC_ERR_INVALID_ARGUMENT, "initdata == NULL");
2695 SCReturnInt(TM_ECODE_FAILED);
2696 }
2697
2698 AFPThreadVars *ptv = SCMalloc(sizeof(AFPThreadVars));
2699 if (unlikely(ptv == NULL)) {
2700 afpconfig->DerefFunc(afpconfig);
2701 SCReturnInt(TM_ECODE_FAILED);
2702 }
2703 memset(ptv, 0, sizeof(AFPThreadVars));
2704
2705 ptv->tv = tv;
2706 ptv->cooked = 0;
2707
2708 strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
2709 ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2710
2711 ptv->livedev = LiveGetDevice(ptv->iface);
2712 if (ptv->livedev == NULL) {
2713 SCLogError(SC_ERR_INVALID_VALUE, "Unable to find Live device");
2714 SCFree(ptv);
2715 SCReturnInt(TM_ECODE_FAILED);
2716 }
2717
2718 ptv->buffer_size = afpconfig->buffer_size;
2719 ptv->ring_size = afpconfig->ring_size;
2720 ptv->block_size = afpconfig->block_size;
2721 ptv->block_timeout = afpconfig->block_timeout;
2722
2723 ptv->promisc = afpconfig->promisc;
2724 ptv->checksum_mode = afpconfig->checksum_mode;
2725 ptv->bpf_filter = NULL;
2726
2727 ptv->threads = 1;
2728 #ifdef HAVE_PACKET_FANOUT
2729 ptv->cluster_type = PACKET_FANOUT_LB;
2730 ptv->cluster_id = 1;
2731 /* We only set cluster info if the number of reader threads is greater than 1 */
2732 if (afpconfig->threads > 1) {
2733 ptv->cluster_id = afpconfig->cluster_id;
2734 ptv->cluster_type = afpconfig->cluster_type;
2735 ptv->threads = afpconfig->threads;
2736 }
2737 #endif
2738 ptv->flags = afpconfig->flags;
2739
2740 if (afpconfig->bpf_filter) {
2741 ptv->bpf_filter = afpconfig->bpf_filter;
2742 }
2743 ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
2744 ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
2745 ptv->xdp_mode = afpconfig->xdp_mode;
2746 #ifdef HAVE_PACKET_EBPF
2747 ptv->ebpf_t_config.cpus_count = UtilCpuGetNumProcessorsConfigured();
2748
2749 if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
2750 ptv->v4_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v4");
2751 if (ptv->v4_map_fd == -1) {
2752 if (g_flowv4_ok == false) {
2753 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'",
2754 "flow_table_v4");
2755 g_flowv4_ok = true;
2756 }
2757 }
2758 ptv->v6_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v6");
2759 if (ptv->v6_map_fd == -1) {
2760 if (g_flowv6_ok) {
2761 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'",
2762 "flow_table_v6");
2763 g_flowv6_ok = false;
2764 }
2765 }
2766 }
2767 ptv->ebpf_t_config = afpconfig->ebpf_t_config;
2768 #endif
2769
2770 #ifdef PACKET_STATISTICS
2771 ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
2772 ptv->tv);
2773 ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
2774 ptv->tv);
2775 ptv->capture_errors = StatsRegisterCounter("capture.errors",
2776 ptv->tv);
2777 #endif
2778
2779 ptv->copy_mode = afpconfig->copy_mode;
2780 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
2781 strlcpy(ptv->out_iface, afpconfig->out_iface, AFP_IFACE_NAME_LENGTH);
2782 ptv->out_iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2783 /* Warn about BPF filter consequence */
2784 if (ptv->bpf_filter) {
2785 SCLogWarning(SC_WARN_UNCOMMON, "Enabling a BPF filter in IPS mode result"
2786 " in dropping all non matching packets.");
2787 }
2788 }
2789
2790
2791 if (AFPPeersListAdd(ptv) == TM_ECODE_FAILED) {
2792 SCFree(ptv);
2793 afpconfig->DerefFunc(afpconfig);
2794 SCReturnInt(TM_ECODE_FAILED);
2795 }
2796
2797 #define T_DATA_SIZE 70000
2798 ptv->data = SCMalloc(T_DATA_SIZE);
2799 if (ptv->data == NULL) {
2800 afpconfig->DerefFunc(afpconfig);
2801 SCFree(ptv);
2802 SCReturnInt(TM_ECODE_FAILED);
2803 }
2804 ptv->datalen = T_DATA_SIZE;
2805 #undef T_DATA_SIZE
2806
2807 *data = (void *)ptv;
2808
2809 afpconfig->DerefFunc(afpconfig);
2810
2811 /* If kernel is older than 3.0, VLAN is not stripped so we don't
2812 * get the info from packet extended header but we will use a standard
2813 * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
2814 if (SCKernelVersionIsAtLeast(3, 0)) {
2815 ptv->flags |= AFP_VLAN_IN_HEADER;
2816 }
2817
2818 SCReturnInt(TM_ECODE_OK);
2819 }
2820
2821 /**
2822 * \brief This function prints stats to the screen at exit.
2823 * \param tv pointer to ThreadVars
2824 * \param data pointer that gets cast into AFPThreadVars for ptv
2825 */
2826 void ReceiveAFPThreadExitStats(ThreadVars *tv, void *data)
2827 {
2828 SCEnter();
2829 AFPThreadVars *ptv = (AFPThreadVars *)data;
2830
2831 #ifdef PACKET_STATISTICS
2832 AFPDumpCounters(ptv);
2833 SCLogPerf("(%s) Kernel: Packets %" PRIu64 ", dropped %" PRIu64 "",
2834 tv->name,
2835 StatsGetLocalCounterValue(tv, ptv->capture_kernel_packets),
2836 StatsGetLocalCounterValue(tv, ptv->capture_kernel_drops));
2837 #endif
2838 }
2839
2840 /**
2841 * \brief DeInit function closes af packet socket at exit.
2842 * \param tv pointer to ThreadVars
2843 * \param data pointer that gets cast into AFPThreadVars for ptv
2844 */
2845 TmEcode ReceiveAFPThreadDeinit(ThreadVars *tv, void *data)
2846 {
2847 AFPThreadVars *ptv = (AFPThreadVars *)data;
2848
2849 AFPSwitchState(ptv, AFP_STATE_DOWN);
2850
2851 #ifdef HAVE_PACKET_XDP
2852 if ((ptv->ebpf_t_config.flags & EBPF_XDP_CODE) &&
2853 (!(ptv->ebpf_t_config.flags & EBPF_PINNED_MAPS))) {
2854 EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
2855 }
2856 #endif
2857 if (ptv->data != NULL) {
2858 SCFree(ptv->data);
2859 ptv->data = NULL;
2860 }
2861 ptv->datalen = 0;
2862
2863 ptv->bpf_filter = NULL;
2864 if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring.v3) {
2865 SCFree(ptv->ring.v3);
2866 } else {
2867 if (ptv->ring.v2)
2868 SCFree(ptv->ring.v2);
2869 }
2870
2871 SCFree(ptv);
2872 SCReturnInt(TM_ECODE_OK);
2873 }
2874
2875 /**
2876 * \brief This function passes off to link type decoders.
2877 *
2878 * DecodeAFP decodes packets from AF_PACKET and passes
2879 * them off to the proper link type decoder.
2880 *
2881 * \param t pointer to ThreadVars
2882 * \param p pointer to the current packet
2883 * \param data pointer that gets cast into AFPThreadVars for ptv
2884 */
2885 TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data)
2886 {
2887 SCEnter();
2888 DecodeThreadVars *dtv = (DecodeThreadVars *)data;
2889
2890 /* XXX HACK: flow timeout can call us for injected pseudo packets
2891 * see bug: https://redmine.openinfosecfoundation.org/issues/1107 */
2892 if (p->flags & PKT_PSEUDO_STREAM_END)
2893 return TM_ECODE_OK;
2894
2895 /* update counters */
2896 DecodeUpdatePacketCounters(tv, dtv, p);
2897
2898 /* If suri has set vlan during reading, we increase vlan counter */
2899 if (p->vlan_idx) {
2900 StatsIncr(tv, dtv->counter_vlan);
2901 }
2902
2903 /* call the decoder */
2904 switch (p->datalink) {
2905 case LINKTYPE_ETHERNET:
2906 DecodeEthernet(tv, dtv, p,GET_PKT_DATA(p), GET_PKT_LEN(p));
2907 break;
2908 case LINKTYPE_LINUX_SLL:
2909 DecodeSll(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
2910 break;
2911 case LINKTYPE_PPP:
2912 DecodePPP(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
2913 break;
2914 case LINKTYPE_RAW:
2915 case LINKTYPE_GRE_OVER_IP:
2916 DecodeRaw(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
2917 break;
2918 case LINKTYPE_NULL:
2919 DecodeNull(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
2920 break;
2921 default:
2922 SCLogError(SC_ERR_DATALINK_UNIMPLEMENTED, "Error: datalink type %" PRId32 " not yet supported in module DecodeAFP", p->datalink);
2923 break;
2924 }
2925
2926 PacketDecodeFinalize(tv, dtv, p);
2927
2928 SCReturnInt(TM_ECODE_OK);
2929 }
2930
2931 TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
2932 {
2933 SCEnter();
2934 DecodeThreadVars *dtv = NULL;
2935
2936 dtv = DecodeThreadVarsAlloc(tv);
2937
2938 if (dtv == NULL)
2939 SCReturnInt(TM_ECODE_FAILED);
2940
2941 DecodeRegisterPerfCounters(dtv, tv);
2942
2943 *data = (void *)dtv;
2944
2945 SCReturnInt(TM_ECODE_OK);
2946 }
2947
2948 TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data)
2949 {
2950 if (data != NULL)
2951 DecodeThreadVarsFree(tv, data);
2952 SCReturnInt(TM_ECODE_OK);
2953 }
2954
2955 #endif /* HAVE_AF_PACKET */
2956 /* eof */
2957 /**
2958 * @}
2959 */