]> git.ipfire.org Git - people/ms/suricata.git/blob - src/source-af-packet.c
af-packet: optimize BPF
[people/ms/suricata.git] / src / source-af-packet.c
1 /* Copyright (C) 2011-2017 Open Information Security Foundation
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18 /**
19 * \defgroup afppacket AF_PACKET running mode
20 *
21 * @{
22 */
23
24 /**
25 * \file
26 *
27 * \author Eric Leblond <eric@regit.org>
28 *
29 * AF_PACKET socket acquisition support
30 *
31 * \todo watch other interface event to detect suppression of the monitored
32 * interface
33 */
34
35 #include "suricata-common.h"
36 #include "config.h"
37 #include "suricata.h"
38 #include "decode.h"
39 #include "packet-queue.h"
40 #include "threads.h"
41 #include "threadvars.h"
42 #include "tm-queuehandlers.h"
43 #include "tm-modules.h"
44 #include "tm-threads.h"
45 #include "tm-threads-common.h"
46 #include "conf.h"
47 #include "util-debug.h"
48 #include "util-device.h"
49 #include "util-error.h"
50 #include "util-privs.h"
51 #include "util-optimize.h"
52 #include "util-checksum.h"
53 #include "util-ioctl.h"
54 #include "util-host-info.h"
55 #include "tmqh-packetpool.h"
56 #include "source-af-packet.h"
57 #include "runmodes.h"
58
59 #ifdef __SC_CUDA_SUPPORT__
60
61 #include "util-cuda.h"
62 #include "util-cuda-buffer.h"
63 #include "util-mpm-ac.h"
64 #include "util-cuda-handlers.h"
65 #include "detect-engine.h"
66 #include "detect-engine-mpm.h"
67 #include "util-cuda-vars.h"
68
69 #endif /* __SC_CUDA_SUPPORT__ */
70
71 #ifdef HAVE_AF_PACKET
72
73 #if HAVE_SYS_IOCTL_H
74 #include <sys/ioctl.h>
75 #endif
76
77 #if HAVE_LINUX_IF_ETHER_H
78 #include <linux/if_ether.h>
79 #endif
80
81 #if HAVE_LINUX_IF_PACKET_H
82 #include <linux/if_packet.h>
83 #endif
84
85 #if HAVE_LINUX_IF_ARP_H
86 #include <linux/if_arp.h>
87 #endif
88
89 #if HAVE_LINUX_FILTER_H
90 #include <linux/filter.h>
91 #endif
92
93 #if HAVE_SYS_MMAN_H
94 #include <sys/mman.h>
95 #endif
96
97 #ifdef HAVE_HW_TIMESTAMPING
98 #include <linux/net_tstamp.h>
99 #endif
100
101 #endif /* HAVE_AF_PACKET */
102
103 extern int max_pending_packets;
104
105 #ifndef HAVE_AF_PACKET
106
107 TmEcode NoAFPSupportExit(ThreadVars *, const void *, void **);
108
109 void TmModuleReceiveAFPRegister (void)
110 {
111 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
112 tmm_modules[TMM_RECEIVEAFP].ThreadInit = NoAFPSupportExit;
113 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
114 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
115 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
116 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
117 tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
118 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
119 }
120
121 /**
122 * \brief Registration Function for DecodeAFP.
123 * \todo Unit tests are needed for this module.
124 */
125 void TmModuleDecodeAFPRegister (void)
126 {
127 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
128 tmm_modules[TMM_DECODEAFP].ThreadInit = NoAFPSupportExit;
129 tmm_modules[TMM_DECODEAFP].Func = NULL;
130 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
131 tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
132 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
133 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
134 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
135 }
136
137 /**
138 * \brief this function prints an error message and exits.
139 */
140 TmEcode NoAFPSupportExit(ThreadVars *tv, const void *initdata, void **data)
141 {
142 SCLogError(SC_ERR_NO_AF_PACKET,"Error creating thread %s: you do not have "
143 "support for AF_PACKET enabled, on Linux host please recompile "
144 "with --enable-af-packet", tv->name);
145 exit(EXIT_FAILURE);
146 }
147
148 #else /* We have AF_PACKET support */
149
/* Size of the buffers that hold interface names */
#define AFP_IFACE_NAME_LENGTH 48

/* Values stored in the afp_state field of a capture thread */
#define AFP_STATE_DOWN 0
#define AFP_STATE_UP 1

/* NOTE(review): units are not visible in this part of the file; the
 * reconnect value is presumably in microseconds -- confirm at the use site */
#define AFP_RECONNECT_TIMEOUT 500000
#define AFP_DOWN_COUNTER_INTERVAL 40

/* NOTE(review): presumably the poll() timeout in milliseconds -- confirm */
#define POLL_TIMEOUT 100

#ifndef TP_STATUS_USER_BUSY
/* for new use latest bit available in tp_status.
 * The constant must be unsigned: shifting a signed 1 into bit 31 is
 * undefined behaviour, and the value is tested against an unsigned status
 * word. */
#define TP_STATUS_USER_BUSY (1U << 31)
#endif

#ifndef TP_STATUS_VLAN_VALID
#define TP_STATUS_VLAN_VALID (1 << 4)
#endif
168
169 /** protect pfring_set_bpf_filter, as it is not thread safe */
170 static SCMutex afpacket_bpf_set_filter_lock = SCMUTEX_INITIALIZER;
171
/* Result codes returned by the AF_PACKET read functions */
enum {
    AFP_READ_OK = 0,
    AFP_READ_FAILURE = 1,
    AFP_FAILURE = 2,
    /* returned by the ring reader after an emergency flush */
    AFP_KERNEL_DROP = 3,
};

/* Error classes; NOTE(review): users are outside this part of the file */
enum {
    AFP_FATAL_ERROR = 1,
    AFP_RECOVERABLE_ERROR = 2,
};
183
/**
 * \brief Typed views of one frame header in the mmap'ed ring.
 *
 * All members alias the same pointer; `raw` is the untyped form stored in
 * Packet release data.
 */
union thdr {
    struct tpacket2_hdr *h2;    /* TPACKET_V2 frame header */
#ifdef HAVE_TPACKET_V3
    struct tpacket3_hdr *h3;    /* TPACKET_V3 packet header */
#endif
    void *raw;                  /* untyped address of the header */
};
191
192 /**
193 * \brief Structure to hold thread specific variables.
194 */
195 typedef struct AFPThreadVars_
196 {
197 union {
198 char *ring_v2;
199 struct iovec *ring_v3;
200 };
201
202 /* counters */
203 uint64_t pkts;
204
205 ThreadVars *tv;
206 TmSlot *slot;
207 LiveDevice *livedev;
208 /* data link type for the thread */
209 uint32_t datalink;
210
211 unsigned int frame_offset;
212
213 ChecksumValidationMode checksum_mode;
214
215 /* references to packet and drop counters */
216 uint16_t capture_kernel_packets;
217 uint16_t capture_kernel_drops;
218
219 /* handle state */
220 uint8_t afp_state;
221 uint8_t copy_mode;
222 uint8_t flags;
223
224 /* IPS peer */
225 AFPPeer *mpeer;
226
227 /* no mmap mode */
228 uint8_t *data; /** Per function and thread data */
229 int datalen; /** Length of per function and thread data */
230 int cooked;
231
232 /*
233 * Init related members
234 */
235
236 /* thread specific socket */
237 int socket;
238
239 int ring_size;
240 int block_size;
241 int block_timeout;
242 /* socket buffer size */
243 int buffer_size;
244 /* Filter */
245 const char *bpf_filter;
246
247 int promisc;
248
249 int down_count;
250
251 int cluster_id;
252 int cluster_type;
253
254 int threads;
255
256 union {
257 struct tpacket_req req;
258 #ifdef HAVE_TPACKET_V3
259 struct tpacket_req3 req3;
260 #endif
261 };
262
263 char iface[AFP_IFACE_NAME_LENGTH];
264 /* IPS output iface */
265 char out_iface[AFP_IFACE_NAME_LENGTH];
266
267 } AFPThreadVars;
268
269 TmEcode ReceiveAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
270 TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
271 void ReceiveAFPThreadExitStats(ThreadVars *, void *);
272 TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
273 TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
274
275 TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
276 TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
277 TmEcode DecodeAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
278
279 TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
280 static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
281 static int AFPGetDevFlags(int fd, const char *ifname);
282 static int AFPDerefSocket(AFPPeer* peer);
283 static int AFPRefSocket(AFPPeer* peer);
284
285 /**
286 * \brief Registration Function for RecieveAFP.
287 * \todo Unit tests are needed for this module.
288 */
289 void TmModuleReceiveAFPRegister (void)
290 {
291 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
292 tmm_modules[TMM_RECEIVEAFP].ThreadInit = ReceiveAFPThreadInit;
293 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
294 tmm_modules[TMM_RECEIVEAFP].PktAcqLoop = ReceiveAFPLoop;
295 tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
296 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
297 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
298 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
299 tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
300 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
301 }
302
303
/**
 * \defgroup afppeers AFP peers list
 *
 * AF_PACKET has an IPS mode where interfaces are peered: packets from
 * one interface are sent to the peered interface and the other way around.
 * The ::AFPPeer list maintains the list of peers. Each ::AFPPeer stores the
 * information needed to send packets on the interface.
 * An element of the list must not be destroyed during the run of Suricata as
 * it is used by ::Packet and other threads.
 *
 * @{
 */
316
317 typedef struct AFPPeersList_ {
318 TAILQ_HEAD(, AFPPeer_) peers; /**< Head of list of fragments. */
319 int cnt;
320 int peered;
321 int turn; /**< Next value for initialisation order */
322 SC_ATOMIC_DECLARE(int, reached); /**< Counter used to synchronize start */
323 } AFPPeersList;
324
325 /**
326 * \brief Update the peer.
327 *
328 * Update the AFPPeer of a thread ie set new state, socket number
329 * or iface index.
330 *
331 */
332 static void AFPPeerUpdate(AFPThreadVars *ptv)
333 {
334 if (ptv->mpeer == NULL) {
335 return;
336 }
337 (void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
338 (void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
339 (void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
340 }
341
342 /**
343 * \brief Clean and free ressource used by an ::AFPPeer
344 */
345 static void AFPPeerClean(AFPPeer *peer)
346 {
347 if (peer->flags & AFP_SOCK_PROTECT)
348 SCMutexDestroy(&peer->sock_protect);
349 SC_ATOMIC_DESTROY(peer->socket);
350 SC_ATOMIC_DESTROY(peer->if_idx);
351 SC_ATOMIC_DESTROY(peer->state);
352 SCFree(peer);
353 }
354
355 AFPPeersList peerslist;
356
357
358 /**
359 * \brief Init the global list of ::AFPPeer
360 */
361 TmEcode AFPPeersListInit()
362 {
363 SCEnter();
364 TAILQ_INIT(&peerslist.peers);
365 peerslist.peered = 0;
366 peerslist.cnt = 0;
367 peerslist.turn = 0;
368 SC_ATOMIC_INIT(peerslist.reached);
369 (void) SC_ATOMIC_SET(peerslist.reached, 0);
370 SCReturnInt(TM_ECODE_OK);
371 }
372
373 /**
374 * \brief Check that all ::AFPPeer got a peer
375 *
376 * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
377 */
378 TmEcode AFPPeersListCheck()
379 {
380 #define AFP_PEERS_MAX_TRY 4
381 #define AFP_PEERS_WAIT 20000
382 int try = 0;
383 SCEnter();
384 while (try < AFP_PEERS_MAX_TRY) {
385 if (peerslist.cnt != peerslist.peered) {
386 usleep(AFP_PEERS_WAIT);
387 } else {
388 SCReturnInt(TM_ECODE_OK);
389 }
390 try++;
391 }
392 SCLogError(SC_ERR_AFP_CREATE, "Threads number not equals");
393 SCReturnInt(TM_ECODE_FAILED);
394 }
395
396 /**
397 * \brief Declare a new AFP thread to AFP peers list.
398 */
399 static TmEcode AFPPeersListAdd(AFPThreadVars *ptv)
400 {
401 SCEnter();
402 AFPPeer *peer = SCMalloc(sizeof(AFPPeer));
403 AFPPeer *pitem;
404 int mtu, out_mtu;
405
406 if (unlikely(peer == NULL)) {
407 SCReturnInt(TM_ECODE_FAILED);
408 }
409 memset(peer, 0, sizeof(AFPPeer));
410 SC_ATOMIC_INIT(peer->socket);
411 SC_ATOMIC_INIT(peer->sock_usage);
412 SC_ATOMIC_INIT(peer->if_idx);
413 SC_ATOMIC_INIT(peer->state);
414 peer->flags = ptv->flags;
415 peer->turn = peerslist.turn++;
416
417 if (peer->flags & AFP_SOCK_PROTECT) {
418 SCMutexInit(&peer->sock_protect, NULL);
419 }
420
421 (void)SC_ATOMIC_SET(peer->sock_usage, 0);
422 (void)SC_ATOMIC_SET(peer->state, AFP_STATE_DOWN);
423 strlcpy(peer->iface, ptv->iface, AFP_IFACE_NAME_LENGTH);
424 ptv->mpeer = peer;
425 /* add element to iface list */
426 TAILQ_INSERT_TAIL(&peerslist.peers, peer, next);
427
428 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
429 peerslist.cnt++;
430
431 /* Iter to find a peer */
432 TAILQ_FOREACH(pitem, &peerslist.peers, next) {
433 if (pitem->peer)
434 continue;
435 if (strcmp(pitem->iface, ptv->out_iface))
436 continue;
437 peer->peer = pitem;
438 pitem->peer = peer;
439 mtu = GetIfaceMTU(ptv->iface);
440 out_mtu = GetIfaceMTU(ptv->out_iface);
441 if (mtu != out_mtu) {
442 SCLogError(SC_ERR_AFP_CREATE,
443 "MTU on %s (%d) and %s (%d) are not equal, "
444 "transmission of packets bigger than %d will fail.",
445 ptv->iface, mtu,
446 ptv->out_iface, out_mtu,
447 (out_mtu > mtu) ? mtu : out_mtu);
448 }
449 peerslist.peered += 2;
450 break;
451 }
452 }
453
454 AFPPeerUpdate(ptv);
455
456 SCReturnInt(TM_ECODE_OK);
457 }
458
459 static int AFPPeersListWaitTurn(AFPPeer *peer)
460 {
461 /* If turn is zero, we already have started threads once */
462 if (peerslist.turn == 0)
463 return 0;
464
465 if (peer->turn == SC_ATOMIC_GET(peerslist.reached))
466 return 0;
467 return 1;
468 }
469
470 static void AFPPeersListReachedInc(void)
471 {
472 if (peerslist.turn == 0)
473 return;
474
475 if (SC_ATOMIC_ADD(peerslist.reached, 1) == peerslist.turn) {
476 SCLogInfo("All AFP capture threads are running.");
477 (void)SC_ATOMIC_SET(peerslist.reached, 0);
478 /* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
479 * restarted.
480 */
481 peerslist.turn = 0;
482 }
483 }
484
485 static int AFPPeersListStarted(void)
486 {
487 return !peerslist.turn;
488 }
489
490 /**
491 * \brief Clean the global peers list.
492 */
493 void AFPPeersListClean()
494 {
495 AFPPeer *pitem;
496
497 while ((pitem = TAILQ_FIRST(&peerslist.peers))) {
498 TAILQ_REMOVE(&peerslist.peers, pitem, next);
499 AFPPeerClean(pitem);
500 }
501 }
502
503 /**
504 * @}
505 */
506
507 /**
508 * \brief Registration Function for DecodeAFP.
509 * \todo Unit tests are needed for this module.
510 */
511 void TmModuleDecodeAFPRegister (void)
512 {
513 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
514 tmm_modules[TMM_DECODEAFP].ThreadInit = DecodeAFPThreadInit;
515 tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
516 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
517 tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
518 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
519 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
520 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
521 }
522
523
524 static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose);
525
526 static inline void AFPDumpCounters(AFPThreadVars *ptv)
527 {
528 #ifdef PACKET_STATISTICS
529 struct tpacket_stats kstats;
530 socklen_t len = sizeof (struct tpacket_stats);
531 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
532 &kstats, &len) > -1) {
533 SCLogDebug("(%s) Kernel: Packets %" PRIu32 ", dropped %" PRIu32 "",
534 ptv->tv->name,
535 kstats.tp_packets, kstats.tp_drops);
536 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, kstats.tp_packets);
537 StatsAddUI64(ptv->tv, ptv->capture_kernel_drops, kstats.tp_drops);
538 (void) SC_ATOMIC_ADD(ptv->livedev->drop, (uint64_t) kstats.tp_drops);
539 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, (uint64_t) kstats.tp_packets);
540 }
541 #endif
542 }
543
544 /**
545 * \brief AF packet read function.
546 *
547 * This function fills
548 * From here the packets are picked up by the DecodeAFP thread.
549 *
550 * \param user pointer to AFPThreadVars
551 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
552 */
553 static int AFPRead(AFPThreadVars *ptv)
554 {
555 Packet *p = NULL;
556 /* XXX should try to use read that get directly to packet */
557 int offset = 0;
558 int caplen;
559 struct sockaddr_ll from;
560 struct iovec iov;
561 struct msghdr msg;
562 struct cmsghdr *cmsg;
563 union {
564 struct cmsghdr cmsg;
565 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
566 } cmsg_buf;
567 unsigned char aux_checksum = 0;
568
569 msg.msg_name = &from;
570 msg.msg_namelen = sizeof(from);
571 msg.msg_iov = &iov;
572 msg.msg_iovlen = 1;
573 msg.msg_control = &cmsg_buf;
574 msg.msg_controllen = sizeof(cmsg_buf);
575 msg.msg_flags = 0;
576
577 if (ptv->cooked)
578 offset = SLL_HEADER_LEN;
579 else
580 offset = 0;
581 iov.iov_len = ptv->datalen - offset;
582 iov.iov_base = ptv->data + offset;
583
584 caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
585
586 if (caplen < 0) {
587 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
588 errno);
589 SCReturnInt(AFP_READ_FAILURE);
590 }
591
592 p = PacketGetFromQueueOrAlloc();
593 if (p == NULL) {
594 SCReturnInt(AFP_FAILURE);
595 }
596 PKT_SET_SRC(p, PKT_SRC_WIRE);
597
598 /* get timestamp of packet via ioctl */
599 if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
600 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
601 errno);
602 TmqhOutputPacketpool(ptv->tv, p);
603 SCReturnInt(AFP_READ_FAILURE);
604 }
605
606 ptv->pkts++;
607 p->livedev = ptv->livedev;
608
609 /* add forged header */
610 if (ptv->cooked) {
611 SllHdr * hdrp = (SllHdr *)ptv->data;
612 /* XXX this is minimalist, but this seems enough */
613 hdrp->sll_protocol = from.sll_protocol;
614 }
615
616 p->datalink = ptv->datalink;
617 SET_PKT_LEN(p, caplen + offset);
618 if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
619 TmqhOutputPacketpool(ptv->tv, p);
620 SCReturnInt(AFP_FAILURE);
621 }
622 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
623 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
624
625 /* We only check for checksum disable */
626 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
627 p->flags |= PKT_IGNORE_CHECKSUM;
628 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
629 if (ptv->livedev->ignore_checksum) {
630 p->flags |= PKT_IGNORE_CHECKSUM;
631 } else if (ChecksumAutoModeCheck(ptv->pkts,
632 SC_ATOMIC_GET(ptv->livedev->pkts),
633 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
634 ptv->livedev->ignore_checksum = 1;
635 p->flags |= PKT_IGNORE_CHECKSUM;
636 }
637 } else {
638 aux_checksum = 1;
639 }
640
641 /* List is NULL if we don't have activated auxiliary data */
642 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
643 struct tpacket_auxdata *aux;
644
645 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
646 cmsg->cmsg_level != SOL_PACKET ||
647 cmsg->cmsg_type != PACKET_AUXDATA)
648 continue;
649
650 aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
651
652 if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
653 p->flags |= PKT_IGNORE_CHECKSUM;
654 }
655 break;
656 }
657
658 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
659 TmqhOutputPacketpool(ptv->tv, p);
660 SCReturnInt(AFP_FAILURE);
661 }
662 SCReturnInt(AFP_READ_OK);
663 }
664
665 /**
666 * \brief AF packet write function.
667 *
668 * This function has to be called before the memory
669 * related to Packet in ring buffer is released.
670 *
671 * \param pointer to Packet
672 * \param version of capture: TPACKET_V2 or TPACKET_V3
673 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
674 *
675 */
676 static TmEcode AFPWritePacket(Packet *p, int version)
677 {
678 struct sockaddr_ll socket_address;
679 int socket;
680 uint8_t *pstart;
681 size_t plen;
682 union thdr h;
683 uint16_t vlan_tci = 0;
684
685 if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
686 if (PACKET_TEST_ACTION(p, ACTION_DROP)) {
687 return TM_ECODE_OK;
688 }
689 }
690
691 if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
692 return TM_ECODE_OK;
693
694 if (p->ethh == NULL) {
695 SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
696 return TM_ECODE_FAILED;
697 }
698 /* Index of the network device */
699 socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
700 /* Address length*/
701 socket_address.sll_halen = ETH_ALEN;
702 /* Destination MAC */
703 memcpy(socket_address.sll_addr, p->ethh, 6);
704
705 /* Send packet, locking the socket if necessary */
706 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
707 SCMutexLock(&p->afp_v.peer->sock_protect);
708 socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
709
710 h.raw = p->afp_v.relptr;
711
712 if (version == TPACKET_V2) {
713 /* Copy VLAN header from ring memory. For post june 2011 kernel we test
714 * the flag. It is not defined for older kernel so we go best effort
715 * and test for non zero value of the TCI header. */
716 if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
717 vlan_tci = h.h2->tp_vlan_tci;
718 }
719 } else {
720 #ifdef HAVE_TPACKET_V3
721 if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
722 vlan_tci = h.h3->hv1.tp_vlan_tci;
723 }
724 #else
725 /* Should not get here */
726 BUG_ON(1);
727 #endif
728 }
729
730 if (vlan_tci != 0) {
731 pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
732 plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
733 /* move ethernet addresses */
734 memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
735 /* write vlan info */
736 *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
737 *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
738 } else {
739 pstart = GET_PKT_DATA(p);
740 plen = GET_PKT_LEN(p);
741 }
742
743 if (sendto(socket, pstart, plen, 0,
744 (struct sockaddr*) &socket_address,
745 sizeof(struct sockaddr_ll)) < 0) {
746 SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
747 socket,
748 strerror(errno));
749 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
750 SCMutexUnlock(&p->afp_v.peer->sock_protect);
751 return TM_ECODE_FAILED;
752 }
753 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
754 SCMutexUnlock(&p->afp_v.peer->sock_protect);
755
756 return TM_ECODE_OK;
757 }
758
759 static void AFPReleaseDataFromRing(Packet *p)
760 {
761 /* Need to be in copy mode and need to detect early release
762 where Ethernet header could not be set (and pseudo packet) */
763 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
764 AFPWritePacket(p, TPACKET_V2);
765 }
766
767 if (AFPDerefSocket(p->afp_v.mpeer) == 0)
768 goto cleanup;
769
770 if (p->afp_v.relptr) {
771 union thdr h;
772 h.raw = p->afp_v.relptr;
773 h.h2->tp_status = TP_STATUS_KERNEL;
774 }
775
776 cleanup:
777 AFPV_CLEANUP(&p->afp_v);
778 }
779
#ifdef HAVE_TPACKET_V3
/**
 * \brief Release function for Packets read from a TPACKET_V3 ring.
 *
 * In copy mode the packet is written to the peer interface before the
 * Packet is freed or returned to its pool.
 *
 * \param p packet being released
 */
static void AFPReleasePacketV3(Packet *p)
{
    /* Need to be in copy mode and need to detect early release
       where Ethernet header could not be set (and pseudo packet) */
    const int must_forward = (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) &&
                             !PKT_IS_PSEUDOPKT(p);

    if (must_forward) {
        AFPWritePacket(p, TPACKET_V3);
    }
    PacketFreeOrRelease(p);
}
#endif
791
792 static void AFPReleasePacket(Packet *p)
793 {
794 AFPReleaseDataFromRing(p);
795 PacketFreeOrRelease(p);
796 }
797
798 /**
799 * \brief AF packet read function for ring
800 *
801 * This function fills
802 * From here the packets are picked up by the DecodeAFP thread.
803 *
804 * \param user pointer to AFPThreadVars
805 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
806 */
807 static int AFPReadFromRing(AFPThreadVars *ptv)
808 {
809 Packet *p = NULL;
810 union thdr h;
811 uint8_t emergency_flush = 0;
812 int read_pkts = 0;
813 int loop_start = -1;
814
815
816 /* Loop till we have packets available */
817 while (1) {
818 if (unlikely(suricata_ctl_flags != 0)) {
819 break;
820 }
821
822 /* Read packet from ring */
823 h.raw = (((union thdr **)ptv->ring_v2)[ptv->frame_offset]);
824 if (h.raw == NULL) {
825 SCReturnInt(AFP_FAILURE);
826 }
827
828 if ((! h.h2->tp_status) || (h.h2->tp_status & TP_STATUS_USER_BUSY)) {
829 if (read_pkts == 0) {
830 if (loop_start == -1) {
831 loop_start = ptv->frame_offset;
832 } else if (unlikely(loop_start == (int)ptv->frame_offset)) {
833 SCReturnInt(AFP_READ_OK);
834 }
835 if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
836 ptv->frame_offset = 0;
837 }
838 continue;
839 }
840 if ((emergency_flush) && (ptv->flags & AFP_EMERGENCY_MODE)) {
841 SCReturnInt(AFP_KERNEL_DROP);
842 } else {
843 SCReturnInt(AFP_READ_OK);
844 }
845 }
846
847 read_pkts++;
848 loop_start = -1;
849
850 /* Our packet is still used by suricata, we exit read loop to
851 * gain some time */
852 if (h.h2->tp_status & TP_STATUS_USER_BUSY) {
853 SCReturnInt(AFP_READ_OK);
854 }
855
856 if ((ptv->flags & AFP_EMERGENCY_MODE) && (emergency_flush == 1)) {
857 h.h2->tp_status = TP_STATUS_KERNEL;
858 goto next_frame;
859 }
860
861 p = PacketGetFromQueueOrAlloc();
862 if (p == NULL) {
863 SCReturnInt(AFP_FAILURE);
864 }
865 PKT_SET_SRC(p, PKT_SRC_WIRE);
866
867 /* Suricata will treat packet so telling it is busy, this
868 * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
869 * function. */
870 h.h2->tp_status |= TP_STATUS_USER_BUSY;
871
872 ptv->pkts++;
873 p->livedev = ptv->livedev;
874 p->datalink = ptv->datalink;
875
876 if (h.h2->tp_len > h.h2->tp_snaplen) {
877 SCLogDebug("Packet length (%d) > snaplen (%d), truncating",
878 h.h2->tp_len, h.h2->tp_snaplen);
879 }
880
881 /* get vlan id from header */
882 if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
883 (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
884 p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
885 p->vlan_idx = 1;
886 p->vlanh[0] = NULL;
887 }
888
889 if (ptv->flags & AFP_ZERO_COPY) {
890 if (PacketSetData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
891 TmqhOutputPacketpool(ptv->tv, p);
892 SCReturnInt(AFP_FAILURE);
893 } else {
894 p->afp_v.relptr = h.raw;
895 p->ReleasePacket = AFPReleasePacket;
896 p->afp_v.mpeer = ptv->mpeer;
897 AFPRefSocket(ptv->mpeer);
898
899 p->afp_v.copy_mode = ptv->copy_mode;
900 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
901 p->afp_v.peer = ptv->mpeer->peer;
902 } else {
903 p->afp_v.peer = NULL;
904 }
905 }
906 } else {
907 if (PacketCopyData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
908 TmqhOutputPacketpool(ptv->tv, p);
909 SCReturnInt(AFP_FAILURE);
910 }
911 }
912 /* Timestamp */
913 p->ts.tv_sec = h.h2->tp_sec;
914 p->ts.tv_usec = h.h2->tp_nsec/1000;
915 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
916 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
917
918 /* We only check for checksum disable */
919 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
920 p->flags |= PKT_IGNORE_CHECKSUM;
921 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
922 if (ptv->livedev->ignore_checksum) {
923 p->flags |= PKT_IGNORE_CHECKSUM;
924 } else if (ChecksumAutoModeCheck(ptv->pkts,
925 SC_ATOMIC_GET(ptv->livedev->pkts),
926 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
927 ptv->livedev->ignore_checksum = 1;
928 p->flags |= PKT_IGNORE_CHECKSUM;
929 }
930 } else {
931 if (h.h2->tp_status & TP_STATUS_CSUMNOTREADY) {
932 p->flags |= PKT_IGNORE_CHECKSUM;
933 }
934 }
935 if (h.h2->tp_status & TP_STATUS_LOSING) {
936 emergency_flush = 1;
937 AFPDumpCounters(ptv);
938 }
939
940 /* release frame if not in zero copy mode */
941 if (!(ptv->flags & AFP_ZERO_COPY)) {
942 h.h2->tp_status = TP_STATUS_KERNEL;
943 }
944
945 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
946 h.h2->tp_status = TP_STATUS_KERNEL;
947 if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
948 ptv->frame_offset = 0;
949 }
950 TmqhOutputPacketpool(ptv->tv, p);
951 SCReturnInt(AFP_FAILURE);
952 }
953
954 next_frame:
955 if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
956 ptv->frame_offset = 0;
957 /* Get out of loop to be sure we will reach maintenance tasks */
958 SCReturnInt(AFP_READ_OK);
959 }
960 }
961
962 SCReturnInt(AFP_READ_OK);
963 }
964
965 #ifdef HAVE_TPACKET_V3
966 static inline void AFPFlushBlock(struct tpacket_block_desc *pbd)
967 {
968 pbd->hdr.bh1.block_status = TP_STATUS_KERNEL;
969 }
970
971 static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc *pbd, struct tpacket3_hdr *ppd)
972 {
973 Packet *p = PacketGetFromQueueOrAlloc();
974 if (p == NULL) {
975 SCReturnInt(AFP_FAILURE);
976 }
977 PKT_SET_SRC(p, PKT_SRC_WIRE);
978
979 ptv->pkts++;
980 p->livedev = ptv->livedev;
981 p->datalink = ptv->datalink;
982
983 if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
984 (ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
985 p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
986 p->vlan_idx = 1;
987 p->vlanh[0] = NULL;
988 }
989
990 if (ptv->flags & AFP_ZERO_COPY) {
991 if (PacketSetData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
992 TmqhOutputPacketpool(ptv->tv, p);
993 SCReturnInt(AFP_FAILURE);
994 }
995 p->afp_v.relptr = ppd;
996 p->ReleasePacket = AFPReleasePacketV3;
997 p->afp_v.mpeer = ptv->mpeer;
998 AFPRefSocket(ptv->mpeer);
999
1000 p->afp_v.copy_mode = ptv->copy_mode;
1001 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
1002 p->afp_v.peer = ptv->mpeer->peer;
1003 } else {
1004 p->afp_v.peer = NULL;
1005 }
1006 } else {
1007 if (PacketCopyData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1008 TmqhOutputPacketpool(ptv->tv, p);
1009 SCReturnInt(AFP_FAILURE);
1010 }
1011 }
1012 /* Timestamp */
1013 p->ts.tv_sec = ppd->tp_sec;
1014 p->ts.tv_usec = ppd->tp_nsec/1000;
1015 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
1016 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
1017
1018 /* We only check for checksum disable */
1019 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
1020 p->flags |= PKT_IGNORE_CHECKSUM;
1021 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
1022 if (ptv->livedev->ignore_checksum) {
1023 p->flags |= PKT_IGNORE_CHECKSUM;
1024 } else if (ChecksumAutoModeCheck(ptv->pkts,
1025 SC_ATOMIC_GET(ptv->livedev->pkts),
1026 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
1027 ptv->livedev->ignore_checksum = 1;
1028 p->flags |= PKT_IGNORE_CHECKSUM;
1029 }
1030 } else {
1031 if (ppd->tp_status & TP_STATUS_CSUMNOTREADY) {
1032 p->flags |= PKT_IGNORE_CHECKSUM;
1033 }
1034 }
1035
1036 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
1037 TmqhOutputPacketpool(ptv->tv, p);
1038 SCReturnInt(AFP_FAILURE);
1039 }
1040
1041 SCReturnInt(AFP_READ_OK);
1042 }
1043
1044 static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
1045 {
1046 int num_pkts = pbd->hdr.bh1.num_pkts, i;
1047 uint8_t *ppd;
1048
1049 ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
1050 for (i = 0; i < num_pkts; ++i) {
1051 if (unlikely(AFPParsePacketV3(ptv, pbd,
1052 (struct tpacket3_hdr *)ppd) == AFP_FAILURE)) {
1053 SCReturnInt(AFP_READ_FAILURE);
1054 }
1055 ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset;
1056 }
1057
1058 SCReturnInt(AFP_READ_OK);
1059 }
1060 #endif /* HAVE_TPACKET_V3 */
1061
1062 /**
1063 * \brief AF packet read function for ring
1064 *
1065 * This function fills
1066 * From here the packets are picked up by the DecodeAFP thread.
1067 *
1068 * \param user pointer to AFPThreadVars
1069 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1070 */
1071 static int AFPReadFromRingV3(AFPThreadVars *ptv)
1072 {
1073 #ifdef HAVE_TPACKET_V3
1074 struct tpacket_block_desc *pbd;
1075
1076 /* Loop till we have packets available */
1077 while (1) {
1078 if (unlikely(suricata_ctl_flags != 0)) {
1079 SCLogInfo("Exiting AFP V3 read loop");
1080 break;
1081 }
1082
1083 pbd = (struct tpacket_block_desc *) ptv->ring_v3[ptv->frame_offset].iov_base;
1084
1085 /* block is not ready to be read */
1086 if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
1087 SCReturnInt(AFP_READ_OK);
1088 }
1089
1090 if (unlikely(AFPWalkBlock(ptv, pbd) != AFP_READ_OK)) {
1091 AFPFlushBlock(pbd);
1092 SCReturnInt(AFP_READ_FAILURE);
1093 }
1094
1095 AFPFlushBlock(pbd);
1096 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req3.tp_block_nr;
1097 /* return to maintenance task after one loop on the ring */
1098 if (ptv->frame_offset == 0) {
1099 SCReturnInt(AFP_READ_OK);
1100 }
1101 }
1102 #endif
1103 SCReturnInt(AFP_READ_OK);
1104 }
1105
1106 /**
1107 * \brief Reference socket
1108 *
1109 * \retval O in case of failure, 1 in case of success
1110 */
1111 static int AFPRefSocket(AFPPeer* peer)
1112 {
1113 if (unlikely(peer == NULL))
1114 return 0;
1115
1116 (void)SC_ATOMIC_ADD(peer->sock_usage, 1);
1117 return 1;
1118 }
1119
1120
1121 /**
1122 * \brief Dereference socket
1123 *
1124 * \retval 1 if socket is still alive, 0 if not
1125 */
1126 static int AFPDerefSocket(AFPPeer* peer)
1127 {
1128 if (peer == NULL)
1129 return 1;
1130
1131 if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 0) {
1132 if (SC_ATOMIC_GET(peer->state) == AFP_STATE_DOWN) {
1133 SCLogInfo("Cleaning socket connected to '%s'", peer->iface);
1134 close(SC_ATOMIC_GET(peer->socket));
1135 return 0;
1136 }
1137 }
1138 return 1;
1139 }
1140
1141 static void AFPSwitchState(AFPThreadVars *ptv, int state)
1142 {
1143 ptv->afp_state = state;
1144 ptv->down_count = 0;
1145
1146 AFPPeerUpdate(ptv);
1147
1148 /* Do cleaning if switching to down state */
1149 if (state == AFP_STATE_DOWN) {
1150 #ifdef HAVE_TPACKET_V3
1151 if (ptv->flags & AFP_TPACKET_V3) {
1152 if (!ptv->ring_v3) {
1153 SCFree(ptv->ring_v3);
1154 ptv->ring_v3 = NULL;
1155 }
1156 } else {
1157 #endif
1158 if (ptv->ring_v2) {
1159 /* only used in reading phase, we can free it */
1160 SCFree(ptv->ring_v2);
1161 ptv->ring_v2 = NULL;
1162 }
1163 #ifdef HAVE_TPACKET_V3
1164 }
1165 #endif
1166 if (ptv->socket != -1) {
1167 /* we need to wait for all packets to return data */
1168 if (SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1) == 0) {
1169 SCLogInfo("Cleaning socket connected to '%s'", ptv->iface);
1170 close(ptv->socket);
1171 ptv->socket = -1;
1172 }
1173 }
1174 }
1175 if (state == AFP_STATE_UP) {
1176 (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
1177 }
1178 }
1179
1180 static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
1181 uint64_t *discarded_pkts)
1182 {
1183 struct sockaddr_ll from;
1184 struct iovec iov;
1185 struct msghdr msg;
1186 struct timeval ts;
1187 union {
1188 struct cmsghdr cmsg;
1189 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
1190 } cmsg_buf;
1191
1192
1193 if (unlikely(suricata_ctl_flags != 0)) {
1194 return 1;
1195 }
1196
1197 msg.msg_name = &from;
1198 msg.msg_namelen = sizeof(from);
1199 msg.msg_iov = &iov;
1200 msg.msg_iovlen = 1;
1201 msg.msg_control = &cmsg_buf;
1202 msg.msg_controllen = sizeof(cmsg_buf);
1203 msg.msg_flags = 0;
1204
1205 iov.iov_len = ptv->datalen;
1206 iov.iov_base = ptv->data;
1207
1208 (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
1209
1210 if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
1211 /* FIXME */
1212 return -1;
1213 }
1214
1215 if ((ts.tv_sec > synctv->tv_sec) ||
1216 (ts.tv_sec >= synctv->tv_sec &&
1217 ts.tv_usec > synctv->tv_usec)) {
1218 return 1;
1219 }
1220 return 0;
1221 }
1222
1223 static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
1224 uint64_t *discarded_pkts)
1225 {
1226 union thdr h;
1227
1228 if (unlikely(suricata_ctl_flags != 0)) {
1229 return 1;
1230 }
1231
1232 #ifdef HAVE_TPACKET_V3
1233 if (ptv->flags & AFP_TPACKET_V3) {
1234 struct tpacket_block_desc *pbd;
1235 pbd = (struct tpacket_block_desc *) ptv->ring_v3[ptv->frame_offset].iov_base;
1236 *discarded_pkts += pbd->hdr.bh1.num_pkts;
1237 AFPFlushBlock(pbd);
1238 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req3.tp_block_nr;
1239 return 1;
1240
1241 } else
1242 #endif
1243 {
1244 /* Read packet from ring */
1245 h.raw = (((union thdr **)ptv->ring_v2)[ptv->frame_offset]);
1246 if (h.raw == NULL) {
1247 return -1;
1248 }
1249 (*discarded_pkts)++;
1250 if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
1251 ((time_t)h.h2->tp_sec == synctv->tv_sec &&
1252 (suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
1253 return 1;
1254 }
1255
1256 h.h2->tp_status = TP_STATUS_KERNEL;
1257 if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
1258 ptv->frame_offset = 0;
1259 }
1260 }
1261
1262
1263 return 0;
1264 }
1265
1266 /** \brief wait for all afpacket threads to fully init
1267 *
1268 * Discard packets before all threads are ready, as the cluster
1269 * setup is not complete yet.
1270 *
1271 * if AFPPeersListStarted() returns true init is complete
1272 *
1273 * \retval r 1 = happy, otherwise unhappy
1274 */
1275 static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
1276 {
1277 int r;
1278 struct timeval synctv;
1279 struct pollfd fds;
1280
1281 fds.fd = ptv->socket;
1282 fds.events = POLLIN;
1283
1284 /* Set timeval to end of the world */
1285 synctv.tv_sec = 0xffffffff;
1286 synctv.tv_usec = 0xffffffff;
1287
1288 while (1) {
1289 r = poll(&fds, 1, POLL_TIMEOUT);
1290 if (r > 0 &&
1291 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1292 SCLogWarning(SC_ERR_AFP_READ, "poll failed %02x",
1293 fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL));
1294 return 0;
1295 } else if (r > 0) {
1296 if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
1297 gettimeofday(&synctv, NULL);
1298 }
1299 if (ptv->flags & AFP_RING_MODE) {
1300 r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
1301 } else {
1302 r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
1303 }
1304 SCLogDebug("Discarding on %s", ptv->tv->name);
1305 switch (r) {
1306 case 1:
1307 SCLogDebug("Starting to read on %s", ptv->tv->name);
1308 return 1;
1309 case -1:
1310 return r;
1311 }
1312 /* no packets */
1313 } else if (r == 0 && AFPPeersListStarted()) {
1314 SCLogDebug("Starting to read on %s", ptv->tv->name);
1315 return 1;
1316 } else if (r < 0) { /* only exit on error */
1317 SCLogWarning(SC_ERR_AFP_READ, "poll failed with retval %d", r);
1318 return 0;
1319 }
1320 }
1321 return 1;
1322 }
1323
1324 /**
1325 * \brief Try to reopen socket
1326 *
1327 * \retval 0 in case of success, negative if error occurs or a condition
1328 * is not met.
1329 */
1330 static int AFPTryReopen(AFPThreadVars *ptv)
1331 {
1332 int afp_activate_r;
1333
1334 ptv->down_count++;
1335
1336
1337 /* Don't reconnect till we have packet that did not release data */
1338 if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
1339 return -1;
1340 }
1341
1342 afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
1343 if (afp_activate_r != 0) {
1344 if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
1345 SCLogWarning(SC_ERR_AFP_CREATE, "Can not open iface '%s'",
1346 ptv->iface);
1347 }
1348 return afp_activate_r;
1349 }
1350
1351 SCLogInfo("Interface '%s' is back", ptv->iface);
1352 return 0;
1353 }
1354
1355 /**
1356 * \brief Main AF_PACKET reading Loop function
1357 */
1358 TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
1359 {
1360 SCEnter();
1361
1362 AFPThreadVars *ptv = (AFPThreadVars *)data;
1363 struct pollfd fds;
1364 int r;
1365 TmSlot *s = (TmSlot *)slot;
1366 time_t last_dump = 0;
1367 time_t current_time;
1368 int (*AFPReadFunc) (AFPThreadVars *);
1369 uint64_t discarded_pkts = 0;
1370
1371 ptv->slot = s->slot_next;
1372
1373 if (ptv->flags & AFP_RING_MODE) {
1374 if (ptv->flags & AFP_TPACKET_V3) {
1375 AFPReadFunc = AFPReadFromRingV3;
1376 } else {
1377 AFPReadFunc = AFPReadFromRing;
1378 }
1379 } else {
1380 AFPReadFunc = AFPRead;
1381 }
1382
1383 if (ptv->afp_state == AFP_STATE_DOWN) {
1384 /* Wait for our turn, threads before us must have opened the socket */
1385 while (AFPPeersListWaitTurn(ptv->mpeer)) {
1386 usleep(1000);
1387 if (suricata_ctl_flags != 0) {
1388 break;
1389 }
1390 }
1391 r = AFPCreateSocket(ptv, ptv->iface, 1);
1392 if (r < 0) {
1393 switch (-r) {
1394 case AFP_FATAL_ERROR:
1395 SCLogError(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, fatal error");
1396 SCReturnInt(TM_ECODE_FAILED);
1397 case AFP_RECOVERABLE_ERROR:
1398 SCLogWarning(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, retrying soon");
1399 }
1400 }
1401 AFPPeersListReachedInc();
1402 }
1403 if (ptv->afp_state == AFP_STATE_UP) {
1404 SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
1405 if ((ptv->flags & AFP_TPACKET_V3) != 0) {
1406 AFPSynchronizeStart(ptv, &discarded_pkts);
1407 }
1408 /* let's reset counter as we will start the capture at the
1409 * next function call */
1410 #ifdef PACKET_STATISTICS
1411 struct tpacket_stats kstats;
1412 socklen_t len = sizeof (struct tpacket_stats);
1413 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
1414 &kstats, &len) > -1) {
1415 uint64_t pkts = 0;
1416 SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
1417 ", dropped %" PRIu32 "",
1418 ptv->tv->name,
1419 kstats.tp_packets, kstats.tp_drops);
1420 pkts = kstats.tp_packets - discarded_pkts - kstats.tp_drops;
1421 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, pkts);
1422 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, pkts);
1423 }
1424 #endif
1425 }
1426
1427 fds.fd = ptv->socket;
1428 fds.events = POLLIN;
1429
1430 while (1) {
1431 /* Start by checking the state of our interface */
1432 if (unlikely(ptv->afp_state == AFP_STATE_DOWN)) {
1433 int dbreak = 0;
1434
1435 do {
1436 usleep(AFP_RECONNECT_TIMEOUT);
1437 if (suricata_ctl_flags != 0) {
1438 dbreak = 1;
1439 break;
1440 }
1441 r = AFPTryReopen(ptv);
1442 fds.fd = ptv->socket;
1443 } while (r < 0);
1444 if (dbreak == 1)
1445 break;
1446 }
1447
1448 /* make sure we have at least one packet in the packet pool, to prevent
1449 * us from alloc'ing packets at line rate */
1450 PacketPoolWait();
1451
1452 r = poll(&fds, 1, POLL_TIMEOUT);
1453
1454 if (suricata_ctl_flags != 0) {
1455 break;
1456 }
1457
1458 if (r > 0 &&
1459 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1460 if (fds.revents & (POLLHUP | POLLRDHUP)) {
1461 AFPSwitchState(ptv, AFP_STATE_DOWN);
1462 continue;
1463 } else if (fds.revents & POLLERR) {
1464 char c;
1465 /* Do a recv to get errno */
1466 if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
1467 continue; /* what, no error? */
1468 SCLogError(SC_ERR_AFP_READ,
1469 "Error reading data from iface '%s': (%d" PRIu32 ") %s",
1470 ptv->iface, errno, strerror(errno));
1471 AFPSwitchState(ptv, AFP_STATE_DOWN);
1472 continue;
1473 } else if (fds.revents & POLLNVAL) {
1474 SCLogError(SC_ERR_AFP_READ, "Invalid polling request");
1475 AFPSwitchState(ptv, AFP_STATE_DOWN);
1476 continue;
1477 }
1478 } else if (r > 0) {
1479 r = AFPReadFunc(ptv);
1480 switch (r) {
1481 case AFP_READ_OK:
1482 /* Trigger one dump of stats every second */
1483 current_time = time(NULL);
1484 if (current_time != last_dump) {
1485 AFPDumpCounters(ptv);
1486 last_dump = current_time;
1487 }
1488 break;
1489 case AFP_READ_FAILURE:
1490 /* AFPRead in error: best to reset the socket */
1491 SCLogError(SC_ERR_AFP_READ,
1492 "AFPRead error reading data from iface '%s': (%d" PRIu32 ") %s",
1493 ptv->iface, errno, strerror(errno));
1494 AFPSwitchState(ptv, AFP_STATE_DOWN);
1495 continue;
1496 case AFP_FAILURE:
1497 AFPSwitchState(ptv, AFP_STATE_DOWN);
1498 SCReturnInt(TM_ECODE_FAILED);
1499 break;
1500 case AFP_KERNEL_DROP:
1501 AFPDumpCounters(ptv);
1502 break;
1503 }
1504 } else if (unlikely(r == 0)) {
1505 /* poll timed out, lets see if we need to inject a fake packet */
1506 TmThreadsCaptureInjectPacket(tv, ptv->slot, NULL);
1507
1508 } else if ((r < 0) && (errno != EINTR)) {
1509 SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d" PRIu32 ") %s",
1510 ptv->iface,
1511 errno, strerror(errno));
1512 AFPSwitchState(ptv, AFP_STATE_DOWN);
1513 continue;
1514 }
1515 StatsSyncCountersIfSignalled(tv);
1516 }
1517
1518 AFPDumpCounters(ptv);
1519 StatsSyncCountersIfSignalled(tv);
1520 SCReturnInt(TM_ECODE_OK);
1521 }
1522
1523 static int AFPGetDevFlags(int fd, const char *ifname)
1524 {
1525 struct ifreq ifr;
1526
1527 memset(&ifr, 0, sizeof(ifr));
1528 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1529
1530 if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) {
1531 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1532 ifname, strerror(errno));
1533 return -1;
1534 }
1535
1536 return ifr.ifr_flags;
1537 }
1538
1539
1540 static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose)
1541 {
1542 struct ifreq ifr;
1543
1544 memset(&ifr, 0, sizeof(ifr));
1545 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1546
1547 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
1548 if (verbose)
1549 SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
1550 ifname, strerror(errno));
1551 return -1;
1552 }
1553
1554 return ifr.ifr_ifindex;
1555 }
1556
1557 static int AFPGetDevLinktype(int fd, const char *ifname)
1558 {
1559 struct ifreq ifr;
1560
1561 memset(&ifr, 0, sizeof(ifr));
1562 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1563
1564 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1565 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1566 ifname, strerror(errno));
1567 return -1;
1568 }
1569
1570 switch (ifr.ifr_hwaddr.sa_family) {
1571 case ARPHRD_LOOPBACK:
1572 return LINKTYPE_ETHERNET;
1573 case ARPHRD_PPP:
1574 case ARPHRD_NONE:
1575 return LINKTYPE_RAW;
1576 default:
1577 return ifr.ifr_hwaddr.sa_family;
1578 }
1579 }
1580
1581 int AFPGetLinkType(const char *ifname)
1582 {
1583 int ltype;
1584
1585 int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1586 if (fd == -1) {
1587 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
1588 return LINKTYPE_RAW;
1589 }
1590
1591 ltype = AFPGetDevLinktype(fd, ifname);
1592 close(fd);
1593
1594 return ltype;
1595 }
1596
1597 static int AFPComputeRingParams(AFPThreadVars *ptv, int order)
1598 {
1599 /* Compute structure:
1600 Target is to store all pending packets
1601 with a size equal to MTU + auxdata
1602 And we keep a decent number of block
1603
1604 To do so:
1605 Compute frame_size (aligned to be able to fit in block
1606 Check which block size we need. Blocksize is a 2^n * pagesize
1607 We then need to get order, big enough to have
1608 frame_size < block size
1609 Find number of frame per block (divide)
1610 Fill in packet_req
1611
1612 Compute frame size:
1613 described in packet_mmap.txt
1614 dependant on snaplen (need to use a variable ?)
1615 snaplen: MTU ?
1616 tp_hdrlen determine_version in daq_afpacket
1617 in V1: sizeof(struct tpacket_hdr);
1618 in V2: val in getsockopt(instance->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len)
1619 frame size: TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1620
1621 */
1622 int tp_hdrlen = sizeof(struct tpacket_hdr);
1623 int snaplen = default_packet_size;
1624
1625 if (snaplen == 0) {
1626 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1627 if (snaplen <= 0) {
1628 SCLogWarning(SC_ERR_INVALID_VALUE,
1629 "Unable to get MTU, setting snaplen to sane default of 1514");
1630 snaplen = 1514;
1631 }
1632 }
1633
1634 ptv->req.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1635 ptv->req.tp_block_size = getpagesize() << order;
1636 int frames_per_block = ptv->req.tp_block_size / ptv->req.tp_frame_size;
1637 if (frames_per_block == 0) {
1638 SCLogError(SC_ERR_INVALID_VALUE, "Frame size bigger than block size");
1639 return -1;
1640 }
1641 ptv->req.tp_frame_nr = ptv->ring_size;
1642 ptv->req.tp_block_nr = ptv->req.tp_frame_nr / frames_per_block + 1;
1643 /* exact division */
1644 ptv->req.tp_frame_nr = ptv->req.tp_block_nr * frames_per_block;
1645 SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
1646 ptv->req.tp_block_size, ptv->req.tp_block_nr,
1647 ptv->req.tp_frame_size, ptv->req.tp_frame_nr);
1648 return 1;
1649 }
1650
1651 #ifdef HAVE_TPACKET_V3
1652 static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
1653 {
1654 ptv->req3.tp_block_size = ptv->block_size;
1655 ptv->req3.tp_frame_size = 2048;
1656 int frames_per_block = 0;
1657 int tp_hdrlen = sizeof(struct tpacket3_hdr);
1658 int snaplen = default_packet_size;
1659
1660 if (snaplen == 0) {
1661 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1662 if (snaplen <= 0) {
1663 SCLogWarning(SC_ERR_INVALID_VALUE,
1664 "Unable to get MTU, setting snaplen to sane default of 1514");
1665 snaplen = 1514;
1666 }
1667 }
1668
1669 ptv->req.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1670 frames_per_block = ptv->req3.tp_block_size / ptv->req3.tp_frame_size;
1671
1672 if (frames_per_block == 0) {
1673 SCLogError(SC_ERR_INVALID_VALUE,
1674 "Block size is too small, it should be at least %d",
1675 ptv->req3.tp_frame_size);
1676 return -1;
1677 }
1678 ptv->req3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
1679 /* exact division */
1680 ptv->req3.tp_frame_nr = ptv->req3.tp_block_nr * frames_per_block;
1681 ptv->req3.tp_retire_blk_tov = ptv->block_timeout;
1682 ptv->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
1683 SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
1684 ptv->req3.tp_block_size, ptv->req3.tp_block_nr,
1685 ptv->req3.tp_frame_size, ptv->req3.tp_frame_nr,
1686 ptv->req3.tp_block_size * ptv->req3.tp_block_nr
1687 );
1688 return 1;
1689 }
1690 #endif
1691
1692 static int AFPSetupRing(AFPThreadVars *ptv, char *devname)
1693 {
1694 int val;
1695 unsigned int len = sizeof(val), i;
1696 unsigned int ring_buflen;
1697 uint8_t * ring_buf;
1698 int order;
1699 int r, mmap_flag;
1700
1701 #ifdef HAVE_TPACKET_V3
1702 if (ptv->flags & AFP_TPACKET_V3) {
1703 val = TPACKET_V3;
1704 } else
1705 #endif
1706 {
1707 val = TPACKET_V2;
1708 }
1709 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
1710 if (errno == ENOPROTOOPT) {
1711 if (ptv->flags & AFP_TPACKET_V3) {
1712 SCLogError(SC_ERR_AFP_CREATE,
1713 "Too old kernel giving up (need 3.2 for TPACKET_V3)");
1714 } else {
1715 SCLogError(SC_ERR_AFP_CREATE,
1716 "Too old kernel giving up (need 2.6.27 at least)");
1717 }
1718 }
1719 SCLogError(SC_ERR_AFP_CREATE, "Error when retrieving packet header len");
1720 return AFP_FATAL_ERROR;
1721 }
1722
1723 val = TPACKET_V2;
1724 #ifdef HAVE_TPACKET_V3
1725 if (ptv->flags & AFP_TPACKET_V3) {
1726 val = TPACKET_V3;
1727 }
1728 #endif
1729 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_VERSION, &val,
1730 sizeof(val)) < 0) {
1731 SCLogError(SC_ERR_AFP_CREATE,
1732 "Can't activate TPACKET_V2/TPACKET_V3 on packet socket: %s",
1733 strerror(errno));
1734 return AFP_FATAL_ERROR;
1735 }
1736
1737 #ifdef HAVE_HW_TIMESTAMPING
1738 int req = SOF_TIMESTAMPING_RAW_HARDWARE;
1739 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req,
1740 sizeof(req)) < 0) {
1741 SCLogWarning(SC_ERR_AFP_CREATE,
1742 "Can't activate hardware timestamping on packet socket: %s",
1743 strerror(errno));
1744 }
1745 #endif
1746
1747 /* Let's reserve head room so we can add the VLAN header in IPS
1748 * or TAP mode before write the packet */
1749 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
1750 /* Only one vlan is extracted from AFP header so
1751 * one VLAN header length is enough. */
1752 int reserve = VLAN_HEADER_LEN;
1753 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
1754 sizeof(reserve)) < 0) {
1755 SCLogError(SC_ERR_AFP_CREATE,
1756 "Can't activate reserve on packet socket: %s",
1757 strerror(errno));
1758 return AFP_FATAL_ERROR;
1759 }
1760 }
1761
1762 /* Allocate RX ring */
1763 #ifdef HAVE_TPACKET_V3
1764 if (ptv->flags & AFP_TPACKET_V3) {
1765 if (AFPComputeRingParamsV3(ptv) != 1) {
1766 return AFP_FATAL_ERROR;
1767 }
1768 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1769 (void *) &ptv->req3, sizeof(ptv->req3));
1770 if (r < 0) {
1771 SCLogError(SC_ERR_MEM_ALLOC,
1772 "Unable to allocate RX Ring for iface %s: (%d) %s",
1773 devname,
1774 errno,
1775 strerror(errno));
1776 return AFP_FATAL_ERROR;
1777 }
1778 } else {
1779 #endif
1780 for (order = AFP_BLOCK_SIZE_DEFAULT_ORDER; order >= 0; order--) {
1781 if (AFPComputeRingParams(ptv, order) != 1) {
1782 SCLogInfo("Ring parameter are incorrect. Please correct the devel");
1783 return AFP_FATAL_ERROR;
1784 }
1785
1786 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1787 (void *) &ptv->req, sizeof(ptv->req));
1788
1789 if (r < 0) {
1790 if (errno == ENOMEM) {
1791 SCLogInfo("Memory issue with ring parameters. Retrying.");
1792 continue;
1793 }
1794 SCLogError(SC_ERR_MEM_ALLOC,
1795 "Unable to allocate RX Ring for iface %s: (%d) %s",
1796 devname,
1797 errno,
1798 strerror(errno));
1799 return AFP_FATAL_ERROR;
1800 } else {
1801 break;
1802 }
1803 }
1804 if (order < 0) {
1805 SCLogError(SC_ERR_MEM_ALLOC,
1806 "Unable to allocate RX Ring for iface %s (order 0 failed)",
1807 devname);
1808 return AFP_FATAL_ERROR;
1809 }
1810 #ifdef HAVE_TPACKET_V3
1811 }
1812 #endif
1813
1814 /* Allocate the Ring */
1815 #ifdef HAVE_TPACKET_V3
1816 if (ptv->flags & AFP_TPACKET_V3) {
1817 ring_buflen = ptv->req3.tp_block_nr * ptv->req3.tp_block_size;
1818 } else {
1819 #endif
1820 ring_buflen = ptv->req.tp_block_nr * ptv->req.tp_block_size;
1821 #ifdef HAVE_TPACKET_V3
1822 }
1823 #endif
1824 mmap_flag = MAP_SHARED;
1825 if (ptv->flags & AFP_MMAP_LOCKED)
1826 mmap_flag |= MAP_LOCKED;
1827 ring_buf = mmap(0, ring_buflen, PROT_READ|PROT_WRITE,
1828 mmap_flag, ptv->socket, 0);
1829 if (ring_buf == MAP_FAILED) {
1830 SCLogError(SC_ERR_MEM_ALLOC, "Unable to mmap, error %s",
1831 strerror(errno));
1832 goto mmap_err;
1833 }
1834 #ifdef HAVE_TPACKET_V3
1835 if (ptv->flags & AFP_TPACKET_V3) {
1836 ptv->ring_v3 = SCMalloc(ptv->req3.tp_block_nr * sizeof(*ptv->ring_v3));
1837 if (!ptv->ring_v3) {
1838 SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring_v3");
1839 goto postmmap_err;
1840 }
1841 for (i = 0; i < ptv->req3.tp_block_nr; ++i) {
1842 ptv->ring_v3[i].iov_base = ring_buf + (i * ptv->req3.tp_block_size);
1843 ptv->ring_v3[i].iov_len = ptv->req3.tp_block_size;
1844 }
1845 } else {
1846 #endif
1847 /* allocate a ring for each frame header pointer*/
1848 ptv->ring_v2 = SCMalloc(ptv->req.tp_frame_nr * sizeof (union thdr *));
1849 if (ptv->ring_v2 == NULL) {
1850 SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate frame buf");
1851 goto postmmap_err;
1852 }
1853 memset(ptv->ring_v2, 0, ptv->req.tp_frame_nr * sizeof (union thdr *));
1854 /* fill the header ring with proper frame ptr*/
1855 ptv->frame_offset = 0;
1856 for (i = 0; i < ptv->req.tp_block_nr; ++i) {
1857 void *base = &ring_buf[i * ptv->req.tp_block_size];
1858 unsigned int j;
1859 for (j = 0; j < ptv->req.tp_block_size / ptv->req.tp_frame_size; ++j, ++ptv->frame_offset) {
1860 (((union thdr **)ptv->ring_v2)[ptv->frame_offset]) = base;
1861 base += ptv->req.tp_frame_size;
1862 }
1863 }
1864 ptv->frame_offset = 0;
1865 #ifdef HAVE_TPACKET_V3
1866 }
1867 #endif
1868
1869 return 0;
1870
1871 postmmap_err:
1872 munmap(ring_buf, ring_buflen);
1873 if (ptv->ring_v2)
1874 SCFree(ptv->ring_v2);
1875 if (ptv->ring_v3)
1876 SCFree(ptv->ring_v3);
1877 mmap_err:
1878 /* Packet mmap does the cleaning when socket is closed */
1879 return AFP_FATAL_ERROR;
1880 }
1881
/** \brief test if we can use FANOUT. Older kernels like those in
 *         CentOS6 have HAVE_PACKET_FANOUT defined but fail to work
 *
 *  A temporary AF_PACKET socket is created and the PACKET_FANOUT option
 *  is set on it with hash mode, defrag flag and group id 1.
 *
 *  \retval 1 if the option could be set, 0 otherwise (or if
 *          HAVE_PACKET_FANOUT is not defined)
 */
int AFPIsFanoutSupported(void)
{
#ifdef HAVE_PACKET_FANOUT
    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0)
        return 0;

    uint16_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
    uint16_t id = 1;
    /* shift as uint32_t: mode is promoted to int and a flag in bit 15
     * would be shifted into the sign bit */
    uint32_t option = ((uint32_t)mode << 16) | (id & 0xffff);
    int r = setsockopt(fd, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
    /* close() may change errno, keep the value set by setsockopt() */
    int setsockopt_errno = errno;
    close(fd);

    if (r < 0) {
        SCLogPerf("fanout not supported by kernel: %s", strerror(setsockopt_errno));
        return 0;
    }
    return 1;
#else
    return 0;
#endif
}
1907
1908 static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
1909 {
1910 int r;
1911 int ret = AFP_FATAL_ERROR;
1912 struct packet_mreq sock_params;
1913 struct sockaddr_ll bind_address;
1914 int if_idx;
1915
1916 /* open socket */
1917 ptv->socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1918 if (ptv->socket == -1) {
1919 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
1920 goto error;
1921 }
1922 if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);
1923 /* bind socket */
1924 memset(&bind_address, 0, sizeof(bind_address));
1925 bind_address.sll_family = AF_PACKET;
1926 bind_address.sll_protocol = htons(ETH_P_ALL);
1927 bind_address.sll_ifindex = if_idx;
1928 if (bind_address.sll_ifindex == -1) {
1929 if (verbose)
1930 SCLogError(SC_ERR_AFP_CREATE, "Couldn't find iface %s", devname);
1931 ret = AFP_RECOVERABLE_ERROR;
1932 goto socket_err;
1933 }
1934
1935 if (ptv->promisc != 0) {
1936 /* Force promiscuous mode */
1937 memset(&sock_params, 0, sizeof(sock_params));
1938 sock_params.mr_type = PACKET_MR_PROMISC;
1939 sock_params.mr_ifindex = bind_address.sll_ifindex;
1940 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP,(void *)&sock_params, sizeof(sock_params));
1941 if (r < 0) {
1942 SCLogError(SC_ERR_AFP_CREATE,
1943 "Couldn't switch iface %s to promiscuous, error %s",
1944 devname, strerror(errno));
1945 goto socket_err;
1946 }
1947 }
1948
1949 if (ptv->checksum_mode == CHECKSUM_VALIDATION_KERNEL) {
1950 int val = 1;
1951 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_AUXDATA, &val,
1952 sizeof(val)) == -1 && errno != ENOPROTOOPT) {
1953 SCLogWarning(SC_ERR_NO_AF_PACKET,
1954 "'kernel' checksum mode not supported, falling back to full mode.");
1955 ptv->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
1956 }
1957 }
1958
1959 /* set socket recv buffer size */
1960 if (ptv->buffer_size != 0) {
1961 /*
1962 * Set the socket buffer size to the specified value.
1963 */
1964 SCLogPerf("Setting AF_PACKET socket buffer to %d", ptv->buffer_size);
1965 if (setsockopt(ptv->socket, SOL_SOCKET, SO_RCVBUF,
1966 &ptv->buffer_size,
1967 sizeof(ptv->buffer_size)) == -1) {
1968 SCLogError(SC_ERR_AFP_CREATE,
1969 "Couldn't set buffer size to %d on iface %s, error %s",
1970 ptv->buffer_size, devname, strerror(errno));
1971 goto socket_err;
1972 }
1973 }
1974
1975 r = bind(ptv->socket, (struct sockaddr *)&bind_address, sizeof(bind_address));
1976 if (r < 0) {
1977 if (verbose) {
1978 if (errno == ENETDOWN) {
1979 SCLogError(SC_ERR_AFP_CREATE,
1980 "Couldn't bind AF_PACKET socket, iface %s is down",
1981 devname);
1982 } else {
1983 SCLogError(SC_ERR_AFP_CREATE,
1984 "Couldn't bind AF_PACKET socket to iface %s, error %s",
1985 devname, strerror(errno));
1986 }
1987 }
1988 ret = AFP_RECOVERABLE_ERROR;
1989 goto socket_err;
1990 }
1991
1992 #ifdef HAVE_PACKET_FANOUT
1993 /* add binded socket to fanout group */
1994 if (ptv->threads > 1) {
1995 uint16_t mode = ptv->cluster_type;
1996 uint16_t id = ptv->cluster_id;
1997 uint32_t option = (mode << 16) | (id & 0xffff);
1998 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
1999 if (r < 0) {
2000 SCLogError(SC_ERR_AFP_CREATE,
2001 "Couldn't set fanout mode, error %s",
2002 strerror(errno));
2003 goto socket_err;
2004 }
2005 }
2006 #endif
2007
2008 int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
2009 if (if_flags == -1) {
2010 if (verbose) {
2011 SCLogError(SC_ERR_AFP_READ,
2012 "Couldn't get flags for interface '%s'",
2013 ptv->iface);
2014 }
2015 ret = AFP_RECOVERABLE_ERROR;
2016 goto socket_err;
2017 }
2018 if ((if_flags & IFF_UP) == 0) {
2019 if (verbose) {
2020 SCLogError(SC_ERR_AFP_READ,
2021 "Interface '%s' is down",
2022 ptv->iface);
2023 }
2024 ret = AFP_RECOVERABLE_ERROR;
2025 goto socket_err;
2026 }
2027
2028 if (ptv->flags & AFP_RING_MODE) {
2029 ret = AFPSetupRing(ptv, devname);
2030 if (ret != 0)
2031 goto socket_err;
2032 }
2033
2034 SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
2035
2036 ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
2037 switch (ptv->datalink) {
2038 case ARPHRD_PPP:
2039 case ARPHRD_ATM:
2040 ptv->cooked = 1;
2041 break;
2042 }
2043
2044 TmEcode rc;
2045 rc = AFPSetBPFFilter(ptv);
2046 if (rc == TM_ECODE_FAILED) {
2047 SCLogError(SC_ERR_AFP_CREATE, "Set AF_PACKET bpf filter \"%s\" failed.", ptv->bpf_filter);
2048 goto frame_err;
2049 }
2050
2051 /* Init is ok */
2052 AFPSwitchState(ptv, AFP_STATE_UP);
2053 return 0;
2054
2055 frame_err:
2056 if (ptv->flags & AFP_TPACKET_V3) {
2057 if (ptv->ring_v3)
2058 SCFree(ptv->ring_v3);
2059 } else {
2060 if (ptv->ring_v2)
2061 SCFree(ptv->ring_v2);
2062 }
2063 socket_err:
2064 close(ptv->socket);
2065 ptv->socket = -1;
2066 error:
2067 return -ret;
2068 }
2069
2070 TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
2071 {
2072 struct bpf_program filter;
2073 struct sock_fprog fcode;
2074 int rc;
2075
2076 if (!ptv->bpf_filter)
2077 return TM_ECODE_OK;
2078
2079 SCMutexLock(&afpacket_bpf_set_filter_lock);
2080
2081 SCLogInfo("Using BPF '%s' on iface '%s'",
2082 ptv->bpf_filter,
2083 ptv->iface);
2084 if (pcap_compile_nopcap(default_packet_size, /* snaplen_arg */
2085 ptv->datalink, /* linktype_arg */
2086 &filter, /* program */
2087 ptv->bpf_filter, /* const char *buf */
2088 1, /* optimize */
2089 0 /* mask */
2090 ) == -1) {
2091 SCLogError(SC_ERR_AFP_CREATE, "Filter compilation failed.");
2092 SCMutexUnlock(&afpacket_bpf_set_filter_lock);
2093 return TM_ECODE_FAILED;
2094 }
2095 SCMutexUnlock(&afpacket_bpf_set_filter_lock);
2096
2097 if (filter.bf_insns == NULL) {
2098 SCLogError(SC_ERR_AFP_CREATE, "Filter badly setup.");
2099 return TM_ECODE_FAILED;
2100 }
2101
2102 fcode.len = filter.bf_len;
2103 fcode.filter = (struct sock_filter*)filter.bf_insns;
2104
2105 rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
2106
2107 if(rc == -1) {
2108 SCLogError(SC_ERR_AFP_CREATE, "Failed to attach filter: %s", strerror(errno));
2109 return TM_ECODE_FAILED;
2110 }
2111
2112 return TM_ECODE_OK;
2113 }
2114
2115
2116 /**
2117 * \brief Init function for ReceiveAFP.
2118 *
2119 * \param tv pointer to ThreadVars
2120 * \param initdata pointer to the interface passed from the user
2121 * \param data pointer gets populated with AFPThreadVars
2122 *
2123 * \todo Create a general AFP setup function.
2124 */
2125 TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
2126 {
2127 SCEnter();
2128 AFPIfaceConfig *afpconfig = (AFPIfaceConfig *)initdata;
2129
2130 if (initdata == NULL) {
2131 SCLogError(SC_ERR_INVALID_ARGUMENT, "initdata == NULL");
2132 SCReturnInt(TM_ECODE_FAILED);
2133 }
2134
2135 AFPThreadVars *ptv = SCMalloc(sizeof(AFPThreadVars));
2136 if (unlikely(ptv == NULL)) {
2137 afpconfig->DerefFunc(afpconfig);
2138 SCReturnInt(TM_ECODE_FAILED);
2139 }
2140 memset(ptv, 0, sizeof(AFPThreadVars));
2141
2142 ptv->tv = tv;
2143 ptv->cooked = 0;
2144
2145 strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
2146 ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2147
2148 ptv->livedev = LiveGetDevice(ptv->iface);
2149 if (ptv->livedev == NULL) {
2150 SCLogError(SC_ERR_INVALID_VALUE, "Unable to find Live device");
2151 SCFree(ptv);
2152 SCReturnInt(TM_ECODE_FAILED);
2153 }
2154
2155 ptv->buffer_size = afpconfig->buffer_size;
2156 ptv->ring_size = afpconfig->ring_size;
2157 ptv->block_size = afpconfig->block_size;
2158
2159 ptv->promisc = afpconfig->promisc;
2160 ptv->checksum_mode = afpconfig->checksum_mode;
2161 ptv->bpf_filter = NULL;
2162
2163 ptv->threads = 1;
2164 #ifdef HAVE_PACKET_FANOUT
2165 ptv->cluster_type = PACKET_FANOUT_LB;
2166 ptv->cluster_id = 1;
2167 /* We only set cluster info if the number of reader threads is greater than 1 */
2168 if (afpconfig->threads > 1) {
2169 ptv->cluster_id = afpconfig->cluster_id;
2170 ptv->cluster_type = afpconfig->cluster_type;
2171 ptv->threads = afpconfig->threads;
2172 }
2173 #endif
2174 ptv->flags = afpconfig->flags;
2175
2176 if (afpconfig->bpf_filter) {
2177 ptv->bpf_filter = afpconfig->bpf_filter;
2178 }
2179
2180 #ifdef PACKET_STATISTICS
2181 ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
2182 ptv->tv);
2183 ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
2184 ptv->tv);
2185 #endif
2186
2187 ptv->copy_mode = afpconfig->copy_mode;
2188 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
2189 strlcpy(ptv->out_iface, afpconfig->out_iface, AFP_IFACE_NAME_LENGTH);
2190 ptv->out_iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2191 /* Warn about BPF filter consequence */
2192 if (ptv->bpf_filter) {
2193 SCLogWarning(SC_WARN_UNCOMMON, "Enabling a BPF filter in IPS mode result"
2194 " in dropping all non matching packets.");
2195 }
2196 }
2197
2198
2199 if (AFPPeersListAdd(ptv) == TM_ECODE_FAILED) {
2200 SCFree(ptv);
2201 afpconfig->DerefFunc(afpconfig);
2202 SCReturnInt(TM_ECODE_FAILED);
2203 }
2204
2205 #define T_DATA_SIZE 70000
2206 ptv->data = SCMalloc(T_DATA_SIZE);
2207 if (ptv->data == NULL) {
2208 afpconfig->DerefFunc(afpconfig);
2209 SCFree(ptv);
2210 SCReturnInt(TM_ECODE_FAILED);
2211 }
2212 ptv->datalen = T_DATA_SIZE;
2213 #undef T_DATA_SIZE
2214
2215 *data = (void *)ptv;
2216
2217 afpconfig->DerefFunc(afpconfig);
2218
2219 /* A bit strange to have this here but we only have vlan information
2220 * during reading so we need to know if we want to keep vlan during
2221 * the capture phase */
2222 int vlanbool = 0;
2223 if ((ConfGetBool("vlan.use-for-tracking", &vlanbool)) == 1 && vlanbool == 0) {
2224 ptv->flags |= AFP_VLAN_DISABLED;
2225 }
2226
2227 /* If kernel is older than 3.0, VLAN is not stripped so we don't
2228 * get the info from packet extended header but we will use a standard
2229 * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
2230 if (! SCKernelVersionIsAtLeast(3, 0)) {
2231 ptv->flags |= AFP_VLAN_DISABLED;
2232 }
2233
2234 SCReturnInt(TM_ECODE_OK);
2235 }
2236
2237 /**
2238 * \brief This function prints stats to the screen at exit.
2239 * \param tv pointer to ThreadVars
2240 * \param data pointer that gets cast into AFPThreadVars for ptv
2241 */
2242 void ReceiveAFPThreadExitStats(ThreadVars *tv, void *data)
2243 {
2244 SCEnter();
2245 AFPThreadVars *ptv = (AFPThreadVars *)data;
2246
2247 #ifdef PACKET_STATISTICS
2248 AFPDumpCounters(ptv);
2249 SCLogPerf("(%s) Kernel: Packets %" PRIu64 ", dropped %" PRIu64 "",
2250 tv->name,
2251 StatsGetLocalCounterValue(tv, ptv->capture_kernel_packets),
2252 StatsGetLocalCounterValue(tv, ptv->capture_kernel_drops));
2253 #endif
2254 }
2255
2256 /**
2257 * \brief DeInit function closes af packet socket at exit.
2258 * \param tv pointer to ThreadVars
2259 * \param data pointer that gets cast into AFPThreadVars for ptv
2260 */
2261 TmEcode ReceiveAFPThreadDeinit(ThreadVars *tv, void *data)
2262 {
2263 AFPThreadVars *ptv = (AFPThreadVars *)data;
2264
2265 AFPSwitchState(ptv, AFP_STATE_DOWN);
2266
2267 if (ptv->data != NULL) {
2268 SCFree(ptv->data);
2269 ptv->data = NULL;
2270 }
2271 ptv->datalen = 0;
2272
2273 ptv->bpf_filter = NULL;
2274
2275 SCReturnInt(TM_ECODE_OK);
2276 }
2277
2278 /**
2279 * \brief This function passes off to link type decoders.
2280 *
2281 * DecodeAFP reads packets from the PacketQueue and passes
2282 * them off to the proper link type decoder.
2283 *
2284 * \param t pointer to ThreadVars
2285 * \param p pointer to the current packet
2286 * \param data pointer that gets cast into AFPThreadVars for ptv
2287 * \param pq pointer to the current PacketQueue
2288 */
2289 TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *postpq)
2290 {
2291 SCEnter();
2292 DecodeThreadVars *dtv = (DecodeThreadVars *)data;
2293
2294 /* XXX HACK: flow timeout can call us for injected pseudo packets
2295 * see bug: https://redmine.openinfosecfoundation.org/issues/1107 */
2296 if (p->flags & PKT_PSEUDO_STREAM_END)
2297 return TM_ECODE_OK;
2298
2299 /* update counters */
2300 DecodeUpdatePacketCounters(tv, dtv, p);
2301
2302 /* If suri has set vlan during reading, we increase vlan counter */
2303 if (p->vlan_idx) {
2304 StatsIncr(tv, dtv->counter_vlan);
2305 }
2306
2307 /* call the decoder */
2308 switch (p->datalink) {
2309 case LINKTYPE_ETHERNET:
2310 DecodeEthernet(tv, dtv, p,GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2311 break;
2312 case LINKTYPE_LINUX_SLL:
2313 DecodeSll(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2314 break;
2315 case LINKTYPE_PPP:
2316 DecodePPP(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2317 break;
2318 case LINKTYPE_RAW:
2319 DecodeRaw(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2320 break;
2321 case LINKTYPE_NULL:
2322 DecodeNull(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2323 break;
2324 default:
2325 SCLogError(SC_ERR_DATALINK_UNIMPLEMENTED, "Error: datalink type %" PRId32 " not yet supported in module DecodeAFP", p->datalink);
2326 break;
2327 }
2328
2329 PacketDecodeFinalize(tv, dtv, p);
2330
2331 SCReturnInt(TM_ECODE_OK);
2332 }
2333
2334 TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
2335 {
2336 SCEnter();
2337 DecodeThreadVars *dtv = NULL;
2338
2339 dtv = DecodeThreadVarsAlloc(tv);
2340
2341 if (dtv == NULL)
2342 SCReturnInt(TM_ECODE_FAILED);
2343
2344 DecodeRegisterPerfCounters(dtv, tv);
2345
2346 *data = (void *)dtv;
2347
2348 #ifdef __SC_CUDA_SUPPORT__
2349 if (CudaThreadVarsInit(&dtv->cuda_vars) < 0)
2350 SCReturnInt(TM_ECODE_FAILED);
2351 #endif
2352
2353 SCReturnInt(TM_ECODE_OK);
2354 }
2355
2356 TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data)
2357 {
2358 if (data != NULL)
2359 DecodeThreadVarsFree(tv, data);
2360 SCReturnInt(TM_ECODE_OK);
2361 }
2362
2363 #endif /* HAVE_AF_PACKET */
2364 /* eof */
2365 /**
2366 * @}
2367 */