]> git.ipfire.org Git - thirdparty/mtr.git/commitdiff
mtr-packet: use ICMP and UDP without privilege on linux
authorChonggang Li <chonggangli@google.com>
Thu, 13 Jul 2017 23:17:08 +0000 (16:17 -0700)
committerChonggang Li <chonggangli@google.com>
Sun, 13 Aug 2017 22:32:28 +0000 (15:32 -0700)
This commit enables non-privileged users to use mtr on linux without
setuid. Currently ICMP and UDP protocols are supported in this commit.
Previously, to use mtr on linux with protocol ICMP and UDP, RAW sockets
have to be opened to send out packets and receive ICMP errors, so users
must have RAW socket permission to use this program. The goal of this
commit is to make mtr usable for normal users without RAW socket
permission. The changes include:
(1) The original logic is not changed, but instead, when the program
fails to open RAW sockets, it will fall back to opening DGRAM sockets.
(2) A new flag is created to indicate whether RAW socket is used for
IPv4 and IPv6 respectively.
(3) When using DGRAM sockets to send out packets, receive sockets are
not required. Instead, IP_RECVERR is enabled to receive ICMP errors.
(4) Packet receiving function is changed from recvfrom() to recvmsg() to
retrieve more information.
(5) When error is indicated, the program will check the error type and
read from the error queue of the socket. Original payload causing the
error will be read out from the error queue to match for probe, and
additional data (e.g. source ip of ICMP error packets) will be retrieved
from CMSG.
(6) Use a separate socket to probe byte order if raw socket creation
fails, to avoid double bind issue.
(7) Also a few tweaks are added to make non-RAW sockets work.

Change-Id: Idb08ff0847adbabd23cdc031907de3bde08a6afc

packet/command.c
packet/construct_unix.c
packet/deconstruct_unix.c
packet/deconstruct_unix.h
packet/probe.c
packet/probe.h
packet/probe_cygwin.c
packet/probe_unix.c
packet/probe_unix.h
packet/wait_unix.c

index 42b9a52f0feba64a48281e2649f69eeb777a608e..652424b9cf0d51d5929ab4677635dde77edae5eb 100644 (file)
@@ -321,6 +321,7 @@ void send_probe_command(
     param.ttl = 255;
     param.packet_size = 64;
     param.timeout = 10;
+    param.is_probing_byte_order = false;
 
     for (i = 0; i < command->argument_count; i++) {
         name = command->argument_name[i];
index b0f46795117a142821b223c2158809831dbbf863..95dbe8201f73c17bf9c7d1fc4aac9484ec5233d1 100644 (file)
@@ -138,8 +138,13 @@ void construct_icmp4_header(
     struct ICMPHeader *icmp;
     int icmp_size;
 
-    icmp = (struct ICMPHeader *) &packet_buffer[sizeof(struct IPHeader)];
-    icmp_size = packet_size - sizeof(struct IPHeader);
+    if (net_state->platform.ip4_socket_raw) {
+        icmp = (struct ICMPHeader *) &packet_buffer[sizeof(struct IPHeader)];
+        icmp_size = packet_size - sizeof(struct IPHeader);
+    } else {
+        icmp = (struct ICMPHeader *) &packet_buffer[0];
+        icmp_size = packet_size;
+    }
 
     memset(icmp, 0, sizeof(struct ICMPHeader));
 
@@ -224,8 +229,13 @@ void construct_udp4_header(
     struct UDPHeader *udp;
     int udp_size;
 
-    udp = (struct UDPHeader *) &packet_buffer[sizeof(struct IPHeader)];
-    udp_size = packet_size - sizeof(struct IPHeader);
+    if (net_state->platform.ip4_socket_raw) {
+        udp = (struct UDPHeader *) &packet_buffer[sizeof(struct IPHeader)];
+        udp_size = packet_size - sizeof(struct IPHeader);
+    } else {
+        udp = (struct UDPHeader *) &packet_buffer[0];
+        udp_size = packet_size;
+    }
 
     memset(udp, 0, sizeof(struct UDPHeader));
 
@@ -254,14 +264,16 @@ int construct_udp6_packet(
     set_udp_ports(udp, sequence, param);
     udp->length = htons(udp_size);
 
-    /*
-       Instruct the kernel to put the pseudoheader checksum into the
-       UDP header.
-     */
-    int chksum_offset = (char *) &udp->checksum - (char *) udp;
-    if (setsockopt(udp_socket, IPPROTO_IPV6,
-                   IPV6_CHECKSUM, &chksum_offset, sizeof(int))) {
-        return -1;
+    if (net_state->platform.ip6_socket_raw) {
+        /*
+           Instruct the kernel to put the pseudoheader checksum into the
+           UDP header, this is only needed when using RAW socket.
+         */
+        int chksum_offset = (char *) &udp->checksum - (char *) udp;
+        if (setsockopt(udp_socket, IPPROTO_IPV6,
+                       IPV6_CHECKSUM, &chksum_offset, sizeof(int))) {
+            return -1;
+        }
     }
 
     return 0;
@@ -425,7 +437,7 @@ int compute_packet_size(
     const struct net_state_t *net_state,
     const struct probe_param_t *param)
 {
-    int packet_size;
+    int packet_size = 0;
 
     if (param->protocol == IPPROTO_TCP) {
         return 0;
@@ -438,9 +450,13 @@ int compute_packet_size(
 
     /*  Start by determining the full size, including omitted headers  */
     if (param->ip_version == 6) {
-        packet_size = sizeof(struct IP6Header);
+        if (net_state->platform.ip6_socket_raw) {
+            packet_size += sizeof(struct IP6Header);
+        }
     } else if (param->ip_version == 4) {
-        packet_size = sizeof(struct IPHeader);
+        if (net_state->platform.ip4_socket_raw) {
+            packet_size += sizeof(struct IPHeader);
+        }
     } else {
         errno = EINVAL;
         return -1;
@@ -470,7 +486,7 @@ int compute_packet_size(
        Since we don't explicitly construct the IPv6 header, we
        need to account for it in our transmitted size.
      */
-    if (param->ip_version == 6) {
+    if (param->ip_version == 6 && net_state->platform.ip6_socket_raw) {
         packet_size -= sizeof(struct IP6Header);
     }
 
@@ -491,6 +507,10 @@ int construct_ip4_packet(
 {
     int send_socket = net_state->platform.ip4_send_socket;
     bool is_stream_protocol = false;
+    int tos, ttl, socket;
+    bool bind_send_socket = false;
+    struct sockaddr_storage current_sockaddr;
+    int current_sockaddr_len;
 
     if (param->protocol == IPPROTO_TCP) {
         is_stream_protocol = true;
@@ -499,9 +519,10 @@ int construct_ip4_packet(
         is_stream_protocol = true;
 #endif
     } else {
-        construct_ip4_header(net_state, packet_buffer, packet_size,
-                             src_sockaddr, dest_sockaddr, param);
-
+        if (net_state->platform.ip4_socket_raw) {
+            construct_ip4_header(net_state, packet_buffer, packet_size,
+                                 src_sockaddr, dest_sockaddr, param);
+        }
         if (param->protocol == IPPROTO_ICMP) {
             construct_icmp4_header(net_state, sequence, packet_buffer,
                                    packet_size, param);
@@ -546,6 +567,55 @@ int construct_ip4_packet(
     }
 #endif
 
+    /*
+       Bind src port when not using raw socket to pass in ICMP id, kernel
+       get ICMP id from src_port when using DGRAM socket.
+     */
+    if (!net_state->platform.ip4_socket_raw &&
+            param->protocol == IPPROTO_ICMP &&
+            !param->is_probing_byte_order) {
+        current_sockaddr_len = sizeof(struct sockaddr_in);
+        bind_send_socket = true;
+        socket = net_state->platform.ip4_txrx_icmp_socket;
+        if (getsockname(socket, (struct sockaddr *) &current_sockaddr,
+                        &current_sockaddr_len)) {
+            return -1;
+        }
+        struct sockaddr_in *sin_cur =
+            (struct sockaddr_in *) &current_sockaddr;
+
+        /* avoid double bind */
+        if (sin_cur->sin_port) {
+            bind_send_socket = false;
+        }
+    }
+
+    /*  Bind to our local address  */
+    if (bind_send_socket && bind(socket, (struct sockaddr *)src_sockaddr,
+                sizeof(struct sockaddr_in))) {
+        return -1;
+    }
+
+    /* set TOS and TTL for non-raw socket */
+    if (!net_state->platform.ip4_socket_raw && !param->is_probing_byte_order) {
+        if (param->protocol == IPPROTO_ICMP) {
+            socket = net_state->platform.ip4_txrx_icmp_socket;
+        } else if (param->protocol == IPPROTO_UDP) {
+            socket = net_state->platform.ip4_txrx_udp_socket;
+        } else {
+            return 0;
+        }
+        tos = param->type_of_service;
+        if (setsockopt(socket, SOL_IP, IP_TOS, &tos, sizeof(int))) {
+            return -1;
+        }
+        ttl = param->ttl;
+        if (setsockopt(socket, SOL_IP, IP_TTL,
+                       &ttl, sizeof(int)) == -1) {
+            return -1;
+        }
+    }
+
     return 0;
 }
 
@@ -574,14 +644,22 @@ int construct_ip6_packet(
         is_stream_protocol = true;
 #endif
     } else if (param->protocol == IPPROTO_ICMP) {
-        send_socket = net_state->platform.icmp6_send_socket;
+        if (net_state->platform.ip6_socket_raw) {
+            send_socket = net_state->platform.icmp6_send_socket;
+        } else {
+            send_socket = net_state->platform.ip6_txrx_icmp_socket;
+        }
 
         if (construct_icmp6_packet
             (net_state, sequence, packet_buffer, packet_size, param)) {
             return -1;
         }
     } else if (param->protocol == IPPROTO_UDP) {
-        send_socket = net_state->platform.udp6_send_socket;
+        if (net_state->platform.ip6_socket_raw) {
+            send_socket = net_state->platform.udp6_send_socket;
+        } else {
+            send_socket = net_state->platform.ip6_txrx_udp_socket;
+        }
 
         if (construct_udp6_packet
             (net_state, sequence, packet_buffer, packet_size, param)) {
@@ -615,10 +693,18 @@ int construct_ip6_packet(
     current_sockaddr_len = sizeof(struct sockaddr_in6);
     if (getsockname(send_socket, (struct sockaddr *) &current_sockaddr,
                     &current_sockaddr_len) == 0) {
+        struct sockaddr_in6 *sin6_cur = (struct sockaddr_in6 *) &current_sockaddr;
 
-        if (memcmp(&current_sockaddr,
-                   src_sockaddr, sizeof(struct sockaddr_in6)) == 0) {
-            bind_send_socket = false;
+        if (net_state->platform.ip6_socket_raw) {
+            if (memcmp(&current_sockaddr,
+                       src_sockaddr, sizeof(struct sockaddr_in6)) == 0) {
+                bind_send_socket = false;
+            }
+        } else {
+            /* avoid double bind for DGRAM socket */
+            if (sin6_cur->sin6_port) {
+                bind_send_socket = false;
+            }
         }
     }
 
index c09dd6440c804c26791dae6f2364c88069bf9aac..b9ade116766f75e74116cc66fa3c4dddbf63cef3 100644 (file)
@@ -86,6 +86,27 @@ void handle_inner_udp_packet(
     }
 }
 
+void handle_error_queue_packet(
+    struct net_state_t *net_state,
+    const struct sockaddr_storage *remote_addr,
+    int icmp_result,
+    int proto,
+    char *packet,
+    int packet_length,
+    struct timeval *timestamp)
+{
+    if (proto == IPPROTO_UDP) {
+        handle_inner_udp_packet(net_state, remote_addr, ICMP_TIME_EXCEEDED,
+                (struct UDPHeader *)packet, packet_length, timestamp, 0, NULL);
+    } else if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
+        const struct ICMPHeader *icmp = (struct ICMPHeader *)packet;
+        find_and_receive_probe(net_state, remote_addr, timestamp,
+                               ICMP_TIME_EXCEEDED, IPPROTO_ICMP, icmp->id,
+                               icmp->sequence, 0, NULL);
+    }
+
+}
+
 /*
     We've received an ICMP message with an embedded IP packet.
     We will try to determine which of our outgoing probes
@@ -347,13 +368,16 @@ void handle_received_icmp4_packet(
     int packet_length,
     struct timeval *timestamp)
 {
-    const int icmp_ip_size =
-        sizeof(struct ICMPHeader) + sizeof(struct IPHeader);
+    int icmp_ip_size = 0;
     const struct IPHeader *inner_ip;
     int inner_size = packet_length - sizeof(struct ICMPHeader);
     int mpls_count;
     struct mpls_label_t mpls[MAX_MPLS_LABELS];
 
+    if (net_state->platform.ip4_socket_raw) {
+        icmp_ip_size += sizeof(struct IPHeader);
+    }
+    icmp_ip_size += sizeof(struct ICMPHeader);
     mpls_count =
         decode_mpls_labels(icmp, packet_length, mpls, MAX_MPLS_LABELS);
 
@@ -472,24 +496,33 @@ void handle_received_ip4_packet(
     int packet_length,
     struct timeval *timestamp)
 {
-    const int ip_icmp_size =
-        sizeof(struct IPHeader) + sizeof(struct ICMPHeader);
+    int ip_icmp_size = 0;
     const struct IPHeader *ip;
     const struct ICMPHeader *icmp;
     int icmp_length;
 
+    if (net_state->platform.ip4_socket_raw) {
+        ip_icmp_size += sizeof(struct IPHeader);
+    }
+    ip_icmp_size += sizeof(struct ICMPHeader);
+
     /*  Ensure that we don't access memory beyond the bounds of the packet  */
     if (packet_length < ip_icmp_size) {
         return;
     }
 
-    ip = (struct IPHeader *) packet;
-    if (ip->protocol != IPPROTO_ICMP) {
-        return;
-    }
+    if (net_state->platform.ip4_socket_raw) {
+        ip = (struct IPHeader *) packet;
+        if (ip->protocol != IPPROTO_ICMP) {
+            return;
+        }
 
-    icmp = (struct ICMPHeader *) (ip + 1);
-    icmp_length = packet_length - sizeof(struct IPHeader);
+        icmp = (struct ICMPHeader *) (ip + 1);
+        icmp_length = packet_length - sizeof(struct IPHeader);
+    } else {
+        icmp = (struct ICMPHeader *) packet;
+        icmp_length = packet_length;
+    }
 
     handle_received_icmp4_packet(net_state, remote_addr, icmp, icmp_length,
                                  timestamp);
index 69186cd625292e835b3285cc3bad0f4cac47505b..8005da6aa3ea225db233d20f910ed4cac76b3922 100644 (file)
@@ -43,4 +43,13 @@ void handle_received_ip6_packet(
     int packet_length,
     struct timeval *timestamp);
 
+void handle_error_queue_packet(
+    struct net_state_t *net_state,
+    const struct sockaddr_storage *remote_addr,
+    int icmp_result,
+    int proto,
+    char *packet,
+    int packet_length,
+    struct timeval *timestamp);
+
 #endif
index de85b161c01f61c7d543c6c8b500a9113d8a0d7c..537ce78ec74710cff187ab36a5ff4a054a38530b 100644 (file)
@@ -87,6 +87,7 @@ int decode_address_string(
     for the probe.
 */
 int resolve_probe_addresses(
+    struct net_state_t *net_state,
     const struct probe_param_t *param,
     struct sockaddr_storage *dest_sockaddr,
     struct sockaddr_storage *src_sockaddr)
@@ -106,6 +107,22 @@ int resolve_probe_addresses(
             return -1;
         }
     }
+    /* DGRAM ICMP id is taken from src_port not from ICMP header */
+    if (param->protocol == IPPROTO_ICMP) {
+        if (src_sockaddr->ss_family == AF_INET) {
+            if (!net_state->platform.ip4_socket_raw) {
+                struct sockaddr_in *sin_src =
+                    (struct sockaddr_in *) src_sockaddr;
+                sin_src->sin_port = htons(getpid());
+            }
+        } else if (src_sockaddr->ss_family == AF_INET6) {
+            if (!net_state->platform.ip6_socket_raw) {
+                struct sockaddr_in6 *sin6_src =
+                    (struct sockaddr_in6 *) src_sockaddr;
+                sin6_src->sin6_port = htons(getpid());
+            }
+        }
+    }
 
     return 0;
 }
index 9c8dfc68a1f1a5fcf6a8d43a953dcc8e023f69b4..f32e2d646a535a9eae03b14d210a689c93bc423d 100644 (file)
@@ -79,6 +79,9 @@ struct probe_param_t {
 
     /*  The number of seconds to wait before assuming the probe was lost  */
     int timeout;
+
+    /*  true is the probe is to test byte order */
+    bool is_probing_byte_order;
 };
 
 /*  Tracking information for an outstanding probe  */
@@ -170,6 +173,7 @@ int decode_address_string(
     struct sockaddr_storage *address);
 
 int resolve_probe_addresses(
+    struct net_state_t *net_state,
     const struct probe_param_t *param,
     struct sockaddr_storage *dest_sockaddr,
     struct sockaddr_storage *src_sockaddr);
index c62cee118bad3a892b50d36af471b7f1082500a6..241e0cb0f4ea7e1162e582852af0acbfef34a4d6 100644 (file)
@@ -329,7 +329,8 @@ void send_probe(
     char payload[PACKET_BUFFER_SIZE];
     int payload_size;
 
-    if (resolve_probe_addresses(param, &dest_sockaddr, &src_sockaddr)) {
+    if (resolve_probe_addresses(net_state, param, &dest_sockaddr,
+                &src_sockaddr)) {
         printf("%d invalid-argument\n", param->command_token);
         return;
     }
index 53863eb69a5a00694276bb526c2ae703a93413c8..8083a153065fc1387ad5343ca499d2fa7c0f45e7 100644 (file)
@@ -21,6 +21,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <linux/errqueue.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -38,6 +39,7 @@ static
 int send_packet(
     const struct net_state_t *net_state,
     const struct probe_param_t *param,
+    int sequence,
     const char *packet,
     int packet_size,
     const struct sockaddr_storage *sockaddr)
@@ -49,14 +51,48 @@ int send_packet(
         sockaddr_length = sizeof(struct sockaddr_in6);
 
         if (param->protocol == IPPROTO_ICMP) {
-            send_socket = net_state->platform.icmp6_send_socket;
+            if (net_state->platform.ip6_socket_raw) {
+                send_socket = net_state->platform.icmp6_send_socket;
+            } else {
+                send_socket = net_state->platform.ip6_txrx_icmp_socket;
+            }
         } else if (param->protocol == IPPROTO_UDP) {
-            send_socket = net_state->platform.udp6_send_socket;
+            if (net_state->platform.ip6_socket_raw) {
+                send_socket = net_state->platform.udp6_send_socket;
+            } else {
+                struct sockaddr_in6 *addr_in6 = (struct sockaddr_in6 *)sockaddr;
+
+                send_socket = net_state->platform.ip6_txrx_udp_socket;
+                if (param->dest_port) {
+                    addr_in6->sin6_port = htons(param->dest_port);
+                } else {
+                    addr_in6->sin6_port = sequence;
+                }
+            }
         }
     } else if (sockaddr->ss_family == AF_INET) {
         sockaddr_length = sizeof(struct sockaddr_in);
 
-        send_socket = net_state->platform.ip4_send_socket;
+        if (net_state->platform.ip4_socket_raw) {
+            send_socket = net_state->platform.ip4_send_socket;
+        } else {
+            if (param->protocol == IPPROTO_ICMP) {
+                if (param->is_probing_byte_order) {
+                    send_socket = net_state->platform.ip4_tmp_icmp_socket;;
+                } else {
+                    send_socket = net_state->platform.ip4_txrx_icmp_socket;
+                }
+            } else if (param->protocol == IPPROTO_UDP) {
+                struct sockaddr_in *addr_in = (struct sockaddr_in *)sockaddr;
+
+                send_socket = net_state->platform.ip4_txrx_udp_socket;
+                if (param->dest_port) {
+                    addr_in->sin_port = htons(param->dest_port);
+                } else {
+                    addr_in->sin_port = sequence;
+                }
+            }
+        }
     }
 
     if (send_socket == 0) {
@@ -95,8 +131,10 @@ void check_length_order(
     param.protocol = IPPROTO_ICMP;
     param.ttl = 255;
     param.remote_address = "127.0.0.1";
+    param.is_probing_byte_order = true;
 
-    if (resolve_probe_addresses(&param, &dest_sockaddr, &src_sockaddr)) {
+    if (resolve_probe_addresses(net_state, &param, &dest_sockaddr,
+                &src_sockaddr)) {
         fprintf(stderr, "Error decoding localhost address\n");
         exit(EXIT_FAILURE);
     }
@@ -113,7 +151,7 @@ void check_length_order(
     }
 
     bytes_sent =
-        send_packet(net_state, &param, packet, packet_size,
+        send_packet(net_state, &param, MIN_PORT, packet, packet_size,
                     &dest_sockaddr);
     if (bytes_sent > 0) {
         return;
@@ -131,7 +169,7 @@ void check_length_order(
     }
 
     bytes_sent =
-        send_packet(net_state, &param, packet, packet_size,
+        send_packet(net_state, &param, MIN_PORT, packet, packet_size,
                     &dest_sockaddr);
     if (bytes_sent < 0) {
         perror("Unable to send with swapped length");
@@ -181,7 +219,7 @@ void set_socket_nonblocking(
 
 /*  Open the raw sockets for sending/receiving IPv4 packets  */
 static
-int open_ip4_sockets(
+int open_ip4_sockets_raw(
     struct net_state_t *net_state)
 {
     int send_socket;
@@ -215,15 +253,60 @@ int open_ip4_sockets(
     }
 
     net_state->platform.ip4_present = true;
+    net_state->platform.ip4_socket_raw = true;
     net_state->platform.ip4_send_socket = send_socket;
     net_state->platform.ip4_recv_socket = recv_socket;
 
     return 0;
 }
 
+/*  Open DGRAM sockets for sending/receiving IPv4 packets  */
+static
+int open_ip4_sockets_dgram(
+    struct net_state_t *net_state)
+{
+    int udp_socket;
+    int icmp_socket, icmp_tmp_socket;
+    int val = 1;
+
+    icmp_socket = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
+    if (icmp_socket == -1) {
+        return -1;
+    }
+    if (setsockopt(icmp_socket, SOL_IP, IP_RECVERR, &val, sizeof(val)) < 0) {
+        return -1;
+    }
+
+    udp_socket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+    if (udp_socket == -1) {
+        close(icmp_socket);
+        return -1;
+    }
+    if (setsockopt(udp_socket, SOL_IP, IP_RECVERR, &val, sizeof(val)) < 0) {
+        close(icmp_socket);
+        close(udp_socket);
+        return -1;
+    }
+
+    icmp_tmp_socket = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
+    if (icmp_tmp_socket == -1) {
+        close(icmp_socket);
+        close(udp_socket);
+        return -1;
+    }
+
+    net_state->platform.ip4_present = true;
+    net_state->platform.ip4_socket_raw = false;
+    net_state->platform.ip4_txrx_icmp_socket = icmp_socket;
+    net_state->platform.ip4_tmp_icmp_socket = icmp_tmp_socket;
+    net_state->platform.ip4_txrx_udp_socket = udp_socket;
+
+    return 0;
+}
+
 /*  Open the raw sockets for sending/receiving IPv6 packets  */
 static
-int open_ip6_sockets(
+int open_ip6_sockets_raw(
     struct net_state_t *net_state)
 {
     int send_socket_icmp;
@@ -251,6 +334,7 @@ int open_ip6_sockets(
     }
 
     net_state->platform.ip6_present = true;
+    net_state->platform.ip6_socket_raw = true;
     net_state->platform.icmp6_send_socket = send_socket_icmp;
     net_state->platform.udp6_send_socket = send_socket_udp;
     net_state->platform.ip6_recv_socket = recv_socket;
@@ -258,6 +342,42 @@ int open_ip6_sockets(
     return 0;
 }
 
+/*  Open DGRAM sockets for sending/receiving IPv6 packets  */
+static
+int open_ip6_sockets_dgram(
+    struct net_state_t *net_state)
+{
+    int icmp_socket;
+    int udp_socket;
+    int val = 1;
+
+    icmp_socket = socket(AF_INET6, SOCK_DGRAM, IPPROTO_ICMPV6);
+    if (icmp_socket == -1) {
+        return -1;
+    }
+    if (setsockopt(icmp_socket, SOL_IPV6, IPV6_RECVERR, &val, sizeof(val)) < 0) {
+        return -1;
+    }
+
+    udp_socket = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP);
+    if (udp_socket == -1) {
+        close(icmp_socket);
+        return -1;
+    }
+    if (setsockopt(udp_socket, SOL_IPV6, IPV6_RECVERR, &val, sizeof(val)) < 0) {
+        close(icmp_socket);
+        close(udp_socket);
+        return -1;
+    }
+
+    net_state->platform.ip6_present = true;
+    net_state->platform.ip6_socket_raw = false;
+    net_state->platform.ip6_txrx_icmp_socket = icmp_socket;
+    net_state->platform.ip6_txrx_udp_socket = udp_socket;
+
+    return 0;
+}
+
 /*
     The first half of the net state initialization.  Since this
     happens with elevated privileges, this is kept as minimal
@@ -273,11 +393,17 @@ void init_net_state_privileged(
 
     net_state->platform.next_sequence = MIN_PORT;
 
-    if (open_ip4_sockets(net_state)) {
-        ip4_err = errno;
+    if (open_ip4_sockets_raw(net_state)) {
+        /* fall back to using unprivileged sockets */
+        if (open_ip4_sockets_dgram(net_state)) {
+            ip4_err = errno;
+        }
     }
-    if (open_ip6_sockets(net_state)) {
-        ip6_err = errno;
+    if (open_ip6_sockets_raw(net_state)) {
+        /* fall back to using unprivileged sockets */
+        if (open_ip6_sockets_dgram(net_state)) {
+            ip6_err = errno;
+        }
     }
 
     /*
@@ -304,8 +430,18 @@ void init_net_state_privileged(
 void init_net_state(
     struct net_state_t *net_state)
 {
-    set_socket_nonblocking(net_state->platform.ip4_recv_socket);
-    set_socket_nonblocking(net_state->platform.ip6_recv_socket);
+    if (net_state->platform.ip4_socket_raw) {
+        set_socket_nonblocking(net_state->platform.ip4_recv_socket);
+    } else {
+        set_socket_nonblocking(net_state->platform.ip4_txrx_icmp_socket);
+        set_socket_nonblocking(net_state->platform.ip4_txrx_udp_socket);
+    }
+    if (net_state->platform.ip4_socket_raw) {
+        set_socket_nonblocking(net_state->platform.ip6_recv_socket);
+    } else {
+        set_socket_nonblocking(net_state->platform.ip6_txrx_icmp_socket);
+        set_socket_nonblocking(net_state->platform.ip6_txrx_udp_socket);
+    }
 
     if (net_state->platform.ip4_present) {
         check_length_order(net_state);
@@ -396,7 +532,8 @@ void send_probe(
         return;
     }
 
-    if (resolve_probe_addresses(param, &probe->remote_addr, &src_sockaddr)) {
+    if (resolve_probe_addresses(net_state, param, &probe->remote_addr,
+                &src_sockaddr)) {
         printf("%d invalid-argument\n", param->command_token);
         free_probe(net_state, probe);
         return;
@@ -431,7 +568,7 @@ void send_probe(
     }
 
     if (packet_size > 0) {
-        if (send_packet(net_state, param,
+        if (send_packet(net_state, param, probe->sequence,
                         packet, packet_size, &probe->remote_addr) == -1) {
 
             report_packet_error(param->command_token);
@@ -508,7 +645,7 @@ void receive_probe(
     handle any responses to probes we have preivously sent.
 */
 static
-void receive_replies_from_icmp_socket(
+void receive_replies_from_recv_socket(
     struct net_state_t *net_state,
     int socket,
     received_packet_func_t handle_received_packet)
@@ -516,15 +653,30 @@ void receive_replies_from_icmp_socket(
     char packet[PACKET_BUFFER_SIZE];
     int packet_length;
     struct sockaddr_storage remote_addr;
-    socklen_t sockaddr_length;
     struct timeval timestamp;
+    int flag = 0;
+    struct cmsghdr *cm;
+    struct sock_extended_err *ee = NULL;
+    bool icmp_connrefused_received = false;
+    bool icmp_hostunreach_received = false;
 
     /*  Read until no more packets are available  */
     while (true) {
-        sockaddr_length = sizeof(struct sockaddr_storage);
-        packet_length = recvfrom(socket, packet, PACKET_BUFFER_SIZE, 0,
-                                 (struct sockaddr *) &remote_addr,
-                                 &sockaddr_length);
+        struct iovec iov;
+        struct msghdr msg;
+        char control[1024];
+
+        memset(&msg, 0, sizeof(msg));
+        memset(&iov, 0, sizeof(iov));
+        iov.iov_base = packet;
+        iov.iov_len = sizeof(packet);
+        msg.msg_iov = &iov;
+        msg.msg_iovlen = 1;
+        msg.msg_name = (struct sockaddr*) &remote_addr;
+        msg.msg_namelen = sizeof(remote_addr);
+        msg.msg_control = control;
+        msg.msg_controllen = sizeof(control);
+        packet_length = recvmsg(socket, &msg, flag);
 
         /*
            Get the time immediately after reading the packet to
@@ -549,6 +701,29 @@ void receive_replies_from_icmp_socket(
                receive.
              */
             if (errno == EINTR) {
+                /* clear error */
+                int so_err;
+                socklen_t so_err_size = sizeof(so_err);
+                int err;
+
+                do {
+                  err = getsockopt(socket, SOL_SOCKET, SO_ERROR, &so_err, &so_err_size);
+                } while (err < 0 && errno == EINTR);
+                continue;
+            }
+
+            /* handle error received in error queue */
+            if (errno == EHOSTUNREACH) {
+                /* potential error caused by ttl, read inner icmp hdr from err queue */
+                icmp_hostunreach_received = true;
+                flag |= MSG_ERRQUEUE;
+                continue;
+            }
+
+            if (errno == ECONNREFUSED) {
+                /* udp packet reached dst, read inner udp hdr from err queue */
+                icmp_connrefused_received = true;
+                flag |= MSG_ERRQUEUE;
                 continue;
             }
 
@@ -556,8 +731,46 @@ void receive_replies_from_icmp_socket(
             exit(EXIT_FAILURE);
         }
 
-        handle_received_packet(net_state, &remote_addr, packet,
-                               packet_length, &timestamp);
+        /* get src ip for packets read from err queue */
+        if (flag & MSG_ERRQUEUE) {
+            for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+                if (cm->cmsg_level == SOL_IP) {
+                    if (cm->cmsg_type == IP_RECVERR) {
+                        ee = (struct sock_extended_err *) CMSG_DATA(cm);
+                    }
+                }
+                else if (cm->cmsg_level == SOL_IPV6) {
+                    if (cm->cmsg_type == IPV6_RECVERR) {
+                        ee = (struct sock_extended_err *) CMSG_DATA(cm);
+                    }
+                }
+            }
+            if (ee) {
+                memcpy(&remote_addr, SO_EE_OFFENDER(ee), sizeof(remote_addr));
+            }
+        }
+
+        if (icmp_connrefused_received) {
+            /* using ICMP type ICMP_ECHOREPLY is not a bug, it is an
+               indication of successfully reaching dst host.
+             */
+            handle_error_queue_packet(net_state, &remote_addr, ICMP_ECHOREPLY, IPPROTO_UDP,
+                    packet, packet_length, &timestamp);
+        } else if (icmp_hostunreach_received) {
+            /* handle packet based on send socket protocol */
+            int proto, length = sizeof(int);
+
+            if (getsockopt(socket, SOL_SOCKET, SO_PROTOCOL, &proto, &length) < 0) {
+                perror("getsockopt SO_PROTOCOL error");
+                exit(EXIT_FAILURE);
+            }
+            handle_error_queue_packet(net_state, &remote_addr, ICMP_TIME_EXCEEDED, proto,
+                    packet, packet_length, &timestamp);
+        } else {
+            /* ICMP packets received from raw socket */
+            handle_received_packet(net_state, &remote_addr, packet,
+                                   packet_length, &timestamp);
+        }
     }
 }
 
@@ -631,17 +844,39 @@ void receive_replies(
     struct probe_t *probe_safe_iter;
 
     if (net_state->platform.ip4_present) {
-        receive_replies_from_icmp_socket(net_state,
-                                         net_state->platform.
-                                         ip4_recv_socket,
-                                         handle_received_ip4_packet);
+        if (net_state->platform.ip4_socket_raw) {
+            receive_replies_from_recv_socket(net_state,
+                                             net_state->platform.
+                                             ip4_recv_socket,
+                                             handle_received_ip4_packet);
+        } else {
+            receive_replies_from_recv_socket(net_state,
+                                             net_state->platform.
+                                             ip4_txrx_icmp_socket,
+                                             handle_received_ip4_packet);
+            receive_replies_from_recv_socket(net_state,
+                                             net_state->platform.
+                                             ip4_txrx_udp_socket,
+                                             handle_received_ip4_packet);
+        }
     }
 
     if (net_state->platform.ip6_present) {
-        receive_replies_from_icmp_socket(net_state,
-                                         net_state->platform.
-                                         ip6_recv_socket,
-                                         handle_received_ip6_packet);
+        if (net_state->platform.ip6_socket_raw) {
+            receive_replies_from_recv_socket(net_state,
+                                             net_state->platform.
+                                             ip6_recv_socket,
+                                             handle_received_ip6_packet);
+        } else {
+            receive_replies_from_recv_socket(net_state,
+                                             net_state->platform.
+                                             ip6_txrx_icmp_socket,
+                                             handle_received_ip6_packet);
+            receive_replies_from_recv_socket(net_state,
+                                             net_state->platform.
+                                             ip6_txrx_udp_socket,
+                                             handle_received_ip6_packet);
+        }
     }
 
     LIST_FOREACH_SAFE(probe, &net_state->outstanding_probes,
index 36b31596738b180deb92865cc264a54e16ed6764..d72fc67fabf7ddd392ea3e040db5d8c40b509be5 100644 (file)
@@ -43,12 +43,27 @@ struct net_state_platform_t {
     /*  true if we were successful at opening IPv6 sockets  */
     bool ip6_present;
 
+    /*  true if raw sockets are used for IPv4, false for the txrx sockets  */
+    bool ip4_socket_raw;
+
+    /*  true if raw sockets are used for IPv6, false for the txrx sockets  */
+    bool ip6_socket_raw;
+
     /*  Socket used to send raw IPv4 packets  */
     int ip4_send_socket;
 
     /*  Socket used to receive IPv4 ICMP replies  */
     int ip4_recv_socket;
 
+    /*  Socket used to probe byte order */
+    int ip4_tmp_icmp_socket;
+
+    /*  Socket used to send and receive non-raw IPv4 ICMP packets  */
+    int ip4_txrx_icmp_socket;
+
+    /*  Socket used to send IPv4 UDP packets and receive ICMP error packets */
+    int ip4_txrx_udp_socket;
+
     /*  Send socket for ICMPv6 packets  */
     int icmp6_send_socket;
 
@@ -58,6 +73,12 @@ struct net_state_platform_t {
     /*  Receive socket for IPv6 packets  */
     int ip6_recv_socket;
 
+    /*  Socket used to send and receive non-raw IPv6 ICMP packets  */
+    int ip6_txrx_icmp_socket;
+
+    /*  Socket used to send IPv6 UDP packets and receive ICMP error packets */
+    int ip6_txrx_udp_socket;
+
     /*
        true if we should encode the IP header length in host order.
        (as opposed to network order)
index f347e2312a4cafc7cd3a976d17e614fdd6c63036..35f3c5d2631ee1fd5c66d744116737cd95923f77 100644 (file)
@@ -39,8 +39,8 @@ int gather_read_fds(
 {
     int nfds;
     int probe_nfds;
-    int ip4_socket = net_state->platform.ip4_recv_socket;
-    int ip6_socket = net_state->platform.ip6_recv_socket;
+    int ip4_socket;
+    int ip6_socket;
     int command_stream = command_buffer->command_stream;
 
     FD_ZERO(read_set);
@@ -49,14 +49,42 @@ int gather_read_fds(
     FD_SET(command_stream, read_set);
     nfds = command_stream + 1;
 
-    FD_SET(ip4_socket, read_set);
-    if (ip4_socket >= nfds) {
-        nfds = ip4_socket + 1;
+    if (net_state->platform.ip4_socket_raw) {
+        ip4_socket = net_state->platform.ip4_recv_socket;
+        FD_SET(ip4_socket, read_set);
+        if (ip4_socket >= nfds) {
+            nfds = ip4_socket + 1;
+        }
+    } else {
+        ip4_socket = net_state->platform.ip4_txrx_icmp_socket;
+        FD_SET(ip4_socket, read_set);
+        if (ip4_socket >= nfds) {
+            nfds = ip4_socket + 1;
+        }
+        ip4_socket = net_state->platform.ip4_txrx_udp_socket;
+        FD_SET(ip4_socket, read_set);
+        if (ip4_socket >= nfds) {
+            nfds = ip4_socket + 1;
+        }
     }
 
-    FD_SET(ip6_socket, read_set);
-    if (ip6_socket >= nfds) {
-        nfds = ip6_socket + 1;
+    if (net_state->platform.ip6_socket_raw) {
+        ip6_socket = net_state->platform.ip6_recv_socket;
+        FD_SET(ip6_socket, read_set);
+        if (ip6_socket >= nfds) {
+            nfds = ip6_socket + 1;
+        }
+    } else {
+        ip6_socket = net_state->platform.ip6_txrx_icmp_socket;
+        FD_SET(ip6_socket, read_set);
+        if (ip6_socket >= nfds) {
+            nfds = ip6_socket + 1;
+        }
+        ip6_socket = net_state->platform.ip6_txrx_udp_socket;
+        FD_SET(ip6_socket, read_set);
+        if (ip6_socket >= nfds) {
+            nfds = ip6_socket + 1;
+        }
     }
 
     probe_nfds = gather_probe_sockets(net_state, write_set);