]>
git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/iputils.cc
/*
 * This file is part of PowerDNS or dnsdist.
 * Copyright -- PowerDNS.COM B.V. and its contributors
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * In addition, for the avoidance of any doubt, permission is granted to
 * link this program with OpenSSL and to (re)distribute the binaries
 * produced as the result of such linking.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
29 #include <sys/socket.h>
30 #include <boost/format.hpp>
32 #ifdef HAVE_GETIFADDRS
/** These functions provide a very lightweight wrapper around the Berkeley sockets API. Errors -> exceptions! */
/** Throws a std::runtime_error carrying the supplied message; never returns normally. */
[[noreturn]] static void RuntimeError(const std::string& error)
{
  throw std::runtime_error(error);
}
43 static void NetworkErr ( const std :: string
& error
)
45 throw NetworkError ( error
);
48 int SSocket ( int family
, int type
, int flags
)
50 int ret
= socket ( family
, type
, flags
);
52 RuntimeError ( "creating socket of type " + std :: to_string ( family
) + ": " + stringerror ());
57 int SConnect ( int sockfd
, const ComboAddress
& remote
)
59 int ret
= connect ( sockfd
, reinterpret_cast < const struct sockaddr
*>(& remote
), remote
. getSocklen ());
61 int savederrno
= errno
;
62 RuntimeError ( "connecting socket to " + remote
. toStringWithPort () + ": " + stringerror ( savederrno
));
67 int SConnectWithTimeout ( int sockfd
, const ComboAddress
& remote
, const struct timeval
& timeout
)
69 int ret
= connect ( sockfd
, reinterpret_cast < const struct sockaddr
*>(& remote
), remote
. getSocklen ());
71 int savederrno
= errno
;
72 if ( savederrno
== EINPROGRESS
) {
73 if ( timeout
<= timeval
{ 0 , 0 }) {
77 /* we wait until the connection has been established */
79 bool disconnected
= false ;
80 int res
= waitForRWData ( sockfd
, false , timeout
. tv_sec
, timeout
. tv_usec
, & error
, & disconnected
);
84 socklen_t errlen
= sizeof ( savederrno
);
85 if ( getsockopt ( sockfd
, SOL_SOCKET
, SO_ERROR
, ( void *)& savederrno
, & errlen
) == 0 ) {
86 NetworkErr ( "connecting to " + remote
. toStringWithPort () + " failed: " + stringerror ( savederrno
));
89 NetworkErr ( "connecting to " + remote
. toStringWithPort () + " failed" );
93 NetworkErr ( remote
. toStringWithPort () + " closed the connection" );
98 NetworkErr ( "timeout while connecting to " + remote
. toStringWithPort ());
101 NetworkErr ( "waiting to connect to " + remote
. toStringWithPort () + ": " + stringerror ( savederrno
));
105 NetworkErr ( "connecting to " + remote
. toStringWithPort () + ": " + stringerror ( savederrno
));
112 int SBind ( int sockfd
, const ComboAddress
& local
)
114 int ret
= bind ( sockfd
, ( struct sockaddr
*)& local
, local
. getSocklen ());
116 int savederrno
= errno
;
117 RuntimeError ( "binding socket to " + local
. toStringWithPort () + ": " + stringerror ( savederrno
));
122 int SAccept ( int sockfd
, ComboAddress
& remote
)
124 socklen_t remlen
= remote
. getSocklen ();
126 int ret
= accept ( sockfd
, ( struct sockaddr
*)& remote
, & remlen
);
128 RuntimeError ( "accepting new connection on socket: " + stringerror ());
133 int SListen ( int sockfd
, int limit
)
135 int ret
= listen ( sockfd
, limit
);
137 RuntimeError ( "setting socket to listen: " + stringerror ());
142 int SSetsockopt ( int sockfd
, int level
, int opname
, int value
)
144 int ret
= setsockopt ( sockfd
, level
, opname
, & value
, sizeof ( value
));
146 RuntimeError ( "setsockopt for level " + std :: to_string ( level
) + " and opname " + std :: to_string ( opname
) + " to " + std :: to_string ( value
) + " failed: " + stringerror ());
151 void setSocketIgnorePMTU ([[ maybe_unused
]] int sockfd
, [[ maybe_unused
]] int family
)
153 if ( family
== AF_INET
) {
154 #if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
155 #ifdef IP_PMTUDISC_OMIT
156 /* Linux 3.15+ has IP_PMTUDISC_OMIT, which discards PMTU information to prevent
157 poisoning, but still allows fragmentation if the packet size exceeds the
158 outgoing interface MTU, which is good.
161 SSetsockopt ( sockfd
, IPPROTO_IP
, IP_MTU_DISCOVER
, IP_PMTUDISC_OMIT
);
164 catch ( const std :: exception
& e
) {
165 /* failed, let's try IP_PMTUDISC_DONT instead */
167 #endif /* IP_PMTUDISC_OMIT */
169 /* IP_PMTUDISC_DONT disables Path MTU discovery */
170 SSetsockopt ( sockfd
, IPPROTO_IP
, IP_MTU_DISCOVER
, IP_PMTUDISC_DONT
);
171 #endif /* defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) */
174 #if defined(IPV6_MTU_DISCOVER) && defined(IPV6_PMTUDISC_DONT)
175 #ifdef IPV6_PMTUDISC_OMIT
176 /* Linux 3.15+ has IPV6_PMTUDISC_OMIT, which discards PMTU information to prevent
177 poisoning, but still allows fragmentation if the packet size exceeds the
178 outgoing interface MTU, which is good.
181 SSetsockopt ( sockfd
, IPPROTO_IPV6
, IPV6_MTU_DISCOVER
, IPV6_PMTUDISC_OMIT
);
184 catch ( const std :: exception
& e
) {
185 /* failed, let's try IP_PMTUDISC_DONT instead */
187 #endif /* IPV6_PMTUDISC_OMIT */
189 /* IPV6_PMTUDISC_DONT disables Path MTU discovery */
190 SSetsockopt ( sockfd
, IPPROTO_IPV6
, IPV6_MTU_DISCOVER
, IPV6_PMTUDISC_DONT
);
191 #endif /* defined(IPV6_MTU_DISCOVER) && defined(IPV6_PMTUDISC_DONT) */
195 void setSocketForcePMTU ([[ maybe_unused
]] int sockfd
, [[ maybe_unused
]] int family
)
197 if ( family
== AF_INET
) {
198 #if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DO)
199 /* IP_PMTUDISC_DO enables Path MTU discovery and prevents fragmentation */
200 SSetsockopt ( sockfd
, IPPROTO_IP
, IP_MTU_DISCOVER
, IP_PMTUDISC_DO
);
201 #elif defined(IP_DONTFRAG)
202 /* at least this prevents fragmentation */
203 SSetsockopt ( sockfd
, IPPROTO_IP
, IP_DONTFRAG
, 1 );
204 #endif /* defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DO) */
207 #if defined(IPV6_MTU_DISCOVER) && defined(IPV6_PMTUDISC_DO)
208 /* IPV6_PMTUDISC_DO enables Path MTU discovery and prevents fragmentation */
209 SSetsockopt ( sockfd
, IPPROTO_IPV6
, IPV6_MTU_DISCOVER
, IPV6_PMTUDISC_DO
);
210 #elif defined(IPV6_DONTFRAG)
211 /* at least this prevents fragmentation */
212 SSetsockopt ( sockfd
, IPPROTO_IPV6
, IPV6_DONTFRAG
, 1 );
213 #endif /* defined(IPV6_MTU_DISCOVER) && defined(IPV6_PMTUDISC_DO) */
217 bool setReusePort ( int sockfd
)
219 #if defined(SO_REUSEPORT_LB)
221 SSetsockopt ( sockfd
, SOL_SOCKET
, SO_REUSEPORT_LB
, 1 );
224 catch ( const std :: exception
& e
) {
227 #elif defined(SO_REUSEPORT)
229 SSetsockopt ( sockfd
, SOL_SOCKET
, SO_REUSEPORT
, 1 );
232 catch ( const std :: exception
& e
) {
/**
 * Extracts the receive timestamp from the ancillary data of a message.
 *
 * Walks the control messages of `msgh` looking for a SOL_SOCKET entry of
 * type SO_TIMESTAMP / SCM_TIMESTAMP whose length matches a struct timeval,
 * and copies its payload into `tv`.
 *
 * @return true when a timestamp was found and copied, false otherwise
 *         (including on platforms without SO_TIMESTAMP)
 */
bool HarvestTimestamp(struct msghdr* msgh, struct timeval* tv)
{
#ifdef SO_TIMESTAMP
  struct cmsghdr* cmsg;
  for (cmsg = CMSG_FIRSTHDR(msgh); cmsg != nullptr; cmsg = CMSG_NXTHDR(msgh, cmsg)) {
    if ((cmsg->cmsg_level == SOL_SOCKET) && (cmsg->cmsg_type == SO_TIMESTAMP || cmsg->cmsg_type == SCM_TIMESTAMP) &&
        CMSG_LEN(sizeof(*tv)) == cmsg->cmsg_len) {
      // memcpy rather than a pointer cast: CMSG_DATA is not guaranteed to be suitably aligned
      memcpy(tv, CMSG_DATA(cmsg), sizeof(*tv));
      return true;
    }
  }
#endif
  return false;
}
253 bool HarvestDestinationAddress ( const struct msghdr
* msgh
, ComboAddress
* destination
)
255 destination
-> reset ();
257 struct cmsghdr
* cmsg
;
259 const struct cmsghdr
* cmsg
;
261 for ( cmsg
= CMSG_FIRSTHDR ( msgh
); cmsg
!= nullptr ; cmsg
= CMSG_NXTHDR ( const_cast < struct msghdr
*>( msgh
), const_cast < struct cmsghdr
*>( cmsg
))) {
262 #if defined(IP_PKTINFO)
263 if (( cmsg
-> cmsg_level
== IPPROTO_IP
) && ( cmsg
-> cmsg_type
== IP_PKTINFO
)) {
264 struct in_pktinfo
* i
= ( struct in_pktinfo
*) CMSG_DATA ( cmsg
);
265 destination
-> sin4
. sin_addr
= i
-> ipi_addr
;
266 destination
-> sin4
. sin_family
= AF_INET
;
269 #elif defined(IP_RECVDSTADDR)
270 if (( cmsg
-> cmsg_level
== IPPROTO_IP
) && ( cmsg
-> cmsg_type
== IP_RECVDSTADDR
)) {
271 struct in_addr
* i
= ( struct in_addr
*) CMSG_DATA ( cmsg
);
272 destination
-> sin4
. sin_addr
= * i
;
273 destination
-> sin4
. sin_family
= AF_INET
;
278 if (( cmsg
-> cmsg_level
== IPPROTO_IPV6
) && ( cmsg
-> cmsg_type
== IPV6_PKTINFO
)) {
279 struct in6_pktinfo
* i
= ( struct in6_pktinfo
*) CMSG_DATA ( cmsg
);
280 destination
-> sin6
. sin6_addr
= i
-> ipi6_addr
;
281 destination
-> sin4
. sin_family
= AF_INET6
;
288 bool IsAnyAddress ( const ComboAddress
& addr
)
290 if ( addr
. sin4
. sin_family
== AF_INET
)
291 return addr
. sin4
. sin_addr
. s_addr
== 0 ;
292 else if ( addr
. sin4
. sin_family
== AF_INET6
)
293 return ! memcmp (& addr
. sin6
. sin6_addr
, & in6addr_any
, sizeof ( addr
. sin6
. sin6_addr
));
/**
 * Sends `msgh` on a non-blocking socket with sendmsg(2).
 *
 * @return 0 on success, otherwise the errno value reported by sendmsg()
 */
int sendOnNBSocket(int fd, const struct msghdr* msgh)
{
  int sendErr = 0;
  // NOTE(review): the guard lines were lost from this copy; __OpenBSD__ is
  // inferred from the comment below — confirm.
#ifdef __OpenBSD__
  // OpenBSD can and does return EAGAIN on non-blocking datagram sockets
  for (int i = 0; i < 10; i++) { // Arbitrary upper bound
    if (sendmsg(fd, msgh, 0) != -1) {
      sendErr = 0;
      break;
    }
    sendErr = errno;
    if (sendErr != EAGAIN) {
      break;
    }
  }
#else
  if (sendmsg(fd, msgh, 0) == -1) {
    sendErr = errno;
  }
#endif
  return sendErr;
}
320 // be careful: when using this for receive purposes, make sure addr->sin4.sin_family is set appropriately so getSocklen works!
321 // be careful: when using this function for *send* purposes, be sure to set cbufsize to 0!
322 // be careful: if you don't call addCMsgSrcAddr after fillMSGHdr, make sure to set msg_control to NULL
323 void fillMSGHdr ( struct msghdr
* msgh
, struct iovec
* iov
, cmsgbuf_aligned
* cbuf
, size_t cbufsize
, char * data
, size_t datalen
, ComboAddress
* addr
)
325 iov
-> iov_base
= data
;
326 iov
-> iov_len
= datalen
;
328 memset ( msgh
, 0 , sizeof ( struct msghdr
));
330 msgh
-> msg_control
= cbuf
;
331 msgh
-> msg_controllen
= cbufsize
;
332 msgh
-> msg_name
= addr
;
333 msgh
-> msg_namelen
= addr
-> getSocklen ();
335 msgh
-> msg_iovlen
= 1 ;
339 // warning: various parts of PowerDNS assume 'truncate' will never throw
340 void ComboAddress :: truncate ( unsigned int bits
) noexcept
344 if ( sin4
. sin_family
== AF_INET
) {
347 start
= ( uint8_t *)& sin4
. sin_addr
. s_addr
;
353 start
= ( uint8_t *)& sin6
. sin6_addr
. s6_addr
;
357 auto tozero
= len
* 8 - bits
; // if set to 22, this will clear 1 byte, as it should
359 memset ( start
+ len
- tozero
/ 8 , 0 , tozero
/ 8 ); // blot out the whole bytes on the right
361 auto bitsleft
= tozero
% 8 ; // 2 bits left to clear
363 // a b c d, to truncate to 22 bits, we just zeroed 'd' and need to zero 2 bits from c
364 // so and by '11111100', which is ~((1<<2)-1) = ~3
365 uint8_t * place
= start
+ len
- 1 - tozero
/ 8 ;
366 * place
&= (~(( 1 << bitsleft
)- 1 ));
369 size_t sendMsgWithOptions ( int fd
, const char * buffer
, size_t len
, const ComboAddress
* dest
, const ComboAddress
* local
, unsigned int localItf
, int flags
)
373 cmsgbuf_aligned cbuf
;
375 /* Set up iov and msgh structures. */
376 memset (& msgh
, 0 , sizeof ( struct msghdr
));
377 msgh
. msg_control
= nullptr ;
378 msgh
. msg_controllen
= 0 ;
380 msgh
. msg_name
= reinterpret_cast < void *>( const_cast < ComboAddress
*>( dest
));
381 msgh
. msg_namelen
= dest
-> getSocklen ();
384 msgh
. msg_name
= nullptr ;
385 msgh
. msg_namelen
= 0 ;
390 if ( localItf
!= 0 && local
) {
391 addCMsgSrcAddr (& msgh
, & cbuf
, local
, localItf
);
394 iov
. iov_base
= reinterpret_cast < void *>( const_cast < char *>( buffer
));
402 bool firstTry
= true ;
408 if ( flags
& MSG_FASTOPEN
&& firstTry
== false ) {
409 flags
&= ~ MSG_FASTOPEN
;
411 #endif /* MSG_FASTOPEN */
413 ssize_t res
= sendmsg ( fd
, & msgh
, flags
);
416 size_t written
= static_cast < size_t >( res
);
427 iov
. iov_len
-= written
;
428 iov
. iov_base
= reinterpret_cast < void *>( reinterpret_cast < char *>( iov
. iov_base
) + written
);
433 else if ( res
== - 1 ) {
438 else if ( err
== EAGAIN
|| err
== EWOULDBLOCK
|| err
== EINPROGRESS
|| err
== ENOTCONN
) {
439 /* EINPROGRESS might happen with non blocking socket,
440 especially with TCP Fast Open */
444 unixDie ( "failed in sendMsgWithTimeout" );
453 template class NetmaskTree
< bool , Netmask
>;
/* requires a non-blocking socket.
   On Linux, we could use MSG_DONTWAIT on a blocking socket
   but this is not portable.
*/
/**
 * Probes a socket with a one-byte MSG_PEEK recv() to decide whether it can
 * still be used. The peeked byte is not consumed.
 *
 * @return true when data is waiting or the socket simply has nothing to
 *         read yet (EAGAIN / EWOULDBLOCK); false when the peer closed the
 *         connection or recv() failed with any other error. EINTR is retried.
 */
bool isTCPSocketUsable(int sock)
{
  int err = 0;
  char buf = '\0';
  size_t buf_size = sizeof(buf);

  do {
    ssize_t got = recv(sock, &buf, buf_size, MSG_PEEK);

    if (got > 0) {
      /* socket is usable, some data is even waiting to be read */
      return true;
    }
    else if (got == 0) {
      /* other end has closed the socket */
      return false;
    }
    else {
      err = errno;

      if (err == EAGAIN || err == EWOULDBLOCK) {
        /* socket is usable, no data waiting */
        return true;
      }
      else {
        if (err != EINTR) {
          /* something is wrong, could be ECONNRESET,
             ENOTCONN, EPIPE, but anyway this socket is
             not usable. */
          return false;
        }
      }
    }
  } while (err == EINTR);

  return false;
}
496 /* mission in life: parse four cases
500 4) 2001::1 no port allowed
503 ComboAddress
parseIPAndPort ( const std :: string
& input
, uint16_t port
)
505 if ( input
[ 0 ] == '[' ) { // case 1
506 auto both
= splitField ( input
. substr ( 1 ), ']' );
507 return ComboAddress ( both
. first
, both
. second
. empty () ? port
: pdns :: checked_stoi
< uint16_t >( both
. second
. substr ( 1 )));
510 string :: size_type count
= 0 ;
511 for ( char c
: input
) {
521 return ComboAddress ( input
, port
);
523 string :: size_type cpos
= input
. rfind ( ':' );
524 pair
< std :: string
, std :: string
> both
;
525 both
. first
= input
. substr ( 0 , cpos
);
526 both
. second
= input
. substr ( cpos
+ 1 );
528 auto newport
= pdns :: checked_stoi
< uint16_t >( both
. second
);
529 return ComboAddress ( both
. first
, newport
);
532 return ComboAddress ( input
, port
);
536 void setSocketBuffer ( int fd
, int optname
, uint32_t size
)
539 socklen_t len
= sizeof ( psize
);
541 if ( getsockopt ( fd
, SOL_SOCKET
, optname
, & psize
, & len
) != 0 ) {
542 throw std :: runtime_error ( "Unable to retrieve socket buffer size:" + stringerror ());
547 if ( setsockopt ( fd
, SOL_SOCKET
, optname
, & size
, sizeof ( size
)) != 0 ) {
548 throw std :: runtime_error ( "Unable to raise socket buffer size to " + std :: to_string ( size
) + ": " + stringerror ());
552 void setSocketReceiveBuffer ( int fd
, uint32_t size
)
554 setSocketBuffer ( fd
, SO_RCVBUF
, size
);
557 void setSocketSendBuffer ( int fd
, uint32_t size
)
559 setSocketBuffer ( fd
, SO_SNDBUF
, size
);
563 static uint32_t raiseSocketBufferToMax ( int socket
, int optname
, const std :: string
& readMaxFromFile
)
565 std :: ifstream
ifs ( readMaxFromFile
);
568 if ( getline ( ifs
, line
)) {
569 auto max
= pdns :: checked_stoi
< uint32_t >( line
);
570 setSocketBuffer ( socket
, optname
, max
);
578 uint32_t raiseSocketReceiveBufferToMax ([[ maybe_unused
]] int socket
)
581 return raiseSocketBufferToMax ( socket
, SO_RCVBUF
, "/proc/sys/net/core/rmem_max" );
587 uint32_t raiseSocketSendBufferToMax ([[ maybe_unused
]] int socket
)
590 return raiseSocketBufferToMax ( socket
, SO_SNDBUF
, "/proc/sys/net/core/wmem_max" );
/**
 * Collects the names of the network interfaces reported by getifaddrs().
 *
 * @return the set of interface names; empty when getifaddrs() fails or the
 *         build lacks HAVE_GETIFADDRS
 */
std::set<std::string> getListOfNetworkInterfaces()
{
  std::set<std::string> result;
#ifdef HAVE_GETIFADDRS
  struct ifaddrs* ifaddr = nullptr;
  if (getifaddrs(&ifaddr) == -1) {
    return result;
  }

  for (struct ifaddrs* ifa = ifaddr; ifa != nullptr; ifa = ifa->ifa_next) {
    if (ifa->ifa_name == nullptr) {
      continue;
    }
    // an interface shows up once per address; the set de-duplicates the names
    result.insert(ifa->ifa_name);
  }

  freeifaddrs(ifaddr);
#endif
  return result;
}
617 #ifdef HAVE_GETIFADDRS
618 std :: vector
< ComboAddress
> getListOfAddressesOfNetworkInterface ( const std :: string
& itf
)
620 std :: vector
< ComboAddress
> result
;
621 struct ifaddrs
* ifaddr
= nullptr ;
622 if ( getifaddrs (& ifaddr
) == - 1 ) {
626 for ( struct ifaddrs
* ifa
= ifaddr
; ifa
!= nullptr ; ifa
= ifa
-> ifa_next
) {
627 if ( ifa
-> ifa_name
== nullptr || strcmp ( ifa
-> ifa_name
, itf
. c_str ()) != 0 ) {
630 if ( ifa
-> ifa_addr
== nullptr || ( ifa
-> ifa_addr
-> sa_family
!= AF_INET
&& ifa
-> ifa_addr
-> sa_family
!= AF_INET6
)) {
635 addr
. setSockaddr ( ifa
-> ifa_addr
, ifa
-> ifa_addr
-> sa_family
== AF_INET
? sizeof ( struct sockaddr_in
) : sizeof ( struct sockaddr_in6
));
641 result
. push_back ( addr
);
648 std :: vector
< ComboAddress
> getListOfAddressesOfNetworkInterface ( const std :: string
& /* itf */ )
650 std :: vector
< ComboAddress
> result
;
653 #endif // HAVE_GETIFADDRS
655 #ifdef HAVE_GETIFADDRS
/**
 * Converts a netmask given as raw bytes into a prefix length by counting
 * the bits that are set.
 *
 * @param mask pointer to the first byte of the mask
 * @param len  number of bytes in the mask (4 for IPv4, 16 for IPv6)
 * @return the number of set bits in the mask
 * @throws std::runtime_error when `mask` is null or `len` exceeds 16
 */
static uint8_t convertNetmaskToBits(const uint8_t* mask, socklen_t len)
{
  if (mask == nullptr || len > 16) {
    throw std::runtime_error("Invalid parameters passed to convertNetmaskToBits");
  }

  uint8_t result = 0;
  // for all bytes in the address (4 for IPv4, 16 for IPv6)
  for (size_t idx = 0; idx < len; idx++) {
    uint8_t byte = *(mask + idx);
    // count the number of bits set
    while (byte > 0) {
      result += (byte & 1);
      byte >>= 1;
    }
  }
  return result;
}
674 #endif /* HAVE_GETIFADDRS */
676 #ifdef HAVE_GETIFADDRS
677 std :: vector
< Netmask
> getListOfRangesOfNetworkInterface ( const std :: string
& itf
)
679 std :: vector
< Netmask
> result
;
680 struct ifaddrs
* ifaddr
= nullptr ;
681 if ( getifaddrs (& ifaddr
) == - 1 ) {
685 for ( struct ifaddrs
* ifa
= ifaddr
; ifa
!= nullptr ; ifa
= ifa
-> ifa_next
) {
686 if ( ifa
-> ifa_name
== nullptr || strcmp ( ifa
-> ifa_name
, itf
. c_str ()) != 0 ) {
689 if ( ifa
-> ifa_addr
== nullptr || ( ifa
-> ifa_addr
-> sa_family
!= AF_INET
&& ifa
-> ifa_addr
-> sa_family
!= AF_INET6
)) {
694 addr
. setSockaddr ( ifa
-> ifa_addr
, ifa
-> ifa_addr
-> sa_family
== AF_INET
? sizeof ( struct sockaddr_in
) : sizeof ( struct sockaddr_in6
));
700 if ( ifa
-> ifa_addr
-> sa_family
== AF_INET
) {
701 auto netmask
= reinterpret_cast < const struct sockaddr_in
*>( ifa
-> ifa_netmask
);
702 uint8_t maskBits
= convertNetmaskToBits ( reinterpret_cast < const uint8_t *>(& netmask
-> sin_addr
. s_addr
), sizeof ( netmask
-> sin_addr
. s_addr
));
703 result
. emplace_back ( addr
, maskBits
);
705 else if ( ifa
-> ifa_addr
-> sa_family
== AF_INET6
) {
706 auto netmask
= reinterpret_cast < const struct sockaddr_in6
*>( ifa
-> ifa_netmask
);
707 uint8_t maskBits
= convertNetmaskToBits ( reinterpret_cast < const uint8_t *>(& netmask
-> sin6_addr
. s6_addr
), sizeof ( netmask
-> sin6_addr
. s6_addr
));
708 result
. emplace_back ( addr
, maskBits
);
716 std :: vector
< Netmask
> getListOfRangesOfNetworkInterface ( const std :: string
& /* itf */ )
718 std :: vector
< Netmask
> result
;
721 #endif // HAVE_GETIFADDRS