1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
5 #include <linux/netlink.h>
6 #include <linux/if_ether.h>
7 #include <linux/if_infiniband.h>
8 #include <linux/if_packet.h>
9 #include <netinet/in.h>
13 #include <sys/socket.h>
14 #include <sys/types.h>
17 #include "errno-util.h"
18 #include "in-addr-util.h"
20 #include "missing_network.h"
21 #include "missing_socket.h"
22 #include "sparse-endian.h"
24 union sockaddr_union
{
25 /* The minimal, abstract version */
28 /* The libc provided version that allocates "enough room" for every protocol */
29 struct sockaddr_storage storage
;
31 /* Protocol-specific implementations */
32 struct sockaddr_in in
;
33 struct sockaddr_in6 in6
;
34 struct sockaddr_un un
;
35 struct sockaddr_nl nl
;
36 struct sockaddr_ll ll
;
37 struct sockaddr_vm vm
;
39 /* Ensure there is enough space to store Infiniband addresses */
40 uint8_t ll_buffer
[offsetof(struct sockaddr_ll
, sll_addr
) + CONST_MAX(ETH_ALEN
, INFINIBAND_ALEN
)];
42 /* Ensure there is enough space after the AF_UNIX sun_path for one more NUL byte, just to be sure that the path
43 * component is always followed by at least one NUL byte. */
44 uint8_t un_buffer
[sizeof(struct sockaddr_un
) + 1];
/* Size in bytes of the sun_path member of struct sockaddr_un. The compound literal only serves as the
 * operand of sizeof, so the member's size can be taken without declaring an object. */
47 #define SUN_PATH_LEN (sizeof(((struct sockaddr_un){}).sun_path))
49 typedef struct SocketAddress
{
50 union sockaddr_union sockaddr
;
52 /* We store the size here explicitly due to the weird
53 * sockaddr_un semantics for abstract sockets */
56 /* Socket type, i.e. SOCK_STREAM, SOCK_DGRAM, ... */
59 /* Socket protocol, IPPROTO_xxx, usually 0, except for netlink */
/* Enumeration taken as the 'only' parameter of socket_address_listen(). The _MAX enumerator directly follows
 * the last regular value, and _INVALID is defined as -EINVAL, i.e. a negative value outside the regular range.
 * NOTE(review): presumably -EINVAL was chosen so that the *_from_string() parsers declared further down can
 * return it directly as an error — confirm against their implementation. */
63 typedef enum SocketAddressBindIPv6Only
{
64 SOCKET_ADDRESS_DEFAULT
,
66 SOCKET_ADDRESS_IPV6_ONLY
,
67 _SOCKET_ADDRESS_BIND_IPV6_ONLY_MAX
,
68 _SOCKET_ADDRESS_BIND_IPV6_ONLY_INVALID
= -EINVAL
,
69 } SocketAddressBindIPv6Only
;
/* Expands to the sa_family field reached through the 'sa' member of the sockaddr union stored in a
 * SocketAddress. 'a' must be a pointer; it is expanded exactly once. */
71 #define socket_address_family(a) ((a)->sockaddr.sa.sa_family)
73 const char* socket_address_type_to_string(int t
) _const_
;
74 int socket_address_type_from_string(const char *s
) _pure_
;
76 int sockaddr_un_unlink(const struct sockaddr_un
*sa
);
78 static inline int socket_address_unlink(const SocketAddress
*a
) {
79 return socket_address_family(a
) == AF_UNIX
? sockaddr_un_unlink(&a
->sockaddr
.un
) : 0;
82 bool socket_address_can_accept(const SocketAddress
*a
) _pure_
;
84 int socket_address_listen(
85 const SocketAddress
*a
,
88 SocketAddressBindIPv6Only only
,
89 const char *bind_to_device
,
93 mode_t directory_mode
,
97 int socket_address_verify(const SocketAddress
*a
, bool strict
) _pure_
;
98 int socket_address_print(const SocketAddress
*a
, char **p
);
99 bool socket_address_matches_fd(const SocketAddress
*a
, int fd
);
101 bool socket_address_equal(const SocketAddress
*a
, const SocketAddress
*b
) _pure_
;
103 const char* socket_address_get_path(const SocketAddress
*a
);
105 bool socket_ipv6_is_supported(void);
106 bool socket_ipv6_is_enabled(void);
108 int sockaddr_port(const struct sockaddr
*_sa
, unsigned *port
);
109 const union in_addr_union
*sockaddr_in_addr(const struct sockaddr
*sa
);
110 int sockaddr_set_in_addr(union sockaddr_union
*u
, int family
, const union in_addr_union
*a
, uint16_t port
);
112 int sockaddr_pretty(const struct sockaddr
*_sa
, socklen_t salen
, bool translate_ipv6
, bool include_port
, char **ret
);
113 int getpeername_pretty(int fd
, bool include_port
, char **ret
);
114 int getsockname_pretty(int fd
, char **ret
);
116 int socknameinfo_pretty(const struct sockaddr
*sa
, socklen_t salen
, char **_ret
);
118 const char* socket_address_bind_ipv6_only_to_string(SocketAddressBindIPv6Only b
) _const_
;
119 SocketAddressBindIPv6Only
socket_address_bind_ipv6_only_from_string(const char *s
) _pure_
;
120 SocketAddressBindIPv6Only
socket_address_bind_ipv6_only_or_bool_from_string(const char *s
);
122 int netlink_family_to_string_alloc(int b
, char **s
);
123 int netlink_family_from_string(const char *s
) _pure_
;
125 bool sockaddr_equal(const union sockaddr_union
*a
, const union sockaddr_union
*b
);
127 int fd_set_sndbuf(int fd
, size_t n
, bool increase
);
128 static inline int fd_inc_sndbuf(int fd
, size_t n
) {
129 return fd_set_sndbuf(fd
, n
, true);
131 int fd_set_rcvbuf(int fd
, size_t n
, bool increase
);
132 static inline int fd_increase_rxbuf(int fd
, size_t n
) {
133 return fd_set_rcvbuf(fd
, n
, true);
136 int ip_tos_to_string_alloc(int i
, char **s
);
137 int ip_tos_from_string(const char *s
);
140 IFNAME_VALID_ALTERNATIVE
= 1 << 0, /* Allow "altnames" too */
141 IFNAME_VALID_NUMERIC
= 1 << 1, /* Allow decimal formatted ifindexes too */
142 IFNAME_VALID_SPECIAL
= 1 << 2, /* Allow the special names "all" and "default" */
143 _IFNAME_VALID_ALL
= IFNAME_VALID_ALTERNATIVE
| IFNAME_VALID_NUMERIC
| IFNAME_VALID_SPECIAL
,
145 bool ifname_valid_char(char a
);
146 bool ifname_valid_full(const char *p
, IfnameValidFlags flags
);
147 static inline bool ifname_valid(const char *p
) {
148 return ifname_valid_full(p
, 0);
150 bool address_label_valid(const char *p
);
152 int getpeercred(int fd
, struct ucred
*ucred
);
153 int getpeersec(int fd
, char **ret
);
154 int getpeergroups(int fd
, gid_t
**ret
);
155 int getpeerpidfd(int fd
);
157 ssize_t
send_many_fds_iov_sa(
159 int *fds_array
, size_t n_fds_array
,
160 const struct iovec
*iov
, size_t iovlen
,
161 const struct sockaddr
*sa
, socklen_t len
,
163 static inline ssize_t
send_many_fds_iov(
165 int *fds_array
, size_t n_fds_array
,
166 const struct iovec
*iov
, size_t iovlen
,
169 return send_many_fds_iov_sa(transport_fd
, fds_array
, n_fds_array
, iov
, iovlen
, NULL
, 0, flags
);
171 static inline int send_many_fds(
177 return send_many_fds_iov_sa(transport_fd
, fds_array
, n_fds_array
, NULL
, 0, NULL
, 0, flags
);
179 ssize_t
send_one_fd_iov_sa(
182 const struct iovec
*iov
, size_t iovlen
,
183 const struct sockaddr
*sa
, socklen_t len
,
185 int send_one_fd_sa(int transport_fd
,
187 const struct sockaddr
*sa
, socklen_t len
,
/* Shortcuts for send_one_fd_iov_sa() that pass no destination address (NULL sockaddr, length 0).
 * send_one_fd() additionally passes no payload (NULL iovec, count 0). */
189 #define send_one_fd_iov(transport_fd, fd, iov, iovlen, flags) send_one_fd_iov_sa(transport_fd, fd, iov, iovlen, NULL, 0, flags)
190 #define send_one_fd(transport_fd, fd, flags) send_one_fd_iov_sa(transport_fd, fd, NULL, 0, NULL, 0, flags)
191 ssize_t
receive_one_fd_iov(int transport_fd
, struct iovec
*iov
, size_t iovlen
, int flags
, int *ret_fd
);
192 int receive_one_fd(int transport_fd
, int flags
);
193 ssize_t
receive_many_fds_iov(int transport_fd
, struct iovec
*iov
, size_t iovlen
, int **ret_fds_array
, size_t *ret_n_fds_array
, int flags
);
194 int receive_many_fds(int transport_fd
, int **ret_fds_array
, size_t *ret_n_fds_array
, int flags
);
196 ssize_t
next_datagram_size_fd(int fd
);
198 int flush_accept(int fd
);
/* Loop header that iterates 'cmsg' over the control message headers of the msghdr 'mh': it starts at
 * CMSG_FIRSTHDR() and advances with CMSG_NXTHDR() until that yields NULL. Both macro arguments are expanded
 * more than once, so they should be free of side effects. */
200 #define CMSG_FOREACH(cmsg, mh) \
201 for ((cmsg) = CMSG_FIRSTHDR(mh); (cmsg); (cmsg) = CMSG_NXTHDR((mh), (cmsg)))
203 /* Returns the cmsghdr's data pointer, but safely cast to the specified type. Does two alignment checks: one
204 * at compile time, that the requested type has a smaller or same alignment as 'struct cmsghdr', and one
205 * during runtime, that the actual pointer matches the alignment too. This is supposed to catch cases such as
206 * 'struct timeval' is embedded into 'struct cmsghdr' on architectures where the alignment of the former is 8
207 * bytes (because of a 64-bit time_t), but of the latter is 4 bytes (because size_t is 32 bits), such as
209 #define CMSG_TYPED_DATA(cmsg, type) \
211 struct cmsghdr *_cmsg = (cmsg); \
212 assert_cc(alignof(type) <= alignof(struct cmsghdr)); \
213 _cmsg ? CAST_ALIGN_PTR(type, CMSG_DATA(_cmsg)) : (type*) NULL; \
216 struct cmsghdr
* cmsg_find(struct msghdr
*mh
, int level
, int type
, socklen_t length
);
217 void* cmsg_find_and_copy_data(struct msghdr
*mh
, int level
, int type
, void *buf
, size_t buf_len
);
219 /* Type-safe, dereferencing version of cmsg_find(): calls cmsg_find() with CMSG_LEN(sizeof(ctype)) as the
 * length argument and passes the resulting header to CMSG_TYPED_DATA(), which yields the payload as a
 * 'ctype*', or a NULL 'ctype*' if no header was returned. */
220 #define CMSG_FIND_DATA(mh, level, type, ctype) \
221 CMSG_TYPED_DATA(cmsg_find(mh, level, type, CMSG_LEN(sizeof(ctype))), ctype)
223 /* Type-safe version of cmsg_find_and_copy_data(): supplies an anonymous 'ctype' compound literal as the
 * destination buffer together with sizeof(ctype) as its length, and casts the returned void* to 'ctype*'.
 * NOTE(review): the compound literal only lives as long as the enclosing block, so the result presumably
 * must not be used beyond it — confirm what cmsg_find_and_copy_data() actually returns. */
224 #define CMSG_FIND_AND_COPY_DATA(mh, level, type, ctype) \
225 (ctype*) cmsg_find_and_copy_data(mh, level, type, &(ctype){}, sizeof(ctype))
227 /* Resolves to a type that can carry cmsghdr structures. Make sure things are properly aligned, i.e. the type
228 * itself is placed properly in memory and the size is also aligned to what's appropriate for "cmsghdr"
230 #define CMSG_BUFFER_TYPE(size) \
232 struct cmsghdr cmsghdr; \
234 uint8_t align_check[(size) >= CMSG_SPACE(0) && \
235 (size) == CMSG_ALIGN(size) ? 1 : -1]; \
239 * Certain hardware address types (e.g. Infiniband) do not fit into sll_addr
240 * (8 bytes) and run over the structure. This macro returns the correct size that
241 * must be passed to kernel.
243 #define SOCKADDR_LL_LEN(sa) \
245 const struct sockaddr_ll *_sa = &(sa); \
246 size_t _mac_len = sizeof(_sa->sll_addr); \
247 assert(_sa->sll_family == AF_PACKET); \
248 if (be16toh(_sa->sll_hatype) == ARPHRD_ETHER) \
249 _mac_len = MAX(_mac_len, (size_t) ETH_ALEN); \
250 if (be16toh(_sa->sll_hatype) == ARPHRD_INFINIBAND) \
251 _mac_len = MAX(_mac_len, (size_t) INFINIBAND_ALEN); \
252 offsetof(struct sockaddr_ll, sll_addr) + _mac_len; \
255 /* Covers only file system and abstract AF_UNIX socket addresses, but not unnamed socket addresses. */
256 #define SOCKADDR_UN_LEN(sa) \
258 const struct sockaddr_un *_sa = &(sa); \
259 assert(_sa->sun_family == AF_UNIX); \
260 offsetof(struct sockaddr_un, sun_path) + \
261 (_sa->sun_path[0] == 0 ? \
262 1 + strnlen(_sa->sun_path+1, sizeof(_sa->sun_path)-1) : \
263 strnlen(_sa->sun_path, sizeof(_sa->sun_path))+1); \
266 #define SOCKADDR_LEN(saddr) \
268 const union sockaddr_union *__sa = &(saddr); \
270 switch (__sa->sa.sa_family) { \
272 _len = sizeof(struct sockaddr_in); \
275 _len = sizeof(struct sockaddr_in6); \
278 _len = SOCKADDR_UN_LEN(__sa->un); \
281 _len = SOCKADDR_LL_LEN(__sa->ll); \
284 _len = sizeof(struct sockaddr_nl); \
287 _len = sizeof(struct sockaddr_vm); \
290 assert_not_reached(); \
295 int socket_ioctl_fd(void);
297 int sockaddr_un_set_path(struct sockaddr_un
*ret
, const char *path
);
299 static inline int setsockopt_int(int fd
, int level
, int optname
, int value
) {
300 if (setsockopt(fd
, level
, optname
, &value
, sizeof(value
)) < 0)
306 static inline int getsockopt_int(int fd
, int level
, int optname
, int *ret
) {
308 socklen_t sl
= sizeof(v
);
310 if (getsockopt(fd
, level
, optname
, &v
, &sl
) < 0)
311 return negative_errno();
319 int socket_bind_to_ifname(int fd
, const char *ifname
);
320 int socket_bind_to_ifindex(int fd
, int ifindex
);
322 /* Define a 64-bit version of timeval/timespec in any case, even on 32-bit userspace. */
323 struct timeval_large
{
324 uint64_t tvl_sec
, tvl_usec
;
326 struct timespec_large
{
327 uint64_t tvl_sec
, tvl_nsec
;
330 /* glibc duplicates timespec/timeval on certain 32-bit arches, once in 32-bit and once in 64-bit.
331 * See __convert_scm_timestamps() in glibc source code. Hence, we need additional buffer space for them
332 * to prevent recvmsg_safe() from returning -EXFULL.
 *
 * Each macro below evaluates to CMSG_SPACE() of the native structure alone when its size equals that of the
 * corresponding *_large structure, and otherwise to the sum of CMSG_SPACE() for the native structure and
 * CMSG_SPACE() for the *_large one. */
333 #define CMSG_SPACE_TIMEVAL \
334 ((sizeof(struct timeval) == sizeof(struct timeval_large)) ? \
335 CMSG_SPACE(sizeof(struct timeval)) : \
336 CMSG_SPACE(sizeof(struct timeval)) + \
337 CMSG_SPACE(sizeof(struct timeval_large)))
338 #define CMSG_SPACE_TIMESPEC \
339 ((sizeof(struct timespec) == sizeof(struct timespec_large)) ? \
340 CMSG_SPACE(sizeof(struct timespec)) : \
341 CMSG_SPACE(sizeof(struct timespec)) + \
342 CMSG_SPACE(sizeof(struct timespec_large)))
344 ssize_t
recvmsg_safe(int sockfd
, struct msghdr
*msg
, int flags
);
346 int socket_get_family(int fd
);
347 int socket_set_recvpktinfo(int fd
, int af
, bool b
);
348 int socket_set_unicast_if(int fd
, int af
, int ifi
);
350 int socket_set_option(int fd
, int af
, int opt_ipv4
, int opt_ipv6
, int val
);
351 static inline int socket_set_recverr(int fd
, int af
, bool b
) {
352 return socket_set_option(fd
, af
, IP_RECVERR
, IPV6_RECVERR
, b
);
354 static inline int socket_set_recvttl(int fd
, int af
, bool b
) {
355 return socket_set_option(fd
, af
, IP_RECVTTL
, IPV6_RECVHOPLIMIT
, b
);
357 static inline int socket_set_ttl(int fd
, int af
, int ttl
) {
358 return socket_set_option(fd
, af
, IP_TTL
, IPV6_UNICAST_HOPS
, ttl
);
360 static inline int socket_set_freebind(int fd
, int af
, bool b
) {
361 return socket_set_option(fd
, af
, IP_FREEBIND
, IPV6_FREEBIND
, b
);
363 static inline int socket_set_transparent(int fd
, int af
, bool b
) {
364 return socket_set_option(fd
, af
, IP_TRANSPARENT
, IPV6_TRANSPARENT
, b
);
366 static inline int socket_set_recvfragsize(int fd
, int af
, bool b
) {
367 return socket_set_option(fd
, af
, IP_RECVFRAGSIZE
, IPV6_RECVFRAGSIZE
, b
);
370 int socket_get_mtu(int fd
, int af
, size_t *ret
);
372 /* An initializer for struct ucred that sets every field to the invalid value appropriate for it:
 * pid 0, uid UID_INVALID and gid GID_INVALID. */
373 #define UCRED_INVALID { .pid = 0, .uid = UID_INVALID, .gid = GID_INVALID }
375 int connect_unix_path(int fd
, int dir_fd
, const char *path
);
377 static inline bool VSOCK_CID_IS_REGULAR(unsigned cid
) {
378 /* 0, 1, 2, UINT32_MAX are special, refuse those */
379 return cid
> 2 && cid
< UINT32_MAX
;
382 int vsock_parse_port(const char *s
, unsigned *ret
);
383 int vsock_parse_cid(const char *s
, unsigned *ret
);
385 /* Parses AF_UNIX and AF_VSOCK addresses. AF_INET[6] require some netlink calls, so it cannot be in
386 * src/basic/ and is done from 'socket_local_address' from src/shared/. Return -EPROTO in case of
387 * protocol mismatch. */
388 int socket_address_parse_unix(SocketAddress
*ret_address
, const char *s
);
389 int socket_address_parse_vsock(SocketAddress
*ret_address
, const char *s
);
391 /* libc's SOMAXCONN is defined to 128 or 4096 (at least on glibc). But actually, the value can be much
392 * larger. In our codebase we want to set it to the max usually, since nowadays socket memory is properly
393 * tracked by memcg, and hence we don't need to enforce extra limits here. Moreover, the kernel caps it to
394 * /proc/sys/net/core/somaxconn anyway, thus by setting this to unbounded we just make that sysctl file
396 #define SOMAXCONN_DELUXE INT_MAX
398 int vsock_get_local_cid(unsigned *ret
);