1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
4 #include <linux/if_ether.h>
5 #include <linux/if_infiniband.h>
6 #include <linux/if_packet.h>
7 #include <linux/netlink.h>
8 #include <linux/vm_sockets.h>
9 #include <netinet/in.h>
10 #include <sys/socket.h>
14 #include "memory-util.h"
15 #include "missing_network.h"
17 union sockaddr_union
{
18 /* The minimal, abstract version */
21 /* The libc provided version that allocates "enough room" for every protocol */
22 struct sockaddr_storage storage
;
24 /* Protocol-specific implementations */
25 struct sockaddr_in in
;
26 struct sockaddr_in6 in6
;
27 struct sockaddr_un un
;
28 struct sockaddr_nl nl
;
29 struct sockaddr_ll ll
;
30 struct sockaddr_vm vm
;
32 /* Ensure there is enough space to store Infiniband addresses */
33 uint8_t ll_buffer
[offsetof(struct sockaddr_ll
, sll_addr
) + CONST_MAX(ETH_ALEN
, INFINIBAND_ALEN
)];
35 /* Ensure there is enough space after the AF_UNIX sun_path for one more NUL byte, just to be sure that the path
36 * component is always followed by at least one NUL byte. */
37 uint8_t un_buffer
[sizeof(struct sockaddr_un
) + 1];
/* Size in bytes of the sun_path member of struct sockaddr_un. sizeof does not evaluate its operand, hence
 * the null pointer is never dereferenced. */
#define SUN_PATH_LEN (sizeof(((struct sockaddr_un *) 0)->sun_path))
42 typedef struct SocketAddress
{
43 union sockaddr_union sockaddr
;
45 /* We store the size here explicitly due to the weird
46 * sockaddr_un semantics for abstract sockets */
49 /* Socket type, i.e. SOCK_STREAM, SOCK_DGRAM, ... */
52 /* Socket protocol, IPPROTO_xxx, usually 0, except for netlink */
/* Values of the SocketAddressBindIPv6Only setting. The _MAX entry directly follows the regular values, and
 * the _INVALID entry is defined as -EINVAL. */
typedef enum SocketAddressBindIPv6Only {
        SOCKET_ADDRESS_DEFAULT,
        SOCKET_ADDRESS_IPV6_ONLY,
        _SOCKET_ADDRESS_BIND_IPV6_ONLY_MAX,
        _SOCKET_ADDRESS_BIND_IPV6_ONLY_INVALID = -EINVAL,
} SocketAddressBindIPv6Only;
64 #define socket_address_family(a) ((a)->sockaddr.sa.sa_family)
66 const char* socket_address_type_to_string(int t
) _const_
;
67 int socket_address_type_from_string(const char *s
) _pure_
;
69 int sockaddr_un_unlink(const struct sockaddr_un
*sa
);
71 static inline int socket_address_unlink(const SocketAddress
*a
) {
72 return socket_address_family(a
) == AF_UNIX
? sockaddr_un_unlink(&a
->sockaddr
.un
) : 0;
75 bool socket_address_can_accept(const SocketAddress
*a
) _pure_
;
77 int socket_address_listen(
78 const SocketAddress
*a
,
81 SocketAddressBindIPv6Only only
,
82 const char *bind_to_device
,
86 mode_t directory_mode
,
90 int socket_address_verify(const SocketAddress
*a
, bool strict
) _pure_
;
91 int socket_address_print(const SocketAddress
*a
, char **p
);
92 bool socket_address_matches_fd(const SocketAddress
*a
, int fd
);
94 bool socket_address_equal(const SocketAddress
*a
, const SocketAddress
*b
) _pure_
;
96 const char* socket_address_get_path(const SocketAddress
*a
);
98 bool socket_ipv6_is_supported(void);
99 bool socket_ipv6_is_enabled(void);
/* Accessors and formatters for raw socket addresses */
int sockaddr_port(const struct sockaddr *_sa, unsigned *port);
const union in_addr_union *sockaddr_in_addr(const struct sockaddr *sa);
int sockaddr_set_in_addr(union sockaddr_union *u, int family, const union in_addr_union *a, uint16_t port);

int sockaddr_pretty(
                const struct sockaddr *_sa,
                socklen_t salen,
                bool translate_ipv6,
                bool include_port,
                char **ret);
int getpeername_pretty(int fd, bool include_port, char **ret);
int getsockname_pretty(int fd, char **ret);

int socknameinfo_pretty(const struct sockaddr *sa, socklen_t salen, char **_ret);
111 const char* socket_address_bind_ipv6_only_to_string(SocketAddressBindIPv6Only b
) _const_
;
112 SocketAddressBindIPv6Only
socket_address_bind_ipv6_only_from_string(const char *s
) _pure_
;
113 SocketAddressBindIPv6Only
socket_address_bind_ipv6_only_or_bool_from_string(const char *s
);
115 int netlink_family_to_string_alloc(int b
, char **s
);
116 int netlink_family_from_string(const char *s
) _pure_
;
118 bool sockaddr_equal(const union sockaddr_union
*a
, const union sockaddr_union
*b
);
120 int fd_set_sndbuf(int fd
, size_t n
, bool increase
);
/* Shortcut for fd_set_sndbuf() with the 'increase' argument fixed to true; returns whatever that call
 * returns. */
static inline int fd_inc_sndbuf(int fd, size_t n) {
        return fd_set_sndbuf(fd, n, true);
}
int fd_set_rcvbuf(int fd, size_t n, bool increase);
/* Shortcut for fd_set_rcvbuf() with the 'increase' argument fixed to true; returns whatever that call
 * returns. */
static inline int fd_increase_rxbuf(int fd, size_t n) {
        return fd_set_rcvbuf(fd, n, true);
}
int ip_tos_to_string_alloc(int i, char **s);
int ip_tos_from_string(const char *s);
/* Bit flags accepted by ifname_valid_full(); _IFNAME_VALID_ALL is the union of all individual flags. */
typedef enum IfnameValidFlags {
        IFNAME_VALID_ALTERNATIVE = 1 << 0, /* Allow "altnames" too */
        IFNAME_VALID_NUMERIC     = 1 << 1, /* Allow decimal formatted ifindexes too */
        IFNAME_VALID_SPECIAL     = 1 << 2, /* Allow the special names "all" and "default" */
        _IFNAME_VALID_ALL        = IFNAME_VALID_ALTERNATIVE | IFNAME_VALID_NUMERIC | IFNAME_VALID_SPECIAL,
} IfnameValidFlags;
138 bool ifname_valid_char(char a
) _const_
;
139 bool ifname_valid_full(const char *p
, IfnameValidFlags flags
) _pure_
;
/* Shortcut for ifname_valid_full() with no flags set, i.e. none of the IFNAME_VALID_xxx relaxations
 * enabled. */
static inline bool ifname_valid(const char *p) {
        return ifname_valid_full(p, 0);
}
143 bool address_label_valid(const char *p
) _pure_
;
145 int getpeercred(int fd
, struct ucred
*ucred
);
146 int getpeersec(int fd
, char **ret
);
147 int getpeergroups(int fd
, gid_t
**ret
);
148 int getpeerpidfd(int fd
);
149 int getpeerpidref(int fd
, PidRef
*ret
);
/* Takes, in order: the transport socket, an array of file descriptors with its length, an iovec array with
 * its length, a destination address with its length, and flags. The first and the last parameter are
 * restored to match the eight-argument calls made by send_many_fds_iov() and send_many_fds(). */
ssize_t send_many_fds_iov_sa(
                int transport_fd,
                int *fds_array, size_t n_fds_array,
                const struct iovec *iov, size_t iovlen,
                const struct sockaddr *sa, socklen_t len,
                int flags);
/* Same as send_many_fds_iov_sa(), but without a destination address: NULL and 0 are passed for the address
 * and its length. Returns the callee's result unchanged. */
static inline ssize_t send_many_fds_iov(
                int transport_fd,
                int *fds_array, size_t n_fds_array,
                const struct iovec *iov, size_t iovlen,
                int flags) {

        return send_many_fds_iov_sa(transport_fd, fds_array, n_fds_array, iov, iovlen, NULL, 0, flags);
}
/* Same as send_many_fds_iov_sa(), but with neither an iovec payload nor a destination address (NULL and 0
 * are passed for both). Note that the callee's ssize_t result is returned as int. */
static inline int send_many_fds(
                int transport_fd,
                int *fds_array,
                size_t n_fds_array,
                int flags) {

        return send_many_fds_iov_sa(transport_fd, fds_array, n_fds_array, NULL, 0, NULL, 0, flags);
}
/* Single-descriptor counterpart of send_many_fds_iov_sa(). The parameter list matches the seven-argument
 * expansion of the send_one_fd_iov() and send_one_fd() macros. */
ssize_t send_one_fd_iov_sa(
                int transport_fd,
                int fd,
                const struct iovec *iov, size_t iovlen,
                const struct sockaddr *sa, socklen_t len,
                int flags);
/* NOTE(review): the 'fd' and 'flags' parameters were absent from the reviewed text and are restored by
 * analogy with send_one_fd_iov_sa() -- confirm against the definition. */
int send_one_fd_sa(int transport_fd,
                   int fd,
                   const struct sockaddr *sa, socklen_t len,
                   int flags);
/* Variants of send_one_fd_iov_sa() that pass NULL/0 for the destination address, and in the second case
 * also for the iovec payload. */
#define send_one_fd_iov(transport_fd, fd, iov, iovlen, flags) \
        send_one_fd_iov_sa(transport_fd, fd, iov, iovlen, NULL, 0, flags)
#define send_one_fd(transport_fd, fd, flags) \
        send_one_fd_iov_sa(transport_fd, fd, NULL, 0, NULL, 0, flags)

ssize_t receive_one_fd_iov(int transport_fd, struct iovec *iov, size_t iovlen, int flags, int *ret_fd);
int receive_one_fd(int transport_fd, int flags);
ssize_t receive_many_fds_iov(
                int transport_fd,
                struct iovec *iov, size_t iovlen,
                int **ret_fds_array, size_t *ret_n_fds_array,
                int flags);
int receive_many_fds(int transport_fd, int **ret_fds_array, size_t *ret_n_fds_array, int flags);

ssize_t next_datagram_size_fd(int fd);

int flush_accept(int fd);
ssize_t flush_mqueue(int fd);
/* Loop header that walks all control messages of msghdr 'mh', assigning each one to 'cmsg' in turn. */
#define CMSG_FOREACH(cmsg, mh)                                          \
        for ((cmsg) = CMSG_FIRSTHDR(mh);                                \
             (cmsg);                                                    \
             (cmsg) = CMSG_NXTHDR((mh), (cmsg)))
/* Returns the cmsghdr's data pointer, but safely cast to the specified type. Does two alignment checks: one
 * at compile time, that the requested type has a smaller or same alignment as 'struct cmsghdr', and one
 * during runtime, that the actual pointer matches the alignment too. This is supposed to catch cases such as
 * 'struct timeval' is embedded into 'struct cmsghdr' on architectures where the alignment of the former is 8
 * bytes (because of a 64-bit time_t), but of the latter is 4 bytes (because size_t is 32 bits).
 *
 * Evaluates 'cmsg' exactly once; a NULL 'cmsg' yields a NULL pointer of the requested type. Implemented as a
 * statement expression, since it needs a local variable and must still yield a value. */
#define CMSG_TYPED_DATA(cmsg, type)                                     \
        ({                                                              \
                struct cmsghdr *_cmsg = (cmsg);                         \
                assert_cc(alignof(type) <= alignof(struct cmsghdr));    \
                _cmsg ? CAST_ALIGN_PTR(type, CMSG_DATA(_cmsg)) : (type*) NULL; \
        })
struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length);
void* cmsg_find_and_copy_data(struct msghdr *mh, int level, int type, void *buf, size_t buf_len);

/* Type-safe, dereferencing version of cmsg_find() */
#define CMSG_FIND_DATA(mh, level, type, ctype)                          \
        CMSG_TYPED_DATA(cmsg_find(mh, level, type, CMSG_LEN(sizeof(ctype))), ctype)

/* Type-safe version of cmsg_find_and_copy_data() */
#define CMSG_FIND_AND_COPY_DATA(mh, level, type, ctype)                 \
        (ctype*) cmsg_find_and_copy_data(mh, level, type, &(ctype){}, sizeof(ctype))
/* Resolves to a type that can carry cmsghdr structures. Make sure things are properly aligned, i.e. the type
 * itself is placed properly in memory and the size is also aligned to what's appropriate for "cmsghdr"
 * structures.
 *
 * The 'cmsghdr' member gives the type the alignment of struct cmsghdr, 'buf' gives it the requested size,
 * and 'align_check' gets a negative array size (i.e. fails to compile) if 'size' is smaller than
 * CMSG_SPACE(0) or is not a multiple of the cmsghdr alignment.
 *
 * NOTE(review): the enclosing union and the 'buf' member were absent from the reviewed text and have been
 * reconstructed -- confirm the member name against users of this macro. */
#define CMSG_BUFFER_TYPE(size)                                          \
        union {                                                         \
                struct cmsghdr cmsghdr;                                 \
                uint8_t buf[size];                                      \
                uint8_t align_check[(size) >= CMSG_SPACE(0) &&          \
                                    (size) == CMSG_ALIGN(size) ? 1 : -1]; \
        }
/* Length helpers for the individual address types and for the union as a whole */
size_t sockaddr_ll_len(const struct sockaddr_ll *sa);
size_t sockaddr_un_len(const struct sockaddr_un *sa);
size_t sockaddr_len(const union sockaddr_union *sa);

int socket_ioctl_fd(void);

int sockaddr_un_set_path(struct sockaddr_un *ret, const char *path);
/* Sets an int-valued socket option via setsockopt(). Returns 0 on success, or the negated errno value if
 * setsockopt() fails. */
static inline int setsockopt_int(int fd, int level, int optname, int value) {
        if (setsockopt(fd, level, optname, &value, sizeof(value)) < 0)
                return -errno;

        return 0;
}
int getsockopt_int(int fd, int level, int optname, int *ret);

int socket_bind_to_ifname(int fd, const char *ifname);
int socket_bind_to_ifindex(int fd, int ifindex);

int socket_autobind(int fd, char **ret_name);
257 /* Define a 64-bit version of timeval/timespec in any case, even on 32-bit userspace. */
/* Two 64-bit fields: seconds and microseconds. */
struct timeval_large {
        uint64_t tvl_sec, tvl_usec;
};
/* Two 64-bit fields: seconds and nanoseconds. */
struct timespec_large {
        uint64_t tvl_sec, tvl_nsec;
};
265 /* glibc duplicates timespec/timeval on certain 32-bit arches, once in 32-bit and once in 64-bit.
266 * See __convert_scm_timestamps() in glibc source code. Hence, we need additional buffer space for them
267 * to prevent truncating control msg (recvmsg() MSG_CTRUNC). */
/* Control buffer space for a timeval: room for both the native and the 64-bit layout whenever the two differ
 * in size, otherwise room for just one. */
#define CMSG_SPACE_TIMEVAL                                              \
        ((sizeof(struct timeval) != sizeof(struct timeval_large)) ?     \
         CMSG_SPACE(sizeof(struct timeval)) +                           \
         CMSG_SPACE(sizeof(struct timeval_large)) :                     \
         CMSG_SPACE(sizeof(struct timeval)))
/* Control buffer space for a timespec: room for both the native and the 64-bit layout whenever the two
 * differ in size, otherwise room for just one. */
#define CMSG_SPACE_TIMESPEC                                             \
        ((sizeof(struct timespec) != sizeof(struct timespec_large)) ?   \
         CMSG_SPACE(sizeof(struct timespec)) +                          \
         CMSG_SPACE(sizeof(struct timespec_large)) :                    \
         CMSG_SPACE(sizeof(struct timespec)))
ssize_t recvmsg_safe(int sockfd, struct msghdr *msg, int flags);

int socket_get_family(int fd);
int socket_set_recvpktinfo(int fd, int af, bool b);
int socket_set_unicast_if(int fd, int af, int ifi);

/* Takes one option name for IPv4 and one for IPv6, next to the address family 'af' and the value to set. */
int socket_set_option(int fd, int af, int opt_ipv4, int opt_ipv6, int val);
/* Calls socket_set_option() with IP_RECVERR as the IPv4 option and IPV6_RECVERR as the IPv6 option, passing
 * 'b' as the value. */
static inline int socket_set_recverr(int fd, int af, bool b) {
        return socket_set_option(fd, af, IP_RECVERR, IPV6_RECVERR, b);
}
/* Calls socket_set_option() with IP_RECVTTL as the IPv4 option and IPV6_RECVHOPLIMIT as the IPv6 option,
 * passing 'b' as the value. */
static inline int socket_set_recvttl(int fd, int af, bool b) {
        return socket_set_option(fd, af, IP_RECVTTL, IPV6_RECVHOPLIMIT, b);
}
/* Calls socket_set_option() with IP_TTL as the IPv4 option and IPV6_UNICAST_HOPS as the IPv6 option, passing
 * 'ttl' as the value. */
static inline int socket_set_ttl(int fd, int af, int ttl) {
        return socket_set_option(fd, af, IP_TTL, IPV6_UNICAST_HOPS, ttl);
}
/* Calls socket_set_option() with IP_FREEBIND as the IPv4 option and IPV6_FREEBIND as the IPv6 option,
 * passing 'b' as the value. */
static inline int socket_set_freebind(int fd, int af, bool b) {
        return socket_set_option(fd, af, IP_FREEBIND, IPV6_FREEBIND, b);
}
/* Calls socket_set_option() with IP_TRANSPARENT as the IPv4 option and IPV6_TRANSPARENT as the IPv6 option,
 * passing 'b' as the value. */
static inline int socket_set_transparent(int fd, int af, bool b) {
        return socket_set_option(fd, af, IP_TRANSPARENT, IPV6_TRANSPARENT, b);
}
/* Calls socket_set_option() with IP_RECVFRAGSIZE as the IPv4 option and IPV6_RECVFRAGSIZE as the IPv6
 * option, passing 'b' as the value. */
static inline int socket_set_recvfragsize(int fd, int af, bool b) {
        return socket_set_option(fd, af, IP_RECVFRAGSIZE, IPV6_RECVFRAGSIZE, b);
}
int socket_get_mtu(int fd, int af, size_t *ret);

/* An initializer for struct ucred that initializes all fields to the invalid value appropriate for each */
#define UCRED_INVALID                           \
        {                                       \
                .pid = 0,                       \
                .uid = UID_INVALID,             \
                .gid = GID_INVALID,             \
        }

int connect_unix_path(int fd, int dir_fd, const char *path);
/* Returns true if 'cid' lies strictly between 2 and UINT32_MAX, false otherwise. */
static inline bool VSOCK_CID_IS_REGULAR(unsigned cid) {
        /* 0, 1, 2, UINT32_MAX are special, refuse those */
        return cid > 2 && cid < UINT32_MAX;
}
317 int vsock_parse_port(const char *s
, unsigned *ret
);
318 int vsock_parse_cid(const char *s
, unsigned *ret
);
320 /* Parses AF_UNIX and AF_VSOCK addresses. AF_INET[6] require some netlink calls, so it cannot be in
321 * src/basic/ and is done from 'socket_local_address from src/shared/. Return -EPROTO in case of
322 * protocol mismatch. */
323 int socket_address_parse_unix(SocketAddress
*ret_address
, const char *s
);
324 int socket_address_parse_vsock(SocketAddress
*ret_address
, const char *s
);
325 int socket_address_equal_unix(const char *a
, const char *b
);
/* libc's SOMAXCONN is defined to 128 or 4096 (at least on glibc). But actually, the value can be much
 * larger. In our codebase we want to set it to the max usually, since nowadays socket memory is properly
 * tracked by memcg, and hence we don't need to enforce extra limits here. Moreover, the kernel caps it to
 * /proc/sys/net/core/somaxconn anyway, thus by setting this to unbounded we just make that sysctl file
 * authoritative. */
#define SOMAXCONN_DELUXE INT_MAX

int vsock_get_local_cid(unsigned *ret);

int netlink_socket_get_multicast_groups(int fd, size_t *ret_len, uint32_t **ret_groups);

int socket_get_cookie(int fd, uint64_t *ret);

void cmsg_close_all(struct msghdr *mh);