]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/resolve/resolved-dns-stream.c
io-util.h: move iovec stuff from macro.h to io-util.h
[thirdparty/systemd.git] / src / resolve / resolved-dns-stream.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2014 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <netinet/tcp.h>
23
24 #include "fd-util.h"
25 #include "io-util.h"
26 #include "missing.h"
27 #include "resolved-dns-stream.h"
28
/* Lifetime budget of a stream: dns_stream_new() arms a timer this far in the
 * future, and when it fires the stream is completed with ETIMEDOUT. */
#define DNS_STREAM_TIMEOUT_USEC (10 * USEC_PER_SEC)
/* Cap on the number of streams a single Manager tracks at once;
 * dns_stream_new() returns -EBUSY when the limit is hit. */
#define DNS_STREAMS_MAX 128
31
32 static void dns_stream_stop(DnsStream *s) {
33 assert(s);
34
35 s->io_event_source = sd_event_source_unref(s->io_event_source);
36 s->timeout_event_source = sd_event_source_unref(s->timeout_event_source);
37 s->fd = safe_close(s->fd);
38 }
39
40 static int dns_stream_update_io(DnsStream *s) {
41 int f = 0;
42
43 assert(s);
44
45 if (s->write_packet && s->n_written < sizeof(s->write_size) + s->write_packet->size)
46 f |= EPOLLOUT;
47 if (!s->read_packet || s->n_read < sizeof(s->read_size) + s->read_packet->size)
48 f |= EPOLLIN;
49
50 return sd_event_source_set_io_events(s->io_event_source, f);
51 }
52
53 static int dns_stream_complete(DnsStream *s, int error) {
54 assert(s);
55
56 dns_stream_stop(s);
57
58 if (s->complete)
59 s->complete(s, error);
60 else
61 dns_stream_free(s);
62
63 return 0;
64 }
65
66 static int dns_stream_identify(DnsStream *s) {
67 union {
68 struct cmsghdr header; /* For alignment */
69 uint8_t buffer[CMSG_SPACE(MAXSIZE(struct in_pktinfo, struct in6_pktinfo))
70 + EXTRA_CMSG_SPACE /* kernel appears to require extra space */];
71 } control;
72 struct msghdr mh = {};
73 struct cmsghdr *cmsg;
74 socklen_t sl;
75 int r;
76
77 assert(s);
78
79 if (s->identified)
80 return 0;
81
82 /* Query the local side */
83 s->local_salen = sizeof(s->local);
84 r = getsockname(s->fd, &s->local.sa, &s->local_salen);
85 if (r < 0)
86 return -errno;
87 if (s->local.sa.sa_family == AF_INET6 && s->ifindex <= 0)
88 s->ifindex = s->local.in6.sin6_scope_id;
89
90 /* Query the remote side */
91 s->peer_salen = sizeof(s->peer);
92 r = getpeername(s->fd, &s->peer.sa, &s->peer_salen);
93 if (r < 0)
94 return -errno;
95 if (s->peer.sa.sa_family == AF_INET6 && s->ifindex <= 0)
96 s->ifindex = s->peer.in6.sin6_scope_id;
97
98 /* Check consistency */
99 assert(s->peer.sa.sa_family == s->local.sa.sa_family);
100 assert(IN_SET(s->peer.sa.sa_family, AF_INET, AF_INET6));
101
102 /* Query connection meta information */
103 sl = sizeof(control);
104 if (s->peer.sa.sa_family == AF_INET) {
105 r = getsockopt(s->fd, IPPROTO_IP, IP_PKTOPTIONS, &control, &sl);
106 if (r < 0)
107 return -errno;
108 } else if (s->peer.sa.sa_family == AF_INET6) {
109
110 r = getsockopt(s->fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, &control, &sl);
111 if (r < 0)
112 return -errno;
113 } else
114 return -EAFNOSUPPORT;
115
116 mh.msg_control = &control;
117 mh.msg_controllen = sl;
118
119 CMSG_FOREACH(cmsg, &mh) {
120
121 if (cmsg->cmsg_level == IPPROTO_IPV6) {
122 assert(s->peer.sa.sa_family == AF_INET6);
123
124 switch (cmsg->cmsg_type) {
125
126 case IPV6_PKTINFO: {
127 struct in6_pktinfo *i = (struct in6_pktinfo*) CMSG_DATA(cmsg);
128
129 if (s->ifindex <= 0)
130 s->ifindex = i->ipi6_ifindex;
131 break;
132 }
133
134 case IPV6_HOPLIMIT:
135 s->ttl = *(int *) CMSG_DATA(cmsg);
136 break;
137 }
138
139 } else if (cmsg->cmsg_level == IPPROTO_IP) {
140 assert(s->peer.sa.sa_family == AF_INET);
141
142 switch (cmsg->cmsg_type) {
143
144 case IP_PKTINFO: {
145 struct in_pktinfo *i = (struct in_pktinfo*) CMSG_DATA(cmsg);
146
147 if (s->ifindex <= 0)
148 s->ifindex = i->ipi_ifindex;
149 break;
150 }
151
152 case IP_TTL:
153 s->ttl = *(int *) CMSG_DATA(cmsg);
154 break;
155 }
156 }
157 }
158
159 /* The Linux kernel sets the interface index to the loopback
160 * device if the connection came from the local host since it
161 * avoids the routing table in such a case. Let's unset the
162 * interface index in such a case. */
163 if (s->ifindex == LOOPBACK_IFINDEX)
164 s->ifindex = 0;
165
166 /* If we don't know the interface index still, we look for the
167 * first local interface with a matching address. Yuck! */
168 if (s->ifindex <= 0)
169 s->ifindex = manager_find_ifindex(s->manager, s->local.sa.sa_family, s->local.sa.sa_family == AF_INET ? (union in_addr_union*) &s->local.in.sin_addr : (union in_addr_union*) &s->local.in6.sin6_addr);
170
171 if (s->protocol == DNS_PROTOCOL_LLMNR && s->ifindex > 0) {
172 uint32_t ifindex = htobe32(s->ifindex);
173
174 /* Make sure all packets for this connection are sent on the same interface */
175 if (s->local.sa.sa_family == AF_INET) {
176 r = setsockopt(s->fd, IPPROTO_IP, IP_UNICAST_IF, &ifindex, sizeof(ifindex));
177 if (r < 0)
178 log_debug_errno(errno, "Failed to invoke IP_UNICAST_IF: %m");
179 } else if (s->local.sa.sa_family == AF_INET6) {
180 r = setsockopt(s->fd, IPPROTO_IPV6, IPV6_UNICAST_IF, &ifindex, sizeof(ifindex));
181 if (r < 0)
182 log_debug_errno(errno, "Failed to invoke IPV6_UNICAST_IF: %m");
183 }
184 }
185
186 s->identified = true;
187
188 return 0;
189 }
190
191 static int on_stream_timeout(sd_event_source *es, usec_t usec, void *userdata) {
192 DnsStream *s = userdata;
193
194 assert(s);
195
196 return dns_stream_complete(s, ETIMEDOUT);
197 }
198
199 static int on_stream_io(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
200 DnsStream *s = userdata;
201 int r;
202
203 assert(s);
204
205 r = dns_stream_identify(s);
206 if (r < 0)
207 return dns_stream_complete(s, -r);
208
209 if ((revents & EPOLLOUT) &&
210 s->write_packet &&
211 s->n_written < sizeof(s->write_size) + s->write_packet->size) {
212
213 struct iovec iov[2];
214 ssize_t ss;
215
216 iov[0].iov_base = &s->write_size;
217 iov[0].iov_len = sizeof(s->write_size);
218 iov[1].iov_base = DNS_PACKET_DATA(s->write_packet);
219 iov[1].iov_len = s->write_packet->size;
220
221 IOVEC_INCREMENT(iov, 2, s->n_written);
222
223 ss = writev(fd, iov, 2);
224 if (ss < 0) {
225 if (errno != EINTR && errno != EAGAIN)
226 return dns_stream_complete(s, errno);
227 } else
228 s->n_written += ss;
229
230 /* Are we done? If so, disable the event source for EPOLLOUT */
231 if (s->n_written >= sizeof(s->write_size) + s->write_packet->size) {
232 r = dns_stream_update_io(s);
233 if (r < 0)
234 return dns_stream_complete(s, -r);
235 }
236 }
237
238 if ((revents & (EPOLLIN|EPOLLHUP|EPOLLRDHUP)) &&
239 (!s->read_packet ||
240 s->n_read < sizeof(s->read_size) + s->read_packet->size)) {
241
242 if (s->n_read < sizeof(s->read_size)) {
243 ssize_t ss;
244
245 ss = read(fd, (uint8_t*) &s->read_size + s->n_read, sizeof(s->read_size) - s->n_read);
246 if (ss < 0) {
247 if (errno != EINTR && errno != EAGAIN)
248 return dns_stream_complete(s, errno);
249 } else if (ss == 0)
250 return dns_stream_complete(s, ECONNRESET);
251 else
252 s->n_read += ss;
253 }
254
255 if (s->n_read >= sizeof(s->read_size)) {
256
257 if (be16toh(s->read_size) < DNS_PACKET_HEADER_SIZE)
258 return dns_stream_complete(s, EBADMSG);
259
260 if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size)) {
261 ssize_t ss;
262
263 if (!s->read_packet) {
264 r = dns_packet_new(&s->read_packet, s->protocol, be16toh(s->read_size));
265 if (r < 0)
266 return dns_stream_complete(s, -r);
267
268 s->read_packet->size = be16toh(s->read_size);
269 s->read_packet->ipproto = IPPROTO_TCP;
270 s->read_packet->family = s->peer.sa.sa_family;
271 s->read_packet->ttl = s->ttl;
272 s->read_packet->ifindex = s->ifindex;
273
274 if (s->read_packet->family == AF_INET) {
275 s->read_packet->sender.in = s->peer.in.sin_addr;
276 s->read_packet->sender_port = be16toh(s->peer.in.sin_port);
277 s->read_packet->destination.in = s->local.in.sin_addr;
278 s->read_packet->destination_port = be16toh(s->local.in.sin_port);
279 } else {
280 assert(s->read_packet->family == AF_INET6);
281 s->read_packet->sender.in6 = s->peer.in6.sin6_addr;
282 s->read_packet->sender_port = be16toh(s->peer.in6.sin6_port);
283 s->read_packet->destination.in6 = s->local.in6.sin6_addr;
284 s->read_packet->destination_port = be16toh(s->local.in6.sin6_port);
285
286 if (s->read_packet->ifindex == 0)
287 s->read_packet->ifindex = s->peer.in6.sin6_scope_id;
288 if (s->read_packet->ifindex == 0)
289 s->read_packet->ifindex = s->local.in6.sin6_scope_id;
290 }
291 }
292
293 ss = read(fd,
294 (uint8_t*) DNS_PACKET_DATA(s->read_packet) + s->n_read - sizeof(s->read_size),
295 sizeof(s->read_size) + be16toh(s->read_size) - s->n_read);
296 if (ss < 0) {
297 if (errno != EINTR && errno != EAGAIN)
298 return dns_stream_complete(s, errno);
299 } else if (ss == 0)
300 return dns_stream_complete(s, ECONNRESET);
301 else
302 s->n_read += ss;
303 }
304
305 /* Are we done? If so, disable the event source for EPOLLIN */
306 if (s->n_read >= sizeof(s->read_size) + be16toh(s->read_size)) {
307 r = dns_stream_update_io(s);
308 if (r < 0)
309 return dns_stream_complete(s, -r);
310
311 /* If there's a packet handler
312 * installed, call that. Note that
313 * this is optional... */
314 if (s->on_packet)
315 return s->on_packet(s);
316 }
317 }
318 }
319
320 if ((s->write_packet && s->n_written >= sizeof(s->write_size) + s->write_packet->size) &&
321 (s->read_packet && s->n_read >= sizeof(s->read_size) + s->read_packet->size))
322 return dns_stream_complete(s, 0);
323
324 return 0;
325 }
326
327 DnsStream *dns_stream_free(DnsStream *s) {
328 if (!s)
329 return NULL;
330
331 dns_stream_stop(s);
332
333 if (s->manager) {
334 LIST_REMOVE(streams, s->manager->dns_streams, s);
335 s->manager->n_dns_streams--;
336 }
337
338 dns_packet_unref(s->write_packet);
339 dns_packet_unref(s->read_packet);
340
341 free(s);
342
343 return 0;
344 }
345
346 DEFINE_TRIVIAL_CLEANUP_FUNC(DnsStream*, dns_stream_free);
347
348 int dns_stream_new(Manager *m, DnsStream **ret, DnsProtocol protocol, int fd) {
349 static const int one = 1;
350 _cleanup_(dns_stream_freep) DnsStream *s = NULL;
351 int r;
352
353 assert(m);
354 assert(fd >= 0);
355
356 if (m->n_dns_streams > DNS_STREAMS_MAX)
357 return -EBUSY;
358
359 s = new0(DnsStream, 1);
360 if (!s)
361 return -ENOMEM;
362
363 s->fd = -1;
364 s->protocol = protocol;
365
366 r = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one));
367 if (r < 0)
368 return -errno;
369
370 r = sd_event_add_io(m->event, &s->io_event_source, fd, EPOLLIN, on_stream_io, s);
371 if (r < 0)
372 return r;
373
374 r = sd_event_add_time(
375 m->event,
376 &s->timeout_event_source,
377 clock_boottime_or_monotonic(),
378 now(clock_boottime_or_monotonic()) + DNS_STREAM_TIMEOUT_USEC, 0,
379 on_stream_timeout, s);
380 if (r < 0)
381 return r;
382
383 LIST_PREPEND(streams, m->dns_streams, s);
384 s->manager = m;
385 s->fd = fd;
386 m->n_dns_streams++;
387
388 *ret = s;
389 s = NULL;
390
391 return 0;
392 }
393
394 int dns_stream_write_packet(DnsStream *s, DnsPacket *p) {
395 assert(s);
396
397 if (s->write_packet)
398 return -EBUSY;
399
400 s->write_packet = dns_packet_ref(p);
401 s->write_size = htobe16(p->size);
402 s->n_written = 0;
403
404 return dns_stream_update_io(s);
405 }