]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
resolved: instead of closing DNS UDP transaction fds right-away, add them to a socket...
authorLennart Poettering <lennart@poettering.net>
Fri, 6 Nov 2020 12:32:53 +0000 (13:32 +0100)
committerLennart Poettering <lennart@poettering.net>
Mon, 15 Feb 2021 22:14:32 +0000 (23:14 +0100)
The "socket graveyard" shall contain sockets we have sent a question out
of, but not received a reply. If we'd close thus sockets immediately
when we are not interested anymore, we'd trigger ICMP port unreachable
messages once we after all *do* get a reply. Let's avoid that, by
leaving the fds open for a bit longer, until a timeout is reached or a
reply datagram received.

Fixes: #17421
src/resolve/meson.build
src/resolve/resolved-dns-transaction.c
src/resolve/resolved-manager.c
src/resolve/resolved-manager.h
src/resolve/resolved-socket-graveyard.c [new file with mode: 0644]
src/resolve/resolved-socket-graveyard.h [new file with mode: 0644]

index f34e7ac68006ce7ed710d2bb4c65ca9fe681fb7c..0392ad98742df9ad233e2389adf051c5ecca5a4e 100644 (file)
@@ -64,6 +64,8 @@ systemd_resolved_sources = files('''
         resolved-mdns.h
         resolved-resolv-conf.c
         resolved-resolv-conf.h
+        resolved-socket-graveyard.c
+        resolved-socket-graveyard.h
         resolved-varlink.c
         resolved-varlink.h
         resolved.c
index bd73aa5451e899b0200308ce5e892cce1b5a9da8..bce1918e99950b8eb92980e8040f0c2adb4287e1 100644 (file)
@@ -46,7 +46,14 @@ static void dns_transaction_flush_dnssec_transactions(DnsTransaction *t) {
         }
 }
 
-static void dns_transaction_close_connection(DnsTransaction *t) {
+static void dns_transaction_close_connection(
+                DnsTransaction *t,
+                bool use_graveyard) { /* Set use_graveyard = false when you know the connection is already
+                                       * dead, for example because you got a connection error back from the
+                                       * kernel. In that case there's no point in keeping the fd around,
+                                       * hence don't. */
+        int r;
+
         assert(t);
 
         if (t->stream) {
@@ -60,6 +67,20 @@ static void dns_transaction_close_connection(DnsTransaction *t) {
         }
 
         t->dns_udp_event_source = sd_event_source_unref(t->dns_udp_event_source);
+
+        /* If we have an UDP socket where we sent a packet, but never received one, then add it to the socket
+         * graveyard, instead of closing it right away. That way it will stick around for a moment longer,
+         * and the reply we might still get from the server will be eaten up instead of resulting in an ICMP
+         * port unreachable error message. */
+
+        if (use_graveyard && t->dns_udp_fd >= 0 && t->sent && !t->received) {
+                r = manager_add_socket_to_graveyard(t->scope->manager, t->dns_udp_fd);
+                if (r < 0)
+                        log_debug_errno(r, "Failed to add UDP socket to graveyard, closing immediately: %m");
+                else
+                        TAKE_FD(t->dns_udp_fd);
+        }
+
         t->dns_udp_fd = safe_close(t->dns_udp_fd);
 }
 
@@ -79,7 +100,7 @@ DnsTransaction* dns_transaction_free(DnsTransaction *t) {
 
         log_debug("Freeing transaction %" PRIu16 ".", t->id);
 
-        dns_transaction_close_connection(t);
+        dns_transaction_close_connection(t, true);
         dns_transaction_stop_timeout(t);
 
         dns_packet_unref(t->sent);
@@ -403,7 +424,7 @@ void dns_transaction_complete(DnsTransaction *t, DnsTransactionState state) {
 
         t->state = state;
 
-        dns_transaction_close_connection(t);
+        dns_transaction_close_connection(t, true);
         dns_transaction_stop_timeout(t);
 
         /* Notify all queries that are interested, but make sure the
@@ -523,7 +544,7 @@ static int dns_transaction_maybe_restart(DnsTransaction *t) {
 static void on_transaction_stream_error(DnsTransaction *t, int error) {
         assert(t);
 
-        dns_transaction_close_connection(t);
+        dns_transaction_close_connection(t, true);
 
         if (ERRNO_IS_DISCONNECT(error)) {
                 if (t->scope->protocol == DNS_PROTOCOL_LLMNR) {
@@ -544,7 +565,7 @@ static int dns_transaction_on_stream_packet(DnsTransaction *t, DnsPacket *p) {
         assert(t);
         assert(p);
 
-        dns_transaction_close_connection(t);
+        dns_transaction_close_connection(t, true);
 
         if (dns_packet_validate_reply(p) <= 0) {
                 log_debug("Invalid TCP reply packet.");
@@ -631,7 +652,7 @@ static int dns_transaction_emit_tcp(DnsTransaction *t) {
         assert(t);
         assert(t->sent);
 
-        dns_transaction_close_connection(t);
+        dns_transaction_close_connection(t, true);
 
         switch (t->scope->protocol) {
 
@@ -731,7 +752,7 @@ static int dns_transaction_emit_tcp(DnsTransaction *t) {
 
         r = dns_stream_write_packet(t->stream, t->sent);
         if (r < 0) {
-                dns_transaction_close_connection(t);
+                dns_transaction_close_connection(t, /* use_graveyard= */ false);
                 return r;
         }
 
@@ -1235,7 +1256,7 @@ void dns_transaction_process_reply(DnsTransaction *t, DnsPacket *p) {
         if (r > 0) {
                 /* There are DNSSEC transactions pending now. Update the state accordingly. */
                 t->state = DNS_TRANSACTION_VALIDATING;
-                dns_transaction_close_connection(t);
+                dns_transaction_close_connection(t, true);
                 dns_transaction_stop_timeout(t);
                 return;
         }
@@ -1319,7 +1340,10 @@ static int dns_transaction_emit_udp(DnsTransaction *t) {
                 if (r > 0 || t->dns_udp_fd < 0) { /* Server changed, or no connection yet. */
                         int fd;
 
-                        dns_transaction_close_connection(t);
+                        dns_transaction_close_connection(t, true);
+
+                        /* Before we allocate a new UDP socket, let's process the graveyard a bit to free some fds */
+                        manager_socket_graveyard_process(t->scope->manager);
 
                         fd = dns_scope_socket_udp(t->scope, t->server);
                         if (fd < 0)
@@ -1341,7 +1365,7 @@ static int dns_transaction_emit_udp(DnsTransaction *t) {
                                 return r;
                 }
         } else
-                dns_transaction_close_connection(t);
+                dns_transaction_close_connection(t, true);
 
         r = dns_scope_emit_udp(t->scope, t->dns_udp_fd, t->sent);
         if (r < 0)
index a0997700540d0be550b1303fe23c67307e218137..a2a2cf495472c525830593b4efe6e1e2d5f46d12 100644 (file)
@@ -767,6 +767,8 @@ Manager *manager_free(Manager *m) {
         manager_dns_stub_stop(m);
         manager_varlink_done(m);
 
+        manager_socket_graveyard_clear(m);
+
         ordered_set_free(m->dns_extra_stub_listeners);
 
         bus_verify_polkit_async_registry_free(m->polkit_registry);
index 4dc55e6e41c4b18c93a8b19732a4441b6497c5f8..90f55862301ee92c19434195edace99596b73f2f 100644 (file)
@@ -21,6 +21,7 @@ typedef struct Manager Manager;
 #include "resolved-dns-stub.h"
 #include "resolved-dns-trust-anchor.h"
 #include "resolved-link.h"
+#include "resolved-socket-graveyard.h"
 
 #define MANAGER_SEARCH_DOMAINS_MAX 256
 #define MANAGER_DNS_SERVERS_MAX 256
@@ -144,6 +145,10 @@ struct Manager {
         VarlinkServer *varlink_server;
 
         sd_event_source *clock_change_event_source;
+
+        LIST_HEAD(SocketGraveyard, socket_graveyard);
+        SocketGraveyard *socket_graveyard_oldest;
+        size_t n_socket_graveyard;
 };
 
 /* Manager */
diff --git a/src/resolve/resolved-socket-graveyard.c b/src/resolve/resolved-socket-graveyard.c
new file mode 100644 (file)
index 0000000..067cb66
--- /dev/null
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "resolved-socket-graveyard.h"
+
+#define SOCKET_GRAVEYARD_USEC (5 * USEC_PER_SEC)
+#define SOCKET_GRAVEYARD_MAX 100
+
+/* This implements a socket "graveyard" for UDP sockets. If a socket fd is added to the graveyard it is kept
+ * open for a couple of more seconds, expecting one reply. Once the reply is received the fd is closed
+ * immediately, or if none is received it is closed after the timeout. Why all this? So that if we contact a
+ * DNS server, and it doesn't reply instantly, and we lose interest in the response and thus close the fd, we
+ * don't end up sending back an ICMP error once the server responds but we aren't listening anymore. (See
+ * https://github.com/systemd/systemd/issues/17421 for further information.)
+ *
+ * Note that we don't allocate any timer event source to clear up the graveyard once the socket's timeout is
+ * reached. Instead we operate lazily: we close old entries when adding a new fd to the graveyard, or
+ * whenever any code runs manager_socket_graveyard_process() — which the DNS transaction code does right
+ * before allocating a new UDP socket. */
+
+static SocketGraveyard* socket_graveyard_free(SocketGraveyard *g) {
+        if (!g)
+                return NULL;
+
+        if (g->manager) {
+                assert(g->manager->n_socket_graveyard > 0);
+                g->manager->n_socket_graveyard--;
+
+                if (g->manager->socket_graveyard_oldest == g)
+                        g->manager->socket_graveyard_oldest = g->graveyard_prev;
+
+                LIST_REMOVE(graveyard, g->manager->socket_graveyard, g);
+
+                assert((g->manager->n_socket_graveyard > 0) == !!g->manager->socket_graveyard);
+                assert((g->manager->n_socket_graveyard > 0) == !!g->manager->socket_graveyard_oldest);
+        }
+
+        if (g->io_event_source) {
+                log_debug("Closing graveyard socket fd %i", sd_event_source_get_io_fd(g->io_event_source));
+                sd_event_source_unref(g->io_event_source);
+        }
+
+        return mfree(g);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(SocketGraveyard*, socket_graveyard_free);
+
+void manager_socket_graveyard_process(Manager *m) {
+        usec_t n = USEC_INFINITY;
+
+        assert(m);
+
+        while (m->socket_graveyard_oldest) {
+                SocketGraveyard *g = m->socket_graveyard_oldest;
+
+                if (n == USEC_INFINITY)
+                        assert_se(sd_event_now(m->event, clock_boottime_or_monotonic(), &n) >= 0);
+
+                if (g->deadline > n)
+                        break;
+
+                socket_graveyard_free(g);
+        }
+}
+
+void manager_socket_graveyard_clear(Manager *m) {
+        assert(m);
+
+        while (m->socket_graveyard)
+                socket_graveyard_free(m->socket_graveyard);
+}
+
+static int on_io_event(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+        SocketGraveyard *g = userdata;
+
+        assert(g);
+
+        /* An IO event happened on the graveyard fd. We don't actually care which event that is, and we don't
+         * read any incoming packet off the socket. We just close the fd, that's enough to not trigger the
+         * ICMP unreachable port event */
+
+        socket_graveyard_free(g);
+        return 0;
+}
+
+static void manager_socket_graveyard_make_room(Manager *m) {
+        assert(m);
+
+        while (m->n_socket_graveyard >= SOCKET_GRAVEYARD_MAX)
+                socket_graveyard_free(m->socket_graveyard_oldest);
+}
+
+int manager_add_socket_to_graveyard(Manager *m, int fd) {
+        _cleanup_(socket_graveyard_freep) SocketGraveyard *g = NULL;
+        int r;
+
+        assert(m);
+        assert(fd >= 0);
+
+        manager_socket_graveyard_process(m);
+        manager_socket_graveyard_make_room(m);
+
+        g = new(SocketGraveyard, 1);
+        if (!g)
+                return log_oom();
+
+        *g = (SocketGraveyard) {
+                .manager = m,
+        };
+
+        LIST_PREPEND(graveyard, m->socket_graveyard, g);
+        if (!m->socket_graveyard_oldest)
+                m->socket_graveyard_oldest = g;
+
+        m->n_socket_graveyard++;
+
+        assert_se(sd_event_now(m->event, clock_boottime_or_monotonic(), &g->deadline) >= 0);
+        g->deadline += SOCKET_GRAVEYARD_USEC;
+
+        r = sd_event_add_io(m->event, &g->io_event_source, fd, EPOLLIN, on_io_event, g);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create graveyard IO source: %m");
+
+        r = sd_event_source_set_io_fd_own(g->io_event_source, true);
+        if (r < 0)
+                return log_error_errno(r, "Failed to enable graveyard IO source fd ownership: %m");
+
+        (void) sd_event_source_set_description(g->io_event_source, "graveyard");
+
+        log_debug("Added socket %i to graveyard", fd);
+
+        TAKE_PTR(g);
+        return 0;
+}
diff --git a/src/resolve/resolved-socket-graveyard.h b/src/resolve/resolved-socket-graveyard.h
new file mode 100644 (file)
index 0000000..9b13bb0
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef struct SocketGraveyard SocketGraveyard;
+
+#include "resolved-manager.h"
+
+struct SocketGraveyard {
+        Manager *manager;
+        usec_t deadline;
+        sd_event_source *io_event_source;
+        LIST_FIELDS(SocketGraveyard, graveyard);
+};
+
+void manager_socket_graveyard_process(Manager *m);
+void manager_socket_graveyard_clear(Manager *m);
+
+int manager_add_socket_to_graveyard(Manager *m, int fd);