From: Remi Gacogne Date: Mon, 8 Jun 2020 14:28:42 +0000 (+0200) Subject: dnsdist: Use non-blocking pipes to pass DoH queries/responses around X-Git-Tag: dnsdist-1.5.0-rc3~8^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=729801bc626dec3088690a5d0de4b805aa4a099e;p=thirdparty%2Fpdns.git dnsdist: Use non-blocking pipes to pass DoH queries/responses around This commit makes the internal sockets non-blocking so we don't freeze if they ever fill up, and log errors/increment metrics instead. It also replaces the socket pairs by pipes, since the default buffer size for sockets seems to allow only ~278 pending queries which might be reached given how libh2o batches events. On Linux, a pipe gives us 8192 pending queries by default due to the lower overhead, and it can easily be incremented to 131072 pending queries by setting the pipe size to 1048576. This commits adds a new setting to do just that. --- diff --git a/pdns/dnsdist-lua.cc b/pdns/dnsdist-lua.cc index bdde09e1f8..4077d9b75b 100644 --- a/pdns/dnsdist-lua.cc +++ b/pdns/dnsdist-lua.cc @@ -1936,6 +1936,10 @@ static void setupLuaConfig(bool client, bool configCheck) frontend->d_trustForwardedForHeader = boost::get((*vars)["trustForwardedForHeader"]); } + if (vars->count("internalPipeBufferSize")) { + frontend->d_internalPipeBufferSize = boost::get((*vars)["internalPipeBufferSize"]); + } + parseTLSConfig(frontend->d_tlsConfig, "addDOHLocal", vars); } g_dohlocals.push_back(frontend); diff --git a/pdns/dnsdist-web.cc b/pdns/dnsdist-web.cc index a49a9f9136..4766403924 100644 --- a/pdns/dnsdist-web.cc +++ b/pdns/dnsdist-web.cc @@ -88,6 +88,8 @@ const std::map MetricDefinitionStorage::metrics{ { "dyn-blocked", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")}, { "dyn-block-nmg-size", MetricDefinition(PrometheusMetricType::gauge, "Number of dynamic blocks entries") }, { "security-status", MetricDefinition(PrometheusMetricType::gauge, "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") }, + { "doh-query-pipe-full", MetricDefinition(PrometheusMetricType::counter, "Number of DoH queries dropped because the internal pipe used to distribute queries was full") }, + { "doh-response-pipe-full", MetricDefinition(PrometheusMetricType::counter, "Number of DoH responses dropped because the internal pipe used to distribute responses was full") }, { "udp-in-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp InErrors") }, { "udp-noport-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp NoPorts") }, { "udp-recvbuf-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp RcvbufErrors") }, diff --git a/pdns/dnsdist.cc b/pdns/dnsdist.cc index 610387ca7a..edcefda370 100644 --- a/pdns/dnsdist.cc +++ b/pdns/dnsdist.cc @@ -638,7 +638,12 @@ try { #ifdef HAVE_DNS_OVER_HTTPS // DoH query du->response = std::string(response, responseLen); - if (send(du->rsock, &du, sizeof(du), 0) != sizeof(du)) { + ssize_t sent = write(du->rsock, &du, sizeof(du)); + if (sent != sizeof(du)) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + ++g_stats.dohResponsePipeFull; + } + /* at this point we have the only remaining pointer on this DOHUnit object since we did set ids->du to nullptr earlier, except if we got the response before the pointer could be diff --git a/pdns/dnsdist.hh b/pdns/dnsdist.hh index 949e5a08c7..1b532df341 100644 --- a/pdns/dnsdist.hh +++ b/pdns/dnsdist.hh @@ -263,6 +263,8 @@ struct DNSDistStats stat_t cacheMisses{0}; stat_t latency0_1{0}, latency1_10{0}, latency10_50{0}, latency50_100{0}, latency100_1000{0}, latencySlow{0}, latencySum{0}; stat_t securityStatus{0}; + stat_t dohQueryPipeFull{0}; + stat_t dohResponsePipeFull{0}; double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0}; typedef std::function statfunction_t; @@ -315,6 +317,8 @@ struct DNSDistStats {"dyn-blocked", &dynBlocked}, {"dyn-block-nmg-size", [](const std::string&) { return g_dynblockNMG.getLocal()->size(); }}, {"security-status", &securityStatus}, + {"doh-query-pipe-full", &dohQueryPipeFull}, + {"doh-response-pipe-full", &dohResponsePipeFull}, // Latency histogram {"latency-sum", &latencySum}, {"latency-count", getLatencyCount}, diff --git a/pdns/dnsdistdist/docs/reference/config.rst b/pdns/dnsdistdist/docs/reference/config.rst index a5db904eca..9f6c8a73e9 100644 --- a/pdns/dnsdistdist/docs/reference/config.rst +++ b/pdns/dnsdistdist/docs/reference/config.rst @@ -113,7 +113,7 @@ Listen Sockets .. versionadded:: 1.4.0 .. versionchanged:: 1.5.0 - ``sendCacheControlHeaders``, ``sessionTimeout``, ``trustForwardedForHeader`` options added. + ``internalPipeBufferSize``, ``sendCacheControlHeaders``, ``sessionTimeout``, ``trustForwardedForHeader`` options added. ``url`` now defaults to ``/dns-query`` instead of ``/``. Added ``tcpListenQueueSize`` parameter. Listen on the specified address and TCP port for incoming DNS over HTTPS connections, presenting the specified X.509 certificate. @@ -150,6 +150,7 @@ Listen Sockets * ``sendCacheControlHeaders``: bool - Whether to parse the response to find the lowest TTL and set a HTTP Cache-Control header accordingly. Default is true. * ``trustForwardedForHeader``: bool - Whether to parse any existing X-Forwarded-For header in the HTTP query and use the right-most value as the client source address and port, for ACL checks, rules, logging and so on. Default is false. * ``tcpListenQueueSize=SOMAXCONN``: int - Set the size of the listen queue. Default is ``SOMAXCONN``. + * ``internalPipeBufferSize=0``: int - Set the size in bytes of the internal buffer of the pipes used internally to pass queries and responses between threads. Requires support for ``F_SETPIPE_SZ`` which is present in Linux since 2.6.35. The actual size might be rounded up to a multiple of a page size. 0 means that the OS default size is used. .. function:: addTLSLocal(address, certFile(s), keyFile(s) [, options]) diff --git a/pdns/dnsdistdist/doh.cc b/pdns/dnsdistdist/doh.cc index 16132d9874..14373eb3ac 100644 --- a/pdns/dnsdistdist/doh.cc +++ b/pdns/dnsdistdist/doh.cc @@ -43,11 +43,11 @@ using namespace std; dnsdist worker thread which we also launched. This dnsdist worker thread injects the query into the normal dnsdist flow - (as a datagram over a socketpair). The response also goes back over a - (different) socketpair, where we pick it up and deliver it back to h2o. + (over a pipe). The response also goes back over a (different) pipe, + where we pick it up and deliver it back to h2o. For coordination, we use the h2o socket multiplexer, which is sensitive to our - socketpair too. + pipe too. */ /* h2o notes. @@ -175,18 +175,36 @@ private: // through the bowels of h2o struct DOHServerConfig { - DOHServerConfig(uint32_t idleTimeout): accept_ctx(new DOHAcceptContext) + DOHServerConfig(uint32_t idleTimeout, uint32_t internalPipeBufferSize): accept_ctx(new DOHAcceptContext) { - if(socketpair(AF_LOCAL, SOCK_DGRAM, 0, dohquerypair) < 0) { - unixDie("Creating a socket pair for DNS over HTTPS"); + int fd[2]; + if (pipe(fd) < 0) { + unixDie("Creating a pipe for DNS over HTTPS"); } + dohquerypair[0] = fd[1]; + dohquerypair[1] = fd[0]; - if (socketpair(AF_LOCAL, SOCK_DGRAM, 0, dohresponsepair) < 0) { + if (pipe(fd) < 0) { close(dohquerypair[0]); close(dohquerypair[1]); - unixDie("Creating a socket pair for DNS over HTTPS"); + unixDie("Creating a pipe for DNS over HTTPS"); } + dohresponsepair[0] = fd[1]; + dohresponsepair[1] = fd[0]; + + setNonBlocking(dohquerypair[0]); + if (internalPipeBufferSize > 0) { + setPipeBufferSize(dohquerypair[0], internalPipeBufferSize); + } + + setNonBlocking(dohresponsepair[0]); + if (internalPipeBufferSize > 0) { + setPipeBufferSize(dohresponsepair[0], internalPipeBufferSize); + } + + setNonBlocking(dohresponsepair[1]); + h2o_config_init(&h2o_config); h2o_config.http2.idle_timeout = idleTimeout * 1000; } @@ -222,9 +240,16 @@ void handleDOHTimeout(DOHUnit* oldDU) /* increase the ref counter before sending the pointer */ oldDU->get(); - if (send(oldDU->rsock, &oldDU, sizeof(oldDU), 0) != sizeof(oldDU)) { + + ssize_t sent = write(oldDU->rsock, &oldDU, sizeof(oldDU)); + if (sent != sizeof(oldDU)) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + ++g_stats.dohResponsePipeFull; + } + oldDU->release(); } + oldDU->release(); oldDU = nullptr; } @@ -432,7 +457,13 @@ static int processDOHQuery(DOHUnit* du) } /* increase the ref counter before sending the pointer */ du->get(); - if (send(du->rsock, &du, sizeof(du), 0) != sizeof(du)) { + + ssize_t sent = write(du->rsock, &du, sizeof(du)); + if (sent != sizeof(du)) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + ++g_stats.dohResponsePipeFull; + } + du->release(); } return 0; @@ -639,7 +670,11 @@ static void doh_dispatch_query(DOHServerConfig* dsc, h2o_handler_t* self, h2o_re auto ptr = du.release(); *(ptr->self) = ptr; try { - if(send(dsc->dohquerypair[0], &ptr, sizeof(ptr), 0) != sizeof(ptr)) { + ssize_t sent = write(dsc->dohquerypair[0], &ptr, sizeof(ptr)); + if (sent != sizeof(ptr)) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + ++g_stats.dohQueryPipeFull; + } ptr->release(); ptr = nullptr; h2o_send_error_500(req, "Internal Server Error", "Internal Server Error", 0); @@ -717,7 +752,14 @@ try h2o_socket_t* sock = req->conn->callbacks->get_socket(req->conn); ComboAddress remote; ComboAddress local; - h2o_socket_getpeername(sock, reinterpret_cast(&remote)); + + if (h2o_socket_getpeername(sock, reinterpret_cast(&remote)) == 0) { + /* getpeername failed, likely because the connection has already been closed, + but anyway that means we can't get the remote address, which could allow an ACL bypass */ + h2o_send_error_500(req, getReasonFromStatusCode(500).c_str(), "Internal Server Error - Unable to get remote address", 0); + return 0; + } + h2o_socket_getsockname(sock, reinterpret_cast(&local)); DOHServerConfig* dsc = reinterpret_cast(req->conn->ctx->storage.entries[0].data); @@ -968,14 +1010,14 @@ void DOHUnit::setHTTPResponse(uint16_t statusCode, const std::string& body_, con /* query has been parsed by h2o, which called doh_handler() in the main DoH thread. In order not to blockfor long, doh_handler() called doh_dispatch_query() which allocated a DOHUnit object and passed it to us */ -static void dnsdistclient(int qsock, int rsock) +static void dnsdistclient(int qsock) { setThreadName("dnsdist/doh-cli"); for(;;) { try { DOHUnit* du = nullptr; - ssize_t got = recv(qsock, &du, sizeof(du), 0); + ssize_t got = read(qsock, &du, sizeof(du)); if (got < 0) { warnlog("Error receiving internal DoH query: %s", strerror(errno)); continue; @@ -988,7 +1030,7 @@ static void dnsdistclient(int qsock, int rsock) // so we can use UDP to talk to the backend. auto dh = const_cast(reinterpret_cast(du->query.c_str())); - if(!dh->arcount) { + if (!dh->arcount) { std::string res; generateOptRR(std::string(), res, 4096, 0, false); @@ -1001,12 +1043,18 @@ static void dnsdistclient(int qsock, int rsock) // we leave existing EDNS in place } - if(processDOHQuery(du) < 0) { + if (processDOHQuery(du) < 0) { du->status_code = 500; /* increase the ref count before sending the pointer */ du->get(); - if(send(du->rsock, &du, sizeof(du), 0) != sizeof(du)) { - du->release(); // XXX but now what - will h2o time this out for us? + ssize_t sent = write(du->rsock, &du, sizeof(du)); + if (sent != sizeof(du)) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + ++g_stats.dohResponsePipeFull; + } + + // XXX but now what - will h2o time this out for us? + du->release(); } } du->release(); @@ -1031,7 +1079,7 @@ static void on_dnsdist(h2o_socket_t *listener, const char *err) { DOHUnit *du = nullptr; DOHServerConfig* dsc = reinterpret_cast(listener->data); - ssize_t got = recv(dsc->dohresponsepair[1], &du, sizeof(du), 0); + ssize_t got = read(dsc->dohresponsepair[1], &du, sizeof(du)); if (got < 0) { warnlog("Error reading a DOH internal response: %s", strerror(errno)); @@ -1041,7 +1089,7 @@ static void on_dnsdist(h2o_socket_t *listener, const char *err) return; } - if(!du->req) { // it got killed in flight + if (!du->req) { // it got killed in flight // cout << "du "<<(void*)du<<" came back from dnsdist, but it was killed"<release(); return; @@ -1235,7 +1283,7 @@ void DOHFrontend::setup() { registerOpenSSLUser(); - d_dsc = std::make_shared(d_idleTimeout); + d_dsc = std::make_shared(d_idleTimeout, d_internalPipeBufferSize); if (!d_tlsConfig.d_certKeyPairs.empty()) { try { @@ -1260,7 +1308,7 @@ try dsc->h2o_config.server_name = h2o_iovec_init(df->d_serverTokens.c_str(), df->d_serverTokens.size()); - std::thread dnsdistThread(dnsdistclient, dsc->dohquerypair[1], dsc->dohresponsepair[0]); + std::thread dnsdistThread(dnsdistclient, dsc->dohquerypair[1]); dnsdistThread.detach(); // gets us better error reporting setThreadName("dnsdist/doh"); diff --git a/pdns/doh.hh b/pdns/doh.hh index 9e51c2e065..53ee0e433f 100644 --- a/pdns/doh.hh +++ b/pdns/doh.hh @@ -97,6 +97,7 @@ struct DOHFrontend HTTPVersionStats d_http1Stats; HTTPVersionStats d_http2Stats; + uint32_t d_internalPipeBufferSize{0}; bool d_sendCacheControlHeaders{true}; bool d_trustForwardedForHeader{false};