]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/tcpreceiver.cc
rec: allow exception to proxy protocol usage for specific listen addresses
[thirdparty/pdns.git] / pdns / tcpreceiver.cc
CommitLineData
12c86877
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
2e7834cb 3 Copyright (C) 2002-2012 PowerDNS.COM BV
12c86877
BH
4
5 This program is free software; you can redistribute it and/or modify
22dc646a
BH
6 it under the terms of the GNU General Public License version 2
7 as published by the Free Software Foundation
f782fe38
MH
8
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
12c86877
BH
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
06bd9ccf 20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12c86877 21*/
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
b6f3b03a 25#include <boost/algorithm/string.hpp>
097b1e68 26#include <boost/scoped_array.hpp>
bf269e28 27#include "auth-packetcache.hh"
1258abe0 28#include "utility.hh"
519f5484 29#include "threadname.hh"
add640c0 30#include "dnssecinfra.hh"
4c1474f3 31#include "dnsseckeeper.hh"
12c86877 32#include <cstdio>
4888e4b2 33#include "base32.hh"
12c86877
BH
34#include <cstring>
35#include <cstdlib>
36#include <sys/types.h>
940d7811 37#include <netinet/tcp.h>
12c86877
BH
38#include <iostream>
39#include <string>
40#include "tcpreceiver.hh"
67d74e49 41#include "sstuff.hh"
fa8fd4d2 42
dc6aa7f5
AV
43#include <cerrno>
44#include <csignal>
78bcb858 45#include "base64.hh"
12c86877
BH
46#include "ueberbackend.hh"
47#include "dnspacket.hh"
48#include "nameserver.hh"
49#include "distributor.hh"
50#include "lock.hh"
51#include "logger.hh"
52#include "arguments.hh"
379ab445 53
8cb70f23 54#include "auth-main.hh"
12c86877
BH
55#include "packethandler.hh"
56#include "statbag.hh"
12c86877 57#include "communicator.hh"
61b26744 58#include "namespaces.hh"
8e9b7d99 59#include "signingpipe.hh"
273d88b2 60#include "stubresolver.hh"
4172a5b2
PD
61#include "proxy-protocol.hh"
62#include "noinitvector.hh"
f13cde2a 63#include "gss_context.hh"
1f07a63f 64#include "pdnsexception.hh"
bf269e28 65extern AuthPacketCache PC;
12c86877
BH
66extern StatBag S;
67
68/**
69\file tcpreceiver.cc
70\brief This file implements the tcpreceiver that receives and answers questions over TCP/IP
71*/
72
c2826d2e 73std::unique_ptr<Semaphore> TCPNameserver::d_connectionroom_sem{nullptr};
24fb5774 74LockGuarded<std::unique_ptr<PacketHandler>> TCPNameserver::s_P{nullptr};
d322f931 75unsigned int TCPNameserver::d_maxTCPConnections = 0;
9f1d5826 76NetmaskGroup TCPNameserver::d_ng;
cb0af1a1
RG
77size_t TCPNameserver::d_maxTransactionsPerConn;
78size_t TCPNameserver::d_maxConnectionsPerClient;
79unsigned int TCPNameserver::d_idleTimeout;
80unsigned int TCPNameserver::d_maxConnectionDuration;
24fb5774 81LockGuarded<std::map<ComboAddress,size_t,ComboAddress::addressOnlyLessThan>> TCPNameserver::s_clientsCount;
12c86877 82
12c86877
BH
83void TCPNameserver::go()
84{
e6a9dde5 85 g_log<<Logger::Error<<"Creating backend connection for TCP"<<endl;
24fb5774 86 s_P.lock()->reset();
12c86877 87 try {
24fb5774 88 *(s_P.lock()) = make_unique<PacketHandler>();
12c86877 89 }
3f81d239 90 catch(PDNSException &ae) {
e6a9dde5 91 g_log<<Logger::Error<<"TCP server is unable to launch backends - will try again when questions come in: "<<ae.reason<<endl;
12c86877 92 }
12c86877 93
969e4459 94 std::thread th([this](){thread();});
0ddde5fb 95 th.detach();
12c86877
BH
96}
97
3f81d239 98// throws PDNSException if things didn't go according to plan, returns 0 if really 0 bytes were read
cb0af1a1 99static int readnWithTimeout(int fd, void* buffer, unsigned int n, unsigned int idleTimeout, bool throwOnEOF=true, unsigned int totalTimeout=0)
12c86877 100{
6a3e5d1a
BH
101 unsigned int bytes=n;
102 char *ptr = (char*)buffer;
103 int ret;
cb0af1a1
RG
104 time_t start = 0;
105 unsigned int remainingTotal = totalTimeout;
106 if (totalTimeout) {
4646277d 107 start = time(nullptr);
cb0af1a1 108 }
6a3e5d1a
BH
109 while(bytes) {
110 ret=read(fd, ptr, bytes);
111 if(ret < 0) {
112 if(errno==EAGAIN) {
cb0af1a1 113 ret=waitForData(fd, (totalTimeout == 0 || idleTimeout <= remainingTotal) ? idleTimeout : remainingTotal);
4957a608
BH
114 if(ret < 0)
115 throw NetworkError("Waiting for data read");
116 if(!ret)
117 throw NetworkError("Timeout reading data");
118 continue;
6a3e5d1a
BH
119 }
120 else
4957a608 121 throw NetworkError("Reading data: "+stringerror());
6a3e5d1a
BH
122 }
123 if(!ret) {
124 if(!throwOnEOF && n == bytes)
4957a608 125 return 0;
6a3e5d1a 126 else
4957a608 127 throw NetworkError("Did not fulfill read from TCP due to EOF");
6a3e5d1a 128 }
1e05b07c 129
6a3e5d1a
BH
130 ptr += ret;
131 bytes -= ret;
cb0af1a1 132 if (totalTimeout) {
4646277d 133 time_t now = time(nullptr);
600c8a65 134 const auto elapsed = now - start;
f25dd2b7 135 if (elapsed >= static_cast<time_t>(remainingTotal)) {
cb0af1a1
RG
136 throw NetworkError("Timeout while reading data");
137 }
138 start = now;
62a34cfd
RG
139 if (elapsed > 0) {
140 remainingTotal -= elapsed;
141 }
cb0af1a1 142 }
6a3e5d1a
BH
143 }
144 return n;
145}
12c86877 146
6a3e5d1a 147// ditto
cb0af1a1 148static void writenWithTimeout(int fd, const void *buffer, unsigned int n, unsigned int idleTimeout)
6a3e5d1a
BH
149{
150 unsigned int bytes=n;
151 const char *ptr = (char*)buffer;
152 int ret;
153 while(bytes) {
154 ret=write(fd, ptr, bytes);
155 if(ret < 0) {
156 if(errno==EAGAIN) {
cb0af1a1 157 ret=waitForRWData(fd, false, idleTimeout, 0);
4957a608
BH
158 if(ret < 0)
159 throw NetworkError("Waiting for data write");
160 if(!ret)
161 throw NetworkError("Timeout writing data");
162 continue;
6a3e5d1a
BH
163 }
164 else
4957a608 165 throw NetworkError("Writing data: "+stringerror());
6a3e5d1a 166 }
12c86877 167 if(!ret) {
67d74e49 168 throw NetworkError("Did not fulfill TCP write due to EOF");
12c86877 169 }
1e05b07c 170
6a3e5d1a
BH
171 ptr += ret;
172 bytes -= ret;
12c86877 173 }
12c86877
BH
174}
175
02b4b703 176void TCPNameserver::sendPacket(std::unique_ptr<DNSPacket>& p, int outsock, bool last)
6a3e5d1a 177{
1f07a63f
PD
178 uint16_t len=htons(p->getString(true).length());
179
180 // this also calls p->getString; call it after our explicit call so throwsOnTruncation=true is honoured
02b4b703 181 g_rs.submitResponse(*p, false, last);
9951e2d0 182
fbaa5e09
BH
183 string buffer((const char*)&len, 2);
184 buffer.append(p->getString());
cb0af1a1 185 writenWithTimeout(outsock, buffer.c_str(), buffer.length(), d_idleTimeout);
6a3e5d1a
BH
186}
187
188
cb0af1a1 189void TCPNameserver::getQuestion(int fd, char *mesg, int pktlen, const ComboAddress &remote, unsigned int totalTime)
6a3e5d1a
BH
190try
191{
cb0af1a1 192 readnWithTimeout(fd, mesg, pktlen, d_idleTimeout, true, totalTime);
6a3e5d1a 193}
67d74e49
BH
194catch(NetworkError& ae) {
195 throw NetworkError("Error reading DNS data from TCP client "+remote.toString()+": "+ae.what());
12c86877
BH
196}
197
cb0af1a1
RG
/**
 * Check a connection's age against the configured maximum duration.
 *
 * @param maxConnectionDuration  limit in seconds; 0 disables the check
 * @param start                  time the connection was accepted
 * @param remainingTime          set to the seconds left before the limit whenever
 *                               a limit is configured and not yet reached;
 *                               untouched otherwise
 * @return true when the limit is configured and has been reached
 *
 * Callers pass remainingTime on as a total read timeout, where 0 means "no
 * limit". It is therefore always given a non-zero value while a limit is in
 * force. If no time has elapsed yet (or the clock stepped backwards) the full
 * duration is reported, rather than leaving the caller's 0 in place.
 */
static bool maxConnectionDurationReached(unsigned int maxConnectionDuration, time_t start, unsigned int& remainingTime)
{
  if (maxConnectionDuration == 0) {
    return false;
  }

  // Compare in time_t so a negative elapsed value is not converted to a huge unsigned one.
  const time_t limit = static_cast<time_t>(maxConnectionDuration);
  const time_t elapsed = time(nullptr) - start;
  if (elapsed >= limit) {
    return true;
  }

  remainingTime = (elapsed > 0) ? static_cast<unsigned int>(limit - elapsed) : maxConnectionDuration;
  return false;
}
211
212void TCPNameserver::decrementClientCount(const ComboAddress& remote)
213{
214 if (d_maxConnectionsPerClient) {
24fb5774
RG
215 auto count = s_clientsCount.lock();
216 auto it = count->find(remote);
217 if (it == count->end()) {
218 // this is worrying, but nothing we can do at this point
219 return;
220 }
221 --it->second;
222 if (it->second == 0) {
223 count->erase(it);
cb0af1a1
RG
224 }
225 }
226}
227
0ddde5fb 228void TCPNameserver::doConnection(int fd)
12c86877 229{
519f5484 230 setThreadName("pdns/tcpConnect");
c2826d2e 231 std::unique_ptr<DNSPacket> packet;
208abc4c 232 ComboAddress remote, accountremote;
4f5e7925 233 socklen_t remotelen=sizeof(remote);
cb0af1a1
RG
234 size_t transactions = 0;
235 time_t start = 0;
236 if (d_maxConnectionDuration) {
4646277d 237 start = time(nullptr);
cb0af1a1 238 }
4f5e7925 239
4f5e7925 240 if(getpeername(fd, (struct sockaddr *)&remote, &remotelen) < 0) {
e6a9dde5 241 g_log<<Logger::Warning<<"Received question from socket which had no remote address, dropping ("<<stringerror()<<")"<<endl;
4f5e7925 242 d_connectionroom_sem->post();
a7b68ae7
RG
243 try {
244 closesocket(fd);
245 }
246 catch(const PDNSException& e) {
e6a9dde5 247 g_log<<Logger::Error<<"Error closing TCP socket: "<<e.reason<<endl;
a7b68ae7 248 }
0ddde5fb 249 return;
4f5e7925 250 }
251
3897b9e1 252 setNonBlocking(fd);
12c86877 253 try {
c2b4ccc0 254 int mesgsize=65535;
dc593046 255 boost::scoped_array<char> mesg(new char[mesgsize]);
4172a5b2
PD
256 std::optional<ComboAddress> inner_remote;
257 bool inner_tcp = false;
258
e6a9dde5 259 DLOG(g_log<<"TCP Connection accepted on fd "<<fd<<endl);
21a303f3 260 bool logDNSQueries= ::arg().mustDo("log-dns-queries");
4172a5b2
PD
261 if (g_proxyProtocolACL.match(remote)) {
262 unsigned int remainingTime = 0;
263 PacketBuffer proxyData;
264 proxyData.reserve(g_proxyProtocolMaximumSize);
265 ssize_t used;
266
267 // this for-loop ends by throwing, or by having gathered a complete proxy header
268 for (;;) {
269 used = isProxyHeaderComplete(proxyData);
270 if (used < 0) {
271 ssize_t origsize = proxyData.size();
272 proxyData.resize(origsize + -used);
273 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
274 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": maximum TCP connection duration exceeded");
275 }
276
277 try {
278 readnWithTimeout(fd, &proxyData[origsize], -used, d_idleTimeout, true, remainingTime);
279 }
280 catch(NetworkError& ae) {
281 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": "+ae.what());
282 }
283 }
284 else if (used == 0) {
285 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": PROXYv2 header was invalid");
286 }
287 else if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
288 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": PROXYv2 header too big");
289 }
290 else { // used > 0 && used <= g_proxyProtocolMaximumSize
291 break;
292 }
293 }
294 ComboAddress psource, pdestination;
295 bool proxyProto, tcp;
296 std::vector<ProxyProtocolValue> ppvalues;
297
298 used = parseProxyHeader(proxyData, proxyProto, psource, pdestination, tcp, ppvalues);
299 if (used <= 0) {
300 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": PROXYv2 header was invalid");
301 }
302 if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
303 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": PROXYv2 header was oversized");
304 }
305 inner_remote = psource;
306 inner_tcp = tcp;
208abc4c
KM
307 accountremote = psource;
308 }
309 else {
310 accountremote = remote;
4172a5b2
PD
311 }
312
12c86877 313 for(;;) {
cb0af1a1
RG
314 unsigned int remainingTime = 0;
315 transactions++;
316 if (d_maxTransactionsPerConn && transactions > d_maxTransactionsPerConn) {
e6a9dde5 317 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the number of transactions per connection, dropping.";
cb0af1a1
RG
318 break;
319 }
320 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
e6a9dde5 321 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the maximum TCP connection duration, dropping.";
cb0af1a1
RG
322 break;
323 }
6a3e5d1a
BH
324
325 uint16_t pktlen;
cb0af1a1 326 if(!readnWithTimeout(fd, &pktlen, 2, d_idleTimeout, false, remainingTime))
4957a608 327 break;
6a3e5d1a 328 else
4957a608 329 pktlen=ntohs(pktlen);
12c86877 330
366e1e5e
AT
331 // this check will always be false *if* no one touches
332 // the mesg array. pktlen can be maximum of 65535 as
1e05b07c
FM
333 // it is 2 byte unsigned variable. In getQuestion, we
334 // write to 0 up to pktlen-1 so 65535 is just right.
366e1e5e
AT
335
336 // do not remove this check as it will catch if someone
1e05b07c 337 // decreases the mesg buffer size for some reason.
c2b4ccc0 338 if(pktlen > mesgsize) {
e6a9dde5 339 g_log<<Logger::Warning<<"Received an overly large question from "<<remote.toString()<<", dropping"<<endl;
4957a608 340 break;
12c86877 341 }
1e05b07c 342
cb0af1a1 343 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
e6a9dde5 344 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the maximum TCP connection duration, dropping.";
cb0af1a1
RG
345 break;
346 }
347
348 getQuestion(fd, mesg.get(), pktlen, remote, remainingTime);
208abc4c
KM
349 S.inc("tcp-queries");
350 if (accountremote.sin4.sin_family == AF_INET6)
5fd567ec 351 S.inc("tcp6-queries");
352 else
353 S.inc("tcp4-queries");
3e579e91 354
c2826d2e 355 packet=make_unique<DNSPacket>(true);
809fe23f 356 packet->setRemote(&remote);
e9dd48f9 357 packet->d_tcp=true;
4172a5b2
PD
358 if (inner_remote) {
359 packet->d_inner_remote = inner_remote;
360 packet->d_tcp = inner_tcp;
361 }
ff76e8b4 362 packet->setSocket(fd);
c2b4ccc0 363 if(packet->parse(mesg.get(), pktlen)<0)
4957a608 364 break;
78f1d7b5
PL
365
366 if (packet->hasEDNSCookie())
367 S.inc("tcp-cookie-queries");
368
6e59a580 369 if(packet->qtype.getCode()==QType::AXFR) {
02b4b703 370 doAXFR(packet->qdomain, packet, fd);
6e59a580
KM
371 continue;
372 }
373
374 if(packet->qtype.getCode()==QType::IXFR) {
02b4b703 375 doIXFR(packet, fd);
4957a608 376 continue;
12c86877
BH
377 }
378
1e05b07c 379 std::unique_ptr<DNSPacket> reply;
c2826d2e 380 auto cached = make_unique<DNSPacket>(false);
fe498ace 381 if(logDNSQueries) {
4172a5b2 382 g_log << Logger::Notice<<"TCP Remote "<< packet->getRemoteString() <<" wants '" << packet->qdomain<<"|"<<packet->qtype.toString() <<
1b16851b 383 "', do = " <<packet->d_dnssecOk <<", bufsize = "<< packet->getMaxReplyLen();
fe498ace 384 }
bb5903e2 385
9a037bfa 386 if(PC.enabled()) {
c2826d2e 387 if(packet->couldBeCached() && PC.get(*packet, *cached)) { // short circuit - does the PacketCache recognize this question?
9a037bfa 388 if(logDNSQueries)
1b16851b 389 g_log<<": packetcache HIT"<<endl;
9a037bfa 390 cached->setRemote(&packet->d_remote);
208abc4c 391 cached->d_inner_remote = packet->d_inner_remote;
9a037bfa
KM
392 cached->d.id=packet->d.id;
393 cached->d.rd=packet->d.rd; // copy in recursion desired bit
394 cached->commitD(); // commit d to the packet inlined
395
396 sendPacket(cached, fd); // presigned, don't do it again
397 continue;
398 }
21a303f3 399 if(logDNSQueries)
1b16851b 400 g_log<<": packetcache MISS"<<endl;
bbe4b041
OM
401 } else {
402 if (logDNSQueries) {
1b16851b 403 g_log<<endl;
bbe4b041 404 }
12c86877 405 }
12c86877 406 {
24fb5774
RG
407 auto packetHandler = s_P.lock();
408 if (!*packetHandler) {
994cae6b 409 g_log<<Logger::Warning<<"TCP server is without backend connections, launching"<<endl;
24fb5774 410 *packetHandler = make_unique<PacketHandler>();
4957a608 411 }
4957a608 412
24fb5774 413 reply = (*packetHandler)->doQuestion(*packet); // we really need to ask the backend :-)
12c86877
BH
414 }
415
12c86877 416 if(!reply) // unable to write an answer?
4957a608 417 break;
b552d7b1 418
ff76e8b4 419 sendPacket(reply, fd);
15e39ee4 420#ifdef ENABLE_GSS_TSIG
c113acc3
OM
421 if (g_doGssTSIG) {
422 packet->cleanupGSS(reply->d.rcode);
423 }
15e39ee4 424#endif
12c86877 425 }
12c86877 426 }
3f81d239 427 catch(PDNSException &ae) {
24fb5774 428 s_P.lock()->reset(); // on next call, backend will be recycled
a274da88 429 g_log << Logger::Error << "TCP Connection Thread for client " << remote << " failed, cycling backend: " << ae.reason << endl;
ef1d2f44 430 }
0afa9049 431 catch(NetworkError &e) {
a274da88 432 g_log << Logger::Info << "TCP Connection Thread for client " << remote << " died because of network error: " << e.what() << endl;
0afa9049
BH
433 }
434
adc10f99 435 catch(std::exception &e) {
3d88c4c6 436 s_P.lock()->reset(); // on next call, backend will be recycled
a274da88 437 g_log << Logger::Error << "TCP Connection Thread for client " << remote << " died because of STL error, cycling backend: " << e.what() << endl;
12c86877
BH
438 }
439 catch( ... )
440 {
3d88c4c6 441 s_P.lock()->reset(); // on next call, backend will be recycled
a274da88 442 g_log << Logger::Error << "TCP Connection Thread for client " << remote << " caught unknown exception, cycling backend." << endl;
12c86877 443 }
12c86877 444 d_connectionroom_sem->post();
a7b68ae7
RG
445
446 try {
447 closesocket(fd);
448 }
449 catch(const PDNSException& e) {
a274da88 450 g_log << Logger::Error << "Error closing TCP socket for client " << remote << ": " << e.reason << endl;
a7b68ae7 451 }
cb0af1a1 452 decrementClientCount(remote);
12c86877
BH
453}
454
78bcb858 455
24fb5774 456bool TCPNameserver::canDoAXFR(std::unique_ptr<DNSPacket>& q, bool isAXFR, std::unique_ptr<PacketHandler>& packetHandler)
12c86877 457{
379ab445 458 if(::arg().mustDo("disable-axfr"))
318c3ec6
BH
459 return false;
460
4172a5b2 461 string logPrefix=string(isAXFR ? "A" : "I")+"XFR-out zone '"+q->qdomain.toLogString()+"', client '"+q->getInnerRemote().toStringWithPort()+"', ";
efe7948e 462
78bcb858
BH
463 if(q->d_havetsig) { // if you have one, it must be good
464 TSIGRecordContent trc;
7abbc40f
PD
465 DNSName keyname;
466 string secret;
24fb5774 467 if(!q->checkForCorrectTSIG(packetHandler->getBackend(), &keyname, &secret, &trc)) {
78bcb858 468 return false;
7f9ac49b
AT
469 } else {
470 getTSIGHashEnum(trc.d_algoName, q->d_tsig_algo);
c113acc3 471#ifdef ENABLE_GSS_TSIG
f13cde2a
OM
472 if (g_doGssTSIG && q->d_tsig_algo == TSIG_GSS) {
473 GssContext gssctx(keyname);
474 if (!gssctx.getPeerPrincipal(q->d_peer_principal)) {
475 g_log<<Logger::Warning<<"Failed to extract peer principal from GSS context with keyname '"<<keyname<<"'"<<endl;
476 }
477 }
c113acc3 478#endif
7f9ac49b
AT
479 }
480
24fb5774 481 DNSSECKeeper dk(packetHandler->getBackend());
c113acc3 482#ifdef ENABLE_GSS_TSIG
f13cde2a
OM
483 if (g_doGssTSIG && q->d_tsig_algo == TSIG_GSS) {
484 vector<string> princs;
485 packetHandler->getBackend()->getDomainMetadata(q->qdomain, "GSS-ALLOW-AXFR-PRINCIPAL", princs);
486 for(const std::string& princ : princs) {
487 if (q->d_peer_principal == princ) {
488 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: TSIG signed request with authorized principal '"<<q->d_peer_principal<<"' and algorithm 'gss-tsig'"<<endl;
489 return true;
490 }
491 }
492 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' denied: TSIG signed request with principal '"<<q->d_peer_principal<<"' and algorithm 'gss-tsig' is not permitted"<<endl;
493 return false;
494 }
c113acc3 495#endif
3d03fee8 496 if(!dk.TSIGGrantsAccess(q->qdomain, keyname)) {
efe7948e 497 g_log<<Logger::Warning<<logPrefix<<"denied: key with name '"<<keyname<<"' and algorithm '"<<getTSIGAlgoName(q->d_tsig_algo)<<"' does not grant access"<<endl;
78bcb858
BH
498 return false;
499 }
500 else {
efe7948e 501 g_log<<Logger::Notice<<logPrefix<<"allowed: TSIG signed request with authorized key '"<<keyname<<"' and algorithm '"<<getTSIGAlgoName(q->d_tsig_algo)<<"'"<<endl;
78bcb858
BH
502 return true;
503 }
504 }
1e05b07c 505
93afc0a3 506 // cerr<<"checking allow-axfr-ips"<<endl;
4172a5b2 507 if(!(::arg()["allow-axfr-ips"].empty()) && d_ng.match( q->getInnerRemote() )) {
efe7948e 508 g_log<<Logger::Notice<<logPrefix<<"allowed: client IP is in allow-axfr-ips"<<endl;
12c86877 509 return true;
ab5edd12 510 }
93afc0a3
PD
511
512 FindNS fns;
513
514 // cerr<<"doing per-zone-axfr-acls"<<endl;
515 SOAData sd;
24fb5774 516 if(packetHandler->getBackend()->getSOAUncached(q->qdomain,sd)) {
93afc0a3 517 // cerr<<"got backend and SOA"<<endl;
93afc0a3 518 vector<string> acl;
24fb5774 519 packetHandler->getBackend()->getDomainMetadata(q->qdomain, "ALLOW-AXFR-FROM", acl);
d7f67000 520 for (const auto & i : acl) {
93afc0a3 521 // cerr<<"matching against "<<*i<<endl;
d7f67000 522 if(pdns_iequals(i, "AUTO-NS")) {
93afc0a3
PD
523 // cerr<<"AUTO-NS magic please!"<<endl;
524
525 DNSResourceRecord rr;
7abbc40f 526 set<DNSName> nsset;
93afc0a3 527
13b80e77
CH
528 sd.db->lookup(QType(QType::NS), q->qdomain, sd.domain_id);
529 while (sd.db->get(rr)) {
290a083d 530 nsset.insert(DNSName(rr.content));
13b80e77 531 }
7abbc40f 532 for(const auto & j: nsset) {
24fb5774 533 vector<string> nsips=fns.lookup(j, packetHandler->getBackend());
d7f67000 534 for(const auto & nsip : nsips) {
93afc0a3 535 // cerr<<"got "<<*k<<" from AUTO-NS"<<endl;
4172a5b2 536 if(nsip == q->getInnerRemote().toString())
93afc0a3
PD
537 {
538 // cerr<<"got AUTO-NS hit"<<endl;
efe7948e 539 g_log<<Logger::Notice<<logPrefix<<"allowed: client IP is in NSset"<<endl;
93afc0a3
PD
540 return true;
541 }
542 }
543 }
544 }
545 else
546 {
d7f67000 547 Netmask nm = Netmask(i);
4172a5b2 548 if(nm.match( q->getInnerRemote() ))
93afc0a3 549 {
efe7948e 550 g_log<<Logger::Notice<<logPrefix<<"allowed: client IP is in per-zone ACL"<<endl;
93afc0a3
PD
551 // cerr<<"hit!"<<endl;
552 return true;
553 }
554 }
555 }
1e05b07c 556 }
93afc0a3 557
12c86877
BH
558 extern CommunicatorClass Communicator;
559
4172a5b2 560 if(Communicator.justNotified(q->qdomain, q->getInnerRemote().toString())) { // we just notified this ip
efe7948e 561 g_log<<Logger::Notice<<logPrefix<<"allowed: client IP is from recently notified secondary"<<endl;
12c86877
BH
562 return true;
563 }
564
efe7948e 565 g_log<<Logger::Warning<<logPrefix<<"denied: client IP has no permission"<<endl;
12c86877
BH
566 return false;
567}
568
b317b510 569namespace {
54d84273
PD
570 struct NSECXEntry
571 {
22a0ef16 572 NSECBitmap d_set;
54d84273 573 unsigned int d_ttl;
feef1ece 574 bool d_auth;
54d84273 575 };
8e9b7d99 576
a5188bcd 577 static std::unique_ptr<DNSPacket> getFreshAXFRPacket(std::unique_ptr<DNSPacket>& q)
54d84273 578 {
c2826d2e 579 std::unique_ptr<DNSPacket> ret = std::unique_ptr<DNSPacket>(q->replyPacket());
54d84273
PD
580 ret->setCompress(false);
581 ret->d_dnssecOk=false; // RFC 5936, 2.2.5
582 ret->d_tcp = true;
583 return ret;
584 }
8e9b7d99
BH
585}
586
54d84273 587
12c86877 588/** do the actual zone transfer. Return 0 in case of error, 1 in case of success */
5cf23dba 589int TCPNameserver::doAXFR(const DNSName &target, std::unique_ptr<DNSPacket>& q, int outsock) // NOLINT(readability-function-cognitive-complexity)
12c86877 590{
c6ca0aa8 591 string logPrefix="AXFR-out zone '"+target.toLogString()+"', client '"+q->getRemoteStringWithPort()+"', ";
efe7948e 592
c2826d2e 593 std::unique_ptr<DNSPacket> outpacket= getFreshAXFRPacket(q);
c67e46a1 594 if(q->d_dnssecOk)
05e24311 595 outpacket->d_dnssecOk=true; // RFC 5936, 2.2.5 'SHOULD'
22893145 596
efe7948e 597 g_log<<Logger::Warning<<logPrefix<<"transfer initiated"<<endl;
12c86877 598
22893145 599 // determine if zone exists and AXFR is allowed using existing backend before spawning a new backend.
91ad8c11 600 SOAData sd;
12c86877 601 {
24fb5774 602 auto packetHandler = s_P.lock();
efe7948e 603 DLOG(g_log<<logPrefix<<"looking for SOA"<<endl); // find domain_id via SOA and list complete domain. No SOA, no AXFR
24fb5774 604 if(!*packetHandler) {
994cae6b 605 g_log<<Logger::Warning<<"TCP server is without backend connections in doAXFR, launching"<<endl;
24fb5774 606 *packetHandler = make_unique<PacketHandler>();
12a965c5 607 }
12c86877 608
ea99d474 609 // canDoAXFR does all the ACL checks, and has the if(disable-axfr) shortcut, call it first.
24fb5774 610 if (!canDoAXFR(q, true, *packetHandler)) {
efe7948e 611 g_log<<Logger::Warning<<logPrefix<<"failed: client may not request AXFR"<<endl;
9c556f63 612 outpacket->setRcode(RCode::NotAuth);
8090f5a2
AT
613 sendPacket(outpacket,outsock);
614 return 0;
615 }
616
91ad8c11 617 if (!(*packetHandler)->getBackend()->getSOAUncached(target, sd)) {
efe7948e 618 g_log<<Logger::Warning<<logPrefix<<"failed: not authoritative"<<endl;
9c556f63 619 outpacket->setRcode(RCode::NotAuth);
ff76e8b4 620 sendPacket(outpacket,outsock);
12c86877
BH
621 return 0;
622 }
3de83124 623 }
22893145 624
8e9b7d99 625 UeberBackend db;
79ba7763 626 if(!db.getSOAUncached(target, sd)) {
efe7948e 627 g_log<<Logger::Warning<<logPrefix<<"failed: not authoritative in second instance"<<endl;
79ba7763 628 outpacket->setRcode(RCode::NotAuth);
ff76e8b4 629 sendPacket(outpacket,outsock);
3de83124 630 return 0;
12c86877 631 }
3de83124 632
8a66a927
KM
633 bool securedZone = false;
634 bool presignedZone = false;
635 bool NSEC3Zone = false;
636 bool narrow = false;
637
91ad8c11
KM
638 DomainInfo di;
639 bool isCatalogZone = sd.db->getDomainInfo(target, di, false) && di.isCatalogType();
640
8a66a927
KM
641 NSEC3PARAMRecordContent ns3pr;
642
ea99d474 643 DNSSECKeeper dk(&db);
40b3959a 644 DNSSECKeeper::clearCaches(target);
91ad8c11 645 if (!isCatalogZone) {
8a66a927
KM
646 securedZone = dk.isSecuredZone(target);
647 presignedZone = dk.isPresigned(target);
648 }
22893145 649
dacacb23 650 if(securedZone && dk.getNSEC3PARAM(target, &ns3pr, &narrow)) {
22893145
CH
651 NSEC3Zone=true;
652 if(narrow) {
efe7948e 653 g_log<<Logger::Warning<<logPrefix<<"failed: not doing AXFR of an NSEC3 narrow zone"<<endl;
994cae6b
KM
654 outpacket->setRcode(RCode::Refused);
655 sendPacket(outpacket,outsock);
656 return 0;
22893145
CH
657 }
658 }
659
78bcb858 660 TSIGRecordContent trc;
7abbc40f
PD
661 DNSName tsigkeyname;
662 string tsigsecret;
78bcb858 663
ea3816cf 664 bool haveTSIGDetails = q->getTSIGDetails(&trc, &tsigkeyname);
78bcb858 665
60a1c204 666 if(haveTSIGDetails && !tsigkeyname.empty()) {
2c26f25a 667 string tsig64;
5e8d94f1 668 DNSName algorithm=trc.d_algoName;
290a083d 669 if (algorithm == DNSName("hmac-md5.sig-alg.reg.int"))
670 algorithm = DNSName("hmac-md5");
f13cde2a
OM
671 if (algorithm != DNSName("gss-tsig")) {
672 if(!db.getTSIGKey(tsigkeyname, algorithm, tsig64)) {
673 g_log<<Logger::Warning<<logPrefix<<"TSIG key not found"<<endl;
674 return 0;
675 }
676 if (B64Decode(tsig64, tsigsecret) == -1) {
677 g_log<<Logger::Error<<logPrefix<<"unable to Base-64 decode TSIG key '"<<tsigkeyname<<"'"<<endl;
678 return 0;
679 }
84fc3f8b 680 }
78bcb858 681 }
1e05b07c
FM
682
683
8267bd2c 684 // SOA *must* go out first, our signing pipe might reorder
efe7948e 685 DLOG(g_log<<logPrefix<<"sending out SOA"<<endl);
13f9e280 686 DNSZoneRecord soa = makeEditedDNSZRFromSOAData(dk, sd);
9bbcf03a 687 outpacket->addRecord(DNSZoneRecord(soa));
3c68fb14 688 if(securedZone && !presignedZone) {
7abbc40f 689 set<DNSName> authSet;
8d3cbffa 690 authSet.insert(target);
ea99d474 691 addRRSigs(dk, db, authSet, outpacket->getRRS());
8d3cbffa 692 }
1e05b07c 693
60a1c204 694 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 695 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac); // first answer is 'normal'
1e05b07c 696
02b4b703 697 sendPacket(outpacket, outsock, false);
1e05b07c 698
78bcb858 699 trc.d_mac = outpacket->d_trc.d_mac;
8267bd2c 700 outpacket = getFreshAXFRPacket(q);
04f5504d 701
1e05b07c 702
90ba52e0 703 DNSZoneRecord zrr;
04f5504d 704 vector<DNSZoneRecord> zrrs;
794c2f92 705
04f5504d
KM
706 zrr.dr.d_name = target;
707 zrr.dr.d_ttl = sd.minimum;
95b70d3a 708
04f5504d
KM
709 if(securedZone && !presignedZone) { // this is where the DNSKEYs, CDNSKEYs and CDSs go in
710 bool doCDNSKEY = true, doCDS = true;
95b70d3a
KM
711 string publishCDNSKEY, publishCDS;
712 dk.getPublishCDNSKEY(q->qdomain, publishCDNSKEY);
713 dk.getPublishCDS(q->qdomain, publishCDS);
04f5504d 714
95b70d3a 715 set<uint32_t> entryPointIds;
04f5504d
KM
716 DNSSECKeeper::keyset_t entryPoints = dk.getEntryPoints(target);
717 for (auto const& value : entryPoints) {
95b70d3a 718 entryPointIds.insert(value.second.id);
04f5504d 719 }
95b70d3a 720
04f5504d 721 DNSSECKeeper::keyset_t keys = dk.getKeys(target);
95b70d3a
KM
722 for(const DNSSECKeeper::keyset_t::value_type& value : keys) {
723 if (!value.second.published) {
724 continue;
991a0977 725 }
95b70d3a 726 zrr.dr.d_type = QType::DNSKEY;
d06dcda4 727 zrr.dr.setContent(std::make_shared<DNSKEYRecordContent>(value.first.getDNSKEY()));
95b70d3a 728 DNSName keyname = NSEC3Zone ? DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, zrr.dr.d_name))) : zrr.dr.d_name;
64d22929 729 zrrs.push_back(zrr);
95b70d3a
KM
730
731 // generate CDS and CDNSKEY records
04f5504d 732 if(doCDNSKEY && entryPointIds.count(value.second.id) > 0){
481508ab 733 if(!publishCDNSKEY.empty()) {
95b70d3a 734 zrr.dr.d_type=QType::CDNSKEY;
481508ab 735 if (publishCDNSKEY == "0") {
04f5504d 736 doCDNSKEY = false;
d06dcda4 737 zrr.dr.setContent(PacketHandler::s_deleteCDNSKEYContent);
04f5504d 738 zrrs.push_back(zrr);
481508ab 739 } else {
d06dcda4 740 zrr.dr.setContent(std::make_shared<DNSKEYRecordContent>(value.first.getDNSKEY()));
04f5504d 741 zrrs.push_back(zrr);
481508ab 742 }
95b70d3a
KM
743 }
744
04f5504d 745 if(doCDS && !publishCDS.empty()){
95b70d3a
KM
746 zrr.dr.d_type=QType::CDS;
747 vector<string> digestAlgos;
748 stringtok(digestAlgos, publishCDS, ", ");
481508ab 749 if(std::find(digestAlgos.begin(), digestAlgos.end(), "0") != digestAlgos.end()) {
cd7c3624 750 doCDS = false;
d06dcda4 751 zrr.dr.setContent(PacketHandler::s_deleteCDSContent);
04f5504d 752 zrrs.push_back(zrr);
481508ab
KM
753 } else {
754 for(auto const &digestAlgo : digestAlgos) {
d06dcda4 755 zrr.dr.setContent(std::make_shared<DSRecordContent>(makeDSFromDNSKey(target, value.first.getDNSKEY(), pdns::checked_stoi<uint8_t>(digestAlgo))));
04f5504d 756 zrrs.push_back(zrr);
481508ab 757 }
95b70d3a 758 }
991a0977
PL
759 }
760 }
761 }
95b70d3a 762
6dae726d
PD
763 }
764
95c5bc40 765 if(NSEC3Zone) { // now stuff in the NSEC3PARAM
04f5504d 766 uint8_t flags = ns3pr.d_flags;
90ba52e0 767 zrr.dr.d_type = QType::NSEC3PARAM;
95c5bc40 768 ns3pr.d_flags = 0;
d06dcda4 769 zrr.dr.setContent(std::make_shared<NSEC3PARAMRecordContent>(ns3pr));
b8adb30d 770 ns3pr.d_flags = flags;
6e8694df 771 DNSName keyname = DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, zrr.dr.d_name)));
64d22929 772 zrrs.push_back(zrr);
ce464268 773 }
1e05b07c 774
8a66a927
KM
775 const bool rectify = !(presignedZone || ::arg().mustDo("disable-axfr-rectify"));
776 set<DNSName> qnames, nsset, terms;
777
778 // Catalog zone start
779 if (di.kind == DomainInfo::Producer) {
780 // Ignore all records except NS at apex
781 sd.db->lookup(QType::NS, target, di.id);
782 while (sd.db->get(zrr)) {
783 zrrs.emplace_back(zrr);
784 }
785 if (zrrs.empty()) {
786 zrr.dr.d_name = target;
787 zrr.dr.d_ttl = 0;
788 zrr.dr.d_type = QType::NS;
d06dcda4 789 zrr.dr.setContent(std::make_shared<NSRecordContent>("invalid."));
8a66a927
KM
790 zrrs.emplace_back(zrr);
791 }
792
793 zrrs.emplace_back(CatalogInfo::getCatalogVersionRecord(target));
794
795 vector<CatalogInfo> members;
796 sd.db->getCatalogMembers(target, members, CatalogInfo::CatalogType::Producer);
797 for (const auto& ci : members) {
798 ci.toDNSZoneRecords(target, zrrs);
799 }
800 if (members.empty()) {
801 g_log << Logger::Warning << logPrefix << "catalog zone '" << target << "' has no members" << endl;
802 }
803 goto send;
804 }
805 // Catalog zone end
806
0c350cb5 807 // now start list zone
91ad8c11 808 if (!sd.db->list(target, sd.domain_id, isCatalogZone)) {
efe7948e 809 g_log<<Logger::Error<<logPrefix<<"backend signals error condition, aborting AXFR"<<endl;
9c556f63 810 outpacket->setRcode(RCode::ServFail);
0c350cb5
BH
811 sendPacket(outpacket,outsock);
812 return 0;
813 }
814
90ba52e0 815 while(sd.db->get(zrr)) {
64d22929
KM
816 if (!presignedZone) {
817 if (zrr.dr.d_type == QType::RRSIG) {
818 continue;
819 }
820 if (zrr.dr.d_type == QType::DNSKEY || zrr.dr.d_type == QType::CDNSKEY || zrr.dr.d_type == QType::CDS) {
821 if(!::arg().mustDo("direct-dnskey")) {
822 continue;
823 } else {
824 zrr.dr.d_ttl = sd.minimum;
825 }
826 }
827 }
8bf260dd 828 zrr.dr.d_name.makeUsLowerCase();
90ba52e0 829 if(zrr.dr.d_name.isPartOf(target)) {
833b07fe 830 if (zrr.dr.d_type == QType::ALIAS && (::arg().mustDo("outgoing-axfr-expand-alias") || ::arg()["outgoing-axfr-expand-alias"] == "ignore-errors")) {
90ba52e0 831 vector<DNSZoneRecord> ips;
d06dcda4
RG
832 int ret1 = stubDoResolve(getRR<ALIASRecordContent>(zrr.dr)->getContent(), QType::A, ips);
833 int ret2 = stubDoResolve(getRR<ALIASRecordContent>(zrr.dr)->getContent(), QType::AAAA, ips);
833b07fe 834 if (ret1 != RCode::NoError || ret2 != RCode::NoError) {
50ff05d7 835 if (::arg()["outgoing-axfr-expand-alias"] == "ignore-errors") {
833b07fe 836 if (ret1 != RCode::NoError) {
50ff05d7 837 g_log << Logger::Error << logPrefix << zrr.dr.d_name.toLogString() << ": error resolving A record for ALIAS target " << zrr.dr.getContent()->getZoneRepresentation() << ", continuing AXFR" << endl;
833b07fe
KD
838 }
839 if (ret2 != RCode::NoError) {
50ff05d7 840 g_log << Logger::Error << logPrefix << zrr.dr.d_name.toLogString() << ": error resolving AAAA record for ALIAS target " << zrr.dr.getContent()->getZoneRepresentation() << ", continuing AXFR" << endl;
833b07fe
KD
841 }
842 }
50ff05d7
CH
843 else {
844 g_log << Logger::Warning << logPrefix << zrr.dr.d_name.toLogString() << ": error resolving for ALIAS " << zrr.dr.getContent()->getZoneRepresentation() << ", aborting AXFR" << endl;
845 outpacket->setRcode(RCode::ServFail);
846 sendPacket(outpacket, outsock);
847 return 0;
848 }
273d88b2 849 }
d06dcda4 850 for (auto& ip: ips) {
90ba52e0 851 zrr.dr.d_type = ip.dr.d_type;
d06dcda4 852 zrr.dr.setContent(ip.dr.getContent());
90ba52e0 853 zrrs.push_back(zrr);
d86e1bf7 854 }
a68df29d 855 continue;
d86e1bf7
PD
856 }
857
b772ffea 858 if (rectify) {
90ba52e0 859 if (zrr.dr.d_type) {
860 qnames.insert(zrr.dr.d_name);
861 if(zrr.dr.d_type == QType::NS && zrr.dr.d_name!=target)
862 nsset.insert(zrr.dr.d_name);
b772ffea
KM
863 } else {
864 // remove existing ents
865 continue;
866 }
867 }
a68df29d 868 zrrs.push_back(zrr);
b772ffea 869 } else {
90ba52e0 870 if (zrr.dr.d_type)
efe7948e 871 g_log<<Logger::Warning<<logPrefix<<"zone contains out-of-zone data '"<<zrr.dr.d_name<<"|"<<DNSRecordContent::NumberToType(zrr.dr.d_type)<<"', ignoring"<<endl;
b772ffea
KM
872 }
873 }
874
d06dcda4 875 for (auto& loopRR : zrrs) {
bdbee377
PL
876 if ((loopRR.dr.d_type == QType::SVCB || loopRR.dr.d_type == QType::HTTPS)) {
877 // Process auto hints
878 // TODO this is an almost copy of the code in the packethandler
879 auto rrc = getRR<SVCBBaseRecordContent>(loopRR.dr);
880 if (rrc == nullptr) {
881 continue;
882 }
d06dcda4
RG
883 auto newRRC = rrc->clone();
884 if (!newRRC) {
885 continue;
886 }
887 DNSName svcTarget = newRRC->getTarget().isRoot() ? loopRR.dr.d_name : newRRC->getTarget();
888 if (newRRC->autoHint(SvcParam::ipv4hint)) {
bdbee377
PL
889 sd.db->lookup(QType::A, svcTarget, sd.domain_id);
890 vector<ComboAddress> hints;
891 DNSZoneRecord rr;
892 while (sd.db->get(rr)) {
893 auto arrc = getRR<ARecordContent>(rr.dr);
894 hints.push_back(arrc->getCA());
895 }
896 if (hints.size() == 0) {
d06dcda4 897 newRRC->removeParam(SvcParam::ipv4hint);
bdbee377 898 } else {
d06dcda4 899 newRRC->setHints(SvcParam::ipv4hint, hints);
bdbee377
PL
900 }
901 }
902
d06dcda4 903 if (newRRC->autoHint(SvcParam::ipv6hint)) {
bdbee377
PL
904 sd.db->lookup(QType::AAAA, svcTarget, sd.domain_id);
905 vector<ComboAddress> hints;
906 DNSZoneRecord rr;
907 while (sd.db->get(rr)) {
908 auto arrc = getRR<AAAARecordContent>(rr.dr);
909 hints.push_back(arrc->getCA());
910 }
911 if (hints.size() == 0) {
d06dcda4 912 newRRC->removeParam(SvcParam::ipv6hint);
bdbee377 913 } else {
d06dcda4 914 newRRC->setHints(SvcParam::ipv6hint, hints);
bdbee377
PL
915 }
916 }
d06dcda4
RG
917
918 loopRR.dr.setContent(std::move(newRRC));
bdbee377
PL
919 }
920 }
921
75f2589f 922 // Group records by name and type, signpipe stumbles over interrupted rrsets
22a676e0 923 if(securedZone && !presignedZone) {
8daafcc1 924 sort(zrrs.begin(), zrrs.end(), [](const DNSZoneRecord& a, const DNSZoneRecord& b) {
905dae56 925 return std::tie(a.dr.d_name, a.dr.d_type) < std::tie(b.dr.d_name, b.dr.d_type);
8daafcc1
KM
926 });
927 }
75f2589f 928
b772ffea
KM
929 if(rectify) {
930 // set auth
2010ac95
RG
931 for(DNSZoneRecord &loopZRR : zrrs) {
932 loopZRR.auth=true;
933 if (loopZRR.dr.d_type != QType::NS || loopZRR.dr.d_name!=target) {
934 DNSName shorter(loopZRR.dr.d_name);
b772ffea 935 do {
e325f20c 936 if (shorter==target) // apex is always auth
cb045f61 937 break;
2010ac95
RG
938 if(nsset.count(shorter) && !(loopZRR.dr.d_name==shorter && loopZRR.dr.d_type == QType::DS)) {
939 loopZRR.auth=false;
cb045f61 940 break;
9f70b77a 941 }
7abbc40f 942 } while(shorter.chopOff());
9f70b77a 943 }
b772ffea
KM
944 }
945
946 if(NSEC3Zone) {
947 // ents are only required for NSEC3 zones
948 uint32_t maxent = ::arg().asNum("max-ent-entries");
6ded341a 949 set<DNSName> nsec3set, nonterm;
2010ac95 950 for (auto &loopZRR: zrrs) {
6ded341a 951 bool skip=false;
2010ac95 952 DNSName shorter = loopZRR.dr.d_name;
6ded341a
KM
953 if (shorter != target && shorter.chopOff() && shorter != target) {
954 do {
955 if(nsset.count(shorter)) {
956 skip=true;
957 break;
958 }
959 } while(shorter.chopOff() && shorter != target);
960 }
2010ac95
RG
961 shorter = loopZRR.dr.d_name;
962 if(!skip && (loopZRR.dr.d_type != QType::NS || !ns3pr.d_flags)) {
6ded341a
KM
963 do {
964 if(!nsec3set.count(shorter)) {
965 nsec3set.insert(shorter);
966 }
967 } while(shorter != target && shorter.chopOff());
968 }
969 }
970
2010ac95
RG
971 for(DNSZoneRecord &loopZRR : zrrs) {
972 DNSName shorter(loopZRR.dr.d_name);
e325f20c 973 while(shorter != target && shorter.chopOff()) {
6ded341a 974 if(!qnames.count(shorter) && !nonterm.count(shorter) && nsec3set.count(shorter)) {
b772ffea 975 if(!(maxent)) {
efe7948e
KM
976 g_log<<Logger::Warning<<logPrefix<<"zone has too many empty non terminals, aborting AXFR"<<endl;
977 outpacket->setRcode(RCode::ServFail);
978 sendPacket(outpacket,outsock);
b772ffea
KM
979 return 0;
980 }
6ded341a
KM
981 nonterm.insert(shorter);
982 --maxent;
b772ffea
KM
983 }
984 }
985 }
986
9e23e712 987 for(const auto& nt : nonterm) {
2010ac95
RG
988 DNSZoneRecord tempRR;
989 tempRR.dr.d_name=nt;
990 tempRR.dr.d_type=QType::ENT;
991 tempRR.auth=true;
992 zrrs.push_back(tempRR);
b772ffea
KM
993 }
994 }
995 }
996
8a66a927 997send:
b772ffea 998
12c86877 999 /* now write all other records */
04f5504d
KM
1000
1001 typedef map<DNSName, NSECXEntry, CanonDNSNameCompare> nsecxrepo_t;
1002 nsecxrepo_t nsecxrepo;
1003
3af419da 1004 ChunkedSigningPipe csp(target, (securedZone && !presignedZone), ::arg().asNum("signing-threads", 1), ::arg().mustDo("workaround-11804") ? 1 : 100);
04f5504d 1005
6e8694df 1006 DNSName keyname;
3370c993 1007 unsigned int udiff;
1c6d9830
BH
1008 DTime dt;
1009 dt.set();
2010ac95 1010 for(DNSZoneRecord &loopZRR : zrrs) {
2010ac95
RG
1011 if(securedZone && (loopZRR.auth || loopZRR.dr.d_type == QType::NS)) {
1012 if (NSEC3Zone || loopZRR.dr.d_type) {
3c68fb14
KM
1013 if (presignedZone && NSEC3Zone && loopZRR.dr.d_type == QType::RRSIG && getRR<RRSIGRecordContent>(loopZRR.dr)->d_type == QType::NSEC3) {
1014 keyname = loopZRR.dr.d_name.makeRelative(sd.qname);
1015 } else {
1016 keyname = NSEC3Zone ? DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, loopZRR.dr.d_name))) : loopZRR.dr.d_name;
1017 }
b5baefaf 1018 NSECXEntry& ne = nsecxrepo[keyname];
192bcba2 1019 ne.d_ttl = sd.getNegativeTTL();
3c68fb14
KM
1020 ne.d_auth = (ne.d_auth || loopZRR.auth || (NSEC3Zone && (!ns3pr.d_flags)));
1021 if (loopZRR.dr.d_type && loopZRR.dr.d_type != QType::RRSIG) {
22a0ef16 1022 ne.d_set.set(loopZRR.dr.d_type);
b5baefaf
PD
1023 }
1024 }
b317b510 1025 }
b5baefaf 1026
2010ac95 1027 if (!loopZRR.dr.d_type)
b5baefaf
PD
1028 continue; // skip empty non-terminals
1029
2010ac95 1030 if(loopZRR.dr.d_type == QType::SOA)
12c86877 1031 continue; // skip SOA - would indicate end of AXFR
add640c0 1032
2010ac95 1033 if(csp.submit(loopZRR)) {
1c6d9830
BH
1034 for(;;) {
1035 outpacket->getRRS() = csp.getChunk();
1036 if(!outpacket->getRRS().empty()) {
60a1c204 1037 if(haveTSIGDetails && !tsigkeyname.empty())
54d84273 1038 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
02b4b703 1039 sendPacket(outpacket, outsock, false);
78bcb858 1040 trc.d_mac=outpacket->d_trc.d_mac;
1c6d9830
BH
1041 outpacket=getFreshAXFRPacket(q);
1042 }
1043 else
1044 break;
1045 }
12c86877
BH
1046 }
1047 }
78bcb858 1048 /*
3370c993 1049 udiff=dt.udiffNoReset();
1c6d9830
BH
1050 cerr<<"Starting NSEC: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<csp.d_signed<<" / "<<udiff/1000000.0<<endl;
1051 cerr<<"Outstanding: "<<csp.d_outstanding<<", "<<csp.d_queued - csp.d_signed << endl;
1052 cerr<<"Ready for consumption: "<<csp.getReady()<<endl;
78bcb858 1053 */
feef1ece 1054 if(securedZone) {
4888e4b2 1055 if(NSEC3Zone) {
9d3151d9 1056 for(nsecxrepo_t::const_iterator iter = nsecxrepo.begin(); iter != nsecxrepo.end(); ++iter) {
3c68fb14 1057 if(iter->second.d_auth) {
feef1ece 1058 NSEC3RecordContent n3rc;
22a0ef16 1059 n3rc.set(iter->second.d_set);
27d4a65b
RG
1060 const auto numberOfTypesSet = n3rc.numberOfTypesSet();
1061 if (numberOfTypesSet != 0 && (numberOfTypesSet != 1 || !n3rc.isSet(QType::NS))) {
1062 n3rc.set(QType::RRSIG);
1063 }
1064 n3rc.d_salt = ns3pr.d_salt;
feef1ece
PD
1065 n3rc.d_flags = ns3pr.d_flags;
1066 n3rc.d_iterations = ns3pr.d_iterations;
690b86b7 1067 n3rc.d_algorithm = DNSSECKeeper::DIGEST_SHA1; // SHA1, fixed in PowerDNS for now
feef1ece 1068 nsecxrepo_t::const_iterator inext = iter;
cb167afd 1069 ++inext;
feef1ece
PD
1070 if(inext == nsecxrepo.end())
1071 inext = nsecxrepo.begin();
3c68fb14 1072 while(!inext->second.d_auth && inext != iter)
feef1ece 1073 {
cb167afd 1074 ++inext;
feef1ece
PD
1075 if(inext == nsecxrepo.end())
1076 inext = nsecxrepo.begin();
1077 }
6e8694df
KM
1078 n3rc.d_nexthash = fromBase32Hex(inext->first.toStringNoDot());
1079 zrr.dr.d_name = iter->first+sd.qname;
90ba52e0 1080
192bcba2 1081 zrr.dr.d_ttl = sd.getNegativeTTL();
d06dcda4 1082 zrr.dr.setContent(std::make_shared<NSEC3RecordContent>(std::move(n3rc)));
90ba52e0 1083 zrr.dr.d_type = QType::NSEC3;
1084 zrr.dr.d_place = DNSResourceRecord::ANSWER;
1085 zrr.auth=true;
1086 if(csp.submit(zrr)) {
feef1ece
PD
1087 for(;;) {
1088 outpacket->getRRS() = csp.getChunk();
1089 if(!outpacket->getRRS().empty()) {
60a1c204 1090 if(haveTSIGDetails && !tsigkeyname.empty())
feef1ece 1091 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
02b4b703 1092 sendPacket(outpacket, outsock, false);
feef1ece
PD
1093 trc.d_mac=outpacket->d_trc.d_mac;
1094 outpacket=getFreshAXFRPacket(q);
1095 }
1096 else
1097 break;
1c6d9830 1098 }
1c6d9830 1099 }
8e9b7d99 1100 }
4888e4b2
BH
1101 }
1102 }
9d3151d9 1103 else for(nsecxrepo_t::const_iterator iter = nsecxrepo.begin(); iter != nsecxrepo.end(); ++iter) {
ed9c3a50 1104 NSECRecordContent nrc;
22a0ef16 1105 nrc.set(iter->second.d_set);
27d4a65b
RG
1106 nrc.set(QType::RRSIG);
1107 nrc.set(QType::NSEC);
6e8694df
KM
1108
1109 if(boost::next(iter) != nsecxrepo.end())
1110 nrc.d_next = boost::next(iter)->first;
ed9c3a50 1111 else
6e8694df
KM
1112 nrc.d_next=nsecxrepo.begin()->first;
1113 zrr.dr.d_name = iter->first;
1114
192bcba2 1115 zrr.dr.d_ttl = sd.getNegativeTTL();
d06dcda4 1116 zrr.dr.setContent(std::make_shared<NSECRecordContent>(std::move(nrc)));
90ba52e0 1117 zrr.dr.d_type = QType::NSEC;
1118 zrr.dr.d_place = DNSResourceRecord::ANSWER;
1119 zrr.auth=true;
1120 if(csp.submit(zrr)) {
1c6d9830
BH
1121 for(;;) {
1122 outpacket->getRRS() = csp.getChunk();
1123 if(!outpacket->getRRS().empty()) {
60a1c204 1124 if(haveTSIGDetails && !tsigkeyname.empty())
1e05b07c 1125 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
02b4b703 1126 sendPacket(outpacket, outsock, false);
78bcb858 1127 trc.d_mac=outpacket->d_trc.d_mac;
1c6d9830
BH
1128 outpacket=getFreshAXFRPacket(q);
1129 }
1130 else
1131 break;
1132 }
8e9b7d99 1133 }
add640c0 1134 }
add640c0 1135 }
78bcb858 1136 /*
3370c993 1137 udiff=dt.udiffNoReset();
1c6d9830
BH
1138 cerr<<"Flushing pipe: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<csp.d_signed<<" / "<<udiff/1000000.0<<endl;
1139 cerr<<"Outstanding: "<<csp.d_outstanding<<", "<<csp.d_queued - csp.d_signed << endl;
1140 cerr<<"Ready for consumption: "<<csp.getReady()<<endl;
78bcb858 1141 * */
1e05b07c 1142 for(;;) {
bec14a20
BH
1143 outpacket->getRRS() = csp.getChunk(true); // flush the pipe
1144 if(!outpacket->getRRS().empty()) {
60a1c204 1145 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 1146 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true); // first answer is 'normal'
1f07a63f
PD
1147 try {
1148 sendPacket(outpacket, outsock, false);
1149 }
1150 catch (PDNSException& pe) {
1151 throw PDNSException("during axfr-out of "+target.toString()+", this happened: "+pe.reason);
1152 }
78bcb858 1153 trc.d_mac=outpacket->d_trc.d_mac;
bec14a20
BH
1154 outpacket=getFreshAXFRPacket(q);
1155 }
1e05b07c 1156 else
bec14a20 1157 break;
12c86877 1158 }
1e05b07c 1159
1c6d9830 1160 udiff=dt.udiffNoReset();
1e05b07c 1161 if(securedZone)
efe7948e 1162 g_log<<Logger::Debug<<logPrefix<<"done signing: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<endl;
1e05b07c 1163
efe7948e 1164 DLOG(g_log<<logPrefix<<"done writing out records"<<endl);
12c86877 1165 /* and terminate with yet again the SOA record */
8e9b7d99 1166 outpacket=getFreshAXFRPacket(q);
9bbcf03a 1167 outpacket->addRecord(std::move(soa));
60a1c204 1168 if(haveTSIGDetails && !tsigkeyname.empty())
1e05b07c
FM
1169 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
1170
ff76e8b4 1171 sendPacket(outpacket, outsock);
1e05b07c 1172
efe7948e
KM
1173 DLOG(g_log<<logPrefix<<"last packet - close"<<endl);
1174 g_log<<Logger::Notice<<logPrefix<<"AXFR finished"<<endl;
12c86877
BH
1175
1176 return 1;
1177}
1178
c2826d2e 1179int TCPNameserver::doIXFR(std::unique_ptr<DNSPacket>& q, int outsock)
6e59a580 1180{
c6ca0aa8 1181 string logPrefix="IXFR-out zone '"+q->qdomain.toLogString()+"', client '"+q->getRemoteStringWithPort()+"', ";
efe7948e 1182
c2826d2e 1183 std::unique_ptr<DNSPacket> outpacket=getFreshAXFRPacket(q);
6e59a580
KM
1184 if(q->d_dnssecOk)
1185 outpacket->d_dnssecOk=true; // RFC 5936, 2.2.5 'SHOULD'
1186
6e59a580 1187 uint32_t serial = 0;
27c0050c 1188 MOADNSParser mdp(false, q->getString());
f80ebc05
O
1189 for(const auto & answer : mdp.d_answers) {
1190 const DNSRecord *rr = &answer.first;
e693ff5a 1191 if (rr->d_type == QType::SOA && rr->d_place == DNSResourceRecord::AUTHORITY) {
6e59a580 1192 vector<string>parts;
d06dcda4 1193 stringtok(parts, rr->getContent()->getZoneRepresentation());
6e59a580 1194 if (parts.size() >= 3) {
95dd3b90 1195 try {
a0383aad 1196 pdns::checked_stoi_into(serial, parts[2]);
95dd3b90
RG
1197 }
1198 catch(const std::out_of_range& oor) {
efe7948e 1199 g_log<<Logger::Warning<<logPrefix<<"invalid serial in IXFR query"<<endl;
95dd3b90
RG
1200 outpacket->setRcode(RCode::FormErr);
1201 sendPacket(outpacket,outsock);
1202 return 0;
1203 }
6e59a580 1204 } else {
efe7948e 1205 g_log<<Logger::Warning<<logPrefix<<"no serial in IXFR query"<<endl;
6e59a580
KM
1206 outpacket->setRcode(RCode::FormErr);
1207 sendPacket(outpacket,outsock);
1208 return 0;
1209 }
3e67ea8b 1210 } else if (rr->d_type != QType::TSIG && rr->d_type != QType::OPT) {
d5fcd583 1211 g_log<<Logger::Warning<<logPrefix<<"additional records in IXFR query, type: "<<QType(rr->d_type).toString()<<endl;
6e59a580
KM
1212 outpacket->setRcode(RCode::FormErr);
1213 sendPacket(outpacket,outsock);
1214 return 0;
1215 }
1216 }
1217
efe7948e 1218 g_log<<Logger::Warning<<logPrefix<<"transfer initiated with serial "<<serial<<endl;
6e59a580 1219
57f95528 1220 // determine if zone exists, XFR is allowed, and if IXFR can proceed using existing backend before spawning a new backend.
6e59a580 1221 SOAData sd;
57f95528
CH
1222 bool securedZone;
1223 bool serialPermitsIXFR;
6e59a580 1224 {
24fb5774 1225 auto packetHandler = s_P.lock();
efe7948e 1226 DLOG(g_log<<logPrefix<<"Looking for SOA"<<endl); // find domain_id via SOA and list complete domain. No SOA, no IXFR
24fb5774 1227 if(!*packetHandler) {
994cae6b 1228 g_log<<Logger::Warning<<"TCP server is without backend connections in doIXFR, launching"<<endl;
24fb5774 1229 *packetHandler = make_unique<PacketHandler>();
6e59a580
KM
1230 }
1231
22893145 1232 // canDoAXFR does all the ACL checks, and has the if(disable-axfr) shortcut, call it first.
24fb5774 1233 if(!canDoAXFR(q, false, *packetHandler) || !(*packetHandler)->getBackend()->getSOAUncached(q->qdomain, sd)) {
efe7948e 1234 g_log<<Logger::Warning<<logPrefix<<"failed: not authoritative"<<endl;
9c556f63 1235 outpacket->setRcode(RCode::NotAuth);
6e59a580
KM
1236 sendPacket(outpacket,outsock);
1237 return 0;
1238 }
22893145 1239
24fb5774 1240 DNSSECKeeper dk((*packetHandler)->getBackend());
57f95528 1241 DNSSECKeeper::clearCaches(q->qdomain);
d5e7c918 1242 bool narrow = false;
57f95528
CH
1243 securedZone = dk.isSecuredZone(q->qdomain);
1244 if(dk.getNSEC3PARAM(q->qdomain, nullptr, &narrow)) {
1245 if(narrow) {
efe7948e 1246 g_log<<Logger::Warning<<logPrefix<<"not doing IXFR of an NSEC3 narrow zone"<<endl;
57f95528
CH
1247 outpacket->setRcode(RCode::Refused);
1248 sendPacket(outpacket,outsock);
1249 return 0;
1250 }
22893145 1251 }
6e59a580 1252
57f95528 1253 serialPermitsIXFR = !rfc1982LessThan(serial, calculateEditSOA(sd.serial, dk, sd.qname));
6e59a580 1254 }
24d9e514 1255
57f95528
CH
1256 if (serialPermitsIXFR) {
1257 DNSName target = q->qdomain;
6e59a580 1258 TSIGRecordContent trc;
7abbc40f
PD
1259 DNSName tsigkeyname;
1260 string tsigsecret;
6e59a580 1261
57f95528
CH
1262 UeberBackend db;
1263 DNSSECKeeper dk(&db);
57f95528 1264
ea3816cf 1265 bool haveTSIGDetails = q->getTSIGDetails(&trc, &tsigkeyname);
6e59a580 1266
60a1c204 1267 if(haveTSIGDetails && !tsigkeyname.empty()) {
bb7fb11c 1268 string tsig64;
3343ad1f 1269 DNSName algorithm=trc.d_algoName; // FIXME400: was toLowerCanonic, compare output
290a083d 1270 if (algorithm == DNSName("hmac-md5.sig-alg.reg.int"))
1271 algorithm = DNSName("hmac-md5");
40361bf2
KM
1272 if (!db.getTSIGKey(tsigkeyname, algorithm, tsig64)) {
1273 g_log << Logger::Error << "TSIG key '" << tsigkeyname << "' for domain '" << target << "' not found" << endl;
53ace5d5
PL
1274 return 0;
1275 }
1276 if (B64Decode(tsig64, tsigsecret) == -1) {
efe7948e 1277 g_log<<Logger::Error<<logPrefix<<"unable to Base-64 decode TSIG key '"<<tsigkeyname<<"'"<<endl;
53ace5d5
PL
1278 return 0;
1279 }
6e59a580
KM
1280 }
1281
6e59a580 1282 // SOA *must* go out first, our signing pipe might reorder
efe7948e 1283 DLOG(g_log<<logPrefix<<"sending out SOA"<<endl);
13f9e280 1284 DNSZoneRecord soa = makeEditedDNSZRFromSOAData(dk, sd);
9bbcf03a 1285 outpacket->addRecord(std::move(soa));
ada68bd9 1286 if(securedZone && outpacket->d_dnssecOk) {
7abbc40f 1287 set<DNSName> authSet;
6e59a580 1288 authSet.insert(target);
57f95528 1289 addRRSigs(dk, db, authSet, outpacket->getRRS());
6e59a580
KM
1290 }
1291
60a1c204 1292 if(haveTSIGDetails && !tsigkeyname.empty())
6e59a580
KM
1293 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac); // first answer is 'normal'
1294
1295 sendPacket(outpacket, outsock);
1296
efe7948e 1297 g_log<<Logger::Notice<<logPrefix<<"IXFR finished"<<endl;
6e59a580
KM
1298
1299 return 1;
1300 }
1301
efe7948e 1302 g_log<<Logger::Notice<<logPrefix<<"IXFR fallback to AXFR"<<endl;
6e59a580
KM
1303 return doAXFR(q->qdomain, q, outsock);
1304}
1305
// Defaulted destructor: no explicit teardown is performed here; members are released by their own destructors.
abb11ca4 1306TCPNameserver::~TCPNameserver() = default;
12c86877
BH
1307TCPNameserver::TCPNameserver()
1308{
cb0af1a1
RG
1309 d_maxTransactionsPerConn = ::arg().asNum("max-tcp-transactions-per-conn");
1310 d_idleTimeout = ::arg().asNum("tcp-idle-timeout");
1311 d_maxConnectionDuration = ::arg().asNum("max-tcp-connection-duration");
1312 d_maxConnectionsPerClient = ::arg().asNum("max-tcp-connections-per-client");
1313
379ab445 1314// sem_init(&d_connectionroom_sem,0,::arg().asNum("max-tcp-connections"));
c2826d2e 1315 d_connectionroom_sem = make_unique<Semaphore>( ::arg().asNum( "max-tcp-connections" ));
d322f931 1316 d_maxTCPConnections = ::arg().asNum( "max-tcp-connections" );
f5ad09dc 1317
12c86877 1318 vector<string>locals;
379ab445 1319 stringtok(locals,::arg()["local-address"]," ,");
f5ad09dc
PL
1320 if(locals.empty())
1321 throw PDNSException("No local addresses specified");
12c86877 1322
68b011bd 1323 d_ng.toMasks(::arg()["allow-axfr-ips"] );
9f1d5826 1324
12c86877 1325 signal(SIGPIPE,SIG_IGN);
12c86877 1326
f5ad09dc
PL
1327 for(auto const &laddr : locals) {
1328 ComboAddress local(laddr, ::arg().asNum("local-port"));
12c86877 1329
f5ad09dc
PL
1330 int s=socket(local.sin4.sin_family, SOCK_STREAM, 0);
1331 if(s<0)
1332 throw PDNSException("Unable to acquire TCP socket: "+stringerror());
3897b9e1 1333 setCloseOnExec(s);
fb316318 1334
12c86877 1335 int tmp=1;
f5ad09dc 1336 if(setsockopt(s, SOL_SOCKET,SO_REUSEADDR, (char*)&tmp, sizeof tmp) < 0) {
e6a9dde5 1337 g_log<<Logger::Error<<"Setsockopt failed"<<endl;
f5ad09dc 1338 _exit(1);
12c86877 1339 }
940d7811
RG
1340
1341 if (::arg().asNum("tcp-fast-open") > 0) {
1342#ifdef TCP_FASTOPEN
1343 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1344 if (setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
f5ad09dc 1345 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket "<<local.toStringWithPort()<<": "<<stringerror()<<endl;
940d7811
RG
1346 }
1347#else
e6a9dde5 1348 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
940d7811
RG
1349#endif
1350 }
1351
f5ad09dc
PL
1352 if(::arg().mustDo("non-local-bind"))
1353 Utility::setBindAny(local.sin4.sin_family, s);
1354
1355 if(local.isIPv6() && setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1356 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<stringerror()<<endl;
1357 }
fec7dd5a 1358
379ab445 1359 if(::bind(s, (sockaddr*)&local, local.getSocklen())<0) {
a702a96c 1360 int err = errno;
2c896042 1361 close(s);
a702a96c 1362 if( err == EADDRNOTAVAIL && ! ::arg().mustDo("local-address-nonexist-fail") ) {
f5ad09dc 1363 g_log<<Logger::Error<<"Address " << local.toString() << " does not exist on this server - skipping TCP bind" << endl;
5ecb2885
MZ
1364 continue;
1365 } else {
f5ad09dc 1366 g_log<<Logger::Error<<"Unable to bind to TCP socket " << local.toStringWithPort() << ": "<<stringerror(err)<<endl;
2ab7e9ac 1367 throw PDNSException("Unable to bind to TCP socket");
5ecb2885 1368 }
12c86877 1369 }
12c86877 1370
f5ad09dc
PL
1371 listen(s, 128);
1372 g_log<<Logger::Error<<"TCP server bound to "<<local.toStringWithPort()<<endl;
12c86877 1373 d_sockets.push_back(s);
8edfedf1
BH
1374 struct pollfd pfd;
1375 memset(&pfd, 0, sizeof(pfd));
1376 pfd.fd = s;
1377 pfd.events = POLLIN;
8edfedf1 1378 d_prfds.push_back(pfd);
12c86877 1379 }
12c86877
BH
1380}
1381
1382
ff76e8b4 1383//! Start of TCP operations thread, we launch a new thread for each incoming TCP question
12c86877
BH
1384void TCPNameserver::thread()
1385{
519f5484 1386 setThreadName("pdns/tcpnameser");
12c86877
BH
1387 try {
1388 for(;;) {
1389 int fd;
cb0af1a1
RG
1390 ComboAddress remote;
1391 Utility::socklen_t addrlen=remote.getSocklen();
12c86877 1392
8edfedf1 1393 int ret=poll(&d_prfds[0], d_prfds.size(), -1); // blocks, forever if need be
8a63d3ce 1394 if(ret <= 0)
4957a608 1395 continue;
8a63d3ce 1396
12c86877 1397 int sock=-1;
8ce9e4e6 1398 for(const pollfd& pfd : d_prfds) {
c1ee10a6 1399 if(pfd.revents & POLLIN) {
4957a608 1400 sock = pfd.fd;
cb0af1a1
RG
1401 remote.sin4.sin_family = AF_INET6;
1402 addrlen=remote.getSocklen();
4957a608
BH
1403
1404 if((fd=accept(sock, (sockaddr*)&remote, &addrlen))<0) {
a702a96c
OM
1405 int err = errno;
1406 g_log<<Logger::Error<<"TCP question accept error: "<<stringerror(err)<<endl;
1e05b07c 1407
a702a96c 1408 if(err==EMFILE) {
e6a9dde5 1409 g_log<<Logger::Error<<"TCP handler out of filedescriptors, exiting, won't recover from this"<<endl;
5bd2ea7b 1410 _exit(1);
4957a608
BH
1411 }
1412 }
1413 else {
cb0af1a1 1414 if (d_maxConnectionsPerClient) {
24fb5774
RG
1415 auto clientsCount = s_clientsCount.lock();
1416 if ((*clientsCount)[remote] >= d_maxConnectionsPerClient) {
e6a9dde5 1417 g_log<<Logger::Notice<<"Limit of simultaneous TCP connections per client reached for "<< remote<<", dropping"<<endl;
cb0af1a1
RG
1418 close(fd);
1419 continue;
1420 }
24fb5774 1421 (*clientsCount)[remote]++;
cb0af1a1
RG
1422 }
1423
4957a608
BH
1424 d_connectionroom_sem->wait(); // blocks if no connections are available
1425
1426 int room;
1427 d_connectionroom_sem->getValue( &room);
1428 if(room<1)
e6a9dde5 1429 g_log<<Logger::Warning<<"Limit of simultaneous TCP connections reached - raise max-tcp-connections"<<endl;
4957a608 1430
0ddde5fb
RG
1431 try {
1432 std::thread connThread(doConnection, fd);
1433 connThread.detach();
1434 }
1435 catch (std::exception& e) {
1436 g_log<<Logger::Error<<"Error creating thread: "<<e.what()<<endl;
4957a608 1437 d_connectionroom_sem->post();
48e8d70b 1438 close(fd);
cb0af1a1 1439 decrementClientCount(remote);
4957a608
BH
1440 }
1441 }
1442 }
12c86877
BH
1443 }
1444 }
1445 }
3f81d239 1446 catch(PDNSException &AE) {
e6a9dde5 1447 g_log<<Logger::Error<<"TCP Nameserver thread dying because of fatal error: "<<AE.reason<<endl;
12c86877
BH
1448 }
1449 catch(...) {
e6a9dde5 1450 g_log<<Logger::Error<<"TCPNameserver dying because of an unexpected fatal error"<<endl;
12c86877 1451 }
5bd2ea7b 1452 _exit(1); // take rest of server with us
12c86877
BH
1453}
1454
1455
d322f931
PD
1456unsigned int TCPNameserver::numTCPConnections()
1457{
1458 int room;
1459 d_connectionroom_sem->getValue( &room);
1460 return d_maxTCPConnections - room;
1461}