]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/tcpreceiver.cc
pkcs11signers: Use emplace_back for attributes
[thirdparty/pdns.git] / pdns / tcpreceiver.cc
CommitLineData
12c86877
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
2e7834cb 3 Copyright (C) 2002-2012 PowerDNS.COM BV
12c86877
BH
4
5 This program is free software; you can redistribute it and/or modify
22dc646a
BH
6 it under the terms of the GNU General Public License version 2
7 as published by the Free Software Foundation
f782fe38
MH
8
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
12c86877
BH
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
06bd9ccf 20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12c86877 21*/
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
b6f3b03a 25#include <boost/algorithm/string.hpp>
097b1e68 26#include <boost/scoped_array.hpp>
bf269e28 27#include "auth-packetcache.hh"
1258abe0 28#include "utility.hh"
519f5484 29#include "threadname.hh"
add640c0 30#include "dnssecinfra.hh"
4c1474f3 31#include "dnsseckeeper.hh"
12c86877 32#include <cstdio>
4888e4b2 33#include "base32.hh"
12c86877
BH
34#include <cstring>
35#include <cstdlib>
36#include <sys/types.h>
940d7811 37#include <netinet/tcp.h>
12c86877
BH
38#include <iostream>
39#include <string>
40#include "tcpreceiver.hh"
67d74e49 41#include "sstuff.hh"
fa8fd4d2 42
dc6aa7f5
AV
43#include <cerrno>
44#include <csignal>
78bcb858 45#include "base64.hh"
12c86877
BH
46#include "ueberbackend.hh"
47#include "dnspacket.hh"
48#include "nameserver.hh"
49#include "distributor.hh"
50#include "lock.hh"
51#include "logger.hh"
52#include "arguments.hh"
379ab445 53
8cb70f23 54#include "auth-main.hh"
12c86877
BH
55#include "packethandler.hh"
56#include "statbag.hh"
12c86877 57#include "communicator.hh"
61b26744 58#include "namespaces.hh"
8e9b7d99 59#include "signingpipe.hh"
273d88b2 60#include "stubresolver.hh"
4172a5b2
PD
61#include "proxy-protocol.hh"
62#include "noinitvector.hh"
f13cde2a 63#include "gss_context.hh"
1f07a63f 64#include "pdnsexception.hh"
bf269e28 65extern AuthPacketCache PC;
12c86877
BH
66extern StatBag S;
67
68/**
69\file tcpreceiver.cc
70\brief This file implements the tcpreceiver that receives and answers questions over TCP/IP
71*/
72
c2826d2e 73std::unique_ptr<Semaphore> TCPNameserver::d_connectionroom_sem{nullptr};
24fb5774 74LockGuarded<std::unique_ptr<PacketHandler>> TCPNameserver::s_P{nullptr};
d322f931 75unsigned int TCPNameserver::d_maxTCPConnections = 0;
9f1d5826 76NetmaskGroup TCPNameserver::d_ng;
cb0af1a1
RG
77size_t TCPNameserver::d_maxTransactionsPerConn;
78size_t TCPNameserver::d_maxConnectionsPerClient;
79unsigned int TCPNameserver::d_idleTimeout;
80unsigned int TCPNameserver::d_maxConnectionDuration;
24fb5774 81LockGuarded<std::map<ComboAddress,size_t,ComboAddress::addressOnlyLessThan>> TCPNameserver::s_clientsCount;
12c86877 82
12c86877
BH
83void TCPNameserver::go()
84{
e6a9dde5 85 g_log<<Logger::Error<<"Creating backend connection for TCP"<<endl;
24fb5774 86 s_P.lock()->reset();
12c86877 87 try {
24fb5774 88 *(s_P.lock()) = make_unique<PacketHandler>();
12c86877 89 }
3f81d239 90 catch(PDNSException &ae) {
e6a9dde5 91 g_log<<Logger::Error<<"TCP server is unable to launch backends - will try again when questions come in: "<<ae.reason<<endl;
12c86877 92 }
12c86877 93
969e4459 94 std::thread th([this](){thread();});
0ddde5fb 95 th.detach();
12c86877
BH
96}
97
3f81d239 98// throws PDNSException if things didn't go according to plan, returns 0 if really 0 bytes were read
cb0af1a1 99static int readnWithTimeout(int fd, void* buffer, unsigned int n, unsigned int idleTimeout, bool throwOnEOF=true, unsigned int totalTimeout=0)
12c86877 100{
6a3e5d1a
BH
101 unsigned int bytes=n;
102 char *ptr = (char*)buffer;
103 int ret;
cb0af1a1
RG
104 time_t start = 0;
105 unsigned int remainingTotal = totalTimeout;
106 if (totalTimeout) {
4646277d 107 start = time(nullptr);
cb0af1a1 108 }
6a3e5d1a
BH
109 while(bytes) {
110 ret=read(fd, ptr, bytes);
111 if(ret < 0) {
112 if(errno==EAGAIN) {
cb0af1a1 113 ret=waitForData(fd, (totalTimeout == 0 || idleTimeout <= remainingTotal) ? idleTimeout : remainingTotal);
4957a608
BH
114 if(ret < 0)
115 throw NetworkError("Waiting for data read");
116 if(!ret)
117 throw NetworkError("Timeout reading data");
118 continue;
6a3e5d1a
BH
119 }
120 else
4957a608 121 throw NetworkError("Reading data: "+stringerror());
6a3e5d1a
BH
122 }
123 if(!ret) {
124 if(!throwOnEOF && n == bytes)
4957a608 125 return 0;
6a3e5d1a 126 else
4957a608 127 throw NetworkError("Did not fulfill read from TCP due to EOF");
6a3e5d1a 128 }
1e05b07c 129
6a3e5d1a
BH
130 ptr += ret;
131 bytes -= ret;
cb0af1a1 132 if (totalTimeout) {
4646277d 133 time_t now = time(nullptr);
cb0af1a1
RG
134 unsigned int elapsed = now - start;
135 if (elapsed >= remainingTotal) {
136 throw NetworkError("Timeout while reading data");
137 }
138 start = now;
139 remainingTotal -= elapsed;
140 }
6a3e5d1a
BH
141 }
142 return n;
143}
12c86877 144
6a3e5d1a 145// ditto
cb0af1a1 146static void writenWithTimeout(int fd, const void *buffer, unsigned int n, unsigned int idleTimeout)
6a3e5d1a
BH
147{
148 unsigned int bytes=n;
149 const char *ptr = (char*)buffer;
150 int ret;
151 while(bytes) {
152 ret=write(fd, ptr, bytes);
153 if(ret < 0) {
154 if(errno==EAGAIN) {
cb0af1a1 155 ret=waitForRWData(fd, false, idleTimeout, 0);
4957a608
BH
156 if(ret < 0)
157 throw NetworkError("Waiting for data write");
158 if(!ret)
159 throw NetworkError("Timeout writing data");
160 continue;
6a3e5d1a
BH
161 }
162 else
4957a608 163 throw NetworkError("Writing data: "+stringerror());
6a3e5d1a 164 }
12c86877 165 if(!ret) {
67d74e49 166 throw NetworkError("Did not fulfill TCP write due to EOF");
12c86877 167 }
1e05b07c 168
6a3e5d1a
BH
169 ptr += ret;
170 bytes -= ret;
12c86877 171 }
12c86877
BH
172}
173
02b4b703 174void TCPNameserver::sendPacket(std::unique_ptr<DNSPacket>& p, int outsock, bool last)
6a3e5d1a 175{
1f07a63f
PD
176 uint16_t len=htons(p->getString(true).length());
177
178 // this also calls p->getString; call it after our explicit call so throwsOnTruncation=true is honoured
02b4b703 179 g_rs.submitResponse(*p, false, last);
9951e2d0 180
fbaa5e09
BH
181 string buffer((const char*)&len, 2);
182 buffer.append(p->getString());
cb0af1a1 183 writenWithTimeout(outsock, buffer.c_str(), buffer.length(), d_idleTimeout);
6a3e5d1a
BH
184}
185
186
cb0af1a1 187void TCPNameserver::getQuestion(int fd, char *mesg, int pktlen, const ComboAddress &remote, unsigned int totalTime)
6a3e5d1a
BH
188try
189{
cb0af1a1 190 readnWithTimeout(fd, mesg, pktlen, d_idleTimeout, true, totalTime);
6a3e5d1a 191}
67d74e49
BH
192catch(NetworkError& ae) {
193 throw NetworkError("Error reading DNS data from TCP client "+remote.toString()+": "+ae.what());
12c86877
BH
194}
195
cb0af1a1
RG
/**
 * Check whether a connection that began at `start` has used up its allowed lifetime.
 *
 * maxConnectionDuration is in seconds; 0 means "no limit", in which case the function
 * returns false and leaves remainingTime untouched.
 * Returns true when the limit is reached (remainingTime is left untouched).
 * Otherwise returns false and stores the number of seconds still available in
 * remainingTime.
 */
static bool maxConnectionDurationReached(unsigned int maxConnectionDuration, time_t start, unsigned int& remainingTime)
{
  if (maxConnectionDuration == 0) {
    return false;
  }

  time_t elapsed = time(nullptr) - start;
  if (elapsed < 0) {
    // The wall clock was stepped backwards since `start`. Treat that as "no time
    // used" so that remainingTime can never exceed the configured maximum and the
    // signed/unsigned comparison below cannot misfire.
    elapsed = 0;
  }

  if (static_cast<unsigned long long>(elapsed) >= maxConnectionDuration) {
    return true;
  }

  // elapsed < maxConnectionDuration here, so the narrowing is lossless.
  remainingTime = maxConnectionDuration - static_cast<unsigned int>(elapsed);
  return false;
}
207
208void TCPNameserver::decrementClientCount(const ComboAddress& remote)
209{
210 if (d_maxConnectionsPerClient) {
24fb5774
RG
211 auto count = s_clientsCount.lock();
212 auto it = count->find(remote);
213 if (it == count->end()) {
214 // this is worrying, but nothing we can do at this point
215 return;
216 }
217 --it->second;
218 if (it->second == 0) {
219 count->erase(it);
cb0af1a1
RG
220 }
221 }
222}
223
0ddde5fb 224void TCPNameserver::doConnection(int fd)
12c86877 225{
519f5484 226 setThreadName("pdns/tcpConnect");
c2826d2e 227 std::unique_ptr<DNSPacket> packet;
208abc4c 228 ComboAddress remote, accountremote;
4f5e7925 229 socklen_t remotelen=sizeof(remote);
cb0af1a1
RG
230 size_t transactions = 0;
231 time_t start = 0;
232 if (d_maxConnectionDuration) {
4646277d 233 start = time(nullptr);
cb0af1a1 234 }
4f5e7925 235
4f5e7925 236 if(getpeername(fd, (struct sockaddr *)&remote, &remotelen) < 0) {
e6a9dde5 237 g_log<<Logger::Warning<<"Received question from socket which had no remote address, dropping ("<<stringerror()<<")"<<endl;
4f5e7925 238 d_connectionroom_sem->post();
a7b68ae7
RG
239 try {
240 closesocket(fd);
241 }
242 catch(const PDNSException& e) {
e6a9dde5 243 g_log<<Logger::Error<<"Error closing TCP socket: "<<e.reason<<endl;
a7b68ae7 244 }
0ddde5fb 245 return;
4f5e7925 246 }
247
3897b9e1 248 setNonBlocking(fd);
12c86877 249 try {
c2b4ccc0 250 int mesgsize=65535;
dc593046 251 boost::scoped_array<char> mesg(new char[mesgsize]);
4172a5b2
PD
252 std::optional<ComboAddress> inner_remote;
253 bool inner_tcp = false;
254
e6a9dde5 255 DLOG(g_log<<"TCP Connection accepted on fd "<<fd<<endl);
21a303f3 256 bool logDNSQueries= ::arg().mustDo("log-dns-queries");
4172a5b2
PD
257 if (g_proxyProtocolACL.match(remote)) {
258 unsigned int remainingTime = 0;
259 PacketBuffer proxyData;
260 proxyData.reserve(g_proxyProtocolMaximumSize);
261 ssize_t used;
262
263 // this for-loop ends by throwing, or by having gathered a complete proxy header
264 for (;;) {
265 used = isProxyHeaderComplete(proxyData);
266 if (used < 0) {
267 ssize_t origsize = proxyData.size();
268 proxyData.resize(origsize + -used);
269 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
270 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": maximum TCP connection duration exceeded");
271 }
272
273 try {
274 readnWithTimeout(fd, &proxyData[origsize], -used, d_idleTimeout, true, remainingTime);
275 }
276 catch(NetworkError& ae) {
277 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": "+ae.what());
278 }
279 }
280 else if (used == 0) {
281 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": PROXYv2 header was invalid");
282 }
283 else if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
284 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": PROXYv2 header too big");
285 }
286 else { // used > 0 && used <= g_proxyProtocolMaximumSize
287 break;
288 }
289 }
290 ComboAddress psource, pdestination;
291 bool proxyProto, tcp;
292 std::vector<ProxyProtocolValue> ppvalues;
293
294 used = parseProxyHeader(proxyData, proxyProto, psource, pdestination, tcp, ppvalues);
295 if (used <= 0) {
296 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": PROXYv2 header was invalid");
297 }
298 if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
299 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": PROXYv2 header was oversized");
300 }
301 inner_remote = psource;
302 inner_tcp = tcp;
208abc4c
KM
303 accountremote = psource;
304 }
305 else {
306 accountremote = remote;
4172a5b2
PD
307 }
308
12c86877 309 for(;;) {
cb0af1a1
RG
310 unsigned int remainingTime = 0;
311 transactions++;
312 if (d_maxTransactionsPerConn && transactions > d_maxTransactionsPerConn) {
e6a9dde5 313 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the number of transactions per connection, dropping.";
cb0af1a1
RG
314 break;
315 }
316 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
e6a9dde5 317 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the maximum TCP connection duration, dropping.";
cb0af1a1
RG
318 break;
319 }
6a3e5d1a
BH
320
321 uint16_t pktlen;
cb0af1a1 322 if(!readnWithTimeout(fd, &pktlen, 2, d_idleTimeout, false, remainingTime))
4957a608 323 break;
6a3e5d1a 324 else
4957a608 325 pktlen=ntohs(pktlen);
12c86877 326
366e1e5e
AT
327 // this check will always be false *if* no one touches
328 // the mesg array. pktlen can be maximum of 65535 as
1e05b07c
FM
329 // it is 2 byte unsigned variable. In getQuestion, we
330 // write to 0 up to pktlen-1 so 65535 is just right.
366e1e5e
AT
331
332 // do not remove this check as it will catch if someone
1e05b07c 333 // decreases the mesg buffer size for some reason.
c2b4ccc0 334 if(pktlen > mesgsize) {
e6a9dde5 335 g_log<<Logger::Warning<<"Received an overly large question from "<<remote.toString()<<", dropping"<<endl;
4957a608 336 break;
12c86877 337 }
1e05b07c 338
cb0af1a1 339 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
e6a9dde5 340 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the maximum TCP connection duration, dropping.";
cb0af1a1
RG
341 break;
342 }
343
344 getQuestion(fd, mesg.get(), pktlen, remote, remainingTime);
208abc4c
KM
345 S.inc("tcp-queries");
346 if (accountremote.sin4.sin_family == AF_INET6)
5fd567ec 347 S.inc("tcp6-queries");
348 else
349 S.inc("tcp4-queries");
3e579e91 350
c2826d2e 351 packet=make_unique<DNSPacket>(true);
809fe23f 352 packet->setRemote(&remote);
e9dd48f9 353 packet->d_tcp=true;
4172a5b2
PD
354 if (inner_remote) {
355 packet->d_inner_remote = inner_remote;
356 packet->d_tcp = inner_tcp;
357 }
ff76e8b4 358 packet->setSocket(fd);
c2b4ccc0 359 if(packet->parse(mesg.get(), pktlen)<0)
4957a608 360 break;
78f1d7b5
PL
361
362 if (packet->hasEDNSCookie())
363 S.inc("tcp-cookie-queries");
364
6e59a580 365 if(packet->qtype.getCode()==QType::AXFR) {
02b4b703 366 doAXFR(packet->qdomain, packet, fd);
6e59a580
KM
367 continue;
368 }
369
370 if(packet->qtype.getCode()==QType::IXFR) {
02b4b703 371 doIXFR(packet, fd);
4957a608 372 continue;
12c86877
BH
373 }
374
1e05b07c 375 std::unique_ptr<DNSPacket> reply;
c2826d2e 376 auto cached = make_unique<DNSPacket>(false);
fe498ace 377 if(logDNSQueries) {
4172a5b2 378 g_log << Logger::Notice<<"TCP Remote "<< packet->getRemoteString() <<" wants '" << packet->qdomain<<"|"<<packet->qtype.toString() <<
1b16851b 379 "', do = " <<packet->d_dnssecOk <<", bufsize = "<< packet->getMaxReplyLen();
fe498ace 380 }
bb5903e2 381
9a037bfa 382 if(PC.enabled()) {
c2826d2e 383 if(packet->couldBeCached() && PC.get(*packet, *cached)) { // short circuit - does the PacketCache recognize this question?
9a037bfa 384 if(logDNSQueries)
1b16851b 385 g_log<<": packetcache HIT"<<endl;
9a037bfa 386 cached->setRemote(&packet->d_remote);
208abc4c 387 cached->d_inner_remote = packet->d_inner_remote;
9a037bfa
KM
388 cached->d.id=packet->d.id;
389 cached->d.rd=packet->d.rd; // copy in recursion desired bit
390 cached->commitD(); // commit d to the packet inlined
391
392 sendPacket(cached, fd); // presigned, don't do it again
393 continue;
394 }
21a303f3 395 if(logDNSQueries)
1b16851b 396 g_log<<": packetcache MISS"<<endl;
bbe4b041
OM
397 } else {
398 if (logDNSQueries) {
1b16851b 399 g_log<<endl;
bbe4b041 400 }
12c86877 401 }
12c86877 402 {
24fb5774
RG
403 auto packetHandler = s_P.lock();
404 if (!*packetHandler) {
994cae6b 405 g_log<<Logger::Warning<<"TCP server is without backend connections, launching"<<endl;
24fb5774 406 *packetHandler = make_unique<PacketHandler>();
4957a608 407 }
4957a608 408
24fb5774 409 reply = (*packetHandler)->doQuestion(*packet); // we really need to ask the backend :-)
12c86877
BH
410 }
411
12c86877 412 if(!reply) // unable to write an answer?
4957a608 413 break;
b552d7b1 414
ff76e8b4 415 sendPacket(reply, fd);
15e39ee4 416#ifdef ENABLE_GSS_TSIG
c113acc3
OM
417 if (g_doGssTSIG) {
418 packet->cleanupGSS(reply->d.rcode);
419 }
15e39ee4 420#endif
12c86877 421 }
12c86877 422 }
3f81d239 423 catch(PDNSException &ae) {
24fb5774 424 s_P.lock()->reset(); // on next call, backend will be recycled
e6a9dde5 425 g_log<<Logger::Error<<"TCP nameserver had error, cycling backend: "<<ae.reason<<endl;
ef1d2f44 426 }
0afa9049 427 catch(NetworkError &e) {
e6a9dde5 428 g_log<<Logger::Info<<"TCP Connection Thread died because of network error: "<<e.what()<<endl;
0afa9049
BH
429 }
430
adc10f99 431 catch(std::exception &e) {
3d88c4c6
KM
432 s_P.lock()->reset(); // on next call, backend will be recycled
433 g_log << Logger::Error << "TCP Connection Thread died because of STL error, cycling backend: " << e.what() << endl;
12c86877
BH
434 }
435 catch( ... )
436 {
3d88c4c6
KM
437 s_P.lock()->reset(); // on next call, backend will be recycled
438 g_log << Logger::Error << "TCP Connection Thread caught unknown exception, cycling backend." << endl;
12c86877 439 }
12c86877 440 d_connectionroom_sem->post();
a7b68ae7
RG
441
442 try {
443 closesocket(fd);
444 }
445 catch(const PDNSException& e) {
e6a9dde5 446 g_log<<Logger::Error<<"Error closing TCP socket: "<<e.reason<<endl;
a7b68ae7 447 }
cb0af1a1 448 decrementClientCount(remote);
12c86877
BH
449}
450
78bcb858 451
24fb5774 452bool TCPNameserver::canDoAXFR(std::unique_ptr<DNSPacket>& q, bool isAXFR, std::unique_ptr<PacketHandler>& packetHandler)
12c86877 453{
379ab445 454 if(::arg().mustDo("disable-axfr"))
318c3ec6
BH
455 return false;
456
4172a5b2 457 string logPrefix=string(isAXFR ? "A" : "I")+"XFR-out zone '"+q->qdomain.toLogString()+"', client '"+q->getInnerRemote().toStringWithPort()+"', ";
efe7948e 458
78bcb858
BH
459 if(q->d_havetsig) { // if you have one, it must be good
460 TSIGRecordContent trc;
7abbc40f
PD
461 DNSName keyname;
462 string secret;
24fb5774 463 if(!q->checkForCorrectTSIG(packetHandler->getBackend(), &keyname, &secret, &trc)) {
78bcb858 464 return false;
7f9ac49b
AT
465 } else {
466 getTSIGHashEnum(trc.d_algoName, q->d_tsig_algo);
c113acc3 467#ifdef ENABLE_GSS_TSIG
f13cde2a
OM
468 if (g_doGssTSIG && q->d_tsig_algo == TSIG_GSS) {
469 GssContext gssctx(keyname);
470 if (!gssctx.getPeerPrincipal(q->d_peer_principal)) {
471 g_log<<Logger::Warning<<"Failed to extract peer principal from GSS context with keyname '"<<keyname<<"'"<<endl;
472 }
473 }
c113acc3 474#endif
7f9ac49b
AT
475 }
476
24fb5774 477 DNSSECKeeper dk(packetHandler->getBackend());
c113acc3 478#ifdef ENABLE_GSS_TSIG
f13cde2a
OM
479 if (g_doGssTSIG && q->d_tsig_algo == TSIG_GSS) {
480 vector<string> princs;
481 packetHandler->getBackend()->getDomainMetadata(q->qdomain, "GSS-ALLOW-AXFR-PRINCIPAL", princs);
482 for(const std::string& princ : princs) {
483 if (q->d_peer_principal == princ) {
484 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: TSIG signed request with authorized principal '"<<q->d_peer_principal<<"' and algorithm 'gss-tsig'"<<endl;
485 return true;
486 }
487 }
488 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' denied: TSIG signed request with principal '"<<q->d_peer_principal<<"' and algorithm 'gss-tsig' is not permitted"<<endl;
489 return false;
490 }
c113acc3 491#endif
3d03fee8 492 if(!dk.TSIGGrantsAccess(q->qdomain, keyname)) {
efe7948e 493 g_log<<Logger::Warning<<logPrefix<<"denied: key with name '"<<keyname<<"' and algorithm '"<<getTSIGAlgoName(q->d_tsig_algo)<<"' does not grant access"<<endl;
78bcb858
BH
494 return false;
495 }
496 else {
efe7948e 497 g_log<<Logger::Notice<<logPrefix<<"allowed: TSIG signed request with authorized key '"<<keyname<<"' and algorithm '"<<getTSIGAlgoName(q->d_tsig_algo)<<"'"<<endl;
78bcb858
BH
498 return true;
499 }
500 }
1e05b07c 501
93afc0a3 502 // cerr<<"checking allow-axfr-ips"<<endl;
4172a5b2 503 if(!(::arg()["allow-axfr-ips"].empty()) && d_ng.match( q->getInnerRemote() )) {
efe7948e 504 g_log<<Logger::Notice<<logPrefix<<"allowed: client IP is in allow-axfr-ips"<<endl;
12c86877 505 return true;
ab5edd12 506 }
93afc0a3
PD
507
508 FindNS fns;
509
510 // cerr<<"doing per-zone-axfr-acls"<<endl;
511 SOAData sd;
24fb5774 512 if(packetHandler->getBackend()->getSOAUncached(q->qdomain,sd)) {
93afc0a3 513 // cerr<<"got backend and SOA"<<endl;
93afc0a3 514 vector<string> acl;
24fb5774 515 packetHandler->getBackend()->getDomainMetadata(q->qdomain, "ALLOW-AXFR-FROM", acl);
d7f67000 516 for (const auto & i : acl) {
93afc0a3 517 // cerr<<"matching against "<<*i<<endl;
d7f67000 518 if(pdns_iequals(i, "AUTO-NS")) {
93afc0a3
PD
519 // cerr<<"AUTO-NS magic please!"<<endl;
520
521 DNSResourceRecord rr;
7abbc40f 522 set<DNSName> nsset;
93afc0a3 523
13b80e77
CH
524 sd.db->lookup(QType(QType::NS), q->qdomain, sd.domain_id);
525 while (sd.db->get(rr)) {
290a083d 526 nsset.insert(DNSName(rr.content));
13b80e77 527 }
7abbc40f 528 for(const auto & j: nsset) {
24fb5774 529 vector<string> nsips=fns.lookup(j, packetHandler->getBackend());
d7f67000 530 for(const auto & nsip : nsips) {
93afc0a3 531 // cerr<<"got "<<*k<<" from AUTO-NS"<<endl;
4172a5b2 532 if(nsip == q->getInnerRemote().toString())
93afc0a3
PD
533 {
534 // cerr<<"got AUTO-NS hit"<<endl;
efe7948e 535 g_log<<Logger::Notice<<logPrefix<<"allowed: client IP is in NSset"<<endl;
93afc0a3
PD
536 return true;
537 }
538 }
539 }
540 }
541 else
542 {
d7f67000 543 Netmask nm = Netmask(i);
4172a5b2 544 if(nm.match( q->getInnerRemote() ))
93afc0a3 545 {
efe7948e 546 g_log<<Logger::Notice<<logPrefix<<"allowed: client IP is in per-zone ACL"<<endl;
93afc0a3
PD
547 // cerr<<"hit!"<<endl;
548 return true;
549 }
550 }
551 }
1e05b07c 552 }
93afc0a3 553
12c86877
BH
554 extern CommunicatorClass Communicator;
555
4172a5b2 556 if(Communicator.justNotified(q->qdomain, q->getInnerRemote().toString())) { // we just notified this ip
efe7948e 557 g_log<<Logger::Notice<<logPrefix<<"allowed: client IP is from recently notified secondary"<<endl;
12c86877
BH
558 return true;
559 }
560
efe7948e 561 g_log<<Logger::Warning<<logPrefix<<"denied: client IP has no permission"<<endl;
12c86877
BH
562 return false;
563}
564
b317b510 565namespace {
54d84273
PD
566 struct NSECXEntry
567 {
22a0ef16 568 NSECBitmap d_set;
54d84273 569 unsigned int d_ttl;
feef1ece 570 bool d_auth;
54d84273 571 };
8e9b7d99 572
a5188bcd 573 static std::unique_ptr<DNSPacket> getFreshAXFRPacket(std::unique_ptr<DNSPacket>& q)
54d84273 574 {
c2826d2e 575 std::unique_ptr<DNSPacket> ret = std::unique_ptr<DNSPacket>(q->replyPacket());
54d84273
PD
576 ret->setCompress(false);
577 ret->d_dnssecOk=false; // RFC 5936, 2.2.5
578 ret->d_tcp = true;
579 return ret;
580 }
8e9b7d99
BH
581}
582
54d84273 583
12c86877 584/** do the actual zone transfer. Return 0 in case of error, 1 in case of success */
5cf23dba 585int TCPNameserver::doAXFR(const DNSName &target, std::unique_ptr<DNSPacket>& q, int outsock) // NOLINT(readability-function-cognitive-complexity)
12c86877 586{
4172a5b2 587 string logPrefix="AXFR-out zone '"+target.toLogString()+"', client '"+q->getRemoteString()+"', ";
efe7948e 588
c2826d2e 589 std::unique_ptr<DNSPacket> outpacket= getFreshAXFRPacket(q);
c67e46a1 590 if(q->d_dnssecOk)
05e24311 591 outpacket->d_dnssecOk=true; // RFC 5936, 2.2.5 'SHOULD'
22893145 592
efe7948e 593 g_log<<Logger::Warning<<logPrefix<<"transfer initiated"<<endl;
12c86877 594
22893145 595 // determine if zone exists and AXFR is allowed using existing backend before spawning a new backend.
91ad8c11 596 SOAData sd;
12c86877 597 {
24fb5774 598 auto packetHandler = s_P.lock();
efe7948e 599 DLOG(g_log<<logPrefix<<"looking for SOA"<<endl); // find domain_id via SOA and list complete domain. No SOA, no AXFR
24fb5774 600 if(!*packetHandler) {
994cae6b 601 g_log<<Logger::Warning<<"TCP server is without backend connections in doAXFR, launching"<<endl;
24fb5774 602 *packetHandler = make_unique<PacketHandler>();
12a965c5 603 }
12c86877 604
ea99d474 605 // canDoAXFR does all the ACL checks, and has the if(disable-axfr) shortcut, call it first.
24fb5774 606 if (!canDoAXFR(q, true, *packetHandler)) {
efe7948e 607 g_log<<Logger::Warning<<logPrefix<<"failed: client may not request AXFR"<<endl;
9c556f63 608 outpacket->setRcode(RCode::NotAuth);
8090f5a2
AT
609 sendPacket(outpacket,outsock);
610 return 0;
611 }
612
91ad8c11 613 if (!(*packetHandler)->getBackend()->getSOAUncached(target, sd)) {
efe7948e 614 g_log<<Logger::Warning<<logPrefix<<"failed: not authoritative"<<endl;
9c556f63 615 outpacket->setRcode(RCode::NotAuth);
ff76e8b4 616 sendPacket(outpacket,outsock);
12c86877
BH
617 return 0;
618 }
3de83124 619 }
22893145 620
8e9b7d99 621 UeberBackend db;
79ba7763 622 if(!db.getSOAUncached(target, sd)) {
efe7948e 623 g_log<<Logger::Warning<<logPrefix<<"failed: not authoritative in second instance"<<endl;
79ba7763 624 outpacket->setRcode(RCode::NotAuth);
ff76e8b4 625 sendPacket(outpacket,outsock);
3de83124 626 return 0;
12c86877 627 }
3de83124 628
8a66a927
KM
629 bool securedZone = false;
630 bool presignedZone = false;
631 bool NSEC3Zone = false;
632 bool narrow = false;
633
91ad8c11
KM
634 DomainInfo di;
635 bool isCatalogZone = sd.db->getDomainInfo(target, di, false) && di.isCatalogType();
636
8a66a927
KM
637 NSEC3PARAMRecordContent ns3pr;
638
ea99d474 639 DNSSECKeeper dk(&db);
40b3959a 640 DNSSECKeeper::clearCaches(target);
91ad8c11 641 if (!isCatalogZone) {
8a66a927
KM
642 securedZone = dk.isSecuredZone(target);
643 presignedZone = dk.isPresigned(target);
644 }
22893145 645
dacacb23 646 if(securedZone && dk.getNSEC3PARAM(target, &ns3pr, &narrow)) {
22893145
CH
647 NSEC3Zone=true;
648 if(narrow) {
efe7948e 649 g_log<<Logger::Warning<<logPrefix<<"failed: not doing AXFR of an NSEC3 narrow zone"<<endl;
994cae6b
KM
650 outpacket->setRcode(RCode::Refused);
651 sendPacket(outpacket,outsock);
652 return 0;
22893145
CH
653 }
654 }
655
78bcb858 656 TSIGRecordContent trc;
7abbc40f
PD
657 DNSName tsigkeyname;
658 string tsigsecret;
78bcb858 659
ea3816cf 660 bool haveTSIGDetails = q->getTSIGDetails(&trc, &tsigkeyname);
78bcb858 661
60a1c204 662 if(haveTSIGDetails && !tsigkeyname.empty()) {
2c26f25a 663 string tsig64;
5e8d94f1 664 DNSName algorithm=trc.d_algoName;
290a083d 665 if (algorithm == DNSName("hmac-md5.sig-alg.reg.int"))
666 algorithm = DNSName("hmac-md5");
f13cde2a
OM
667 if (algorithm != DNSName("gss-tsig")) {
668 if(!db.getTSIGKey(tsigkeyname, algorithm, tsig64)) {
669 g_log<<Logger::Warning<<logPrefix<<"TSIG key not found"<<endl;
670 return 0;
671 }
672 if (B64Decode(tsig64, tsigsecret) == -1) {
673 g_log<<Logger::Error<<logPrefix<<"unable to Base-64 decode TSIG key '"<<tsigkeyname<<"'"<<endl;
674 return 0;
675 }
84fc3f8b 676 }
78bcb858 677 }
1e05b07c
FM
678
679
8267bd2c 680 // SOA *must* go out first, our signing pipe might reorder
efe7948e 681 DLOG(g_log<<logPrefix<<"sending out SOA"<<endl);
13f9e280 682 DNSZoneRecord soa = makeEditedDNSZRFromSOAData(dk, sd);
9bbcf03a 683 outpacket->addRecord(DNSZoneRecord(soa));
3c68fb14 684 if(securedZone && !presignedZone) {
7abbc40f 685 set<DNSName> authSet;
8d3cbffa 686 authSet.insert(target);
ea99d474 687 addRRSigs(dk, db, authSet, outpacket->getRRS());
8d3cbffa 688 }
1e05b07c 689
60a1c204 690 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 691 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac); // first answer is 'normal'
1e05b07c 692
02b4b703 693 sendPacket(outpacket, outsock, false);
1e05b07c 694
78bcb858 695 trc.d_mac = outpacket->d_trc.d_mac;
8267bd2c 696 outpacket = getFreshAXFRPacket(q);
04f5504d 697
1e05b07c 698
90ba52e0 699 DNSZoneRecord zrr;
04f5504d 700 vector<DNSZoneRecord> zrrs;
794c2f92 701
04f5504d
KM
702 zrr.dr.d_name = target;
703 zrr.dr.d_ttl = sd.minimum;
95b70d3a 704
04f5504d
KM
705 if(securedZone && !presignedZone) { // this is where the DNSKEYs, CDNSKEYs and CDSs go in
706 bool doCDNSKEY = true, doCDS = true;
95b70d3a
KM
707 string publishCDNSKEY, publishCDS;
708 dk.getPublishCDNSKEY(q->qdomain, publishCDNSKEY);
709 dk.getPublishCDS(q->qdomain, publishCDS);
04f5504d 710
95b70d3a 711 set<uint32_t> entryPointIds;
04f5504d
KM
712 DNSSECKeeper::keyset_t entryPoints = dk.getEntryPoints(target);
713 for (auto const& value : entryPoints) {
95b70d3a 714 entryPointIds.insert(value.second.id);
04f5504d 715 }
95b70d3a 716
04f5504d 717 DNSSECKeeper::keyset_t keys = dk.getKeys(target);
95b70d3a
KM
718 for(const DNSSECKeeper::keyset_t::value_type& value : keys) {
719 if (!value.second.published) {
720 continue;
991a0977 721 }
95b70d3a 722 zrr.dr.d_type = QType::DNSKEY;
d06dcda4 723 zrr.dr.setContent(std::make_shared<DNSKEYRecordContent>(value.first.getDNSKEY()));
95b70d3a 724 DNSName keyname = NSEC3Zone ? DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, zrr.dr.d_name))) : zrr.dr.d_name;
64d22929 725 zrrs.push_back(zrr);
95b70d3a
KM
726
727 // generate CDS and CDNSKEY records
04f5504d 728 if(doCDNSKEY && entryPointIds.count(value.second.id) > 0){
481508ab 729 if(!publishCDNSKEY.empty()) {
95b70d3a 730 zrr.dr.d_type=QType::CDNSKEY;
481508ab 731 if (publishCDNSKEY == "0") {
04f5504d 732 doCDNSKEY = false;
d06dcda4 733 zrr.dr.setContent(PacketHandler::s_deleteCDNSKEYContent);
04f5504d 734 zrrs.push_back(zrr);
481508ab 735 } else {
d06dcda4 736 zrr.dr.setContent(std::make_shared<DNSKEYRecordContent>(value.first.getDNSKEY()));
04f5504d 737 zrrs.push_back(zrr);
481508ab 738 }
95b70d3a
KM
739 }
740
04f5504d 741 if(doCDS && !publishCDS.empty()){
95b70d3a
KM
742 zrr.dr.d_type=QType::CDS;
743 vector<string> digestAlgos;
744 stringtok(digestAlgos, publishCDS, ", ");
481508ab 745 if(std::find(digestAlgos.begin(), digestAlgos.end(), "0") != digestAlgos.end()) {
cd7c3624 746 doCDS = false;
d06dcda4 747 zrr.dr.setContent(PacketHandler::s_deleteCDSContent);
04f5504d 748 zrrs.push_back(zrr);
481508ab
KM
749 } else {
750 for(auto const &digestAlgo : digestAlgos) {
d06dcda4 751 zrr.dr.setContent(std::make_shared<DSRecordContent>(makeDSFromDNSKey(target, value.first.getDNSKEY(), pdns::checked_stoi<uint8_t>(digestAlgo))));
04f5504d 752 zrrs.push_back(zrr);
481508ab 753 }
95b70d3a 754 }
991a0977
PL
755 }
756 }
757 }
95b70d3a 758
6dae726d
PD
759 }
760
95c5bc40 761 if(NSEC3Zone) { // now stuff in the NSEC3PARAM
04f5504d 762 uint8_t flags = ns3pr.d_flags;
90ba52e0 763 zrr.dr.d_type = QType::NSEC3PARAM;
95c5bc40 764 ns3pr.d_flags = 0;
d06dcda4 765 zrr.dr.setContent(std::make_shared<NSEC3PARAMRecordContent>(ns3pr));
b8adb30d 766 ns3pr.d_flags = flags;
6e8694df 767 DNSName keyname = DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, zrr.dr.d_name)));
64d22929 768 zrrs.push_back(zrr);
ce464268 769 }
1e05b07c 770
8a66a927
KM
771 const bool rectify = !(presignedZone || ::arg().mustDo("disable-axfr-rectify"));
772 set<DNSName> qnames, nsset, terms;
773
774 // Catalog zone start
775 if (di.kind == DomainInfo::Producer) {
776 // Ignore all records except NS at apex
777 sd.db->lookup(QType::NS, target, di.id);
778 while (sd.db->get(zrr)) {
779 zrrs.emplace_back(zrr);
780 }
781 if (zrrs.empty()) {
782 zrr.dr.d_name = target;
783 zrr.dr.d_ttl = 0;
784 zrr.dr.d_type = QType::NS;
d06dcda4 785 zrr.dr.setContent(std::make_shared<NSRecordContent>("invalid."));
8a66a927
KM
786 zrrs.emplace_back(zrr);
787 }
788
789 zrrs.emplace_back(CatalogInfo::getCatalogVersionRecord(target));
790
791 vector<CatalogInfo> members;
792 sd.db->getCatalogMembers(target, members, CatalogInfo::CatalogType::Producer);
793 for (const auto& ci : members) {
794 ci.toDNSZoneRecords(target, zrrs);
795 }
796 if (members.empty()) {
797 g_log << Logger::Warning << logPrefix << "catalog zone '" << target << "' has no members" << endl;
798 }
799 goto send;
800 }
801 // Catalog zone end
802
0c350cb5 803 // now start list zone
91ad8c11 804 if (!sd.db->list(target, sd.domain_id, isCatalogZone)) {
efe7948e 805 g_log<<Logger::Error<<logPrefix<<"backend signals error condition, aborting AXFR"<<endl;
9c556f63 806 outpacket->setRcode(RCode::ServFail);
0c350cb5
BH
807 sendPacket(outpacket,outsock);
808 return 0;
809 }
810
90ba52e0 811 while(sd.db->get(zrr)) {
64d22929
KM
812 if (!presignedZone) {
813 if (zrr.dr.d_type == QType::RRSIG) {
814 continue;
815 }
816 if (zrr.dr.d_type == QType::DNSKEY || zrr.dr.d_type == QType::CDNSKEY || zrr.dr.d_type == QType::CDS) {
817 if(!::arg().mustDo("direct-dnskey")) {
818 continue;
819 } else {
820 zrr.dr.d_ttl = sd.minimum;
821 }
822 }
823 }
8bf260dd 824 zrr.dr.d_name.makeUsLowerCase();
90ba52e0 825 if(zrr.dr.d_name.isPartOf(target)) {
833b07fe 826 if (zrr.dr.d_type == QType::ALIAS && (::arg().mustDo("outgoing-axfr-expand-alias") || ::arg()["outgoing-axfr-expand-alias"] == "ignore-errors")) {
90ba52e0 827 vector<DNSZoneRecord> ips;
d06dcda4
RG
828 int ret1 = stubDoResolve(getRR<ALIASRecordContent>(zrr.dr)->getContent(), QType::A, ips);
829 int ret2 = stubDoResolve(getRR<ALIASRecordContent>(zrr.dr)->getContent(), QType::AAAA, ips);
833b07fe 830 if (ret1 != RCode::NoError || ret2 != RCode::NoError) {
50ff05d7 831 if (::arg()["outgoing-axfr-expand-alias"] == "ignore-errors") {
833b07fe 832 if (ret1 != RCode::NoError) {
50ff05d7 833 g_log << Logger::Error << logPrefix << zrr.dr.d_name.toLogString() << ": error resolving A record for ALIAS target " << zrr.dr.getContent()->getZoneRepresentation() << ", continuing AXFR" << endl;
833b07fe
KD
834 }
835 if (ret2 != RCode::NoError) {
50ff05d7 836 g_log << Logger::Error << logPrefix << zrr.dr.d_name.toLogString() << ": error resolving AAAA record for ALIAS target " << zrr.dr.getContent()->getZoneRepresentation() << ", continuing AXFR" << endl;
833b07fe
KD
837 }
838 }
50ff05d7
CH
839 else {
840 g_log << Logger::Warning << logPrefix << zrr.dr.d_name.toLogString() << ": error resolving for ALIAS " << zrr.dr.getContent()->getZoneRepresentation() << ", aborting AXFR" << endl;
841 outpacket->setRcode(RCode::ServFail);
842 sendPacket(outpacket, outsock);
843 return 0;
844 }
273d88b2 845 }
d06dcda4 846 for (auto& ip: ips) {
90ba52e0 847 zrr.dr.d_type = ip.dr.d_type;
d06dcda4 848 zrr.dr.setContent(ip.dr.getContent());
90ba52e0 849 zrrs.push_back(zrr);
d86e1bf7 850 }
a68df29d 851 continue;
d86e1bf7
PD
852 }
853
b772ffea 854 if (rectify) {
90ba52e0 855 if (zrr.dr.d_type) {
856 qnames.insert(zrr.dr.d_name);
857 if(zrr.dr.d_type == QType::NS && zrr.dr.d_name!=target)
858 nsset.insert(zrr.dr.d_name);
b772ffea
KM
859 } else {
860 // remove existing ents
861 continue;
862 }
863 }
a68df29d 864 zrrs.push_back(zrr);
b772ffea 865 } else {
90ba52e0 866 if (zrr.dr.d_type)
efe7948e 867 g_log<<Logger::Warning<<logPrefix<<"zone contains out-of-zone data '"<<zrr.dr.d_name<<"|"<<DNSRecordContent::NumberToType(zrr.dr.d_type)<<"', ignoring"<<endl;
b772ffea
KM
868 }
869 }
870
d06dcda4 871 for (auto& loopRR : zrrs) {
bdbee377
PL
872 if ((loopRR.dr.d_type == QType::SVCB || loopRR.dr.d_type == QType::HTTPS)) {
873 // Process auto hints
874 // TODO this is an almost copy of the code in the packethandler
875 auto rrc = getRR<SVCBBaseRecordContent>(loopRR.dr);
876 if (rrc == nullptr) {
877 continue;
878 }
d06dcda4
RG
879 auto newRRC = rrc->clone();
880 if (!newRRC) {
881 continue;
882 }
883 DNSName svcTarget = newRRC->getTarget().isRoot() ? loopRR.dr.d_name : newRRC->getTarget();
884 if (newRRC->autoHint(SvcParam::ipv4hint)) {
bdbee377
PL
885 sd.db->lookup(QType::A, svcTarget, sd.domain_id);
886 vector<ComboAddress> hints;
887 DNSZoneRecord rr;
888 while (sd.db->get(rr)) {
889 auto arrc = getRR<ARecordContent>(rr.dr);
890 hints.push_back(arrc->getCA());
891 }
892 if (hints.size() == 0) {
d06dcda4 893 newRRC->removeParam(SvcParam::ipv4hint);
bdbee377 894 } else {
d06dcda4 895 newRRC->setHints(SvcParam::ipv4hint, hints);
bdbee377
PL
896 }
897 }
898
d06dcda4 899 if (newRRC->autoHint(SvcParam::ipv6hint)) {
bdbee377
PL
900 sd.db->lookup(QType::AAAA, svcTarget, sd.domain_id);
901 vector<ComboAddress> hints;
902 DNSZoneRecord rr;
903 while (sd.db->get(rr)) {
904 auto arrc = getRR<AAAARecordContent>(rr.dr);
905 hints.push_back(arrc->getCA());
906 }
907 if (hints.size() == 0) {
d06dcda4 908 newRRC->removeParam(SvcParam::ipv6hint);
bdbee377 909 } else {
d06dcda4 910 newRRC->setHints(SvcParam::ipv6hint, hints);
bdbee377
PL
911 }
912 }
d06dcda4
RG
913
914 loopRR.dr.setContent(std::move(newRRC));
bdbee377
PL
915 }
916 }
917
75f2589f 918 // Group records by name and type, signpipe stumbles over interrupted rrsets
22a676e0 919 if(securedZone && !presignedZone) {
8daafcc1 920 sort(zrrs.begin(), zrrs.end(), [](const DNSZoneRecord& a, const DNSZoneRecord& b) {
905dae56 921 return std::tie(a.dr.d_name, a.dr.d_type) < std::tie(b.dr.d_name, b.dr.d_type);
8daafcc1
KM
922 });
923 }
75f2589f 924
b772ffea
KM
925 if(rectify) {
926 // set auth
2010ac95
RG
927 for(DNSZoneRecord &loopZRR : zrrs) {
928 loopZRR.auth=true;
929 if (loopZRR.dr.d_type != QType::NS || loopZRR.dr.d_name!=target) {
930 DNSName shorter(loopZRR.dr.d_name);
b772ffea 931 do {
e325f20c 932 if (shorter==target) // apex is always auth
cb045f61 933 break;
2010ac95
RG
934 if(nsset.count(shorter) && !(loopZRR.dr.d_name==shorter && loopZRR.dr.d_type == QType::DS)) {
935 loopZRR.auth=false;
cb045f61 936 break;
9f70b77a 937 }
7abbc40f 938 } while(shorter.chopOff());
9f70b77a 939 }
b772ffea
KM
940 }
941
942 if(NSEC3Zone) {
943 // ents are only required for NSEC3 zones
944 uint32_t maxent = ::arg().asNum("max-ent-entries");
6ded341a 945 set<DNSName> nsec3set, nonterm;
2010ac95 946 for (auto &loopZRR: zrrs) {
6ded341a 947 bool skip=false;
2010ac95 948 DNSName shorter = loopZRR.dr.d_name;
6ded341a
KM
949 if (shorter != target && shorter.chopOff() && shorter != target) {
950 do {
951 if(nsset.count(shorter)) {
952 skip=true;
953 break;
954 }
955 } while(shorter.chopOff() && shorter != target);
956 }
2010ac95
RG
957 shorter = loopZRR.dr.d_name;
958 if(!skip && (loopZRR.dr.d_type != QType::NS || !ns3pr.d_flags)) {
6ded341a
KM
959 do {
960 if(!nsec3set.count(shorter)) {
961 nsec3set.insert(shorter);
962 }
963 } while(shorter != target && shorter.chopOff());
964 }
965 }
966
2010ac95
RG
967 for(DNSZoneRecord &loopZRR : zrrs) {
968 DNSName shorter(loopZRR.dr.d_name);
e325f20c 969 while(shorter != target && shorter.chopOff()) {
6ded341a 970 if(!qnames.count(shorter) && !nonterm.count(shorter) && nsec3set.count(shorter)) {
b772ffea 971 if(!(maxent)) {
efe7948e
KM
972 g_log<<Logger::Warning<<logPrefix<<"zone has too many empty non terminals, aborting AXFR"<<endl;
973 outpacket->setRcode(RCode::ServFail);
974 sendPacket(outpacket,outsock);
b772ffea
KM
975 return 0;
976 }
6ded341a
KM
977 nonterm.insert(shorter);
978 --maxent;
b772ffea
KM
979 }
980 }
981 }
982
9e23e712 983 for(const auto& nt : nonterm) {
2010ac95
RG
984 DNSZoneRecord tempRR;
985 tempRR.dr.d_name=nt;
986 tempRR.dr.d_type=QType::ENT;
987 tempRR.auth=true;
988 zrrs.push_back(tempRR);
b772ffea
KM
989 }
990 }
991 }
992
8a66a927 993send:
b772ffea 994
12c86877 995 /* now write all other records */
04f5504d
KM
996
997 typedef map<DNSName, NSECXEntry, CanonDNSNameCompare> nsecxrepo_t;
998 nsecxrepo_t nsecxrepo;
999
3af419da 1000 ChunkedSigningPipe csp(target, (securedZone && !presignedZone), ::arg().asNum("signing-threads", 1), ::arg().mustDo("workaround-11804") ? 1 : 100);
04f5504d 1001
6e8694df 1002 DNSName keyname;
3370c993 1003 unsigned int udiff;
1c6d9830
BH
1004 DTime dt;
1005 dt.set();
2010ac95 1006 for(DNSZoneRecord &loopZRR : zrrs) {
2010ac95
RG
1007 if(securedZone && (loopZRR.auth || loopZRR.dr.d_type == QType::NS)) {
1008 if (NSEC3Zone || loopZRR.dr.d_type) {
3c68fb14
KM
1009 if (presignedZone && NSEC3Zone && loopZRR.dr.d_type == QType::RRSIG && getRR<RRSIGRecordContent>(loopZRR.dr)->d_type == QType::NSEC3) {
1010 keyname = loopZRR.dr.d_name.makeRelative(sd.qname);
1011 } else {
1012 keyname = NSEC3Zone ? DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, loopZRR.dr.d_name))) : loopZRR.dr.d_name;
1013 }
b5baefaf 1014 NSECXEntry& ne = nsecxrepo[keyname];
192bcba2 1015 ne.d_ttl = sd.getNegativeTTL();
3c68fb14
KM
1016 ne.d_auth = (ne.d_auth || loopZRR.auth || (NSEC3Zone && (!ns3pr.d_flags)));
1017 if (loopZRR.dr.d_type && loopZRR.dr.d_type != QType::RRSIG) {
22a0ef16 1018 ne.d_set.set(loopZRR.dr.d_type);
b5baefaf
PD
1019 }
1020 }
b317b510 1021 }
b5baefaf 1022
2010ac95 1023 if (!loopZRR.dr.d_type)
b5baefaf
PD
1024 continue; // skip empty non-terminals
1025
2010ac95 1026 if(loopZRR.dr.d_type == QType::SOA)
12c86877 1027 continue; // skip SOA - would indicate end of AXFR
add640c0 1028
2010ac95 1029 if(csp.submit(loopZRR)) {
1c6d9830
BH
1030 for(;;) {
1031 outpacket->getRRS() = csp.getChunk();
1032 if(!outpacket->getRRS().empty()) {
60a1c204 1033 if(haveTSIGDetails && !tsigkeyname.empty())
54d84273 1034 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
02b4b703 1035 sendPacket(outpacket, outsock, false);
78bcb858 1036 trc.d_mac=outpacket->d_trc.d_mac;
1c6d9830
BH
1037 outpacket=getFreshAXFRPacket(q);
1038 }
1039 else
1040 break;
1041 }
12c86877
BH
1042 }
1043 }
78bcb858 1044 /*
3370c993 1045 udiff=dt.udiffNoReset();
1c6d9830
BH
1046 cerr<<"Starting NSEC: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<csp.d_signed<<" / "<<udiff/1000000.0<<endl;
1047 cerr<<"Outstanding: "<<csp.d_outstanding<<", "<<csp.d_queued - csp.d_signed << endl;
1048 cerr<<"Ready for consumption: "<<csp.getReady()<<endl;
78bcb858 1049 */
feef1ece 1050 if(securedZone) {
4888e4b2 1051 if(NSEC3Zone) {
9d3151d9 1052 for(nsecxrepo_t::const_iterator iter = nsecxrepo.begin(); iter != nsecxrepo.end(); ++iter) {
3c68fb14 1053 if(iter->second.d_auth) {
feef1ece 1054 NSEC3RecordContent n3rc;
22a0ef16 1055 n3rc.set(iter->second.d_set);
27d4a65b
RG
1056 const auto numberOfTypesSet = n3rc.numberOfTypesSet();
1057 if (numberOfTypesSet != 0 && (numberOfTypesSet != 1 || !n3rc.isSet(QType::NS))) {
1058 n3rc.set(QType::RRSIG);
1059 }
1060 n3rc.d_salt = ns3pr.d_salt;
feef1ece
PD
1061 n3rc.d_flags = ns3pr.d_flags;
1062 n3rc.d_iterations = ns3pr.d_iterations;
690b86b7 1063 n3rc.d_algorithm = DNSSECKeeper::DIGEST_SHA1; // SHA1, fixed in PowerDNS for now
feef1ece 1064 nsecxrepo_t::const_iterator inext = iter;
cb167afd 1065 ++inext;
feef1ece
PD
1066 if(inext == nsecxrepo.end())
1067 inext = nsecxrepo.begin();
3c68fb14 1068 while(!inext->second.d_auth && inext != iter)
feef1ece 1069 {
cb167afd 1070 ++inext;
feef1ece
PD
1071 if(inext == nsecxrepo.end())
1072 inext = nsecxrepo.begin();
1073 }
6e8694df
KM
1074 n3rc.d_nexthash = fromBase32Hex(inext->first.toStringNoDot());
1075 zrr.dr.d_name = iter->first+sd.qname;
90ba52e0 1076
192bcba2 1077 zrr.dr.d_ttl = sd.getNegativeTTL();
d06dcda4 1078 zrr.dr.setContent(std::make_shared<NSEC3RecordContent>(std::move(n3rc)));
90ba52e0 1079 zrr.dr.d_type = QType::NSEC3;
1080 zrr.dr.d_place = DNSResourceRecord::ANSWER;
1081 zrr.auth=true;
1082 if(csp.submit(zrr)) {
feef1ece
PD
1083 for(;;) {
1084 outpacket->getRRS() = csp.getChunk();
1085 if(!outpacket->getRRS().empty()) {
60a1c204 1086 if(haveTSIGDetails && !tsigkeyname.empty())
feef1ece 1087 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
02b4b703 1088 sendPacket(outpacket, outsock, false);
feef1ece
PD
1089 trc.d_mac=outpacket->d_trc.d_mac;
1090 outpacket=getFreshAXFRPacket(q);
1091 }
1092 else
1093 break;
1c6d9830 1094 }
1c6d9830 1095 }
8e9b7d99 1096 }
4888e4b2
BH
1097 }
1098 }
9d3151d9 1099 else for(nsecxrepo_t::const_iterator iter = nsecxrepo.begin(); iter != nsecxrepo.end(); ++iter) {
ed9c3a50 1100 NSECRecordContent nrc;
22a0ef16 1101 nrc.set(iter->second.d_set);
27d4a65b
RG
1102 nrc.set(QType::RRSIG);
1103 nrc.set(QType::NSEC);
6e8694df
KM
1104
1105 if(boost::next(iter) != nsecxrepo.end())
1106 nrc.d_next = boost::next(iter)->first;
ed9c3a50 1107 else
6e8694df
KM
1108 nrc.d_next=nsecxrepo.begin()->first;
1109 zrr.dr.d_name = iter->first;
1110
192bcba2 1111 zrr.dr.d_ttl = sd.getNegativeTTL();
d06dcda4 1112 zrr.dr.setContent(std::make_shared<NSECRecordContent>(std::move(nrc)));
90ba52e0 1113 zrr.dr.d_type = QType::NSEC;
1114 zrr.dr.d_place = DNSResourceRecord::ANSWER;
1115 zrr.auth=true;
1116 if(csp.submit(zrr)) {
1c6d9830
BH
1117 for(;;) {
1118 outpacket->getRRS() = csp.getChunk();
1119 if(!outpacket->getRRS().empty()) {
60a1c204 1120 if(haveTSIGDetails && !tsigkeyname.empty())
1e05b07c 1121 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
02b4b703 1122 sendPacket(outpacket, outsock, false);
78bcb858 1123 trc.d_mac=outpacket->d_trc.d_mac;
1c6d9830
BH
1124 outpacket=getFreshAXFRPacket(q);
1125 }
1126 else
1127 break;
1128 }
8e9b7d99 1129 }
add640c0 1130 }
add640c0 1131 }
78bcb858 1132 /*
3370c993 1133 udiff=dt.udiffNoReset();
1c6d9830
BH
1134 cerr<<"Flushing pipe: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<csp.d_signed<<" / "<<udiff/1000000.0<<endl;
1135 cerr<<"Outstanding: "<<csp.d_outstanding<<", "<<csp.d_queued - csp.d_signed << endl;
1136 cerr<<"Ready for consumption: "<<csp.getReady()<<endl;
78bcb858 1137 * */
1e05b07c 1138 for(;;) {
bec14a20
BH
1139 outpacket->getRRS() = csp.getChunk(true); // flush the pipe
1140 if(!outpacket->getRRS().empty()) {
60a1c204 1141 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 1142 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true); // first answer is 'normal'
1f07a63f
PD
1143 try {
1144 sendPacket(outpacket, outsock, false);
1145 }
1146 catch (PDNSException& pe) {
1147 throw PDNSException("during axfr-out of "+target.toString()+", this happened: "+pe.reason);
1148 }
78bcb858 1149 trc.d_mac=outpacket->d_trc.d_mac;
bec14a20
BH
1150 outpacket=getFreshAXFRPacket(q);
1151 }
1e05b07c 1152 else
bec14a20 1153 break;
12c86877 1154 }
1e05b07c 1155
1c6d9830 1156 udiff=dt.udiffNoReset();
1e05b07c 1157 if(securedZone)
efe7948e 1158 g_log<<Logger::Debug<<logPrefix<<"done signing: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<endl;
1e05b07c 1159
efe7948e 1160 DLOG(g_log<<logPrefix<<"done writing out records"<<endl);
12c86877 1161 /* and terminate with yet again the SOA record */
8e9b7d99 1162 outpacket=getFreshAXFRPacket(q);
9bbcf03a 1163 outpacket->addRecord(std::move(soa));
60a1c204 1164 if(haveTSIGDetails && !tsigkeyname.empty())
1e05b07c
FM
1165 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
1166
ff76e8b4 1167 sendPacket(outpacket, outsock);
1e05b07c 1168
efe7948e
KM
1169 DLOG(g_log<<logPrefix<<"last packet - close"<<endl);
1170 g_log<<Logger::Notice<<logPrefix<<"AXFR finished"<<endl;
12c86877
BH
1171
1172 return 1;
1173}
1174
c2826d2e 1175int TCPNameserver::doIXFR(std::unique_ptr<DNSPacket>& q, int outsock)
6e59a580 1176{
4172a5b2 1177 string logPrefix="IXFR-out zone '"+q->qdomain.toLogString()+"', client '"+q->getRemoteString()+"', ";
efe7948e 1178
c2826d2e 1179 std::unique_ptr<DNSPacket> outpacket=getFreshAXFRPacket(q);
6e59a580
KM
1180 if(q->d_dnssecOk)
1181 outpacket->d_dnssecOk=true; // RFC 5936, 2.2.5 'SHOULD'
1182
6e59a580 1183 uint32_t serial = 0;
27c0050c 1184 MOADNSParser mdp(false, q->getString());
f80ebc05
O
1185 for(const auto & answer : mdp.d_answers) {
1186 const DNSRecord *rr = &answer.first;
e693ff5a 1187 if (rr->d_type == QType::SOA && rr->d_place == DNSResourceRecord::AUTHORITY) {
6e59a580 1188 vector<string>parts;
d06dcda4 1189 stringtok(parts, rr->getContent()->getZoneRepresentation());
6e59a580 1190 if (parts.size() >= 3) {
95dd3b90 1191 try {
a0383aad 1192 pdns::checked_stoi_into(serial, parts[2]);
95dd3b90
RG
1193 }
1194 catch(const std::out_of_range& oor) {
efe7948e 1195 g_log<<Logger::Warning<<logPrefix<<"invalid serial in IXFR query"<<endl;
95dd3b90
RG
1196 outpacket->setRcode(RCode::FormErr);
1197 sendPacket(outpacket,outsock);
1198 return 0;
1199 }
6e59a580 1200 } else {
efe7948e 1201 g_log<<Logger::Warning<<logPrefix<<"no serial in IXFR query"<<endl;
6e59a580
KM
1202 outpacket->setRcode(RCode::FormErr);
1203 sendPacket(outpacket,outsock);
1204 return 0;
1205 }
3e67ea8b 1206 } else if (rr->d_type != QType::TSIG && rr->d_type != QType::OPT) {
d5fcd583 1207 g_log<<Logger::Warning<<logPrefix<<"additional records in IXFR query, type: "<<QType(rr->d_type).toString()<<endl;
6e59a580
KM
1208 outpacket->setRcode(RCode::FormErr);
1209 sendPacket(outpacket,outsock);
1210 return 0;
1211 }
1212 }
1213
efe7948e 1214 g_log<<Logger::Warning<<logPrefix<<"transfer initiated with serial "<<serial<<endl;
6e59a580 1215
57f95528 1216 // determine if zone exists, XFR is allowed, and if IXFR can proceed using existing backend before spawning a new backend.
6e59a580 1217 SOAData sd;
57f95528
CH
1218 bool securedZone;
1219 bool serialPermitsIXFR;
6e59a580 1220 {
24fb5774 1221 auto packetHandler = s_P.lock();
efe7948e 1222 DLOG(g_log<<logPrefix<<"Looking for SOA"<<endl); // find domain_id via SOA and list complete domain. No SOA, no IXFR
24fb5774 1223 if(!*packetHandler) {
994cae6b 1224 g_log<<Logger::Warning<<"TCP server is without backend connections in doIXFR, launching"<<endl;
24fb5774 1225 *packetHandler = make_unique<PacketHandler>();
6e59a580
KM
1226 }
1227
22893145 1228 // canDoAXFR does all the ACL checks, and has the if(disable-axfr) shortcut, call it first.
24fb5774 1229 if(!canDoAXFR(q, false, *packetHandler) || !(*packetHandler)->getBackend()->getSOAUncached(q->qdomain, sd)) {
efe7948e 1230 g_log<<Logger::Warning<<logPrefix<<"failed: not authoritative"<<endl;
9c556f63 1231 outpacket->setRcode(RCode::NotAuth);
6e59a580
KM
1232 sendPacket(outpacket,outsock);
1233 return 0;
1234 }
22893145 1235
24fb5774 1236 DNSSECKeeper dk((*packetHandler)->getBackend());
57f95528 1237 DNSSECKeeper::clearCaches(q->qdomain);
d5e7c918 1238 bool narrow = false;
57f95528
CH
1239 securedZone = dk.isSecuredZone(q->qdomain);
1240 if(dk.getNSEC3PARAM(q->qdomain, nullptr, &narrow)) {
1241 if(narrow) {
efe7948e 1242 g_log<<Logger::Warning<<logPrefix<<"not doing IXFR of an NSEC3 narrow zone"<<endl;
57f95528
CH
1243 outpacket->setRcode(RCode::Refused);
1244 sendPacket(outpacket,outsock);
1245 return 0;
1246 }
22893145 1247 }
6e59a580 1248
57f95528 1249 serialPermitsIXFR = !rfc1982LessThan(serial, calculateEditSOA(sd.serial, dk, sd.qname));
6e59a580 1250 }
24d9e514 1251
57f95528
CH
1252 if (serialPermitsIXFR) {
1253 DNSName target = q->qdomain;
6e59a580 1254 TSIGRecordContent trc;
7abbc40f
PD
1255 DNSName tsigkeyname;
1256 string tsigsecret;
6e59a580 1257
57f95528
CH
1258 UeberBackend db;
1259 DNSSECKeeper dk(&db);
57f95528 1260
ea3816cf 1261 bool haveTSIGDetails = q->getTSIGDetails(&trc, &tsigkeyname);
6e59a580 1262
60a1c204 1263 if(haveTSIGDetails && !tsigkeyname.empty()) {
bb7fb11c 1264 string tsig64;
3343ad1f 1265 DNSName algorithm=trc.d_algoName; // FIXME400: was toLowerCanonic, compare output
290a083d 1266 if (algorithm == DNSName("hmac-md5.sig-alg.reg.int"))
1267 algorithm = DNSName("hmac-md5");
40361bf2
KM
1268 if (!db.getTSIGKey(tsigkeyname, algorithm, tsig64)) {
1269 g_log << Logger::Error << "TSIG key '" << tsigkeyname << "' for domain '" << target << "' not found" << endl;
53ace5d5
PL
1270 return 0;
1271 }
1272 if (B64Decode(tsig64, tsigsecret) == -1) {
efe7948e 1273 g_log<<Logger::Error<<logPrefix<<"unable to Base-64 decode TSIG key '"<<tsigkeyname<<"'"<<endl;
53ace5d5
PL
1274 return 0;
1275 }
6e59a580
KM
1276 }
1277
6e59a580 1278 // SOA *must* go out first, our signing pipe might reorder
efe7948e 1279 DLOG(g_log<<logPrefix<<"sending out SOA"<<endl);
13f9e280 1280 DNSZoneRecord soa = makeEditedDNSZRFromSOAData(dk, sd);
9bbcf03a 1281 outpacket->addRecord(std::move(soa));
ada68bd9 1282 if(securedZone && outpacket->d_dnssecOk) {
7abbc40f 1283 set<DNSName> authSet;
6e59a580 1284 authSet.insert(target);
57f95528 1285 addRRSigs(dk, db, authSet, outpacket->getRRS());
6e59a580
KM
1286 }
1287
60a1c204 1288 if(haveTSIGDetails && !tsigkeyname.empty())
6e59a580
KM
1289 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac); // first answer is 'normal'
1290
1291 sendPacket(outpacket, outsock);
1292
efe7948e 1293 g_log<<Logger::Notice<<logPrefix<<"IXFR finished"<<endl;
6e59a580
KM
1294
1295 return 1;
1296 }
1297
efe7948e 1298 g_log<<Logger::Notice<<logPrefix<<"IXFR fallback to AXFR"<<endl;
6e59a580
KM
1299 return doAXFR(q->qdomain, q, outsock);
1300}
1301
12c86877
BH
1302TCPNameserver::~TCPNameserver()
1303{
12c86877
BH
1304}
1305
1306TCPNameserver::TCPNameserver()
1307{
cb0af1a1
RG
1308 d_maxTransactionsPerConn = ::arg().asNum("max-tcp-transactions-per-conn");
1309 d_idleTimeout = ::arg().asNum("tcp-idle-timeout");
1310 d_maxConnectionDuration = ::arg().asNum("max-tcp-connection-duration");
1311 d_maxConnectionsPerClient = ::arg().asNum("max-tcp-connections-per-client");
1312
379ab445 1313// sem_init(&d_connectionroom_sem,0,::arg().asNum("max-tcp-connections"));
c2826d2e 1314 d_connectionroom_sem = make_unique<Semaphore>( ::arg().asNum( "max-tcp-connections" ));
d322f931 1315 d_maxTCPConnections = ::arg().asNum( "max-tcp-connections" );
f5ad09dc 1316
12c86877 1317 vector<string>locals;
379ab445 1318 stringtok(locals,::arg()["local-address"]," ,");
f5ad09dc
PL
1319 if(locals.empty())
1320 throw PDNSException("No local addresses specified");
12c86877 1321
68b011bd 1322 d_ng.toMasks(::arg()["allow-axfr-ips"] );
9f1d5826 1323
12c86877 1324 signal(SIGPIPE,SIG_IGN);
12c86877 1325
f5ad09dc
PL
1326 for(auto const &laddr : locals) {
1327 ComboAddress local(laddr, ::arg().asNum("local-port"));
12c86877 1328
f5ad09dc
PL
1329 int s=socket(local.sin4.sin_family, SOCK_STREAM, 0);
1330 if(s<0)
1331 throw PDNSException("Unable to acquire TCP socket: "+stringerror());
3897b9e1 1332 setCloseOnExec(s);
fb316318 1333
12c86877 1334 int tmp=1;
f5ad09dc 1335 if(setsockopt(s, SOL_SOCKET,SO_REUSEADDR, (char*)&tmp, sizeof tmp) < 0) {
e6a9dde5 1336 g_log<<Logger::Error<<"Setsockopt failed"<<endl;
f5ad09dc 1337 _exit(1);
12c86877 1338 }
940d7811
RG
1339
1340 if (::arg().asNum("tcp-fast-open") > 0) {
1341#ifdef TCP_FASTOPEN
1342 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1343 if (setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
f5ad09dc 1344 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket "<<local.toStringWithPort()<<": "<<stringerror()<<endl;
940d7811
RG
1345 }
1346#else
e6a9dde5 1347 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
940d7811
RG
1348#endif
1349 }
1350
f5ad09dc
PL
1351 if(::arg().mustDo("non-local-bind"))
1352 Utility::setBindAny(local.sin4.sin_family, s);
1353
1354 if(local.isIPv6() && setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1355 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<stringerror()<<endl;
1356 }
fec7dd5a 1357
379ab445 1358 if(::bind(s, (sockaddr*)&local, local.getSocklen())<0) {
a702a96c 1359 int err = errno;
2c896042 1360 close(s);
a702a96c 1361 if( err == EADDRNOTAVAIL && ! ::arg().mustDo("local-address-nonexist-fail") ) {
f5ad09dc 1362 g_log<<Logger::Error<<"Address " << local.toString() << " does not exist on this server - skipping TCP bind" << endl;
5ecb2885
MZ
1363 continue;
1364 } else {
f5ad09dc 1365 g_log<<Logger::Error<<"Unable to bind to TCP socket " << local.toStringWithPort() << ": "<<stringerror(err)<<endl;
2ab7e9ac 1366 throw PDNSException("Unable to bind to TCP socket");
5ecb2885 1367 }
12c86877 1368 }
12c86877 1369
f5ad09dc
PL
1370 listen(s, 128);
1371 g_log<<Logger::Error<<"TCP server bound to "<<local.toStringWithPort()<<endl;
12c86877 1372 d_sockets.push_back(s);
8edfedf1
BH
1373 struct pollfd pfd;
1374 memset(&pfd, 0, sizeof(pfd));
1375 pfd.fd = s;
1376 pfd.events = POLLIN;
8edfedf1 1377 d_prfds.push_back(pfd);
12c86877 1378 }
12c86877
BH
1379}
1380
1381
ff76e8b4 1382//! Start of TCP operations thread, we launch a new thread for each incoming TCP question
12c86877
BH
1383void TCPNameserver::thread()
1384{
519f5484 1385 setThreadName("pdns/tcpnameser");
12c86877
BH
1386 try {
1387 for(;;) {
1388 int fd;
cb0af1a1
RG
1389 ComboAddress remote;
1390 Utility::socklen_t addrlen=remote.getSocklen();
12c86877 1391
8edfedf1 1392 int ret=poll(&d_prfds[0], d_prfds.size(), -1); // blocks, forever if need be
8a63d3ce 1393 if(ret <= 0)
4957a608 1394 continue;
8a63d3ce 1395
12c86877 1396 int sock=-1;
8ce9e4e6 1397 for(const pollfd& pfd : d_prfds) {
c1ee10a6 1398 if(pfd.revents & POLLIN) {
4957a608 1399 sock = pfd.fd;
cb0af1a1
RG
1400 remote.sin4.sin_family = AF_INET6;
1401 addrlen=remote.getSocklen();
4957a608
BH
1402
1403 if((fd=accept(sock, (sockaddr*)&remote, &addrlen))<0) {
a702a96c
OM
1404 int err = errno;
1405 g_log<<Logger::Error<<"TCP question accept error: "<<stringerror(err)<<endl;
1e05b07c 1406
a702a96c 1407 if(err==EMFILE) {
e6a9dde5 1408 g_log<<Logger::Error<<"TCP handler out of filedescriptors, exiting, won't recover from this"<<endl;
5bd2ea7b 1409 _exit(1);
4957a608
BH
1410 }
1411 }
1412 else {
cb0af1a1 1413 if (d_maxConnectionsPerClient) {
24fb5774
RG
1414 auto clientsCount = s_clientsCount.lock();
1415 if ((*clientsCount)[remote] >= d_maxConnectionsPerClient) {
e6a9dde5 1416 g_log<<Logger::Notice<<"Limit of simultaneous TCP connections per client reached for "<< remote<<", dropping"<<endl;
cb0af1a1
RG
1417 close(fd);
1418 continue;
1419 }
24fb5774 1420 (*clientsCount)[remote]++;
cb0af1a1
RG
1421 }
1422
4957a608
BH
1423 d_connectionroom_sem->wait(); // blocks if no connections are available
1424
1425 int room;
1426 d_connectionroom_sem->getValue( &room);
1427 if(room<1)
e6a9dde5 1428 g_log<<Logger::Warning<<"Limit of simultaneous TCP connections reached - raise max-tcp-connections"<<endl;
4957a608 1429
0ddde5fb
RG
1430 try {
1431 std::thread connThread(doConnection, fd);
1432 connThread.detach();
1433 }
1434 catch (std::exception& e) {
1435 g_log<<Logger::Error<<"Error creating thread: "<<e.what()<<endl;
4957a608 1436 d_connectionroom_sem->post();
48e8d70b 1437 close(fd);
cb0af1a1 1438 decrementClientCount(remote);
4957a608
BH
1439 }
1440 }
1441 }
12c86877
BH
1442 }
1443 }
1444 }
3f81d239 1445 catch(PDNSException &AE) {
e6a9dde5 1446 g_log<<Logger::Error<<"TCP Nameserver thread dying because of fatal error: "<<AE.reason<<endl;
12c86877
BH
1447 }
1448 catch(...) {
e6a9dde5 1449 g_log<<Logger::Error<<"TCPNameserver dying because of an unexpected fatal error"<<endl;
12c86877 1450 }
5bd2ea7b 1451 _exit(1); // take rest of server with us
12c86877
BH
1452}
1453
1454
d322f931
PD
1455unsigned int TCPNameserver::numTCPConnections()
1456{
1457 int room;
1458 d_connectionroom_sem->getValue( &room);
1459 return d_maxTCPConnections - room;
1460}