]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/tcpreceiver.cc
Merge pull request #14118 from jap/patch-2
[thirdparty/pdns.git] / pdns / tcpreceiver.cc
CommitLineData
12c86877
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
2e7834cb 3 Copyright (C) 2002-2012 PowerDNS.COM BV
12c86877
BH
4
5 This program is free software; you can redistribute it and/or modify
22dc646a
BH
6 it under the terms of the GNU General Public License version 2
7 as published by the Free Software Foundation
f782fe38
MH
8
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
12c86877
BH
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
06bd9ccf 20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12c86877 21*/
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
b6f3b03a 25#include <boost/algorithm/string.hpp>
097b1e68 26#include <boost/scoped_array.hpp>
bf269e28 27#include "auth-packetcache.hh"
1258abe0 28#include "utility.hh"
519f5484 29#include "threadname.hh"
add640c0 30#include "dnssecinfra.hh"
4c1474f3 31#include "dnsseckeeper.hh"
12c86877 32#include <cstdio>
4888e4b2 33#include "base32.hh"
12c86877
BH
34#include <cstring>
35#include <cstdlib>
36#include <sys/types.h>
940d7811 37#include <netinet/tcp.h>
12c86877
BH
38#include <iostream>
39#include <string>
40#include "tcpreceiver.hh"
67d74e49 41#include "sstuff.hh"
fa8fd4d2 42
dc6aa7f5
AV
43#include <cerrno>
44#include <csignal>
78bcb858 45#include "base64.hh"
12c86877
BH
46#include "ueberbackend.hh"
47#include "dnspacket.hh"
48#include "nameserver.hh"
49#include "distributor.hh"
50#include "lock.hh"
51#include "logger.hh"
52#include "arguments.hh"
379ab445 53
8cb70f23 54#include "auth-main.hh"
12c86877
BH
55#include "packethandler.hh"
56#include "statbag.hh"
12c86877 57#include "communicator.hh"
61b26744 58#include "namespaces.hh"
8e9b7d99 59#include "signingpipe.hh"
273d88b2 60#include "stubresolver.hh"
4172a5b2
PD
61#include "proxy-protocol.hh"
62#include "noinitvector.hh"
f13cde2a 63#include "gss_context.hh"
1f07a63f 64#include "pdnsexception.hh"
bf269e28 65extern AuthPacketCache PC;
12c86877
BH
66extern StatBag S;
67
68/**
69\file tcpreceiver.cc
70\brief This file implements the tcpreceiver that receives and answers questions over TCP/IP
71*/
72
c2826d2e 73std::unique_ptr<Semaphore> TCPNameserver::d_connectionroom_sem{nullptr};
24fb5774 74LockGuarded<std::unique_ptr<PacketHandler>> TCPNameserver::s_P{nullptr};
d322f931 75unsigned int TCPNameserver::d_maxTCPConnections = 0;
9f1d5826 76NetmaskGroup TCPNameserver::d_ng;
cb0af1a1
RG
77size_t TCPNameserver::d_maxTransactionsPerConn;
78size_t TCPNameserver::d_maxConnectionsPerClient;
79unsigned int TCPNameserver::d_idleTimeout;
80unsigned int TCPNameserver::d_maxConnectionDuration;
24fb5774 81LockGuarded<std::map<ComboAddress,size_t,ComboAddress::addressOnlyLessThan>> TCPNameserver::s_clientsCount;
12c86877 82
12c86877
BH
83void TCPNameserver::go()
84{
e6a9dde5 85 g_log<<Logger::Error<<"Creating backend connection for TCP"<<endl;
24fb5774 86 s_P.lock()->reset();
12c86877 87 try {
24fb5774 88 *(s_P.lock()) = make_unique<PacketHandler>();
12c86877 89 }
3f81d239 90 catch(PDNSException &ae) {
e6a9dde5 91 g_log<<Logger::Error<<"TCP server is unable to launch backends - will try again when questions come in: "<<ae.reason<<endl;
12c86877 92 }
12c86877 93
969e4459 94 std::thread th([this](){thread();});
0ddde5fb 95 th.detach();
12c86877
BH
96}
97
3f81d239 98// throws PDNSException if things didn't go according to plan, returns 0 if really 0 bytes were read
cb0af1a1 99static int readnWithTimeout(int fd, void* buffer, unsigned int n, unsigned int idleTimeout, bool throwOnEOF=true, unsigned int totalTimeout=0)
12c86877 100{
6a3e5d1a
BH
101 unsigned int bytes=n;
102 char *ptr = (char*)buffer;
103 int ret;
cb0af1a1
RG
104 time_t start = 0;
105 unsigned int remainingTotal = totalTimeout;
106 if (totalTimeout) {
4646277d 107 start = time(nullptr);
cb0af1a1 108 }
6a3e5d1a
BH
109 while(bytes) {
110 ret=read(fd, ptr, bytes);
111 if(ret < 0) {
112 if(errno==EAGAIN) {
cb0af1a1 113 ret=waitForData(fd, (totalTimeout == 0 || idleTimeout <= remainingTotal) ? idleTimeout : remainingTotal);
4957a608
BH
114 if(ret < 0)
115 throw NetworkError("Waiting for data read");
116 if(!ret)
117 throw NetworkError("Timeout reading data");
118 continue;
6a3e5d1a
BH
119 }
120 else
4957a608 121 throw NetworkError("Reading data: "+stringerror());
6a3e5d1a
BH
122 }
123 if(!ret) {
124 if(!throwOnEOF && n == bytes)
4957a608 125 return 0;
6a3e5d1a 126 else
4957a608 127 throw NetworkError("Did not fulfill read from TCP due to EOF");
6a3e5d1a 128 }
1e05b07c 129
6a3e5d1a
BH
130 ptr += ret;
131 bytes -= ret;
cb0af1a1 132 if (totalTimeout) {
4646277d 133 time_t now = time(nullptr);
600c8a65 134 const auto elapsed = now - start;
f25dd2b7 135 if (elapsed >= static_cast<time_t>(remainingTotal)) {
cb0af1a1
RG
136 throw NetworkError("Timeout while reading data");
137 }
138 start = now;
62a34cfd
RG
139 if (elapsed > 0) {
140 remainingTotal -= elapsed;
141 }
cb0af1a1 142 }
6a3e5d1a
BH
143 }
144 return n;
145}
12c86877 146
6a3e5d1a 147// ditto
cb0af1a1 148static void writenWithTimeout(int fd, const void *buffer, unsigned int n, unsigned int idleTimeout)
6a3e5d1a
BH
149{
150 unsigned int bytes=n;
151 const char *ptr = (char*)buffer;
152 int ret;
153 while(bytes) {
154 ret=write(fd, ptr, bytes);
155 if(ret < 0) {
156 if(errno==EAGAIN) {
cb0af1a1 157 ret=waitForRWData(fd, false, idleTimeout, 0);
4957a608
BH
158 if(ret < 0)
159 throw NetworkError("Waiting for data write");
160 if(!ret)
161 throw NetworkError("Timeout writing data");
162 continue;
6a3e5d1a
BH
163 }
164 else
4957a608 165 throw NetworkError("Writing data: "+stringerror());
6a3e5d1a 166 }
12c86877 167 if(!ret) {
67d74e49 168 throw NetworkError("Did not fulfill TCP write due to EOF");
12c86877 169 }
1e05b07c 170
6a3e5d1a
BH
171 ptr += ret;
172 bytes -= ret;
12c86877 173 }
12c86877
BH
174}
175
02b4b703 176void TCPNameserver::sendPacket(std::unique_ptr<DNSPacket>& p, int outsock, bool last)
6a3e5d1a 177{
1f07a63f
PD
178 uint16_t len=htons(p->getString(true).length());
179
180 // this also calls p->getString; call it after our explicit call so throwsOnTruncation=true is honoured
02b4b703 181 g_rs.submitResponse(*p, false, last);
9951e2d0 182
fbaa5e09
BH
183 string buffer((const char*)&len, 2);
184 buffer.append(p->getString());
cb0af1a1 185 writenWithTimeout(outsock, buffer.c_str(), buffer.length(), d_idleTimeout);
6a3e5d1a
BH
186}
187
188
cb0af1a1 189void TCPNameserver::getQuestion(int fd, char *mesg, int pktlen, const ComboAddress &remote, unsigned int totalTime)
6a3e5d1a
BH
190try
191{
cb0af1a1 192 readnWithTimeout(fd, mesg, pktlen, d_idleTimeout, true, totalTime);
6a3e5d1a 193}
67d74e49
BH
194catch(NetworkError& ae) {
195 throw NetworkError("Error reading DNS data from TCP client "+remote.toString()+": "+ae.what());
12c86877
BH
196}
197
/**
 * Check whether a connection that began at `start` has used up its allowed lifetime.
 *
 * @param maxConnectionDuration allowed lifetime in seconds; 0 disables the check
 * @param start                 time(nullptr) value taken when the connection began
 * @param remainingTime         set to the seconds still available, but only when the
 *                              limit is enabled, not yet reached, and at least one
 *                              second has elapsed; left untouched otherwise
 * @return true when the limit is enabled and has been reached
 */
static bool maxConnectionDurationReached(unsigned int maxConnectionDuration, time_t start, unsigned int& remainingTime)
{
  if (maxConnectionDuration == 0) {
    return false;
  }

  // Compare as time_t so that a negative elapsed value (clock stepped backwards) is
  // never turned into a huge unsigned number by the usual arithmetic conversions.
  const time_t limit = static_cast<time_t>(maxConnectionDuration);
  const time_t elapsed = time(nullptr) - start;
  if (elapsed >= limit) {
    return true;
  }
  if (elapsed > 0) {
    remainingTime = static_cast<unsigned int>(limit - elapsed);
  }
  return false;
}
211
212void TCPNameserver::decrementClientCount(const ComboAddress& remote)
213{
214 if (d_maxConnectionsPerClient) {
24fb5774
RG
215 auto count = s_clientsCount.lock();
216 auto it = count->find(remote);
217 if (it == count->end()) {
218 // this is worrying, but nothing we can do at this point
219 return;
220 }
221 --it->second;
222 if (it->second == 0) {
223 count->erase(it);
cb0af1a1
RG
224 }
225 }
226}
227
0ddde5fb 228void TCPNameserver::doConnection(int fd)
12c86877 229{
519f5484 230 setThreadName("pdns/tcpConnect");
c2826d2e 231 std::unique_ptr<DNSPacket> packet;
208abc4c 232 ComboAddress remote, accountremote;
4f5e7925 233 socklen_t remotelen=sizeof(remote);
cb0af1a1
RG
234 size_t transactions = 0;
235 time_t start = 0;
236 if (d_maxConnectionDuration) {
4646277d 237 start = time(nullptr);
cb0af1a1 238 }
4f5e7925 239
4f5e7925 240 if(getpeername(fd, (struct sockaddr *)&remote, &remotelen) < 0) {
e6a9dde5 241 g_log<<Logger::Warning<<"Received question from socket which had no remote address, dropping ("<<stringerror()<<")"<<endl;
4f5e7925 242 d_connectionroom_sem->post();
a7b68ae7
RG
243 try {
244 closesocket(fd);
245 }
246 catch(const PDNSException& e) {
e6a9dde5 247 g_log<<Logger::Error<<"Error closing TCP socket: "<<e.reason<<endl;
a7b68ae7 248 }
0ddde5fb 249 return;
4f5e7925 250 }
251
3897b9e1 252 setNonBlocking(fd);
12c86877 253 try {
c2b4ccc0 254 int mesgsize=65535;
dc593046 255 boost::scoped_array<char> mesg(new char[mesgsize]);
4172a5b2
PD
256 std::optional<ComboAddress> inner_remote;
257 bool inner_tcp = false;
258
e6a9dde5 259 DLOG(g_log<<"TCP Connection accepted on fd "<<fd<<endl);
21a303f3 260 bool logDNSQueries= ::arg().mustDo("log-dns-queries");
4172a5b2
PD
261 if (g_proxyProtocolACL.match(remote)) {
262 unsigned int remainingTime = 0;
263 PacketBuffer proxyData;
264 proxyData.reserve(g_proxyProtocolMaximumSize);
265 ssize_t used;
266
267 // this for-loop ends by throwing, or by having gathered a complete proxy header
268 for (;;) {
269 used = isProxyHeaderComplete(proxyData);
270 if (used < 0) {
271 ssize_t origsize = proxyData.size();
272 proxyData.resize(origsize + -used);
273 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
274 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": maximum TCP connection duration exceeded");
275 }
276
277 try {
278 readnWithTimeout(fd, &proxyData[origsize], -used, d_idleTimeout, true, remainingTime);
279 }
280 catch(NetworkError& ae) {
281 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": "+ae.what());
282 }
283 }
284 else if (used == 0) {
285 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": PROXYv2 header was invalid");
286 }
287 else if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
288 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": PROXYv2 header too big");
289 }
290 else { // used > 0 && used <= g_proxyProtocolMaximumSize
291 break;
292 }
293 }
294 ComboAddress psource, pdestination;
295 bool proxyProto, tcp;
296 std::vector<ProxyProtocolValue> ppvalues;
297
298 used = parseProxyHeader(proxyData, proxyProto, psource, pdestination, tcp, ppvalues);
299 if (used <= 0) {
300 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": PROXYv2 header was invalid");
301 }
302 if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
303 throw NetworkError("Error reading PROXYv2 header from TCP client "+remote.toString()+": PROXYv2 header was oversized");
304 }
305 inner_remote = psource;
306 inner_tcp = tcp;
208abc4c
KM
307 accountremote = psource;
308 }
309 else {
310 accountremote = remote;
4172a5b2
PD
311 }
312
12c86877 313 for(;;) {
cb0af1a1
RG
314 unsigned int remainingTime = 0;
315 transactions++;
316 if (d_maxTransactionsPerConn && transactions > d_maxTransactionsPerConn) {
e6a9dde5 317 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the number of transactions per connection, dropping.";
cb0af1a1
RG
318 break;
319 }
320 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
e6a9dde5 321 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the maximum TCP connection duration, dropping.";
cb0af1a1
RG
322 break;
323 }
6a3e5d1a
BH
324
325 uint16_t pktlen;
cb0af1a1 326 if(!readnWithTimeout(fd, &pktlen, 2, d_idleTimeout, false, remainingTime))
4957a608 327 break;
6a3e5d1a 328 else
4957a608 329 pktlen=ntohs(pktlen);
12c86877 330
366e1e5e
AT
331 // this check will always be false *if* no one touches
332 // the mesg array. pktlen can be maximum of 65535 as
1e05b07c
FM
333 // it is 2 byte unsigned variable. In getQuestion, we
334 // write to 0 up to pktlen-1 so 65535 is just right.
366e1e5e
AT
335
336 // do not remove this check as it will catch if someone
1e05b07c 337 // decreases the mesg buffer size for some reason.
c2b4ccc0 338 if(pktlen > mesgsize) {
e6a9dde5 339 g_log<<Logger::Warning<<"Received an overly large question from "<<remote.toString()<<", dropping"<<endl;
4957a608 340 break;
12c86877 341 }
1e05b07c 342
cb0af1a1 343 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
e6a9dde5 344 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the maximum TCP connection duration, dropping.";
cb0af1a1
RG
345 break;
346 }
347
348 getQuestion(fd, mesg.get(), pktlen, remote, remainingTime);
208abc4c
KM
349 S.inc("tcp-queries");
350 if (accountremote.sin4.sin_family == AF_INET6)
5fd567ec 351 S.inc("tcp6-queries");
352 else
353 S.inc("tcp4-queries");
3e579e91 354
c2826d2e 355 packet=make_unique<DNSPacket>(true);
809fe23f 356 packet->setRemote(&remote);
e9dd48f9 357 packet->d_tcp=true;
4172a5b2
PD
358 if (inner_remote) {
359 packet->d_inner_remote = inner_remote;
360 packet->d_tcp = inner_tcp;
361 }
ff76e8b4 362 packet->setSocket(fd);
c2b4ccc0 363 if(packet->parse(mesg.get(), pktlen)<0)
4957a608 364 break;
78f1d7b5
PL
365
366 if (packet->hasEDNSCookie())
367 S.inc("tcp-cookie-queries");
368
6e59a580 369 if(packet->qtype.getCode()==QType::AXFR) {
ca9d9b95 370 packet->d_xfr=true;
02b4b703 371 doAXFR(packet->qdomain, packet, fd);
6e59a580
KM
372 continue;
373 }
374
375 if(packet->qtype.getCode()==QType::IXFR) {
ca9d9b95 376 packet->d_xfr=true;
02b4b703 377 doIXFR(packet, fd);
4957a608 378 continue;
12c86877
BH
379 }
380
1e05b07c 381 std::unique_ptr<DNSPacket> reply;
c2826d2e 382 auto cached = make_unique<DNSPacket>(false);
fe498ace 383 if(logDNSQueries) {
4172a5b2 384 g_log << Logger::Notice<<"TCP Remote "<< packet->getRemoteString() <<" wants '" << packet->qdomain<<"|"<<packet->qtype.toString() <<
1b16851b 385 "', do = " <<packet->d_dnssecOk <<", bufsize = "<< packet->getMaxReplyLen();
fe498ace 386 }
bb5903e2 387
9a037bfa 388 if(PC.enabled()) {
c2826d2e 389 if(packet->couldBeCached() && PC.get(*packet, *cached)) { // short circuit - does the PacketCache recognize this question?
9a037bfa 390 if(logDNSQueries)
1b16851b 391 g_log<<": packetcache HIT"<<endl;
9a037bfa 392 cached->setRemote(&packet->d_remote);
208abc4c 393 cached->d_inner_remote = packet->d_inner_remote;
9a037bfa
KM
394 cached->d.id=packet->d.id;
395 cached->d.rd=packet->d.rd; // copy in recursion desired bit
396 cached->commitD(); // commit d to the packet inlined
397
398 sendPacket(cached, fd); // presigned, don't do it again
399 continue;
400 }
21a303f3 401 if(logDNSQueries)
1b16851b 402 g_log<<": packetcache MISS"<<endl;
bbe4b041
OM
403 } else {
404 if (logDNSQueries) {
1b16851b 405 g_log<<endl;
bbe4b041 406 }
12c86877 407 }
12c86877 408 {
24fb5774
RG
409 auto packetHandler = s_P.lock();
410 if (!*packetHandler) {
994cae6b 411 g_log<<Logger::Warning<<"TCP server is without backend connections, launching"<<endl;
24fb5774 412 *packetHandler = make_unique<PacketHandler>();
4957a608 413 }
4957a608 414
24fb5774 415 reply = (*packetHandler)->doQuestion(*packet); // we really need to ask the backend :-)
12c86877
BH
416 }
417
12c86877 418 if(!reply) // unable to write an answer?
4957a608 419 break;
b552d7b1 420
ff76e8b4 421 sendPacket(reply, fd);
15e39ee4 422#ifdef ENABLE_GSS_TSIG
c113acc3
OM
423 if (g_doGssTSIG) {
424 packet->cleanupGSS(reply->d.rcode);
425 }
15e39ee4 426#endif
12c86877 427 }
12c86877 428 }
3f81d239 429 catch(PDNSException &ae) {
24fb5774 430 s_P.lock()->reset(); // on next call, backend will be recycled
a274da88 431 g_log << Logger::Error << "TCP Connection Thread for client " << remote << " failed, cycling backend: " << ae.reason << endl;
ef1d2f44 432 }
0afa9049 433 catch(NetworkError &e) {
a274da88 434 g_log << Logger::Info << "TCP Connection Thread for client " << remote << " died because of network error: " << e.what() << endl;
0afa9049
BH
435 }
436
adc10f99 437 catch(std::exception &e) {
3d88c4c6 438 s_P.lock()->reset(); // on next call, backend will be recycled
a274da88 439 g_log << Logger::Error << "TCP Connection Thread for client " << remote << " died because of STL error, cycling backend: " << e.what() << endl;
12c86877
BH
440 }
441 catch( ... )
442 {
3d88c4c6 443 s_P.lock()->reset(); // on next call, backend will be recycled
a274da88 444 g_log << Logger::Error << "TCP Connection Thread for client " << remote << " caught unknown exception, cycling backend." << endl;
12c86877 445 }
12c86877 446 d_connectionroom_sem->post();
a7b68ae7
RG
447
448 try {
449 closesocket(fd);
450 }
451 catch(const PDNSException& e) {
a274da88 452 g_log << Logger::Error << "Error closing TCP socket for client " << remote << ": " << e.reason << endl;
a7b68ae7 453 }
cb0af1a1 454 decrementClientCount(remote);
12c86877
BH
455}
456
78bcb858 457
24fb5774 458bool TCPNameserver::canDoAXFR(std::unique_ptr<DNSPacket>& q, bool isAXFR, std::unique_ptr<PacketHandler>& packetHandler)
12c86877 459{
379ab445 460 if(::arg().mustDo("disable-axfr"))
318c3ec6
BH
461 return false;
462
4172a5b2 463 string logPrefix=string(isAXFR ? "A" : "I")+"XFR-out zone '"+q->qdomain.toLogString()+"', client '"+q->getInnerRemote().toStringWithPort()+"', ";
efe7948e 464
78bcb858
BH
465 if(q->d_havetsig) { // if you have one, it must be good
466 TSIGRecordContent trc;
7abbc40f
PD
467 DNSName keyname;
468 string secret;
24fb5774 469 if(!q->checkForCorrectTSIG(packetHandler->getBackend(), &keyname, &secret, &trc)) {
78bcb858 470 return false;
7f9ac49b
AT
471 } else {
472 getTSIGHashEnum(trc.d_algoName, q->d_tsig_algo);
c113acc3 473#ifdef ENABLE_GSS_TSIG
f13cde2a
OM
474 if (g_doGssTSIG && q->d_tsig_algo == TSIG_GSS) {
475 GssContext gssctx(keyname);
476 if (!gssctx.getPeerPrincipal(q->d_peer_principal)) {
477 g_log<<Logger::Warning<<"Failed to extract peer principal from GSS context with keyname '"<<keyname<<"'"<<endl;
478 }
479 }
c113acc3 480#endif
7f9ac49b
AT
481 }
482
24fb5774 483 DNSSECKeeper dk(packetHandler->getBackend());
c113acc3 484#ifdef ENABLE_GSS_TSIG
f13cde2a
OM
485 if (g_doGssTSIG && q->d_tsig_algo == TSIG_GSS) {
486 vector<string> princs;
487 packetHandler->getBackend()->getDomainMetadata(q->qdomain, "GSS-ALLOW-AXFR-PRINCIPAL", princs);
488 for(const std::string& princ : princs) {
489 if (q->d_peer_principal == princ) {
490 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: TSIG signed request with authorized principal '"<<q->d_peer_principal<<"' and algorithm 'gss-tsig'"<<endl;
491 return true;
492 }
493 }
494 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' denied: TSIG signed request with principal '"<<q->d_peer_principal<<"' and algorithm 'gss-tsig' is not permitted"<<endl;
495 return false;
496 }
c113acc3 497#endif
3d03fee8 498 if(!dk.TSIGGrantsAccess(q->qdomain, keyname)) {
efe7948e 499 g_log<<Logger::Warning<<logPrefix<<"denied: key with name '"<<keyname<<"' and algorithm '"<<getTSIGAlgoName(q->d_tsig_algo)<<"' does not grant access"<<endl;
78bcb858
BH
500 return false;
501 }
502 else {
efe7948e 503 g_log<<Logger::Notice<<logPrefix<<"allowed: TSIG signed request with authorized key '"<<keyname<<"' and algorithm '"<<getTSIGAlgoName(q->d_tsig_algo)<<"'"<<endl;
78bcb858
BH
504 return true;
505 }
506 }
1e05b07c 507
93afc0a3 508 // cerr<<"checking allow-axfr-ips"<<endl;
4172a5b2 509 if(!(::arg()["allow-axfr-ips"].empty()) && d_ng.match( q->getInnerRemote() )) {
efe7948e 510 g_log<<Logger::Notice<<logPrefix<<"allowed: client IP is in allow-axfr-ips"<<endl;
12c86877 511 return true;
ab5edd12 512 }
93afc0a3
PD
513
514 FindNS fns;
515
516 // cerr<<"doing per-zone-axfr-acls"<<endl;
517 SOAData sd;
24fb5774 518 if(packetHandler->getBackend()->getSOAUncached(q->qdomain,sd)) {
93afc0a3 519 // cerr<<"got backend and SOA"<<endl;
93afc0a3 520 vector<string> acl;
24fb5774 521 packetHandler->getBackend()->getDomainMetadata(q->qdomain, "ALLOW-AXFR-FROM", acl);
d7f67000 522 for (const auto & i : acl) {
93afc0a3 523 // cerr<<"matching against "<<*i<<endl;
d7f67000 524 if(pdns_iequals(i, "AUTO-NS")) {
93afc0a3
PD
525 // cerr<<"AUTO-NS magic please!"<<endl;
526
527 DNSResourceRecord rr;
7abbc40f 528 set<DNSName> nsset;
93afc0a3 529
13b80e77
CH
530 sd.db->lookup(QType(QType::NS), q->qdomain, sd.domain_id);
531 while (sd.db->get(rr)) {
290a083d 532 nsset.insert(DNSName(rr.content));
13b80e77 533 }
7abbc40f 534 for(const auto & j: nsset) {
24fb5774 535 vector<string> nsips=fns.lookup(j, packetHandler->getBackend());
d7f67000 536 for(const auto & nsip : nsips) {
93afc0a3 537 // cerr<<"got "<<*k<<" from AUTO-NS"<<endl;
4172a5b2 538 if(nsip == q->getInnerRemote().toString())
93afc0a3
PD
539 {
540 // cerr<<"got AUTO-NS hit"<<endl;
efe7948e 541 g_log<<Logger::Notice<<logPrefix<<"allowed: client IP is in NSset"<<endl;
93afc0a3
PD
542 return true;
543 }
544 }
545 }
546 }
547 else
548 {
d7f67000 549 Netmask nm = Netmask(i);
4172a5b2 550 if(nm.match( q->getInnerRemote() ))
93afc0a3 551 {
efe7948e 552 g_log<<Logger::Notice<<logPrefix<<"allowed: client IP is in per-zone ACL"<<endl;
93afc0a3
PD
553 // cerr<<"hit!"<<endl;
554 return true;
555 }
556 }
557 }
1e05b07c 558 }
93afc0a3 559
12c86877
BH
560 extern CommunicatorClass Communicator;
561
4172a5b2 562 if(Communicator.justNotified(q->qdomain, q->getInnerRemote().toString())) { // we just notified this ip
efe7948e 563 g_log<<Logger::Notice<<logPrefix<<"allowed: client IP is from recently notified secondary"<<endl;
12c86877
BH
564 return true;
565 }
566
efe7948e 567 g_log<<Logger::Warning<<logPrefix<<"denied: client IP has no permission"<<endl;
12c86877
BH
568 return false;
569}
570
b317b510 571namespace {
54d84273
PD
572 struct NSECXEntry
573 {
22a0ef16 574 NSECBitmap d_set;
54d84273 575 unsigned int d_ttl;
feef1ece 576 bool d_auth;
54d84273 577 };
8e9b7d99 578
a5188bcd 579 static std::unique_ptr<DNSPacket> getFreshAXFRPacket(std::unique_ptr<DNSPacket>& q)
54d84273 580 {
c2826d2e 581 std::unique_ptr<DNSPacket> ret = std::unique_ptr<DNSPacket>(q->replyPacket());
54d84273
PD
582 ret->setCompress(false);
583 ret->d_dnssecOk=false; // RFC 5936, 2.2.5
584 ret->d_tcp = true;
585 return ret;
586 }
8e9b7d99
BH
587}
588
54d84273 589
12c86877 590/** do the actual zone transfer. Return 0 in case of error, 1 in case of success */
5cf23dba 591int TCPNameserver::doAXFR(const DNSName &target, std::unique_ptr<DNSPacket>& q, int outsock) // NOLINT(readability-function-cognitive-complexity)
12c86877 592{
c6ca0aa8 593 string logPrefix="AXFR-out zone '"+target.toLogString()+"', client '"+q->getRemoteStringWithPort()+"', ";
efe7948e 594
c2826d2e 595 std::unique_ptr<DNSPacket> outpacket= getFreshAXFRPacket(q);
c67e46a1 596 if(q->d_dnssecOk)
05e24311 597 outpacket->d_dnssecOk=true; // RFC 5936, 2.2.5 'SHOULD'
22893145 598
efe7948e 599 g_log<<Logger::Warning<<logPrefix<<"transfer initiated"<<endl;
12c86877 600
22893145 601 // determine if zone exists and AXFR is allowed using existing backend before spawning a new backend.
91ad8c11 602 SOAData sd;
12c86877 603 {
24fb5774 604 auto packetHandler = s_P.lock();
efe7948e 605 DLOG(g_log<<logPrefix<<"looking for SOA"<<endl); // find domain_id via SOA and list complete domain. No SOA, no AXFR
24fb5774 606 if(!*packetHandler) {
994cae6b 607 g_log<<Logger::Warning<<"TCP server is without backend connections in doAXFR, launching"<<endl;
24fb5774 608 *packetHandler = make_unique<PacketHandler>();
12a965c5 609 }
12c86877 610
ea99d474 611 // canDoAXFR does all the ACL checks, and has the if(disable-axfr) shortcut, call it first.
24fb5774 612 if (!canDoAXFR(q, true, *packetHandler)) {
efe7948e 613 g_log<<Logger::Warning<<logPrefix<<"failed: client may not request AXFR"<<endl;
9c556f63 614 outpacket->setRcode(RCode::NotAuth);
8090f5a2
AT
615 sendPacket(outpacket,outsock);
616 return 0;
617 }
618
91ad8c11 619 if (!(*packetHandler)->getBackend()->getSOAUncached(target, sd)) {
efe7948e 620 g_log<<Logger::Warning<<logPrefix<<"failed: not authoritative"<<endl;
9c556f63 621 outpacket->setRcode(RCode::NotAuth);
ff76e8b4 622 sendPacket(outpacket,outsock);
12c86877
BH
623 return 0;
624 }
3de83124 625 }
22893145 626
8e9b7d99 627 UeberBackend db;
79ba7763 628 if(!db.getSOAUncached(target, sd)) {
efe7948e 629 g_log<<Logger::Warning<<logPrefix<<"failed: not authoritative in second instance"<<endl;
79ba7763 630 outpacket->setRcode(RCode::NotAuth);
ff76e8b4 631 sendPacket(outpacket,outsock);
3de83124 632 return 0;
12c86877 633 }
3de83124 634
8a66a927
KM
635 bool securedZone = false;
636 bool presignedZone = false;
637 bool NSEC3Zone = false;
638 bool narrow = false;
639
91ad8c11
KM
640 DomainInfo di;
641 bool isCatalogZone = sd.db->getDomainInfo(target, di, false) && di.isCatalogType();
642
8a66a927
KM
643 NSEC3PARAMRecordContent ns3pr;
644
ea99d474 645 DNSSECKeeper dk(&db);
40b3959a 646 DNSSECKeeper::clearCaches(target);
91ad8c11 647 if (!isCatalogZone) {
8a66a927
KM
648 securedZone = dk.isSecuredZone(target);
649 presignedZone = dk.isPresigned(target);
650 }
22893145 651
dacacb23 652 if(securedZone && dk.getNSEC3PARAM(target, &ns3pr, &narrow)) {
22893145
CH
653 NSEC3Zone=true;
654 if(narrow) {
efe7948e 655 g_log<<Logger::Warning<<logPrefix<<"failed: not doing AXFR of an NSEC3 narrow zone"<<endl;
994cae6b
KM
656 outpacket->setRcode(RCode::Refused);
657 sendPacket(outpacket,outsock);
658 return 0;
22893145
CH
659 }
660 }
661
78bcb858 662 TSIGRecordContent trc;
7abbc40f
PD
663 DNSName tsigkeyname;
664 string tsigsecret;
78bcb858 665
ea3816cf 666 bool haveTSIGDetails = q->getTSIGDetails(&trc, &tsigkeyname);
78bcb858 667
60a1c204 668 if(haveTSIGDetails && !tsigkeyname.empty()) {
2c26f25a 669 string tsig64;
5e8d94f1 670 DNSName algorithm=trc.d_algoName;
290a083d 671 if (algorithm == DNSName("hmac-md5.sig-alg.reg.int"))
672 algorithm = DNSName("hmac-md5");
f13cde2a
OM
673 if (algorithm != DNSName("gss-tsig")) {
674 if(!db.getTSIGKey(tsigkeyname, algorithm, tsig64)) {
675 g_log<<Logger::Warning<<logPrefix<<"TSIG key not found"<<endl;
676 return 0;
677 }
678 if (B64Decode(tsig64, tsigsecret) == -1) {
679 g_log<<Logger::Error<<logPrefix<<"unable to Base-64 decode TSIG key '"<<tsigkeyname<<"'"<<endl;
680 return 0;
681 }
84fc3f8b 682 }
78bcb858 683 }
1e05b07c
FM
684
685
8267bd2c 686 // SOA *must* go out first, our signing pipe might reorder
efe7948e 687 DLOG(g_log<<logPrefix<<"sending out SOA"<<endl);
13f9e280 688 DNSZoneRecord soa = makeEditedDNSZRFromSOAData(dk, sd);
9bbcf03a 689 outpacket->addRecord(DNSZoneRecord(soa));
3c68fb14 690 if(securedZone && !presignedZone) {
7abbc40f 691 set<DNSName> authSet;
8d3cbffa 692 authSet.insert(target);
ea99d474 693 addRRSigs(dk, db, authSet, outpacket->getRRS());
8d3cbffa 694 }
1e05b07c 695
60a1c204 696 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 697 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac); // first answer is 'normal'
1e05b07c 698
02b4b703 699 sendPacket(outpacket, outsock, false);
1e05b07c 700
78bcb858 701 trc.d_mac = outpacket->d_trc.d_mac;
8267bd2c 702 outpacket = getFreshAXFRPacket(q);
04f5504d 703
1e05b07c 704
90ba52e0 705 DNSZoneRecord zrr;
04f5504d 706 vector<DNSZoneRecord> zrrs;
794c2f92 707
04f5504d
KM
708 zrr.dr.d_name = target;
709 zrr.dr.d_ttl = sd.minimum;
95b70d3a 710
04f5504d
KM
711 if(securedZone && !presignedZone) { // this is where the DNSKEYs, CDNSKEYs and CDSs go in
712 bool doCDNSKEY = true, doCDS = true;
95b70d3a
KM
713 string publishCDNSKEY, publishCDS;
714 dk.getPublishCDNSKEY(q->qdomain, publishCDNSKEY);
715 dk.getPublishCDS(q->qdomain, publishCDS);
04f5504d 716
95b70d3a 717 set<uint32_t> entryPointIds;
04f5504d
KM
718 DNSSECKeeper::keyset_t entryPoints = dk.getEntryPoints(target);
719 for (auto const& value : entryPoints) {
95b70d3a 720 entryPointIds.insert(value.second.id);
04f5504d 721 }
95b70d3a 722
04f5504d 723 DNSSECKeeper::keyset_t keys = dk.getKeys(target);
95b70d3a
KM
724 for(const DNSSECKeeper::keyset_t::value_type& value : keys) {
725 if (!value.second.published) {
726 continue;
991a0977 727 }
95b70d3a 728 zrr.dr.d_type = QType::DNSKEY;
d06dcda4 729 zrr.dr.setContent(std::make_shared<DNSKEYRecordContent>(value.first.getDNSKEY()));
95b70d3a 730 DNSName keyname = NSEC3Zone ? DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, zrr.dr.d_name))) : zrr.dr.d_name;
64d22929 731 zrrs.push_back(zrr);
95b70d3a
KM
732
733 // generate CDS and CDNSKEY records
04f5504d 734 if(doCDNSKEY && entryPointIds.count(value.second.id) > 0){
481508ab 735 if(!publishCDNSKEY.empty()) {
95b70d3a 736 zrr.dr.d_type=QType::CDNSKEY;
481508ab 737 if (publishCDNSKEY == "0") {
04f5504d 738 doCDNSKEY = false;
d06dcda4 739 zrr.dr.setContent(PacketHandler::s_deleteCDNSKEYContent);
04f5504d 740 zrrs.push_back(zrr);
481508ab 741 } else {
d06dcda4 742 zrr.dr.setContent(std::make_shared<DNSKEYRecordContent>(value.first.getDNSKEY()));
04f5504d 743 zrrs.push_back(zrr);
481508ab 744 }
95b70d3a
KM
745 }
746
04f5504d 747 if(doCDS && !publishCDS.empty()){
95b70d3a
KM
748 zrr.dr.d_type=QType::CDS;
749 vector<string> digestAlgos;
750 stringtok(digestAlgos, publishCDS, ", ");
481508ab 751 if(std::find(digestAlgos.begin(), digestAlgos.end(), "0") != digestAlgos.end()) {
cd7c3624 752 doCDS = false;
d06dcda4 753 zrr.dr.setContent(PacketHandler::s_deleteCDSContent);
04f5504d 754 zrrs.push_back(zrr);
481508ab
KM
755 } else {
756 for(auto const &digestAlgo : digestAlgos) {
d06dcda4 757 zrr.dr.setContent(std::make_shared<DSRecordContent>(makeDSFromDNSKey(target, value.first.getDNSKEY(), pdns::checked_stoi<uint8_t>(digestAlgo))));
04f5504d 758 zrrs.push_back(zrr);
481508ab 759 }
95b70d3a 760 }
991a0977
PL
761 }
762 }
763 }
95b70d3a 764
6dae726d
PD
765 }
766
95c5bc40 767 if(NSEC3Zone) { // now stuff in the NSEC3PARAM
04f5504d 768 uint8_t flags = ns3pr.d_flags;
90ba52e0 769 zrr.dr.d_type = QType::NSEC3PARAM;
95c5bc40 770 ns3pr.d_flags = 0;
d06dcda4 771 zrr.dr.setContent(std::make_shared<NSEC3PARAMRecordContent>(ns3pr));
b8adb30d 772 ns3pr.d_flags = flags;
6e8694df 773 DNSName keyname = DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, zrr.dr.d_name)));
64d22929 774 zrrs.push_back(zrr);
ce464268 775 }
1e05b07c 776
8a66a927
KM
777 const bool rectify = !(presignedZone || ::arg().mustDo("disable-axfr-rectify"));
778 set<DNSName> qnames, nsset, terms;
779
780 // Catalog zone start
781 if (di.kind == DomainInfo::Producer) {
782 // Ignore all records except NS at apex
783 sd.db->lookup(QType::NS, target, di.id);
784 while (sd.db->get(zrr)) {
785 zrrs.emplace_back(zrr);
786 }
787 if (zrrs.empty()) {
788 zrr.dr.d_name = target;
789 zrr.dr.d_ttl = 0;
790 zrr.dr.d_type = QType::NS;
d06dcda4 791 zrr.dr.setContent(std::make_shared<NSRecordContent>("invalid."));
8a66a927
KM
792 zrrs.emplace_back(zrr);
793 }
794
795 zrrs.emplace_back(CatalogInfo::getCatalogVersionRecord(target));
796
797 vector<CatalogInfo> members;
798 sd.db->getCatalogMembers(target, members, CatalogInfo::CatalogType::Producer);
799 for (const auto& ci : members) {
800 ci.toDNSZoneRecords(target, zrrs);
801 }
802 if (members.empty()) {
803 g_log << Logger::Warning << logPrefix << "catalog zone '" << target << "' has no members" << endl;
804 }
805 goto send;
806 }
807 // Catalog zone end
808
0c350cb5 809 // now start list zone
91ad8c11 810 if (!sd.db->list(target, sd.domain_id, isCatalogZone)) {
efe7948e 811 g_log<<Logger::Error<<logPrefix<<"backend signals error condition, aborting AXFR"<<endl;
9c556f63 812 outpacket->setRcode(RCode::ServFail);
0c350cb5
BH
813 sendPacket(outpacket,outsock);
814 return 0;
815 }
816
90ba52e0 817 while(sd.db->get(zrr)) {
64d22929
KM
818 if (!presignedZone) {
819 if (zrr.dr.d_type == QType::RRSIG) {
820 continue;
821 }
822 if (zrr.dr.d_type == QType::DNSKEY || zrr.dr.d_type == QType::CDNSKEY || zrr.dr.d_type == QType::CDS) {
823 if(!::arg().mustDo("direct-dnskey")) {
824 continue;
825 } else {
826 zrr.dr.d_ttl = sd.minimum;
827 }
828 }
829 }
8bf260dd 830 zrr.dr.d_name.makeUsLowerCase();
90ba52e0 831 if(zrr.dr.d_name.isPartOf(target)) {
833b07fe 832 if (zrr.dr.d_type == QType::ALIAS && (::arg().mustDo("outgoing-axfr-expand-alias") || ::arg()["outgoing-axfr-expand-alias"] == "ignore-errors")) {
90ba52e0 833 vector<DNSZoneRecord> ips;
d06dcda4
RG
834 int ret1 = stubDoResolve(getRR<ALIASRecordContent>(zrr.dr)->getContent(), QType::A, ips);
835 int ret2 = stubDoResolve(getRR<ALIASRecordContent>(zrr.dr)->getContent(), QType::AAAA, ips);
833b07fe 836 if (ret1 != RCode::NoError || ret2 != RCode::NoError) {
50ff05d7 837 if (::arg()["outgoing-axfr-expand-alias"] == "ignore-errors") {
833b07fe 838 if (ret1 != RCode::NoError) {
50ff05d7 839 g_log << Logger::Error << logPrefix << zrr.dr.d_name.toLogString() << ": error resolving A record for ALIAS target " << zrr.dr.getContent()->getZoneRepresentation() << ", continuing AXFR" << endl;
833b07fe
KD
840 }
841 if (ret2 != RCode::NoError) {
50ff05d7 842 g_log << Logger::Error << logPrefix << zrr.dr.d_name.toLogString() << ": error resolving AAAA record for ALIAS target " << zrr.dr.getContent()->getZoneRepresentation() << ", continuing AXFR" << endl;
833b07fe
KD
843 }
844 }
50ff05d7
CH
845 else {
846 g_log << Logger::Warning << logPrefix << zrr.dr.d_name.toLogString() << ": error resolving for ALIAS " << zrr.dr.getContent()->getZoneRepresentation() << ", aborting AXFR" << endl;
847 outpacket->setRcode(RCode::ServFail);
848 sendPacket(outpacket, outsock);
849 return 0;
850 }
273d88b2 851 }
d06dcda4 852 for (auto& ip: ips) {
90ba52e0 853 zrr.dr.d_type = ip.dr.d_type;
d06dcda4 854 zrr.dr.setContent(ip.dr.getContent());
90ba52e0 855 zrrs.push_back(zrr);
d86e1bf7 856 }
a68df29d 857 continue;
d86e1bf7
PD
858 }
859
b772ffea 860 if (rectify) {
90ba52e0 861 if (zrr.dr.d_type) {
862 qnames.insert(zrr.dr.d_name);
863 if(zrr.dr.d_type == QType::NS && zrr.dr.d_name!=target)
864 nsset.insert(zrr.dr.d_name);
b772ffea
KM
865 } else {
866 // remove existing ents
867 continue;
868 }
869 }
a68df29d 870 zrrs.push_back(zrr);
b772ffea 871 } else {
90ba52e0 872 if (zrr.dr.d_type)
efe7948e 873 g_log<<Logger::Warning<<logPrefix<<"zone contains out-of-zone data '"<<zrr.dr.d_name<<"|"<<DNSRecordContent::NumberToType(zrr.dr.d_type)<<"', ignoring"<<endl;
b772ffea
KM
874 }
875 }
876
d06dcda4 877 for (auto& loopRR : zrrs) {
bdbee377
PL
878 if ((loopRR.dr.d_type == QType::SVCB || loopRR.dr.d_type == QType::HTTPS)) {
879 // Process auto hints
880 // TODO this is an almost copy of the code in the packethandler
881 auto rrc = getRR<SVCBBaseRecordContent>(loopRR.dr);
882 if (rrc == nullptr) {
883 continue;
884 }
d06dcda4
RG
885 auto newRRC = rrc->clone();
886 if (!newRRC) {
887 continue;
888 }
889 DNSName svcTarget = newRRC->getTarget().isRoot() ? loopRR.dr.d_name : newRRC->getTarget();
890 if (newRRC->autoHint(SvcParam::ipv4hint)) {
bdbee377
PL
891 sd.db->lookup(QType::A, svcTarget, sd.domain_id);
892 vector<ComboAddress> hints;
893 DNSZoneRecord rr;
894 while (sd.db->get(rr)) {
895 auto arrc = getRR<ARecordContent>(rr.dr);
896 hints.push_back(arrc->getCA());
897 }
898 if (hints.size() == 0) {
d06dcda4 899 newRRC->removeParam(SvcParam::ipv4hint);
bdbee377 900 } else {
d06dcda4 901 newRRC->setHints(SvcParam::ipv4hint, hints);
bdbee377
PL
902 }
903 }
904
d06dcda4 905 if (newRRC->autoHint(SvcParam::ipv6hint)) {
bdbee377
PL
906 sd.db->lookup(QType::AAAA, svcTarget, sd.domain_id);
907 vector<ComboAddress> hints;
908 DNSZoneRecord rr;
909 while (sd.db->get(rr)) {
910 auto arrc = getRR<AAAARecordContent>(rr.dr);
911 hints.push_back(arrc->getCA());
912 }
913 if (hints.size() == 0) {
d06dcda4 914 newRRC->removeParam(SvcParam::ipv6hint);
bdbee377 915 } else {
d06dcda4 916 newRRC->setHints(SvcParam::ipv6hint, hints);
bdbee377
PL
917 }
918 }
d06dcda4
RG
919
920 loopRR.dr.setContent(std::move(newRRC));
bdbee377
PL
921 }
922 }
923
75f2589f 924 // Group records by name and type, signpipe stumbles over interrupted rrsets
22a676e0 925 if(securedZone && !presignedZone) {
8daafcc1 926 sort(zrrs.begin(), zrrs.end(), [](const DNSZoneRecord& a, const DNSZoneRecord& b) {
905dae56 927 return std::tie(a.dr.d_name, a.dr.d_type) < std::tie(b.dr.d_name, b.dr.d_type);
8daafcc1
KM
928 });
929 }
75f2589f 930
b772ffea
KM
931 if(rectify) {
932 // set auth
2010ac95
RG
933 for(DNSZoneRecord &loopZRR : zrrs) {
934 loopZRR.auth=true;
935 if (loopZRR.dr.d_type != QType::NS || loopZRR.dr.d_name!=target) {
936 DNSName shorter(loopZRR.dr.d_name);
b772ffea 937 do {
e325f20c 938 if (shorter==target) // apex is always auth
cb045f61 939 break;
2010ac95
RG
940 if(nsset.count(shorter) && !(loopZRR.dr.d_name==shorter && loopZRR.dr.d_type == QType::DS)) {
941 loopZRR.auth=false;
cb045f61 942 break;
9f70b77a 943 }
7abbc40f 944 } while(shorter.chopOff());
9f70b77a 945 }
b772ffea
KM
946 }
947
948 if(NSEC3Zone) {
949 // ents are only required for NSEC3 zones
950 uint32_t maxent = ::arg().asNum("max-ent-entries");
6ded341a 951 set<DNSName> nsec3set, nonterm;
2010ac95 952 for (auto &loopZRR: zrrs) {
6ded341a 953 bool skip=false;
2010ac95 954 DNSName shorter = loopZRR.dr.d_name;
6ded341a
KM
955 if (shorter != target && shorter.chopOff() && shorter != target) {
956 do {
957 if(nsset.count(shorter)) {
958 skip=true;
959 break;
960 }
961 } while(shorter.chopOff() && shorter != target);
962 }
2010ac95
RG
963 shorter = loopZRR.dr.d_name;
964 if(!skip && (loopZRR.dr.d_type != QType::NS || !ns3pr.d_flags)) {
6ded341a
KM
965 do {
966 if(!nsec3set.count(shorter)) {
967 nsec3set.insert(shorter);
968 }
969 } while(shorter != target && shorter.chopOff());
970 }
971 }
972
2010ac95
RG
973 for(DNSZoneRecord &loopZRR : zrrs) {
974 DNSName shorter(loopZRR.dr.d_name);
e325f20c 975 while(shorter != target && shorter.chopOff()) {
6ded341a 976 if(!qnames.count(shorter) && !nonterm.count(shorter) && nsec3set.count(shorter)) {
b772ffea 977 if(!(maxent)) {
efe7948e
KM
978 g_log<<Logger::Warning<<logPrefix<<"zone has too many empty non terminals, aborting AXFR"<<endl;
979 outpacket->setRcode(RCode::ServFail);
980 sendPacket(outpacket,outsock);
b772ffea
KM
981 return 0;
982 }
6ded341a
KM
983 nonterm.insert(shorter);
984 --maxent;
b772ffea
KM
985 }
986 }
987 }
988
9e23e712 989 for(const auto& nt : nonterm) {
2010ac95
RG
990 DNSZoneRecord tempRR;
991 tempRR.dr.d_name=nt;
992 tempRR.dr.d_type=QType::ENT;
993 tempRR.auth=true;
994 zrrs.push_back(tempRR);
b772ffea
KM
995 }
996 }
997 }
998
8a66a927 999send:
b772ffea 1000
12c86877 1001 /* now write all other records */
04f5504d
KM
1002
1003 typedef map<DNSName, NSECXEntry, CanonDNSNameCompare> nsecxrepo_t;
1004 nsecxrepo_t nsecxrepo;
1005
3af419da 1006 ChunkedSigningPipe csp(target, (securedZone && !presignedZone), ::arg().asNum("signing-threads", 1), ::arg().mustDo("workaround-11804") ? 1 : 100);
04f5504d 1007
6e8694df 1008 DNSName keyname;
3370c993 1009 unsigned int udiff;
1c6d9830
BH
1010 DTime dt;
1011 dt.set();
2010ac95 1012 for(DNSZoneRecord &loopZRR : zrrs) {
2010ac95
RG
1013 if(securedZone && (loopZRR.auth || loopZRR.dr.d_type == QType::NS)) {
1014 if (NSEC3Zone || loopZRR.dr.d_type) {
3c68fb14
KM
1015 if (presignedZone && NSEC3Zone && loopZRR.dr.d_type == QType::RRSIG && getRR<RRSIGRecordContent>(loopZRR.dr)->d_type == QType::NSEC3) {
1016 keyname = loopZRR.dr.d_name.makeRelative(sd.qname);
1017 } else {
1018 keyname = NSEC3Zone ? DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, loopZRR.dr.d_name))) : loopZRR.dr.d_name;
1019 }
b5baefaf 1020 NSECXEntry& ne = nsecxrepo[keyname];
192bcba2 1021 ne.d_ttl = sd.getNegativeTTL();
3c68fb14
KM
1022 ne.d_auth = (ne.d_auth || loopZRR.auth || (NSEC3Zone && (!ns3pr.d_flags)));
1023 if (loopZRR.dr.d_type && loopZRR.dr.d_type != QType::RRSIG) {
22a0ef16 1024 ne.d_set.set(loopZRR.dr.d_type);
b5baefaf
PD
1025 }
1026 }
b317b510 1027 }
b5baefaf 1028
2010ac95 1029 if (!loopZRR.dr.d_type)
b5baefaf
PD
1030 continue; // skip empty non-terminals
1031
2010ac95 1032 if(loopZRR.dr.d_type == QType::SOA)
12c86877 1033 continue; // skip SOA - would indicate end of AXFR
add640c0 1034
2010ac95 1035 if(csp.submit(loopZRR)) {
1c6d9830
BH
1036 for(;;) {
1037 outpacket->getRRS() = csp.getChunk();
1038 if(!outpacket->getRRS().empty()) {
60a1c204 1039 if(haveTSIGDetails && !tsigkeyname.empty())
54d84273 1040 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
02b4b703 1041 sendPacket(outpacket, outsock, false);
78bcb858 1042 trc.d_mac=outpacket->d_trc.d_mac;
1c6d9830
BH
1043 outpacket=getFreshAXFRPacket(q);
1044 }
1045 else
1046 break;
1047 }
12c86877
BH
1048 }
1049 }
78bcb858 1050 /*
3370c993 1051 udiff=dt.udiffNoReset();
1c6d9830
BH
1052 cerr<<"Starting NSEC: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<csp.d_signed<<" / "<<udiff/1000000.0<<endl;
1053 cerr<<"Outstanding: "<<csp.d_outstanding<<", "<<csp.d_queued - csp.d_signed << endl;
1054 cerr<<"Ready for consumption: "<<csp.getReady()<<endl;
78bcb858 1055 */
feef1ece 1056 if(securedZone) {
4888e4b2 1057 if(NSEC3Zone) {
9d3151d9 1058 for(nsecxrepo_t::const_iterator iter = nsecxrepo.begin(); iter != nsecxrepo.end(); ++iter) {
3c68fb14 1059 if(iter->second.d_auth) {
feef1ece 1060 NSEC3RecordContent n3rc;
22a0ef16 1061 n3rc.set(iter->second.d_set);
27d4a65b
RG
1062 const auto numberOfTypesSet = n3rc.numberOfTypesSet();
1063 if (numberOfTypesSet != 0 && (numberOfTypesSet != 1 || !n3rc.isSet(QType::NS))) {
1064 n3rc.set(QType::RRSIG);
1065 }
1066 n3rc.d_salt = ns3pr.d_salt;
feef1ece
PD
1067 n3rc.d_flags = ns3pr.d_flags;
1068 n3rc.d_iterations = ns3pr.d_iterations;
690b86b7 1069 n3rc.d_algorithm = DNSSECKeeper::DIGEST_SHA1; // SHA1, fixed in PowerDNS for now
feef1ece 1070 nsecxrepo_t::const_iterator inext = iter;
cb167afd 1071 ++inext;
feef1ece
PD
1072 if(inext == nsecxrepo.end())
1073 inext = nsecxrepo.begin();
3c68fb14 1074 while(!inext->second.d_auth && inext != iter)
feef1ece 1075 {
cb167afd 1076 ++inext;
feef1ece
PD
1077 if(inext == nsecxrepo.end())
1078 inext = nsecxrepo.begin();
1079 }
6e8694df
KM
1080 n3rc.d_nexthash = fromBase32Hex(inext->first.toStringNoDot());
1081 zrr.dr.d_name = iter->first+sd.qname;
90ba52e0 1082
192bcba2 1083 zrr.dr.d_ttl = sd.getNegativeTTL();
d06dcda4 1084 zrr.dr.setContent(std::make_shared<NSEC3RecordContent>(std::move(n3rc)));
90ba52e0 1085 zrr.dr.d_type = QType::NSEC3;
1086 zrr.dr.d_place = DNSResourceRecord::ANSWER;
1087 zrr.auth=true;
1088 if(csp.submit(zrr)) {
feef1ece
PD
1089 for(;;) {
1090 outpacket->getRRS() = csp.getChunk();
1091 if(!outpacket->getRRS().empty()) {
60a1c204 1092 if(haveTSIGDetails && !tsigkeyname.empty())
feef1ece 1093 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
02b4b703 1094 sendPacket(outpacket, outsock, false);
feef1ece
PD
1095 trc.d_mac=outpacket->d_trc.d_mac;
1096 outpacket=getFreshAXFRPacket(q);
1097 }
1098 else
1099 break;
1c6d9830 1100 }
1c6d9830 1101 }
8e9b7d99 1102 }
4888e4b2
BH
1103 }
1104 }
9d3151d9 1105 else for(nsecxrepo_t::const_iterator iter = nsecxrepo.begin(); iter != nsecxrepo.end(); ++iter) {
ed9c3a50 1106 NSECRecordContent nrc;
22a0ef16 1107 nrc.set(iter->second.d_set);
27d4a65b
RG
1108 nrc.set(QType::RRSIG);
1109 nrc.set(QType::NSEC);
6e8694df
KM
1110
1111 if(boost::next(iter) != nsecxrepo.end())
1112 nrc.d_next = boost::next(iter)->first;
ed9c3a50 1113 else
6e8694df
KM
1114 nrc.d_next=nsecxrepo.begin()->first;
1115 zrr.dr.d_name = iter->first;
1116
192bcba2 1117 zrr.dr.d_ttl = sd.getNegativeTTL();
d06dcda4 1118 zrr.dr.setContent(std::make_shared<NSECRecordContent>(std::move(nrc)));
90ba52e0 1119 zrr.dr.d_type = QType::NSEC;
1120 zrr.dr.d_place = DNSResourceRecord::ANSWER;
1121 zrr.auth=true;
1122 if(csp.submit(zrr)) {
1c6d9830
BH
1123 for(;;) {
1124 outpacket->getRRS() = csp.getChunk();
1125 if(!outpacket->getRRS().empty()) {
60a1c204 1126 if(haveTSIGDetails && !tsigkeyname.empty())
1e05b07c 1127 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
02b4b703 1128 sendPacket(outpacket, outsock, false);
78bcb858 1129 trc.d_mac=outpacket->d_trc.d_mac;
1c6d9830
BH
1130 outpacket=getFreshAXFRPacket(q);
1131 }
1132 else
1133 break;
1134 }
8e9b7d99 1135 }
add640c0 1136 }
add640c0 1137 }
78bcb858 1138 /*
3370c993 1139 udiff=dt.udiffNoReset();
1c6d9830
BH
1140 cerr<<"Flushing pipe: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<csp.d_signed<<" / "<<udiff/1000000.0<<endl;
1141 cerr<<"Outstanding: "<<csp.d_outstanding<<", "<<csp.d_queued - csp.d_signed << endl;
1142 cerr<<"Ready for consumption: "<<csp.getReady()<<endl;
78bcb858 1143 * */
1e05b07c 1144 for(;;) {
bec14a20
BH
1145 outpacket->getRRS() = csp.getChunk(true); // flush the pipe
1146 if(!outpacket->getRRS().empty()) {
60a1c204 1147 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 1148 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true); // first answer is 'normal'
1f07a63f
PD
1149 try {
1150 sendPacket(outpacket, outsock, false);
1151 }
1152 catch (PDNSException& pe) {
1153 throw PDNSException("during axfr-out of "+target.toString()+", this happened: "+pe.reason);
1154 }
78bcb858 1155 trc.d_mac=outpacket->d_trc.d_mac;
bec14a20
BH
1156 outpacket=getFreshAXFRPacket(q);
1157 }
1e05b07c 1158 else
bec14a20 1159 break;
12c86877 1160 }
1e05b07c 1161
1c6d9830 1162 udiff=dt.udiffNoReset();
1e05b07c 1163 if(securedZone)
efe7948e 1164 g_log<<Logger::Debug<<logPrefix<<"done signing: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<endl;
1e05b07c 1165
efe7948e 1166 DLOG(g_log<<logPrefix<<"done writing out records"<<endl);
12c86877 1167 /* and terminate with yet again the SOA record */
8e9b7d99 1168 outpacket=getFreshAXFRPacket(q);
9bbcf03a 1169 outpacket->addRecord(std::move(soa));
60a1c204 1170 if(haveTSIGDetails && !tsigkeyname.empty())
1e05b07c
FM
1171 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
1172
ff76e8b4 1173 sendPacket(outpacket, outsock);
1e05b07c 1174
efe7948e
KM
1175 DLOG(g_log<<logPrefix<<"last packet - close"<<endl);
1176 g_log<<Logger::Notice<<logPrefix<<"AXFR finished"<<endl;
12c86877
BH
1177
1178 return 1;
1179}
1180
c2826d2e 1181int TCPNameserver::doIXFR(std::unique_ptr<DNSPacket>& q, int outsock)
6e59a580 1182{
c6ca0aa8 1183 string logPrefix="IXFR-out zone '"+q->qdomain.toLogString()+"', client '"+q->getRemoteStringWithPort()+"', ";
efe7948e 1184
c2826d2e 1185 std::unique_ptr<DNSPacket> outpacket=getFreshAXFRPacket(q);
6e59a580
KM
1186 if(q->d_dnssecOk)
1187 outpacket->d_dnssecOk=true; // RFC 5936, 2.2.5 'SHOULD'
1188
6e59a580 1189 uint32_t serial = 0;
27c0050c 1190 MOADNSParser mdp(false, q->getString());
f80ebc05
O
1191 for(const auto & answer : mdp.d_answers) {
1192 const DNSRecord *rr = &answer.first;
e693ff5a 1193 if (rr->d_type == QType::SOA && rr->d_place == DNSResourceRecord::AUTHORITY) {
6e59a580 1194 vector<string>parts;
d06dcda4 1195 stringtok(parts, rr->getContent()->getZoneRepresentation());
6e59a580 1196 if (parts.size() >= 3) {
95dd3b90 1197 try {
a0383aad 1198 pdns::checked_stoi_into(serial, parts[2]);
95dd3b90
RG
1199 }
1200 catch(const std::out_of_range& oor) {
efe7948e 1201 g_log<<Logger::Warning<<logPrefix<<"invalid serial in IXFR query"<<endl;
95dd3b90
RG
1202 outpacket->setRcode(RCode::FormErr);
1203 sendPacket(outpacket,outsock);
1204 return 0;
1205 }
6e59a580 1206 } else {
efe7948e 1207 g_log<<Logger::Warning<<logPrefix<<"no serial in IXFR query"<<endl;
6e59a580
KM
1208 outpacket->setRcode(RCode::FormErr);
1209 sendPacket(outpacket,outsock);
1210 return 0;
1211 }
3e67ea8b 1212 } else if (rr->d_type != QType::TSIG && rr->d_type != QType::OPT) {
d5fcd583 1213 g_log<<Logger::Warning<<logPrefix<<"additional records in IXFR query, type: "<<QType(rr->d_type).toString()<<endl;
6e59a580
KM
1214 outpacket->setRcode(RCode::FormErr);
1215 sendPacket(outpacket,outsock);
1216 return 0;
1217 }
1218 }
1219
efe7948e 1220 g_log<<Logger::Warning<<logPrefix<<"transfer initiated with serial "<<serial<<endl;
6e59a580 1221
57f95528 1222 // determine if zone exists, XFR is allowed, and if IXFR can proceed using existing backend before spawning a new backend.
6e59a580 1223 SOAData sd;
57f95528
CH
1224 bool securedZone;
1225 bool serialPermitsIXFR;
6e59a580 1226 {
24fb5774 1227 auto packetHandler = s_P.lock();
efe7948e 1228 DLOG(g_log<<logPrefix<<"Looking for SOA"<<endl); // find domain_id via SOA and list complete domain. No SOA, no IXFR
24fb5774 1229 if(!*packetHandler) {
994cae6b 1230 g_log<<Logger::Warning<<"TCP server is without backend connections in doIXFR, launching"<<endl;
24fb5774 1231 *packetHandler = make_unique<PacketHandler>();
6e59a580
KM
1232 }
1233
22893145 1234 // canDoAXFR does all the ACL checks, and has the if(disable-axfr) shortcut, call it first.
24fb5774 1235 if(!canDoAXFR(q, false, *packetHandler) || !(*packetHandler)->getBackend()->getSOAUncached(q->qdomain, sd)) {
efe7948e 1236 g_log<<Logger::Warning<<logPrefix<<"failed: not authoritative"<<endl;
9c556f63 1237 outpacket->setRcode(RCode::NotAuth);
6e59a580
KM
1238 sendPacket(outpacket,outsock);
1239 return 0;
1240 }
22893145 1241
24fb5774 1242 DNSSECKeeper dk((*packetHandler)->getBackend());
57f95528 1243 DNSSECKeeper::clearCaches(q->qdomain);
d5e7c918 1244 bool narrow = false;
57f95528
CH
1245 securedZone = dk.isSecuredZone(q->qdomain);
1246 if(dk.getNSEC3PARAM(q->qdomain, nullptr, &narrow)) {
1247 if(narrow) {
efe7948e 1248 g_log<<Logger::Warning<<logPrefix<<"not doing IXFR of an NSEC3 narrow zone"<<endl;
57f95528
CH
1249 outpacket->setRcode(RCode::Refused);
1250 sendPacket(outpacket,outsock);
1251 return 0;
1252 }
22893145 1253 }
6e59a580 1254
57f95528 1255 serialPermitsIXFR = !rfc1982LessThan(serial, calculateEditSOA(sd.serial, dk, sd.qname));
6e59a580 1256 }
24d9e514 1257
57f95528
CH
1258 if (serialPermitsIXFR) {
1259 DNSName target = q->qdomain;
6e59a580 1260 TSIGRecordContent trc;
7abbc40f
PD
1261 DNSName tsigkeyname;
1262 string tsigsecret;
6e59a580 1263
57f95528
CH
1264 UeberBackend db;
1265 DNSSECKeeper dk(&db);
57f95528 1266
ea3816cf 1267 bool haveTSIGDetails = q->getTSIGDetails(&trc, &tsigkeyname);
6e59a580 1268
60a1c204 1269 if(haveTSIGDetails && !tsigkeyname.empty()) {
bb7fb11c 1270 string tsig64;
3343ad1f 1271 DNSName algorithm=trc.d_algoName; // FIXME400: was toLowerCanonic, compare output
290a083d 1272 if (algorithm == DNSName("hmac-md5.sig-alg.reg.int"))
1273 algorithm = DNSName("hmac-md5");
40361bf2
KM
1274 if (!db.getTSIGKey(tsigkeyname, algorithm, tsig64)) {
1275 g_log << Logger::Error << "TSIG key '" << tsigkeyname << "' for domain '" << target << "' not found" << endl;
53ace5d5
PL
1276 return 0;
1277 }
1278 if (B64Decode(tsig64, tsigsecret) == -1) {
efe7948e 1279 g_log<<Logger::Error<<logPrefix<<"unable to Base-64 decode TSIG key '"<<tsigkeyname<<"'"<<endl;
53ace5d5
PL
1280 return 0;
1281 }
6e59a580
KM
1282 }
1283
6e59a580 1284 // SOA *must* go out first, our signing pipe might reorder
efe7948e 1285 DLOG(g_log<<logPrefix<<"sending out SOA"<<endl);
13f9e280 1286 DNSZoneRecord soa = makeEditedDNSZRFromSOAData(dk, sd);
9bbcf03a 1287 outpacket->addRecord(std::move(soa));
ada68bd9 1288 if(securedZone && outpacket->d_dnssecOk) {
7abbc40f 1289 set<DNSName> authSet;
6e59a580 1290 authSet.insert(target);
57f95528 1291 addRRSigs(dk, db, authSet, outpacket->getRRS());
6e59a580
KM
1292 }
1293
60a1c204 1294 if(haveTSIGDetails && !tsigkeyname.empty())
6e59a580
KM
1295 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac); // first answer is 'normal'
1296
1297 sendPacket(outpacket, outsock);
1298
efe7948e 1299 g_log<<Logger::Notice<<logPrefix<<"IXFR finished"<<endl;
6e59a580
KM
1300
1301 return 1;
1302 }
1303
efe7948e 1304 g_log<<Logger::Notice<<logPrefix<<"IXFR fallback to AXFR"<<endl;
6e59a580
KM
1305 return doAXFR(q->qdomain, q, outsock);
1306}
1307
abb11ca4 1308TCPNameserver::~TCPNameserver() = default;
12c86877
BH
1309TCPNameserver::TCPNameserver()
1310{
cb0af1a1
RG
1311 d_maxTransactionsPerConn = ::arg().asNum("max-tcp-transactions-per-conn");
1312 d_idleTimeout = ::arg().asNum("tcp-idle-timeout");
1313 d_maxConnectionDuration = ::arg().asNum("max-tcp-connection-duration");
1314 d_maxConnectionsPerClient = ::arg().asNum("max-tcp-connections-per-client");
1315
379ab445 1316// sem_init(&d_connectionroom_sem,0,::arg().asNum("max-tcp-connections"));
c2826d2e 1317 d_connectionroom_sem = make_unique<Semaphore>( ::arg().asNum( "max-tcp-connections" ));
d322f931 1318 d_maxTCPConnections = ::arg().asNum( "max-tcp-connections" );
f5ad09dc 1319
12c86877 1320 vector<string>locals;
379ab445 1321 stringtok(locals,::arg()["local-address"]," ,");
f5ad09dc
PL
1322 if(locals.empty())
1323 throw PDNSException("No local addresses specified");
12c86877 1324
68b011bd 1325 d_ng.toMasks(::arg()["allow-axfr-ips"] );
9f1d5826 1326
12c86877 1327 signal(SIGPIPE,SIG_IGN);
12c86877 1328
f5ad09dc
PL
1329 for(auto const &laddr : locals) {
1330 ComboAddress local(laddr, ::arg().asNum("local-port"));
12c86877 1331
f5ad09dc
PL
1332 int s=socket(local.sin4.sin_family, SOCK_STREAM, 0);
1333 if(s<0)
1334 throw PDNSException("Unable to acquire TCP socket: "+stringerror());
3897b9e1 1335 setCloseOnExec(s);
fb316318 1336
12c86877 1337 int tmp=1;
f5ad09dc 1338 if(setsockopt(s, SOL_SOCKET,SO_REUSEADDR, (char*)&tmp, sizeof tmp) < 0) {
e6a9dde5 1339 g_log<<Logger::Error<<"Setsockopt failed"<<endl;
f5ad09dc 1340 _exit(1);
12c86877 1341 }
940d7811
RG
1342
1343 if (::arg().asNum("tcp-fast-open") > 0) {
1344#ifdef TCP_FASTOPEN
1345 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1346 if (setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
f5ad09dc 1347 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket "<<local.toStringWithPort()<<": "<<stringerror()<<endl;
940d7811
RG
1348 }
1349#else
e6a9dde5 1350 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
940d7811
RG
1351#endif
1352 }
1353
f5ad09dc
PL
1354 if(::arg().mustDo("non-local-bind"))
1355 Utility::setBindAny(local.sin4.sin_family, s);
1356
1357 if(local.isIPv6() && setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1358 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<stringerror()<<endl;
1359 }
fec7dd5a 1360
379ab445 1361 if(::bind(s, (sockaddr*)&local, local.getSocklen())<0) {
a702a96c 1362 int err = errno;
2c896042 1363 close(s);
a702a96c 1364 if( err == EADDRNOTAVAIL && ! ::arg().mustDo("local-address-nonexist-fail") ) {
f5ad09dc 1365 g_log<<Logger::Error<<"Address " << local.toString() << " does not exist on this server - skipping TCP bind" << endl;
5ecb2885
MZ
1366 continue;
1367 } else {
f5ad09dc 1368 g_log<<Logger::Error<<"Unable to bind to TCP socket " << local.toStringWithPort() << ": "<<stringerror(err)<<endl;
2ab7e9ac 1369 throw PDNSException("Unable to bind to TCP socket");
5ecb2885 1370 }
12c86877 1371 }
12c86877 1372
f5ad09dc
PL
1373 listen(s, 128);
1374 g_log<<Logger::Error<<"TCP server bound to "<<local.toStringWithPort()<<endl;
12c86877 1375 d_sockets.push_back(s);
8edfedf1
BH
1376 struct pollfd pfd;
1377 memset(&pfd, 0, sizeof(pfd));
1378 pfd.fd = s;
1379 pfd.events = POLLIN;
8edfedf1 1380 d_prfds.push_back(pfd);
12c86877 1381 }
12c86877
BH
1382}
1383
1384
ff76e8b4 1385//! Start of TCP operations thread, we launch a new thread for each incoming TCP question
12c86877
BH
1386void TCPNameserver::thread()
1387{
519f5484 1388 setThreadName("pdns/tcpnameser");
12c86877
BH
1389 try {
1390 for(;;) {
1391 int fd;
cb0af1a1
RG
1392 ComboAddress remote;
1393 Utility::socklen_t addrlen=remote.getSocklen();
12c86877 1394
8edfedf1 1395 int ret=poll(&d_prfds[0], d_prfds.size(), -1); // blocks, forever if need be
8a63d3ce 1396 if(ret <= 0)
4957a608 1397 continue;
8a63d3ce 1398
12c86877 1399 int sock=-1;
8ce9e4e6 1400 for(const pollfd& pfd : d_prfds) {
c1ee10a6 1401 if(pfd.revents & POLLIN) {
4957a608 1402 sock = pfd.fd;
cb0af1a1
RG
1403 remote.sin4.sin_family = AF_INET6;
1404 addrlen=remote.getSocklen();
4957a608
BH
1405
1406 if((fd=accept(sock, (sockaddr*)&remote, &addrlen))<0) {
a702a96c
OM
1407 int err = errno;
1408 g_log<<Logger::Error<<"TCP question accept error: "<<stringerror(err)<<endl;
1e05b07c 1409
a702a96c 1410 if(err==EMFILE) {
e6a9dde5 1411 g_log<<Logger::Error<<"TCP handler out of filedescriptors, exiting, won't recover from this"<<endl;
5bd2ea7b 1412 _exit(1);
4957a608
BH
1413 }
1414 }
1415 else {
cb0af1a1 1416 if (d_maxConnectionsPerClient) {
24fb5774
RG
1417 auto clientsCount = s_clientsCount.lock();
1418 if ((*clientsCount)[remote] >= d_maxConnectionsPerClient) {
e6a9dde5 1419 g_log<<Logger::Notice<<"Limit of simultaneous TCP connections per client reached for "<< remote<<", dropping"<<endl;
cb0af1a1
RG
1420 close(fd);
1421 continue;
1422 }
24fb5774 1423 (*clientsCount)[remote]++;
cb0af1a1
RG
1424 }
1425
4957a608
BH
1426 d_connectionroom_sem->wait(); // blocks if no connections are available
1427
1428 int room;
1429 d_connectionroom_sem->getValue( &room);
1430 if(room<1)
e6a9dde5 1431 g_log<<Logger::Warning<<"Limit of simultaneous TCP connections reached - raise max-tcp-connections"<<endl;
4957a608 1432
0ddde5fb
RG
1433 try {
1434 std::thread connThread(doConnection, fd);
1435 connThread.detach();
1436 }
1437 catch (std::exception& e) {
1438 g_log<<Logger::Error<<"Error creating thread: "<<e.what()<<endl;
4957a608 1439 d_connectionroom_sem->post();
48e8d70b 1440 close(fd);
cb0af1a1 1441 decrementClientCount(remote);
4957a608
BH
1442 }
1443 }
1444 }
12c86877
BH
1445 }
1446 }
1447 }
3f81d239 1448 catch(PDNSException &AE) {
e6a9dde5 1449 g_log<<Logger::Error<<"TCP Nameserver thread dying because of fatal error: "<<AE.reason<<endl;
12c86877
BH
1450 }
1451 catch(...) {
e6a9dde5 1452 g_log<<Logger::Error<<"TCPNameserver dying because of an unexpected fatal error"<<endl;
12c86877 1453 }
5bd2ea7b 1454 _exit(1); // take rest of server with us
12c86877
BH
1455}
1456
1457
d322f931
PD
1458unsigned int TCPNameserver::numTCPConnections()
1459{
1460 int room;
1461 d_connectionroom_sem->getValue( &room);
1462 return d_maxTCPConnections - room;
1463}