]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/tcpreceiver.cc
Merge pull request #8777 from omoerbeek/rec-wip-qname-vs-ds
[thirdparty/pdns.git] / pdns / tcpreceiver.cc
CommitLineData
12c86877
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
2e7834cb 3 Copyright (C) 2002-2012 PowerDNS.COM BV
12c86877
BH
4
5 This program is free software; you can redistribute it and/or modify
22dc646a
BH
6 it under the terms of the GNU General Public License version 2
7 as published by the Free Software Foundation
f782fe38
MH
8
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
12c86877
BH
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
06bd9ccf 20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12c86877 21*/
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
b6f3b03a 25#include <boost/algorithm/string.hpp>
bf269e28 26#include "auth-packetcache.hh"
1258abe0 27#include "utility.hh"
519f5484 28#include "threadname.hh"
add640c0 29#include "dnssecinfra.hh"
4c1474f3 30#include "dnsseckeeper.hh"
12c86877 31#include <cstdio>
4888e4b2 32#include "base32.hh"
12c86877
BH
33#include <cstring>
34#include <cstdlib>
35#include <sys/types.h>
940d7811 36#include <netinet/tcp.h>
12c86877
BH
37#include <iostream>
38#include <string>
39#include "tcpreceiver.hh"
67d74e49 40#include "sstuff.hh"
fa8fd4d2 41
12c86877
BH
42#include <errno.h>
43#include <signal.h>
78bcb858 44#include "base64.hh"
12c86877
BH
45#include "ueberbackend.hh"
46#include "dnspacket.hh"
47#include "nameserver.hh"
48#include "distributor.hh"
49#include "lock.hh"
50#include "logger.hh"
51#include "arguments.hh"
379ab445 52
3e8216c8 53#include "common_startup.hh"
12c86877
BH
54#include "packethandler.hh"
55#include "statbag.hh"
56#include "resolver.hh"
57#include "communicator.hh"
61b26744 58#include "namespaces.hh"
8e9b7d99 59#include "signingpipe.hh"
273d88b2 60#include "stubresolver.hh"
bf269e28 61extern AuthPacketCache PC;
12c86877
BH
62extern StatBag S;
63
64/**
65\file tcpreceiver.cc
66\brief This file implements the tcpreceiver that receives and answers questions over TCP/IP
67*/
68
ac2bb9e7 69pthread_mutex_t TCPNameserver::s_plock = PTHREAD_MUTEX_INITIALIZER;
c2826d2e
RG
70std::unique_ptr<Semaphore> TCPNameserver::d_connectionroom_sem{nullptr};
71std::unique_ptr<PacketHandler> TCPNameserver::s_P{nullptr};
d322f931 72unsigned int TCPNameserver::d_maxTCPConnections = 0;
9f1d5826 73NetmaskGroup TCPNameserver::d_ng;
cb0af1a1
RG
74size_t TCPNameserver::d_maxTransactionsPerConn;
75size_t TCPNameserver::d_maxConnectionsPerClient;
76unsigned int TCPNameserver::d_idleTimeout;
77unsigned int TCPNameserver::d_maxConnectionDuration;
78std::mutex TCPNameserver::s_clientsCountMutex;
79std::map<ComboAddress,size_t,ComboAddress::addressOnlyLessThan> TCPNameserver::s_clientsCount;
12c86877 80
12c86877
BH
81void TCPNameserver::go()
82{
e6a9dde5 83 g_log<<Logger::Error<<"Creating backend connection for TCP"<<endl;
c2826d2e 84 s_P.reset();
12c86877 85 try {
c2826d2e 86 s_P=make_unique<PacketHandler>();
12c86877 87 }
3f81d239 88 catch(PDNSException &ae) {
e6a9dde5 89 g_log<<Logger::Error<<"TCP server is unable to launch backends - will try again when questions come in: "<<ae.reason<<endl;
12c86877
BH
90 }
91 pthread_create(&d_tid, 0, launcher, static_cast<void *>(this));
92}
93
94void *TCPNameserver::launcher(void *data)
95{
96 static_cast<TCPNameserver *>(data)->thread();
97 return 0;
98}
99
3f81d239 100// throws PDNSException if things didn't go according to plan, returns 0 if really 0 bytes were read
cb0af1a1 101static int readnWithTimeout(int fd, void* buffer, unsigned int n, unsigned int idleTimeout, bool throwOnEOF=true, unsigned int totalTimeout=0)
12c86877 102{
6a3e5d1a
BH
103 unsigned int bytes=n;
104 char *ptr = (char*)buffer;
105 int ret;
cb0af1a1
RG
106 time_t start = 0;
107 unsigned int remainingTotal = totalTimeout;
108 if (totalTimeout) {
109 start = time(NULL);
110 }
6a3e5d1a
BH
111 while(bytes) {
112 ret=read(fd, ptr, bytes);
113 if(ret < 0) {
114 if(errno==EAGAIN) {
cb0af1a1 115 ret=waitForData(fd, (totalTimeout == 0 || idleTimeout <= remainingTotal) ? idleTimeout : remainingTotal);
4957a608
BH
116 if(ret < 0)
117 throw NetworkError("Waiting for data read");
118 if(!ret)
119 throw NetworkError("Timeout reading data");
120 continue;
6a3e5d1a
BH
121 }
122 else
4957a608 123 throw NetworkError("Reading data: "+stringerror());
6a3e5d1a
BH
124 }
125 if(!ret) {
126 if(!throwOnEOF && n == bytes)
4957a608 127 return 0;
6a3e5d1a 128 else
4957a608 129 throw NetworkError("Did not fulfill read from TCP due to EOF");
6a3e5d1a
BH
130 }
131
132 ptr += ret;
133 bytes -= ret;
cb0af1a1
RG
134 if (totalTimeout) {
135 time_t now = time(NULL);
136 unsigned int elapsed = now - start;
137 if (elapsed >= remainingTotal) {
138 throw NetworkError("Timeout while reading data");
139 }
140 start = now;
141 remainingTotal -= elapsed;
142 }
6a3e5d1a
BH
143 }
144 return n;
145}
12c86877 146
6a3e5d1a 147// ditto
cb0af1a1 148static void writenWithTimeout(int fd, const void *buffer, unsigned int n, unsigned int idleTimeout)
6a3e5d1a
BH
149{
150 unsigned int bytes=n;
151 const char *ptr = (char*)buffer;
152 int ret;
153 while(bytes) {
154 ret=write(fd, ptr, bytes);
155 if(ret < 0) {
156 if(errno==EAGAIN) {
cb0af1a1 157 ret=waitForRWData(fd, false, idleTimeout, 0);
4957a608
BH
158 if(ret < 0)
159 throw NetworkError("Waiting for data write");
160 if(!ret)
161 throw NetworkError("Timeout writing data");
162 continue;
6a3e5d1a
BH
163 }
164 else
4957a608 165 throw NetworkError("Writing data: "+stringerror());
6a3e5d1a 166 }
12c86877 167 if(!ret) {
67d74e49 168 throw NetworkError("Did not fulfill TCP write due to EOF");
12c86877 169 }
6a3e5d1a
BH
170
171 ptr += ret;
172 bytes -= ret;
12c86877 173 }
12c86877
BH
174}
175
6a3e5d1a 176void connectWithTimeout(int fd, struct sockaddr* remote, size_t socklen)
12c86877 177{
6a3e5d1a
BH
178 int err;
179 Utility::socklen_t len=sizeof(err);
180
76473b92 181 if((err=connect(fd, remote, socklen))<0 && errno!=EINPROGRESS)
67d74e49 182 throw NetworkError("connect: "+stringerror());
6a3e5d1a
BH
183
184 if(!err)
185 goto done;
186
187 err=waitForRWData(fd, false, 5, 0);
188 if(err == 0)
67d74e49 189 throw NetworkError("Timeout connecting to remote");
6a3e5d1a 190 if(err < 0)
67d74e49 191 throw NetworkError("Error connecting to remote");
12c86877 192
6a3e5d1a 193 if(getsockopt(fd, SOL_SOCKET,SO_ERROR,(char *)&err,&len)<0)
67d74e49 194 throw NetworkError("Error connecting to remote: "+stringerror()); // Solaris
6a3e5d1a
BH
195
196 if(err)
67d74e49 197 throw NetworkError("Error connecting to remote: "+string(strerror(err)));
6a3e5d1a
BH
198
199 done:
200 ;
201}
12c86877 202
c2826d2e 203void TCPNameserver::sendPacket(std::unique_ptr<DNSPacket>& p, int outsock)
6a3e5d1a 204{
b552d7b1 205 g_rs.submitResponse(*p, false);
9951e2d0 206
fbaa5e09
BH
207 uint16_t len=htons(p->getString().length());
208 string buffer((const char*)&len, 2);
209 buffer.append(p->getString());
cb0af1a1 210 writenWithTimeout(outsock, buffer.c_str(), buffer.length(), d_idleTimeout);
6a3e5d1a
BH
211}
212
213
cb0af1a1 214void TCPNameserver::getQuestion(int fd, char *mesg, int pktlen, const ComboAddress &remote, unsigned int totalTime)
6a3e5d1a
BH
215try
216{
cb0af1a1 217 readnWithTimeout(fd, mesg, pktlen, d_idleTimeout, true, totalTime);
6a3e5d1a 218}
67d74e49
BH
219catch(NetworkError& ae) {
220 throw NetworkError("Error reading DNS data from TCP client "+remote.toString()+": "+ae.what());
12c86877
BH
221}
222
5fd567ec 223static void incTCPAnswerCount(const ComboAddress& remote)
224{
225 S.inc("tcp-answers");
226 if(remote.sin4.sin_family == AF_INET6)
227 S.inc("tcp6-answers");
228 else
229 S.inc("tcp4-answers");
230}
cb0af1a1
RG
231
// Checks whether a connection that began at `start` has used up its allowed lifetime.
//
// maxConnectionDuration: limit in seconds; 0 disables the check.
// start:                 time() value taken when the connection was accepted.
// remainingTime:         on a `false` return with the check enabled, set to the
//                        number of seconds left; left untouched otherwise.
//
// Returns true once at least maxConnectionDuration seconds have elapsed.
static bool maxConnectionDurationReached(unsigned int maxConnectionDuration, time_t start, unsigned int& remainingTime)
{
  if (maxConnectionDuration == 0) {
    return false;
  }

  // time() is wall-clock and may be stepped backwards; treat that as "no time
  // elapsed" so remainingTime can never exceed the configured maximum.
  const time_t now = time(nullptr);
  const unsigned long long elapsed = (now > start) ? static_cast<unsigned long long>(now - start) : 0ULL;

  if (elapsed >= maxConnectionDuration) {
    return true;
  }

  // elapsed < maxConnectionDuration here, so the subtraction cannot wrap
  remainingTime = maxConnectionDuration - static_cast<unsigned int>(elapsed);
  return false;
}
243
244void TCPNameserver::decrementClientCount(const ComboAddress& remote)
245{
246 if (d_maxConnectionsPerClient) {
247 std::lock_guard<std::mutex> lock(s_clientsCountMutex);
248 s_clientsCount[remote]--;
249 if (s_clientsCount[remote] == 0) {
250 s_clientsCount.erase(remote);
251 }
252 }
253}
254
12c86877
BH
255void *TCPNameserver::doConnection(void *data)
256{
519f5484 257 setThreadName("pdns/tcpConnect");
c2826d2e 258 std::unique_ptr<DNSPacket> packet;
b014ad98
BH
259 // Fix gcc-4.0 error (on AMD64)
260 int fd=(int)(long)data; // gotta love C (generates a harmless warning on opteron)
4f5e7925 261 ComboAddress remote;
262 socklen_t remotelen=sizeof(remote);
cb0af1a1
RG
263 size_t transactions = 0;
264 time_t start = 0;
265 if (d_maxConnectionDuration) {
266 start = time(NULL);
267 }
4f5e7925 268
12c86877 269 pthread_detach(pthread_self());
4f5e7925 270 if(getpeername(fd, (struct sockaddr *)&remote, &remotelen) < 0) {
e6a9dde5 271 g_log<<Logger::Warning<<"Received question from socket which had no remote address, dropping ("<<stringerror()<<")"<<endl;
4f5e7925 272 d_connectionroom_sem->post();
a7b68ae7
RG
273 try {
274 closesocket(fd);
275 }
276 catch(const PDNSException& e) {
e6a9dde5 277 g_log<<Logger::Error<<"Error closing TCP socket: "<<e.reason<<endl;
a7b68ae7 278 }
4f5e7925 279 return 0;
280 }
281
3897b9e1 282 setNonBlocking(fd);
12c86877 283 try {
c2b4ccc0 284 int mesgsize=65535;
285 scoped_array<char> mesg(new char[mesgsize]);
12c86877 286
e6a9dde5 287 DLOG(g_log<<"TCP Connection accepted on fd "<<fd<<endl);
21a303f3 288 bool logDNSQueries= ::arg().mustDo("log-dns-queries");
12c86877 289 for(;;) {
cb0af1a1
RG
290 unsigned int remainingTime = 0;
291 transactions++;
292 if (d_maxTransactionsPerConn && transactions > d_maxTransactionsPerConn) {
e6a9dde5 293 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the number of transactions per connection, dropping.";
cb0af1a1
RG
294 break;
295 }
296 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
e6a9dde5 297 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the maximum TCP connection duration, dropping.";
cb0af1a1
RG
298 break;
299 }
6a3e5d1a
BH
300
301 uint16_t pktlen;
cb0af1a1 302 if(!readnWithTimeout(fd, &pktlen, 2, d_idleTimeout, false, remainingTime))
4957a608 303 break;
6a3e5d1a 304 else
4957a608 305 pktlen=ntohs(pktlen);
12c86877 306
366e1e5e
AT
307 // this check will always be false *if* no one touches
308 // the mesg array. pktlen can be maximum of 65535 as
309 // it is 2 byte unsigned variable. In getQuestion, we
310 // write to 0 up to pktlen-1 so 65535 is just right.
311
312 // do not remove this check as it will catch if someone
313 // decreases the mesg buffer size for some reason.
c2b4ccc0 314 if(pktlen > mesgsize) {
e6a9dde5 315 g_log<<Logger::Warning<<"Received an overly large question from "<<remote.toString()<<", dropping"<<endl;
4957a608 316 break;
12c86877
BH
317 }
318
cb0af1a1 319 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
e6a9dde5 320 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the maximum TCP connection duration, dropping.";
cb0af1a1
RG
321 break;
322 }
323
324 getQuestion(fd, mesg.get(), pktlen, remote, remainingTime);
12c86877 325 S.inc("tcp-queries");
5fd567ec 326 if(remote.sin4.sin_family == AF_INET6)
327 S.inc("tcp6-queries");
328 else
329 S.inc("tcp4-queries");
3e579e91 330
c2826d2e 331 packet=make_unique<DNSPacket>(true);
809fe23f 332 packet->setRemote(&remote);
e9dd48f9 333 packet->d_tcp=true;
ff76e8b4 334 packet->setSocket(fd);
c2b4ccc0 335 if(packet->parse(mesg.get(), pktlen)<0)
4957a608 336 break;
c1663439 337
6e59a580
KM
338 if(packet->qtype.getCode()==QType::AXFR) {
339 if(doAXFR(packet->qdomain, packet, fd))
5fd567ec 340 incTCPAnswerCount(remote);
6e59a580
KM
341 continue;
342 }
343
344 if(packet->qtype.getCode()==QType::IXFR) {
345 if(doIXFR(packet, fd))
5fd567ec 346 incTCPAnswerCount(remote);
4957a608 347 continue;
12c86877
BH
348 }
349
c2826d2e
RG
350 std::unique_ptr<DNSPacket> reply;
351 auto cached = make_unique<DNSPacket>(false);
fe498ace 352 if(logDNSQueries) {
cece60fc
CH
353 string remote_text;
354 if(packet->hasEDNSSubnet())
355 remote_text = packet->getRemote().toString() + "<-" + packet->getRealRemote().toString();
fe498ace 356 else
cece60fc 357 remote_text = packet->getRemote().toString();
e6a9dde5 358 g_log << Logger::Notice<<"TCP Remote "<< remote_text <<" wants '" << packet->qdomain<<"|"<<packet->qtype.getName() <<
1b16851b 359 "', do = " <<packet->d_dnssecOk <<", bufsize = "<< packet->getMaxReplyLen();
fe498ace 360 }
bb5903e2 361
9a037bfa 362 if(PC.enabled()) {
c2826d2e 363 if(packet->couldBeCached() && PC.get(*packet, *cached)) { // short circuit - does the PacketCache recognize this question?
9a037bfa 364 if(logDNSQueries)
1b16851b 365 g_log<<": packetcache HIT"<<endl;
9a037bfa
KM
366 cached->setRemote(&packet->d_remote);
367 cached->d.id=packet->d.id;
368 cached->d.rd=packet->d.rd; // copy in recursion desired bit
369 cached->commitD(); // commit d to the packet inlined
370
371 sendPacket(cached, fd); // presigned, don't do it again
372 continue;
373 }
21a303f3 374 if(logDNSQueries)
1b16851b 375 g_log<<": packetcache MISS"<<endl;
bbe4b041
OM
376 } else {
377 if (logDNSQueries) {
1b16851b 378 g_log<<endl;
bbe4b041 379 }
12c86877 380 }
12c86877 381 {
4957a608
BH
382 Lock l(&s_plock);
383 if(!s_P) {
e6a9dde5 384 g_log<<Logger::Error<<"TCP server is without backend connections, launching"<<endl;
c2826d2e 385 s_P=make_unique<PacketHandler>();
4957a608 386 }
4957a608 387
c2826d2e 388 reply= s_P->doQuestion(*packet); // we really need to ask the backend :-)
12c86877
BH
389 }
390
12c86877 391 if(!reply) // unable to write an answer?
4957a608 392 break;
b552d7b1 393
ff76e8b4 394 sendPacket(reply, fd);
12c86877 395 }
12c86877 396 }
3f81d239 397 catch(PDNSException &ae) {
556252ea 398 Lock l(&s_plock);
c2826d2e 399 s_P.reset(); // on next call, backend will be recycled
e6a9dde5 400 g_log<<Logger::Error<<"TCP nameserver had error, cycling backend: "<<ae.reason<<endl;
ef1d2f44 401 }
0afa9049 402 catch(NetworkError &e) {
e6a9dde5 403 g_log<<Logger::Info<<"TCP Connection Thread died because of network error: "<<e.what()<<endl;
0afa9049
BH
404 }
405
adc10f99 406 catch(std::exception &e) {
e6a9dde5 407 g_log<<Logger::Error<<"TCP Connection Thread died because of STL error: "<<e.what()<<endl;
12c86877
BH
408 }
409 catch( ... )
410 {
e6a9dde5 411 g_log << Logger::Error << "TCP Connection Thread caught unknown exception." << endl;
12c86877 412 }
12c86877 413 d_connectionroom_sem->post();
a7b68ae7
RG
414
415 try {
416 closesocket(fd);
417 }
418 catch(const PDNSException& e) {
e6a9dde5 419 g_log<<Logger::Error<<"Error closing TCP socket: "<<e.reason<<endl;
a7b68ae7 420 }
cb0af1a1 421 decrementClientCount(remote);
12c86877
BH
422
423 return 0;
424}
425
78bcb858 426
e082fb4c 427// call this method with s_plock held!
c2826d2e 428bool TCPNameserver::canDoAXFR(std::unique_ptr<DNSPacket>& q)
12c86877 429{
379ab445 430 if(::arg().mustDo("disable-axfr"))
318c3ec6
BH
431 return false;
432
78bcb858
BH
433 if(q->d_havetsig) { // if you have one, it must be good
434 TSIGRecordContent trc;
7abbc40f
PD
435 DNSName keyname;
436 string secret;
ea3816cf 437 if(!q->checkForCorrectTSIG(s_P->getBackend(), &keyname, &secret, &trc)) {
78bcb858 438 return false;
7f9ac49b
AT
439 } else {
440 getTSIGHashEnum(trc.d_algoName, q->d_tsig_algo);
441 if (q->d_tsig_algo == TSIG_GSS) {
1635f12b 442 GssContext gssctx(keyname);
7f9ac49b 443 if (!gssctx.getPeerPrincipal(q->d_peer_principal)) {
e6a9dde5 444 g_log<<Logger::Warning<<"Failed to extract peer principal from GSS context with keyname '"<<keyname<<"'"<<endl;
7f9ac49b
AT
445 }
446 }
447 }
448
ea99d474 449 DNSSECKeeper dk(s_P->getBackend());
5e29f2f9 450
84fc3f8b
AT
451 if (q->d_tsig_algo == TSIG_GSS) {
452 vector<string> princs;
453 s_P->getBackend()->getDomainMetadata(q->qdomain, "GSS-ALLOW-AXFR-PRINCIPAL", princs);
ff05fd12 454 for(const std::string& princ : princs) {
84fc3f8b 455 if (q->d_peer_principal == princ) {
e6a9dde5 456 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: TSIG signed request with authorized principal '"<<q->d_peer_principal<<"' and algorithm 'gss-tsig'"<<endl;
84fc3f8b
AT
457 return true;
458 }
459 }
e6a9dde5 460 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' denied: TSIG signed request with principal '"<<q->d_peer_principal<<"' and algorithm 'gss-tsig' is not permitted"<<endl;
84fc3f8b
AT
461 return false;
462 }
463
3d03fee8 464 if(!dk.TSIGGrantsAccess(q->qdomain, keyname)) {
e6a9dde5 465 g_log<<Logger::Error<<"AXFR '"<<q->qdomain<<"' denied: key with name '"<<keyname<<"' and algorithm '"<<getTSIGAlgoName(q->d_tsig_algo)<<"' does not grant access to zone"<<endl;
78bcb858
BH
466 return false;
467 }
468 else {
e6a9dde5 469 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: TSIG signed request with authorized key '"<<keyname<<"' and algorithm '"<<getTSIGAlgoName(q->d_tsig_algo)<<"'"<<endl;
78bcb858
BH
470 return true;
471 }
472 }
93afc0a3
PD
473
474 // cerr<<"checking allow-axfr-ips"<<endl;
475 if(!(::arg()["allow-axfr-ips"].empty()) && d_ng.match( (ComboAddress *) &q->d_remote )) {
e6a9dde5 476 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: client IP "<<q->getRemote()<<" is in allow-axfr-ips"<<endl;
12c86877 477 return true;
ab5edd12 478 }
93afc0a3
PD
479
480 FindNS fns;
481
482 // cerr<<"doing per-zone-axfr-acls"<<endl;
483 SOAData sd;
79ba7763 484 if(s_P->getBackend()->getSOAUncached(q->qdomain,sd)) {
93afc0a3
PD
485 // cerr<<"got backend and SOA"<<endl;
486 DNSBackend *B=sd.db;
487 vector<string> acl;
894bcf36 488 s_P->getBackend()->getDomainMetadata(q->qdomain, "ALLOW-AXFR-FROM", acl);
93afc0a3
PD
489 for (vector<string>::const_iterator i = acl.begin(); i != acl.end(); ++i) {
490 // cerr<<"matching against "<<*i<<endl;
491 if(pdns_iequals(*i, "AUTO-NS")) {
492 // cerr<<"AUTO-NS magic please!"<<endl;
493
494 DNSResourceRecord rr;
7abbc40f 495 set<DNSName> nsset;
93afc0a3 496
acb61e0a 497 B->lookup(QType(QType::NS),q->qdomain,sd.domain_id);
93afc0a3 498 while(B->get(rr))
290a083d 499 nsset.insert(DNSName(rr.content));
7abbc40f 500 for(const auto & j: nsset) {
35b942fe 501 vector<string> nsips=fns.lookup(j, s_P->getBackend());
93afc0a3
PD
502 for(vector<string>::const_iterator k=nsips.begin();k!=nsips.end();++k) {
503 // cerr<<"got "<<*k<<" from AUTO-NS"<<endl;
ded6b08d 504 if(*k == q->getRemote().toString())
93afc0a3
PD
505 {
506 // cerr<<"got AUTO-NS hit"<<endl;
e6a9dde5 507 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: client IP "<<q->getRemote()<<" is in NSset"<<endl;
93afc0a3
PD
508 return true;
509 }
510 }
511 }
512 }
513 else
514 {
515 Netmask nm = Netmask(*i);
516 if(nm.match( (ComboAddress *) &q->d_remote ))
517 {
e6a9dde5 518 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: client IP "<<q->getRemote()<<" is in per-domain ACL"<<endl;
93afc0a3
PD
519 // cerr<<"hit!"<<endl;
520 return true;
521 }
522 }
523 }
524 }
525
12c86877
BH
526 extern CommunicatorClass Communicator;
527
ded6b08d 528 if(Communicator.justNotified(q->qdomain, q->getRemote().toString())) { // we just notified this ip
e6a9dde5 529 g_log<<Logger::Warning<<"Approved AXFR of '"<<q->qdomain<<"' from recently notified slave "<<q->getRemote()<<endl;
12c86877
BH
530 return true;
531 }
532
e6a9dde5 533 g_log<<Logger::Error<<"AXFR of domain '"<<q->qdomain<<"' denied: client IP "<<q->getRemote()<<" has no permission"<<endl;
12c86877
BH
534 return false;
535}
536
b317b510 537namespace {
54d84273
PD
538 struct NSECXEntry
539 {
22a0ef16 540 NSECBitmap d_set;
54d84273 541 unsigned int d_ttl;
feef1ece 542 bool d_auth;
54d84273 543 };
8e9b7d99 544
a5188bcd 545 static std::unique_ptr<DNSPacket> getFreshAXFRPacket(std::unique_ptr<DNSPacket>& q)
54d84273 546 {
c2826d2e 547 std::unique_ptr<DNSPacket> ret = std::unique_ptr<DNSPacket>(q->replyPacket());
54d84273
PD
548 ret->setCompress(false);
549 ret->d_dnssecOk=false; // RFC 5936, 2.2.5
550 ret->d_tcp = true;
551 return ret;
552 }
8e9b7d99
BH
553}
554
54d84273 555
12c86877 556/** do the actual zone transfer. Return 0 in case of error, 1 in case of success */
c2826d2e 557int TCPNameserver::doAXFR(const DNSName &target, std::unique_ptr<DNSPacket>& q, int outsock)
12c86877 558{
c2826d2e 559 std::unique_ptr<DNSPacket> outpacket= getFreshAXFRPacket(q);
c67e46a1 560 if(q->d_dnssecOk)
05e24311 561 outpacket->d_dnssecOk=true; // RFC 5936, 2.2.5 'SHOULD'
22893145 562
e6a9dde5 563 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' initiated by "<<q->getRemote()<<endl;
12c86877 564
22893145 565 // determine if zone exists and AXFR is allowed using existing backend before spawning a new backend.
12c86877
BH
566 SOAData sd;
567 {
568 Lock l(&s_plock);
e6a9dde5 569 DLOG(g_log<<"Looking for SOA"<<endl); // find domain_id via SOA and list complete domain. No SOA, no AXFR
12a965c5 570 if(!s_P) {
e6a9dde5 571 g_log<<Logger::Error<<"TCP server is without backend connections in doAXFR, launching"<<endl;
c2826d2e 572 s_P=make_unique<PacketHandler>();
12a965c5 573 }
12c86877 574
ea99d474 575 // canDoAXFR does all the ACL checks, and has the if(disable-axfr) shortcut, call it first.
8090f5a2 576 if (!canDoAXFR(q)) {
e6a9dde5 577 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' failed: "<<q->getRemote()<<" may not request AXFR"<<endl;
9c556f63 578 outpacket->setRcode(RCode::NotAuth);
8090f5a2
AT
579 sendPacket(outpacket,outsock);
580 return 0;
581 }
582
8090f5a2 583 if(!s_P->getBackend()->getSOAUncached(target, sd)) {
e6a9dde5 584 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' failed: not authoritative"<<endl;
9c556f63 585 outpacket->setRcode(RCode::NotAuth);
ff76e8b4 586 sendPacket(outpacket,outsock);
12c86877
BH
587 return 0;
588 }
3de83124 589 }
22893145 590
8e9b7d99 591 UeberBackend db;
79ba7763 592 if(!db.getSOAUncached(target, sd)) {
e6a9dde5 593 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' failed: not authoritative in second instance"<<endl;
79ba7763 594 outpacket->setRcode(RCode::NotAuth);
ff76e8b4 595 sendPacket(outpacket,outsock);
3de83124 596 return 0;
12c86877 597 }
3de83124 598
ea99d474 599 DNSSECKeeper dk(&db);
40b3959a 600 DNSSECKeeper::clearCaches(target);
22893145
CH
601 bool securedZone = dk.isSecuredZone(target);
602 bool presignedZone = dk.isPresigned(target);
603
604 bool noAXFRBecauseOfNSEC3Narrow=false;
605 NSEC3PARAMRecordContent ns3pr;
606 bool narrow;
607 bool NSEC3Zone=false;
dacacb23 608 if(securedZone && dk.getNSEC3PARAM(target, &ns3pr, &narrow)) {
22893145
CH
609 NSEC3Zone=true;
610 if(narrow) {
e6a9dde5 611 g_log<<Logger::Error<<"Not doing AXFR of an NSEC3 narrow zone '"<<target<<"' for "<<q->getRemote()<<endl;
22893145
CH
612 noAXFRBecauseOfNSEC3Narrow=true;
613 }
614 }
615
616 if(noAXFRBecauseOfNSEC3Narrow) {
e6a9dde5 617 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' denied to "<<q->getRemote()<<endl;
22893145
CH
618 outpacket->setRcode(RCode::Refused);
619 // FIXME: should actually figure out if we are auth over a zone, and send out 9 if we aren't
620 sendPacket(outpacket,outsock);
621 return 0;
622 }
623
78bcb858 624 TSIGRecordContent trc;
7abbc40f
PD
625 DNSName tsigkeyname;
626 string tsigsecret;
78bcb858 627
ea3816cf 628 bool haveTSIGDetails = q->getTSIGDetails(&trc, &tsigkeyname);
78bcb858 629
60a1c204 630 if(haveTSIGDetails && !tsigkeyname.empty()) {
2c26f25a 631 string tsig64;
3343ad1f 632 DNSName algorithm=trc.d_algoName; // FIXME400: check
290a083d 633 if (algorithm == DNSName("hmac-md5.sig-alg.reg.int"))
634 algorithm = DNSName("hmac-md5");
635 if (algorithm != DNSName("gss-tsig")) {
ea99d474 636 if(!db.getTSIGKey(tsigkeyname, &algorithm, &tsig64)) {
e6a9dde5 637 g_log<<Logger::Error<<"TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"' not found"<<endl;
53ace5d5
PL
638 return 0;
639 }
640 if (B64Decode(tsig64, tsigsecret) == -1) {
e6a9dde5 641 g_log<<Logger::Error<<"Unable to Base-64 decode TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"'"<<endl;
53ace5d5
PL
642 return 0;
643 }
84fc3f8b 644 }
78bcb858 645 }
8e9b7d99 646
8e9b7d99 647
8267bd2c 648 // SOA *must* go out first, our signing pipe might reorder
e6a9dde5 649 DLOG(g_log<<"Sending out SOA"<<endl);
13f9e280
CH
650 DNSZoneRecord soa = makeEditedDNSZRFromSOAData(dk, sd);
651 outpacket->addRecord(soa);
3c68fb14 652 if(securedZone && !presignedZone) {
7abbc40f 653 set<DNSName> authSet;
8d3cbffa 654 authSet.insert(target);
ea99d474 655 addRRSigs(dk, db, authSet, outpacket->getRRS());
8d3cbffa 656 }
8e9b7d99 657
60a1c204 658 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858
BH
659 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac); // first answer is 'normal'
660
8267bd2c 661 sendPacket(outpacket, outsock);
78bcb858
BH
662
663 trc.d_mac = outpacket->d_trc.d_mac;
8267bd2c
BH
664 outpacket = getFreshAXFRPacket(q);
665
3c68fb14 666 ChunkedSigningPipe csp(target, (securedZone && !presignedZone), ::arg().asNum("signing-threads", 1));
8e9b7d99 667
6e8694df 668 typedef map<DNSName, NSECXEntry, CanonDNSNameCompare> nsecxrepo_t;
9d3151d9 669 nsecxrepo_t nsecxrepo;
4888e4b2
BH
670
671 // this is where the DNSKEYs go in
0c350cb5 672
4c1474f3 673 DNSSECKeeper::keyset_t keys = dk.getKeys(target);
0c350cb5 674
90ba52e0 675 DNSZoneRecord zrr;
0c350cb5 676
90ba52e0 677 zrr.dr.d_name = target;
678 zrr.dr.d_ttl = sd.default_ttl;
679 zrr.auth = 1; // please sign!
794c2f92 680
991a0977 681 string publishCDNSKEY, publishCDS;
0227812c
RG
682 dk.getPublishCDNSKEY(q->qdomain, publishCDNSKEY);
683 dk.getPublishCDS(q->qdomain, publishCDS);
90ba52e0 684 vector<DNSZoneRecord> cds, cdnskey;
f889ab99
PL
685 DNSSECKeeper::keyset_t entryPoints = dk.getEntryPoints(q->qdomain);
686 set<uint32_t> entryPointIds;
687 for (auto const& value : entryPoints)
688 entryPointIds.insert(value.second.id);
991a0977 689
ff05fd12 690 for(const DNSSECKeeper::keyset_t::value_type& value : keys) {
33918299
RG
691 if (!value.second.published) {
692 continue;
693 }
90ba52e0 694 zrr.dr.d_type = QType::DNSKEY;
695 zrr.dr.d_content = std::make_shared<DNSKEYRecordContent>(value.first.getDNSKEY());
6e8694df 696 DNSName keyname = NSEC3Zone ? DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, zrr.dr.d_name))) : zrr.dr.d_name;
9d3151d9 697 NSECXEntry& ne = nsecxrepo[keyname];
b317b510 698
22a0ef16 699 ne.d_set.set(zrr.dr.d_type);
794c2f92 700 ne.d_ttl = sd.default_ttl;
90ba52e0 701 csp.submit(zrr);
991a0977
PL
702
703 // generate CDS and CDNSKEY records
f889ab99 704 if(entryPointIds.count(value.second.id) > 0){
991a0977 705 if(publishCDNSKEY == "1") {
90ba52e0 706 zrr.dr.d_type=QType::CDNSKEY;
707 zrr.dr.d_content = std::make_shared<DNSKEYRecordContent>(value.first.getDNSKEY());
708 cdnskey.push_back(zrr);
991a0977
PL
709 }
710
711 if(!publishCDS.empty()){
90ba52e0 712 zrr.dr.d_type=QType::CDS;
991a0977
PL
713 vector<string> digestAlgos;
714 stringtok(digestAlgos, publishCDS, ", ");
56225bd3 715 for(auto const &digestAlgo : digestAlgos) {
90ba52e0 716 zrr.dr.d_content=std::make_shared<DSRecordContent>(makeDSFromDNSKey(target, value.first.getDNSKEY(), pdns_stou(digestAlgo)));
717 cds.push_back(zrr);
991a0977
PL
718 }
719 }
720 }
4c1474f3 721 }
0c350cb5 722
cc8df07f 723 if(::arg().mustDo("direct-dnskey")) {
acb61e0a 724 sd.db->lookup(QType(QType::DNSKEY), target, sd.domain_id);
90ba52e0 725 while(sd.db->get(zrr)) {
726 zrr.dr.d_ttl = sd.default_ttl;
727 csp.submit(zrr);
6dae726d
PD
728 }
729 }
730
b8adb30d
KM
731 uint8_t flags;
732
95c5bc40 733 if(NSEC3Zone) { // now stuff in the NSEC3PARAM
b8adb30d 734 flags = ns3pr.d_flags;
90ba52e0 735 zrr.dr.d_type = QType::NSEC3PARAM;
95c5bc40 736 ns3pr.d_flags = 0;
90ba52e0 737 zrr.dr.d_content = std::make_shared<NSEC3PARAMRecordContent>(ns3pr);
b8adb30d 738 ns3pr.d_flags = flags;
6e8694df 739 DNSName keyname = DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, zrr.dr.d_name)));
ce464268
BH
740 NSECXEntry& ne = nsecxrepo[keyname];
741
22a0ef16 742 ne.d_set.set(zrr.dr.d_type);
90ba52e0 743 csp.submit(zrr);
ce464268 744 }
8e9b7d99 745
0c350cb5
BH
746 // now start list zone
747 if(!(sd.db->list(target, sd.domain_id))) {
e6a9dde5 748 g_log<<Logger::Error<<"Backend signals error condition"<<endl;
9c556f63 749 outpacket->setRcode(RCode::ServFail);
0c350cb5
BH
750 sendPacket(outpacket,outsock);
751 return 0;
752 }
753
b772ffea 754
5633a4af 755 const bool rectify = !(presignedZone || ::arg().mustDo("disable-axfr-rectify"));
7abbc40f 756 set<DNSName> qnames, nsset, terms;
90ba52e0 757 vector<DNSZoneRecord> zrrs;
b772ffea 758
991a0977 759 // Add the CDNSKEY and CDS records we created earlier
cece60fc
CH
760 for (auto const &synth_zrr : cds)
761 zrrs.push_back(synth_zrr);
90ba52e0 762
cece60fc
CH
763 for (auto const &synth_zrr : cdnskey)
764 zrrs.push_back(synth_zrr);
90ba52e0 765
766 while(sd.db->get(zrr)) {
8bf260dd 767 zrr.dr.d_name.makeUsLowerCase();
90ba52e0 768 if(zrr.dr.d_name.isPartOf(target)) {
769 if (zrr.dr.d_type == QType::ALIAS && ::arg().mustDo("outgoing-axfr-expand-alias")) {
770 vector<DNSZoneRecord> ips;
771 int ret1 = stubDoResolve(getRR<ALIASRecordContent>(zrr.dr)->d_content, QType::A, ips);
772 int ret2 = stubDoResolve(getRR<ALIASRecordContent>(zrr.dr)->d_content, QType::AAAA, ips);
273d88b2 773 if(ret1 != RCode::NoError || ret2 != RCode::NoError) {
e6a9dde5 774 g_log<<Logger::Error<<"Error resolving for ALIAS "<<zrr.dr.d_content->getZoneRepresentation()<<", aborting AXFR"<<endl;
9c556f63 775 outpacket->setRcode(RCode::ServFail);
273d88b2
PD
776 sendPacket(outpacket,outsock);
777 return 0;
778 }
d86e1bf7 779 for(const auto& ip: ips) {
90ba52e0 780 zrr.dr.d_type = ip.dr.d_type;
0438fd89 781 zrr.dr.d_content = ip.dr.d_content;
90ba52e0 782 zrrs.push_back(zrr);
d86e1bf7 783 }
a68df29d 784 continue;
d86e1bf7
PD
785 }
786
b772ffea 787 if (rectify) {
90ba52e0 788 if (zrr.dr.d_type) {
789 qnames.insert(zrr.dr.d_name);
790 if(zrr.dr.d_type == QType::NS && zrr.dr.d_name!=target)
791 nsset.insert(zrr.dr.d_name);
b772ffea
KM
792 } else {
793 // remove existing ents
794 continue;
795 }
796 }
a68df29d 797 zrrs.push_back(zrr);
b772ffea 798 } else {
90ba52e0 799 if (zrr.dr.d_type)
e6a9dde5 800 g_log<<Logger::Warning<<"Zone '"<<target<<"' contains out-of-zone data '"<<zrr.dr.d_name<<"|"<<DNSRecordContent::NumberToType(zrr.dr.d_type)<<"', ignoring"<<endl;
b772ffea
KM
801 }
802 }
803
75f2589f 804 // Group records by name and type, signpipe stumbles over interrupted rrsets
22a676e0 805 if(securedZone && !presignedZone) {
8daafcc1
KM
806 sort(zrrs.begin(), zrrs.end(), [](const DNSZoneRecord& a, const DNSZoneRecord& b) {
807 return tie(a.dr.d_name, a.dr.d_type) < tie(b.dr.d_name, b.dr.d_type);
808 });
809 }
75f2589f 810
b772ffea
KM
811 if(rectify) {
812 // set auth
2010ac95
RG
813 for(DNSZoneRecord &loopZRR : zrrs) {
814 loopZRR.auth=true;
815 if (loopZRR.dr.d_type != QType::NS || loopZRR.dr.d_name!=target) {
816 DNSName shorter(loopZRR.dr.d_name);
b772ffea 817 do {
e325f20c 818 if (shorter==target) // apex is always auth
cb045f61 819 break;
2010ac95
RG
820 if(nsset.count(shorter) && !(loopZRR.dr.d_name==shorter && loopZRR.dr.d_type == QType::DS)) {
821 loopZRR.auth=false;
cb045f61 822 break;
9f70b77a 823 }
7abbc40f 824 } while(shorter.chopOff());
9f70b77a 825 }
b772ffea
KM
826 }
827
828 if(NSEC3Zone) {
829 // ents are only required for NSEC3 zones
830 uint32_t maxent = ::arg().asNum("max-ent-entries");
6ded341a 831 set<DNSName> nsec3set, nonterm;
2010ac95 832 for (auto &loopZRR: zrrs) {
6ded341a 833 bool skip=false;
2010ac95 834 DNSName shorter = loopZRR.dr.d_name;
6ded341a
KM
835 if (shorter != target && shorter.chopOff() && shorter != target) {
836 do {
837 if(nsset.count(shorter)) {
838 skip=true;
839 break;
840 }
841 } while(shorter.chopOff() && shorter != target);
842 }
2010ac95
RG
843 shorter = loopZRR.dr.d_name;
844 if(!skip && (loopZRR.dr.d_type != QType::NS || !ns3pr.d_flags)) {
6ded341a
KM
845 do {
846 if(!nsec3set.count(shorter)) {
847 nsec3set.insert(shorter);
848 }
849 } while(shorter != target && shorter.chopOff());
850 }
851 }
852
2010ac95
RG
853 for(DNSZoneRecord &loopZRR : zrrs) {
854 DNSName shorter(loopZRR.dr.d_name);
e325f20c 855 while(shorter != target && shorter.chopOff()) {
6ded341a 856 if(!qnames.count(shorter) && !nonterm.count(shorter) && nsec3set.count(shorter)) {
b772ffea 857 if(!(maxent)) {
e6a9dde5 858 g_log<<Logger::Warning<<"Zone '"<<target<<"' has too many empty non terminals."<<endl;
b772ffea
KM
859 return 0;
860 }
6ded341a
KM
861 nonterm.insert(shorter);
862 --maxent;
b772ffea
KM
863 }
864 }
865 }
866
9e23e712 867 for(const auto& nt : nonterm) {
2010ac95
RG
868 DNSZoneRecord tempRR;
869 tempRR.dr.d_name=nt;
870 tempRR.dr.d_type=QType::ENT;
871 tempRR.auth=true;
872 zrrs.push_back(tempRR);
b772ffea
KM
873 }
874 }
875 }
876
877
12c86877 878 /* now write all other records */
8e9b7d99 879
6e8694df 880 DNSName keyname;
3370c993 881 unsigned int udiff;
1c6d9830
BH
882 DTime dt;
883 dt.set();
bec14a20 884 int records=0;
2010ac95 885 for(DNSZoneRecord &loopZRR : zrrs) {
3c68fb14 886 if (!presignedZone && loopZRR.dr.d_type == QType::RRSIG)
794c2f92 887 continue;
6dae726d 888
991a0977 889 // only skip the DNSKEY, CDNSKEY and CDS if direct-dnskey is enabled, to avoid changing behaviour
6dae726d 890 // when it is not enabled.
2010ac95 891 if(::arg().mustDo("direct-dnskey") && (loopZRR.dr.d_type == QType::DNSKEY || loopZRR.dr.d_type == QType::CDNSKEY || loopZRR.dr.d_type == QType::CDS))
6dae726d
PD
892 continue;
893
bec14a20 894 records++;
2010ac95
RG
895 if(securedZone && (loopZRR.auth || loopZRR.dr.d_type == QType::NS)) {
896 if (NSEC3Zone || loopZRR.dr.d_type) {
3c68fb14
KM
897 if (presignedZone && NSEC3Zone && loopZRR.dr.d_type == QType::RRSIG && getRR<RRSIGRecordContent>(loopZRR.dr)->d_type == QType::NSEC3) {
898 keyname = loopZRR.dr.d_name.makeRelative(sd.qname);
899 } else {
900 keyname = NSEC3Zone ? DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, loopZRR.dr.d_name))) : loopZRR.dr.d_name;
901 }
b5baefaf
PD
902 NSECXEntry& ne = nsecxrepo[keyname];
903 ne.d_ttl = sd.default_ttl;
3c68fb14
KM
904 ne.d_auth = (ne.d_auth || loopZRR.auth || (NSEC3Zone && (!ns3pr.d_flags)));
905 if (loopZRR.dr.d_type && loopZRR.dr.d_type != QType::RRSIG) {
22a0ef16 906 ne.d_set.set(loopZRR.dr.d_type);
b5baefaf
PD
907 }
908 }
b317b510 909 }
b5baefaf 910
2010ac95 911 if (!loopZRR.dr.d_type)
b5baefaf
PD
912 continue; // skip empty non-terminals
913
2010ac95 914 if(loopZRR.dr.d_type == QType::SOA)
12c86877 915 continue; // skip SOA - would indicate end of AXFR
add640c0 916
2010ac95 917 if(csp.submit(loopZRR)) {
1c6d9830
BH
918 for(;;) {
919 outpacket->getRRS() = csp.getChunk();
920 if(!outpacket->getRRS().empty()) {
60a1c204 921 if(haveTSIGDetails && !tsigkeyname.empty())
54d84273 922 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
1c6d9830 923 sendPacket(outpacket, outsock);
78bcb858 924 trc.d_mac=outpacket->d_trc.d_mac;
1c6d9830
BH
925 outpacket=getFreshAXFRPacket(q);
926 }
927 else
928 break;
929 }
12c86877
BH
930 }
931 }
78bcb858 932 /*
3370c993 933 udiff=dt.udiffNoReset();
1c6d9830
BH
934 cerr<<"Starting NSEC: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<csp.d_signed<<" / "<<udiff/1000000.0<<endl;
935 cerr<<"Outstanding: "<<csp.d_outstanding<<", "<<csp.d_queued - csp.d_signed << endl;
936 cerr<<"Ready for consumption: "<<csp.getReady()<<endl;
78bcb858 937 */
feef1ece 938 if(securedZone) {
4888e4b2 939 if(NSEC3Zone) {
9d3151d9 940 for(nsecxrepo_t::const_iterator iter = nsecxrepo.begin(); iter != nsecxrepo.end(); ++iter) {
3c68fb14 941 if(iter->second.d_auth) {
feef1ece 942 NSEC3RecordContent n3rc;
22a0ef16 943 n3rc.set(iter->second.d_set);
27d4a65b
RG
944 const auto numberOfTypesSet = n3rc.numberOfTypesSet();
945 if (numberOfTypesSet != 0 && (numberOfTypesSet != 1 || !n3rc.isSet(QType::NS))) {
946 n3rc.set(QType::RRSIG);
947 }
948 n3rc.d_salt = ns3pr.d_salt;
feef1ece
PD
949 n3rc.d_flags = ns3pr.d_flags;
950 n3rc.d_iterations = ns3pr.d_iterations;
690b86b7 951 n3rc.d_algorithm = DNSSECKeeper::DIGEST_SHA1; // SHA1, fixed in PowerDNS for now
feef1ece 952 nsecxrepo_t::const_iterator inext = iter;
cb167afd 953 ++inext;
feef1ece
PD
954 if(inext == nsecxrepo.end())
955 inext = nsecxrepo.begin();
3c68fb14 956 while(!inext->second.d_auth && inext != iter)
feef1ece 957 {
cb167afd 958 ++inext;
feef1ece
PD
959 if(inext == nsecxrepo.end())
960 inext = nsecxrepo.begin();
961 }
6e8694df
KM
962 n3rc.d_nexthash = fromBase32Hex(inext->first.toStringNoDot());
963 zrr.dr.d_name = iter->first+sd.qname;
90ba52e0 964
965 zrr.dr.d_ttl = sd.default_ttl;
27d4a65b 966 zrr.dr.d_content = std::make_shared<NSEC3RecordContent>(std::move(n3rc));
90ba52e0 967 zrr.dr.d_type = QType::NSEC3;
968 zrr.dr.d_place = DNSResourceRecord::ANSWER;
969 zrr.auth=true;
970 if(csp.submit(zrr)) {
feef1ece
PD
971 for(;;) {
972 outpacket->getRRS() = csp.getChunk();
973 if(!outpacket->getRRS().empty()) {
60a1c204 974 if(haveTSIGDetails && !tsigkeyname.empty())
feef1ece
PD
975 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
976 sendPacket(outpacket, outsock);
977 trc.d_mac=outpacket->d_trc.d_mac;
978 outpacket=getFreshAXFRPacket(q);
979 }
980 else
981 break;
1c6d9830 982 }
1c6d9830 983 }
8e9b7d99 984 }
4888e4b2
BH
985 }
986 }
9d3151d9 987 else for(nsecxrepo_t::const_iterator iter = nsecxrepo.begin(); iter != nsecxrepo.end(); ++iter) {
ed9c3a50 988 NSECRecordContent nrc;
22a0ef16 989 nrc.set(iter->second.d_set);
27d4a65b
RG
990 nrc.set(QType::RRSIG);
991 nrc.set(QType::NSEC);
6e8694df
KM
992
993 if(boost::next(iter) != nsecxrepo.end())
994 nrc.d_next = boost::next(iter)->first;
ed9c3a50 995 else
6e8694df
KM
996 nrc.d_next=nsecxrepo.begin()->first;
997 zrr.dr.d_name = iter->first;
998
90ba52e0 999 zrr.dr.d_ttl = sd.default_ttl;
27d4a65b 1000 zrr.dr.d_content = std::make_shared<NSECRecordContent>(std::move(nrc));
90ba52e0 1001 zrr.dr.d_type = QType::NSEC;
1002 zrr.dr.d_place = DNSResourceRecord::ANSWER;
1003 zrr.auth=true;
1004 if(csp.submit(zrr)) {
1c6d9830
BH
1005 for(;;) {
1006 outpacket->getRRS() = csp.getChunk();
1007 if(!outpacket->getRRS().empty()) {
60a1c204 1008 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 1009 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
1c6d9830 1010 sendPacket(outpacket, outsock);
78bcb858 1011 trc.d_mac=outpacket->d_trc.d_mac;
1c6d9830
BH
1012 outpacket=getFreshAXFRPacket(q);
1013 }
1014 else
1015 break;
1016 }
8e9b7d99 1017 }
add640c0 1018 }
add640c0 1019 }
78bcb858 1020 /*
3370c993 1021 udiff=dt.udiffNoReset();
1c6d9830
BH
1022 cerr<<"Flushing pipe: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<csp.d_signed<<" / "<<udiff/1000000.0<<endl;
1023 cerr<<"Outstanding: "<<csp.d_outstanding<<", "<<csp.d_queued - csp.d_signed << endl;
1024 cerr<<"Ready for consumption: "<<csp.getReady()<<endl;
78bcb858 1025 * */
bec14a20
BH
1026 for(;;) {
1027 outpacket->getRRS() = csp.getChunk(true); // flush the pipe
1028 if(!outpacket->getRRS().empty()) {
60a1c204 1029 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 1030 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true); // first answer is 'normal'
bec14a20 1031 sendPacket(outpacket, outsock);
78bcb858 1032 trc.d_mac=outpacket->d_trc.d_mac;
bec14a20
BH
1033 outpacket=getFreshAXFRPacket(q);
1034 }
1035 else
1036 break;
12c86877 1037 }
8e9b7d99 1038
1c6d9830 1039 udiff=dt.udiffNoReset();
f1f85f12 1040 if(securedZone)
e6a9dde5 1041 g_log<<Logger::Info<<"Done signing: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<endl;
1c6d9830 1042
e6a9dde5 1043 DLOG(g_log<<"Done writing out records"<<endl);
12c86877 1044 /* and terminate with yet again the SOA record */
8e9b7d99 1045 outpacket=getFreshAXFRPacket(q);
13f9e280 1046 outpacket->addRecord(soa);
60a1c204 1047 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 1048 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
92c90b44 1049
ff76e8b4 1050 sendPacket(outpacket, outsock);
78bcb858 1051
e6a9dde5
PL
1052 DLOG(g_log<<"last packet - close"<<endl);
1053 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' to "<<q->getRemote()<<" finished"<<endl;
12c86877
BH
1054
1055 return 1;
1056}
1057
c2826d2e 1058int TCPNameserver::doIXFR(std::unique_ptr<DNSPacket>& q, int outsock)
6e59a580 1059{
c2826d2e 1060 std::unique_ptr<DNSPacket> outpacket=getFreshAXFRPacket(q);
6e59a580
KM
1061 if(q->d_dnssecOk)
1062 outpacket->d_dnssecOk=true; // RFC 5936, 2.2.5 'SHOULD'
1063
6e59a580 1064 uint32_t serial = 0;
27c0050c 1065 MOADNSParser mdp(false, q->getString());
6e59a580
KM
1066 for(MOADNSParser::answers_t::const_iterator i=mdp.d_answers.begin(); i != mdp.d_answers.end(); ++i) {
1067 const DNSRecord *rr = &i->first;
e693ff5a 1068 if (rr->d_type == QType::SOA && rr->d_place == DNSResourceRecord::AUTHORITY) {
6e59a580
KM
1069 vector<string>parts;
1070 stringtok(parts, rr->d_content->getZoneRepresentation());
1071 if (parts.size() >= 3) {
95dd3b90
RG
1072 try {
1073 serial=pdns_stou(parts[2]);
1074 }
1075 catch(const std::out_of_range& oor) {
e6a9dde5 1076 g_log<<Logger::Error<<"Invalid serial in IXFR query"<<endl;
95dd3b90
RG
1077 outpacket->setRcode(RCode::FormErr);
1078 sendPacket(outpacket,outsock);
1079 return 0;
1080 }
6e59a580 1081 } else {
e6a9dde5 1082 g_log<<Logger::Error<<"No serial in IXFR query"<<endl;
6e59a580
KM
1083 outpacket->setRcode(RCode::FormErr);
1084 sendPacket(outpacket,outsock);
1085 return 0;
1086 }
3e67ea8b 1087 } else if (rr->d_type != QType::TSIG && rr->d_type != QType::OPT) {
e6a9dde5 1088 g_log<<Logger::Error<<"Additional records in IXFR query, type: "<<QType(rr->d_type).getName()<<endl;
6e59a580
KM
1089 outpacket->setRcode(RCode::FormErr);
1090 sendPacket(outpacket,outsock);
1091 return 0;
1092 }
1093 }
1094
e6a9dde5 1095 g_log<<Logger::Error<<"IXFR of domain '"<<q->qdomain<<"' initiated by "<<q->getRemote()<<" with serial "<<serial<<endl;
6e59a580 1096
22893145 1097 // determine if zone exists and AXFR is allowed using existing backend before spawning a new backend.
6e59a580 1098 SOAData sd;
6e59a580
KM
1099 {
1100 Lock l(&s_plock);
e6a9dde5 1101 DLOG(g_log<<"Looking for SOA"<<endl); // find domain_id via SOA and list complete domain. No SOA, no IXFR
6e59a580 1102 if(!s_P) {
e6a9dde5 1103 g_log<<Logger::Error<<"TCP server is without backend connections in doIXFR, launching"<<endl;
c2826d2e 1104 s_P=make_unique<PacketHandler>();
6e59a580
KM
1105 }
1106
22893145
CH
1107 // canDoAXFR does all the ACL checks, and has the if(disable-axfr) shortcut, call it first.
1108 if(!canDoAXFR(q) || !s_P->getBackend()->getSOAUncached(q->qdomain, sd)) {
e6a9dde5 1109 g_log<<Logger::Error<<"IXFR of domain '"<<q->qdomain<<"' failed: not authoritative"<<endl;
9c556f63 1110 outpacket->setRcode(RCode::NotAuth);
6e59a580
KM
1111 sendPacket(outpacket,outsock);
1112 return 0;
1113 }
1114 }
1115
22893145
CH
1116 DNSSECKeeper dk;
1117 NSEC3PARAMRecordContent ns3pr;
1118 bool narrow;
1119
40b3959a 1120 DNSSECKeeper::clearCaches(q->qdomain);
22893145
CH
1121 bool securedZone = dk.isSecuredZone(q->qdomain);
1122 if(dk.getNSEC3PARAM(q->qdomain, &ns3pr, &narrow)) {
1123 if(narrow) {
e6a9dde5
PL
1124 g_log<<Logger::Error<<"Not doing IXFR of an NSEC3 narrow zone."<<endl;
1125 g_log<<Logger::Error<<"IXFR of domain '"<<q->qdomain<<"' denied to "<<q->getRemote()<<endl;
22893145
CH
1126 outpacket->setRcode(RCode::Refused);
1127 sendPacket(outpacket,outsock);
1128 return 0;
1129 }
1130 }
1131
7abbc40f 1132 DNSName target = q->qdomain;
6e59a580
KM
1133
1134 UeberBackend db;
79ba7763 1135 if(!db.getSOAUncached(target, sd)) {
e6a9dde5 1136 g_log<<Logger::Error<<"IXFR of domain '"<<target<<"' failed: not authoritative in second instance"<<endl;
79ba7763 1137 outpacket->setRcode(RCode::NotAuth);
6e59a580
KM
1138 sendPacket(outpacket,outsock);
1139 return 0;
1140 }
24d9e514 1141
13f9e280 1142 if (!rfc1982LessThan(serial, calculateEditSOA(sd.serial, dk, sd.qname))) {
6e59a580 1143 TSIGRecordContent trc;
7abbc40f
PD
1144 DNSName tsigkeyname;
1145 string tsigsecret;
6e59a580 1146
ea3816cf 1147 bool haveTSIGDetails = q->getTSIGDetails(&trc, &tsigkeyname);
6e59a580 1148
60a1c204 1149 if(haveTSIGDetails && !tsigkeyname.empty()) {
bb7fb11c 1150 string tsig64;
3343ad1f 1151 DNSName algorithm=trc.d_algoName; // FIXME400: was toLowerCanonic, compare output
290a083d 1152 if (algorithm == DNSName("hmac-md5.sig-alg.reg.int"))
1153 algorithm = DNSName("hmac-md5");
6e59a580 1154 Lock l(&s_plock);
53ace5d5 1155 if(!s_P->getBackend()->getTSIGKey(tsigkeyname, &algorithm, &tsig64)) {
e6a9dde5 1156 g_log<<Logger::Error<<"TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"' not found"<<endl;
53ace5d5
PL
1157 return 0;
1158 }
1159 if (B64Decode(tsig64, tsigsecret) == -1) {
e6a9dde5 1160 g_log<<Logger::Error<<"Unable to Base-64 decode TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"'"<<endl;
53ace5d5
PL
1161 return 0;
1162 }
6e59a580
KM
1163 }
1164
1165 UeberBackend signatureDB;
1166
1167 // SOA *must* go out first, our signing pipe might reorder
e6a9dde5 1168 DLOG(g_log<<"Sending out SOA"<<endl);
13f9e280
CH
1169 DNSZoneRecord soa = makeEditedDNSZRFromSOAData(dk, sd);
1170 outpacket->addRecord(soa);
6e59a580 1171 if(securedZone) {
7abbc40f 1172 set<DNSName> authSet;
6e59a580
KM
1173 authSet.insert(target);
1174 addRRSigs(dk, signatureDB, authSet, outpacket->getRRS());
1175 }
1176
60a1c204 1177 if(haveTSIGDetails && !tsigkeyname.empty())
6e59a580
KM
1178 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac); // first answer is 'normal'
1179
1180 sendPacket(outpacket, outsock);
1181
e6a9dde5 1182 g_log<<Logger::Error<<"IXFR of domain '"<<target<<"' to "<<q->getRemote()<<" finished"<<endl;
6e59a580
KM
1183
1184 return 1;
1185 }
1186
e6a9dde5 1187 g_log<<Logger::Error<<"IXFR fallback to AXFR for domain '"<<target<<"' our serial "<<sd.serial<<endl;
6e59a580
KM
1188 return doAXFR(q->qdomain, q, outsock);
1189}
1190
12c86877
BH
1191TCPNameserver::~TCPNameserver()
1192{
12c86877
BH
1193}
1194
1195TCPNameserver::TCPNameserver()
1196{
cb0af1a1
RG
1197 d_maxTransactionsPerConn = ::arg().asNum("max-tcp-transactions-per-conn");
1198 d_idleTimeout = ::arg().asNum("tcp-idle-timeout");
1199 d_maxConnectionDuration = ::arg().asNum("max-tcp-connection-duration");
1200 d_maxConnectionsPerClient = ::arg().asNum("max-tcp-connections-per-client");
1201
379ab445 1202// sem_init(&d_connectionroom_sem,0,::arg().asNum("max-tcp-connections"));
c2826d2e 1203 d_connectionroom_sem = make_unique<Semaphore>( ::arg().asNum( "max-tcp-connections" ));
d322f931 1204 d_maxTCPConnections = ::arg().asNum( "max-tcp-connections" );
117e1bf2 1205 d_tid=0;
f5ad09dc 1206
12c86877 1207 vector<string>locals;
e326f785 1208 stringtok(locals,::arg()["local-ipv6"]," ,");
379ab445 1209 stringtok(locals,::arg()["local-address"]," ,");
f5ad09dc
PL
1210 if(locals.empty())
1211 throw PDNSException("No local addresses specified");
12c86877 1212
68b011bd 1213 d_ng.toMasks(::arg()["allow-axfr-ips"] );
9f1d5826 1214
12c86877 1215 signal(SIGPIPE,SIG_IGN);
12c86877 1216
f5ad09dc
PL
1217 for(auto const &laddr : locals) {
1218 ComboAddress local(laddr, ::arg().asNum("local-port"));
12c86877 1219
f5ad09dc
PL
1220 int s=socket(local.sin4.sin_family, SOCK_STREAM, 0);
1221 if(s<0)
1222 throw PDNSException("Unable to acquire TCP socket: "+stringerror());
3897b9e1 1223 setCloseOnExec(s);
fb316318 1224
12c86877 1225 int tmp=1;
f5ad09dc 1226 if(setsockopt(s, SOL_SOCKET,SO_REUSEADDR, (char*)&tmp, sizeof tmp) < 0) {
e6a9dde5 1227 g_log<<Logger::Error<<"Setsockopt failed"<<endl;
f5ad09dc 1228 _exit(1);
12c86877 1229 }
940d7811
RG
1230
1231 if (::arg().asNum("tcp-fast-open") > 0) {
1232#ifdef TCP_FASTOPEN
1233 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1234 if (setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
f5ad09dc 1235 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket "<<local.toStringWithPort()<<": "<<stringerror()<<endl;
940d7811
RG
1236 }
1237#else
e6a9dde5 1238 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
940d7811
RG
1239#endif
1240 }
1241
f5ad09dc
PL
1242 if(::arg().mustDo("non-local-bind"))
1243 Utility::setBindAny(local.sin4.sin_family, s);
1244
1245 if(local.isIPv6() && setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1246 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<stringerror()<<endl;
1247 }
fec7dd5a 1248
379ab445 1249 if(::bind(s, (sockaddr*)&local, local.getSocklen())<0) {
a702a96c 1250 int err = errno;
2c896042 1251 close(s);
a702a96c 1252 if( err == EADDRNOTAVAIL && ! ::arg().mustDo("local-address-nonexist-fail") ) {
f5ad09dc 1253 g_log<<Logger::Error<<"Address " << local.toString() << " does not exist on this server - skipping TCP bind" << endl;
5ecb2885
MZ
1254 continue;
1255 } else {
f5ad09dc 1256 g_log<<Logger::Error<<"Unable to bind to TCP socket " << local.toStringWithPort() << ": "<<stringerror(err)<<endl;
2ab7e9ac 1257 throw PDNSException("Unable to bind to TCP socket");
5ecb2885 1258 }
12c86877 1259 }
12c86877 1260
f5ad09dc
PL
1261 listen(s, 128);
1262 g_log<<Logger::Error<<"TCP server bound to "<<local.toStringWithPort()<<endl;
12c86877 1263 d_sockets.push_back(s);
8edfedf1
BH
1264 struct pollfd pfd;
1265 memset(&pfd, 0, sizeof(pfd));
1266 pfd.fd = s;
1267 pfd.events = POLLIN;
8edfedf1 1268 d_prfds.push_back(pfd);
12c86877 1269 }
12c86877
BH
1270}
1271
1272
ff76e8b4 1273//! Start of TCP operations thread, we launch a new thread for each incoming TCP question
12c86877
BH
1274void TCPNameserver::thread()
1275{
519f5484 1276 setThreadName("pdns/tcpnameser");
12c86877
BH
1277 try {
1278 for(;;) {
1279 int fd;
cb0af1a1
RG
1280 ComboAddress remote;
1281 Utility::socklen_t addrlen=remote.getSocklen();
12c86877 1282
8edfedf1 1283 int ret=poll(&d_prfds[0], d_prfds.size(), -1); // blocks, forever if need be
8a63d3ce 1284 if(ret <= 0)
4957a608 1285 continue;
8a63d3ce 1286
12c86877 1287 int sock=-1;
8ce9e4e6 1288 for(const pollfd& pfd : d_prfds) {
c1ee10a6 1289 if(pfd.revents & POLLIN) {
4957a608 1290 sock = pfd.fd;
cb0af1a1
RG
1291 remote.sin4.sin_family = AF_INET6;
1292 addrlen=remote.getSocklen();
4957a608
BH
1293
1294 if((fd=accept(sock, (sockaddr*)&remote, &addrlen))<0) {
a702a96c
OM
1295 int err = errno;
1296 g_log<<Logger::Error<<"TCP question accept error: "<<stringerror(err)<<endl;
4957a608 1297
a702a96c 1298 if(err==EMFILE) {
e6a9dde5 1299 g_log<<Logger::Error<<"TCP handler out of filedescriptors, exiting, won't recover from this"<<endl;
5bd2ea7b 1300 _exit(1);
4957a608
BH
1301 }
1302 }
1303 else {
cb0af1a1
RG
1304 if (d_maxConnectionsPerClient) {
1305 std::lock_guard<std::mutex> lock(s_clientsCountMutex);
1306 if (s_clientsCount[remote] >= d_maxConnectionsPerClient) {
e6a9dde5 1307 g_log<<Logger::Notice<<"Limit of simultaneous TCP connections per client reached for "<< remote<<", dropping"<<endl;
cb0af1a1
RG
1308 close(fd);
1309 continue;
1310 }
1311 s_clientsCount[remote]++;
1312 }
1313
4957a608
BH
1314 pthread_t tid;
1315 d_connectionroom_sem->wait(); // blocks if no connections are available
1316
1317 int room;
1318 d_connectionroom_sem->getValue( &room);
1319 if(room<1)
e6a9dde5 1320 g_log<<Logger::Warning<<"Limit of simultaneous TCP connections reached - raise max-tcp-connections"<<endl;
4957a608 1321
a702a96c
OM
1322 int err;
1323 if((err = pthread_create(&tid, 0, &doConnection, reinterpret_cast<void*>(fd)))) {
1324 g_log<<Logger::Error<<"Error creating thread: "<<stringerror(err)<<endl;
4957a608 1325 d_connectionroom_sem->post();
48e8d70b 1326 close(fd);
cb0af1a1 1327 decrementClientCount(remote);
4957a608
BH
1328 }
1329 }
1330 }
12c86877
BH
1331 }
1332 }
1333 }
3f81d239 1334 catch(PDNSException &AE) {
e6a9dde5 1335 g_log<<Logger::Error<<"TCP Nameserver thread dying because of fatal error: "<<AE.reason<<endl;
12c86877
BH
1336 }
1337 catch(...) {
e6a9dde5 1338 g_log<<Logger::Error<<"TCPNameserver dying because of an unexpected fatal error"<<endl;
12c86877 1339 }
5bd2ea7b 1340 _exit(1); // take rest of server with us
12c86877
BH
1341}
1342
1343
d322f931
PD
1344unsigned int TCPNameserver::numTCPConnections()
1345{
1346 int room;
1347 d_connectionroom_sem->getValue( &room);
1348 return d_maxTCPConnections - room;
1349}