]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/tcpreceiver.cc
Merge pull request #8113 from rgacogne/ddist-tcp-outstanding
[thirdparty/pdns.git] / pdns / tcpreceiver.cc
CommitLineData
12c86877
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
2e7834cb 3 Copyright (C) 2002-2012 PowerDNS.COM BV
12c86877
BH
4
5 This program is free software; you can redistribute it and/or modify
22dc646a
BH
6 it under the terms of the GNU General Public License version 2
7 as published by the Free Software Foundation
f782fe38
MH
8
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
12c86877
BH
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
06bd9ccf 20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12c86877 21*/
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
b6f3b03a 25#include <boost/algorithm/string.hpp>
bf269e28 26#include "auth-packetcache.hh"
1258abe0 27#include "utility.hh"
519f5484 28#include "threadname.hh"
add640c0 29#include "dnssecinfra.hh"
4c1474f3 30#include "dnsseckeeper.hh"
12c86877 31#include <cstdio>
4888e4b2 32#include "base32.hh"
12c86877
BH
33#include <cstring>
34#include <cstdlib>
35#include <sys/types.h>
940d7811 36#include <netinet/tcp.h>
12c86877
BH
37#include <iostream>
38#include <string>
39#include "tcpreceiver.hh"
67d74e49 40#include "sstuff.hh"
fa8fd4d2 41
12c86877
BH
42#include <errno.h>
43#include <signal.h>
78bcb858 44#include "base64.hh"
12c86877
BH
45#include "ueberbackend.hh"
46#include "dnspacket.hh"
47#include "nameserver.hh"
48#include "distributor.hh"
49#include "lock.hh"
50#include "logger.hh"
51#include "arguments.hh"
379ab445 52
3e8216c8 53#include "common_startup.hh"
12c86877
BH
54#include "packethandler.hh"
55#include "statbag.hh"
56#include "resolver.hh"
57#include "communicator.hh"
61b26744 58#include "namespaces.hh"
8e9b7d99 59#include "signingpipe.hh"
273d88b2 60#include "stubresolver.hh"
bf269e28 61extern AuthPacketCache PC;
12c86877
BH
62extern StatBag S;
63
64/**
65\file tcpreceiver.cc
66\brief This file implements the tcpreceiver that receives and answers questions over TCP/IP
67*/
68
ac2bb9e7 69pthread_mutex_t TCPNameserver::s_plock = PTHREAD_MUTEX_INITIALIZER;
12c86877
BH
70Semaphore *TCPNameserver::d_connectionroom_sem;
71PacketHandler *TCPNameserver::s_P;
9f1d5826 72NetmaskGroup TCPNameserver::d_ng;
cb0af1a1
RG
73size_t TCPNameserver::d_maxTransactionsPerConn;
74size_t TCPNameserver::d_maxConnectionsPerClient;
75unsigned int TCPNameserver::d_idleTimeout;
76unsigned int TCPNameserver::d_maxConnectionDuration;
77std::mutex TCPNameserver::s_clientsCountMutex;
78std::map<ComboAddress,size_t,ComboAddress::addressOnlyLessThan> TCPNameserver::s_clientsCount;
12c86877 79
12c86877
BH
80void TCPNameserver::go()
81{
e6a9dde5 82 g_log<<Logger::Error<<"Creating backend connection for TCP"<<endl;
12c86877
BH
83 s_P=0;
84 try {
85 s_P=new PacketHandler;
86 }
3f81d239 87 catch(PDNSException &ae) {
e6a9dde5 88 g_log<<Logger::Error<<"TCP server is unable to launch backends - will try again when questions come in: "<<ae.reason<<endl;
12c86877
BH
89 }
90 pthread_create(&d_tid, 0, launcher, static_cast<void *>(this));
91}
92
93void *TCPNameserver::launcher(void *data)
94{
95 static_cast<TCPNameserver *>(data)->thread();
96 return 0;
97}
98
3f81d239 99// throws PDNSException if things didn't go according to plan, returns 0 if really 0 bytes were read
cb0af1a1 100static int readnWithTimeout(int fd, void* buffer, unsigned int n, unsigned int idleTimeout, bool throwOnEOF=true, unsigned int totalTimeout=0)
12c86877 101{
6a3e5d1a
BH
102 unsigned int bytes=n;
103 char *ptr = (char*)buffer;
104 int ret;
cb0af1a1
RG
105 time_t start = 0;
106 unsigned int remainingTotal = totalTimeout;
107 if (totalTimeout) {
108 start = time(NULL);
109 }
6a3e5d1a
BH
110 while(bytes) {
111 ret=read(fd, ptr, bytes);
112 if(ret < 0) {
113 if(errno==EAGAIN) {
cb0af1a1 114 ret=waitForData(fd, (totalTimeout == 0 || idleTimeout <= remainingTotal) ? idleTimeout : remainingTotal);
4957a608
BH
115 if(ret < 0)
116 throw NetworkError("Waiting for data read");
117 if(!ret)
118 throw NetworkError("Timeout reading data");
119 continue;
6a3e5d1a
BH
120 }
121 else
4957a608 122 throw NetworkError("Reading data: "+stringerror());
6a3e5d1a
BH
123 }
124 if(!ret) {
125 if(!throwOnEOF && n == bytes)
4957a608 126 return 0;
6a3e5d1a 127 else
4957a608 128 throw NetworkError("Did not fulfill read from TCP due to EOF");
6a3e5d1a
BH
129 }
130
131 ptr += ret;
132 bytes -= ret;
cb0af1a1
RG
133 if (totalTimeout) {
134 time_t now = time(NULL);
135 unsigned int elapsed = now - start;
136 if (elapsed >= remainingTotal) {
137 throw NetworkError("Timeout while reading data");
138 }
139 start = now;
140 remainingTotal -= elapsed;
141 }
6a3e5d1a
BH
142 }
143 return n;
144}
12c86877 145
6a3e5d1a 146// ditto
cb0af1a1 147static void writenWithTimeout(int fd, const void *buffer, unsigned int n, unsigned int idleTimeout)
6a3e5d1a
BH
148{
149 unsigned int bytes=n;
150 const char *ptr = (char*)buffer;
151 int ret;
152 while(bytes) {
153 ret=write(fd, ptr, bytes);
154 if(ret < 0) {
155 if(errno==EAGAIN) {
cb0af1a1 156 ret=waitForRWData(fd, false, idleTimeout, 0);
4957a608
BH
157 if(ret < 0)
158 throw NetworkError("Waiting for data write");
159 if(!ret)
160 throw NetworkError("Timeout writing data");
161 continue;
6a3e5d1a
BH
162 }
163 else
4957a608 164 throw NetworkError("Writing data: "+stringerror());
6a3e5d1a 165 }
12c86877 166 if(!ret) {
67d74e49 167 throw NetworkError("Did not fulfill TCP write due to EOF");
12c86877 168 }
6a3e5d1a
BH
169
170 ptr += ret;
171 bytes -= ret;
12c86877 172 }
12c86877
BH
173}
174
6a3e5d1a 175void connectWithTimeout(int fd, struct sockaddr* remote, size_t socklen)
12c86877 176{
6a3e5d1a
BH
177 int err;
178 Utility::socklen_t len=sizeof(err);
179
76473b92 180 if((err=connect(fd, remote, socklen))<0 && errno!=EINPROGRESS)
67d74e49 181 throw NetworkError("connect: "+stringerror());
6a3e5d1a
BH
182
183 if(!err)
184 goto done;
185
186 err=waitForRWData(fd, false, 5, 0);
187 if(err == 0)
67d74e49 188 throw NetworkError("Timeout connecting to remote");
6a3e5d1a 189 if(err < 0)
67d74e49 190 throw NetworkError("Error connecting to remote");
12c86877 191
6a3e5d1a 192 if(getsockopt(fd, SOL_SOCKET,SO_ERROR,(char *)&err,&len)<0)
67d74e49 193 throw NetworkError("Error connecting to remote: "+stringerror()); // Solaris
6a3e5d1a
BH
194
195 if(err)
67d74e49 196 throw NetworkError("Error connecting to remote: "+string(strerror(err)));
6a3e5d1a
BH
197
198 done:
199 ;
200}
12c86877 201
6a3e5d1a
BH
202void TCPNameserver::sendPacket(shared_ptr<DNSPacket> p, int outsock)
203{
b552d7b1 204 g_rs.submitResponse(*p, false);
9951e2d0 205
fbaa5e09
BH
206 uint16_t len=htons(p->getString().length());
207 string buffer((const char*)&len, 2);
208 buffer.append(p->getString());
cb0af1a1 209 writenWithTimeout(outsock, buffer.c_str(), buffer.length(), d_idleTimeout);
6a3e5d1a
BH
210}
211
212
cb0af1a1 213void TCPNameserver::getQuestion(int fd, char *mesg, int pktlen, const ComboAddress &remote, unsigned int totalTime)
6a3e5d1a
BH
214try
215{
cb0af1a1 216 readnWithTimeout(fd, mesg, pktlen, d_idleTimeout, true, totalTime);
6a3e5d1a 217}
67d74e49
BH
218catch(NetworkError& ae) {
219 throw NetworkError("Error reading DNS data from TCP client "+remote.toString()+": "+ae.what());
12c86877
BH
220}
221
5fd567ec 222static void incTCPAnswerCount(const ComboAddress& remote)
223{
224 S.inc("tcp-answers");
225 if(remote.sin4.sin_family == AF_INET6)
226 S.inc("tcp6-answers");
227 else
228 S.inc("tcp4-answers");
229}
cb0af1a1
RG
230
/**
 * Check whether a connection that began at start has used up its allowed lifetime.
 *
 * \param maxConnectionDuration  allowed lifetime as measured by time(); 0 disables the check
 * \param start                  time() value recorded when the connection began
 * \param remainingTime          set to the time still available when the limit is enabled
 *                               and not yet reached; left untouched otherwise
 * \return true when the limit is enabled and has been reached
 */
static bool maxConnectionDurationReached(unsigned int maxConnectionDuration, time_t start, unsigned int& remainingTime)
{
  if (maxConnectionDuration == 0) {
    return false;
  }

  const time_t now = time(NULL);
  // If the wall clock stepped backwards, count that as no time elapsed so the
  // remaining time can never exceed the configured maximum.
  const unsigned long long elapsed = (now > start) ? static_cast<unsigned long long>(now - start) : 0;
  if (elapsed >= maxConnectionDuration) {
    return true;
  }

  remainingTime = maxConnectionDuration - static_cast<unsigned int>(elapsed);
  return false;
}
242
243void TCPNameserver::decrementClientCount(const ComboAddress& remote)
244{
245 if (d_maxConnectionsPerClient) {
246 std::lock_guard<std::mutex> lock(s_clientsCountMutex);
247 s_clientsCount[remote]--;
248 if (s_clientsCount[remote] == 0) {
249 s_clientsCount.erase(remote);
250 }
251 }
252}
253
12c86877
BH
254void *TCPNameserver::doConnection(void *data)
255{
519f5484 256 setThreadName("pdns/tcpConnect");
ff76e8b4 257 shared_ptr<DNSPacket> packet;
b014ad98
BH
258 // Fix gcc-4.0 error (on AMD64)
259 int fd=(int)(long)data; // gotta love C (generates a harmless warning on opteron)
4f5e7925 260 ComboAddress remote;
261 socklen_t remotelen=sizeof(remote);
cb0af1a1
RG
262 size_t transactions = 0;
263 time_t start = 0;
264 if (d_maxConnectionDuration) {
265 start = time(NULL);
266 }
4f5e7925 267
12c86877 268 pthread_detach(pthread_self());
4f5e7925 269 if(getpeername(fd, (struct sockaddr *)&remote, &remotelen) < 0) {
e6a9dde5 270 g_log<<Logger::Warning<<"Received question from socket which had no remote address, dropping ("<<stringerror()<<")"<<endl;
4f5e7925 271 d_connectionroom_sem->post();
a7b68ae7
RG
272 try {
273 closesocket(fd);
274 }
275 catch(const PDNSException& e) {
e6a9dde5 276 g_log<<Logger::Error<<"Error closing TCP socket: "<<e.reason<<endl;
a7b68ae7 277 }
4f5e7925 278 return 0;
279 }
280
3897b9e1 281 setNonBlocking(fd);
12c86877 282 try {
c2b4ccc0 283 int mesgsize=65535;
284 scoped_array<char> mesg(new char[mesgsize]);
12c86877 285
e6a9dde5 286 DLOG(g_log<<"TCP Connection accepted on fd "<<fd<<endl);
21a303f3 287 bool logDNSQueries= ::arg().mustDo("log-dns-queries");
12c86877 288 for(;;) {
cb0af1a1
RG
289 unsigned int remainingTime = 0;
290 transactions++;
291 if (d_maxTransactionsPerConn && transactions > d_maxTransactionsPerConn) {
e6a9dde5 292 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the number of transactions per connection, dropping.";
cb0af1a1
RG
293 break;
294 }
295 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
e6a9dde5 296 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the maximum TCP connection duration, dropping.";
cb0af1a1
RG
297 break;
298 }
6a3e5d1a
BH
299
300 uint16_t pktlen;
cb0af1a1 301 if(!readnWithTimeout(fd, &pktlen, 2, d_idleTimeout, false, remainingTime))
4957a608 302 break;
6a3e5d1a 303 else
4957a608 304 pktlen=ntohs(pktlen);
12c86877 305
366e1e5e
AT
306 // this check will always be false *if* no one touches
307 // the mesg array. pktlen can be maximum of 65535 as
308 // it is 2 byte unsigned variable. In getQuestion, we
309 // write to 0 up to pktlen-1 so 65535 is just right.
310
311 // do not remove this check as it will catch if someone
312 // decreases the mesg buffer size for some reason.
c2b4ccc0 313 if(pktlen > mesgsize) {
e6a9dde5 314 g_log<<Logger::Warning<<"Received an overly large question from "<<remote.toString()<<", dropping"<<endl;
4957a608 315 break;
12c86877
BH
316 }
317
cb0af1a1 318 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
e6a9dde5 319 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the maximum TCP connection duration, dropping.";
cb0af1a1
RG
320 break;
321 }
322
323 getQuestion(fd, mesg.get(), pktlen, remote, remainingTime);
12c86877 324 S.inc("tcp-queries");
5fd567ec 325 if(remote.sin4.sin_family == AF_INET6)
326 S.inc("tcp6-queries");
327 else
328 S.inc("tcp4-queries");
3e579e91 329
27c0050c 330 packet=shared_ptr<DNSPacket>(new DNSPacket(true));
809fe23f 331 packet->setRemote(&remote);
e9dd48f9 332 packet->d_tcp=true;
ff76e8b4 333 packet->setSocket(fd);
c2b4ccc0 334 if(packet->parse(mesg.get(), pktlen)<0)
4957a608 335 break;
c1663439 336
6e59a580
KM
337 if(packet->qtype.getCode()==QType::AXFR) {
338 if(doAXFR(packet->qdomain, packet, fd))
5fd567ec 339 incTCPAnswerCount(remote);
6e59a580
KM
340 continue;
341 }
342
343 if(packet->qtype.getCode()==QType::IXFR) {
344 if(doIXFR(packet, fd))
5fd567ec 345 incTCPAnswerCount(remote);
4957a608 346 continue;
12c86877
BH
347 }
348
ff76e8b4 349 shared_ptr<DNSPacket> reply;
27c0050c 350 shared_ptr<DNSPacket> cached= shared_ptr<DNSPacket>(new DNSPacket(false));
fe498ace 351 if(logDNSQueries) {
cece60fc
CH
352 string remote_text;
353 if(packet->hasEDNSSubnet())
354 remote_text = packet->getRemote().toString() + "<-" + packet->getRealRemote().toString();
fe498ace 355 else
cece60fc 356 remote_text = packet->getRemote().toString();
e6a9dde5 357 g_log << Logger::Notice<<"TCP Remote "<< remote_text <<" wants '" << packet->qdomain<<"|"<<packet->qtype.getName() <<
bb5903e2 358 "', do = " <<packet->d_dnssecOk <<", bufsize = "<< packet->getMaxReplyLen()<<": ";
fe498ace 359 }
bb5903e2 360
9a037bfa
KM
361 if(PC.enabled()) {
362 if(packet->couldBeCached() && PC.get(packet.get(), cached.get())) { // short circuit - does the PacketCache recognize this question?
363 if(logDNSQueries)
364 g_log<<"packetcache HIT"<<endl;
365 cached->setRemote(&packet->d_remote);
366 cached->d.id=packet->d.id;
367 cached->d.rd=packet->d.rd; // copy in recursion desired bit
368 cached->commitD(); // commit d to the packet inlined
369
370 sendPacket(cached, fd); // presigned, don't do it again
371 continue;
372 }
21a303f3 373 if(logDNSQueries)
9a037bfa 374 g_log<<"packetcache MISS"<<endl;
12c86877 375 }
12c86877 376 {
4957a608
BH
377 Lock l(&s_plock);
378 if(!s_P) {
e6a9dde5 379 g_log<<Logger::Error<<"TCP server is without backend connections, launching"<<endl;
4957a608
BH
380 s_P=new PacketHandler;
381 }
4957a608 382
e89efca5 383 reply=shared_ptr<DNSPacket>(s_P->doQuestion(packet.get())); // we really need to ask the backend :-)
12c86877
BH
384 }
385
12c86877 386 if(!reply) // unable to write an answer?
4957a608 387 break;
b552d7b1 388
ff76e8b4 389 sendPacket(reply, fd);
12c86877 390 }
12c86877 391 }
3f81d239 392 catch(PDNSException &ae) {
556252ea
BH
393 Lock l(&s_plock);
394 delete s_P;
395 s_P = 0; // on next call, backend will be recycled
e6a9dde5 396 g_log<<Logger::Error<<"TCP nameserver had error, cycling backend: "<<ae.reason<<endl;
ef1d2f44 397 }
0afa9049 398 catch(NetworkError &e) {
e6a9dde5 399 g_log<<Logger::Info<<"TCP Connection Thread died because of network error: "<<e.what()<<endl;
0afa9049
BH
400 }
401
adc10f99 402 catch(std::exception &e) {
e6a9dde5 403 g_log<<Logger::Error<<"TCP Connection Thread died because of STL error: "<<e.what()<<endl;
12c86877
BH
404 }
405 catch( ... )
406 {
e6a9dde5 407 g_log << Logger::Error << "TCP Connection Thread caught unknown exception." << endl;
12c86877 408 }
12c86877 409 d_connectionroom_sem->post();
a7b68ae7
RG
410
411 try {
412 closesocket(fd);
413 }
414 catch(const PDNSException& e) {
e6a9dde5 415 g_log<<Logger::Error<<"Error closing TCP socket: "<<e.reason<<endl;
a7b68ae7 416 }
cb0af1a1 417 decrementClientCount(remote);
12c86877
BH
418
419 return 0;
420}
421
78bcb858 422
e082fb4c 423// call this method with s_plock held!
ff76e8b4 424bool TCPNameserver::canDoAXFR(shared_ptr<DNSPacket> q)
12c86877 425{
379ab445 426 if(::arg().mustDo("disable-axfr"))
318c3ec6
BH
427 return false;
428
78bcb858
BH
429 if(q->d_havetsig) { // if you have one, it must be good
430 TSIGRecordContent trc;
7abbc40f
PD
431 DNSName keyname;
432 string secret;
ea3816cf 433 if(!q->checkForCorrectTSIG(s_P->getBackend(), &keyname, &secret, &trc)) {
78bcb858 434 return false;
7f9ac49b
AT
435 } else {
436 getTSIGHashEnum(trc.d_algoName, q->d_tsig_algo);
437 if (q->d_tsig_algo == TSIG_GSS) {
1635f12b 438 GssContext gssctx(keyname);
7f9ac49b 439 if (!gssctx.getPeerPrincipal(q->d_peer_principal)) {
e6a9dde5 440 g_log<<Logger::Warning<<"Failed to extract peer principal from GSS context with keyname '"<<keyname<<"'"<<endl;
7f9ac49b
AT
441 }
442 }
443 }
444
ea99d474 445 DNSSECKeeper dk(s_P->getBackend());
5e29f2f9 446
84fc3f8b
AT
447 if (q->d_tsig_algo == TSIG_GSS) {
448 vector<string> princs;
449 s_P->getBackend()->getDomainMetadata(q->qdomain, "GSS-ALLOW-AXFR-PRINCIPAL", princs);
ff05fd12 450 for(const std::string& princ : princs) {
84fc3f8b 451 if (q->d_peer_principal == princ) {
e6a9dde5 452 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: TSIG signed request with authorized principal '"<<q->d_peer_principal<<"' and algorithm 'gss-tsig'"<<endl;
84fc3f8b
AT
453 return true;
454 }
455 }
e6a9dde5 456 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' denied: TSIG signed request with principal '"<<q->d_peer_principal<<"' and algorithm 'gss-tsig' is not permitted"<<endl;
84fc3f8b
AT
457 return false;
458 }
459
3d03fee8 460 if(!dk.TSIGGrantsAccess(q->qdomain, keyname)) {
e6a9dde5 461 g_log<<Logger::Error<<"AXFR '"<<q->qdomain<<"' denied: key with name '"<<keyname<<"' and algorithm '"<<getTSIGAlgoName(q->d_tsig_algo)<<"' does not grant access to zone"<<endl;
78bcb858
BH
462 return false;
463 }
464 else {
e6a9dde5 465 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: TSIG signed request with authorized key '"<<keyname<<"' and algorithm '"<<getTSIGAlgoName(q->d_tsig_algo)<<"'"<<endl;
78bcb858
BH
466 return true;
467 }
468 }
93afc0a3
PD
469
470 // cerr<<"checking allow-axfr-ips"<<endl;
471 if(!(::arg()["allow-axfr-ips"].empty()) && d_ng.match( (ComboAddress *) &q->d_remote )) {
e6a9dde5 472 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: client IP "<<q->getRemote()<<" is in allow-axfr-ips"<<endl;
12c86877 473 return true;
ab5edd12 474 }
93afc0a3
PD
475
476 FindNS fns;
477
478 // cerr<<"doing per-zone-axfr-acls"<<endl;
479 SOAData sd;
79ba7763 480 if(s_P->getBackend()->getSOAUncached(q->qdomain,sd)) {
93afc0a3
PD
481 // cerr<<"got backend and SOA"<<endl;
482 DNSBackend *B=sd.db;
483 vector<string> acl;
894bcf36 484 s_P->getBackend()->getDomainMetadata(q->qdomain, "ALLOW-AXFR-FROM", acl);
93afc0a3
PD
485 for (vector<string>::const_iterator i = acl.begin(); i != acl.end(); ++i) {
486 // cerr<<"matching against "<<*i<<endl;
487 if(pdns_iequals(*i, "AUTO-NS")) {
488 // cerr<<"AUTO-NS magic please!"<<endl;
489
490 DNSResourceRecord rr;
7abbc40f 491 set<DNSName> nsset;
93afc0a3
PD
492
493 B->lookup(QType(QType::NS),q->qdomain);
494 while(B->get(rr))
290a083d 495 nsset.insert(DNSName(rr.content));
7abbc40f 496 for(const auto & j: nsset) {
35b942fe 497 vector<string> nsips=fns.lookup(j, s_P->getBackend());
93afc0a3
PD
498 for(vector<string>::const_iterator k=nsips.begin();k!=nsips.end();++k) {
499 // cerr<<"got "<<*k<<" from AUTO-NS"<<endl;
ded6b08d 500 if(*k == q->getRemote().toString())
93afc0a3
PD
501 {
502 // cerr<<"got AUTO-NS hit"<<endl;
e6a9dde5 503 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: client IP "<<q->getRemote()<<" is in NSset"<<endl;
93afc0a3
PD
504 return true;
505 }
506 }
507 }
508 }
509 else
510 {
511 Netmask nm = Netmask(*i);
512 if(nm.match( (ComboAddress *) &q->d_remote ))
513 {
e6a9dde5 514 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: client IP "<<q->getRemote()<<" is in per-domain ACL"<<endl;
93afc0a3
PD
515 // cerr<<"hit!"<<endl;
516 return true;
517 }
518 }
519 }
520 }
521
12c86877
BH
522 extern CommunicatorClass Communicator;
523
ded6b08d 524 if(Communicator.justNotified(q->qdomain, q->getRemote().toString())) { // we just notified this ip
e6a9dde5 525 g_log<<Logger::Warning<<"Approved AXFR of '"<<q->qdomain<<"' from recently notified slave "<<q->getRemote()<<endl;
12c86877
BH
526 return true;
527 }
528
e6a9dde5 529 g_log<<Logger::Error<<"AXFR of domain '"<<q->qdomain<<"' denied: client IP "<<q->getRemote()<<" has no permission"<<endl;
12c86877
BH
530 return false;
531}
532
b317b510 533namespace {
54d84273
PD
534 struct NSECXEntry
535 {
22a0ef16 536 NSECBitmap d_set;
54d84273 537 unsigned int d_ttl;
feef1ece 538 bool d_auth;
54d84273 539 };
8e9b7d99 540
54d84273
PD
541 shared_ptr<DNSPacket> getFreshAXFRPacket(shared_ptr<DNSPacket> q)
542 {
543 shared_ptr<DNSPacket> ret = shared_ptr<DNSPacket>(q->replyPacket());
544 ret->setCompress(false);
545 ret->d_dnssecOk=false; // RFC 5936, 2.2.5
546 ret->d_tcp = true;
547 return ret;
548 }
8e9b7d99
BH
549}
550
54d84273 551
12c86877 552/** do the actual zone transfer. Return 0 in case of error, 1 in case of success */
7abbc40f 553int TCPNameserver::doAXFR(const DNSName &target, shared_ptr<DNSPacket> q, int outsock)
12c86877 554{
8e9b7d99 555 shared_ptr<DNSPacket> outpacket= getFreshAXFRPacket(q);
c67e46a1 556 if(q->d_dnssecOk)
05e24311 557 outpacket->d_dnssecOk=true; // RFC 5936, 2.2.5 'SHOULD'
22893145 558
e6a9dde5 559 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' initiated by "<<q->getRemote()<<endl;
12c86877 560
22893145 561 // determine if zone exists and AXFR is allowed using existing backend before spawning a new backend.
12c86877
BH
562 SOAData sd;
563 {
564 Lock l(&s_plock);
e6a9dde5 565 DLOG(g_log<<"Looking for SOA"<<endl); // find domain_id via SOA and list complete domain. No SOA, no AXFR
12a965c5 566 if(!s_P) {
e6a9dde5 567 g_log<<Logger::Error<<"TCP server is without backend connections in doAXFR, launching"<<endl;
12a965c5
BH
568 s_P=new PacketHandler;
569 }
12c86877 570
ea99d474 571 // canDoAXFR does all the ACL checks, and has the if(disable-axfr) shortcut, call it first.
8090f5a2 572 if (!canDoAXFR(q)) {
e6a9dde5 573 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' failed: "<<q->getRemote()<<" may not request AXFR"<<endl;
9c556f63 574 outpacket->setRcode(RCode::NotAuth);
8090f5a2
AT
575 sendPacket(outpacket,outsock);
576 return 0;
577 }
578
8090f5a2 579 if(!s_P->getBackend()->getSOAUncached(target, sd)) {
e6a9dde5 580 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' failed: not authoritative"<<endl;
9c556f63 581 outpacket->setRcode(RCode::NotAuth);
ff76e8b4 582 sendPacket(outpacket,outsock);
12c86877
BH
583 return 0;
584 }
3de83124 585 }
22893145 586
8e9b7d99 587 UeberBackend db;
79ba7763 588 if(!db.getSOAUncached(target, sd)) {
e6a9dde5 589 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' failed: not authoritative in second instance"<<endl;
79ba7763 590 outpacket->setRcode(RCode::NotAuth);
ff76e8b4 591 sendPacket(outpacket,outsock);
3de83124 592 return 0;
12c86877 593 }
3de83124 594
ea99d474 595 DNSSECKeeper dk(&db);
22893145
CH
596 dk.clearCaches(target);
597 bool securedZone = dk.isSecuredZone(target);
598 bool presignedZone = dk.isPresigned(target);
599
600 bool noAXFRBecauseOfNSEC3Narrow=false;
601 NSEC3PARAMRecordContent ns3pr;
602 bool narrow;
603 bool NSEC3Zone=false;
dacacb23 604 if(securedZone && dk.getNSEC3PARAM(target, &ns3pr, &narrow)) {
22893145
CH
605 NSEC3Zone=true;
606 if(narrow) {
e6a9dde5 607 g_log<<Logger::Error<<"Not doing AXFR of an NSEC3 narrow zone '"<<target<<"' for "<<q->getRemote()<<endl;
22893145
CH
608 noAXFRBecauseOfNSEC3Narrow=true;
609 }
610 }
611
612 if(noAXFRBecauseOfNSEC3Narrow) {
e6a9dde5 613 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' denied to "<<q->getRemote()<<endl;
22893145
CH
614 outpacket->setRcode(RCode::Refused);
615 // FIXME: should actually figure out if we are auth over a zone, and send out 9 if we aren't
616 sendPacket(outpacket,outsock);
617 return 0;
618 }
619
78bcb858 620 TSIGRecordContent trc;
7abbc40f
PD
621 DNSName tsigkeyname;
622 string tsigsecret;
78bcb858 623
ea3816cf 624 bool haveTSIGDetails = q->getTSIGDetails(&trc, &tsigkeyname);
78bcb858 625
60a1c204 626 if(haveTSIGDetails && !tsigkeyname.empty()) {
2c26f25a 627 string tsig64;
3343ad1f 628 DNSName algorithm=trc.d_algoName; // FIXME400: check
290a083d 629 if (algorithm == DNSName("hmac-md5.sig-alg.reg.int"))
630 algorithm = DNSName("hmac-md5");
631 if (algorithm != DNSName("gss-tsig")) {
ea99d474 632 if(!db.getTSIGKey(tsigkeyname, &algorithm, &tsig64)) {
e6a9dde5 633 g_log<<Logger::Error<<"TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"' not found"<<endl;
53ace5d5
PL
634 return 0;
635 }
636 if (B64Decode(tsig64, tsigsecret) == -1) {
e6a9dde5 637 g_log<<Logger::Error<<"Unable to Base-64 decode TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"'"<<endl;
53ace5d5
PL
638 return 0;
639 }
84fc3f8b 640 }
78bcb858 641 }
8e9b7d99 642
8e9b7d99 643
8267bd2c 644 // SOA *must* go out first, our signing pipe might reorder
e6a9dde5 645 DLOG(g_log<<"Sending out SOA"<<endl);
13f9e280
CH
646 DNSZoneRecord soa = makeEditedDNSZRFromSOAData(dk, sd);
647 outpacket->addRecord(soa);
3c68fb14 648 if(securedZone && !presignedZone) {
7abbc40f 649 set<DNSName> authSet;
8d3cbffa 650 authSet.insert(target);
ea99d474 651 addRRSigs(dk, db, authSet, outpacket->getRRS());
8d3cbffa 652 }
8e9b7d99 653
60a1c204 654 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858
BH
655 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac); // first answer is 'normal'
656
8267bd2c 657 sendPacket(outpacket, outsock);
78bcb858
BH
658
659 trc.d_mac = outpacket->d_trc.d_mac;
8267bd2c
BH
660 outpacket = getFreshAXFRPacket(q);
661
3c68fb14 662 ChunkedSigningPipe csp(target, (securedZone && !presignedZone), ::arg().asNum("signing-threads", 1));
8e9b7d99 663
6e8694df 664 typedef map<DNSName, NSECXEntry, CanonDNSNameCompare> nsecxrepo_t;
9d3151d9 665 nsecxrepo_t nsecxrepo;
4888e4b2
BH
666
667 // this is where the DNSKEYs go in
0c350cb5 668
4c1474f3 669 DNSSECKeeper::keyset_t keys = dk.getKeys(target);
0c350cb5 670
90ba52e0 671 DNSZoneRecord zrr;
0c350cb5 672
90ba52e0 673 zrr.dr.d_name = target;
674 zrr.dr.d_ttl = sd.default_ttl;
675 zrr.auth = 1; // please sign!
794c2f92 676
991a0977 677 string publishCDNSKEY, publishCDS;
0900d2d3
CH
678 dk.getFromMeta(q->qdomain, "PUBLISH-CDNSKEY", publishCDNSKEY);
679 dk.getFromMeta(q->qdomain, "PUBLISH-CDS", publishCDS);
90ba52e0 680 vector<DNSZoneRecord> cds, cdnskey;
f889ab99
PL
681 DNSSECKeeper::keyset_t entryPoints = dk.getEntryPoints(q->qdomain);
682 set<uint32_t> entryPointIds;
683 for (auto const& value : entryPoints)
684 entryPointIds.insert(value.second.id);
991a0977 685
ff05fd12 686 for(const DNSSECKeeper::keyset_t::value_type& value : keys) {
90ba52e0 687 zrr.dr.d_type = QType::DNSKEY;
688 zrr.dr.d_content = std::make_shared<DNSKEYRecordContent>(value.first.getDNSKEY());
6e8694df 689 DNSName keyname = NSEC3Zone ? DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, zrr.dr.d_name))) : zrr.dr.d_name;
9d3151d9 690 NSECXEntry& ne = nsecxrepo[keyname];
b317b510 691
22a0ef16 692 ne.d_set.set(zrr.dr.d_type);
794c2f92 693 ne.d_ttl = sd.default_ttl;
90ba52e0 694 csp.submit(zrr);
991a0977
PL
695
696 // generate CDS and CDNSKEY records
f889ab99 697 if(entryPointIds.count(value.second.id) > 0){
991a0977 698 if(publishCDNSKEY == "1") {
90ba52e0 699 zrr.dr.d_type=QType::CDNSKEY;
700 zrr.dr.d_content = std::make_shared<DNSKEYRecordContent>(value.first.getDNSKEY());
701 cdnskey.push_back(zrr);
991a0977
PL
702 }
703
704 if(!publishCDS.empty()){
90ba52e0 705 zrr.dr.d_type=QType::CDS;
991a0977
PL
706 vector<string> digestAlgos;
707 stringtok(digestAlgos, publishCDS, ", ");
56225bd3 708 for(auto const &digestAlgo : digestAlgos) {
90ba52e0 709 zrr.dr.d_content=std::make_shared<DSRecordContent>(makeDSFromDNSKey(target, value.first.getDNSKEY(), pdns_stou(digestAlgo)));
710 cds.push_back(zrr);
991a0977
PL
711 }
712 }
713 }
4c1474f3 714 }
0c350cb5 715
cc8df07f 716 if(::arg().mustDo("direct-dnskey")) {
6dae726d 717 sd.db->lookup(QType(QType::DNSKEY), target, NULL, sd.domain_id);
90ba52e0 718 while(sd.db->get(zrr)) {
719 zrr.dr.d_ttl = sd.default_ttl;
720 csp.submit(zrr);
6dae726d
PD
721 }
722 }
723
b8adb30d
KM
724 uint8_t flags;
725
95c5bc40 726 if(NSEC3Zone) { // now stuff in the NSEC3PARAM
b8adb30d 727 flags = ns3pr.d_flags;
90ba52e0 728 zrr.dr.d_type = QType::NSEC3PARAM;
95c5bc40 729 ns3pr.d_flags = 0;
90ba52e0 730 zrr.dr.d_content = std::make_shared<NSEC3PARAMRecordContent>(ns3pr);
b8adb30d 731 ns3pr.d_flags = flags;
6e8694df 732 DNSName keyname = DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, zrr.dr.d_name)));
ce464268
BH
733 NSECXEntry& ne = nsecxrepo[keyname];
734
22a0ef16 735 ne.d_set.set(zrr.dr.d_type);
90ba52e0 736 csp.submit(zrr);
ce464268 737 }
8e9b7d99 738
0c350cb5
BH
739 // now start list zone
740 if(!(sd.db->list(target, sd.domain_id))) {
e6a9dde5 741 g_log<<Logger::Error<<"Backend signals error condition"<<endl;
9c556f63 742 outpacket->setRcode(RCode::ServFail);
0c350cb5
BH
743 sendPacket(outpacket,outsock);
744 return 0;
745 }
746
b772ffea 747
5633a4af 748 const bool rectify = !(presignedZone || ::arg().mustDo("disable-axfr-rectify"));
7abbc40f 749 set<DNSName> qnames, nsset, terms;
90ba52e0 750 vector<DNSZoneRecord> zrrs;
b772ffea 751
991a0977 752 // Add the CDNSKEY and CDS records we created earlier
cece60fc
CH
753 for (auto const &synth_zrr : cds)
754 zrrs.push_back(synth_zrr);
90ba52e0 755
cece60fc
CH
756 for (auto const &synth_zrr : cdnskey)
757 zrrs.push_back(synth_zrr);
90ba52e0 758
759 while(sd.db->get(zrr)) {
8bf260dd 760 zrr.dr.d_name.makeUsLowerCase();
90ba52e0 761 if(zrr.dr.d_name.isPartOf(target)) {
762 if (zrr.dr.d_type == QType::ALIAS && ::arg().mustDo("outgoing-axfr-expand-alias")) {
763 vector<DNSZoneRecord> ips;
764 int ret1 = stubDoResolve(getRR<ALIASRecordContent>(zrr.dr)->d_content, QType::A, ips);
765 int ret2 = stubDoResolve(getRR<ALIASRecordContent>(zrr.dr)->d_content, QType::AAAA, ips);
273d88b2 766 if(ret1 != RCode::NoError || ret2 != RCode::NoError) {
e6a9dde5 767 g_log<<Logger::Error<<"Error resolving for ALIAS "<<zrr.dr.d_content->getZoneRepresentation()<<", aborting AXFR"<<endl;
9c556f63 768 outpacket->setRcode(RCode::ServFail);
273d88b2
PD
769 sendPacket(outpacket,outsock);
770 return 0;
771 }
d86e1bf7 772 for(const auto& ip: ips) {
90ba52e0 773 zrr.dr.d_type = ip.dr.d_type;
0438fd89 774 zrr.dr.d_content = ip.dr.d_content;
90ba52e0 775 zrrs.push_back(zrr);
d86e1bf7 776 }
a68df29d 777 continue;
d86e1bf7
PD
778 }
779
b772ffea 780 if (rectify) {
90ba52e0 781 if (zrr.dr.d_type) {
782 qnames.insert(zrr.dr.d_name);
783 if(zrr.dr.d_type == QType::NS && zrr.dr.d_name!=target)
784 nsset.insert(zrr.dr.d_name);
b772ffea
KM
785 } else {
786 // remove existing ents
787 continue;
788 }
789 }
a68df29d 790 zrrs.push_back(zrr);
b772ffea 791 } else {
90ba52e0 792 if (zrr.dr.d_type)
e6a9dde5 793 g_log<<Logger::Warning<<"Zone '"<<target<<"' contains out-of-zone data '"<<zrr.dr.d_name<<"|"<<DNSRecordContent::NumberToType(zrr.dr.d_type)<<"', ignoring"<<endl;
b772ffea
KM
794 }
795 }
796
75f2589f 797 // Group records by name and type, signpipe stumbles over interrupted rrsets
22a676e0 798 if(securedZone && !presignedZone) {
8daafcc1
KM
799 sort(zrrs.begin(), zrrs.end(), [](const DNSZoneRecord& a, const DNSZoneRecord& b) {
800 return tie(a.dr.d_name, a.dr.d_type) < tie(b.dr.d_name, b.dr.d_type);
801 });
802 }
75f2589f 803
b772ffea
KM
804 if(rectify) {
805 // set auth
2010ac95
RG
806 for(DNSZoneRecord &loopZRR : zrrs) {
807 loopZRR.auth=true;
808 if (loopZRR.dr.d_type != QType::NS || loopZRR.dr.d_name!=target) {
809 DNSName shorter(loopZRR.dr.d_name);
b772ffea 810 do {
e325f20c 811 if (shorter==target) // apex is always auth
cb045f61 812 break;
2010ac95
RG
813 if(nsset.count(shorter) && !(loopZRR.dr.d_name==shorter && loopZRR.dr.d_type == QType::DS)) {
814 loopZRR.auth=false;
cb045f61 815 break;
9f70b77a 816 }
7abbc40f 817 } while(shorter.chopOff());
9f70b77a 818 }
b772ffea
KM
819 }
820
821 if(NSEC3Zone) {
822 // ents are only required for NSEC3 zones
823 uint32_t maxent = ::arg().asNum("max-ent-entries");
6ded341a 824 set<DNSName> nsec3set, nonterm;
2010ac95 825 for (auto &loopZRR: zrrs) {
6ded341a 826 bool skip=false;
2010ac95 827 DNSName shorter = loopZRR.dr.d_name;
6ded341a
KM
828 if (shorter != target && shorter.chopOff() && shorter != target) {
829 do {
830 if(nsset.count(shorter)) {
831 skip=true;
832 break;
833 }
834 } while(shorter.chopOff() && shorter != target);
835 }
2010ac95
RG
836 shorter = loopZRR.dr.d_name;
837 if(!skip && (loopZRR.dr.d_type != QType::NS || !ns3pr.d_flags)) {
6ded341a
KM
838 do {
839 if(!nsec3set.count(shorter)) {
840 nsec3set.insert(shorter);
841 }
842 } while(shorter != target && shorter.chopOff());
843 }
844 }
845
2010ac95
RG
846 for(DNSZoneRecord &loopZRR : zrrs) {
847 DNSName shorter(loopZRR.dr.d_name);
e325f20c 848 while(shorter != target && shorter.chopOff()) {
6ded341a 849 if(!qnames.count(shorter) && !nonterm.count(shorter) && nsec3set.count(shorter)) {
b772ffea 850 if(!(maxent)) {
e6a9dde5 851 g_log<<Logger::Warning<<"Zone '"<<target<<"' has too many empty non terminals."<<endl;
b772ffea
KM
852 return 0;
853 }
6ded341a
KM
854 nonterm.insert(shorter);
855 --maxent;
b772ffea
KM
856 }
857 }
858 }
859
9e23e712 860 for(const auto& nt : nonterm) {
2010ac95
RG
861 DNSZoneRecord tempRR;
862 tempRR.dr.d_name=nt;
863 tempRR.dr.d_type=QType::ENT;
864 tempRR.auth=true;
865 zrrs.push_back(tempRR);
b772ffea
KM
866 }
867 }
868 }
869
870
12c86877 871 /* now write all other records */
8e9b7d99 872
6e8694df 873 DNSName keyname;
3370c993 874 unsigned int udiff;
1c6d9830
BH
875 DTime dt;
876 dt.set();
bec14a20 877 int records=0;
2010ac95 878 for(DNSZoneRecord &loopZRR : zrrs) {
3c68fb14 879 if (!presignedZone && loopZRR.dr.d_type == QType::RRSIG)
794c2f92 880 continue;
6dae726d 881
991a0977 882 // only skip the DNSKEY, CDNSKEY and CDS if direct-dnskey is enabled, to avoid changing behaviour
6dae726d 883 // when it is not enabled.
2010ac95 884 if(::arg().mustDo("direct-dnskey") && (loopZRR.dr.d_type == QType::DNSKEY || loopZRR.dr.d_type == QType::CDNSKEY || loopZRR.dr.d_type == QType::CDS))
6dae726d
PD
885 continue;
886
bec14a20 887 records++;
2010ac95
RG
888 if(securedZone && (loopZRR.auth || loopZRR.dr.d_type == QType::NS)) {
889 if (NSEC3Zone || loopZRR.dr.d_type) {
3c68fb14
KM
890 if (presignedZone && NSEC3Zone && loopZRR.dr.d_type == QType::RRSIG && getRR<RRSIGRecordContent>(loopZRR.dr)->d_type == QType::NSEC3) {
891 keyname = loopZRR.dr.d_name.makeRelative(sd.qname);
892 } else {
893 keyname = NSEC3Zone ? DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, loopZRR.dr.d_name))) : loopZRR.dr.d_name;
894 }
b5baefaf
PD
895 NSECXEntry& ne = nsecxrepo[keyname];
896 ne.d_ttl = sd.default_ttl;
3c68fb14
KM
897 ne.d_auth = (ne.d_auth || loopZRR.auth || (NSEC3Zone && (!ns3pr.d_flags)));
898 if (loopZRR.dr.d_type && loopZRR.dr.d_type != QType::RRSIG) {
22a0ef16 899 ne.d_set.set(loopZRR.dr.d_type);
b5baefaf
PD
900 }
901 }
b317b510 902 }
b5baefaf 903
2010ac95 904 if (!loopZRR.dr.d_type)
b5baefaf
PD
905 continue; // skip empty non-terminals
906
2010ac95 907 if(loopZRR.dr.d_type == QType::SOA)
12c86877 908 continue; // skip SOA - would indicate end of AXFR
add640c0 909
2010ac95 910 if(csp.submit(loopZRR)) {
1c6d9830
BH
911 for(;;) {
912 outpacket->getRRS() = csp.getChunk();
913 if(!outpacket->getRRS().empty()) {
60a1c204 914 if(haveTSIGDetails && !tsigkeyname.empty())
54d84273 915 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
1c6d9830 916 sendPacket(outpacket, outsock);
78bcb858 917 trc.d_mac=outpacket->d_trc.d_mac;
1c6d9830
BH
918 outpacket=getFreshAXFRPacket(q);
919 }
920 else
921 break;
922 }
12c86877
BH
923 }
924 }
78bcb858 925 /*
3370c993 926 udiff=dt.udiffNoReset();
1c6d9830
BH
927 cerr<<"Starting NSEC: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<csp.d_signed<<" / "<<udiff/1000000.0<<endl;
928 cerr<<"Outstanding: "<<csp.d_outstanding<<", "<<csp.d_queued - csp.d_signed << endl;
929 cerr<<"Ready for consumption: "<<csp.getReady()<<endl;
78bcb858 930 */
feef1ece 931 if(securedZone) {
4888e4b2 932 if(NSEC3Zone) {
9d3151d9 933 for(nsecxrepo_t::const_iterator iter = nsecxrepo.begin(); iter != nsecxrepo.end(); ++iter) {
3c68fb14 934 if(iter->second.d_auth) {
feef1ece 935 NSEC3RecordContent n3rc;
22a0ef16 936 n3rc.set(iter->second.d_set);
27d4a65b
RG
937 const auto numberOfTypesSet = n3rc.numberOfTypesSet();
938 if (numberOfTypesSet != 0 && (numberOfTypesSet != 1 || !n3rc.isSet(QType::NS))) {
939 n3rc.set(QType::RRSIG);
940 }
941 n3rc.d_salt = ns3pr.d_salt;
feef1ece
PD
942 n3rc.d_flags = ns3pr.d_flags;
943 n3rc.d_iterations = ns3pr.d_iterations;
27d4a65b 944 n3rc.d_algorithm = DNSSECKeeper::SHA1; // SHA1, fixed in PowerDNS for now
feef1ece 945 nsecxrepo_t::const_iterator inext = iter;
cb167afd 946 ++inext;
feef1ece
PD
947 if(inext == nsecxrepo.end())
948 inext = nsecxrepo.begin();
3c68fb14 949 while(!inext->second.d_auth && inext != iter)
feef1ece 950 {
cb167afd 951 ++inext;
feef1ece
PD
952 if(inext == nsecxrepo.end())
953 inext = nsecxrepo.begin();
954 }
6e8694df
KM
955 n3rc.d_nexthash = fromBase32Hex(inext->first.toStringNoDot());
956 zrr.dr.d_name = iter->first+sd.qname;
90ba52e0 957
958 zrr.dr.d_ttl = sd.default_ttl;
27d4a65b 959 zrr.dr.d_content = std::make_shared<NSEC3RecordContent>(std::move(n3rc));
90ba52e0 960 zrr.dr.d_type = QType::NSEC3;
961 zrr.dr.d_place = DNSResourceRecord::ANSWER;
962 zrr.auth=true;
963 if(csp.submit(zrr)) {
feef1ece
PD
964 for(;;) {
965 outpacket->getRRS() = csp.getChunk();
966 if(!outpacket->getRRS().empty()) {
60a1c204 967 if(haveTSIGDetails && !tsigkeyname.empty())
feef1ece
PD
968 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
969 sendPacket(outpacket, outsock);
970 trc.d_mac=outpacket->d_trc.d_mac;
971 outpacket=getFreshAXFRPacket(q);
972 }
973 else
974 break;
1c6d9830 975 }
1c6d9830 976 }
8e9b7d99 977 }
4888e4b2
BH
978 }
979 }
9d3151d9 980 else for(nsecxrepo_t::const_iterator iter = nsecxrepo.begin(); iter != nsecxrepo.end(); ++iter) {
ed9c3a50 981 NSECRecordContent nrc;
22a0ef16 982 nrc.set(iter->second.d_set);
27d4a65b
RG
983 nrc.set(QType::RRSIG);
984 nrc.set(QType::NSEC);
6e8694df
KM
985
986 if(boost::next(iter) != nsecxrepo.end())
987 nrc.d_next = boost::next(iter)->first;
ed9c3a50 988 else
6e8694df
KM
989 nrc.d_next=nsecxrepo.begin()->first;
990 zrr.dr.d_name = iter->first;
991
90ba52e0 992 zrr.dr.d_ttl = sd.default_ttl;
27d4a65b 993 zrr.dr.d_content = std::make_shared<NSECRecordContent>(std::move(nrc));
90ba52e0 994 zrr.dr.d_type = QType::NSEC;
995 zrr.dr.d_place = DNSResourceRecord::ANSWER;
996 zrr.auth=true;
997 if(csp.submit(zrr)) {
1c6d9830
BH
998 for(;;) {
999 outpacket->getRRS() = csp.getChunk();
1000 if(!outpacket->getRRS().empty()) {
60a1c204 1001 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 1002 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
1c6d9830 1003 sendPacket(outpacket, outsock);
78bcb858 1004 trc.d_mac=outpacket->d_trc.d_mac;
1c6d9830
BH
1005 outpacket=getFreshAXFRPacket(q);
1006 }
1007 else
1008 break;
1009 }
8e9b7d99 1010 }
add640c0 1011 }
add640c0 1012 }
78bcb858 1013 /*
3370c993 1014 udiff=dt.udiffNoReset();
1c6d9830
BH
1015 cerr<<"Flushing pipe: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<csp.d_signed<<" / "<<udiff/1000000.0<<endl;
1016 cerr<<"Outstanding: "<<csp.d_outstanding<<", "<<csp.d_queued - csp.d_signed << endl;
1017 cerr<<"Ready for consumption: "<<csp.getReady()<<endl;
78bcb858 1018 * */
bec14a20
BH
1019 for(;;) {
1020 outpacket->getRRS() = csp.getChunk(true); // flush the pipe
1021 if(!outpacket->getRRS().empty()) {
60a1c204 1022 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 1023 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true); // first answer is 'normal'
bec14a20 1024 sendPacket(outpacket, outsock);
78bcb858 1025 trc.d_mac=outpacket->d_trc.d_mac;
bec14a20
BH
1026 outpacket=getFreshAXFRPacket(q);
1027 }
1028 else
1029 break;
12c86877 1030 }
8e9b7d99 1031
1c6d9830 1032 udiff=dt.udiffNoReset();
f1f85f12 1033 if(securedZone)
e6a9dde5 1034 g_log<<Logger::Info<<"Done signing: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<endl;
1c6d9830 1035
e6a9dde5 1036 DLOG(g_log<<"Done writing out records"<<endl);
12c86877 1037 /* and terminate with yet again the SOA record */
8e9b7d99 1038 outpacket=getFreshAXFRPacket(q);
13f9e280 1039 outpacket->addRecord(soa);
60a1c204 1040 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 1041 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
92c90b44 1042
ff76e8b4 1043 sendPacket(outpacket, outsock);
78bcb858 1044
e6a9dde5
PL
1045 DLOG(g_log<<"last packet - close"<<endl);
1046 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' to "<<q->getRemote()<<" finished"<<endl;
12c86877
BH
1047
1048 return 1;
1049}
1050
6e59a580
KM
1051int TCPNameserver::doIXFR(shared_ptr<DNSPacket> q, int outsock)
1052{
1053 shared_ptr<DNSPacket> outpacket=getFreshAXFRPacket(q);
1054 if(q->d_dnssecOk)
1055 outpacket->d_dnssecOk=true; // RFC 5936, 2.2.5 'SHOULD'
1056
6e59a580 1057 uint32_t serial = 0;
27c0050c 1058 MOADNSParser mdp(false, q->getString());
6e59a580
KM
1059 for(MOADNSParser::answers_t::const_iterator i=mdp.d_answers.begin(); i != mdp.d_answers.end(); ++i) {
1060 const DNSRecord *rr = &i->first;
e693ff5a 1061 if (rr->d_type == QType::SOA && rr->d_place == DNSResourceRecord::AUTHORITY) {
6e59a580
KM
1062 vector<string>parts;
1063 stringtok(parts, rr->d_content->getZoneRepresentation());
1064 if (parts.size() >= 3) {
95dd3b90
RG
1065 try {
1066 serial=pdns_stou(parts[2]);
1067 }
1068 catch(const std::out_of_range& oor) {
e6a9dde5 1069 g_log<<Logger::Error<<"Invalid serial in IXFR query"<<endl;
95dd3b90
RG
1070 outpacket->setRcode(RCode::FormErr);
1071 sendPacket(outpacket,outsock);
1072 return 0;
1073 }
6e59a580 1074 } else {
e6a9dde5 1075 g_log<<Logger::Error<<"No serial in IXFR query"<<endl;
6e59a580
KM
1076 outpacket->setRcode(RCode::FormErr);
1077 sendPacket(outpacket,outsock);
1078 return 0;
1079 }
3e67ea8b 1080 } else if (rr->d_type != QType::TSIG && rr->d_type != QType::OPT) {
e6a9dde5 1081 g_log<<Logger::Error<<"Additional records in IXFR query, type: "<<QType(rr->d_type).getName()<<endl;
6e59a580
KM
1082 outpacket->setRcode(RCode::FormErr);
1083 sendPacket(outpacket,outsock);
1084 return 0;
1085 }
1086 }
1087
e6a9dde5 1088 g_log<<Logger::Error<<"IXFR of domain '"<<q->qdomain<<"' initiated by "<<q->getRemote()<<" with serial "<<serial<<endl;
6e59a580 1089
22893145 1090 // determine if zone exists and AXFR is allowed using existing backend before spawning a new backend.
6e59a580 1091 SOAData sd;
6e59a580
KM
1092 {
1093 Lock l(&s_plock);
e6a9dde5 1094 DLOG(g_log<<"Looking for SOA"<<endl); // find domain_id via SOA and list complete domain. No SOA, no IXFR
6e59a580 1095 if(!s_P) {
e6a9dde5 1096 g_log<<Logger::Error<<"TCP server is without backend connections in doIXFR, launching"<<endl;
6e59a580
KM
1097 s_P=new PacketHandler;
1098 }
1099
22893145
CH
1100 // canDoAXFR does all the ACL checks, and has the if(disable-axfr) shortcut, call it first.
1101 if(!canDoAXFR(q) || !s_P->getBackend()->getSOAUncached(q->qdomain, sd)) {
e6a9dde5 1102 g_log<<Logger::Error<<"IXFR of domain '"<<q->qdomain<<"' failed: not authoritative"<<endl;
9c556f63 1103 outpacket->setRcode(RCode::NotAuth);
6e59a580
KM
1104 sendPacket(outpacket,outsock);
1105 return 0;
1106 }
1107 }
1108
22893145
CH
1109 DNSSECKeeper dk;
1110 NSEC3PARAMRecordContent ns3pr;
1111 bool narrow;
1112
1113 dk.clearCaches(q->qdomain);
1114 bool securedZone = dk.isSecuredZone(q->qdomain);
1115 if(dk.getNSEC3PARAM(q->qdomain, &ns3pr, &narrow)) {
1116 if(narrow) {
e6a9dde5
PL
1117 g_log<<Logger::Error<<"Not doing IXFR of an NSEC3 narrow zone."<<endl;
1118 g_log<<Logger::Error<<"IXFR of domain '"<<q->qdomain<<"' denied to "<<q->getRemote()<<endl;
22893145
CH
1119 outpacket->setRcode(RCode::Refused);
1120 sendPacket(outpacket,outsock);
1121 return 0;
1122 }
1123 }
1124
7abbc40f 1125 DNSName target = q->qdomain;
6e59a580
KM
1126
1127 UeberBackend db;
79ba7763 1128 if(!db.getSOAUncached(target, sd)) {
e6a9dde5 1129 g_log<<Logger::Error<<"IXFR of domain '"<<target<<"' failed: not authoritative in second instance"<<endl;
79ba7763 1130 outpacket->setRcode(RCode::NotAuth);
6e59a580
KM
1131 sendPacket(outpacket,outsock);
1132 return 0;
1133 }
24d9e514 1134
13f9e280 1135 if (!rfc1982LessThan(serial, calculateEditSOA(sd.serial, dk, sd.qname))) {
6e59a580 1136 TSIGRecordContent trc;
7abbc40f
PD
1137 DNSName tsigkeyname;
1138 string tsigsecret;
6e59a580 1139
ea3816cf 1140 bool haveTSIGDetails = q->getTSIGDetails(&trc, &tsigkeyname);
6e59a580 1141
60a1c204 1142 if(haveTSIGDetails && !tsigkeyname.empty()) {
bb7fb11c 1143 string tsig64;
3343ad1f 1144 DNSName algorithm=trc.d_algoName; // FIXME400: was toLowerCanonic, compare output
290a083d 1145 if (algorithm == DNSName("hmac-md5.sig-alg.reg.int"))
1146 algorithm = DNSName("hmac-md5");
6e59a580 1147 Lock l(&s_plock);
53ace5d5 1148 if(!s_P->getBackend()->getTSIGKey(tsigkeyname, &algorithm, &tsig64)) {
e6a9dde5 1149 g_log<<Logger::Error<<"TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"' not found"<<endl;
53ace5d5
PL
1150 return 0;
1151 }
1152 if (B64Decode(tsig64, tsigsecret) == -1) {
e6a9dde5 1153 g_log<<Logger::Error<<"Unable to Base-64 decode TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"'"<<endl;
53ace5d5
PL
1154 return 0;
1155 }
6e59a580
KM
1156 }
1157
1158 UeberBackend signatureDB;
1159
1160 // SOA *must* go out first, our signing pipe might reorder
e6a9dde5 1161 DLOG(g_log<<"Sending out SOA"<<endl);
13f9e280
CH
1162 DNSZoneRecord soa = makeEditedDNSZRFromSOAData(dk, sd);
1163 outpacket->addRecord(soa);
6e59a580 1164 if(securedZone) {
7abbc40f 1165 set<DNSName> authSet;
6e59a580
KM
1166 authSet.insert(target);
1167 addRRSigs(dk, signatureDB, authSet, outpacket->getRRS());
1168 }
1169
60a1c204 1170 if(haveTSIGDetails && !tsigkeyname.empty())
6e59a580
KM
1171 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac); // first answer is 'normal'
1172
1173 sendPacket(outpacket, outsock);
1174
e6a9dde5 1175 g_log<<Logger::Error<<"IXFR of domain '"<<target<<"' to "<<q->getRemote()<<" finished"<<endl;
6e59a580
KM
1176
1177 return 1;
1178 }
1179
e6a9dde5 1180 g_log<<Logger::Error<<"IXFR fallback to AXFR for domain '"<<target<<"' our serial "<<sd.serial<<endl;
6e59a580
KM
1181 return doAXFR(q->qdomain, q, outsock);
1182}
1183
12c86877
BH
1184TCPNameserver::~TCPNameserver()
1185{
1186 delete d_connectionroom_sem;
1187}
1188
1189TCPNameserver::TCPNameserver()
1190{
cb0af1a1
RG
1191 d_maxTransactionsPerConn = ::arg().asNum("max-tcp-transactions-per-conn");
1192 d_idleTimeout = ::arg().asNum("tcp-idle-timeout");
1193 d_maxConnectionDuration = ::arg().asNum("max-tcp-connection-duration");
1194 d_maxConnectionsPerClient = ::arg().asNum("max-tcp-connections-per-client");
1195
379ab445
BH
1196// sem_init(&d_connectionroom_sem,0,::arg().asNum("max-tcp-connections"));
1197 d_connectionroom_sem = new Semaphore( ::arg().asNum( "max-tcp-connections" ));
117e1bf2 1198 d_tid=0;
12c86877 1199 vector<string>locals;
379ab445 1200 stringtok(locals,::arg()["local-address"]," ,");
12c86877
BH
1201
1202 vector<string>locals6;
379ab445 1203 stringtok(locals6,::arg()["local-ipv6"]," ,");
12c86877 1204
12c86877 1205 if(locals.empty() && locals6.empty())
3f81d239 1206 throw PDNSException("No local address specified");
12c86877 1207
68b011bd 1208 d_ng.toMasks(::arg()["allow-axfr-ips"] );
9f1d5826 1209
12c86877 1210 signal(SIGPIPE,SIG_IGN);
12c86877
BH
1211
1212 for(vector<string>::const_iterator laddr=locals.begin();laddr!=locals.end();++laddr) {
12c86877 1213 int s=socket(AF_INET,SOCK_STREAM,0);
326484be 1214
12c86877 1215 if(s<0)
3f81d239 1216 throw PDNSException("Unable to acquire TCP socket: "+stringerror());
12c86877 1217
3897b9e1 1218 setCloseOnExec(s);
fb316318 1219
379ab445 1220 ComboAddress local(*laddr, ::arg().asNum("local-port"));
12c86877
BH
1221
1222 int tmp=1;
1223 if(setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
e6a9dde5 1224 g_log<<Logger::Error<<"Setsockopt failed"<<endl;
5bd2ea7b 1225 _exit(1);
12c86877 1226 }
940d7811
RG
1227
1228 if (::arg().asNum("tcp-fast-open") > 0) {
1229#ifdef TCP_FASTOPEN
1230 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1231 if (setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
e6a9dde5 1232 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
940d7811
RG
1233 }
1234#else
e6a9dde5 1235 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
940d7811
RG
1236#endif
1237 }
1238
fec7dd5a
SS
1239 if( ::arg().mustDo("non-local-bind") )
1240 Utility::setBindAny(AF_INET, s);
1241
379ab445 1242 if(::bind(s, (sockaddr*)&local, local.getSocklen())<0) {
2c896042 1243 close(s);
5ecb2885 1244 if( errno == EADDRNOTAVAIL && ! ::arg().mustDo("local-address-nonexist-fail") ) {
e6a9dde5 1245 g_log<<Logger::Error<<"IPv4 Address " << *laddr << " does not exist on this server - skipping TCP bind" << endl;
5ecb2885
MZ
1246 continue;
1247 } else {
e6a9dde5 1248 g_log<<Logger::Error<<"Unable to bind to TCP socket " << *laddr << ": "<<strerror(errno)<<endl;
2ab7e9ac 1249 throw PDNSException("Unable to bind to TCP socket");
5ecb2885 1250 }
12c86877
BH
1251 }
1252
1253 listen(s,128);
e6a9dde5 1254 g_log<<Logger::Error<<"TCP server bound to "<<local.toStringWithPort()<<endl;
12c86877 1255 d_sockets.push_back(s);
8edfedf1
BH
1256 struct pollfd pfd;
1257 memset(&pfd, 0, sizeof(pfd));
1258 pfd.fd = s;
1259 pfd.events = POLLIN;
1260
1261 d_prfds.push_back(pfd);
12c86877
BH
1262 }
1263
12c86877 1264 for(vector<string>::const_iterator laddr=locals6.begin();laddr!=locals6.end();++laddr) {
12c86877
BH
1265 int s=socket(AF_INET6,SOCK_STREAM,0);
1266
1267 if(s<0)
3f81d239 1268 throw PDNSException("Unable to acquire TCPv6 socket: "+stringerror());
178d5134 1269
3897b9e1 1270 setCloseOnExec(s);
fb316318 1271
379ab445 1272 ComboAddress local(*laddr, ::arg().asNum("local-port"));
12c86877
BH
1273
1274 int tmp=1;
1275 if(setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
e6a9dde5 1276 g_log<<Logger::Error<<"Setsockopt failed"<<endl;
5bd2ea7b 1277 _exit(1);
12c86877 1278 }
940d7811
RG
1279
1280 if (::arg().asNum("tcp-fast-open") > 0) {
1281#ifdef TCP_FASTOPEN
1282 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1283 if (setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
e6a9dde5 1284 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
940d7811
RG
1285 }
1286#else
e6a9dde5 1287 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
940d7811
RG
1288#endif
1289 }
1290
fec7dd5a
SS
1291 if( ::arg().mustDo("non-local-bind") )
1292 Utility::setBindAny(AF_INET6, s);
326484be 1293 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
e6a9dde5 1294 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
326484be 1295 }
ff76e8b4 1296 if(bind(s, (const sockaddr*)&local, local.getSocklen())<0) {
2c896042 1297 close(s);
5ecb2885 1298 if( errno == EADDRNOTAVAIL && ! ::arg().mustDo("local-ipv6-nonexist-fail") ) {
e6a9dde5 1299 g_log<<Logger::Error<<"IPv6 Address " << *laddr << " does not exist on this server - skipping TCP bind" << endl;
5ecb2885
MZ
1300 continue;
1301 } else {
e6a9dde5 1302 g_log<<Logger::Error<<"Unable to bind to TCPv6 socket" << *laddr << ": "<<strerror(errno)<<endl;
2ab7e9ac 1303 throw PDNSException("Unable to bind to TCPv6 socket");
5ecb2885 1304 }
12c86877
BH
1305 }
1306
1307 listen(s,128);
e6a9dde5 1308 g_log<<Logger::Error<<"TCPv6 server bound to "<<local.toStringWithPort()<<endl; // this gets %eth0 right
12c86877 1309 d_sockets.push_back(s);
8edfedf1
BH
1310
1311 struct pollfd pfd;
1312 memset(&pfd, 0, sizeof(pfd));
1313 pfd.fd = s;
1314 pfd.events = POLLIN;
1315
1316 d_prfds.push_back(pfd);
12c86877 1317 }
12c86877
BH
1318}
1319
1320
ff76e8b4 1321//! Start of TCP operations thread, we launch a new thread for each incoming TCP question
12c86877
BH
1322void TCPNameserver::thread()
1323{
519f5484 1324 setThreadName("pdns/tcpnameser");
12c86877
BH
1325 try {
1326 for(;;) {
1327 int fd;
cb0af1a1
RG
1328 ComboAddress remote;
1329 Utility::socklen_t addrlen=remote.getSocklen();
12c86877 1330
8edfedf1 1331 int ret=poll(&d_prfds[0], d_prfds.size(), -1); // blocks, forever if need be
8a63d3ce 1332 if(ret <= 0)
4957a608 1333 continue;
8a63d3ce 1334
12c86877 1335 int sock=-1;
8ce9e4e6 1336 for(const pollfd& pfd : d_prfds) {
c1ee10a6 1337 if(pfd.revents & POLLIN) {
4957a608 1338 sock = pfd.fd;
cb0af1a1
RG
1339 remote.sin4.sin_family = AF_INET6;
1340 addrlen=remote.getSocklen();
4957a608
BH
1341
1342 if((fd=accept(sock, (sockaddr*)&remote, &addrlen))<0) {
e6a9dde5 1343 g_log<<Logger::Error<<"TCP question accept error: "<<strerror(errno)<<endl;
4957a608
BH
1344
1345 if(errno==EMFILE) {
e6a9dde5 1346 g_log<<Logger::Error<<"TCP handler out of filedescriptors, exiting, won't recover from this"<<endl;
5bd2ea7b 1347 _exit(1);
4957a608
BH
1348 }
1349 }
1350 else {
cb0af1a1
RG
1351 if (d_maxConnectionsPerClient) {
1352 std::lock_guard<std::mutex> lock(s_clientsCountMutex);
1353 if (s_clientsCount[remote] >= d_maxConnectionsPerClient) {
e6a9dde5 1354 g_log<<Logger::Notice<<"Limit of simultaneous TCP connections per client reached for "<< remote<<", dropping"<<endl;
cb0af1a1
RG
1355 close(fd);
1356 continue;
1357 }
1358 s_clientsCount[remote]++;
1359 }
1360
4957a608
BH
1361 pthread_t tid;
1362 d_connectionroom_sem->wait(); // blocks if no connections are available
1363
1364 int room;
1365 d_connectionroom_sem->getValue( &room);
1366 if(room<1)
e6a9dde5 1367 g_log<<Logger::Warning<<"Limit of simultaneous TCP connections reached - raise max-tcp-connections"<<endl;
4957a608 1368
222efdc0 1369 if(pthread_create(&tid, 0, &doConnection, reinterpret_cast<void*>(fd))) {
e6a9dde5 1370 g_log<<Logger::Error<<"Error creating thread: "<<stringerror()<<endl;
4957a608 1371 d_connectionroom_sem->post();
48e8d70b 1372 close(fd);
cb0af1a1 1373 decrementClientCount(remote);
4957a608
BH
1374 }
1375 }
1376 }
12c86877
BH
1377 }
1378 }
1379 }
3f81d239 1380 catch(PDNSException &AE) {
e6a9dde5 1381 g_log<<Logger::Error<<"TCP Nameserver thread dying because of fatal error: "<<AE.reason<<endl;
12c86877
BH
1382 }
1383 catch(...) {
e6a9dde5 1384 g_log<<Logger::Error<<"TCPNameserver dying because of an unexpected fatal error"<<endl;
12c86877 1385 }
5bd2ea7b 1386 _exit(1); // take rest of server with us
12c86877
BH
1387}
1388
1389