]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/tcpreceiver.cc
Merge pull request #9070 from rgacogne/boost-173
[thirdparty/pdns.git] / pdns / tcpreceiver.cc
CommitLineData
12c86877
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
2e7834cb 3 Copyright (C) 2002-2012 PowerDNS.COM BV
12c86877
BH
4
5 This program is free software; you can redistribute it and/or modify
22dc646a
BH
6 it under the terms of the GNU General Public License version 2
7 as published by the Free Software Foundation
f782fe38
MH
8
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
12c86877
BH
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
06bd9ccf 20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12c86877 21*/
870a0fe4
AT
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
b6f3b03a 25#include <boost/algorithm/string.hpp>
bf269e28 26#include "auth-packetcache.hh"
1258abe0 27#include "utility.hh"
519f5484 28#include "threadname.hh"
add640c0 29#include "dnssecinfra.hh"
4c1474f3 30#include "dnsseckeeper.hh"
12c86877 31#include <cstdio>
4888e4b2 32#include "base32.hh"
12c86877
BH
33#include <cstring>
34#include <cstdlib>
35#include <sys/types.h>
940d7811 36#include <netinet/tcp.h>
12c86877
BH
37#include <iostream>
38#include <string>
39#include "tcpreceiver.hh"
67d74e49 40#include "sstuff.hh"
fa8fd4d2 41
12c86877
BH
42#include <errno.h>
43#include <signal.h>
78bcb858 44#include "base64.hh"
12c86877
BH
45#include "ueberbackend.hh"
46#include "dnspacket.hh"
47#include "nameserver.hh"
48#include "distributor.hh"
49#include "lock.hh"
50#include "logger.hh"
51#include "arguments.hh"
379ab445 52
3e8216c8 53#include "common_startup.hh"
12c86877
BH
54#include "packethandler.hh"
55#include "statbag.hh"
56#include "resolver.hh"
57#include "communicator.hh"
61b26744 58#include "namespaces.hh"
8e9b7d99 59#include "signingpipe.hh"
273d88b2 60#include "stubresolver.hh"
bf269e28 61extern AuthPacketCache PC;
12c86877
BH
62extern StatBag S;
63
64/**
65\file tcpreceiver.cc
66\brief This file implements the tcpreceiver that receives and answers questions over TCP/IP
67*/
68
0ddde5fb 69std::mutex TCPNameserver::s_plock;
c2826d2e
RG
70std::unique_ptr<Semaphore> TCPNameserver::d_connectionroom_sem{nullptr};
71std::unique_ptr<PacketHandler> TCPNameserver::s_P{nullptr};
d322f931 72unsigned int TCPNameserver::d_maxTCPConnections = 0;
9f1d5826 73NetmaskGroup TCPNameserver::d_ng;
cb0af1a1
RG
74size_t TCPNameserver::d_maxTransactionsPerConn;
75size_t TCPNameserver::d_maxConnectionsPerClient;
76unsigned int TCPNameserver::d_idleTimeout;
77unsigned int TCPNameserver::d_maxConnectionDuration;
78std::mutex TCPNameserver::s_clientsCountMutex;
79std::map<ComboAddress,size_t,ComboAddress::addressOnlyLessThan> TCPNameserver::s_clientsCount;
12c86877 80
12c86877
BH
81void TCPNameserver::go()
82{
e6a9dde5 83 g_log<<Logger::Error<<"Creating backend connection for TCP"<<endl;
c2826d2e 84 s_P.reset();
12c86877 85 try {
c2826d2e 86 s_P=make_unique<PacketHandler>();
12c86877 87 }
3f81d239 88 catch(PDNSException &ae) {
e6a9dde5 89 g_log<<Logger::Error<<"TCP server is unable to launch backends - will try again when questions come in: "<<ae.reason<<endl;
12c86877 90 }
12c86877 91
0ddde5fb
RG
92 std::thread th(std::bind(&TCPNameserver::thread, this));
93 th.detach();
12c86877
BH
94}
95
3f81d239 96// throws PDNSException if things didn't go according to plan, returns 0 if really 0 bytes were read
cb0af1a1 97static int readnWithTimeout(int fd, void* buffer, unsigned int n, unsigned int idleTimeout, bool throwOnEOF=true, unsigned int totalTimeout=0)
12c86877 98{
6a3e5d1a
BH
99 unsigned int bytes=n;
100 char *ptr = (char*)buffer;
101 int ret;
cb0af1a1
RG
102 time_t start = 0;
103 unsigned int remainingTotal = totalTimeout;
104 if (totalTimeout) {
105 start = time(NULL);
106 }
6a3e5d1a
BH
107 while(bytes) {
108 ret=read(fd, ptr, bytes);
109 if(ret < 0) {
110 if(errno==EAGAIN) {
cb0af1a1 111 ret=waitForData(fd, (totalTimeout == 0 || idleTimeout <= remainingTotal) ? idleTimeout : remainingTotal);
4957a608
BH
112 if(ret < 0)
113 throw NetworkError("Waiting for data read");
114 if(!ret)
115 throw NetworkError("Timeout reading data");
116 continue;
6a3e5d1a
BH
117 }
118 else
4957a608 119 throw NetworkError("Reading data: "+stringerror());
6a3e5d1a
BH
120 }
121 if(!ret) {
122 if(!throwOnEOF && n == bytes)
4957a608 123 return 0;
6a3e5d1a 124 else
4957a608 125 throw NetworkError("Did not fulfill read from TCP due to EOF");
6a3e5d1a
BH
126 }
127
128 ptr += ret;
129 bytes -= ret;
cb0af1a1
RG
130 if (totalTimeout) {
131 time_t now = time(NULL);
132 unsigned int elapsed = now - start;
133 if (elapsed >= remainingTotal) {
134 throw NetworkError("Timeout while reading data");
135 }
136 start = now;
137 remainingTotal -= elapsed;
138 }
6a3e5d1a
BH
139 }
140 return n;
141}
12c86877 142
6a3e5d1a 143// ditto
cb0af1a1 144static void writenWithTimeout(int fd, const void *buffer, unsigned int n, unsigned int idleTimeout)
6a3e5d1a
BH
145{
146 unsigned int bytes=n;
147 const char *ptr = (char*)buffer;
148 int ret;
149 while(bytes) {
150 ret=write(fd, ptr, bytes);
151 if(ret < 0) {
152 if(errno==EAGAIN) {
cb0af1a1 153 ret=waitForRWData(fd, false, idleTimeout, 0);
4957a608
BH
154 if(ret < 0)
155 throw NetworkError("Waiting for data write");
156 if(!ret)
157 throw NetworkError("Timeout writing data");
158 continue;
6a3e5d1a
BH
159 }
160 else
4957a608 161 throw NetworkError("Writing data: "+stringerror());
6a3e5d1a 162 }
12c86877 163 if(!ret) {
67d74e49 164 throw NetworkError("Did not fulfill TCP write due to EOF");
12c86877 165 }
6a3e5d1a
BH
166
167 ptr += ret;
168 bytes -= ret;
12c86877 169 }
12c86877
BH
170}
171
c2826d2e 172void TCPNameserver::sendPacket(std::unique_ptr<DNSPacket>& p, int outsock)
6a3e5d1a 173{
b552d7b1 174 g_rs.submitResponse(*p, false);
9951e2d0 175
fbaa5e09
BH
176 uint16_t len=htons(p->getString().length());
177 string buffer((const char*)&len, 2);
178 buffer.append(p->getString());
cb0af1a1 179 writenWithTimeout(outsock, buffer.c_str(), buffer.length(), d_idleTimeout);
6a3e5d1a
BH
180}
181
182
cb0af1a1 183void TCPNameserver::getQuestion(int fd, char *mesg, int pktlen, const ComboAddress &remote, unsigned int totalTime)
6a3e5d1a
BH
184try
185{
cb0af1a1 186 readnWithTimeout(fd, mesg, pktlen, d_idleTimeout, true, totalTime);
6a3e5d1a 187}
67d74e49
BH
188catch(NetworkError& ae) {
189 throw NetworkError("Error reading DNS data from TCP client "+remote.toString()+": "+ae.what());
12c86877
BH
190}
191
5fd567ec 192static void incTCPAnswerCount(const ComboAddress& remote)
193{
194 S.inc("tcp-answers");
195 if(remote.sin4.sin_family == AF_INET6)
196 S.inc("tcp6-answers");
197 else
198 S.inc("tcp4-answers");
199}
cb0af1a1
RG
200
/**
 * Check whether a connection that started at `start` has used up its allowed lifetime.
 *
 * @param maxConnectionDuration  maximum lifetime in seconds; 0 disables the check
 * @param start                  time() value taken when the connection was accepted
 * @param remainingTime          set to the number of seconds left when the limit is
 *                               enabled and not yet reached; left untouched otherwise
 * @return true when the limit is enabled and has been reached, false otherwise
 */
static bool maxConnectionDurationReached(unsigned int maxConnectionDuration, time_t start, unsigned int& remainingTime)
{
  if (maxConnectionDuration == 0) {
    return false;
  }

  time_t elapsed = time(NULL) - start;
  if (elapsed < 0) {
    // time() is wall-clock time and may step backwards. Treat that as "no time
    // has passed" so the remaining time never exceeds the configured maximum
    // (and cannot wrap around as an unsigned value).
    elapsed = 0;
  }

  if (static_cast<unsigned long long>(elapsed) >= maxConnectionDuration) {
    return true;
  }

  remainingTime = maxConnectionDuration - static_cast<unsigned int>(elapsed);
  return false;
}
212
213void TCPNameserver::decrementClientCount(const ComboAddress& remote)
214{
215 if (d_maxConnectionsPerClient) {
216 std::lock_guard<std::mutex> lock(s_clientsCountMutex);
217 s_clientsCount[remote]--;
218 if (s_clientsCount[remote] == 0) {
219 s_clientsCount.erase(remote);
220 }
221 }
222}
223
0ddde5fb 224void TCPNameserver::doConnection(int fd)
12c86877 225{
519f5484 226 setThreadName("pdns/tcpConnect");
c2826d2e 227 std::unique_ptr<DNSPacket> packet;
4f5e7925 228 ComboAddress remote;
229 socklen_t remotelen=sizeof(remote);
cb0af1a1
RG
230 size_t transactions = 0;
231 time_t start = 0;
232 if (d_maxConnectionDuration) {
233 start = time(NULL);
234 }
4f5e7925 235
4f5e7925 236 if(getpeername(fd, (struct sockaddr *)&remote, &remotelen) < 0) {
e6a9dde5 237 g_log<<Logger::Warning<<"Received question from socket which had no remote address, dropping ("<<stringerror()<<")"<<endl;
4f5e7925 238 d_connectionroom_sem->post();
a7b68ae7
RG
239 try {
240 closesocket(fd);
241 }
242 catch(const PDNSException& e) {
e6a9dde5 243 g_log<<Logger::Error<<"Error closing TCP socket: "<<e.reason<<endl;
a7b68ae7 244 }
0ddde5fb 245 return;
4f5e7925 246 }
247
3897b9e1 248 setNonBlocking(fd);
12c86877 249 try {
c2b4ccc0 250 int mesgsize=65535;
251 scoped_array<char> mesg(new char[mesgsize]);
12c86877 252
e6a9dde5 253 DLOG(g_log<<"TCP Connection accepted on fd "<<fd<<endl);
21a303f3 254 bool logDNSQueries= ::arg().mustDo("log-dns-queries");
12c86877 255 for(;;) {
cb0af1a1
RG
256 unsigned int remainingTime = 0;
257 transactions++;
258 if (d_maxTransactionsPerConn && transactions > d_maxTransactionsPerConn) {
e6a9dde5 259 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the number of transactions per connection, dropping.";
cb0af1a1
RG
260 break;
261 }
262 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
e6a9dde5 263 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the maximum TCP connection duration, dropping.";
cb0af1a1
RG
264 break;
265 }
6a3e5d1a
BH
266
267 uint16_t pktlen;
cb0af1a1 268 if(!readnWithTimeout(fd, &pktlen, 2, d_idleTimeout, false, remainingTime))
4957a608 269 break;
6a3e5d1a 270 else
4957a608 271 pktlen=ntohs(pktlen);
12c86877 272
366e1e5e
AT
273 // this check will always be false *if* no one touches
274 // the mesg array. pktlen can be maximum of 65535 as
275 // it is 2 byte unsigned variable. In getQuestion, we
276 // write to 0 up to pktlen-1 so 65535 is just right.
277
278 // do not remove this check as it will catch if someone
279 // decreases the mesg buffer size for some reason.
c2b4ccc0 280 if(pktlen > mesgsize) {
e6a9dde5 281 g_log<<Logger::Warning<<"Received an overly large question from "<<remote.toString()<<", dropping"<<endl;
4957a608 282 break;
12c86877
BH
283 }
284
cb0af1a1 285 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
e6a9dde5 286 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the maximum TCP connection duration, dropping.";
cb0af1a1
RG
287 break;
288 }
289
290 getQuestion(fd, mesg.get(), pktlen, remote, remainingTime);
12c86877 291 S.inc("tcp-queries");
5fd567ec 292 if(remote.sin4.sin_family == AF_INET6)
293 S.inc("tcp6-queries");
294 else
295 S.inc("tcp4-queries");
3e579e91 296
c2826d2e 297 packet=make_unique<DNSPacket>(true);
809fe23f 298 packet->setRemote(&remote);
e9dd48f9 299 packet->d_tcp=true;
ff76e8b4 300 packet->setSocket(fd);
c2b4ccc0 301 if(packet->parse(mesg.get(), pktlen)<0)
4957a608 302 break;
c1663439 303
6e59a580
KM
304 if(packet->qtype.getCode()==QType::AXFR) {
305 if(doAXFR(packet->qdomain, packet, fd))
5fd567ec 306 incTCPAnswerCount(remote);
6e59a580
KM
307 continue;
308 }
309
310 if(packet->qtype.getCode()==QType::IXFR) {
311 if(doIXFR(packet, fd))
5fd567ec 312 incTCPAnswerCount(remote);
4957a608 313 continue;
12c86877
BH
314 }
315
c2826d2e
RG
316 std::unique_ptr<DNSPacket> reply;
317 auto cached = make_unique<DNSPacket>(false);
fe498ace 318 if(logDNSQueries) {
cece60fc
CH
319 string remote_text;
320 if(packet->hasEDNSSubnet())
321 remote_text = packet->getRemote().toString() + "<-" + packet->getRealRemote().toString();
fe498ace 322 else
cece60fc 323 remote_text = packet->getRemote().toString();
e6a9dde5 324 g_log << Logger::Notice<<"TCP Remote "<< remote_text <<" wants '" << packet->qdomain<<"|"<<packet->qtype.getName() <<
1b16851b 325 "', do = " <<packet->d_dnssecOk <<", bufsize = "<< packet->getMaxReplyLen();
fe498ace 326 }
bb5903e2 327
9a037bfa 328 if(PC.enabled()) {
c2826d2e 329 if(packet->couldBeCached() && PC.get(*packet, *cached)) { // short circuit - does the PacketCache recognize this question?
9a037bfa 330 if(logDNSQueries)
1b16851b 331 g_log<<": packetcache HIT"<<endl;
9a037bfa
KM
332 cached->setRemote(&packet->d_remote);
333 cached->d.id=packet->d.id;
334 cached->d.rd=packet->d.rd; // copy in recursion desired bit
335 cached->commitD(); // commit d to the packet inlined
336
337 sendPacket(cached, fd); // presigned, don't do it again
338 continue;
339 }
21a303f3 340 if(logDNSQueries)
1b16851b 341 g_log<<": packetcache MISS"<<endl;
bbe4b041
OM
342 } else {
343 if (logDNSQueries) {
1b16851b 344 g_log<<endl;
bbe4b041 345 }
12c86877 346 }
12c86877 347 {
0ddde5fb 348 std::lock_guard<std::mutex> l(s_plock);
4957a608 349 if(!s_P) {
e6a9dde5 350 g_log<<Logger::Error<<"TCP server is without backend connections, launching"<<endl;
c2826d2e 351 s_P=make_unique<PacketHandler>();
4957a608 352 }
4957a608 353
c2826d2e 354 reply= s_P->doQuestion(*packet); // we really need to ask the backend :-)
12c86877
BH
355 }
356
12c86877 357 if(!reply) // unable to write an answer?
4957a608 358 break;
b552d7b1 359
ff76e8b4 360 sendPacket(reply, fd);
12c86877 361 }
12c86877 362 }
3f81d239 363 catch(PDNSException &ae) {
0ddde5fb 364 std::lock_guard<std::mutex> l(s_plock);
c2826d2e 365 s_P.reset(); // on next call, backend will be recycled
e6a9dde5 366 g_log<<Logger::Error<<"TCP nameserver had error, cycling backend: "<<ae.reason<<endl;
ef1d2f44 367 }
0afa9049 368 catch(NetworkError &e) {
e6a9dde5 369 g_log<<Logger::Info<<"TCP Connection Thread died because of network error: "<<e.what()<<endl;
0afa9049
BH
370 }
371
adc10f99 372 catch(std::exception &e) {
e6a9dde5 373 g_log<<Logger::Error<<"TCP Connection Thread died because of STL error: "<<e.what()<<endl;
12c86877
BH
374 }
375 catch( ... )
376 {
e6a9dde5 377 g_log << Logger::Error << "TCP Connection Thread caught unknown exception." << endl;
12c86877 378 }
12c86877 379 d_connectionroom_sem->post();
a7b68ae7
RG
380
381 try {
382 closesocket(fd);
383 }
384 catch(const PDNSException& e) {
e6a9dde5 385 g_log<<Logger::Error<<"Error closing TCP socket: "<<e.reason<<endl;
a7b68ae7 386 }
cb0af1a1 387 decrementClientCount(remote);
12c86877
BH
388}
389
78bcb858 390
e082fb4c 391// call this method with s_plock held!
c2826d2e 392bool TCPNameserver::canDoAXFR(std::unique_ptr<DNSPacket>& q)
12c86877 393{
379ab445 394 if(::arg().mustDo("disable-axfr"))
318c3ec6
BH
395 return false;
396
78bcb858
BH
397 if(q->d_havetsig) { // if you have one, it must be good
398 TSIGRecordContent trc;
7abbc40f
PD
399 DNSName keyname;
400 string secret;
ea3816cf 401 if(!q->checkForCorrectTSIG(s_P->getBackend(), &keyname, &secret, &trc)) {
78bcb858 402 return false;
7f9ac49b
AT
403 } else {
404 getTSIGHashEnum(trc.d_algoName, q->d_tsig_algo);
405 if (q->d_tsig_algo == TSIG_GSS) {
1635f12b 406 GssContext gssctx(keyname);
7f9ac49b 407 if (!gssctx.getPeerPrincipal(q->d_peer_principal)) {
e6a9dde5 408 g_log<<Logger::Warning<<"Failed to extract peer principal from GSS context with keyname '"<<keyname<<"'"<<endl;
7f9ac49b
AT
409 }
410 }
411 }
412
ea99d474 413 DNSSECKeeper dk(s_P->getBackend());
5e29f2f9 414
84fc3f8b
AT
415 if (q->d_tsig_algo == TSIG_GSS) {
416 vector<string> princs;
417 s_P->getBackend()->getDomainMetadata(q->qdomain, "GSS-ALLOW-AXFR-PRINCIPAL", princs);
ff05fd12 418 for(const std::string& princ : princs) {
84fc3f8b 419 if (q->d_peer_principal == princ) {
e6a9dde5 420 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: TSIG signed request with authorized principal '"<<q->d_peer_principal<<"' and algorithm 'gss-tsig'"<<endl;
84fc3f8b
AT
421 return true;
422 }
423 }
e6a9dde5 424 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' denied: TSIG signed request with principal '"<<q->d_peer_principal<<"' and algorithm 'gss-tsig' is not permitted"<<endl;
84fc3f8b
AT
425 return false;
426 }
427
3d03fee8 428 if(!dk.TSIGGrantsAccess(q->qdomain, keyname)) {
e6a9dde5 429 g_log<<Logger::Error<<"AXFR '"<<q->qdomain<<"' denied: key with name '"<<keyname<<"' and algorithm '"<<getTSIGAlgoName(q->d_tsig_algo)<<"' does not grant access to zone"<<endl;
78bcb858
BH
430 return false;
431 }
432 else {
e6a9dde5 433 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: TSIG signed request with authorized key '"<<keyname<<"' and algorithm '"<<getTSIGAlgoName(q->d_tsig_algo)<<"'"<<endl;
78bcb858
BH
434 return true;
435 }
436 }
93afc0a3
PD
437
438 // cerr<<"checking allow-axfr-ips"<<endl;
439 if(!(::arg()["allow-axfr-ips"].empty()) && d_ng.match( (ComboAddress *) &q->d_remote )) {
e6a9dde5 440 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: client IP "<<q->getRemote()<<" is in allow-axfr-ips"<<endl;
12c86877 441 return true;
ab5edd12 442 }
93afc0a3
PD
443
444 FindNS fns;
445
446 // cerr<<"doing per-zone-axfr-acls"<<endl;
447 SOAData sd;
79ba7763 448 if(s_P->getBackend()->getSOAUncached(q->qdomain,sd)) {
93afc0a3
PD
449 // cerr<<"got backend and SOA"<<endl;
450 DNSBackend *B=sd.db;
451 vector<string> acl;
894bcf36 452 s_P->getBackend()->getDomainMetadata(q->qdomain, "ALLOW-AXFR-FROM", acl);
93afc0a3
PD
453 for (vector<string>::const_iterator i = acl.begin(); i != acl.end(); ++i) {
454 // cerr<<"matching against "<<*i<<endl;
455 if(pdns_iequals(*i, "AUTO-NS")) {
456 // cerr<<"AUTO-NS magic please!"<<endl;
457
458 DNSResourceRecord rr;
7abbc40f 459 set<DNSName> nsset;
93afc0a3 460
acb61e0a 461 B->lookup(QType(QType::NS),q->qdomain,sd.domain_id);
93afc0a3 462 while(B->get(rr))
290a083d 463 nsset.insert(DNSName(rr.content));
7abbc40f 464 for(const auto & j: nsset) {
35b942fe 465 vector<string> nsips=fns.lookup(j, s_P->getBackend());
93afc0a3
PD
466 for(vector<string>::const_iterator k=nsips.begin();k!=nsips.end();++k) {
467 // cerr<<"got "<<*k<<" from AUTO-NS"<<endl;
ded6b08d 468 if(*k == q->getRemote().toString())
93afc0a3
PD
469 {
470 // cerr<<"got AUTO-NS hit"<<endl;
e6a9dde5 471 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: client IP "<<q->getRemote()<<" is in NSset"<<endl;
93afc0a3
PD
472 return true;
473 }
474 }
475 }
476 }
477 else
478 {
479 Netmask nm = Netmask(*i);
480 if(nm.match( (ComboAddress *) &q->d_remote ))
481 {
e6a9dde5 482 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: client IP "<<q->getRemote()<<" is in per-domain ACL"<<endl;
93afc0a3
PD
483 // cerr<<"hit!"<<endl;
484 return true;
485 }
486 }
487 }
488 }
489
12c86877
BH
490 extern CommunicatorClass Communicator;
491
ded6b08d 492 if(Communicator.justNotified(q->qdomain, q->getRemote().toString())) { // we just notified this ip
e6a9dde5 493 g_log<<Logger::Warning<<"Approved AXFR of '"<<q->qdomain<<"' from recently notified slave "<<q->getRemote()<<endl;
12c86877
BH
494 return true;
495 }
496
e6a9dde5 497 g_log<<Logger::Error<<"AXFR of domain '"<<q->qdomain<<"' denied: client IP "<<q->getRemote()<<" has no permission"<<endl;
12c86877
BH
498 return false;
499}
500
b317b510 501namespace {
54d84273
PD
502 struct NSECXEntry
503 {
22a0ef16 504 NSECBitmap d_set;
54d84273 505 unsigned int d_ttl;
feef1ece 506 bool d_auth;
54d84273 507 };
8e9b7d99 508
a5188bcd 509 static std::unique_ptr<DNSPacket> getFreshAXFRPacket(std::unique_ptr<DNSPacket>& q)
54d84273 510 {
c2826d2e 511 std::unique_ptr<DNSPacket> ret = std::unique_ptr<DNSPacket>(q->replyPacket());
54d84273
PD
512 ret->setCompress(false);
513 ret->d_dnssecOk=false; // RFC 5936, 2.2.5
514 ret->d_tcp = true;
515 return ret;
516 }
8e9b7d99
BH
517}
518
54d84273 519
12c86877 520/** do the actual zone transfer. Return 0 in case of error, 1 in case of success */
c2826d2e 521int TCPNameserver::doAXFR(const DNSName &target, std::unique_ptr<DNSPacket>& q, int outsock)
12c86877 522{
c2826d2e 523 std::unique_ptr<DNSPacket> outpacket= getFreshAXFRPacket(q);
c67e46a1 524 if(q->d_dnssecOk)
05e24311 525 outpacket->d_dnssecOk=true; // RFC 5936, 2.2.5 'SHOULD'
22893145 526
e6a9dde5 527 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' initiated by "<<q->getRemote()<<endl;
12c86877 528
22893145 529 // determine if zone exists and AXFR is allowed using existing backend before spawning a new backend.
12c86877
BH
530 SOAData sd;
531 {
0ddde5fb 532 std::lock_guard<std::mutex> l(s_plock);
e6a9dde5 533 DLOG(g_log<<"Looking for SOA"<<endl); // find domain_id via SOA and list complete domain. No SOA, no AXFR
12a965c5 534 if(!s_P) {
e6a9dde5 535 g_log<<Logger::Error<<"TCP server is without backend connections in doAXFR, launching"<<endl;
c2826d2e 536 s_P=make_unique<PacketHandler>();
12a965c5 537 }
12c86877 538
ea99d474 539 // canDoAXFR does all the ACL checks, and has the if(disable-axfr) shortcut, call it first.
8090f5a2 540 if (!canDoAXFR(q)) {
e6a9dde5 541 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' failed: "<<q->getRemote()<<" may not request AXFR"<<endl;
9c556f63 542 outpacket->setRcode(RCode::NotAuth);
8090f5a2
AT
543 sendPacket(outpacket,outsock);
544 return 0;
545 }
546
8090f5a2 547 if(!s_P->getBackend()->getSOAUncached(target, sd)) {
e6a9dde5 548 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' failed: not authoritative"<<endl;
9c556f63 549 outpacket->setRcode(RCode::NotAuth);
ff76e8b4 550 sendPacket(outpacket,outsock);
12c86877
BH
551 return 0;
552 }
3de83124 553 }
22893145 554
8e9b7d99 555 UeberBackend db;
79ba7763 556 if(!db.getSOAUncached(target, sd)) {
e6a9dde5 557 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' failed: not authoritative in second instance"<<endl;
79ba7763 558 outpacket->setRcode(RCode::NotAuth);
ff76e8b4 559 sendPacket(outpacket,outsock);
3de83124 560 return 0;
12c86877 561 }
3de83124 562
ea99d474 563 DNSSECKeeper dk(&db);
40b3959a 564 DNSSECKeeper::clearCaches(target);
22893145
CH
565 bool securedZone = dk.isSecuredZone(target);
566 bool presignedZone = dk.isPresigned(target);
567
568 bool noAXFRBecauseOfNSEC3Narrow=false;
569 NSEC3PARAMRecordContent ns3pr;
570 bool narrow;
571 bool NSEC3Zone=false;
dacacb23 572 if(securedZone && dk.getNSEC3PARAM(target, &ns3pr, &narrow)) {
22893145
CH
573 NSEC3Zone=true;
574 if(narrow) {
e6a9dde5 575 g_log<<Logger::Error<<"Not doing AXFR of an NSEC3 narrow zone '"<<target<<"' for "<<q->getRemote()<<endl;
22893145
CH
576 noAXFRBecauseOfNSEC3Narrow=true;
577 }
578 }
579
580 if(noAXFRBecauseOfNSEC3Narrow) {
e6a9dde5 581 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' denied to "<<q->getRemote()<<endl;
22893145
CH
582 outpacket->setRcode(RCode::Refused);
583 // FIXME: should actually figure out if we are auth over a zone, and send out 9 if we aren't
584 sendPacket(outpacket,outsock);
585 return 0;
586 }
587
78bcb858 588 TSIGRecordContent trc;
7abbc40f
PD
589 DNSName tsigkeyname;
590 string tsigsecret;
78bcb858 591
ea3816cf 592 bool haveTSIGDetails = q->getTSIGDetails(&trc, &tsigkeyname);
78bcb858 593
60a1c204 594 if(haveTSIGDetails && !tsigkeyname.empty()) {
2c26f25a 595 string tsig64;
3343ad1f 596 DNSName algorithm=trc.d_algoName; // FIXME400: check
290a083d 597 if (algorithm == DNSName("hmac-md5.sig-alg.reg.int"))
598 algorithm = DNSName("hmac-md5");
599 if (algorithm != DNSName("gss-tsig")) {
ea99d474 600 if(!db.getTSIGKey(tsigkeyname, &algorithm, &tsig64)) {
e6a9dde5 601 g_log<<Logger::Error<<"TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"' not found"<<endl;
53ace5d5
PL
602 return 0;
603 }
604 if (B64Decode(tsig64, tsigsecret) == -1) {
e6a9dde5 605 g_log<<Logger::Error<<"Unable to Base-64 decode TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"'"<<endl;
53ace5d5
PL
606 return 0;
607 }
84fc3f8b 608 }
78bcb858 609 }
8e9b7d99 610
8e9b7d99 611
8267bd2c 612 // SOA *must* go out first, our signing pipe might reorder
e6a9dde5 613 DLOG(g_log<<"Sending out SOA"<<endl);
13f9e280 614 DNSZoneRecord soa = makeEditedDNSZRFromSOAData(dk, sd);
9bbcf03a 615 outpacket->addRecord(DNSZoneRecord(soa));
3c68fb14 616 if(securedZone && !presignedZone) {
7abbc40f 617 set<DNSName> authSet;
8d3cbffa 618 authSet.insert(target);
ea99d474 619 addRRSigs(dk, db, authSet, outpacket->getRRS());
8d3cbffa 620 }
8e9b7d99 621
60a1c204 622 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858
BH
623 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac); // first answer is 'normal'
624
8267bd2c 625 sendPacket(outpacket, outsock);
78bcb858
BH
626
627 trc.d_mac = outpacket->d_trc.d_mac;
8267bd2c
BH
628 outpacket = getFreshAXFRPacket(q);
629
3c68fb14 630 ChunkedSigningPipe csp(target, (securedZone && !presignedZone), ::arg().asNum("signing-threads", 1));
8e9b7d99 631
6e8694df 632 typedef map<DNSName, NSECXEntry, CanonDNSNameCompare> nsecxrepo_t;
9d3151d9 633 nsecxrepo_t nsecxrepo;
4888e4b2
BH
634
635 // this is where the DNSKEYs go in
0c350cb5 636
4c1474f3 637 DNSSECKeeper::keyset_t keys = dk.getKeys(target);
0c350cb5 638
90ba52e0 639 DNSZoneRecord zrr;
0c350cb5 640
90ba52e0 641 zrr.dr.d_name = target;
192bcba2 642 zrr.dr.d_ttl = sd.minimum;
90ba52e0 643 zrr.auth = 1; // please sign!
794c2f92 644
991a0977 645 string publishCDNSKEY, publishCDS;
0227812c
RG
646 dk.getPublishCDNSKEY(q->qdomain, publishCDNSKEY);
647 dk.getPublishCDS(q->qdomain, publishCDS);
90ba52e0 648 vector<DNSZoneRecord> cds, cdnskey;
f889ab99
PL
649 DNSSECKeeper::keyset_t entryPoints = dk.getEntryPoints(q->qdomain);
650 set<uint32_t> entryPointIds;
651 for (auto const& value : entryPoints)
652 entryPointIds.insert(value.second.id);
991a0977 653
ff05fd12 654 for(const DNSSECKeeper::keyset_t::value_type& value : keys) {
33918299
RG
655 if (!value.second.published) {
656 continue;
657 }
90ba52e0 658 zrr.dr.d_type = QType::DNSKEY;
659 zrr.dr.d_content = std::make_shared<DNSKEYRecordContent>(value.first.getDNSKEY());
6e8694df 660 DNSName keyname = NSEC3Zone ? DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, zrr.dr.d_name))) : zrr.dr.d_name;
9d3151d9 661 NSECXEntry& ne = nsecxrepo[keyname];
b317b510 662
22a0ef16 663 ne.d_set.set(zrr.dr.d_type);
192bcba2 664 ne.d_ttl = sd.getNegativeTTL();
90ba52e0 665 csp.submit(zrr);
991a0977
PL
666
667 // generate CDS and CDNSKEY records
f889ab99 668 if(entryPointIds.count(value.second.id) > 0){
991a0977 669 if(publishCDNSKEY == "1") {
90ba52e0 670 zrr.dr.d_type=QType::CDNSKEY;
671 zrr.dr.d_content = std::make_shared<DNSKEYRecordContent>(value.first.getDNSKEY());
672 cdnskey.push_back(zrr);
991a0977
PL
673 }
674
675 if(!publishCDS.empty()){
90ba52e0 676 zrr.dr.d_type=QType::CDS;
991a0977
PL
677 vector<string> digestAlgos;
678 stringtok(digestAlgos, publishCDS, ", ");
56225bd3 679 for(auto const &digestAlgo : digestAlgos) {
90ba52e0 680 zrr.dr.d_content=std::make_shared<DSRecordContent>(makeDSFromDNSKey(target, value.first.getDNSKEY(), pdns_stou(digestAlgo)));
681 cds.push_back(zrr);
991a0977
PL
682 }
683 }
684 }
4c1474f3 685 }
0c350cb5 686
cc8df07f 687 if(::arg().mustDo("direct-dnskey")) {
acb61e0a 688 sd.db->lookup(QType(QType::DNSKEY), target, sd.domain_id);
90ba52e0 689 while(sd.db->get(zrr)) {
192bcba2 690 zrr.dr.d_ttl = sd.minimum;
90ba52e0 691 csp.submit(zrr);
6dae726d
PD
692 }
693 }
694
b8adb30d
KM
695 uint8_t flags;
696
95c5bc40 697 if(NSEC3Zone) { // now stuff in the NSEC3PARAM
b8adb30d 698 flags = ns3pr.d_flags;
90ba52e0 699 zrr.dr.d_type = QType::NSEC3PARAM;
95c5bc40 700 ns3pr.d_flags = 0;
90ba52e0 701 zrr.dr.d_content = std::make_shared<NSEC3PARAMRecordContent>(ns3pr);
b8adb30d 702 ns3pr.d_flags = flags;
6e8694df 703 DNSName keyname = DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, zrr.dr.d_name)));
ce464268
BH
704 NSECXEntry& ne = nsecxrepo[keyname];
705
22a0ef16 706 ne.d_set.set(zrr.dr.d_type);
90ba52e0 707 csp.submit(zrr);
ce464268 708 }
8e9b7d99 709
0c350cb5
BH
710 // now start list zone
711 if(!(sd.db->list(target, sd.domain_id))) {
e6a9dde5 712 g_log<<Logger::Error<<"Backend signals error condition"<<endl;
9c556f63 713 outpacket->setRcode(RCode::ServFail);
0c350cb5
BH
714 sendPacket(outpacket,outsock);
715 return 0;
716 }
717
b772ffea 718
5633a4af 719 const bool rectify = !(presignedZone || ::arg().mustDo("disable-axfr-rectify"));
7abbc40f 720 set<DNSName> qnames, nsset, terms;
90ba52e0 721 vector<DNSZoneRecord> zrrs;
b772ffea 722
991a0977 723 // Add the CDNSKEY and CDS records we created earlier
cece60fc
CH
724 for (auto const &synth_zrr : cds)
725 zrrs.push_back(synth_zrr);
90ba52e0 726
cece60fc
CH
727 for (auto const &synth_zrr : cdnskey)
728 zrrs.push_back(synth_zrr);
90ba52e0 729
730 while(sd.db->get(zrr)) {
8bf260dd 731 zrr.dr.d_name.makeUsLowerCase();
90ba52e0 732 if(zrr.dr.d_name.isPartOf(target)) {
733 if (zrr.dr.d_type == QType::ALIAS && ::arg().mustDo("outgoing-axfr-expand-alias")) {
734 vector<DNSZoneRecord> ips;
735 int ret1 = stubDoResolve(getRR<ALIASRecordContent>(zrr.dr)->d_content, QType::A, ips);
736 int ret2 = stubDoResolve(getRR<ALIASRecordContent>(zrr.dr)->d_content, QType::AAAA, ips);
273d88b2 737 if(ret1 != RCode::NoError || ret2 != RCode::NoError) {
e6a9dde5 738 g_log<<Logger::Error<<"Error resolving for ALIAS "<<zrr.dr.d_content->getZoneRepresentation()<<", aborting AXFR"<<endl;
9c556f63 739 outpacket->setRcode(RCode::ServFail);
273d88b2
PD
740 sendPacket(outpacket,outsock);
741 return 0;
742 }
d86e1bf7 743 for(const auto& ip: ips) {
90ba52e0 744 zrr.dr.d_type = ip.dr.d_type;
0438fd89 745 zrr.dr.d_content = ip.dr.d_content;
90ba52e0 746 zrrs.push_back(zrr);
d86e1bf7 747 }
a68df29d 748 continue;
d86e1bf7
PD
749 }
750
b772ffea 751 if (rectify) {
90ba52e0 752 if (zrr.dr.d_type) {
753 qnames.insert(zrr.dr.d_name);
754 if(zrr.dr.d_type == QType::NS && zrr.dr.d_name!=target)
755 nsset.insert(zrr.dr.d_name);
b772ffea
KM
756 } else {
757 // remove existing ents
758 continue;
759 }
760 }
a68df29d 761 zrrs.push_back(zrr);
b772ffea 762 } else {
90ba52e0 763 if (zrr.dr.d_type)
e6a9dde5 764 g_log<<Logger::Warning<<"Zone '"<<target<<"' contains out-of-zone data '"<<zrr.dr.d_name<<"|"<<DNSRecordContent::NumberToType(zrr.dr.d_type)<<"', ignoring"<<endl;
b772ffea
KM
765 }
766 }
767
75f2589f 768 // Group records by name and type, signpipe stumbles over interrupted rrsets
22a676e0 769 if(securedZone && !presignedZone) {
8daafcc1
KM
770 sort(zrrs.begin(), zrrs.end(), [](const DNSZoneRecord& a, const DNSZoneRecord& b) {
771 return tie(a.dr.d_name, a.dr.d_type) < tie(b.dr.d_name, b.dr.d_type);
772 });
773 }
75f2589f 774
b772ffea
KM
775 if(rectify) {
776 // set auth
2010ac95
RG
777 for(DNSZoneRecord &loopZRR : zrrs) {
778 loopZRR.auth=true;
779 if (loopZRR.dr.d_type != QType::NS || loopZRR.dr.d_name!=target) {
780 DNSName shorter(loopZRR.dr.d_name);
b772ffea 781 do {
e325f20c 782 if (shorter==target) // apex is always auth
cb045f61 783 break;
2010ac95
RG
784 if(nsset.count(shorter) && !(loopZRR.dr.d_name==shorter && loopZRR.dr.d_type == QType::DS)) {
785 loopZRR.auth=false;
cb045f61 786 break;
9f70b77a 787 }
7abbc40f 788 } while(shorter.chopOff());
9f70b77a 789 }
b772ffea
KM
790 }
791
792 if(NSEC3Zone) {
793 // ents are only required for NSEC3 zones
794 uint32_t maxent = ::arg().asNum("max-ent-entries");
6ded341a 795 set<DNSName> nsec3set, nonterm;
2010ac95 796 for (auto &loopZRR: zrrs) {
6ded341a 797 bool skip=false;
2010ac95 798 DNSName shorter = loopZRR.dr.d_name;
6ded341a
KM
799 if (shorter != target && shorter.chopOff() && shorter != target) {
800 do {
801 if(nsset.count(shorter)) {
802 skip=true;
803 break;
804 }
805 } while(shorter.chopOff() && shorter != target);
806 }
2010ac95
RG
807 shorter = loopZRR.dr.d_name;
808 if(!skip && (loopZRR.dr.d_type != QType::NS || !ns3pr.d_flags)) {
6ded341a
KM
809 do {
810 if(!nsec3set.count(shorter)) {
811 nsec3set.insert(shorter);
812 }
813 } while(shorter != target && shorter.chopOff());
814 }
815 }
816
2010ac95
RG
817 for(DNSZoneRecord &loopZRR : zrrs) {
818 DNSName shorter(loopZRR.dr.d_name);
e325f20c 819 while(shorter != target && shorter.chopOff()) {
6ded341a 820 if(!qnames.count(shorter) && !nonterm.count(shorter) && nsec3set.count(shorter)) {
b772ffea 821 if(!(maxent)) {
e6a9dde5 822 g_log<<Logger::Warning<<"Zone '"<<target<<"' has too many empty non terminals."<<endl;
b772ffea
KM
823 return 0;
824 }
6ded341a
KM
825 nonterm.insert(shorter);
826 --maxent;
b772ffea
KM
827 }
828 }
829 }
830
9e23e712 831 for(const auto& nt : nonterm) {
2010ac95
RG
832 DNSZoneRecord tempRR;
833 tempRR.dr.d_name=nt;
834 tempRR.dr.d_type=QType::ENT;
835 tempRR.auth=true;
836 zrrs.push_back(tempRR);
b772ffea
KM
837 }
838 }
839 }
840
841
12c86877 842 /* now write all other records */
8e9b7d99 843
6e8694df 844 DNSName keyname;
3370c993 845 unsigned int udiff;
1c6d9830
BH
846 DTime dt;
847 dt.set();
bec14a20 848 int records=0;
2010ac95 849 for(DNSZoneRecord &loopZRR : zrrs) {
3c68fb14 850 if (!presignedZone && loopZRR.dr.d_type == QType::RRSIG)
794c2f92 851 continue;
6dae726d 852
991a0977 853 // only skip the DNSKEY, CDNSKEY and CDS if direct-dnskey is enabled, to avoid changing behaviour
6dae726d 854 // when it is not enabled.
2010ac95 855 if(::arg().mustDo("direct-dnskey") && (loopZRR.dr.d_type == QType::DNSKEY || loopZRR.dr.d_type == QType::CDNSKEY || loopZRR.dr.d_type == QType::CDS))
6dae726d
PD
856 continue;
857
bec14a20 858 records++;
2010ac95
RG
859 if(securedZone && (loopZRR.auth || loopZRR.dr.d_type == QType::NS)) {
860 if (NSEC3Zone || loopZRR.dr.d_type) {
3c68fb14
KM
861 if (presignedZone && NSEC3Zone && loopZRR.dr.d_type == QType::RRSIG && getRR<RRSIGRecordContent>(loopZRR.dr)->d_type == QType::NSEC3) {
862 keyname = loopZRR.dr.d_name.makeRelative(sd.qname);
863 } else {
864 keyname = NSEC3Zone ? DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, loopZRR.dr.d_name))) : loopZRR.dr.d_name;
865 }
b5baefaf 866 NSECXEntry& ne = nsecxrepo[keyname];
192bcba2 867 ne.d_ttl = sd.getNegativeTTL();
3c68fb14
KM
868 ne.d_auth = (ne.d_auth || loopZRR.auth || (NSEC3Zone && (!ns3pr.d_flags)));
869 if (loopZRR.dr.d_type && loopZRR.dr.d_type != QType::RRSIG) {
22a0ef16 870 ne.d_set.set(loopZRR.dr.d_type);
b5baefaf
PD
871 }
872 }
b317b510 873 }
b5baefaf 874
2010ac95 875 if (!loopZRR.dr.d_type)
b5baefaf
PD
876 continue; // skip empty non-terminals
877
2010ac95 878 if(loopZRR.dr.d_type == QType::SOA)
12c86877 879 continue; // skip SOA - would indicate end of AXFR
add640c0 880
2010ac95 881 if(csp.submit(loopZRR)) {
1c6d9830
BH
882 for(;;) {
883 outpacket->getRRS() = csp.getChunk();
884 if(!outpacket->getRRS().empty()) {
60a1c204 885 if(haveTSIGDetails && !tsigkeyname.empty())
54d84273 886 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
1c6d9830 887 sendPacket(outpacket, outsock);
78bcb858 888 trc.d_mac=outpacket->d_trc.d_mac;
1c6d9830
BH
889 outpacket=getFreshAXFRPacket(q);
890 }
891 else
892 break;
893 }
12c86877
BH
894 }
895 }
78bcb858 896 /*
3370c993 897 udiff=dt.udiffNoReset();
1c6d9830
BH
898 cerr<<"Starting NSEC: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<csp.d_signed<<" / "<<udiff/1000000.0<<endl;
899 cerr<<"Outstanding: "<<csp.d_outstanding<<", "<<csp.d_queued - csp.d_signed << endl;
900 cerr<<"Ready for consumption: "<<csp.getReady()<<endl;
78bcb858 901 */
feef1ece 902 if(securedZone) {
4888e4b2 903 if(NSEC3Zone) {
9d3151d9 904 for(nsecxrepo_t::const_iterator iter = nsecxrepo.begin(); iter != nsecxrepo.end(); ++iter) {
3c68fb14 905 if(iter->second.d_auth) {
feef1ece 906 NSEC3RecordContent n3rc;
22a0ef16 907 n3rc.set(iter->second.d_set);
27d4a65b
RG
908 const auto numberOfTypesSet = n3rc.numberOfTypesSet();
909 if (numberOfTypesSet != 0 && (numberOfTypesSet != 1 || !n3rc.isSet(QType::NS))) {
910 n3rc.set(QType::RRSIG);
911 }
912 n3rc.d_salt = ns3pr.d_salt;
feef1ece
PD
913 n3rc.d_flags = ns3pr.d_flags;
914 n3rc.d_iterations = ns3pr.d_iterations;
690b86b7 915 n3rc.d_algorithm = DNSSECKeeper::DIGEST_SHA1; // SHA1, fixed in PowerDNS for now
feef1ece 916 nsecxrepo_t::const_iterator inext = iter;
cb167afd 917 ++inext;
feef1ece
PD
918 if(inext == nsecxrepo.end())
919 inext = nsecxrepo.begin();
3c68fb14 920 while(!inext->second.d_auth && inext != iter)
feef1ece 921 {
cb167afd 922 ++inext;
feef1ece
PD
923 if(inext == nsecxrepo.end())
924 inext = nsecxrepo.begin();
925 }
6e8694df
KM
926 n3rc.d_nexthash = fromBase32Hex(inext->first.toStringNoDot());
927 zrr.dr.d_name = iter->first+sd.qname;
90ba52e0 928
192bcba2 929 zrr.dr.d_ttl = sd.getNegativeTTL();
27d4a65b 930 zrr.dr.d_content = std::make_shared<NSEC3RecordContent>(std::move(n3rc));
90ba52e0 931 zrr.dr.d_type = QType::NSEC3;
932 zrr.dr.d_place = DNSResourceRecord::ANSWER;
933 zrr.auth=true;
934 if(csp.submit(zrr)) {
feef1ece
PD
935 for(;;) {
936 outpacket->getRRS() = csp.getChunk();
937 if(!outpacket->getRRS().empty()) {
60a1c204 938 if(haveTSIGDetails && !tsigkeyname.empty())
feef1ece
PD
939 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
940 sendPacket(outpacket, outsock);
941 trc.d_mac=outpacket->d_trc.d_mac;
942 outpacket=getFreshAXFRPacket(q);
943 }
944 else
945 break;
1c6d9830 946 }
1c6d9830 947 }
8e9b7d99 948 }
4888e4b2
BH
949 }
950 }
9d3151d9 951 else for(nsecxrepo_t::const_iterator iter = nsecxrepo.begin(); iter != nsecxrepo.end(); ++iter) {
ed9c3a50 952 NSECRecordContent nrc;
22a0ef16 953 nrc.set(iter->second.d_set);
27d4a65b
RG
954 nrc.set(QType::RRSIG);
955 nrc.set(QType::NSEC);
6e8694df
KM
956
957 if(boost::next(iter) != nsecxrepo.end())
958 nrc.d_next = boost::next(iter)->first;
ed9c3a50 959 else
6e8694df
KM
960 nrc.d_next=nsecxrepo.begin()->first;
961 zrr.dr.d_name = iter->first;
962
192bcba2 963 zrr.dr.d_ttl = sd.getNegativeTTL();
27d4a65b 964 zrr.dr.d_content = std::make_shared<NSECRecordContent>(std::move(nrc));
90ba52e0 965 zrr.dr.d_type = QType::NSEC;
966 zrr.dr.d_place = DNSResourceRecord::ANSWER;
967 zrr.auth=true;
968 if(csp.submit(zrr)) {
1c6d9830
BH
969 for(;;) {
970 outpacket->getRRS() = csp.getChunk();
971 if(!outpacket->getRRS().empty()) {
60a1c204 972 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 973 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
1c6d9830 974 sendPacket(outpacket, outsock);
78bcb858 975 trc.d_mac=outpacket->d_trc.d_mac;
1c6d9830
BH
976 outpacket=getFreshAXFRPacket(q);
977 }
978 else
979 break;
980 }
8e9b7d99 981 }
add640c0 982 }
add640c0 983 }
78bcb858 984 /*
3370c993 985 udiff=dt.udiffNoReset();
1c6d9830
BH
986 cerr<<"Flushing pipe: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<csp.d_signed<<" / "<<udiff/1000000.0<<endl;
987 cerr<<"Outstanding: "<<csp.d_outstanding<<", "<<csp.d_queued - csp.d_signed << endl;
988 cerr<<"Ready for consumption: "<<csp.getReady()<<endl;
78bcb858 989 * */
bec14a20
BH
990 for(;;) {
991 outpacket->getRRS() = csp.getChunk(true); // flush the pipe
992 if(!outpacket->getRRS().empty()) {
60a1c204 993 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 994 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true); // first answer is 'normal'
bec14a20 995 sendPacket(outpacket, outsock);
78bcb858 996 trc.d_mac=outpacket->d_trc.d_mac;
bec14a20
BH
997 outpacket=getFreshAXFRPacket(q);
998 }
999 else
1000 break;
12c86877 1001 }
8e9b7d99 1002
1c6d9830 1003 udiff=dt.udiffNoReset();
f1f85f12 1004 if(securedZone)
e6a9dde5 1005 g_log<<Logger::Info<<"Done signing: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<endl;
1c6d9830 1006
e6a9dde5 1007 DLOG(g_log<<"Done writing out records"<<endl);
12c86877 1008 /* and terminate with yet again the SOA record */
8e9b7d99 1009 outpacket=getFreshAXFRPacket(q);
9bbcf03a 1010 outpacket->addRecord(std::move(soa));
60a1c204 1011 if(haveTSIGDetails && !tsigkeyname.empty())
78bcb858 1012 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
92c90b44 1013
ff76e8b4 1014 sendPacket(outpacket, outsock);
78bcb858 1015
e6a9dde5
PL
1016 DLOG(g_log<<"last packet - close"<<endl);
1017 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' to "<<q->getRemote()<<" finished"<<endl;
12c86877
BH
1018
1019 return 1;
1020}
1021
c2826d2e 1022int TCPNameserver::doIXFR(std::unique_ptr<DNSPacket>& q, int outsock)
6e59a580 1023{
c2826d2e 1024 std::unique_ptr<DNSPacket> outpacket=getFreshAXFRPacket(q);
6e59a580
KM
1025 if(q->d_dnssecOk)
1026 outpacket->d_dnssecOk=true; // RFC 5936, 2.2.5 'SHOULD'
1027
6e59a580 1028 uint32_t serial = 0;
27c0050c 1029 MOADNSParser mdp(false, q->getString());
6e59a580
KM
1030 for(MOADNSParser::answers_t::const_iterator i=mdp.d_answers.begin(); i != mdp.d_answers.end(); ++i) {
1031 const DNSRecord *rr = &i->first;
e693ff5a 1032 if (rr->d_type == QType::SOA && rr->d_place == DNSResourceRecord::AUTHORITY) {
6e59a580
KM
1033 vector<string>parts;
1034 stringtok(parts, rr->d_content->getZoneRepresentation());
1035 if (parts.size() >= 3) {
95dd3b90
RG
1036 try {
1037 serial=pdns_stou(parts[2]);
1038 }
1039 catch(const std::out_of_range& oor) {
e6a9dde5 1040 g_log<<Logger::Error<<"Invalid serial in IXFR query"<<endl;
95dd3b90
RG
1041 outpacket->setRcode(RCode::FormErr);
1042 sendPacket(outpacket,outsock);
1043 return 0;
1044 }
6e59a580 1045 } else {
e6a9dde5 1046 g_log<<Logger::Error<<"No serial in IXFR query"<<endl;
6e59a580
KM
1047 outpacket->setRcode(RCode::FormErr);
1048 sendPacket(outpacket,outsock);
1049 return 0;
1050 }
3e67ea8b 1051 } else if (rr->d_type != QType::TSIG && rr->d_type != QType::OPT) {
e6a9dde5 1052 g_log<<Logger::Error<<"Additional records in IXFR query, type: "<<QType(rr->d_type).getName()<<endl;
6e59a580
KM
1053 outpacket->setRcode(RCode::FormErr);
1054 sendPacket(outpacket,outsock);
1055 return 0;
1056 }
1057 }
1058
e6a9dde5 1059 g_log<<Logger::Error<<"IXFR of domain '"<<q->qdomain<<"' initiated by "<<q->getRemote()<<" with serial "<<serial<<endl;
6e59a580 1060
22893145 1061 // determine if zone exists and AXFR is allowed using existing backend before spawning a new backend.
6e59a580 1062 SOAData sd;
6e59a580 1063 {
0ddde5fb 1064 std::lock_guard<std::mutex> l(s_plock);
e6a9dde5 1065 DLOG(g_log<<"Looking for SOA"<<endl); // find domain_id via SOA and list complete domain. No SOA, no IXFR
6e59a580 1066 if(!s_P) {
e6a9dde5 1067 g_log<<Logger::Error<<"TCP server is without backend connections in doIXFR, launching"<<endl;
c2826d2e 1068 s_P=make_unique<PacketHandler>();
6e59a580
KM
1069 }
1070
22893145
CH
1071 // canDoAXFR does all the ACL checks, and has the if(disable-axfr) shortcut, call it first.
1072 if(!canDoAXFR(q) || !s_P->getBackend()->getSOAUncached(q->qdomain, sd)) {
e6a9dde5 1073 g_log<<Logger::Error<<"IXFR of domain '"<<q->qdomain<<"' failed: not authoritative"<<endl;
9c556f63 1074 outpacket->setRcode(RCode::NotAuth);
6e59a580
KM
1075 sendPacket(outpacket,outsock);
1076 return 0;
1077 }
1078 }
1079
22893145
CH
1080 DNSSECKeeper dk;
1081 NSEC3PARAMRecordContent ns3pr;
1082 bool narrow;
1083
40b3959a 1084 DNSSECKeeper::clearCaches(q->qdomain);
22893145
CH
1085 bool securedZone = dk.isSecuredZone(q->qdomain);
1086 if(dk.getNSEC3PARAM(q->qdomain, &ns3pr, &narrow)) {
1087 if(narrow) {
e6a9dde5
PL
1088 g_log<<Logger::Error<<"Not doing IXFR of an NSEC3 narrow zone."<<endl;
1089 g_log<<Logger::Error<<"IXFR of domain '"<<q->qdomain<<"' denied to "<<q->getRemote()<<endl;
22893145
CH
1090 outpacket->setRcode(RCode::Refused);
1091 sendPacket(outpacket,outsock);
1092 return 0;
1093 }
1094 }
1095
7abbc40f 1096 DNSName target = q->qdomain;
6e59a580
KM
1097
1098 UeberBackend db;
79ba7763 1099 if(!db.getSOAUncached(target, sd)) {
e6a9dde5 1100 g_log<<Logger::Error<<"IXFR of domain '"<<target<<"' failed: not authoritative in second instance"<<endl;
79ba7763 1101 outpacket->setRcode(RCode::NotAuth);
6e59a580
KM
1102 sendPacket(outpacket,outsock);
1103 return 0;
1104 }
24d9e514 1105
13f9e280 1106 if (!rfc1982LessThan(serial, calculateEditSOA(sd.serial, dk, sd.qname))) {
6e59a580 1107 TSIGRecordContent trc;
7abbc40f
PD
1108 DNSName tsigkeyname;
1109 string tsigsecret;
6e59a580 1110
ea3816cf 1111 bool haveTSIGDetails = q->getTSIGDetails(&trc, &tsigkeyname);
6e59a580 1112
60a1c204 1113 if(haveTSIGDetails && !tsigkeyname.empty()) {
bb7fb11c 1114 string tsig64;
3343ad1f 1115 DNSName algorithm=trc.d_algoName; // FIXME400: was toLowerCanonic, compare output
290a083d 1116 if (algorithm == DNSName("hmac-md5.sig-alg.reg.int"))
1117 algorithm = DNSName("hmac-md5");
0ddde5fb 1118 std::lock_guard<std::mutex> l(s_plock);
53ace5d5 1119 if(!s_P->getBackend()->getTSIGKey(tsigkeyname, &algorithm, &tsig64)) {
e6a9dde5 1120 g_log<<Logger::Error<<"TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"' not found"<<endl;
53ace5d5
PL
1121 return 0;
1122 }
1123 if (B64Decode(tsig64, tsigsecret) == -1) {
e6a9dde5 1124 g_log<<Logger::Error<<"Unable to Base-64 decode TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"'"<<endl;
53ace5d5
PL
1125 return 0;
1126 }
6e59a580
KM
1127 }
1128
1129 UeberBackend signatureDB;
1130
1131 // SOA *must* go out first, our signing pipe might reorder
e6a9dde5 1132 DLOG(g_log<<"Sending out SOA"<<endl);
13f9e280 1133 DNSZoneRecord soa = makeEditedDNSZRFromSOAData(dk, sd);
9bbcf03a 1134 outpacket->addRecord(std::move(soa));
ada68bd9 1135 if(securedZone && outpacket->d_dnssecOk) {
7abbc40f 1136 set<DNSName> authSet;
6e59a580
KM
1137 authSet.insert(target);
1138 addRRSigs(dk, signatureDB, authSet, outpacket->getRRS());
1139 }
1140
60a1c204 1141 if(haveTSIGDetails && !tsigkeyname.empty())
6e59a580
KM
1142 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac); // first answer is 'normal'
1143
1144 sendPacket(outpacket, outsock);
1145
e6a9dde5 1146 g_log<<Logger::Error<<"IXFR of domain '"<<target<<"' to "<<q->getRemote()<<" finished"<<endl;
6e59a580
KM
1147
1148 return 1;
1149 }
1150
e6a9dde5 1151 g_log<<Logger::Error<<"IXFR fallback to AXFR for domain '"<<target<<"' our serial "<<sd.serial<<endl;
6e59a580
KM
1152 return doAXFR(q->qdomain, q, outsock);
1153}
1154
12c86877
BH
1155TCPNameserver::~TCPNameserver()
1156{
12c86877
BH
1157}
1158
1159TCPNameserver::TCPNameserver()
1160{
cb0af1a1
RG
1161 d_maxTransactionsPerConn = ::arg().asNum("max-tcp-transactions-per-conn");
1162 d_idleTimeout = ::arg().asNum("tcp-idle-timeout");
1163 d_maxConnectionDuration = ::arg().asNum("max-tcp-connection-duration");
1164 d_maxConnectionsPerClient = ::arg().asNum("max-tcp-connections-per-client");
1165
379ab445 1166// sem_init(&d_connectionroom_sem,0,::arg().asNum("max-tcp-connections"));
c2826d2e 1167 d_connectionroom_sem = make_unique<Semaphore>( ::arg().asNum( "max-tcp-connections" ));
d322f931 1168 d_maxTCPConnections = ::arg().asNum( "max-tcp-connections" );
f5ad09dc 1169
12c86877 1170 vector<string>locals;
e326f785 1171 stringtok(locals,::arg()["local-ipv6"]," ,");
379ab445 1172 stringtok(locals,::arg()["local-address"]," ,");
f5ad09dc
PL
1173 if(locals.empty())
1174 throw PDNSException("No local addresses specified");
12c86877 1175
68b011bd 1176 d_ng.toMasks(::arg()["allow-axfr-ips"] );
9f1d5826 1177
12c86877 1178 signal(SIGPIPE,SIG_IGN);
12c86877 1179
f5ad09dc
PL
1180 for(auto const &laddr : locals) {
1181 ComboAddress local(laddr, ::arg().asNum("local-port"));
12c86877 1182
f5ad09dc
PL
1183 int s=socket(local.sin4.sin_family, SOCK_STREAM, 0);
1184 if(s<0)
1185 throw PDNSException("Unable to acquire TCP socket: "+stringerror());
3897b9e1 1186 setCloseOnExec(s);
fb316318 1187
12c86877 1188 int tmp=1;
f5ad09dc 1189 if(setsockopt(s, SOL_SOCKET,SO_REUSEADDR, (char*)&tmp, sizeof tmp) < 0) {
e6a9dde5 1190 g_log<<Logger::Error<<"Setsockopt failed"<<endl;
f5ad09dc 1191 _exit(1);
12c86877 1192 }
940d7811
RG
1193
1194 if (::arg().asNum("tcp-fast-open") > 0) {
1195#ifdef TCP_FASTOPEN
1196 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1197 if (setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
f5ad09dc 1198 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket "<<local.toStringWithPort()<<": "<<stringerror()<<endl;
940d7811
RG
1199 }
1200#else
e6a9dde5 1201 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
940d7811
RG
1202#endif
1203 }
1204
f5ad09dc
PL
1205 if(::arg().mustDo("non-local-bind"))
1206 Utility::setBindAny(local.sin4.sin_family, s);
1207
1208 if(local.isIPv6() && setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1209 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<stringerror()<<endl;
1210 }
fec7dd5a 1211
379ab445 1212 if(::bind(s, (sockaddr*)&local, local.getSocklen())<0) {
a702a96c 1213 int err = errno;
2c896042 1214 close(s);
a702a96c 1215 if( err == EADDRNOTAVAIL && ! ::arg().mustDo("local-address-nonexist-fail") ) {
f5ad09dc 1216 g_log<<Logger::Error<<"Address " << local.toString() << " does not exist on this server - skipping TCP bind" << endl;
5ecb2885
MZ
1217 continue;
1218 } else {
f5ad09dc 1219 g_log<<Logger::Error<<"Unable to bind to TCP socket " << local.toStringWithPort() << ": "<<stringerror(err)<<endl;
2ab7e9ac 1220 throw PDNSException("Unable to bind to TCP socket");
5ecb2885 1221 }
12c86877 1222 }
12c86877 1223
f5ad09dc
PL
1224 listen(s, 128);
1225 g_log<<Logger::Error<<"TCP server bound to "<<local.toStringWithPort()<<endl;
12c86877 1226 d_sockets.push_back(s);
8edfedf1
BH
1227 struct pollfd pfd;
1228 memset(&pfd, 0, sizeof(pfd));
1229 pfd.fd = s;
1230 pfd.events = POLLIN;
8edfedf1 1231 d_prfds.push_back(pfd);
12c86877 1232 }
12c86877
BH
1233}
1234
1235
ff76e8b4 1236//! Start of TCP operations thread, we launch a new thread for each incoming TCP question
12c86877
BH
1237void TCPNameserver::thread()
1238{
519f5484 1239 setThreadName("pdns/tcpnameser");
12c86877
BH
1240 try {
1241 for(;;) {
1242 int fd;
cb0af1a1
RG
1243 ComboAddress remote;
1244 Utility::socklen_t addrlen=remote.getSocklen();
12c86877 1245
8edfedf1 1246 int ret=poll(&d_prfds[0], d_prfds.size(), -1); // blocks, forever if need be
8a63d3ce 1247 if(ret <= 0)
4957a608 1248 continue;
8a63d3ce 1249
12c86877 1250 int sock=-1;
8ce9e4e6 1251 for(const pollfd& pfd : d_prfds) {
c1ee10a6 1252 if(pfd.revents & POLLIN) {
4957a608 1253 sock = pfd.fd;
cb0af1a1
RG
1254 remote.sin4.sin_family = AF_INET6;
1255 addrlen=remote.getSocklen();
4957a608
BH
1256
1257 if((fd=accept(sock, (sockaddr*)&remote, &addrlen))<0) {
a702a96c
OM
1258 int err = errno;
1259 g_log<<Logger::Error<<"TCP question accept error: "<<stringerror(err)<<endl;
4957a608 1260
a702a96c 1261 if(err==EMFILE) {
e6a9dde5 1262 g_log<<Logger::Error<<"TCP handler out of filedescriptors, exiting, won't recover from this"<<endl;
5bd2ea7b 1263 _exit(1);
4957a608
BH
1264 }
1265 }
1266 else {
cb0af1a1
RG
1267 if (d_maxConnectionsPerClient) {
1268 std::lock_guard<std::mutex> lock(s_clientsCountMutex);
1269 if (s_clientsCount[remote] >= d_maxConnectionsPerClient) {
e6a9dde5 1270 g_log<<Logger::Notice<<"Limit of simultaneous TCP connections per client reached for "<< remote<<", dropping"<<endl;
cb0af1a1
RG
1271 close(fd);
1272 continue;
1273 }
1274 s_clientsCount[remote]++;
1275 }
1276
4957a608
BH
1277 d_connectionroom_sem->wait(); // blocks if no connections are available
1278
1279 int room;
1280 d_connectionroom_sem->getValue( &room);
1281 if(room<1)
e6a9dde5 1282 g_log<<Logger::Warning<<"Limit of simultaneous TCP connections reached - raise max-tcp-connections"<<endl;
4957a608 1283
0ddde5fb
RG
1284 try {
1285 std::thread connThread(doConnection, fd);
1286 connThread.detach();
1287 }
1288 catch (std::exception& e) {
1289 g_log<<Logger::Error<<"Error creating thread: "<<e.what()<<endl;
4957a608 1290 d_connectionroom_sem->post();
48e8d70b 1291 close(fd);
cb0af1a1 1292 decrementClientCount(remote);
4957a608
BH
1293 }
1294 }
1295 }
12c86877
BH
1296 }
1297 }
1298 }
3f81d239 1299 catch(PDNSException &AE) {
e6a9dde5 1300 g_log<<Logger::Error<<"TCP Nameserver thread dying because of fatal error: "<<AE.reason<<endl;
12c86877
BH
1301 }
1302 catch(...) {
e6a9dde5 1303 g_log<<Logger::Error<<"TCPNameserver dying because of an unexpected fatal error"<<endl;
12c86877 1304 }
5bd2ea7b 1305 _exit(1); // take rest of server with us
12c86877
BH
1306}
1307
1308
d322f931
PD
1309unsigned int TCPNameserver::numTCPConnections()
1310{
1311 int room;
1312 d_connectionroom_sem->getValue( &room);
1313 return d_maxTCPConnections - room;
1314}