git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/tcpreceiver.cc
Better (actual) fix for leak reported by Coverity.
[thirdparty/pdns.git] / pdns / tcpreceiver.cc
1 /*
2 PowerDNS Versatile Database Driven Nameserver
3 Copyright (C) 2002-2012 PowerDNS.COM BV
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License version 2
7 as published by the Free Software Foundation
8
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25 #include <boost/algorithm/string.hpp>
26 #include "auth-packetcache.hh"
27 #include "utility.hh"
28 #include "threadname.hh"
29 #include "dnssecinfra.hh"
30 #include "dnsseckeeper.hh"
31 #include <cstdio>
32 #include "base32.hh"
33 #include <cstring>
34 #include <cstdlib>
35 #include <sys/types.h>
36 #include <netinet/tcp.h>
37 #include <iostream>
38 #include <string>
39 #include "tcpreceiver.hh"
40 #include "sstuff.hh"
41
42 #include <errno.h>
43 #include <signal.h>
44 #include "base64.hh"
45 #include "ueberbackend.hh"
46 #include "dnspacket.hh"
47 #include "nameserver.hh"
48 #include "distributor.hh"
49 #include "lock.hh"
50 #include "logger.hh"
51 #include "arguments.hh"
52
53 #include "common_startup.hh"
54 #include "packethandler.hh"
55 #include "statbag.hh"
56 #include "resolver.hh"
57 #include "communicator.hh"
58 #include "namespaces.hh"
59 #include "signingpipe.hh"
60 #include "stubresolver.hh"
61 extern AuthPacketCache PC;
62 extern StatBag S;
63
64 /**
65 \file tcpreceiver.cc
66 \brief This file implements the tcpreceiver that receives and answers questions over TCP/IP
67 */
68
69 std::mutex TCPNameserver::s_plock;
70 std::unique_ptr<Semaphore> TCPNameserver::d_connectionroom_sem{nullptr};
71 std::unique_ptr<PacketHandler> TCPNameserver::s_P{nullptr};
72 unsigned int TCPNameserver::d_maxTCPConnections = 0;
73 NetmaskGroup TCPNameserver::d_ng;
74 size_t TCPNameserver::d_maxTransactionsPerConn;
75 size_t TCPNameserver::d_maxConnectionsPerClient;
76 unsigned int TCPNameserver::d_idleTimeout;
77 unsigned int TCPNameserver::d_maxConnectionDuration;
78 std::mutex TCPNameserver::s_clientsCountMutex;
79 std::map<ComboAddress,size_t,ComboAddress::addressOnlyLessThan> TCPNameserver::s_clientsCount;
80
81 void TCPNameserver::go()
82 {
83 g_log<<Logger::Error<<"Creating backend connection for TCP"<<endl;
84 s_P.reset();
85 try {
86 s_P=make_unique<PacketHandler>();
87 }
88 catch(PDNSException &ae) {
89 g_log<<Logger::Error<<"TCP server is unable to launch backends - will try again when questions come in: "<<ae.reason<<endl;
90 }
91
92 std::thread th(std::bind(&TCPNameserver::thread, this));
93 th.detach();
94 }
95
96 // throws PDNSException if things didn't go according to plan, returns 0 if really 0 bytes were read
97 static int readnWithTimeout(int fd, void* buffer, unsigned int n, unsigned int idleTimeout, bool throwOnEOF=true, unsigned int totalTimeout=0)
98 {
99 unsigned int bytes=n;
100 char *ptr = (char*)buffer;
101 int ret;
102 time_t start = 0;
103 unsigned int remainingTotal = totalTimeout;
104 if (totalTimeout) {
105 start = time(NULL);
106 }
107 while(bytes) {
108 ret=read(fd, ptr, bytes);
109 if(ret < 0) {
110 if(errno==EAGAIN) {
111 ret=waitForData(fd, (totalTimeout == 0 || idleTimeout <= remainingTotal) ? idleTimeout : remainingTotal);
112 if(ret < 0)
113 throw NetworkError("Waiting for data read");
114 if(!ret)
115 throw NetworkError("Timeout reading data");
116 continue;
117 }
118 else
119 throw NetworkError("Reading data: "+stringerror());
120 }
121 if(!ret) {
122 if(!throwOnEOF && n == bytes)
123 return 0;
124 else
125 throw NetworkError("Did not fulfill read from TCP due to EOF");
126 }
127
128 ptr += ret;
129 bytes -= ret;
130 if (totalTimeout) {
131 time_t now = time(NULL);
132 unsigned int elapsed = now - start;
133 if (elapsed >= remainingTotal) {
134 throw NetworkError("Timeout while reading data");
135 }
136 start = now;
137 remainingTotal -= elapsed;
138 }
139 }
140 return n;
141 }
142
143 // ditto
144 static void writenWithTimeout(int fd, const void *buffer, unsigned int n, unsigned int idleTimeout)
145 {
146 unsigned int bytes=n;
147 const char *ptr = (char*)buffer;
148 int ret;
149 while(bytes) {
150 ret=write(fd, ptr, bytes);
151 if(ret < 0) {
152 if(errno==EAGAIN) {
153 ret=waitForRWData(fd, false, idleTimeout, 0);
154 if(ret < 0)
155 throw NetworkError("Waiting for data write");
156 if(!ret)
157 throw NetworkError("Timeout writing data");
158 continue;
159 }
160 else
161 throw NetworkError("Writing data: "+stringerror());
162 }
163 if(!ret) {
164 throw NetworkError("Did not fulfill TCP write due to EOF");
165 }
166
167 ptr += ret;
168 bytes -= ret;
169 }
170 }
171
172 void TCPNameserver::sendPacket(std::unique_ptr<DNSPacket>& p, int outsock)
173 {
174 g_rs.submitResponse(*p, false);
175
176 uint16_t len=htons(p->getString().length());
177 string buffer((const char*)&len, 2);
178 buffer.append(p->getString());
179 writenWithTimeout(outsock, buffer.c_str(), buffer.length(), d_idleTimeout);
180 }
181
182
183 void TCPNameserver::getQuestion(int fd, char *mesg, int pktlen, const ComboAddress &remote, unsigned int totalTime)
184 try
185 {
186 readnWithTimeout(fd, mesg, pktlen, d_idleTimeout, true, totalTime);
187 }
188 catch(NetworkError& ae) {
189 throw NetworkError("Error reading DNS data from TCP client "+remote.toString()+": "+ae.what());
190 }
191
192 static void incTCPAnswerCount(const ComboAddress& remote)
193 {
194 S.inc("tcp-answers");
195 if(remote.sin4.sin_family == AF_INET6)
196 S.inc("tcp6-answers");
197 else
198 S.inc("tcp4-answers");
199 }
200
// Check whether a connection that began at `start` has used up its allowed lifetime.
//
//  maxConnectionDuration  limit in seconds; 0 disables the check
//  start                  time the connection started
//  remainingTime          set to the seconds left, only when a limit is
//                         configured and not yet reached; untouched otherwise
//
// Returns true when the limit is configured and has been reached.
static bool maxConnectionDurationReached(unsigned int maxConnectionDuration, time_t start, unsigned int& remainingTime)
{
  if (maxConnectionDuration == 0) {
    return false;
  }

  const time_t now = time(nullptr);
  // If the clock stepped backwards since `start`, count no elapsed time
  // rather than letting a negative value inflate remainingTime beyond the limit.
  const unsigned long long elapsed = (now > start) ? static_cast<unsigned long long>(now - start) : 0ULL;

  if (elapsed >= maxConnectionDuration) {
    return true;
  }

  remainingTime = maxConnectionDuration - static_cast<unsigned int>(elapsed);
  return false;
}
212
213 void TCPNameserver::decrementClientCount(const ComboAddress& remote)
214 {
215 if (d_maxConnectionsPerClient) {
216 std::lock_guard<std::mutex> lock(s_clientsCountMutex);
217 s_clientsCount[remote]--;
218 if (s_clientsCount[remote] == 0) {
219 s_clientsCount.erase(remote);
220 }
221 }
222 }
223
224 void TCPNameserver::doConnection(int fd)
225 {
226 setThreadName("pdns/tcpConnect");
227 std::unique_ptr<DNSPacket> packet;
228 ComboAddress remote;
229 socklen_t remotelen=sizeof(remote);
230 size_t transactions = 0;
231 time_t start = 0;
232 if (d_maxConnectionDuration) {
233 start = time(NULL);
234 }
235
236 if(getpeername(fd, (struct sockaddr *)&remote, &remotelen) < 0) {
237 g_log<<Logger::Warning<<"Received question from socket which had no remote address, dropping ("<<stringerror()<<")"<<endl;
238 d_connectionroom_sem->post();
239 try {
240 closesocket(fd);
241 }
242 catch(const PDNSException& e) {
243 g_log<<Logger::Error<<"Error closing TCP socket: "<<e.reason<<endl;
244 }
245 return;
246 }
247
248 setNonBlocking(fd);
249 try {
250 int mesgsize=65535;
251 scoped_array<char> mesg(new char[mesgsize]);
252
253 DLOG(g_log<<"TCP Connection accepted on fd "<<fd<<endl);
254 bool logDNSQueries= ::arg().mustDo("log-dns-queries");
255 for(;;) {
256 unsigned int remainingTime = 0;
257 transactions++;
258 if (d_maxTransactionsPerConn && transactions > d_maxTransactionsPerConn) {
259 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the number of transactions per connection, dropping.";
260 break;
261 }
262 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
263 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the maximum TCP connection duration, dropping.";
264 break;
265 }
266
267 uint16_t pktlen;
268 if(!readnWithTimeout(fd, &pktlen, 2, d_idleTimeout, false, remainingTime))
269 break;
270 else
271 pktlen=ntohs(pktlen);
272
273 // this check will always be false *if* no one touches
274 // the mesg array. pktlen can be maximum of 65535 as
275 // it is 2 byte unsigned variable. In getQuestion, we
276 // write to 0 up to pktlen-1 so 65535 is just right.
277
278 // do not remove this check as it will catch if someone
279 // decreases the mesg buffer size for some reason.
280 if(pktlen > mesgsize) {
281 g_log<<Logger::Warning<<"Received an overly large question from "<<remote.toString()<<", dropping"<<endl;
282 break;
283 }
284
285 if (maxConnectionDurationReached(d_maxConnectionDuration, start, remainingTime)) {
286 g_log << Logger::Notice<<"TCP Remote "<< remote <<" exceeded the maximum TCP connection duration, dropping.";
287 break;
288 }
289
290 getQuestion(fd, mesg.get(), pktlen, remote, remainingTime);
291 S.inc("tcp-queries");
292 if(remote.sin4.sin_family == AF_INET6)
293 S.inc("tcp6-queries");
294 else
295 S.inc("tcp4-queries");
296
297 packet=make_unique<DNSPacket>(true);
298 packet->setRemote(&remote);
299 packet->d_tcp=true;
300 packet->setSocket(fd);
301 if(packet->parse(mesg.get(), pktlen)<0)
302 break;
303
304 if(packet->qtype.getCode()==QType::AXFR) {
305 if(doAXFR(packet->qdomain, packet, fd))
306 incTCPAnswerCount(remote);
307 continue;
308 }
309
310 if(packet->qtype.getCode()==QType::IXFR) {
311 if(doIXFR(packet, fd))
312 incTCPAnswerCount(remote);
313 continue;
314 }
315
316 std::unique_ptr<DNSPacket> reply;
317 auto cached = make_unique<DNSPacket>(false);
318 if(logDNSQueries) {
319 string remote_text;
320 if(packet->hasEDNSSubnet())
321 remote_text = packet->getRemote().toString() + "<-" + packet->getRealRemote().toString();
322 else
323 remote_text = packet->getRemote().toString();
324 g_log << Logger::Notice<<"TCP Remote "<< remote_text <<" wants '" << packet->qdomain<<"|"<<packet->qtype.getName() <<
325 "', do = " <<packet->d_dnssecOk <<", bufsize = "<< packet->getMaxReplyLen();
326 }
327
328 if(PC.enabled()) {
329 if(packet->couldBeCached() && PC.get(*packet, *cached)) { // short circuit - does the PacketCache recognize this question?
330 if(logDNSQueries)
331 g_log<<": packetcache HIT"<<endl;
332 cached->setRemote(&packet->d_remote);
333 cached->d.id=packet->d.id;
334 cached->d.rd=packet->d.rd; // copy in recursion desired bit
335 cached->commitD(); // commit d to the packet inlined
336
337 sendPacket(cached, fd); // presigned, don't do it again
338 continue;
339 }
340 if(logDNSQueries)
341 g_log<<": packetcache MISS"<<endl;
342 } else {
343 if (logDNSQueries) {
344 g_log<<endl;
345 }
346 }
347 {
348 std::lock_guard<std::mutex> l(s_plock);
349 if(!s_P) {
350 g_log<<Logger::Error<<"TCP server is without backend connections, launching"<<endl;
351 s_P=make_unique<PacketHandler>();
352 }
353
354 reply= s_P->doQuestion(*packet); // we really need to ask the backend :-)
355 }
356
357 if(!reply) // unable to write an answer?
358 break;
359
360 sendPacket(reply, fd);
361 }
362 }
363 catch(PDNSException &ae) {
364 std::lock_guard<std::mutex> l(s_plock);
365 s_P.reset(); // on next call, backend will be recycled
366 g_log<<Logger::Error<<"TCP nameserver had error, cycling backend: "<<ae.reason<<endl;
367 }
368 catch(NetworkError &e) {
369 g_log<<Logger::Info<<"TCP Connection Thread died because of network error: "<<e.what()<<endl;
370 }
371
372 catch(std::exception &e) {
373 g_log<<Logger::Error<<"TCP Connection Thread died because of STL error: "<<e.what()<<endl;
374 }
375 catch( ... )
376 {
377 g_log << Logger::Error << "TCP Connection Thread caught unknown exception." << endl;
378 }
379 d_connectionroom_sem->post();
380
381 try {
382 closesocket(fd);
383 }
384 catch(const PDNSException& e) {
385 g_log<<Logger::Error<<"Error closing TCP socket: "<<e.reason<<endl;
386 }
387 decrementClientCount(remote);
388 }
389
390
391 // call this method with s_plock held!
392 bool TCPNameserver::canDoAXFR(std::unique_ptr<DNSPacket>& q)
393 {
394 if(::arg().mustDo("disable-axfr"))
395 return false;
396
397 if(q->d_havetsig) { // if you have one, it must be good
398 TSIGRecordContent trc;
399 DNSName keyname;
400 string secret;
401 if(!q->checkForCorrectTSIG(s_P->getBackend(), &keyname, &secret, &trc)) {
402 return false;
403 } else {
404 getTSIGHashEnum(trc.d_algoName, q->d_tsig_algo);
405 if (q->d_tsig_algo == TSIG_GSS) {
406 GssContext gssctx(keyname);
407 if (!gssctx.getPeerPrincipal(q->d_peer_principal)) {
408 g_log<<Logger::Warning<<"Failed to extract peer principal from GSS context with keyname '"<<keyname<<"'"<<endl;
409 }
410 }
411 }
412
413 DNSSECKeeper dk(s_P->getBackend());
414
415 if (q->d_tsig_algo == TSIG_GSS) {
416 vector<string> princs;
417 s_P->getBackend()->getDomainMetadata(q->qdomain, "GSS-ALLOW-AXFR-PRINCIPAL", princs);
418 for(const std::string& princ : princs) {
419 if (q->d_peer_principal == princ) {
420 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: TSIG signed request with authorized principal '"<<q->d_peer_principal<<"' and algorithm 'gss-tsig'"<<endl;
421 return true;
422 }
423 }
424 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' denied: TSIG signed request with principal '"<<q->d_peer_principal<<"' and algorithm 'gss-tsig' is not permitted"<<endl;
425 return false;
426 }
427
428 if(!dk.TSIGGrantsAccess(q->qdomain, keyname)) {
429 g_log<<Logger::Error<<"AXFR '"<<q->qdomain<<"' denied: key with name '"<<keyname<<"' and algorithm '"<<getTSIGAlgoName(q->d_tsig_algo)<<"' does not grant access to zone"<<endl;
430 return false;
431 }
432 else {
433 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: TSIG signed request with authorized key '"<<keyname<<"' and algorithm '"<<getTSIGAlgoName(q->d_tsig_algo)<<"'"<<endl;
434 return true;
435 }
436 }
437
438 // cerr<<"checking allow-axfr-ips"<<endl;
439 if(!(::arg()["allow-axfr-ips"].empty()) && d_ng.match( (ComboAddress *) &q->d_remote )) {
440 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: client IP "<<q->getRemote()<<" is in allow-axfr-ips"<<endl;
441 return true;
442 }
443
444 FindNS fns;
445
446 // cerr<<"doing per-zone-axfr-acls"<<endl;
447 SOAData sd;
448 if(s_P->getBackend()->getSOAUncached(q->qdomain,sd)) {
449 // cerr<<"got backend and SOA"<<endl;
450 DNSBackend *B=sd.db;
451 vector<string> acl;
452 s_P->getBackend()->getDomainMetadata(q->qdomain, "ALLOW-AXFR-FROM", acl);
453 for (vector<string>::const_iterator i = acl.begin(); i != acl.end(); ++i) {
454 // cerr<<"matching against "<<*i<<endl;
455 if(pdns_iequals(*i, "AUTO-NS")) {
456 // cerr<<"AUTO-NS magic please!"<<endl;
457
458 DNSResourceRecord rr;
459 set<DNSName> nsset;
460
461 B->lookup(QType(QType::NS),q->qdomain,sd.domain_id);
462 while(B->get(rr))
463 nsset.insert(DNSName(rr.content));
464 for(const auto & j: nsset) {
465 vector<string> nsips=fns.lookup(j, s_P->getBackend());
466 for(vector<string>::const_iterator k=nsips.begin();k!=nsips.end();++k) {
467 // cerr<<"got "<<*k<<" from AUTO-NS"<<endl;
468 if(*k == q->getRemote().toString())
469 {
470 // cerr<<"got AUTO-NS hit"<<endl;
471 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: client IP "<<q->getRemote()<<" is in NSset"<<endl;
472 return true;
473 }
474 }
475 }
476 }
477 else
478 {
479 Netmask nm = Netmask(*i);
480 if(nm.match( (ComboAddress *) &q->d_remote ))
481 {
482 g_log<<Logger::Warning<<"AXFR of domain '"<<q->qdomain<<"' allowed: client IP "<<q->getRemote()<<" is in per-domain ACL"<<endl;
483 // cerr<<"hit!"<<endl;
484 return true;
485 }
486 }
487 }
488 }
489
490 extern CommunicatorClass Communicator;
491
492 if(Communicator.justNotified(q->qdomain, q->getRemote().toString())) { // we just notified this ip
493 g_log<<Logger::Warning<<"Approved AXFR of '"<<q->qdomain<<"' from recently notified slave "<<q->getRemote()<<endl;
494 return true;
495 }
496
497 g_log<<Logger::Error<<"AXFR of domain '"<<q->qdomain<<"' denied: client IP "<<q->getRemote()<<" has no permission"<<endl;
498 return false;
499 }
500
501 namespace {
502 struct NSECXEntry
503 {
504 NSECBitmap d_set;
505 unsigned int d_ttl;
506 bool d_auth;
507 };
508
509 static std::unique_ptr<DNSPacket> getFreshAXFRPacket(std::unique_ptr<DNSPacket>& q)
510 {
511 std::unique_ptr<DNSPacket> ret = std::unique_ptr<DNSPacket>(q->replyPacket());
512 ret->setCompress(false);
513 ret->d_dnssecOk=false; // RFC 5936, 2.2.5
514 ret->d_tcp = true;
515 return ret;
516 }
517 }
518
519
520 /** do the actual zone transfer. Return 0 in case of error, 1 in case of success */
521 int TCPNameserver::doAXFR(const DNSName &target, std::unique_ptr<DNSPacket>& q, int outsock)
522 {
523 std::unique_ptr<DNSPacket> outpacket= getFreshAXFRPacket(q);
524 if(q->d_dnssecOk)
525 outpacket->d_dnssecOk=true; // RFC 5936, 2.2.5 'SHOULD'
526
527 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' initiated by "<<q->getRemote()<<endl;
528
529 // determine if zone exists and AXFR is allowed using existing backend before spawning a new backend.
530 SOAData sd;
531 {
532 std::lock_guard<std::mutex> l(s_plock);
533 DLOG(g_log<<"Looking for SOA"<<endl); // find domain_id via SOA and list complete domain. No SOA, no AXFR
534 if(!s_P) {
535 g_log<<Logger::Error<<"TCP server is without backend connections in doAXFR, launching"<<endl;
536 s_P=make_unique<PacketHandler>();
537 }
538
539 // canDoAXFR does all the ACL checks, and has the if(disable-axfr) shortcut, call it first.
540 if (!canDoAXFR(q)) {
541 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' failed: "<<q->getRemote()<<" may not request AXFR"<<endl;
542 outpacket->setRcode(RCode::NotAuth);
543 sendPacket(outpacket,outsock);
544 return 0;
545 }
546
547 if(!s_P->getBackend()->getSOAUncached(target, sd)) {
548 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' failed: not authoritative"<<endl;
549 outpacket->setRcode(RCode::NotAuth);
550 sendPacket(outpacket,outsock);
551 return 0;
552 }
553 }
554
555 UeberBackend db;
556 if(!db.getSOAUncached(target, sd)) {
557 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' failed: not authoritative in second instance"<<endl;
558 outpacket->setRcode(RCode::NotAuth);
559 sendPacket(outpacket,outsock);
560 return 0;
561 }
562
563 DNSSECKeeper dk(&db);
564 DNSSECKeeper::clearCaches(target);
565 bool securedZone = dk.isSecuredZone(target);
566 bool presignedZone = dk.isPresigned(target);
567
568 bool noAXFRBecauseOfNSEC3Narrow=false;
569 NSEC3PARAMRecordContent ns3pr;
570 bool narrow;
571 bool NSEC3Zone=false;
572 if(securedZone && dk.getNSEC3PARAM(target, &ns3pr, &narrow)) {
573 NSEC3Zone=true;
574 if(narrow) {
575 g_log<<Logger::Error<<"Not doing AXFR of an NSEC3 narrow zone '"<<target<<"' for "<<q->getRemote()<<endl;
576 noAXFRBecauseOfNSEC3Narrow=true;
577 }
578 }
579
580 if(noAXFRBecauseOfNSEC3Narrow) {
581 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' denied to "<<q->getRemote()<<endl;
582 outpacket->setRcode(RCode::Refused);
583 // FIXME: should actually figure out if we are auth over a zone, and send out 9 if we aren't
584 sendPacket(outpacket,outsock);
585 return 0;
586 }
587
588 TSIGRecordContent trc;
589 DNSName tsigkeyname;
590 string tsigsecret;
591
592 bool haveTSIGDetails = q->getTSIGDetails(&trc, &tsigkeyname);
593
594 if(haveTSIGDetails && !tsigkeyname.empty()) {
595 string tsig64;
596 DNSName algorithm=trc.d_algoName; // FIXME400: check
597 if (algorithm == DNSName("hmac-md5.sig-alg.reg.int"))
598 algorithm = DNSName("hmac-md5");
599 if (algorithm != DNSName("gss-tsig")) {
600 if(!db.getTSIGKey(tsigkeyname, &algorithm, &tsig64)) {
601 g_log<<Logger::Error<<"TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"' not found"<<endl;
602 return 0;
603 }
604 if (B64Decode(tsig64, tsigsecret) == -1) {
605 g_log<<Logger::Error<<"Unable to Base-64 decode TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"'"<<endl;
606 return 0;
607 }
608 }
609 }
610
611
612 // SOA *must* go out first, our signing pipe might reorder
613 DLOG(g_log<<"Sending out SOA"<<endl);
614 DNSZoneRecord soa = makeEditedDNSZRFromSOAData(dk, sd);
615 outpacket->addRecord(DNSZoneRecord(soa));
616 if(securedZone && !presignedZone) {
617 set<DNSName> authSet;
618 authSet.insert(target);
619 addRRSigs(dk, db, authSet, outpacket->getRRS());
620 }
621
622 if(haveTSIGDetails && !tsigkeyname.empty())
623 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac); // first answer is 'normal'
624
625 sendPacket(outpacket, outsock);
626
627 trc.d_mac = outpacket->d_trc.d_mac;
628 outpacket = getFreshAXFRPacket(q);
629
630 ChunkedSigningPipe csp(target, (securedZone && !presignedZone), ::arg().asNum("signing-threads", 1));
631
632 typedef map<DNSName, NSECXEntry, CanonDNSNameCompare> nsecxrepo_t;
633 nsecxrepo_t nsecxrepo;
634
635 // this is where the DNSKEYs go in
636
637 DNSSECKeeper::keyset_t keys = dk.getKeys(target);
638
639 DNSZoneRecord zrr;
640
641 zrr.dr.d_name = target;
642 zrr.dr.d_ttl = sd.minimum;
643 zrr.auth = 1; // please sign!
644
645 string publishCDNSKEY, publishCDS;
646 dk.getPublishCDNSKEY(q->qdomain, publishCDNSKEY);
647 dk.getPublishCDS(q->qdomain, publishCDS);
648 vector<DNSZoneRecord> cds, cdnskey;
649 DNSSECKeeper::keyset_t entryPoints = dk.getEntryPoints(q->qdomain);
650 set<uint32_t> entryPointIds;
651 for (auto const& value : entryPoints)
652 entryPointIds.insert(value.second.id);
653
654 for(const DNSSECKeeper::keyset_t::value_type& value : keys) {
655 if (!value.second.published) {
656 continue;
657 }
658 zrr.dr.d_type = QType::DNSKEY;
659 zrr.dr.d_content = std::make_shared<DNSKEYRecordContent>(value.first.getDNSKEY());
660 DNSName keyname = NSEC3Zone ? DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, zrr.dr.d_name))) : zrr.dr.d_name;
661 NSECXEntry& ne = nsecxrepo[keyname];
662
663 ne.d_set.set(zrr.dr.d_type);
664 ne.d_ttl = sd.getNegativeTTL();
665 csp.submit(zrr);
666
667 // generate CDS and CDNSKEY records
668 if(entryPointIds.count(value.second.id) > 0){
669 if(publishCDNSKEY == "1") {
670 zrr.dr.d_type=QType::CDNSKEY;
671 zrr.dr.d_content = std::make_shared<DNSKEYRecordContent>(value.first.getDNSKEY());
672 cdnskey.push_back(zrr);
673 }
674
675 if(!publishCDS.empty()){
676 zrr.dr.d_type=QType::CDS;
677 vector<string> digestAlgos;
678 stringtok(digestAlgos, publishCDS, ", ");
679 for(auto const &digestAlgo : digestAlgos) {
680 zrr.dr.d_content=std::make_shared<DSRecordContent>(makeDSFromDNSKey(target, value.first.getDNSKEY(), pdns_stou(digestAlgo)));
681 cds.push_back(zrr);
682 }
683 }
684 }
685 }
686
687 if(::arg().mustDo("direct-dnskey")) {
688 sd.db->lookup(QType(QType::DNSKEY), target, sd.domain_id);
689 while(sd.db->get(zrr)) {
690 zrr.dr.d_ttl = sd.minimum;
691 csp.submit(zrr);
692 }
693 }
694
695 uint8_t flags;
696
697 if(NSEC3Zone) { // now stuff in the NSEC3PARAM
698 flags = ns3pr.d_flags;
699 zrr.dr.d_type = QType::NSEC3PARAM;
700 ns3pr.d_flags = 0;
701 zrr.dr.d_content = std::make_shared<NSEC3PARAMRecordContent>(ns3pr);
702 ns3pr.d_flags = flags;
703 DNSName keyname = DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, zrr.dr.d_name)));
704 NSECXEntry& ne = nsecxrepo[keyname];
705
706 ne.d_set.set(zrr.dr.d_type);
707 csp.submit(zrr);
708 }
709
710 // now start list zone
711 if(!(sd.db->list(target, sd.domain_id))) {
712 g_log<<Logger::Error<<"Backend signals error condition"<<endl;
713 outpacket->setRcode(RCode::ServFail);
714 sendPacket(outpacket,outsock);
715 return 0;
716 }
717
718
719 const bool rectify = !(presignedZone || ::arg().mustDo("disable-axfr-rectify"));
720 set<DNSName> qnames, nsset, terms;
721 vector<DNSZoneRecord> zrrs;
722
723 // Add the CDNSKEY and CDS records we created earlier
724 for (auto const &synth_zrr : cds)
725 zrrs.push_back(synth_zrr);
726
727 for (auto const &synth_zrr : cdnskey)
728 zrrs.push_back(synth_zrr);
729
730 while(sd.db->get(zrr)) {
731 zrr.dr.d_name.makeUsLowerCase();
732 if(zrr.dr.d_name.isPartOf(target)) {
733 if (zrr.dr.d_type == QType::ALIAS && ::arg().mustDo("outgoing-axfr-expand-alias")) {
734 vector<DNSZoneRecord> ips;
735 int ret1 = stubDoResolve(getRR<ALIASRecordContent>(zrr.dr)->d_content, QType::A, ips);
736 int ret2 = stubDoResolve(getRR<ALIASRecordContent>(zrr.dr)->d_content, QType::AAAA, ips);
737 if(ret1 != RCode::NoError || ret2 != RCode::NoError) {
738 g_log<<Logger::Error<<"Error resolving for ALIAS "<<zrr.dr.d_content->getZoneRepresentation()<<", aborting AXFR"<<endl;
739 outpacket->setRcode(RCode::ServFail);
740 sendPacket(outpacket,outsock);
741 return 0;
742 }
743 for(const auto& ip: ips) {
744 zrr.dr.d_type = ip.dr.d_type;
745 zrr.dr.d_content = ip.dr.d_content;
746 zrrs.push_back(zrr);
747 }
748 continue;
749 }
750
751 if (rectify) {
752 if (zrr.dr.d_type) {
753 qnames.insert(zrr.dr.d_name);
754 if(zrr.dr.d_type == QType::NS && zrr.dr.d_name!=target)
755 nsset.insert(zrr.dr.d_name);
756 } else {
757 // remove existing ents
758 continue;
759 }
760 }
761 zrrs.push_back(zrr);
762 } else {
763 if (zrr.dr.d_type)
764 g_log<<Logger::Warning<<"Zone '"<<target<<"' contains out-of-zone data '"<<zrr.dr.d_name<<"|"<<DNSRecordContent::NumberToType(zrr.dr.d_type)<<"', ignoring"<<endl;
765 }
766 }
767
768 // Group records by name and type, signpipe stumbles over interrupted rrsets
769 if(securedZone && !presignedZone) {
770 sort(zrrs.begin(), zrrs.end(), [](const DNSZoneRecord& a, const DNSZoneRecord& b) {
771 return tie(a.dr.d_name, a.dr.d_type) < tie(b.dr.d_name, b.dr.d_type);
772 });
773 }
774
775 if(rectify) {
776 // set auth
777 for(DNSZoneRecord &loopZRR : zrrs) {
778 loopZRR.auth=true;
779 if (loopZRR.dr.d_type != QType::NS || loopZRR.dr.d_name!=target) {
780 DNSName shorter(loopZRR.dr.d_name);
781 do {
782 if (shorter==target) // apex is always auth
783 break;
784 if(nsset.count(shorter) && !(loopZRR.dr.d_name==shorter && loopZRR.dr.d_type == QType::DS)) {
785 loopZRR.auth=false;
786 break;
787 }
788 } while(shorter.chopOff());
789 }
790 }
791
792 if(NSEC3Zone) {
793 // ents are only required for NSEC3 zones
794 uint32_t maxent = ::arg().asNum("max-ent-entries");
795 set<DNSName> nsec3set, nonterm;
796 for (auto &loopZRR: zrrs) {
797 bool skip=false;
798 DNSName shorter = loopZRR.dr.d_name;
799 if (shorter != target && shorter.chopOff() && shorter != target) {
800 do {
801 if(nsset.count(shorter)) {
802 skip=true;
803 break;
804 }
805 } while(shorter.chopOff() && shorter != target);
806 }
807 shorter = loopZRR.dr.d_name;
808 if(!skip && (loopZRR.dr.d_type != QType::NS || !ns3pr.d_flags)) {
809 do {
810 if(!nsec3set.count(shorter)) {
811 nsec3set.insert(shorter);
812 }
813 } while(shorter != target && shorter.chopOff());
814 }
815 }
816
817 for(DNSZoneRecord &loopZRR : zrrs) {
818 DNSName shorter(loopZRR.dr.d_name);
819 while(shorter != target && shorter.chopOff()) {
820 if(!qnames.count(shorter) && !nonterm.count(shorter) && nsec3set.count(shorter)) {
821 if(!(maxent)) {
822 g_log<<Logger::Warning<<"Zone '"<<target<<"' has too many empty non terminals."<<endl;
823 return 0;
824 }
825 nonterm.insert(shorter);
826 --maxent;
827 }
828 }
829 }
830
831 for(const auto& nt : nonterm) {
832 DNSZoneRecord tempRR;
833 tempRR.dr.d_name=nt;
834 tempRR.dr.d_type=QType::ENT;
835 tempRR.auth=true;
836 zrrs.push_back(tempRR);
837 }
838 }
839 }
840
841
842 /* now write all other records */
843
844 DNSName keyname;
845 unsigned int udiff;
846 DTime dt;
847 dt.set();
848 int records=0;
849 for(DNSZoneRecord &loopZRR : zrrs) {
850 if (!presignedZone && loopZRR.dr.d_type == QType::RRSIG)
851 continue;
852
853 // only skip the DNSKEY, CDNSKEY and CDS if direct-dnskey is enabled, to avoid changing behaviour
854 // when it is not enabled.
855 if(::arg().mustDo("direct-dnskey") && (loopZRR.dr.d_type == QType::DNSKEY || loopZRR.dr.d_type == QType::CDNSKEY || loopZRR.dr.d_type == QType::CDS))
856 continue;
857
858 records++;
859 if(securedZone && (loopZRR.auth || loopZRR.dr.d_type == QType::NS)) {
860 if (NSEC3Zone || loopZRR.dr.d_type) {
861 if (presignedZone && NSEC3Zone && loopZRR.dr.d_type == QType::RRSIG && getRR<RRSIGRecordContent>(loopZRR.dr)->d_type == QType::NSEC3) {
862 keyname = loopZRR.dr.d_name.makeRelative(sd.qname);
863 } else {
864 keyname = NSEC3Zone ? DNSName(toBase32Hex(hashQNameWithSalt(ns3pr, loopZRR.dr.d_name))) : loopZRR.dr.d_name;
865 }
866 NSECXEntry& ne = nsecxrepo[keyname];
867 ne.d_ttl = sd.getNegativeTTL();
868 ne.d_auth = (ne.d_auth || loopZRR.auth || (NSEC3Zone && (!ns3pr.d_flags)));
869 if (loopZRR.dr.d_type && loopZRR.dr.d_type != QType::RRSIG) {
870 ne.d_set.set(loopZRR.dr.d_type);
871 }
872 }
873 }
874
875 if (!loopZRR.dr.d_type)
876 continue; // skip empty non-terminals
877
878 if(loopZRR.dr.d_type == QType::SOA)
879 continue; // skip SOA - would indicate end of AXFR
880
881 if(csp.submit(loopZRR)) {
882 for(;;) {
883 outpacket->getRRS() = csp.getChunk();
884 if(!outpacket->getRRS().empty()) {
885 if(haveTSIGDetails && !tsigkeyname.empty())
886 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
887 sendPacket(outpacket, outsock);
888 trc.d_mac=outpacket->d_trc.d_mac;
889 outpacket=getFreshAXFRPacket(q);
890 }
891 else
892 break;
893 }
894 }
895 }
896 /*
897 udiff=dt.udiffNoReset();
898 cerr<<"Starting NSEC: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<csp.d_signed<<" / "<<udiff/1000000.0<<endl;
899 cerr<<"Outstanding: "<<csp.d_outstanding<<", "<<csp.d_queued - csp.d_signed << endl;
900 cerr<<"Ready for consumption: "<<csp.getReady()<<endl;
901 */
902 if(securedZone) {
903 if(NSEC3Zone) {
904 for(nsecxrepo_t::const_iterator iter = nsecxrepo.begin(); iter != nsecxrepo.end(); ++iter) {
905 if(iter->second.d_auth) {
906 NSEC3RecordContent n3rc;
907 n3rc.set(iter->second.d_set);
908 const auto numberOfTypesSet = n3rc.numberOfTypesSet();
909 if (numberOfTypesSet != 0 && (numberOfTypesSet != 1 || !n3rc.isSet(QType::NS))) {
910 n3rc.set(QType::RRSIG);
911 }
912 n3rc.d_salt = ns3pr.d_salt;
913 n3rc.d_flags = ns3pr.d_flags;
914 n3rc.d_iterations = ns3pr.d_iterations;
915 n3rc.d_algorithm = DNSSECKeeper::DIGEST_SHA1; // SHA1, fixed in PowerDNS for now
916 nsecxrepo_t::const_iterator inext = iter;
917 ++inext;
918 if(inext == nsecxrepo.end())
919 inext = nsecxrepo.begin();
920 while(!inext->second.d_auth && inext != iter)
921 {
922 ++inext;
923 if(inext == nsecxrepo.end())
924 inext = nsecxrepo.begin();
925 }
926 n3rc.d_nexthash = fromBase32Hex(inext->first.toStringNoDot());
927 zrr.dr.d_name = iter->first+sd.qname;
928
929 zrr.dr.d_ttl = sd.getNegativeTTL();
930 zrr.dr.d_content = std::make_shared<NSEC3RecordContent>(std::move(n3rc));
931 zrr.dr.d_type = QType::NSEC3;
932 zrr.dr.d_place = DNSResourceRecord::ANSWER;
933 zrr.auth=true;
934 if(csp.submit(zrr)) {
935 for(;;) {
936 outpacket->getRRS() = csp.getChunk();
937 if(!outpacket->getRRS().empty()) {
938 if(haveTSIGDetails && !tsigkeyname.empty())
939 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
940 sendPacket(outpacket, outsock);
941 trc.d_mac=outpacket->d_trc.d_mac;
942 outpacket=getFreshAXFRPacket(q);
943 }
944 else
945 break;
946 }
947 }
948 }
949 }
950 }
951 else for(nsecxrepo_t::const_iterator iter = nsecxrepo.begin(); iter != nsecxrepo.end(); ++iter) {
952 NSECRecordContent nrc;
953 nrc.set(iter->second.d_set);
954 nrc.set(QType::RRSIG);
955 nrc.set(QType::NSEC);
956
957 if(boost::next(iter) != nsecxrepo.end())
958 nrc.d_next = boost::next(iter)->first;
959 else
960 nrc.d_next=nsecxrepo.begin()->first;
961 zrr.dr.d_name = iter->first;
962
963 zrr.dr.d_ttl = sd.getNegativeTTL();
964 zrr.dr.d_content = std::make_shared<NSECRecordContent>(std::move(nrc));
965 zrr.dr.d_type = QType::NSEC;
966 zrr.dr.d_place = DNSResourceRecord::ANSWER;
967 zrr.auth=true;
968 if(csp.submit(zrr)) {
969 for(;;) {
970 outpacket->getRRS() = csp.getChunk();
971 if(!outpacket->getRRS().empty()) {
972 if(haveTSIGDetails && !tsigkeyname.empty())
973 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
974 sendPacket(outpacket, outsock);
975 trc.d_mac=outpacket->d_trc.d_mac;
976 outpacket=getFreshAXFRPacket(q);
977 }
978 else
979 break;
980 }
981 }
982 }
983 }
984 /*
985 udiff=dt.udiffNoReset();
986 cerr<<"Flushing pipe: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<csp.d_signed<<" / "<<udiff/1000000.0<<endl;
987 cerr<<"Outstanding: "<<csp.d_outstanding<<", "<<csp.d_queued - csp.d_signed << endl;
988 cerr<<"Ready for consumption: "<<csp.getReady()<<endl;
989 * */
990 for(;;) {
991 outpacket->getRRS() = csp.getChunk(true); // flush the pipe
992 if(!outpacket->getRRS().empty()) {
993 if(haveTSIGDetails && !tsigkeyname.empty())
994 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true); // first answer is 'normal'
995 sendPacket(outpacket, outsock);
996 trc.d_mac=outpacket->d_trc.d_mac;
997 outpacket=getFreshAXFRPacket(q);
998 }
999 else
1000 break;
1001 }
1002
1003 udiff=dt.udiffNoReset();
1004 if(securedZone)
1005 g_log<<Logger::Info<<"Done signing: "<<csp.d_signed/(udiff/1000000.0)<<" sigs/s, "<<endl;
1006
1007 DLOG(g_log<<"Done writing out records"<<endl);
1008 /* and terminate with yet again the SOA record */
1009 outpacket=getFreshAXFRPacket(q);
1010 outpacket->addRecord(std::move(soa));
1011 if(haveTSIGDetails && !tsigkeyname.empty())
1012 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac, true);
1013
1014 sendPacket(outpacket, outsock);
1015
1016 DLOG(g_log<<"last packet - close"<<endl);
1017 g_log<<Logger::Error<<"AXFR of domain '"<<target<<"' to "<<q->getRemote()<<" finished"<<endl;
1018
1019 return 1;
1020 }
1021
1022 int TCPNameserver::doIXFR(std::unique_ptr<DNSPacket>& q, int outsock)
1023 {
1024 std::unique_ptr<DNSPacket> outpacket=getFreshAXFRPacket(q);
1025 if(q->d_dnssecOk)
1026 outpacket->d_dnssecOk=true; // RFC 5936, 2.2.5 'SHOULD'
1027
1028 uint32_t serial = 0;
1029 MOADNSParser mdp(false, q->getString());
1030 for(MOADNSParser::answers_t::const_iterator i=mdp.d_answers.begin(); i != mdp.d_answers.end(); ++i) {
1031 const DNSRecord *rr = &i->first;
1032 if (rr->d_type == QType::SOA && rr->d_place == DNSResourceRecord::AUTHORITY) {
1033 vector<string>parts;
1034 stringtok(parts, rr->d_content->getZoneRepresentation());
1035 if (parts.size() >= 3) {
1036 try {
1037 serial=pdns_stou(parts[2]);
1038 }
1039 catch(const std::out_of_range& oor) {
1040 g_log<<Logger::Error<<"Invalid serial in IXFR query"<<endl;
1041 outpacket->setRcode(RCode::FormErr);
1042 sendPacket(outpacket,outsock);
1043 return 0;
1044 }
1045 } else {
1046 g_log<<Logger::Error<<"No serial in IXFR query"<<endl;
1047 outpacket->setRcode(RCode::FormErr);
1048 sendPacket(outpacket,outsock);
1049 return 0;
1050 }
1051 } else if (rr->d_type != QType::TSIG && rr->d_type != QType::OPT) {
1052 g_log<<Logger::Error<<"Additional records in IXFR query, type: "<<QType(rr->d_type).getName()<<endl;
1053 outpacket->setRcode(RCode::FormErr);
1054 sendPacket(outpacket,outsock);
1055 return 0;
1056 }
1057 }
1058
1059 g_log<<Logger::Error<<"IXFR of domain '"<<q->qdomain<<"' initiated by "<<q->getRemote()<<" with serial "<<serial<<endl;
1060
1061 // determine if zone exists and AXFR is allowed using existing backend before spawning a new backend.
1062 SOAData sd;
1063 {
1064 std::lock_guard<std::mutex> l(s_plock);
1065 DLOG(g_log<<"Looking for SOA"<<endl); // find domain_id via SOA and list complete domain. No SOA, no IXFR
1066 if(!s_P) {
1067 g_log<<Logger::Error<<"TCP server is without backend connections in doIXFR, launching"<<endl;
1068 s_P=make_unique<PacketHandler>();
1069 }
1070
1071 // canDoAXFR does all the ACL checks, and has the if(disable-axfr) shortcut, call it first.
1072 if(!canDoAXFR(q) || !s_P->getBackend()->getSOAUncached(q->qdomain, sd)) {
1073 g_log<<Logger::Error<<"IXFR of domain '"<<q->qdomain<<"' failed: not authoritative"<<endl;
1074 outpacket->setRcode(RCode::NotAuth);
1075 sendPacket(outpacket,outsock);
1076 return 0;
1077 }
1078 }
1079
1080 DNSSECKeeper dk;
1081 NSEC3PARAMRecordContent ns3pr;
1082 bool narrow;
1083
1084 DNSSECKeeper::clearCaches(q->qdomain);
1085 bool securedZone = dk.isSecuredZone(q->qdomain);
1086 if(dk.getNSEC3PARAM(q->qdomain, &ns3pr, &narrow)) {
1087 if(narrow) {
1088 g_log<<Logger::Error<<"Not doing IXFR of an NSEC3 narrow zone."<<endl;
1089 g_log<<Logger::Error<<"IXFR of domain '"<<q->qdomain<<"' denied to "<<q->getRemote()<<endl;
1090 outpacket->setRcode(RCode::Refused);
1091 sendPacket(outpacket,outsock);
1092 return 0;
1093 }
1094 }
1095
1096 DNSName target = q->qdomain;
1097
1098 UeberBackend db;
1099 if(!db.getSOAUncached(target, sd)) {
1100 g_log<<Logger::Error<<"IXFR of domain '"<<target<<"' failed: not authoritative in second instance"<<endl;
1101 outpacket->setRcode(RCode::NotAuth);
1102 sendPacket(outpacket,outsock);
1103 return 0;
1104 }
1105
1106 if (!rfc1982LessThan(serial, calculateEditSOA(sd.serial, dk, sd.qname))) {
1107 TSIGRecordContent trc;
1108 DNSName tsigkeyname;
1109 string tsigsecret;
1110
1111 bool haveTSIGDetails = q->getTSIGDetails(&trc, &tsigkeyname);
1112
1113 if(haveTSIGDetails && !tsigkeyname.empty()) {
1114 string tsig64;
1115 DNSName algorithm=trc.d_algoName; // FIXME400: was toLowerCanonic, compare output
1116 if (algorithm == DNSName("hmac-md5.sig-alg.reg.int"))
1117 algorithm = DNSName("hmac-md5");
1118 std::lock_guard<std::mutex> l(s_plock);
1119 if(!s_P->getBackend()->getTSIGKey(tsigkeyname, &algorithm, &tsig64)) {
1120 g_log<<Logger::Error<<"TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"' not found"<<endl;
1121 return 0;
1122 }
1123 if (B64Decode(tsig64, tsigsecret) == -1) {
1124 g_log<<Logger::Error<<"Unable to Base-64 decode TSIG key '"<<tsigkeyname<<"' for domain '"<<target<<"'"<<endl;
1125 return 0;
1126 }
1127 }
1128
1129 UeberBackend signatureDB;
1130
1131 // SOA *must* go out first, our signing pipe might reorder
1132 DLOG(g_log<<"Sending out SOA"<<endl);
1133 DNSZoneRecord soa = makeEditedDNSZRFromSOAData(dk, sd);
1134 outpacket->addRecord(std::move(soa));
1135 if(securedZone && outpacket->d_dnssecOk) {
1136 set<DNSName> authSet;
1137 authSet.insert(target);
1138 addRRSigs(dk, signatureDB, authSet, outpacket->getRRS());
1139 }
1140
1141 if(haveTSIGDetails && !tsigkeyname.empty())
1142 outpacket->setTSIGDetails(trc, tsigkeyname, tsigsecret, trc.d_mac); // first answer is 'normal'
1143
1144 sendPacket(outpacket, outsock);
1145
1146 g_log<<Logger::Error<<"IXFR of domain '"<<target<<"' to "<<q->getRemote()<<" finished"<<endl;
1147
1148 return 1;
1149 }
1150
1151 g_log<<Logger::Error<<"IXFR fallback to AXFR for domain '"<<target<<"' our serial "<<sd.serial<<endl;
1152 return doAXFR(q->qdomain, q, outsock);
1153 }
1154
1155 TCPNameserver::~TCPNameserver()
1156 {
1157 }
1158
1159 TCPNameserver::TCPNameserver()
1160 {
1161 d_maxTransactionsPerConn = ::arg().asNum("max-tcp-transactions-per-conn");
1162 d_idleTimeout = ::arg().asNum("tcp-idle-timeout");
1163 d_maxConnectionDuration = ::arg().asNum("max-tcp-connection-duration");
1164 d_maxConnectionsPerClient = ::arg().asNum("max-tcp-connections-per-client");
1165
1166 // sem_init(&d_connectionroom_sem,0,::arg().asNum("max-tcp-connections"));
1167 d_connectionroom_sem = make_unique<Semaphore>( ::arg().asNum( "max-tcp-connections" ));
1168 d_maxTCPConnections = ::arg().asNum( "max-tcp-connections" );
1169
1170 vector<string>locals;
1171 stringtok(locals,::arg()["local-ipv6"]," ,");
1172 stringtok(locals,::arg()["local-address"]," ,");
1173 if(locals.empty())
1174 throw PDNSException("No local addresses specified");
1175
1176 d_ng.toMasks(::arg()["allow-axfr-ips"] );
1177
1178 signal(SIGPIPE,SIG_IGN);
1179
1180 for(auto const &laddr : locals) {
1181 ComboAddress local(laddr, ::arg().asNum("local-port"));
1182
1183 int s=socket(local.sin4.sin_family, SOCK_STREAM, 0);
1184 if(s<0)
1185 throw PDNSException("Unable to acquire TCP socket: "+stringerror());
1186 setCloseOnExec(s);
1187
1188 int tmp=1;
1189 if(setsockopt(s, SOL_SOCKET,SO_REUSEADDR, (char*)&tmp, sizeof tmp) < 0) {
1190 g_log<<Logger::Error<<"Setsockopt failed"<<endl;
1191 _exit(1);
1192 }
1193
1194 if (::arg().asNum("tcp-fast-open") > 0) {
1195 #ifdef TCP_FASTOPEN
1196 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1197 if (setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
1198 g_log<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket "<<local.toStringWithPort()<<": "<<stringerror()<<endl;
1199 }
1200 #else
1201 g_log<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
1202 #endif
1203 }
1204
1205 if(::arg().mustDo("non-local-bind"))
1206 Utility::setBindAny(local.sin4.sin_family, s);
1207
1208 if(local.isIPv6() && setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1209 g_log<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<stringerror()<<endl;
1210 }
1211
1212 if(::bind(s, (sockaddr*)&local, local.getSocklen())<0) {
1213 int err = errno;
1214 close(s);
1215 if( err == EADDRNOTAVAIL && ! ::arg().mustDo("local-address-nonexist-fail") ) {
1216 g_log<<Logger::Error<<"Address " << local.toString() << " does not exist on this server - skipping TCP bind" << endl;
1217 continue;
1218 } else {
1219 g_log<<Logger::Error<<"Unable to bind to TCP socket " << local.toStringWithPort() << ": "<<stringerror(err)<<endl;
1220 throw PDNSException("Unable to bind to TCP socket");
1221 }
1222 }
1223
1224 listen(s, 128);
1225 g_log<<Logger::Error<<"TCP server bound to "<<local.toStringWithPort()<<endl;
1226 d_sockets.push_back(s);
1227 struct pollfd pfd;
1228 memset(&pfd, 0, sizeof(pfd));
1229 pfd.fd = s;
1230 pfd.events = POLLIN;
1231 d_prfds.push_back(pfd);
1232 }
1233 }
1234
1235
1236 //! Start of TCP operations thread, we launch a new thread for each incoming TCP question
1237 void TCPNameserver::thread()
1238 {
1239 setThreadName("pdns/tcpnameser");
1240 try {
1241 for(;;) {
1242 int fd;
1243 ComboAddress remote;
1244 Utility::socklen_t addrlen=remote.getSocklen();
1245
1246 int ret=poll(&d_prfds[0], d_prfds.size(), -1); // blocks, forever if need be
1247 if(ret <= 0)
1248 continue;
1249
1250 int sock=-1;
1251 for(const pollfd& pfd : d_prfds) {
1252 if(pfd.revents & POLLIN) {
1253 sock = pfd.fd;
1254 remote.sin4.sin_family = AF_INET6;
1255 addrlen=remote.getSocklen();
1256
1257 if((fd=accept(sock, (sockaddr*)&remote, &addrlen))<0) {
1258 int err = errno;
1259 g_log<<Logger::Error<<"TCP question accept error: "<<stringerror(err)<<endl;
1260
1261 if(err==EMFILE) {
1262 g_log<<Logger::Error<<"TCP handler out of filedescriptors, exiting, won't recover from this"<<endl;
1263 _exit(1);
1264 }
1265 }
1266 else {
1267 if (d_maxConnectionsPerClient) {
1268 std::lock_guard<std::mutex> lock(s_clientsCountMutex);
1269 if (s_clientsCount[remote] >= d_maxConnectionsPerClient) {
1270 g_log<<Logger::Notice<<"Limit of simultaneous TCP connections per client reached for "<< remote<<", dropping"<<endl;
1271 close(fd);
1272 continue;
1273 }
1274 s_clientsCount[remote]++;
1275 }
1276
1277 d_connectionroom_sem->wait(); // blocks if no connections are available
1278
1279 int room;
1280 d_connectionroom_sem->getValue( &room);
1281 if(room<1)
1282 g_log<<Logger::Warning<<"Limit of simultaneous TCP connections reached - raise max-tcp-connections"<<endl;
1283
1284 try {
1285 std::thread connThread(doConnection, fd);
1286 connThread.detach();
1287 }
1288 catch (std::exception& e) {
1289 g_log<<Logger::Error<<"Error creating thread: "<<e.what()<<endl;
1290 d_connectionroom_sem->post();
1291 close(fd);
1292 decrementClientCount(remote);
1293 }
1294 }
1295 }
1296 }
1297 }
1298 }
1299 catch(PDNSException &AE) {
1300 g_log<<Logger::Error<<"TCP Nameserver thread dying because of fatal error: "<<AE.reason<<endl;
1301 }
1302 catch(...) {
1303 g_log<<Logger::Error<<"TCPNameserver dying because of an unexpected fatal error"<<endl;
1304 }
1305 _exit(1); // take rest of server with us
1306 }
1307
1308
1309 unsigned int TCPNameserver::numTCPConnections()
1310 {
1311 int room;
1312 d_connectionroom_sem->getValue( &room);
1313 return d_maxTCPConnections - room;
1314 }