]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
make advanced multiplexers and --fork play nicely
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
22c012a8 3 Copyright (C) 2003 - 2006 PowerDNS.COM BV
288f4aa9
BH
4
5 This program is free software; you can redistribute it and/or modify
f28307ad
BH
6 it under the terms of the GNU General Public License version 2
7 as published by the Free Software Foundation
288f4aa9
BH
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
06bd9ccf 16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
288f4aa9 17*/
caa6eefa
BH
18
19#include "utility.hh"
288f4aa9
BH
20#include <iostream>
21#include <errno.h>
22#include <map>
23#include <set>
caa6eefa 24#ifndef WIN32
288f4aa9 25#include <netdb.h>
caa6eefa 26#endif // WIN32
97bb160b 27#include "recursor_cache.hh"
288f4aa9 28#include <stdio.h>
c75a6a9e 29#include <signal.h>
288f4aa9
BH
30#include <stdlib.h>
31#include <unistd.h>
c8ddb7c2 32#include <netinet/tcp.h>
288f4aa9
BH
33#include "mtasker.hh"
34#include <utility>
288f4aa9
BH
35#include "arguments.hh"
36#include "syncres.hh"
88def049
BH
37#include <fcntl.h>
38#include <fstream>
5c633640
BH
39#include "sstuff.hh"
40#include <boost/tuple/tuple.hpp>
41#include <boost/tuple/tuple_comparison.hpp>
72df400f 42#include <boost/shared_array.hpp>
ea634573 43#include <boost/lexical_cast.hpp>
7f1fa77d 44#include <boost/function.hpp>
ea634573
BH
45#include "dnsparser.hh"
46#include "dnswriter.hh"
47#include "dnsrecords.hh"
f814d7c8 48#include "zoneparser-tng.hh"
1d5b3ce6 49#include "rec_channel.hh"
aaacf7f2 50#include "logger.hh"
c8ddb7c2 51#include "iputils.hh"
09e6702a 52#include "mplexer.hh"
1d5b3ce6 53
a2bfc3ff
BH
54#ifndef RECURSOR
55#include "statbag.hh"
56StatBag S;
57#endif
58
09e6702a
BH
59FDMultiplexer* g_fdm;
60unsigned int g_maxTCPPerClient;
61bool g_logCommonErrors;
33988bfb 62using namespace boost;
5c633640 63
27adc173 64#ifdef __FreeBSD__ // see cvstrac ticket #26
7f617eb9
BH
65#include <pthread.h>
66#include <semaphore.h>
67#endif
68
eefd15f9 69MemRecursorCache RC;
1d5b3ce6
BH
70RecursorStats g_stats;
71bool g_quiet;
c8ddb7c2 72NetmaskGroup* g_allowFrom;
88def049 73string s_programname="pdns_recursor";
37d3f960 74vector<int> g_tcpListenSockets;
3159c9ef 75int g_tcpTimeout;
288f4aa9 76
ea634573 77struct DNSComboWriter {
c9e9e5e0 78 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(data, len), d_now(now), d_tcp(false), d_socket(-1)
ea634573
BH
79 {}
80 MOADNSParser d_mdp;
37d3f960 81 void setRemote(ComboAddress* sa)
ea634573 82 {
37d3f960
BH
83 d_remote=*sa;
84 d_socklen= d_remote.sin4.sin_family == AF_INET ? sizeof(sockaddr_in) : sizeof(sockaddr_in6);
ea634573
BH
85 }
86
87 void setSocket(int sock)
88 {
89 d_socket=sock;
90 }
a1754c6a
BH
91
92 string getRemote() const
93 {
37d3f960 94 return d_remote.toString();
a1754c6a
BH
95 }
96
c9e9e5e0 97 struct timeval d_now;
37d3f960 98 ComboAddress d_remote;
ea634573
BH
99 socklen_t d_socklen;
100 bool d_tcp;
101 int d_socket;
102};
103
104
27adc173
BH
105#ifndef WIN32
106#ifndef __FreeBSD__
288f4aa9
BH
extern "C" {
  // No-op stand-ins for the semaphore and pthread mutex primitives:
  // every call does nothing and reports success.
  int sem_init(sem_t*, int, unsigned int) { return 0; }
  int sem_wait(sem_t*) { return 0; }
  int sem_trywait(sem_t*) { return 0; }
  int sem_post(sem_t*) { return 0; }
  int sem_getvalue(sem_t*, int*) { return 0; }

  pthread_t pthread_self(void) { return (pthread_t) 0; }

  int pthread_mutex_init(pthread_mutex_t*, const pthread_mutexattr_t*) { return 0; }
  int pthread_mutex_lock(pthread_mutex_t*) { return 0; }
  int pthread_mutex_unlock(pthread_mutex_t*) { return 0; }
  int pthread_mutex_destroy(pthread_mutex_t*) { return 0; }
}
27adc173 119#endif // __FreeBSD__
caa6eefa 120#endif // WIN32
288f4aa9 121
288f4aa9
BH
122ArgvMap &arg()
123{
124 static ArgvMap theArg;
125 return theArg;
126}
4ef015cd 127
09e6702a 128struct timeval g_now;
cd50f30d 129typedef vector<int> tcpserversocks_t;
5c633640 130
35ce8576
BH
131typedef MTasker<PacketID,string> MT_t;
132MT_t* MT;
5c633640 133
09e6702a
BH
134
135void handleTCPClientWritable(int fd, boost::any& var);
136
50c81227 137// -1 is error, 0 is timeout, 1 is success
5c633640
BH
138int asendtcp(const string& data, Socket* sock)
139{
140 PacketID pident;
141 pident.sock=sock;
142 pident.outMSG=data;
5c633640 143
09e6702a 144 g_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
50c81227 145 string packet;
5c633640 146
9170fbaf
BH
147 int ret=MT->waitEvent(pident,&packet,1);
148 if(!ret || ret==-1) { // timeout
09e6702a 149 g_fdm->removeWriteFD(sock->getHandle());
5c633640 150 }
50c81227
BH
151 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
152 return -1;
153 }
9170fbaf 154 return ret;
5c633640
BH
155}
156
09e6702a
BH
157void handleTCPClientReadable(int fd, boost::any& var);
158
9170fbaf 159// -1 is error, 0 is timeout, 1 is success
5c633640 160int arecvtcp(string& data, int len, Socket* sock)
288f4aa9 161{
50c81227 162 data.clear();
5c633640
BH
163 PacketID pident;
164 pident.sock=sock;
165 pident.inNeeded=len;
09e6702a 166 g_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
5c633640 167
9170fbaf
BH
168 int ret=MT->waitEvent(pident,&data,1);
169 if(!ret || ret==-1) { // timeout
09e6702a 170 g_fdm->removeReadFD(sock->getHandle());
288f4aa9 171 }
50c81227
BH
172 else if(data.empty()) {// error, EOF or other
173 return -1;
174 }
175
9170fbaf 176 return ret;
288f4aa9
BH
177}
178
998a4334 179// returns -1 for errors which might go away, throws for ones that won't
4ef015cd
BH
180int makeClientSocket()
181{
182 int ret=socket(AF_INET, SOCK_DGRAM, 0);
998a4334
BH
183 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
184 return ret;
185
4ef015cd
BH
186 if(ret<0)
187 throw AhuException("Making a socket for resolver: "+stringerror());
188
189 static optional<struct sockaddr_in> sin;
4ef015cd
BH
190 if(!sin) {
191 struct sockaddr_in tmp;
192 sin=tmp;
193 memset((char *)&*sin,0, sizeof(sin));
194 sin->sin_family = AF_INET;
195
196 if(!IpToU32(::arg()["query-local-address"], &sin->sin_addr.s_addr))
197 throw AhuException("Unable to resolve local address '"+ ::arg()["query-local-address"] +"'");
4ef015cd 198 }
998a4334
BH
199
200 int tries=10;
4ef015cd 201 while(--tries) {
998a4334
BH
202 uint16_t port=1025+Utility::random()%64510;
203 if(tries==1) // fall back to kernel 'random'
204 port=0;
78983f94 205
4ef015cd
BH
206 sin->sin_port = htons(port);
207
095c3045 208 if (::bind(ret, (struct sockaddr *)&*sin, sizeof(*sin)) >= 0)
4ef015cd
BH
209 break;
210 }
211 if(!tries)
212 throw AhuException("Resolver binding to local query client socket: "+stringerror());
213
214 Utility::setNonBlocking(ret);
215 return ret;
216}
217
09e6702a
BH
218void handleUDPServerResponse(int fd, boost::any&);
219
4ef015cd
BH
220// you can ask this class for a UDP socket to send a query from
221// this socket is not yours, don't even think about deleting it
222// but after you call 'returnSocket' on it, don't assume anything anymore
223class UDPClientSocks
224{
4ef015cd
BH
225 unsigned int d_numsocks;
226 unsigned int d_maxsocks;
998a4334 227
4ef015cd 228public:
998a4334 229 UDPClientSocks() : d_numsocks(0), d_maxsocks(5000)
4ef015cd
BH
230 {
231 }
232
233 typedef map<int,int> socks_t;
234 socks_t d_socks;
235
998a4334 236 // returning -1 means: temporary OS error (ie, out of files)
4ef015cd
BH
237 int getSocket()
238 {
998a4334
BH
239 pair<int, int> sock=make_pair(makeClientSocket(), 1);
240 if(sock.first < 0) // temporary error - exception otherwise
241 return -1;
242
243 d_socks.insert(sock);
244 d_numsocks++;
245 return sock.first;
4ef015cd
BH
246 }
247
095c3045
BH
248 void returnSocket(int fd)
249 {
250 socks_t::iterator i=d_socks.find(fd);
251 returnSocket(i);
252 }
253
4ef015cd 254 // return a socket to the pool, or simply erase it
095c3045 255 void returnSocket(socks_t::iterator& i)
4ef015cd 256 {
600fc20b
BH
257 if(i==d_socks.end()) {
258 throw AhuException("Trying to return a socket not in the pool");
259 }
998a4334
BH
260 g_fdm->removeReadFD(i->first);
261 ::close(i->first);
262
263 d_socks.erase(i++);
264 --d_numsocks;
4ef015cd
BH
265 }
266}g_udpclientsocks;
267
288f4aa9
BH
268
269/* these two functions are used by LWRes */
998a4334
BH
// -2 is OS error, -1 is error that depends on the remote, > 0 is success (the number of bytes sent)
271int asendto(const char *data, int len, int flags, struct sockaddr *toaddr, int addrlen, int id, const string& domain, int* fd)
288f4aa9 272{
4ef015cd 273 *fd=g_udpclientsocks.getSocket();
998a4334
BH
274 if(*fd < 0)
275 return -2;
276 PacketID pident;
277 pident.fd=*fd;
278 pident.id=id;
279 pident.domain=domain;
280 memcpy(&pident.remote, toaddr, sizeof(pident.remote));
281
282 int ret=connect(*fd, toaddr, addrlen);
283 if(ret < 0)
284 return ret;
285
286 g_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
287 return send(*fd, data, len, 0);
288f4aa9
BH
288}
289
9170fbaf 290// -1 is error, 0 is timeout, 1 is success
4ef015cd 291int arecvfrom(char *data, int len, int flags, struct sockaddr *toaddr, Utility::socklen_t *addrlen, int *d_len, int id, const string& domain, int fd)
288f4aa9 292{
0d5f0a9f
BH
293 static optional<unsigned int> nearMissLimit;
294 if(!nearMissLimit)
295 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
296
288f4aa9 297 PacketID pident;
4ef015cd 298 pident.fd=fd;
288f4aa9 299 pident.id=id;
0d5f0a9f 300 pident.domain=domain;
29a14b24 301 memcpy(&pident.remote, toaddr, sizeof(pident.remote));
b636533b 302
288f4aa9 303 string packet;
29a14b24 304 int ret=MT->waitEvent(pident, &packet, 1);
9170fbaf 305 if(ret > 0) {
998a4334
BH
306 if(packet.empty()) {// means "error"
307 return -1;
308 }
309
9170fbaf
BH
310 *d_len=packet.size();
311 memcpy(data,packet.c_str(),min(len,*d_len));
0d5f0a9f 312 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
37d3f960 313 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<sockAddrToString((struct sockaddr_in*)toaddr)<<", assuming spoof attempt."<<endl;
0d5f0a9f 314 g_stats.spoofCount++;
35ce8576
BH
315 return -1;
316 }
288f4aa9 317 }
09e6702a 318 else {
095c3045 319 g_udpclientsocks.returnSocket(fd);
09e6702a 320 }
9170fbaf 321 return ret;
288f4aa9
BH
322}
323
aa4e4cbf 324void setBuffer(int fd, int optname, uint32_t size)
ce8deb27 325{
9b356afc 326 uint32_t psize=0;
91e4ecf3 327 socklen_t len=sizeof(psize);
9b356afc 328
aa4e4cbf 329 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
a19fb8e8 330 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
9b356afc
BH
331 return;
332 }
333
aa4e4cbf 334 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
a19fb8e8 335 L<<Logger::Error<<"Warning: unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
ce8deb27
BH
336}
337
338
aa4e4cbf
BH
339static void setReceiveBuffer(int fd, uint32_t size)
340{
341 setBuffer(fd, SO_RCVBUF, size);
342}
343
344static void setSendBuffer(int fd, uint32_t size)
345{
346 setBuffer(fd, SO_SNDBUF, size);
347}
348
88def049
BH
349static void writePid(void)
350{
2e3d8a19 351 string fname=::arg()["socket-dir"]+"/"+s_programname+".pid";
88def049
BH
352 ofstream of(fname.c_str());
353 if(of)
369369f6 354 of<< getpid() <<endl;
88def049 355 else
562588a3 356 L<<Logger::Error<<"Requested to write pid for "<<getpid()<<" to "<<fname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
357}
358
bdf40704 359void primeHints(void)
288f4aa9
BH
360{
361 // prime root cache
288f4aa9 362 set<DNSResourceRecord>nsset;
f814d7c8 363
2e3d8a19 364 if(::arg()["hint-file"].empty()) {
f814d7c8
BH
365 static char*ips[]={"198.41.0.4", "192.228.79.201", "192.33.4.12", "128.8.10.90", "192.203.230.10", "192.5.5.241", "192.112.36.4", "128.63.2.53",
366 "192.36.148.17","192.58.128.30", "193.0.14.129", "198.32.64.12", "202.12.27.33"};
367 DNSResourceRecord arr, nsrr;
368 arr.qtype=QType::A;
369 arr.ttl=time(0)+3600000;
370 nsrr.qtype=QType::NS;
371 nsrr.ttl=time(0)+3600000;
372
5456e605 373 for(char c='a';c<='m';++c) {
f814d7c8 374 static char templ[40];
7738a23f 375 strncpy(templ,"a.root-servers.net.", sizeof(templ) - 1);
f814d7c8
BH
376 *templ=c;
377 arr.qname=nsrr.content=templ;
5456e605 378 arr.content=ips[c-'a'];
f814d7c8
BH
379 set<DNSResourceRecord> aset;
380 aset.insert(arr);
381 RC.replace(string(templ), QType(QType::A), aset);
382
383 nsset.insert(nsrr);
384 }
385 }
386 else {
2e3d8a19 387 ZoneParserTNG zpt(::arg()["hint-file"]);
f814d7c8 388 DNSResourceRecord rr;
ea634573 389 set<DNSResourceRecord> aset;
288f4aa9 390
f814d7c8 391 while(zpt.get(rr)) {
f814d7c8
BH
392 rr.ttl+=time(0);
393 if(rr.qtype.getCode()==QType::A) {
394 set<DNSResourceRecord> aset;
395 aset.insert(rr);
396 RC.replace(rr.qname, QType(QType::A), aset);
397 }
398 if(rr.qtype.getCode()==QType::NS) {
e2e2c5d8 399 rr.content=toLower(rr.content);
f814d7c8
BH
400 nsset.insert(rr);
401 }
402 }
288f4aa9 403 }
7738a23f 404 RC.replace(".", QType(QType::NS), nsset); // and stuff in the cache
288f4aa9
BH
405}
406
37d3f960 407map<ComboAddress, uint32_t> g_tcpClientCounts;
0e9d9ce2
BH
408
409struct TCPConnection
410{
411 int fd;
7f1fa77d 412 enum stateenum {BYTE0, BYTE1, GETQUESTION, DONE} state;
0e9d9ce2
BH
413 int qlen;
414 int bytesread;
37d3f960 415 ComboAddress remote;
0e9d9ce2
BH
416 char data[65535];
417 time_t startTime;
418
419 void closeAndCleanup()
420 {
421 close(fd);
37d3f960
BH
422 if(!g_tcpClientCounts[remote]--)
423 g_tcpClientCounts.erase(remote);
6dcd28c3 424 s_currentConnections--;
0e9d9ce2 425 }
6dcd28c3 426 static unsigned int s_currentConnections; //!< total number of current TCP connections
0e9d9ce2
BH
427};
428
6dcd28c3
BH
429unsigned int TCPConnection::s_currentConnections;
430
288f4aa9
BH
431void startDoResolve(void *p)
432{
7b1469bb 433 DNSComboWriter* dc=(DNSComboWriter *)p;
288f4aa9 434 try {
10321a98
BH
435 uint16_t maxudpsize=512;
436 MOADNSParser::EDNSOpts edo;
437 if(dc->d_mdp.getEDNSOpts(&edo)) {
438 maxudpsize=edo.d_packetsize;
439 }
09e6702a 440
ea634573 441 vector<DNSResourceRecord> ret;
9170fbaf 442
ea634573
BH
443 vector<uint8_t> packet;
444 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
445
446 pw.getHeader()->aa=0;
447 pw.getHeader()->ra=1;
c154c8a4 448 pw.getHeader()->qr=1;
ea634573 449 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 450 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
ea634573 451
c9e9e5e0 452 SyncRes sr(dc->d_now);
1d5b3ce6 453 if(!g_quiet)
8a63d3ce
BH
454 L<<Logger::Error<<"["<<MT->getTid()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
455 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote()<<endl;
c75a6a9e 456
fededf47 457 sr.setId(MT->getTid());
ea634573 458 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
459 sr.setCacheOnly();
460
ea634573 461 int res=sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret);
1d5b3ce6 462 if(res<0) {
ea634573 463 pw.getHeader()->rcode=RCode::ServFail;
bec87d21 464 // no commit here, because no record
1d5b3ce6
BH
465 g_stats.servFails++;
466 }
288f4aa9 467 else {
ea634573 468 pw.getHeader()->rcode=res;
1d5b3ce6 469 switch(res) {
5e4a2466
BH
470 case RCode::ServFail:
471 g_stats.servFails++;
472 break;
1d5b3ce6
BH
473 case RCode::NXDomain:
474 g_stats.nxDomains++;
475 break;
476 case RCode::NoError:
477 g_stats.noErrors++;
478 break;
479 }
480
c154c8a4 481 if(ret.size()) {
e67e250f 482 shuffle(ret);
c154c8a4 483 for(vector<DNSResourceRecord>::const_iterator i=ret.begin();i!=ret.end();++i) {
10321a98 484 pw.startRecord(i->qname, i->qtype.getCode(), i->ttl, 1, (DNSPacketWriter::Place)i->d_place);
7b1469bb
BH
485
486 shared_ptr<DNSRecordContent> drc(DNSRecordContent::mastermake(i->qtype.getCode(), 1, i->content));
487
c154c8a4 488 drc->toPacket(pw);
7b1469bb 489
10321a98
BH
490 if(!dc->d_tcp && pw.size() > maxudpsize) {
491 pw.rollback();
1791e3c4
BH
492 if(i->d_place==DNSResourceRecord::ANSWER) // only truncate if we actually omitted parts of the answer
493 pw.getHeader()->tc=1;
10321a98
BH
494 goto sendit; // need to jump over pw.commit
495 }
c154c8a4
BH
496 }
497 pw.commit();
ea634573 498 }
288f4aa9 499 }
10321a98 500 sendit:;
ea634573 501 if(!dc->d_tcp) {
37d3f960 502 sendto(dc->d_socket, &*packet.begin(), packet.size(), 0, (struct sockaddr *)(&dc->d_remote), dc->d_socklen);
feccc9fc 503 }
9c495589
BH
504 else {
505 char buf[2];
ea634573
BH
506 buf[0]=packet.size()/256;
507 buf[1]=packet.size()%256;
feccc9fc
BH
508
509 struct iovec iov[2];
510
ea634573
BH
511 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
512 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 513
ea634573 514 int ret=writev(dc->d_socket, iov, 2);
0e9d9ce2 515 bool hadError=true;
feccc9fc 516
0e9d9ce2
BH
517 if(ret == 0)
518 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
519 else if(ret < 0 )
520 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
ea634573 521 else if((unsigned int)ret != 2 + packet.size())
aa4e4cbf 522 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<ret<<")"<<endl;
0e9d9ce2
BH
523 else
524 hadError=false;
09e6702a
BH
525
526 // update tcp connection status, either by closing or moving to 'BYTE0'
527
528 if(hadError) {
529 g_fdm->removeReadFD(dc->d_socket);
530 close(dc->d_socket);
09e6702a 531 }
a6ae6414
BH
532 else {
533 any_cast<TCPConnection&>(g_fdm->getReadParameter(dc->d_socket)).state=TCPConnection::BYTE0;
534 struct timeval now;
535 gettimeofday(&now, 0); // needs to be updated
536 g_fdm->setReadTTD(dc->d_socket, now, g_tcpTimeout);
0e9d9ce2 537 }
9c495589
BH
538 }
539
1d5b3ce6 540 if(!g_quiet) {
8a63d3ce 541 L<<Logger::Error<<"["<<MT->getTid()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
ea634573 542 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
5c633640 543 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
c75a6a9e
BH
544 }
545
eefd15f9 546 sr.d_outqueries ? RC.cacheMisses++ : RC.cacheHits++;
fe213470
BH
547 float spent=makeFloat(sr.d_now-dc->d_now);
548 if(spent < 0.001)
549 g_stats.answers0_1++;
550 else if(spent < 0.010)
551 g_stats.answers1_10++;
552 else if(spent < 0.1)
553 g_stats.answers10_100++;
554 else if(spent < 1.0)
555 g_stats.answers100_1000++;
556 else
557 g_stats.answersSlow++;
558
574af7ea 559 uint64_t newLat=(uint64_t)(spent*1000000);
87b8e43a
BH
560 if(newLat < 1000000) // outliers of several minutes exist..
561 g_stats.avgLatencyUsec=(uint64_t)((1-0.0001)*g_stats.avgLatencyUsec + 0.0001*newLat);
ea634573 562 delete dc;
288f4aa9
BH
563 }
564 catch(AhuException &ae) {
c836dc19 565 L<<Logger::Error<<"startDoResolve problem: "<<ae.reason<<endl;
288f4aa9 566 }
7b1469bb
BH
567 catch(MOADNSException& e) {
568 L<<Logger::Error<<"DNS parser error: "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
569 }
c154c8a4
BH
570 catch(exception& e) {
571 L<<Logger::Error<<"STL error: "<<e.what()<<endl;
572 }
288f4aa9 573 catch(...) {
c836dc19 574 L<<Logger::Error<<"Any other exception in a resolver context"<<endl;
288f4aa9
BH
575 }
576}
577
1d5b3ce6
BH
578RecursorControlChannel s_rcc;
579
580void makeControlChannelSocket()
581{
41f7a068
BH
582 string sockname=::arg()["socket-dir"]+"/pdns_recursor.controlsocket";
583 if(::arg().mustDo("fork")) {
584 sockname+="."+lexical_cast<string>(getpid());
585 L<<Logger::Warning<<"Forked control socket name: "<<sockname<<endl;
586 }
587 s_rcc.listen(sockname);
1d5b3ce6
BH
588}
589
09e6702a
BH
590void handleRunningTCPQuestion(int fd, boost::any& var)
591{
592 TCPConnection& conn=any_cast<TCPConnection&>(var);
593
594 if(conn.state==TCPConnection::BYTE0) {
595 int bytes=read(conn.fd,conn.data,2);
596 if(bytes==1)
597 conn.state=TCPConnection::BYTE1;
598 if(bytes==2) {
599 conn.qlen=(conn.data[0]<<8)+conn.data[1];
600 conn.bytesread=0;
601 conn.state=TCPConnection::GETQUESTION;
602 }
603 if(!bytes || bytes < 0) {
6dcd28c3 604 TCPConnection tmp(conn);
09e6702a 605 g_fdm->removeReadFD(fd);
6dcd28c3 606 tmp.closeAndCleanup();
09e6702a
BH
607 return;
608 }
609 }
610 else if(conn.state==TCPConnection::BYTE1) {
611 int bytes=read(conn.fd,conn.data+1,1);
612 if(bytes==1) {
613 conn.state=TCPConnection::GETQUESTION;
614 conn.qlen=(conn.data[0]<<8)+conn.data[1];
615 conn.bytesread=0;
616 }
617 if(!bytes || bytes < 0) {
618 if(g_logCommonErrors)
37d3f960 619 L<<Logger::Error<<"TCP client "<< conn.remote.toString() <<" disconnected after first byte"<<endl;
6dcd28c3 620 TCPConnection tmp(conn);
09e6702a 621 g_fdm->removeReadFD(fd);
6dcd28c3 622 tmp.closeAndCleanup(); // conn loses validity here..
09e6702a
BH
623 return;
624 }
625 }
626 else if(conn.state==TCPConnection::GETQUESTION) {
627 int bytes=read(conn.fd,conn.data + conn.bytesread,conn.qlen - conn.bytesread);
628 if(!bytes || bytes < 0) {
37d3f960 629 L<<Logger::Error<<"TCP client "<< conn.remote.toString() <<" disconnected while reading question body"<<endl;
6dcd28c3 630 TCPConnection tmp(conn);
09e6702a 631 g_fdm->removeReadFD(fd);
6dcd28c3 632 tmp.closeAndCleanup(); // conn loses validity here..
09e6702a
BH
633
634 return;
635 }
636 conn.bytesread+=bytes;
637 if(conn.bytesread==conn.qlen) {
a6ae6414 638 conn.state=TCPConnection::DONE; // this makes us immune from timeouts, from now on *we* are responsible
09e6702a
BH
639 DNSComboWriter* dc=0;
640 try {
641 dc=new DNSComboWriter(conn.data, conn.qlen, g_now);
642 }
643 catch(MOADNSException &mde) {
644 g_stats.clientParseError++;
37d3f960 645 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn.remote.toString() <<endl;
6dcd28c3 646 TCPConnection tmp(conn);
09e6702a 647 g_fdm->removeReadFD(fd);
6dcd28c3 648 tmp.closeAndCleanup();
09e6702a
BH
649 return;
650 }
651
652 dc->setSocket(conn.fd);
653 dc->d_tcp=true;
37d3f960 654 dc->setRemote(&conn.remote);
09e6702a
BH
655 if(dc->d_mdp.d_header.qr)
656 L<<Logger::Error<<"Ignoring answer on server socket!"<<endl;
657 else {
658 ++g_stats.qcounter;
659 ++g_stats.tcpqcounter;
660 MT->makeThread(startDoResolve, dc);
661 return;
662 }
663 }
664 }
665}
666
6dcd28c3 667//! Handle new incoming TCP connection
09e6702a
BH
668void handleNewTCPQuestion(int fd, boost::any& )
669{
37d3f960 670 ComboAddress addr;
09e6702a
BH
671 socklen_t addrlen=sizeof(addr);
672 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
673 if(newsock>0) {
674 if(g_allowFrom && !g_allowFrom->match(&addr)) {
675 g_stats.unauthorizedTCP++;
676 close(newsock);
677 return;
678 }
679
37d3f960 680 if(g_maxTCPPerClient && g_tcpClientCounts.count(addr) && g_tcpClientCounts[addr] >= g_maxTCPPerClient) {
09e6702a
BH
681 g_stats.tcpClientOverflow++;
682 close(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
683 return;
684 }
37d3f960 685 g_tcpClientCounts[addr]++;
09e6702a
BH
686 Utility::setNonBlocking(newsock);
687 TCPConnection tc;
688 tc.fd=newsock;
689 tc.state=TCPConnection::BYTE0;
690 tc.remote=addr;
691 tc.startTime=g_now.tv_sec;
6dcd28c3 692 TCPConnection::s_currentConnections++;
09e6702a 693 g_fdm->addReadFD(tc.fd, handleRunningTCPQuestion, tc);
0bff046b
BH
694 struct timeval now;
695 gettimeofday(&now, 0);
3159c9ef 696 g_fdm->setReadTTD(tc.fd, now, g_tcpTimeout);
09e6702a
BH
697 }
698}
699
5db529f8
BH
700
701void handleNewUDPQuestion(int fd, boost::any& var)
702{
703 int d_len;
704 char data[1500];
705 ComboAddress fromaddr;
706 socklen_t addrlen=sizeof(fromaddr);
707
708 while((d_len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen)) >= 0) {
709 if(g_allowFrom && !g_allowFrom->match(&fromaddr)) {
710 g_stats.unauthorizedUDP++;
711 continue;
712 }
713
714 try {
715 DNSComboWriter* dc = new DNSComboWriter(data, d_len, g_now);
716
717 dc->setRemote(&fromaddr);
718
719 if(dc->d_mdp.d_header.qr) {
720 if(g_logCommonErrors)
721 L<<Logger::Error<<"Ignoring answer from "<<dc->getRemote()<<" on server socket!"<<endl;
722 }
723 else {
724 ++g_stats.qcounter;
725 dc->setSocket(fd);
726 dc->d_tcp=false;
727 MT->makeThread(startDoResolve, (void*) dc);
728 }
729 }
730 catch(MOADNSException& mde) {
731 g_stats.clientParseError++;
732 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
733 }
734 }
735}
736
737typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
738deferredAdd_t deferredAdd;
739
f28307ad 740void makeTCPServerSockets()
9c495589 741{
37d3f960 742 int fd;
f28307ad 743 vector<string>locals;
2e3d8a19 744 stringtok(locals,::arg()["local-address"]," ,");
9c495589 745
f28307ad
BH
746 if(locals.empty())
747 throw AhuException("No local address specified");
748
37d3f960 749 ComboAddress sin;
f28307ad 750 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
f28307ad 751 memset((char *)&sin,0, sizeof(sin));
37d3f960
BH
752 ComboAddress sin;
753 sin.sin4.sin_family = AF_INET;
754 if(!IpToU32(*i, &sin.sin4.sin_addr.s_addr)) {
755 sin.sin6.sin6_family = AF_INET6;
756 if(inet_pton(AF_INET6, i->c_str(), &sin.sin6.sin6_addr) <= 0)
757 throw AhuException("Unable to resolve local address '"+ *i +"'");
758 }
759
760 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
761 if(fd<0)
762 throw AhuException("Making a TCP server socket for resolver: "+stringerror());
f28307ad
BH
763
764 int tmp=1;
37d3f960 765 if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
f28307ad 766 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 767 exit(1);
f28307ad
BH
768 }
769
c8ddb7c2 770#ifdef TCP_DEFER_ACCEPT
37d3f960
BH
771 if(setsockopt(fd, SOL_TCP,TCP_DEFER_ACCEPT,(char*)&tmp,sizeof tmp) >= 0) {
772 if(i==locals.begin())
773 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
c8ddb7c2
BH
774 }
775#endif
776
37d3f960
BH
777 sin.sin4.sin_port = htons(::arg().asNum("local-port"));
778 int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
779 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
f28307ad
BH
780 throw AhuException("Binding TCP server socket for "+*i+": "+stringerror());
781
37d3f960
BH
782 Utility::setNonBlocking(fd);
783 setSendBuffer(fd, 65000);
784 listen(fd, 128);
5db529f8
BH
785 deferredAdd.push_back(make_pair(fd, handleNewTCPQuestion));
786 // g_fdm->addReadFD(fd, handleNewTCPQuestion);
aa136564
BH
787 if(sin.sin4.sin_family == AF_INET)
788 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<::arg().asNum("local-port")<<endl;
789 else
790 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<::arg().asNum("local-port")<<endl;
f28307ad 791 }
9c495589
BH
792}
793
09e6702a 794
f28307ad 795void makeUDPServerSockets()
288f4aa9 796{
f28307ad 797 vector<string>locals;
2e3d8a19 798 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 799
f28307ad
BH
800 if(locals.empty())
801 throw AhuException("No local address specified");
802
2e3d8a19 803 if(::arg()["local-address"]=="0.0.0.0") {
c836dc19 804 L<<Logger::Warning<<"It is advised to bind to explicit addresses with the --local-address option"<<endl;
288f4aa9 805 }
525b8a7c 806
f28307ad 807 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
37d3f960
BH
808 ComboAddress sin;
809 memset(&sin, 0, sizeof(sin));
810 sin.sin4.sin_family = AF_INET;
811 if(!IpToU32(*i, &sin.sin4.sin_addr.s_addr)) {
812 sin.sin6.sin6_family = AF_INET6;
813 if(inet_pton(AF_INET6, i->c_str(), &sin.sin6.sin6_addr) <= 0)
814 throw AhuException("Unable to resolve local address '"+ *i +"'");
815 }
816
817 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM,0);
f28307ad 818 if(fd<0)
37d3f960
BH
819 throw AhuException("Making a UDP server socket for resolver: "+stringerror());
820
a19fb8e8 821 setReceiveBuffer(fd, 200000);
37d3f960
BH
822 sin.sin4.sin_port = htons(::arg().asNum("local-port"));
823
824 int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
825 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
f28307ad
BH
826 throw AhuException("Resolver binding to server socket for "+*i+": "+stringerror());
827
828 Utility::setNonBlocking(fd);
5db529f8
BH
829 // g_fdm->addReadFD(fd, handleNewUDPQuestion);
830 deferredAdd.push_back(make_pair(fd, handleNewUDPQuestion));
37d3f960 831 g_tcpListenSockets.push_back(fd);
aa136564
BH
832 if(sin.sin4.sin_family == AF_INET)
833 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<::arg().asNum("local-port")<<endl;
834 else
835 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<::arg().asNum("local-port")<<endl;
f28307ad 836 }
c836dc19 837}
caa6eefa 838
9c495589 839
caa6eefa 840#ifndef WIN32
c836dc19
BH
841void daemonize(void)
842{
843 if(fork())
844 exit(0); // bye bye
845
846 setsid();
847
848 // cleanup open fds, but skip sockets
849 close(0);
850 close(1);
851 close(2);
288f4aa9 852}
caa6eefa
BH
853#endif
854
aaacf7f2 855uint64_t counter;
c75a6a9e
BH
856bool statsWanted;
857
1d5b3ce6 858
c75a6a9e
BH
859void usr1Handler(int)
860{
861 statsWanted=true;
862}
ae1b2e98 863
c9e9e5e0
BH
864
865
9170fbaf
BH
866void usr2Handler(int)
867{
868 SyncRes::setLog(true);
1d5b3ce6
BH
869 g_quiet=false;
870 ::arg().set("quiet")="no";
c9e9e5e0 871
9170fbaf
BH
872}
873
c75a6a9e
BH
874void doStats(void)
875{
aaacf7f2
BH
876 if(g_stats.qcounter) {
877 L<<Logger::Error<<"stats: "<<g_stats.qcounter<<" questions, "<<RC.size()<<" cache entries, "<<SyncRes::s_negcache.size()<<" negative entries, "
8a5602d4 878 <<(int)((RC.cacheHits*100.0)/(RC.cacheHits+RC.cacheMisses))<<"% cache hits"<<endl;
2e3d8a19 879 L<<Logger::Error<<"stats: throttle map: "<<SyncRes::s_throttle.size()<<", ns speeds: "
8cd5b55e 880 <<SyncRes::s_nsSpeeds.size()<<endl; // ", bytes: "<<RC.bytes()<<endl;
8a5602d4 881 L<<Logger::Error<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
525b8a7c
BH
882 L<<Logger::Error<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
883 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
5c633640 884 L<<Logger::Error<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<MT->numProcesses()<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
c75a6a9e 885 }
7becf07f
BH
886 else if(statsWanted)
887 L<<Logger::Error<<"stats: no stats yet!"<<endl;
888
c75a6a9e
BH
889 statsWanted=false;
890}
c836dc19 891
29f0b1ce 892static void houseKeeping(void *)
c836dc19 893{
ae1b2e98 894 static time_t last_stat, last_rootupdate, last_prune;
c9e9e5e0
BH
895 struct timeval now;
896 gettimeofday(&now, 0);
897
255e0a07 898 if(now.tv_sec - last_prune > 300) {
5e4a2466
BH
899 DTime dt;
900 dt.setTimeval(now);
eefd15f9 901 RC.doPrune();
33988bfb
BH
902
903 typedef SyncRes::negcache_t::nth_index<1>::type negcache_by_ttd_index_t;
904 negcache_by_ttd_index_t& ttdindex=boost::multi_index::get<1>(SyncRes::s_negcache);
905
906 negcache_by_ttd_index_t::iterator i=ttdindex.lower_bound(now.tv_sec);
907 ttdindex.erase(ttdindex.begin(), i);
2e3d8a19 908
c9e9e5e0 909 time_t limit=now.tv_sec-300;
2e3d8a19
BH
910 for(SyncRes::nsspeeds_t::iterator i = SyncRes::s_nsSpeeds.begin() ; i!= SyncRes::s_nsSpeeds.end(); )
911 if(i->second.stale(limit))
912 SyncRes::s_nsSpeeds.erase(i++);
913 else
914 ++i;
915
255e0a07 916 // cerr<<"Pruned "<<pruned<<" records, left "<<SyncRes::s_negcache.size()<<"\n";
5e4a2466 917// cout<<"Prune took "<<dt.udiff()<<"usec\n";
ae1b2e98
BH
918 last_prune=time(0);
919 }
c9e9e5e0 920 if(now.tv_sec - last_stat>1800) {
c75a6a9e 921 doStats();
c836dc19
BH
922 last_stat=time(0);
923 }
c9e9e5e0
BH
924 if(now.tv_sec -last_rootupdate>7200) {
925 SyncRes sr(now);
ea634573 926 vector<DNSResourceRecord> ret;
c836dc19
BH
927
928 sr.setNoCache();
7738a23f 929 int res=sr.beginResolve(".", QType(QType::NS), ret);
c836dc19
BH
930 if(!res) {
931 L<<Logger::Error<<"Refreshed . records"<<endl;
c9e9e5e0 932 last_rootupdate=now.tv_sec;
c836dc19
BH
933 }
934 else
935 L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
936 }
937}
288f4aa9 938
4e120339 939
9c495589 940
#if 0
// Disabled debugging aid: replaces gettimeofday() with a version that counts
// how often each call stack invokes it. Needs <execinfo.h> and, for readable
// symbol names, the -rdynamic linker flag.
#include <execinfo.h>

// points at the per-backtrace call counters once gettimeofday() has been called
map<string,uint32_t>* casesptr;

// Print the recorded call stacks to stdout, ordered by number of calls.
static void dumpGettimeofdayCallers()
{
  if(!casesptr)
    return;

  multimap<uint32_t,string> rev;
  for(map<string,uint32_t>::const_iterator i=casesptr->begin(); i!=casesptr->end(); ++i) {
    rev.insert(make_pair(i->second,i->first));
  }
  for(multimap<uint32_t,string>::const_iterator i=rev.begin(); i!= rev.end(); ++i)
    cout<<i->first<<" times: \n"<<i->second<<"\n";

  cout.flush();
}

// Return the innermost stack frames of the caller, one symbol per line.
static string maketrace()
{
  void *array[20];  // room for 20 frames, although only 5 are requested below
  size_t size;
  char **strings;
  size_t i;

  size = backtrace (array, 5);
  strings = backtrace_symbols (array, size); //Need -rdynamic gcc (linker) flag for this to work

  string ret;
  if(!strings)
    return ret;

  for (i = 0; i < size; i++)
    ret+=string(strings[i])+"\n";

  free(strings);  // backtrace_symbols() returns one malloc()ed block owned by the caller
  return ret;
}

extern "C" {

int gettimeofday (struct timeval *__restrict __tv,
                  __timezone_ptr_t __tz)
{
  static map<string, uint32_t> s_cases;
  casesptr=&s_cases;
  s_cases[maketrace()]++;
  __tv->tv_sec=time(0);
  __tv->tv_usec=0;  // only second resolution is emulated
  return 0;
}

}
#endif
d6d5dea7 985
0d5f0a9f
BH
/** Extract the question name from a raw DNS packet as dot-terminated text.

    @param packet  start of the packet (the 12 byte DNS header comes first)
    @param len     number of valid bytes in packet
    @return the labels of the question name, each followed by '.', or "." when
            the name is the root, or when no complete label could be read.

    Parsing stops at the terminating zero length label, at the first label that
    does not fit in the packet, or at the end of the packet. No byte at or
    beyond packet+len is ever read. */
std::string questionExpand(const char* packet, uint16_t len)
{
  static const uint16_t headerLen=12;
  std::string ret;

  // a packet no longer than the header has no question bytes to look at
  if(packet && len > headerLen) {
    const char* end=packet+len;
    const char* pos=packet+headerLen;
    ret.reserve(len-headerLen);

    while(pos < end) {
      unsigned char labellen=*pos++;
      if(!labellen || end-pos < labellen)  // end of name, or label runs off the packet
        break;
      ret.append(pos, labellen);
      ret.append(1,'.');
      pos+=labellen;
    }
  }

  if(ret.empty())
    ret=".";
  return ret;
}
1004
09e6702a
BH
1005
1006void handleRCC(int fd, boost::any& var)
1007{
1008 string remote;
1009 string msg=s_rcc.recv(&remote);
1010 RecursorControlParser rcp;
1011 RecursorControlParser::func_t* command;
1012 string answer=rcp.getAnswer(msg, &command);
1013 s_rcc.send(answer, &remote);
1014 command();
1015}
1016
1017void handleTCPClientReadable(int fd, boost::any& var)
1018{
1019 PacketID& pident=any_cast<PacketID&>(var);
1020 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident.inNeeded: "<<pident.inNeeded<<", "<<pident.sock->getHandle()<<endl;
1021
1022 shared_array<char> buffer(new char[pident.inNeeded]);
1023
1024 int ret=read(fd, buffer.get(), pident.inNeeded);
1025 if(ret > 0) {
1026 pident.inMSG.append(&buffer[0], &buffer[ret]);
1027 pident.inNeeded-=ret;
1028 if(!pident.inNeeded) {
1029 // cerr<<"Got entire load of "<<pident.inMSG.size()<<" bytes"<<endl;
1030 PacketID pid=pident;
1031 string msg=pident.inMSG;
1032
1033 g_fdm->removeReadFD(fd);
1034 MT->sendEvent(pid, &msg);
1035 }
1036 else {
1037 // cerr<<"Still have "<<pident.inNeeded<<" left to go"<<endl;
1038 }
1039 }
1040 else {
1041 PacketID tmp=pident;
1042 g_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
1043 string empty;
1044 MT->sendEvent(tmp, &empty); // this conveys error status
1045 }
1046}
1047
1048void handleTCPClientWritable(int fd, boost::any& var)
1049{
1050 PacketID& pid=any_cast<PacketID&>(var);
1051
1052 int ret=write(fd, pid.outMSG.c_str(), pid.outMSG.size() - pid.outPos);
1053 if(ret > 0) {
1054 pid.outPos+=ret;
1055 if(pid.outPos==pid.outMSG.size()) {
1056 PacketID tmp=pid;
1057 g_fdm->removeWriteFD(fd);
1058 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
1059 }
1060 }
1061 else { // error or EOF
998a4334 1062 PacketID tmp(pid);
09e6702a
BH
1063 g_fdm->removeWriteFD(fd);
1064 string sent;
998a4334 1065 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
09e6702a
BH
1066 }
1067}
1068
998a4334 1069void handleUDPServerResponse(int fd, boost::any& var)
09e6702a 1070{
600fc20b 1071 PacketID pid=any_cast<PacketID>(var);
998a4334 1072 int len;
09e6702a
BH
1073 char data[1500];
1074 struct sockaddr_in fromaddr;
1075 socklen_t addrlen=sizeof(fromaddr);
1076
998a4334 1077 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
600fc20b 1078
998a4334
BH
1079 if(len < (int)sizeof(dnsheader)) {
1080 if(len < 0)
2a5e6212 1081 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
09e6702a
BH
1082 else {
1083 g_stats.serverParseError++;
1084 if(g_logCommonErrors)
37d3f960 1085 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< sockAddrToString((struct sockaddr_in*) &fromaddr) <<
998a4334
BH
1086 ": packet smalller than DNS header"<<endl;
1087 }
1088 string empty;
600fc20b 1089 g_udpclientsocks.returnSocket(fd);
998a4334
BH
1090 MT->sendEvent(pid, &empty); // this denotes error
1091 return;
1092 }
1093
1094 dnsheader dh;
1095 memcpy(&dh, data, sizeof(dh));
1096
1097 if(!dh.qdcount) // UPC, Nominum?
1098 return;
1099
1100 if(dh.qr) {
1101 PacketID pident;
1102 pident.remote=fromaddr;
1103 pident.id=dh.id;
1104 pident.fd=fd;
1105 pident.domain=questionExpand(data, len); // don't copy this from above - we need to do the actual read
1106 string packet;
1107 packet.assign(data, len);
1108 if(!MT->sendEvent(pident, &packet)) {
1109 // if(g_logCommonErrors)
1110 // L<<Logger::Warning<<"Discarding unexpected packet from "<<sockAddrToString((struct sockaddr_in*) &fromaddr, addrlen)<<endl;
1111 g_stats.unexpectedCount++;
1112
1113 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
1114 if(pident.fd==mthread->key.fd && !memcmp(&mthread->key.remote.sin_addr, &pident.remote.sin_addr, sizeof(pident.remote.sin_addr)) &&
1115 !strcasecmp(pident.domain.c_str(), mthread->key.domain.c_str())) {
1116 mthread->key.nearMisses++;
1117 }
1118 }
09e6702a 1119 }
2a5e6212
BH
1120 else
1121 g_udpclientsocks.returnSocket(fd);
09e6702a 1122 }
998a4334 1123 else
37d3f960 1124 L<<Logger::Warning<<"Ignoring question on outgoing socket from "<< sockAddrToString((struct sockaddr_in*) &fromaddr) <<endl;
09e6702a
BH
1125}
1126
1f4abb20
BH
1127FDMultiplexer* getMultiplexer()
1128{
1129 FDMultiplexer* ret;
1130 for(FDMultiplexer::FDMultiplexermap_t::const_iterator i = FDMultiplexer::getMultiplexerMap().begin();
1131 i != FDMultiplexer::getMultiplexerMap().end(); ++i) {
1132 try {
1133 ret=i->second();
1134 L<<Logger::Error<<"Enabled '"<<ret->getName()<<"' multiplexer"<<endl;
1135 return ret;
1136 }
1137 catch(...)
1138 {}
1139 }
1140 L<<Logger::Error<<"No working multiplexer found!"<<endl;
1141 exit(1);
1142}
1143
288f4aa9
BH
1144int main(int argc, char **argv)
1145{
8a63d3ce 1146 reportBasicTypes();
ea634573 1147
22030c37 1148 int ret = EXIT_SUCCESS;
caa6eefa
BH
1149#ifdef WIN32
1150 WSADATA wsaData;
1151 WSAStartup( MAKEWORD( 2, 0 ), &wsaData );
1152#endif // WIN32
1153
288f4aa9 1154 try {
caa6eefa 1155 Utility::srandom(time(0));
2e3d8a19
BH
1156 ::arg().set("soa-minimum-ttl","Don't change")="0";
1157 ::arg().set("soa-serial-offset","Don't change")="0";
1158 ::arg().set("no-shuffle","Don't change")="off";
1159 ::arg().set("aaaa-additional-processing","turn on to do AAAA additional processing (slow)")="off";
1160 ::arg().set("local-port","port to listen on")="53";
01ed3112 1161 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas")="127.0.0.1";
2e3d8a19
BH
1162 ::arg().set("trace","if we should output heaps of logging")="off";
1163 ::arg().set("daemon","Operate as a daemon")="yes";
0e9d9ce2 1164 ::arg().set("log-common-errors","If we should log rather common errors")="yes";
2e3d8a19
BH
1165 ::arg().set("chroot","switch to chroot jail")="";
1166 ::arg().set("setgid","If set, change group id to this gid for more security")="";
1167 ::arg().set("setuid","If set, change user id to this uid for more security")="";
1168 ::arg().set("quiet","Suppress logging of questions and answers")="true";
1169 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
1170 ::arg().set("socket-dir","Where the controlsocket will live")=LOCALSTATEDIR;
1171 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
1172 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
998a4334 1173 // ::arg().set("query-local-port","Source port address for sending queries, defaults to random")="";
2e3d8a19
BH
1174 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
1175 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
1176 ::arg().set("hint-file", "If set, load root hints from this file")="";
bec87d21 1177 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="0";
01ed3112 1178 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")="127.0.0.0/8, 10.0.0.0/8, 192.168.0.0/16, 172.16.0.0/12";
4e120339 1179 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
41f7a068 1180 ::arg().set("fork", "If set, fork the daemon for possible double performance")="no";
0d5f0a9f 1181 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4ef015cd 1182 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
2e3d8a19
BH
1183
1184 ::arg().setCmd("help","Provide a helpful message");
c75a6a9e 1185 L.toConsole(Logger::Warning);
2e3d8a19 1186 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 1187
2e3d8a19 1188 string configname=::arg()["config-dir"]+"/recursor.conf";
c75a6a9e
BH
1189 cleanSlashes(configname);
1190
2e3d8a19 1191 if(!::arg().file(configname.c_str()))
c75a6a9e
BH
1192 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
1193
2e3d8a19 1194 ::arg().parse(argc,argv);
c836dc19 1195
2e3d8a19 1196 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 1197
2e3d8a19 1198 if(::arg().mustDo("help")) {
b636533b 1199 cerr<<"syntax:"<<endl<<endl;
2e3d8a19 1200 cerr<<::arg().helpstring(::arg()["help"])<<endl;
b636533b
BH
1201 exit(99);
1202 }
1203
c836dc19 1204 L.setName("pdns_recursor");
1f4abb20 1205
288f4aa9 1206
22c012a8 1207 L<<Logger::Warning<<"PowerDNS recursor "<<VERSION<<" (C) 2001-2006 PowerDNS.COM BV ("<<__DATE__", "__TIME__;
0d189311
BH
1208#ifdef __GNUC__
1209 L<<", gcc "__VERSION__;
1210#endif // add other compilers here
1211 L<<") starting up"<<endl;
1212
22c012a8 1213 L<<Logger::Warning<<"Operating in "<<(sizeof(unsigned long)*8) <<" bits mode"<<endl;
0e9d9ce2
BH
1214 L<<Logger::Warning<<"PowerDNS comes with ABSOLUTELY NO WARRANTY. "
1215 "This is free software, and you are welcome to redistribute it "
1216 "according to the terms of the GPL version 2."<<endl;
4a75412a
BH
1217
1218 if(!::arg()["allow-from"].empty()) {
1219 g_allowFrom=new NetmaskGroup;
1220 vector<string> ips;
1221 stringtok(ips, ::arg()["allow-from"], ", ");
1222 L<<Logger::Warning<<"Only allowing queries from: ";
1223 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
1224 g_allowFrom->addMask(*i);
1225 if(i!=ips.begin())
1226 L<<Logger::Warning<<", ";
1227 L<<Logger::Warning<<*i;
1228 }
1229 L<<Logger::Warning<<endl;
1230 }
1231 else if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
1232 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
0e9d9ce2
BH
1233
1234 g_quiet=::arg().mustDo("quiet");
1235 if(::arg().mustDo("trace")) {
7b35aa49 1236 SyncRes::setLog(true);
2e3d8a19 1237 ::arg().set("quiet")="no";
1d5b3ce6 1238 g_quiet=false;
0e9d9ce2
BH
1239 }
1240
09e6702a 1241 g_logCommonErrors=::arg().mustDo("log-common-errors");
4d0217fc 1242
f28307ad
BH
1243 makeUDPServerSockets();
1244 makeTCPServerSockets();
41f7a068
BH
1245
1246 if(::arg().mustDo("fork")) {
1247 fork();
1248 L<<Logger::Warning<<"This is forked pid "<<getpid()<<endl;
1249 }
4ef015cd 1250
5db529f8
BH
1251 g_fdm=getMultiplexer();
1252
1253 for(deferredAdd_t::const_iterator i=deferredAdd.begin(); i!=deferredAdd.end(); ++i)
1254 g_fdm->addReadFD(i->first, i->second);
41f7a068 1255
1d5b3ce6
BH
1256 makeControlChannelSocket();
1257
fededf47 1258 MT=new MTasker<PacketID,string>(100000);
288f4aa9
BH
1259
1260 PacketID pident;
998a4334 1261 primeHints();
c836dc19 1262 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
caa6eefa 1263#ifndef WIN32
2e3d8a19 1264 if(::arg().mustDo("daemon")) {
c836dc19
BH
1265 L.toConsole(Logger::Critical);
1266 daemonize();
1267 }
c75a6a9e 1268 signal(SIGUSR1,usr1Handler);
9170fbaf 1269 signal(SIGUSR2,usr2Handler);
4389619a 1270 signal(SIGPIPE,SIG_IGN);
88def049
BH
1271
1272 writePid();
caa6eefa 1273#endif
c75a6a9e 1274
08efacea 1275 int newgid=0;
2e3d8a19
BH
1276 if(!::arg()["setgid"].empty())
1277 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
08efacea 1278 int newuid=0;
2e3d8a19
BH
1279 if(!::arg()["setuid"].empty())
1280 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
08efacea
BH
1281
1282
2e3d8a19
BH
1283 if (!::arg()["chroot"].empty()) {
1284 if (chroot(::arg()["chroot"].c_str())<0) {
1285 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
08efacea
BH
1286 exit(1);
1287 }
1288 }
1289
1290 Utility::dropPrivs(newuid, newgid);
1291
0e9d9ce2 1292
49f076e8 1293 counter=0;
2e3d8a19 1294 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3159c9ef 1295 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
4e120339 1296
09e6702a 1297 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
4e120339 1298
09e6702a 1299 g_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
6dcd28c3 1300 bool listenOnTCP(true);
09e6702a 1301
288f4aa9 1302 for(;;) {
fededf47 1303 while(MT->schedule()); // housekeeping, let threads do their thing
288f4aa9 1304
998a4334 1305 if(!(counter%500)) {
d23a4bc7 1306 MT->makeThread(houseKeeping,0);
4a75412a 1307 }
998a4334 1308
a6ae6414 1309 if(!(counter%11)) {
998a4334
BH
1310 typedef vector<pair<int, boost::any> > expired_t;
1311 expired_t expired=g_fdm->getTimeouts(g_now);
1312
1313 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
1314 TCPConnection conn=any_cast<TCPConnection>(i->second);
a6ae6414 1315 if(conn.state != TCPConnection::DONE) {
600fc20b 1316 if(g_logCommonErrors)
37d3f960 1317 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn.remote.toString() <<endl;
a6ae6414
BH
1318 g_fdm->removeReadFD(i->first);
1319 conn.closeAndCleanup();
1320 }
998a4334
BH
1321 }
1322 }
1323
1324 counter++;
1325
f9f05db4 1326 if(statsWanted) {
c75a6a9e 1327 doStats();
f9f05db4 1328 }
c836dc19 1329
6dcd28c3 1330 gettimeofday(&g_now, 0);
09e6702a 1331 g_fdm->run(&g_now);
0bff046b 1332
6dcd28c3 1333 if(listenOnTCP) {
37d3f960
BH
1334 if(TCPConnection::s_currentConnections > maxTcpClients) { // shutdown
1335 for_each(g_tcpListenSockets.begin(), g_tcpListenSockets.end(),
1336 boost::bind(&FDMultiplexer::removeReadFD, g_fdm, _1));
6dcd28c3
BH
1337 listenOnTCP=false;
1338 }
1339 }
1340 else {
37d3f960
BH
1341 if(TCPConnection::s_currentConnections <= maxTcpClients) { // reenable
1342 for_each(g_tcpListenSockets.begin(), g_tcpListenSockets.end(),
1343 boost::bind(&FDMultiplexer::addReadFD,
1344 g_fdm, _1, handleNewTCPQuestion, boost::any()));
6dcd28c3
BH
1345 listenOnTCP=true;
1346 }
1347 }
288f4aa9
BH
1348 }
1349 }
1350 catch(AhuException &ae) {
c836dc19 1351 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 1352 ret=EXIT_FAILURE;
288f4aa9
BH
1353 }
1354 catch(exception &e) {
c836dc19 1355 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 1356 ret=EXIT_FAILURE;
288f4aa9
BH
1357 }
1358 catch(...) {
c836dc19 1359 L<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 1360 ret=EXIT_FAILURE;
288f4aa9 1361 }
caa6eefa
BH
1362
1363#ifdef WIN32
1364 WSACleanup();
1365#endif // WIN32
1366
22030c37 1367 return ret;
288f4aa9 1368}