]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
make it work
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
22c012a8 3 Copyright (C) 2003 - 2006 PowerDNS.COM BV
288f4aa9
BH
4
5 This program is free software; you can redistribute it and/or modify
f28307ad
BH
6 it under the terms of the GNU General Public License version 2
7 as published by the Free Software Foundation
288f4aa9
BH
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
06bd9ccf 16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
288f4aa9 17*/
caa6eefa
BH
18
19#include "utility.hh"
288f4aa9
BH
20#include <iostream>
21#include <errno.h>
22#include <map>
23#include <set>
caa6eefa 24#ifndef WIN32
288f4aa9 25#include <netdb.h>
caa6eefa 26#endif // WIN32
97bb160b 27#include "recursor_cache.hh"
288f4aa9 28#include <stdio.h>
c75a6a9e 29#include <signal.h>
288f4aa9
BH
30#include <stdlib.h>
31#include <unistd.h>
c8ddb7c2 32#include <netinet/tcp.h>
288f4aa9
BH
33#include "mtasker.hh"
34#include <utility>
288f4aa9
BH
35#include "arguments.hh"
36#include "syncres.hh"
88def049
BH
37#include <fcntl.h>
38#include <fstream>
5c633640
BH
39#include "sstuff.hh"
40#include <boost/tuple/tuple.hpp>
41#include <boost/tuple/tuple_comparison.hpp>
72df400f 42#include <boost/shared_array.hpp>
ea634573
BH
43#include <boost/lexical_cast.hpp>
44#include "dnsparser.hh"
45#include "dnswriter.hh"
46#include "dnsrecords.hh"
f814d7c8 47#include "zoneparser-tng.hh"
1d5b3ce6 48#include "rec_channel.hh"
aaacf7f2 49#include "logger.hh"
c8ddb7c2 50#include "iputils.hh"
1d5b3ce6 51
a2bfc3ff
BH
52#ifndef RECURSOR
53#include "statbag.hh"
54StatBag S;
55#endif
56
57
33988bfb 58using namespace boost;
5c633640 59
27adc173 60#ifdef __FreeBSD__ // see cvstrac ticket #26
7f617eb9
BH
61#include <pthread.h>
62#include <semaphore.h>
63#endif
64
eefd15f9 65MemRecursorCache RC;
1d5b3ce6
BH
66RecursorStats g_stats;
67bool g_quiet;
c8ddb7c2 68NetmaskGroup* g_allowFrom;
88def049 69string s_programname="pdns_recursor";
288f4aa9 70
ea634573 71struct DNSComboWriter {
c9e9e5e0 72 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(data, len), d_now(now), d_tcp(false), d_socket(-1)
ea634573
BH
73 {}
74 MOADNSParser d_mdp;
75 void setRemote(struct sockaddr* sa, socklen_t len)
76 {
77 memcpy((void *)d_remote, (void *)sa, len);
78 d_socklen=len;
79 }
80
81 void setSocket(int sock)
82 {
83 d_socket=sock;
84 }
a1754c6a
BH
85
86 string getRemote() const
87 {
88 return sockAddrToString((struct sockaddr_in *)d_remote, d_socklen);
89 }
90
c9e9e5e0 91 struct timeval d_now;
ea634573
BH
92 char d_remote[sizeof(sockaddr_in6)];
93 socklen_t d_socklen;
94 bool d_tcp;
95 int d_socket;
96};
97
98
27adc173
BH
#ifndef WIN32
#ifndef __FreeBSD__
// Do-nothing replacements for the semaphore and mutex primitives. Every stub
// ignores its arguments and reports success (0); pthread_self() reports
// thread id 0.
// NOTE(review): presumably these exist so the recursor does not need the real
// threading library on non-FreeBSD platforms - confirm against the build.
extern "C" {
  int sem_init(sem_t* /*sem*/, int /*pshared*/, unsigned int /*value*/)
  {
    return 0;
  }

  int sem_wait(sem_t* /*sem*/)
  {
    return 0;
  }

  int sem_trywait(sem_t* /*sem*/)
  {
    return 0;
  }

  int sem_post(sem_t* /*sem*/)
  {
    return 0;
  }

  int sem_getvalue(sem_t* /*sem*/, int* /*sval*/)
  {
    return 0;
  }

  pthread_t pthread_self(void)
  {
    return (pthread_t) 0;
  }

  int pthread_mutex_init(pthread_mutex_t* /*mutex*/, const pthread_mutexattr_t* /*mutexattr*/)
  {
    return 0;
  }

  int pthread_mutex_lock(pthread_mutex_t* /*mutex*/)
  {
    return 0;
  }

  int pthread_mutex_unlock(pthread_mutex_t* /*mutex*/)
  {
    return 0;
  }

  int pthread_mutex_destroy(pthread_mutex_t* /*mutex*/)
  {
    return 0;
  }
}
#endif // __FreeBSD__
#endif // WIN32
288f4aa9 115
288f4aa9
BH
116ArgvMap &arg()
117{
118 static ArgvMap theArg;
119 return theArg;
120}
bacc40d9 121static int d_clientsock;
4a75412a 122static int d_prevclientsock;
f28307ad 123static vector<int> d_udpserversocks;
288f4aa9 124
cd50f30d
BH
125typedef vector<int> tcpserversocks_t;
126static tcpserversocks_t s_tcpserversocks;
5c633640 127
5c633640
BH
128static map<int,PacketID> d_tcpclientreadsocks, d_tcpclientwritesocks;
129
35ce8576
BH
130typedef MTasker<PacketID,string> MT_t;
131MT_t* MT;
5c633640
BH
132
133int asendtcp(const string& data, Socket* sock)
134{
135 PacketID pident;
136 pident.sock=sock;
137 pident.outMSG=data;
138 string packet;
139
5c633640
BH
140 d_tcpclientwritesocks[sock->getHandle()]=pident;
141
9170fbaf
BH
142 int ret=MT->waitEvent(pident,&packet,1);
143 if(!ret || ret==-1) { // timeout
5c633640 144 d_tcpclientwritesocks.erase(sock->getHandle());
5c633640 145 }
9170fbaf 146 return ret;
5c633640
BH
147}
148
9170fbaf 149// -1 is error, 0 is timeout, 1 is success
5c633640 150int arecvtcp(string& data, int len, Socket* sock)
288f4aa9 151{
5c633640
BH
152 data="";
153 PacketID pident;
154 pident.sock=sock;
155 pident.inNeeded=len;
156
5c633640
BH
157 d_tcpclientreadsocks[sock->getHandle()]=pident;
158
9170fbaf
BH
159 int ret=MT->waitEvent(pident,&data,1);
160 if(!ret || ret==-1) { // timeout
5c633640 161 d_tcpclientreadsocks.erase(sock->getHandle());
288f4aa9 162 }
9170fbaf 163 return ret;
288f4aa9
BH
164}
165
288f4aa9
BH
166
167/* these two functions are used by LWRes */
9170fbaf 168// -1 is error, > 1 is success
288f4aa9
BH
169int asendto(const char *data, int len, int flags, struct sockaddr *toaddr, int addrlen, int id)
170{
171 return sendto(d_clientsock, data, len, flags, toaddr, addrlen);
172}
173
9170fbaf 174// -1 is error, 0 is timeout, 1 is success
0d5f0a9f 175int arecvfrom(char *data, int len, int flags, struct sockaddr *toaddr, Utility::socklen_t *addrlen, int *d_len, int id, const string& domain)
288f4aa9 176{
0d5f0a9f
BH
177 static optional<unsigned int> nearMissLimit;
178 if(!nearMissLimit)
179 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
180
288f4aa9
BH
181 PacketID pident;
182 pident.id=id;
0d5f0a9f 183 pident.domain=domain;
29a14b24 184 memcpy(&pident.remote, toaddr, sizeof(pident.remote));
b636533b 185
288f4aa9 186 string packet;
29a14b24 187 int ret=MT->waitEvent(pident, &packet, 1);
9170fbaf
BH
188 if(ret > 0) {
189 *d_len=packet.size();
190 memcpy(data,packet.c_str(),min(len,*d_len));
0d5f0a9f
BH
191 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
192 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<sockAddrToString((struct sockaddr_in*)toaddr, sizeof(pident.remote))<<", assuming spoof attempt."<<endl;
193 g_stats.spoofCount++;
35ce8576
BH
194 return -1;
195 }
288f4aa9 196 }
9170fbaf 197 return ret;
288f4aa9
BH
198}
199
aa4e4cbf 200void setBuffer(int fd, int optname, uint32_t size)
ce8deb27 201{
9b356afc 202 uint32_t psize=0;
91e4ecf3 203 socklen_t len=sizeof(psize);
9b356afc 204
aa4e4cbf 205 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
a19fb8e8 206 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
9b356afc
BH
207 return;
208 }
209
aa4e4cbf 210 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
a19fb8e8 211 L<<Logger::Error<<"Warning: unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
ce8deb27
BH
212}
213
214
aa4e4cbf
BH
215static void setReceiveBuffer(int fd, uint32_t size)
216{
217 setBuffer(fd, SO_RCVBUF, size);
218}
219
220static void setSendBuffer(int fd, uint32_t size)
221{
222 setBuffer(fd, SO_SNDBUF, size);
223}
224
88def049
BH
225static void writePid(void)
226{
2e3d8a19 227 string fname=::arg()["socket-dir"]+"/"+s_programname+".pid";
88def049
BH
228 ofstream of(fname.c_str());
229 if(of)
369369f6 230 of<< getpid() <<endl;
88def049 231 else
562588a3 232 L<<Logger::Error<<"Requested to write pid for "<<getpid()<<" to "<<fname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
233}
234
bdf40704 235void primeHints(void)
288f4aa9
BH
236{
237 // prime root cache
288f4aa9 238 set<DNSResourceRecord>nsset;
f814d7c8 239
2e3d8a19 240 if(::arg()["hint-file"].empty()) {
f814d7c8
BH
241 static char*ips[]={"198.41.0.4", "192.228.79.201", "192.33.4.12", "128.8.10.90", "192.203.230.10", "192.5.5.241", "192.112.36.4", "128.63.2.53",
242 "192.36.148.17","192.58.128.30", "193.0.14.129", "198.32.64.12", "202.12.27.33"};
243 DNSResourceRecord arr, nsrr;
244 arr.qtype=QType::A;
245 arr.ttl=time(0)+3600000;
246 nsrr.qtype=QType::NS;
247 nsrr.ttl=time(0)+3600000;
248
5456e605 249 for(char c='a';c<='m';++c) {
f814d7c8 250 static char templ[40];
7738a23f 251 strncpy(templ,"a.root-servers.net.", sizeof(templ) - 1);
f814d7c8
BH
252 *templ=c;
253 arr.qname=nsrr.content=templ;
5456e605 254 arr.content=ips[c-'a'];
f814d7c8
BH
255 set<DNSResourceRecord> aset;
256 aset.insert(arr);
257 RC.replace(string(templ), QType(QType::A), aset);
258
259 nsset.insert(nsrr);
260 }
261 }
262 else {
2e3d8a19 263 ZoneParserTNG zpt(::arg()["hint-file"]);
f814d7c8 264 DNSResourceRecord rr;
ea634573 265 set<DNSResourceRecord> aset;
288f4aa9 266
f814d7c8 267 while(zpt.get(rr)) {
f814d7c8
BH
268 rr.ttl+=time(0);
269 if(rr.qtype.getCode()==QType::A) {
270 set<DNSResourceRecord> aset;
271 aset.insert(rr);
272 RC.replace(rr.qname, QType(QType::A), aset);
273 }
274 if(rr.qtype.getCode()==QType::NS) {
e2e2c5d8 275 rr.content=toLower(rr.content);
f814d7c8
BH
276 nsset.insert(rr);
277 }
278 }
288f4aa9 279 }
7738a23f 280 RC.replace(".", QType(QType::NS), nsset); // and stuff in the cache
288f4aa9
BH
281}
282
0e9d9ce2
BH
// Per-client TCP connection counter, keyed by the client's IPv4 address as
// stored in sockaddr_in::sin_addr.s_addr. closeAndCleanup() below decrements
// it; the code that increments it is outside this part of the file.
std::map<uint32_t, uint32_t> g_tcpClientCounts;

/**
 * State of one incoming TCP connection: the socket, how far reading of the
 * current question has progressed, and the bytes read so far.
 */
struct TCPConnection
{
  int fd;                                          //!< connection socket
  enum {BYTE0, BYTE1, GETQUESTION, DONE} state;    //!< progress of reading the current question
  int qlen;                                        //!< announced question length
  int bytesread;                                   //!< bytes of the question read so far
  struct sockaddr_in remote;                       //!< client address
  char data[65535];                                //!< question buffer
  time_t startTime;                                //!< when the current question was started

  /**
   * Close the socket and give back this connection's slot in
   * g_tcpClientCounts. The entry for the client is removed as soon as its
   * count reaches zero, so the map does not accumulate dead entries, and a
   * missing entry is left alone instead of being created and wrapped around.
   */
  void closeAndCleanup()
  {
    close(fd);

    std::map<uint32_t, uint32_t>::iterator count=g_tcpClientCounts.find(remote.sin_addr.s_addr);
    if(count == g_tcpClientCounts.end())
      return;

    if(count->second <= 1)
      g_tcpClientCounts.erase(count);
    else
      --count->second;
  }
};
302
303vector<TCPConnection> g_tcpconnections; // all *running* TCP/IP questions (from clients)
304
288f4aa9
BH
305void startDoResolve(void *p)
306{
7b1469bb 307 DNSComboWriter* dc=(DNSComboWriter *)p;
288f4aa9 308 try {
10321a98
BH
309 uint16_t maxudpsize=512;
310 MOADNSParser::EDNSOpts edo;
311 if(dc->d_mdp.getEDNSOpts(&edo)) {
312 maxudpsize=edo.d_packetsize;
313 }
314
ea634573 315 vector<DNSResourceRecord> ret;
9170fbaf 316
ea634573
BH
317 vector<uint8_t> packet;
318 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
319
320 pw.getHeader()->aa=0;
321 pw.getHeader()->ra=1;
c154c8a4 322 pw.getHeader()->qr=1;
ea634573 323 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 324 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
ea634573 325
c9e9e5e0 326 SyncRes sr(dc->d_now);
1d5b3ce6 327 if(!g_quiet)
8a63d3ce
BH
328 L<<Logger::Error<<"["<<MT->getTid()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
329 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote()<<endl;
c75a6a9e 330
fededf47 331 sr.setId(MT->getTid());
ea634573 332 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
333 sr.setCacheOnly();
334
ea634573 335 int res=sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret);
1d5b3ce6 336 if(res<0) {
ea634573 337 pw.getHeader()->rcode=RCode::ServFail;
bec87d21 338 // no commit here, because no record
1d5b3ce6
BH
339 g_stats.servFails++;
340 }
288f4aa9 341 else {
ea634573 342 pw.getHeader()->rcode=res;
1d5b3ce6 343 switch(res) {
5e4a2466
BH
344 case RCode::ServFail:
345 g_stats.servFails++;
346 break;
1d5b3ce6
BH
347 case RCode::NXDomain:
348 g_stats.nxDomains++;
349 break;
350 case RCode::NoError:
351 g_stats.noErrors++;
352 break;
353 }
354
c154c8a4 355 if(ret.size()) {
e67e250f 356 shuffle(ret);
c154c8a4 357 for(vector<DNSResourceRecord>::const_iterator i=ret.begin();i!=ret.end();++i) {
10321a98 358 pw.startRecord(i->qname, i->qtype.getCode(), i->ttl, 1, (DNSPacketWriter::Place)i->d_place);
7b1469bb
BH
359
360 shared_ptr<DNSRecordContent> drc(DNSRecordContent::mastermake(i->qtype.getCode(), 1, i->content));
361
c154c8a4 362 drc->toPacket(pw);
7b1469bb 363
10321a98
BH
364 if(!dc->d_tcp && pw.size() > maxudpsize) {
365 pw.rollback();
1791e3c4
BH
366 if(i->d_place==DNSResourceRecord::ANSWER) // only truncate if we actually omitted parts of the answer
367 pw.getHeader()->tc=1;
10321a98
BH
368 goto sendit; // need to jump over pw.commit
369 }
c154c8a4
BH
370 }
371 pw.commit();
ea634573 372 }
288f4aa9 373 }
10321a98 374 sendit:;
ea634573 375 if(!dc->d_tcp) {
ea634573 376 sendto(dc->d_socket, &*packet.begin(), packet.size(), 0, (struct sockaddr *)(dc->d_remote), dc->d_socklen);
feccc9fc 377 }
9c495589
BH
378 else {
379 char buf[2];
ea634573
BH
380 buf[0]=packet.size()/256;
381 buf[1]=packet.size()%256;
feccc9fc
BH
382
383 struct iovec iov[2];
384
ea634573
BH
385 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
386 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 387
ea634573 388 int ret=writev(dc->d_socket, iov, 2);
0e9d9ce2 389 bool hadError=true;
feccc9fc 390
0e9d9ce2
BH
391 if(ret == 0)
392 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
393 else if(ret < 0 )
394 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
ea634573 395 else if((unsigned int)ret != 2 + packet.size())
aa4e4cbf 396 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<ret<<")"<<endl;
0e9d9ce2
BH
397 else
398 hadError=false;
399
400 for(vector<TCPConnection>::iterator i=g_tcpconnections.begin();i!=g_tcpconnections.end();++i) {
401 if(i->fd == dc->d_socket) {
402 if(hadError) {
403 i->closeAndCleanup();
404 g_tcpconnections.erase(i);
405 }
406 else {
407 i->state=TCPConnection::BYTE0;
408 i->startTime=time(0); // needs to be current, TCP is slow anyhow
409 }
410 break;
411 }
412 }
9c495589
BH
413 }
414
1d5b3ce6 415 if(!g_quiet) {
8a63d3ce 416 L<<Logger::Error<<"["<<MT->getTid()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
ea634573 417 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
5c633640 418 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
c75a6a9e
BH
419 }
420
eefd15f9 421 sr.d_outqueries ? RC.cacheMisses++ : RC.cacheHits++;
fe213470
BH
422 float spent=makeFloat(sr.d_now-dc->d_now);
423 if(spent < 0.001)
424 g_stats.answers0_1++;
425 else if(spent < 0.010)
426 g_stats.answers1_10++;
427 else if(spent < 0.1)
428 g_stats.answers10_100++;
429 else if(spent < 1.0)
430 g_stats.answers100_1000++;
431 else
432 g_stats.answersSlow++;
433
574af7ea 434 uint64_t newLat=(uint64_t)(spent*1000000);
87b8e43a
BH
435 if(newLat < 1000000) // outliers of several minutes exist..
436 g_stats.avgLatencyUsec=(uint64_t)((1-0.0001)*g_stats.avgLatencyUsec + 0.0001*newLat);
ea634573 437 delete dc;
288f4aa9
BH
438 }
439 catch(AhuException &ae) {
c836dc19 440 L<<Logger::Error<<"startDoResolve problem: "<<ae.reason<<endl;
288f4aa9 441 }
7b1469bb
BH
442 catch(MOADNSException& e) {
443 L<<Logger::Error<<"DNS parser error: "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
444 }
c154c8a4
BH
445 catch(exception& e) {
446 L<<Logger::Error<<"STL error: "<<e.what()<<endl;
447 }
288f4aa9 448 catch(...) {
c836dc19 449 L<<Logger::Error<<"Any other exception in a resolver context"<<endl;
288f4aa9
BH
450 }
451}
452
1d5b3ce6
BH
453RecursorControlChannel s_rcc;
454
455void makeControlChannelSocket()
456{
41f7a068
BH
457 string sockname=::arg()["socket-dir"]+"/pdns_recursor.controlsocket";
458 if(::arg().mustDo("fork")) {
459 sockname+="."+lexical_cast<string>(getpid());
460 L<<Logger::Warning<<"Forked control socket name: "<<sockname<<endl;
461 }
462 s_rcc.listen(sockname);
1d5b3ce6
BH
463}
464
4a75412a
BH
465// this stuff is a tad complicated. There are two client sockets, the current one and the previous one (prevclientsocket)
466// if this function is called, and more than 5 seconds have passed since the previous call, the previous client socket is closed,
467// and replaced by the current one, which is then reopened.
468void remakeClientSocket()
288f4aa9 469{
4a75412a
BH
470 static time_t lastChange;
471
472 if(d_clientsock>=0 && !::arg()["query-local-port"].empty()) // already have a port, and we are fixed
473 return;
474
475 if(!lastChange)
476 lastChange=time(0)-10;
477
478 if(lastChange > time(0) - 5)
479 return;
480
481 lastChange=time(0);
482
483 if(d_prevclientsock >= 0) {
484 close(d_prevclientsock);
485 }
486 d_prevclientsock=d_clientsock;
487
288f4aa9
BH
488 d_clientsock=socket(AF_INET, SOCK_DGRAM,0);
489 if(d_clientsock<0)
490 throw AhuException("Making a socket for resolver: "+stringerror());
a19fb8e8 491 setReceiveBuffer(d_clientsock, 200000);
288f4aa9
BH
492 struct sockaddr_in sin;
493 memset((char *)&sin,0, sizeof(sin));
494
495 sin.sin_family = AF_INET;
0d189311 496
2e3d8a19
BH
497 if(!IpToU32(::arg()["query-local-address"], &sin.sin_addr.s_addr))
498 throw AhuException("Unable to resolve local address '"+ ::arg()["query-local-address"] +"'");
0d189311 499
288f4aa9
BH
500 int tries=10;
501 while(--tries) {
4a75412a
BH
502 uint16_t port;
503 if(::arg()["query-local-port"].empty())
504 port=10000+Utility::random()%50000;
505 else {
506 port=::arg().asNum("query-local-port");
507 tries=1;
508 }
288f4aa9
BH
509 sin.sin_port = htons(port);
510
2e3d8a19 511 if (::bind(d_clientsock, (struct sockaddr *)&sin, sizeof(sin)) >= 0)
288f4aa9 512 break;
288f4aa9
BH
513
514 }
515 if(!tries)
4a75412a 516 throw AhuException("Resolver binding to local query client socket: "+stringerror());
976196d2
BH
517
518 Utility::setNonBlocking(d_clientsock);
4a75412a
BH
519
520 // L<<Logger::Error<<"Sending UDP queries from "<<inet_ntoa(sin.sin_addr)<<":"<< ntohs(sin.sin_port) <<endl;
288f4aa9
BH
521}
522
f28307ad 523void makeTCPServerSockets()
9c495589 524{
f28307ad 525 vector<string>locals;
2e3d8a19 526 stringtok(locals,::arg()["local-address"]," ,");
9c495589 527
f28307ad
BH
528 if(locals.empty())
529 throw AhuException("No local address specified");
530
f28307ad
BH
531 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
532 int fd=socket(AF_INET, SOCK_STREAM,0);
533 if(fd<0)
534 throw AhuException("Making a server socket for resolver: "+stringerror());
bacc40d9 535
f28307ad
BH
536 struct sockaddr_in sin;
537 memset((char *)&sin,0, sizeof(sin));
538
539 sin.sin_family = AF_INET;
540 if(!IpToU32(*i, &sin.sin_addr.s_addr))
541 throw AhuException("Unable to resolve local address '"+ *i +"'");
542
543 int tmp=1;
544 if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
545 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 546 exit(1);
f28307ad
BH
547 }
548
c8ddb7c2
BH
549#ifdef TCP_DEFER_ACCEPT
550 if(setsockopt(fd,SOL_TCP,TCP_DEFER_ACCEPT,(char*)&tmp,sizeof tmp) >= 0) {
551 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
552 }
553#endif
554
2e3d8a19 555 sin.sin_port = htons(::arg().asNum("local-port"));
f28307ad 556
2e3d8a19 557 if (::bind(fd, (struct sockaddr *)&sin, sizeof(sin))<0)
f28307ad
BH
558 throw AhuException("Binding TCP server socket for "+*i+": "+stringerror());
559
560 Utility::setNonBlocking(fd);
aa4e4cbf 561 setSendBuffer(fd, 65000);
f28307ad 562 listen(fd, 128);
cd50f30d 563 s_tcpserversocks.push_back(fd);
2e3d8a19 564 L<<Logger::Error<<"Listening for TCP queries on "<<inet_ntoa(sin.sin_addr)<<":"<<::arg().asNum("local-port")<<endl;
f28307ad 565 }
9c495589
BH
566}
567
f28307ad 568void makeUDPServerSockets()
288f4aa9 569{
f28307ad 570 vector<string>locals;
2e3d8a19 571 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 572
f28307ad
BH
573 if(locals.empty())
574 throw AhuException("No local address specified");
575
2e3d8a19 576 if(::arg()["local-address"]=="0.0.0.0") {
c836dc19 577 L<<Logger::Warning<<"It is advised to bind to explicit addresses with the --local-address option"<<endl;
288f4aa9 578 }
525b8a7c 579
f28307ad
BH
580 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
581 int fd=socket(AF_INET, SOCK_DGRAM,0);
582 if(fd<0)
583 throw AhuException("Making a server socket for resolver: "+stringerror());
a19fb8e8 584 setReceiveBuffer(fd, 200000);
f28307ad
BH
585 struct sockaddr_in sin;
586 memset((char *)&sin,0, sizeof(sin));
288f4aa9 587
f28307ad
BH
588 sin.sin_family = AF_INET;
589 if(!IpToU32(*i, &sin.sin_addr.s_addr))
590 throw AhuException("Unable to resolve local address '"+ *i +"'");
591
2e3d8a19 592 sin.sin_port = htons(::arg().asNum("local-port"));
f28307ad 593
2e3d8a19 594 if (::bind(fd, (struct sockaddr *)&sin, sizeof(sin))<0)
f28307ad
BH
595 throw AhuException("Resolver binding to server socket for "+*i+": "+stringerror());
596
597 Utility::setNonBlocking(fd);
598 d_udpserversocks.push_back(fd);
2e3d8a19 599 L<<Logger::Error<<"Listening for UDP queries on "<<inet_ntoa(sin.sin_addr)<<":"<<::arg().asNum("local-port")<<endl;
f28307ad 600 }
c836dc19 601}
caa6eefa 602
9c495589 603
caa6eefa 604#ifndef WIN32
c836dc19
BH
605void daemonize(void)
606{
607 if(fork())
608 exit(0); // bye bye
609
610 setsid();
611
612 // cleanup open fds, but skip sockets
613 close(0);
614 close(1);
615 close(2);
288f4aa9 616}
caa6eefa
BH
617#endif
618
aaacf7f2 619uint64_t counter;
c75a6a9e
BH
620bool statsWanted;
621
1d5b3ce6 622
c75a6a9e
BH
623void usr1Handler(int)
624{
625 statsWanted=true;
626}
ae1b2e98 627
c9e9e5e0
BH
628
629
9170fbaf
BH
630void usr2Handler(int)
631{
632 SyncRes::setLog(true);
1d5b3ce6
BH
633 g_quiet=false;
634 ::arg().set("quiet")="no";
c9e9e5e0 635
9170fbaf
BH
636}
637
c75a6a9e
BH
638void doStats(void)
639{
aaacf7f2
BH
640 if(g_stats.qcounter) {
641 L<<Logger::Error<<"stats: "<<g_stats.qcounter<<" questions, "<<RC.size()<<" cache entries, "<<SyncRes::s_negcache.size()<<" negative entries, "
8a5602d4 642 <<(int)((RC.cacheHits*100.0)/(RC.cacheHits+RC.cacheMisses))<<"% cache hits"<<endl;
2e3d8a19 643 L<<Logger::Error<<"stats: throttle map: "<<SyncRes::s_throttle.size()<<", ns speeds: "
8cd5b55e 644 <<SyncRes::s_nsSpeeds.size()<<endl; // ", bytes: "<<RC.bytes()<<endl;
8a5602d4 645 L<<Logger::Error<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
525b8a7c
BH
646 L<<Logger::Error<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
647 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
5c633640 648 L<<Logger::Error<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<MT->numProcesses()<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
c75a6a9e 649 }
7becf07f
BH
650 else if(statsWanted)
651 L<<Logger::Error<<"stats: no stats yet!"<<endl;
652
c75a6a9e
BH
653 statsWanted=false;
654}
c836dc19 655
29f0b1ce 656static void houseKeeping(void *)
c836dc19 657{
ae1b2e98 658 static time_t last_stat, last_rootupdate, last_prune;
c9e9e5e0
BH
659 struct timeval now;
660 gettimeofday(&now, 0);
661
255e0a07 662 if(now.tv_sec - last_prune > 300) {
5e4a2466
BH
663 DTime dt;
664 dt.setTimeval(now);
eefd15f9 665 RC.doPrune();
33988bfb
BH
666
667 typedef SyncRes::negcache_t::nth_index<1>::type negcache_by_ttd_index_t;
668 negcache_by_ttd_index_t& ttdindex=boost::multi_index::get<1>(SyncRes::s_negcache);
669
670 negcache_by_ttd_index_t::iterator i=ttdindex.lower_bound(now.tv_sec);
671 ttdindex.erase(ttdindex.begin(), i);
2e3d8a19 672
c9e9e5e0 673 time_t limit=now.tv_sec-300;
2e3d8a19
BH
674 for(SyncRes::nsspeeds_t::iterator i = SyncRes::s_nsSpeeds.begin() ; i!= SyncRes::s_nsSpeeds.end(); )
675 if(i->second.stale(limit))
676 SyncRes::s_nsSpeeds.erase(i++);
677 else
678 ++i;
679
255e0a07 680 // cerr<<"Pruned "<<pruned<<" records, left "<<SyncRes::s_negcache.size()<<"\n";
5e4a2466 681// cout<<"Prune took "<<dt.udiff()<<"usec\n";
ae1b2e98
BH
682 last_prune=time(0);
683 }
c9e9e5e0 684 if(now.tv_sec - last_stat>1800) {
c75a6a9e 685 doStats();
c836dc19
BH
686 last_stat=time(0);
687 }
c9e9e5e0
BH
688 if(now.tv_sec -last_rootupdate>7200) {
689 SyncRes sr(now);
ea634573 690 vector<DNSResourceRecord> ret;
c836dc19
BH
691
692 sr.setNoCache();
7738a23f 693 int res=sr.beginResolve(".", QType(QType::NS), ret);
c836dc19
BH
694 if(!res) {
695 L<<Logger::Error<<"Refreshed . records"<<endl;
c9e9e5e0 696 last_rootupdate=now.tv_sec;
c836dc19
BH
697 }
698 else
699 L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
700 }
701}
288f4aa9 702
4e120339 703
9c495589 704
d6d5dea7 705#if 0
// Disabled debugging aid, compiled out by the #if 0 above.
// The gettimeofday() replacement below counts, per backtrace (as produced by
// maketrace()), how often it is called; the counts live in its static
// s_cases map, which casesptr points at. The loose statements that follow
// (the multimap `rev` and the two loops) invert that map and print every
// backtrace with its count; they sit outside any function, so they would
// have to be moved into one before this region could be enabled.
// NOTE(review): maketrace() asks backtrace() for only 5 frames although the
// array holds 20 and the comment speaks of 17, and the array returned by
// backtrace_symbols() is never freed.
d6d5dea7
BH
706#include <execinfo.h>
707
c9e9e5e0
BH
708 multimap<uint32_t,string> rev;
709 for(map<string,uint32_t>::const_iterator i=casesptr->begin(); i!=casesptr->end(); ++i) {
710 rev.insert(make_pair(i->second,i->first));
711 }
712 for(multimap<uint32_t,string>::const_iterator i=rev.begin(); i!= rev.end(); ++i)
713 cout<<i->first<<" times: \n"<<i->second<<"\n";
714
715 cout.flush();
716
717map<string,uint32_t>* casesptr;
718static string maketrace()
d6d5dea7
BH
719{
720 void *array[20]; //only care about last 17 functions (3 taken with tracing support)
721 size_t size;
722 char **strings;
723 size_t i;
724
c9e9e5e0 725 size = backtrace (array, 5);
d6d5dea7
BH
726 strings = backtrace_symbols (array, size); //Need -rdynamic gcc (linker) flag for this to work
727
c9e9e5e0
BH
728 string ret;
729
d6d5dea7 730 for (i = 0; i < size; i++) //skip useless functions
c9e9e5e0
BH
731 ret+=string(strings[i])+"\n";
732 return ret;
d6d5dea7
BH
733}
734
735extern "C" {
c9e9e5e0 736
d6d5dea7
BH
737int gettimeofday (struct timeval *__restrict __tv,
738 __timezone_ptr_t __tz)
739{
c9e9e5e0
BH
740 static map<string, uint32_t> s_cases;
741 casesptr=&s_cases;
742 s_cases[maketrace()]++;
743 __tv->tv_sec=time(0);
d6d5dea7
BH
744 return 0;
745}
746
747}
c9e9e5e0 748#endif
d6d5dea7 749
0d5f0a9f
BH
/**
 * Extract the question name from a raw DNS packet as dotted text.
 *
 * The name starts right after the 12-byte header and is a sequence of
 * length-prefixed labels ended by a zero length byte. Every label is
 * appended followed by a '.'.
 *
 * Parsing never reads outside [packet, packet+len): it stops at the zero
 * length byte, at the end of the packet, or at a label that claims more
 * bytes than are left (that label is dropped).
 *
 * @param packet  start of the packet
 * @param len     number of valid bytes at `packet`
 * @return the labels found, or "." when there are none (root name, a packet
 *         too short to hold a name, or a null `packet`)
 */
std::string questionExpand(const char* packet, uint16_t len)
{
  static const uint16_t headerSize=12;
  std::string ret;

  if(packet && len > headerSize) {
    const char* const end=packet+len;
    const char* pos=packet+headerSize;

    while(pos < end) {
      const unsigned char labellen=static_cast<unsigned char>(*pos++);
      if(!labellen)
        break;   // root label: end of name
      if(labellen > end - pos)
        break;   // label runs past the end of the packet
      ret.append(pos, labellen);
      ret.append(1,'.');
      pos+=labellen;
    }
  }

  if(ret.empty())
    ret=".";
  return ret;
}
768
288f4aa9
BH
769int main(int argc, char **argv)
770{
8a63d3ce 771 reportBasicTypes();
ea634573 772
22030c37 773 int ret = EXIT_SUCCESS;
caa6eefa
BH
774#ifdef WIN32
775 WSADATA wsaData;
776 WSAStartup( MAKEWORD( 2, 0 ), &wsaData );
777#endif // WIN32
778
288f4aa9 779 try {
caa6eefa 780 Utility::srandom(time(0));
2e3d8a19
BH
781 ::arg().set("soa-minimum-ttl","Don't change")="0";
782 ::arg().set("soa-serial-offset","Don't change")="0";
783 ::arg().set("no-shuffle","Don't change")="off";
784 ::arg().set("aaaa-additional-processing","turn on to do AAAA additional processing (slow)")="off";
785 ::arg().set("local-port","port to listen on")="53";
01ed3112 786 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas")="127.0.0.1";
2e3d8a19
BH
787 ::arg().set("trace","if we should output heaps of logging")="off";
788 ::arg().set("daemon","Operate as a daemon")="yes";
0e9d9ce2 789 ::arg().set("log-common-errors","If we should log rather common errors")="yes";
2e3d8a19
BH
790 ::arg().set("chroot","switch to chroot jail")="";
791 ::arg().set("setgid","If set, change group id to this gid for more security")="";
792 ::arg().set("setuid","If set, change user id to this uid for more security")="";
793 ::arg().set("quiet","Suppress logging of questions and answers")="true";
794 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
795 ::arg().set("socket-dir","Where the controlsocket will live")=LOCALSTATEDIR;
796 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
797 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
4a75412a 798 ::arg().set("query-local-port","Source port address for sending queries, defaults to random")="";
2e3d8a19
BH
799 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
800 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
801 ::arg().set("hint-file", "If set, load root hints from this file")="";
bec87d21 802 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="0";
01ed3112 803 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")="127.0.0.0/8, 10.0.0.0/8, 192.168.0.0/16, 172.16.0.0/12";
4e120339 804 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
41f7a068 805 ::arg().set("fork", "If set, fork the daemon for possible double performance")="no";
0d5f0a9f 806 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
2e3d8a19
BH
807
808 ::arg().setCmd("help","Provide a helpful message");
c75a6a9e 809 L.toConsole(Logger::Warning);
2e3d8a19 810 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 811
2e3d8a19 812 string configname=::arg()["config-dir"]+"/recursor.conf";
c75a6a9e
BH
813 cleanSlashes(configname);
814
2e3d8a19 815 if(!::arg().file(configname.c_str()))
c75a6a9e
BH
816 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
817
2e3d8a19 818 ::arg().parse(argc,argv);
c836dc19 819
2e3d8a19 820 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 821
2e3d8a19 822 if(::arg().mustDo("help")) {
b636533b 823 cerr<<"syntax:"<<endl<<endl;
2e3d8a19 824 cerr<<::arg().helpstring(::arg()["help"])<<endl;
b636533b
BH
825 exit(99);
826 }
827
c836dc19 828 L.setName("pdns_recursor");
288f4aa9 829
22c012a8 830 L<<Logger::Warning<<"PowerDNS recursor "<<VERSION<<" (C) 2001-2006 PowerDNS.COM BV ("<<__DATE__", "__TIME__;
0d189311
BH
831#ifdef __GNUC__
832 L<<", gcc "__VERSION__;
833#endif // add other compilers here
834 L<<") starting up"<<endl;
835
22c012a8 836 L<<Logger::Warning<<"Operating in "<<(sizeof(unsigned long)*8) <<" bits mode"<<endl;
0e9d9ce2
BH
837 L<<Logger::Warning<<"PowerDNS comes with ABSOLUTELY NO WARRANTY. "
838 "This is free software, and you are welcome to redistribute it "
839 "according to the terms of the GPL version 2."<<endl;
840
4a75412a
BH
841
842 if(!::arg()["allow-from"].empty()) {
843 g_allowFrom=new NetmaskGroup;
844 vector<string> ips;
845 stringtok(ips, ::arg()["allow-from"], ", ");
846 L<<Logger::Warning<<"Only allowing queries from: ";
847 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
848 g_allowFrom->addMask(*i);
849 if(i!=ips.begin())
850 L<<Logger::Warning<<", ";
851 L<<Logger::Warning<<*i;
852 }
853 L<<Logger::Warning<<endl;
854 }
855 else if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
856 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
0e9d9ce2
BH
857
858 g_quiet=::arg().mustDo("quiet");
859 if(::arg().mustDo("trace")) {
7b35aa49 860 SyncRes::setLog(true);
2e3d8a19 861 ::arg().set("quiet")="no";
1d5b3ce6 862 g_quiet=false;
0e9d9ce2
BH
863 }
864
865 bool logCommonErrors=::arg().mustDo("log-common-errors");
4d0217fc 866
f28307ad
BH
867 makeUDPServerSockets();
868 makeTCPServerSockets();
41f7a068
BH
869
870 if(::arg().mustDo("fork")) {
871 fork();
872 L<<Logger::Warning<<"This is forked pid "<<getpid()<<endl;
873 }
4a75412a
BH
874 d_clientsock=d_prevclientsock=-1;
875 remakeClientSocket();
41f7a068 876
1d5b3ce6
BH
877 makeControlChannelSocket();
878
fededf47 879 MT=new MTasker<PacketID,string>(100000);
562588a3 880
288f4aa9
BH
881 char data[1500];
882 struct sockaddr_in fromaddr;
883
884 PacketID pident;
bdf40704 885 primeHints();
c836dc19 886 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
caa6eefa 887#ifndef WIN32
2e3d8a19 888 if(::arg().mustDo("daemon")) {
c836dc19
BH
889 L.toConsole(Logger::Critical);
890 daemonize();
891 }
c75a6a9e 892 signal(SIGUSR1,usr1Handler);
9170fbaf 893 signal(SIGUSR2,usr2Handler);
4389619a 894 signal(SIGPIPE,SIG_IGN);
88def049
BH
895
896 writePid();
caa6eefa 897#endif
c75a6a9e 898
08efacea 899 int newgid=0;
2e3d8a19
BH
900 if(!::arg()["setgid"].empty())
901 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
08efacea 902 int newuid=0;
2e3d8a19
BH
903 if(!::arg()["setuid"].empty())
904 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
08efacea
BH
905
906
2e3d8a19
BH
907 if (!::arg()["chroot"].empty()) {
908 if (chroot(::arg()["chroot"].c_str())<0) {
909 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
08efacea
BH
910 exit(1);
911 }
912 }
913
914 Utility::dropPrivs(newuid, newgid);
915
0e9d9ce2 916
49f076e8 917 counter=0;
c9e9e5e0 918 struct timeval now;
2e3d8a19 919 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
4e120339
BH
920 int tcpTimeout=::arg().asNum("client-tcp-timeout");
921
922 unsigned int maxTCPPerClient=::arg().asNum("max-tcp-per-client");
923
288f4aa9 924 for(;;) {
fededf47 925 while(MT->schedule()); // housekeeping, let threads do their thing
288f4aa9 926
4a75412a
BH
927 if(!((counter++)%500)) {
928 remakeClientSocket();
d23a4bc7 929 MT->makeThread(houseKeeping,0);
4a75412a 930 }
f9f05db4 931 if(statsWanted) {
c75a6a9e 932 doStats();
f9f05db4 933 }
c836dc19 934
caa6eefa 935 Utility::socklen_t addrlen=sizeof(fromaddr);
288f4aa9 936 int d_len;
288f4aa9
BH
937
938 struct timeval tv;
939 tv.tv_sec=0;
940 tv.tv_usec=500000;
941
5c633640 942 fd_set readfds, writefds;
288f4aa9 943 FD_ZERO( &readfds );
5c633640 944 FD_ZERO( &writefds );
288f4aa9 945 FD_SET( d_clientsock, &readfds );
4a75412a
BH
946 if(d_prevclientsock >= 0)
947 FD_SET( d_prevclientsock, &readfds );
948
1d5b3ce6
BH
949 FD_SET( s_rcc.d_fd, &readfds);
950 int fdmax=max(d_clientsock, s_rcc.d_fd);
f28307ad 951
0e9d9ce2 952 if(!g_tcpconnections.empty())
c9e9e5e0 953 gettimeofday(&now, 0);
cd50f30d
BH
954
955 vector<TCPConnection> sweeped;
2e3d8a19 956
0e9d9ce2
BH
957 for(vector<TCPConnection>::iterator i=g_tcpconnections.begin();i!=g_tcpconnections.end();++i) {
958 if(i->state==TCPConnection::DONE || now.tv_sec < i->startTime + tcpTimeout) { // don't timeout when we are working on the question!
cd50f30d 959 sweeped.push_back(*i);
0e9d9ce2
BH
960 if(i->state!=TCPConnection::DONE) { // we don't listen for data when we are processing the question
961 FD_SET(i->fd, &readfds);
962 fdmax=max(fdmax,i->fd);
963 }
cd50f30d
BH
964 }
965 else {
0e9d9ce2
BH
966 if(logCommonErrors)
967 L<<Logger::Error<<"TCP timeout from client "<<inet_ntoa(i->remote.sin_addr)<<endl;
4e120339 968 i->closeAndCleanup();
cd50f30d 969 }
9c495589 970 }
0e9d9ce2 971 sweeped.swap(g_tcpconnections);
cd50f30d 972
f28307ad
BH
973 for(vector<int>::const_iterator i=d_udpserversocks.begin(); i!=d_udpserversocks.end(); ++i) {
974 FD_SET( *i, &readfds );
975 fdmax=max(fdmax,*i);
976 }
0e9d9ce2 977 if(g_tcpconnections.size() < maxTcpClients)
cd50f30d
BH
978 for(tcpserversocks_t::const_iterator i=s_tcpserversocks.begin(); i!=s_tcpserversocks.end(); ++i) {
979 FD_SET(*i, &readfds );
980 fdmax=max(fdmax,*i);
981 }
982
5c633640
BH
983 for(map<int,PacketID>::const_iterator i=d_tcpclientreadsocks.begin(); i!=d_tcpclientreadsocks.end(); ++i) {
984 // cerr<<"Adding TCP socket "<<i->first<<" to read select set"<<endl;
985 FD_SET( i->first, &readfds );
986 fdmax=max(fdmax,i->first);
987 }
988
989 for(map<int,PacketID>::const_iterator i=d_tcpclientwritesocks.begin(); i!=d_tcpclientwritesocks.end(); ++i) {
990 // cerr<<"Adding TCP socket "<<i->first<<" to write select set"<<endl;
991 FD_SET( i->first, &writefds );
992 fdmax=max(fdmax,i->first);
993 }
8d022964 994
5c633640 995 int selret = select( fdmax + 1, &readfds, &writefds, NULL, &tv );
c9e9e5e0 996 gettimeofday(&now, 0);
c75a6a9e
BH
997 if(selret<=0)
998 if (selret == -1 && errno!=EINTR)
288f4aa9 999 throw AhuException("Select returned: "+stringerror());
c75a6a9e
BH
1000 else
1001 continue;
1002
1d5b3ce6
BH
1003 if(FD_ISSET(s_rcc.d_fd, &readfds)) {
1004 string remote;
1005 string msg=s_rcc.recv(&remote);
1006 RecursorControlParser rcp;
aaacf7f2
BH
1007 RecursorControlParser::func_t* command;
1008 string answer=rcp.getAnswer(msg, &command);
1009 s_rcc.send(answer, &remote);
1010 command();
1d5b3ce6 1011 }
4a75412a
BH
1012
1013 for(int port=0; port < 2; ++port) {
1014 if(port && d_prevclientsock < 0)
1015 break;
1016 int sock = port ? d_prevclientsock : d_clientsock;
1017
1018 if(FD_ISSET(sock,&readfds)) { // do we have a UDP question response from a server ("we are the client", hence d_clientsock)
1019 while((d_len=recvfrom(sock, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen)) >= 0) {
1020 dnsheader dh;
1021 if((size_t) d_len >= sizeof(dnsheader)) {
1022 memcpy(&dh, data, sizeof(dh));
1023
1024 if(dh.qr && dh.qdcount) {
1025 pident.remote=fromaddr;
1026 pident.id=dh.id;
0d5f0a9f 1027 pident.domain=questionExpand(data, d_len);
4a75412a
BH
1028 string packet;
1029 packet.assign(data, d_len);
1030 if(!MT->sendEvent(pident, &packet)) {
1031 if(logCommonErrors)
0d5f0a9f 1032 L<<Logger::Warning<<"Discarding unexpected packet answering '"<<pident.domain<<"' from "<<sockAddrToString((struct sockaddr_in*) &fromaddr, addrlen)<<endl;
4a75412a 1033 g_stats.unexpectedCount++;
35ce8576
BH
1034
1035 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
0d5f0a9f 1036 if(!memcmp(&mthread->key.remote.sin_addr, &pident.remote.sin_addr, sizeof(pident.remote.sin_addr)) && !strcasecmp(pident.domain.c_str(), mthread->key.domain.c_str())) {
35ce8576
BH
1037 mthread->key.nearMisses++;
1038 }
1039 }
4a75412a 1040 }
0e79911b 1041 }
4a75412a
BH
1042 else
1043 L<<Logger::Warning<<"Ignoring question on outgoing socket from "<< sockAddrToString((struct sockaddr_in*) &fromaddr, addrlen) <<endl;
1044 }
1045 else {
1046 g_stats.serverParseError++;
1047 if(logCommonErrors)
1048 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< sockAddrToString((struct sockaddr_in*) &fromaddr, addrlen) <<": packet too small"<<endl;
9b356afc 1049 }
288f4aa9 1050 }
288f4aa9
BH
1051 }
1052 }
1053
f28307ad
BH
1054 for(vector<int>::const_iterator i=d_udpserversocks.begin(); i!=d_udpserversocks.end(); ++i) {
1055 if(FD_ISSET(*i,&readfds)) { // do we have a new question on udp?
9b356afc 1056 while((d_len=recvfrom(*i, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen)) >= 0) {
0e9d9ce2 1057 // g_stats.queryrate.pulse(now); // (broken)
c8ddb7c2
BH
1058 if(g_allowFrom && !g_allowFrom->match(&fromaddr)) {
1059 g_stats.unauthorizedUDP++;
1060 continue;
1061 }
9b356afc
BH
1062
1063 try {
1064 DNSComboWriter* dc = new DNSComboWriter(data, d_len, now);
7b1469bb 1065
9b356afc
BH
1066 dc->setRemote((struct sockaddr *)&fromaddr, addrlen);
1067
0e9d9ce2
BH
1068 if(dc->d_mdp.d_header.qr) {
1069 if(logCommonErrors)
1070 L<<Logger::Error<<"Ignoring answer from "<<dc->getRemote()<<" on server socket!"<<endl;
1071 }
9b356afc
BH
1072 else {
1073 ++g_stats.qcounter;
1074 dc->setSocket(*i);
1075 dc->d_tcp=false;
d23a4bc7 1076 MT->makeThread(startDoResolve, (void*) dc);
9b356afc
BH
1077 }
1078 }
1079 catch(MOADNSException& mde) {
0e9d9ce2 1080 g_stats.clientParseError++;
aa4e4cbf 1081 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<< sockAddrToString((struct sockaddr_in*) &fromaddr, addrlen) <<": "<<mde.what()<<endl;
de1890b2 1082 }
9b356afc 1083 }
288f4aa9
BH
1084 }
1085 }
9c495589 1086
cd50f30d 1087 for(tcpserversocks_t::const_iterator i=s_tcpserversocks.begin(); i!=s_tcpserversocks.end(); ++i) {
0e9d9ce2 1088 if(FD_ISSET(*i ,&readfds)) { // do we have a new TCP connection from a client?
f28307ad
BH
1089 struct sockaddr_in addr;
1090 socklen_t addrlen=sizeof(addr);
1091 int newsock=accept(*i, (struct sockaddr*)&addr, &addrlen);
f28307ad 1092 if(newsock>0) {
c8ddb7c2
BH
1093 if(g_allowFrom && !g_allowFrom->match(&addr)) {
1094 g_stats.unauthorizedTCP++;
1095 close(newsock);
1096 continue;
1097 }
1098
4e120339
BH
1099 if(maxTCPPerClient && g_tcpClientCounts.count(addr.sin_addr.s_addr) && g_tcpClientCounts[addr.sin_addr.s_addr] >= maxTCPPerClient) {
1100 g_stats.tcpClientOverflow++;
1101 close(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1102 continue;
1103 }
1104 g_tcpClientCounts[addr.sin_addr.s_addr]++;
f28307ad
BH
1105 Utility::setNonBlocking(newsock);
1106 TCPConnection tc;
1107 tc.fd=newsock;
1108 tc.state=TCPConnection::BYTE0;
1109 tc.remote=addr;
c9e9e5e0 1110 tc.startTime=now.tv_sec;
0e9d9ce2 1111 g_tcpconnections.push_back(tc);
f28307ad 1112 }
9c495589
BH
1113 }
1114 }
1115
369369f6 1116 // have any question answers come in over TCP?
5c633640 1117 for(map<int,PacketID>::iterator i=d_tcpclientreadsocks.begin(); i!=d_tcpclientreadsocks.end();) {
72df400f 1118 bool haveErased=false;
5c633640 1119 if(FD_ISSET(i->first, &readfds)) { // can we receive
72df400f
BH
1120 shared_array<char> buffer(new char[i->second.inNeeded]);
1121
1122 int ret=read(i->first, buffer.get(), min(i->second.inNeeded,200));
5c633640
BH
1123 // cerr<<"Read returned "<<ret<<endl;
1124 if(ret > 0) {
72df400f 1125 i->second.inMSG.append(&buffer[0], &buffer[ret]);
5c633640
BH
1126 i->second.inNeeded-=ret;
1127 if(!i->second.inNeeded) {
1128 // cerr<<"Got entire load of "<<i->second.inMSG.size()<<" bytes"<<endl;
1129 PacketID pid=i->second;
1130 string msg=i->second.inMSG;
1131
1132 d_tcpclientreadsocks.erase((i++));
72df400f 1133 haveErased=true;
5c633640
BH
1134 MT->sendEvent(pid, &msg); // XXX DODGY
1135 }
1136 else {
1137 // cerr<<"Still have "<<i->second.inNeeded<<" left to go"<<endl;
5c633640
BH
1138 }
1139 }
1140 else {
9170fbaf
BH
1141 // cerr<<"when reading ret="<<ret<<endl;
1142 // XXX FIXME I think some stuff needs to happen here - like send an EOF event
5c633640
BH
1143 }
1144 }
72df400f 1145 if(!haveErased)
5c633640 1146 ++i;
5c633640 1147 }
369369f6
BH
1148
1149 // is there data we can send to remote nameservers over TCP?
5c633640 1150 for(map<int,PacketID>::iterator i=d_tcpclientwritesocks.begin(); i!=d_tcpclientwritesocks.end(); ) {
72df400f 1151 bool haveErased=false;
5c633640
BH
1152 if(FD_ISSET(i->first, &writefds)) { // can we send over TCP
1153 // cerr<<"Socket "<<i->first<<" available for writing"<<endl;
1154 int ret=write(i->first, i->second.outMSG.c_str(), i->second.outMSG.size() - i->second.outPos);
1155 if(ret > 0) {
1156 i->second.outPos+=ret;
1157 if(i->second.outPos==i->second.outMSG.size()) {
1158 // cerr<<"Sent out entire load of "<<i->second.outMSG.size()<<" bytes"<<endl;
1159 PacketID pid=i->second;
369369f6 1160 d_tcpclientwritesocks.erase(i++); // erase!
72df400f 1161 haveErased=true;
369369f6 1162 MT->sendEvent(pid, 0);
5c633640 1163 }
72df400f 1164
5c633640
BH
1165 }
1166 else {
9170fbaf
BH
1167 // cerr<<"ret="<<ret<<" when writing"<<endl;
1168 // XXX FIXME I think some stuff needs to happen here - like send an EOF event
5c633640
BH
1169 }
1170 }
72df400f 1171 if(!haveErased)
5c633640
BH
1172 ++i;
1173 }
369369f6
BH
1174
1175 // very braindead TCP incoming question parser
0e9d9ce2 1176 for(vector<TCPConnection>::iterator i=g_tcpconnections.begin();i!=g_tcpconnections.end();++i) {
9c495589
BH
1177 if(FD_ISSET(i->fd, &readfds)) {
1178 if(i->state==TCPConnection::BYTE0) {
1179 int bytes=read(i->fd,i->data,2);
1180 if(bytes==1)
1181 i->state=TCPConnection::BYTE1;
1182 if(bytes==2) {
1183 i->qlen=(i->data[0]<<8)+i->data[1];
1184 i->bytesread=0;
1185 i->state=TCPConnection::GETQUESTION;
1186 }
1187 if(!bytes || bytes < 0) {
4e120339 1188 i->closeAndCleanup();
0e9d9ce2 1189 g_tcpconnections.erase(i);
9c495589
BH
1190 break;
1191 }
1192 }
1193 else if(i->state==TCPConnection::BYTE1) {
1194 int bytes=read(i->fd,i->data+1,1);
1195 if(bytes==1) {
1196 i->state=TCPConnection::GETQUESTION;
1197 i->qlen=(i->data[0]<<8)+i->data[1];
1198 i->bytesread=0;
1199 }
1200 if(!bytes || bytes < 0) {
0e9d9ce2
BH
1201 if(logCommonErrors)
1202 L<<Logger::Error<<"TCP client "<<sockAddrToString(&i->remote,sizeof(i->remote))<<" disconnected after first byte"<<endl;
4e120339 1203 i->closeAndCleanup();
0e9d9ce2 1204 g_tcpconnections.erase(i);
9c495589
BH
1205 break;
1206 }
9c495589
BH
1207 }
1208 else if(i->state==TCPConnection::GETQUESTION) {
1209 int bytes=read(i->fd,i->data + i->bytesread,i->qlen - i->bytesread);
1210 if(!bytes || bytes < 0) {
0e9d9ce2 1211 L<<Logger::Error<<"TCP client "<<sockAddrToString(&i->remote,sizeof(i->remote))<<" disconnected while reading question body"<<endl;
4e120339 1212 i->closeAndCleanup();
0e9d9ce2 1213 g_tcpconnections.erase(i);
9c495589
BH
1214 break;
1215 }
1216 i->bytesread+=bytes;
1217 if(i->bytesread==i->qlen) {
0e9d9ce2 1218 i->state=TCPConnection::DONE; // this makes us immune from timeouts, from now on *we* are responsible
a1754c6a 1219 DNSComboWriter* dc=0;
ea634573 1220 try {
c9e9e5e0 1221 dc=new DNSComboWriter(i->data, i->qlen, now);
ea634573
BH
1222 }
1223 catch(MOADNSException &mde) {
0e9d9ce2
BH
1224 g_stats.clientParseError++;
1225 L<<Logger::Error<<"Unable to parse packet from TCP client "<<sockAddrToString(&i->remote,sizeof(i->remote))<<endl;
4e120339 1226 i->closeAndCleanup();
0e9d9ce2 1227 g_tcpconnections.erase(i);
9c495589
BH
1228 break;
1229 }
ea634573
BH
1230
1231 dc->setSocket(i->fd);
1232 dc->d_tcp=true;
1233 dc->setRemote((struct sockaddr *)&i->remote,sizeof(i->remote));
1234 if(dc->d_mdp.d_header.qr)
1235 L<<Logger::Error<<"Ignoring answer on server socket!"<<endl;
1236 else {
aaacf7f2
BH
1237 ++g_stats.qcounter;
1238 ++g_stats.tcpqcounter;
d23a4bc7 1239 MT->makeThread(startDoResolve, dc);
0e9d9ce2 1240 break;
9c495589
BH
1241 }
1242 }
1243 }
1244 }
1245 }
288f4aa9
BH
1246 }
1247 }
1248 catch(AhuException &ae) {
c836dc19 1249 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 1250 ret=EXIT_FAILURE;
288f4aa9
BH
1251 }
1252 catch(exception &e) {
c836dc19 1253 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 1254 ret=EXIT_FAILURE;
288f4aa9
BH
1255 }
1256 catch(...) {
c836dc19 1257 L<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 1258 ret=EXIT_FAILURE;
288f4aa9 1259 }
caa6eefa
BH
1260
1261#ifdef WIN32
1262 WSACleanup();
1263#endif // WIN32
1264
22030c37 1265 return ret;
288f4aa9 1266}