]> git.ipfire.org Git - thirdparty/pdns.git/blame - pdns/pdns_recursor.cc
implement very simple SMP support
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
CommitLineData
288f4aa9
BH
1/*
2 PowerDNS Versatile Database Driven Nameserver
22c012a8 3 Copyright (C) 2003 - 2006 PowerDNS.COM BV
288f4aa9
BH
4
5 This program is free software; you can redistribute it and/or modify
f28307ad
BH
6 it under the terms of the GNU General Public License version 2
7 as published by the Free Software Foundation
288f4aa9
BH
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/
caa6eefa
BH
18
19#include "utility.hh"
288f4aa9
BH
20#include <iostream>
21#include <errno.h>
22#include <map>
23#include <set>
caa6eefa 24#ifndef WIN32
288f4aa9 25#include <netdb.h>
caa6eefa 26#endif // WIN32
97bb160b 27#include "recursor_cache.hh"
288f4aa9 28#include <stdio.h>
c75a6a9e 29#include <signal.h>
288f4aa9
BH
30#include <stdlib.h>
31#include <unistd.h>
c8ddb7c2 32#include <netinet/tcp.h>
288f4aa9
BH
33#include "mtasker.hh"
34#include <utility>
288f4aa9
BH
35#include "arguments.hh"
36#include "syncres.hh"
88def049
BH
37#include <fcntl.h>
38#include <fstream>
5c633640
BH
39#include "sstuff.hh"
40#include <boost/tuple/tuple.hpp>
41#include <boost/tuple/tuple_comparison.hpp>
72df400f 42#include <boost/shared_array.hpp>
ea634573
BH
43#include <boost/lexical_cast.hpp>
44#include "dnsparser.hh"
45#include "dnswriter.hh"
46#include "dnsrecords.hh"
f814d7c8 47#include "zoneparser-tng.hh"
1d5b3ce6 48#include "rec_channel.hh"
aaacf7f2 49#include "logger.hh"
c8ddb7c2 50#include "iputils.hh"
1d5b3ce6 51
a2bfc3ff
BH
52#ifndef RECURSOR
53#include "statbag.hh"
54StatBag S;
55#endif
56
57
33988bfb 58using namespace boost;
5c633640 59
27adc173 60#ifdef __FreeBSD__ // see cvstrac ticket #26
7f617eb9
BH
61#include <pthread.h>
62#include <semaphore.h>
63#endif
64
eefd15f9 65MemRecursorCache RC;
1d5b3ce6
BH
66RecursorStats g_stats;
67bool g_quiet;
c8ddb7c2 68NetmaskGroup* g_allowFrom;
88def049 69string s_programname="pdns_recursor";
288f4aa9 70
ea634573 71struct DNSComboWriter {
c9e9e5e0 72 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(data, len), d_now(now), d_tcp(false), d_socket(-1)
ea634573
BH
73 {}
74 MOADNSParser d_mdp;
75 void setRemote(struct sockaddr* sa, socklen_t len)
76 {
77 memcpy((void *)d_remote, (void *)sa, len);
78 d_socklen=len;
79 }
80
81 void setSocket(int sock)
82 {
83 d_socket=sock;
84 }
a1754c6a
BH
85
86 string getRemote() const
87 {
88 return sockAddrToString((struct sockaddr_in *)d_remote, d_socklen);
89 }
90
c9e9e5e0 91 struct timeval d_now;
ea634573
BH
92 char d_remote[sizeof(sockaddr_in6)];
93 socklen_t d_socklen;
94 bool d_tcp;
95 int d_socket;
96};
97
98
27adc173
BH
99#ifndef WIN32
100#ifndef __FreeBSD__
288f4aa9
BH
/* Do-nothing definitions of a handful of POSIX semaphore and pthread
   functions. Every one of them ignores its arguments and reports success
   (0, or a zero pthread_t).
   NOTE(review): presumably here so that no real threading library is
   needed - confirm before touching. */
extern "C" {
  int sem_init(sem_t*, int, unsigned int) { return 0; }
  int sem_wait(sem_t*) { return 0; }
  int sem_trywait(sem_t*) { return 0; }
  int sem_post(sem_t*) { return 0; }
  int sem_getvalue(sem_t*, int*) { return 0; }

  pthread_t pthread_self(void) { return (pthread_t) 0; }

  int pthread_mutex_init(pthread_mutex_t*, const pthread_mutexattr_t*) { return 0; }
  int pthread_mutex_lock(pthread_mutex_t*) { return 0; }
  int pthread_mutex_unlock(pthread_mutex_t*) { return 0; }
  int pthread_mutex_destroy(pthread_mutex_t*) { return 0; }
}
27adc173 113#endif // __FreeBSD__
caa6eefa 114#endif // WIN32
288f4aa9 115
288f4aa9
BH
116ArgvMap &arg()
117{
118 static ArgvMap theArg;
119 return theArg;
120}
bacc40d9 121static int d_clientsock;
f28307ad 122static vector<int> d_udpserversocks;
288f4aa9 123
cd50f30d
BH
124typedef vector<int> tcpserversocks_t;
125static tcpserversocks_t s_tcpserversocks;
5c633640 126
5c633640
BH
127static map<int,PacketID> d_tcpclientreadsocks, d_tcpclientwritesocks;
128
129MTasker<PacketID,string>* MT;
130
131int asendtcp(const string& data, Socket* sock)
132{
133 PacketID pident;
134 pident.sock=sock;
135 pident.outMSG=data;
136 string packet;
137
5c633640
BH
138 d_tcpclientwritesocks[sock->getHandle()]=pident;
139
9170fbaf
BH
140 int ret=MT->waitEvent(pident,&packet,1);
141 if(!ret || ret==-1) { // timeout
5c633640 142 d_tcpclientwritesocks.erase(sock->getHandle());
5c633640 143 }
9170fbaf 144 return ret;
5c633640
BH
145}
146
9170fbaf 147// -1 is error, 0 is timeout, 1 is success
5c633640 148int arecvtcp(string& data, int len, Socket* sock)
288f4aa9 149{
5c633640
BH
150 data="";
151 PacketID pident;
152 pident.sock=sock;
153 pident.inNeeded=len;
154
5c633640
BH
155 d_tcpclientreadsocks[sock->getHandle()]=pident;
156
9170fbaf
BH
157 int ret=MT->waitEvent(pident,&data,1);
158 if(!ret || ret==-1) { // timeout
5c633640 159 d_tcpclientreadsocks.erase(sock->getHandle());
288f4aa9 160 }
9170fbaf 161 return ret;
288f4aa9
BH
162}
163
288f4aa9
BH
164
165/* these two functions are used by LWRes */
9170fbaf 166// -1 is error, > 1 is success
288f4aa9
BH
167int asendto(const char *data, int len, int flags, struct sockaddr *toaddr, int addrlen, int id)
168{
169 return sendto(d_clientsock, data, len, flags, toaddr, addrlen);
170}
171
9170fbaf 172// -1 is error, 0 is timeout, 1 is success
caa6eefa 173int arecvfrom(char *data, int len, int flags, struct sockaddr *toaddr, Utility::socklen_t *addrlen, int *d_len, int id)
288f4aa9
BH
174{
175 PacketID pident;
176 pident.id=id;
29a14b24 177 memcpy(&pident.remote, toaddr, sizeof(pident.remote));
b636533b 178
288f4aa9 179 string packet;
29a14b24 180 int ret=MT->waitEvent(pident, &packet, 1);
9170fbaf
BH
181 if(ret > 0) {
182 *d_len=packet.size();
183 memcpy(data,packet.c_str(),min(len,*d_len));
288f4aa9 184 }
9170fbaf 185 return ret;
288f4aa9
BH
186}
187
ce8deb27
BH
188void setReceiveBuffer(int fd, uint32_t size)
189{
9b356afc 190 uint32_t psize=0;
91e4ecf3 191 socklen_t len=sizeof(psize);
9b356afc
BH
192
193 if(!getsockopt(fd, SOL_SOCKET, SO_RCVBUF, (char*)&psize, &len) && psize > size) {
a19fb8e8 194 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
9b356afc
BH
195 return;
196 }
197
ce8deb27 198 if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (char*)&size, sizeof(size)) < 0 )
a19fb8e8 199 L<<Logger::Error<<"Warning: unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
ce8deb27
BH
200}
201
202
88def049
BH
203static void writePid(void)
204{
2e3d8a19 205 string fname=::arg()["socket-dir"]+"/"+s_programname+".pid";
88def049
BH
206 ofstream of(fname.c_str());
207 if(of)
369369f6 208 of<< getpid() <<endl;
88def049 209 else
562588a3 210 L<<Logger::Error<<"Requested to write pid for "<<getpid()<<" to "<<fname<<" failed: "<<strerror(errno)<<endl;
88def049
BH
211}
212
bdf40704 213void primeHints(void)
288f4aa9
BH
214{
215 // prime root cache
288f4aa9 216 set<DNSResourceRecord>nsset;
f814d7c8 217
2e3d8a19 218 if(::arg()["hint-file"].empty()) {
f814d7c8
BH
219 static char*ips[]={"198.41.0.4", "192.228.79.201", "192.33.4.12", "128.8.10.90", "192.203.230.10", "192.5.5.241", "192.112.36.4", "128.63.2.53",
220 "192.36.148.17","192.58.128.30", "193.0.14.129", "198.32.64.12", "202.12.27.33"};
221 DNSResourceRecord arr, nsrr;
222 arr.qtype=QType::A;
223 arr.ttl=time(0)+3600000;
224 nsrr.qtype=QType::NS;
225 nsrr.ttl=time(0)+3600000;
226
5456e605 227 for(char c='a';c<='m';++c) {
f814d7c8 228 static char templ[40];
7738a23f 229 strncpy(templ,"a.root-servers.net.", sizeof(templ) - 1);
f814d7c8
BH
230 *templ=c;
231 arr.qname=nsrr.content=templ;
5456e605 232 arr.content=ips[c-'a'];
f814d7c8
BH
233 set<DNSResourceRecord> aset;
234 aset.insert(arr);
235 RC.replace(string(templ), QType(QType::A), aset);
236
237 nsset.insert(nsrr);
238 }
239 }
240 else {
2e3d8a19 241 ZoneParserTNG zpt(::arg()["hint-file"]);
f814d7c8 242 DNSResourceRecord rr;
ea634573 243 set<DNSResourceRecord> aset;
288f4aa9 244
f814d7c8 245 while(zpt.get(rr)) {
f814d7c8
BH
246 rr.ttl+=time(0);
247 if(rr.qtype.getCode()==QType::A) {
248 set<DNSResourceRecord> aset;
249 aset.insert(rr);
250 RC.replace(rr.qname, QType(QType::A), aset);
251 }
252 if(rr.qtype.getCode()==QType::NS) {
e2e2c5d8 253 rr.content=toLower(rr.content);
f814d7c8
BH
254 nsset.insert(rr);
255 }
256 }
288f4aa9 257 }
7738a23f 258 RC.replace(".", QType(QType::NS), nsset); // and stuff in the cache
288f4aa9
BH
259}
260
261void startDoResolve(void *p)
262{
263 try {
ea634573 264 DNSComboWriter* dc=(DNSComboWriter *)p;
b636533b 265
10321a98
BH
266 uint16_t maxudpsize=512;
267 MOADNSParser::EDNSOpts edo;
268 if(dc->d_mdp.getEDNSOpts(&edo)) {
269 maxudpsize=edo.d_packetsize;
270 }
271
ea634573 272 vector<DNSResourceRecord> ret;
9170fbaf 273
ea634573
BH
274 vector<uint8_t> packet;
275 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
276
277 pw.getHeader()->aa=0;
278 pw.getHeader()->ra=1;
c154c8a4 279 pw.getHeader()->qr=1;
ea634573 280 pw.getHeader()->id=dc->d_mdp.d_header.id;
10321a98 281 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
ea634573 282
9170fbaf 283 // MT->setTitle("udp question for "+P.qdomain+"|"+P.qtype.getName());
c9e9e5e0 284 SyncRes sr(dc->d_now);
1d5b3ce6 285 if(!g_quiet)
8a63d3ce
BH
286 L<<Logger::Error<<"["<<MT->getTid()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
287 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote()<<endl;
c75a6a9e 288
fededf47 289 sr.setId(MT->getTid());
ea634573 290 if(!dc->d_mdp.d_header.rd)
c836dc19
BH
291 sr.setCacheOnly();
292
ea634573 293 int res=sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret);
1d5b3ce6 294 if(res<0) {
ea634573 295 pw.getHeader()->rcode=RCode::ServFail;
bec87d21 296 // no commit here, because no record
1d5b3ce6
BH
297 g_stats.servFails++;
298 }
288f4aa9 299 else {
ea634573 300 pw.getHeader()->rcode=res;
1d5b3ce6 301 switch(res) {
5e4a2466
BH
302 case RCode::ServFail:
303 g_stats.servFails++;
304 break;
1d5b3ce6
BH
305 case RCode::NXDomain:
306 g_stats.nxDomains++;
307 break;
308 case RCode::NoError:
309 g_stats.noErrors++;
310 break;
311 }
312
c154c8a4 313 if(ret.size()) {
e67e250f 314 shuffle(ret);
c154c8a4 315 for(vector<DNSResourceRecord>::const_iterator i=ret.begin();i!=ret.end();++i) {
10321a98 316 pw.startRecord(i->qname, i->qtype.getCode(), i->ttl, 1, (DNSPacketWriter::Place)i->d_place);
c154c8a4
BH
317 shared_ptr<DNSRecordContent> drc(DNSRecordContent::mastermake(i->qtype.getCode(), 1, i->content));
318 drc->toPacket(pw);
10321a98
BH
319 if(!dc->d_tcp && pw.size() > maxudpsize) {
320 pw.rollback();
1791e3c4
BH
321 if(i->d_place==DNSResourceRecord::ANSWER) // only truncate if we actually omitted parts of the answer
322 pw.getHeader()->tc=1;
10321a98
BH
323 goto sendit; // need to jump over pw.commit
324 }
c154c8a4
BH
325 }
326 pw.commit();
ea634573 327 }
288f4aa9 328 }
10321a98 329 sendit:;
ea634573 330 if(!dc->d_tcp) {
ea634573 331 sendto(dc->d_socket, &*packet.begin(), packet.size(), 0, (struct sockaddr *)(dc->d_remote), dc->d_socklen);
feccc9fc 332 }
9c495589
BH
333 else {
334 char buf[2];
ea634573
BH
335 buf[0]=packet.size()/256;
336 buf[1]=packet.size()%256;
feccc9fc
BH
337
338 struct iovec iov[2];
339
ea634573
BH
340 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
341 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
feccc9fc 342
ea634573 343 int ret=writev(dc->d_socket, iov, 2);
feccc9fc
BH
344
345 if(ret <= 0 )
a1754c6a 346 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< (ret ? strerror(errno) : "EOF") <<endl;
ea634573 347 else if((unsigned int)ret != 2 + packet.size())
a1754c6a 348 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" - probably would have trouble receiving our answer anyhow (size="<<packet.size()<<")"<<endl;
9c495589
BH
349 }
350
9170fbaf 351 // MT->setTitle("DONE! udp question for "+P.qdomain+"|"+P.qtype.getName());
1d5b3ce6 352 if(!g_quiet) {
8a63d3ce 353 L<<Logger::Error<<"["<<MT->getTid()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
ea634573 354 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
5c633640 355 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
c75a6a9e
BH
356 }
357
eefd15f9 358 sr.d_outqueries ? RC.cacheMisses++ : RC.cacheHits++;
fe213470
BH
359 float spent=makeFloat(sr.d_now-dc->d_now);
360 if(spent < 0.001)
361 g_stats.answers0_1++;
362 else if(spent < 0.010)
363 g_stats.answers1_10++;
364 else if(spent < 0.1)
365 g_stats.answers10_100++;
366 else if(spent < 1.0)
367 g_stats.answers100_1000++;
368 else
369 g_stats.answersSlow++;
370
574af7ea 371 uint64_t newLat=(uint64_t)(spent*1000000);
87b8e43a
BH
372 if(newLat < 1000000) // outliers of several minutes exist..
373 g_stats.avgLatencyUsec=(uint64_t)((1-0.0001)*g_stats.avgLatencyUsec + 0.0001*newLat);
ea634573 374 delete dc;
288f4aa9
BH
375 }
376 catch(AhuException &ae) {
c836dc19 377 L<<Logger::Error<<"startDoResolve problem: "<<ae.reason<<endl;
288f4aa9 378 }
c154c8a4
BH
379 catch(exception& e) {
380 L<<Logger::Error<<"STL error: "<<e.what()<<endl;
381 }
288f4aa9 382 catch(...) {
c836dc19 383 L<<Logger::Error<<"Any other exception in a resolver context"<<endl;
288f4aa9
BH
384 }
385}
386
1d5b3ce6
BH
387RecursorControlChannel s_rcc;
388
389void makeControlChannelSocket()
390{
41f7a068
BH
391 string sockname=::arg()["socket-dir"]+"/pdns_recursor.controlsocket";
392 if(::arg().mustDo("fork")) {
393 sockname+="."+lexical_cast<string>(getpid());
394 L<<Logger::Warning<<"Forked control socket name: "<<sockname<<endl;
395 }
396 s_rcc.listen(sockname);
1d5b3ce6
BH
397}
398
288f4aa9
BH
399void makeClientSocket()
400{
401 d_clientsock=socket(AF_INET, SOCK_DGRAM,0);
402 if(d_clientsock<0)
403 throw AhuException("Making a socket for resolver: "+stringerror());
a19fb8e8 404 setReceiveBuffer(d_clientsock, 200000);
288f4aa9
BH
405 struct sockaddr_in sin;
406 memset((char *)&sin,0, sizeof(sin));
407
408 sin.sin_family = AF_INET;
0d189311 409
2e3d8a19
BH
410 if(!IpToU32(::arg()["query-local-address"], &sin.sin_addr.s_addr))
411 throw AhuException("Unable to resolve local address '"+ ::arg()["query-local-address"] +"'");
0d189311 412
288f4aa9
BH
413 int tries=10;
414 while(--tries) {
092f210a 415 uint16_t port=10000+Utility::random()%10000;
288f4aa9
BH
416 sin.sin_port = htons(port);
417
2e3d8a19 418 if (::bind(d_clientsock, (struct sockaddr *)&sin, sizeof(sin)) >= 0)
288f4aa9 419 break;
288f4aa9
BH
420
421 }
422 if(!tries)
423 throw AhuException("Resolver binding to local socket: "+stringerror());
976196d2
BH
424
425 Utility::setNonBlocking(d_clientsock);
ce8deb27 426
0d189311 427 L<<Logger::Error<<"Sending UDP queries from "<<inet_ntoa(sin.sin_addr)<<":"<< ntohs(sin.sin_port) <<endl;
288f4aa9
BH
428}
429
f28307ad 430void makeTCPServerSockets()
9c495589 431{
f28307ad 432 vector<string>locals;
2e3d8a19 433 stringtok(locals,::arg()["local-address"]," ,");
9c495589 434
f28307ad
BH
435 if(locals.empty())
436 throw AhuException("No local address specified");
437
f28307ad
BH
438 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
439 int fd=socket(AF_INET, SOCK_STREAM,0);
440 if(fd<0)
441 throw AhuException("Making a server socket for resolver: "+stringerror());
bacc40d9 442
f28307ad
BH
443 struct sockaddr_in sin;
444 memset((char *)&sin,0, sizeof(sin));
445
446 sin.sin_family = AF_INET;
447 if(!IpToU32(*i, &sin.sin_addr.s_addr))
448 throw AhuException("Unable to resolve local address '"+ *i +"'");
449
450 int tmp=1;
451 if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
452 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
c8ddb7c2 453 exit(1);
f28307ad
BH
454 }
455
c8ddb7c2
BH
456#ifdef TCP_DEFER_ACCEPT
457 if(setsockopt(fd,SOL_TCP,TCP_DEFER_ACCEPT,(char*)&tmp,sizeof tmp) >= 0) {
458 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
459 }
460#endif
461
2e3d8a19 462 sin.sin_port = htons(::arg().asNum("local-port"));
f28307ad 463
2e3d8a19 464 if (::bind(fd, (struct sockaddr *)&sin, sizeof(sin))<0)
f28307ad
BH
465 throw AhuException("Binding TCP server socket for "+*i+": "+stringerror());
466
467 Utility::setNonBlocking(fd);
468 listen(fd, 128);
cd50f30d 469 s_tcpserversocks.push_back(fd);
2e3d8a19 470 L<<Logger::Error<<"Listening for TCP queries on "<<inet_ntoa(sin.sin_addr)<<":"<<::arg().asNum("local-port")<<endl;
f28307ad 471 }
9c495589
BH
472}
473
f28307ad 474void makeUDPServerSockets()
288f4aa9 475{
f28307ad 476 vector<string>locals;
2e3d8a19 477 stringtok(locals,::arg()["local-address"]," ,");
288f4aa9 478
f28307ad
BH
479 if(locals.empty())
480 throw AhuException("No local address specified");
481
2e3d8a19 482 if(::arg()["local-address"]=="0.0.0.0") {
c836dc19 483 L<<Logger::Warning<<"It is advised to bind to explicit addresses with the --local-address option"<<endl;
288f4aa9 484 }
525b8a7c 485
f28307ad
BH
486 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
487 int fd=socket(AF_INET, SOCK_DGRAM,0);
488 if(fd<0)
489 throw AhuException("Making a server socket for resolver: "+stringerror());
a19fb8e8 490 setReceiveBuffer(fd, 200000);
f28307ad
BH
491 struct sockaddr_in sin;
492 memset((char *)&sin,0, sizeof(sin));
288f4aa9 493
f28307ad
BH
494 sin.sin_family = AF_INET;
495 if(!IpToU32(*i, &sin.sin_addr.s_addr))
496 throw AhuException("Unable to resolve local address '"+ *i +"'");
497
2e3d8a19 498 sin.sin_port = htons(::arg().asNum("local-port"));
f28307ad 499
2e3d8a19 500 if (::bind(fd, (struct sockaddr *)&sin, sizeof(sin))<0)
f28307ad
BH
501 throw AhuException("Resolver binding to server socket for "+*i+": "+stringerror());
502
503 Utility::setNonBlocking(fd);
504 d_udpserversocks.push_back(fd);
2e3d8a19 505 L<<Logger::Error<<"Listening for UDP queries on "<<inet_ntoa(sin.sin_addr)<<":"<<::arg().asNum("local-port")<<endl;
f28307ad 506 }
c836dc19 507}
caa6eefa 508
9c495589 509
caa6eefa 510#ifndef WIN32
c836dc19
BH
511void daemonize(void)
512{
513 if(fork())
514 exit(0); // bye bye
515
516 setsid();
517
518 // cleanup open fds, but skip sockets
519 close(0);
520 close(1);
521 close(2);
288f4aa9 522}
caa6eefa
BH
523#endif
524
aaacf7f2 525uint64_t counter;
c75a6a9e
BH
526bool statsWanted;
527
1d5b3ce6 528
c75a6a9e
BH
529void usr1Handler(int)
530{
531 statsWanted=true;
532}
ae1b2e98 533
c9e9e5e0
BH
534
535
9170fbaf
BH
536void usr2Handler(int)
537{
538 SyncRes::setLog(true);
1d5b3ce6
BH
539 g_quiet=false;
540 ::arg().set("quiet")="no";
c9e9e5e0 541
9170fbaf
BH
542}
543
c75a6a9e
BH
544void doStats(void)
545{
aaacf7f2
BH
546 if(g_stats.qcounter) {
547 L<<Logger::Error<<"stats: "<<g_stats.qcounter<<" questions, "<<RC.size()<<" cache entries, "<<SyncRes::s_negcache.size()<<" negative entries, "
8a5602d4 548 <<(int)((RC.cacheHits*100.0)/(RC.cacheHits+RC.cacheMisses))<<"% cache hits"<<endl;
2e3d8a19 549 L<<Logger::Error<<"stats: throttle map: "<<SyncRes::s_throttle.size()<<", ns speeds: "
8cd5b55e 550 <<SyncRes::s_nsSpeeds.size()<<endl; // ", bytes: "<<RC.bytes()<<endl;
8a5602d4 551 L<<Logger::Error<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
525b8a7c
BH
552 L<<Logger::Error<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
553 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
5c633640 554 L<<Logger::Error<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<MT->numProcesses()<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
c75a6a9e 555 }
7becf07f
BH
556 else if(statsWanted)
557 L<<Logger::Error<<"stats: no stats yet!"<<endl;
558
c75a6a9e
BH
559 statsWanted=false;
560}
c836dc19 561
29f0b1ce 562static void houseKeeping(void *)
c836dc19 563{
ae1b2e98 564 static time_t last_stat, last_rootupdate, last_prune;
c9e9e5e0
BH
565 struct timeval now;
566 gettimeofday(&now, 0);
567
255e0a07 568 if(now.tv_sec - last_prune > 300) {
5e4a2466
BH
569 DTime dt;
570 dt.setTimeval(now);
eefd15f9 571 RC.doPrune();
33988bfb
BH
572
573 typedef SyncRes::negcache_t::nth_index<1>::type negcache_by_ttd_index_t;
574 negcache_by_ttd_index_t& ttdindex=boost::multi_index::get<1>(SyncRes::s_negcache);
575
576 negcache_by_ttd_index_t::iterator i=ttdindex.lower_bound(now.tv_sec);
577 ttdindex.erase(ttdindex.begin(), i);
2e3d8a19 578
c9e9e5e0 579 time_t limit=now.tv_sec-300;
2e3d8a19
BH
580 for(SyncRes::nsspeeds_t::iterator i = SyncRes::s_nsSpeeds.begin() ; i!= SyncRes::s_nsSpeeds.end(); )
581 if(i->second.stale(limit))
582 SyncRes::s_nsSpeeds.erase(i++);
583 else
584 ++i;
585
255e0a07 586 // cerr<<"Pruned "<<pruned<<" records, left "<<SyncRes::s_negcache.size()<<"\n";
5e4a2466 587// cout<<"Prune took "<<dt.udiff()<<"usec\n";
ae1b2e98
BH
588 last_prune=time(0);
589 }
c9e9e5e0 590 if(now.tv_sec - last_stat>1800) {
c75a6a9e 591 doStats();
c836dc19
BH
592 last_stat=time(0);
593 }
c9e9e5e0
BH
594 if(now.tv_sec -last_rootupdate>7200) {
595 SyncRes sr(now);
ea634573 596 vector<DNSResourceRecord> ret;
c836dc19
BH
597
598 sr.setNoCache();
7738a23f 599 int res=sr.beginResolve(".", QType(QType::NS), ret);
c836dc19
BH
600 if(!res) {
601 L<<Logger::Error<<"Refreshed . records"<<endl;
c9e9e5e0 602 last_rootupdate=now.tv_sec;
c836dc19
BH
603 }
604 else
605 L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
606 }
607}
288f4aa9 608
// Number of currently counted TCP connections per client, keyed by the
// client's IPv4 address as stored in sockaddr_in (sin_addr.s_addr).
std::map<uint32_t, uint32_t> g_tcpClientCounts;

// State of one incoming TCP client connection.
struct TCPConnection
{
  int fd;                                   // the connected socket
  // NOTE(review): the state names suggest BYTE0/BYTE1 are the two length
  // bytes preceding the question - confirm against the reading code.
  enum {BYTE0, BYTE1, GETQUESTION} state;
  int qlen;
  int bytesread;
  struct sockaddr_in remote;                // address of the client
  char data[65535];
  time_t startTime;                         // compared against the TCP timeout by the main loop

  // Closes the socket and takes this connection out of g_tcpClientCounts.
  // The client's entry is erased as soon as its count drops to zero.
  // Before, the entry was only erased when it was already zero, so the map
  // kept a zero entry for every address that ever connected.
  void closeAndCleanup()
  {
    close(fd);

    std::map<uint32_t, uint32_t>::iterator client=g_tcpClientCounts.find(remote.sin_addr.s_addr);
    if(client == g_tcpClientCounts.end())
      return;                               // nothing counted for this address

    if(client->second <= 1)
      g_tcpClientCounts.erase(client);      // last connection of this client
    else
      --client->second;
  }
};
627
#if 0
/* Disabled debugging aid: replaces gettimeofday() with a version that counts
   how often each call site (identified by a short backtrace) asks for the
   time. Nothing in this region is compiled. */
#include <execinfo.h>

  // fragment that prints the collected call sites, ordered by call count
  multimap<uint32_t,string> byCount;
  for(map<string,uint32_t>::const_iterator site=casesptr->begin(); site!=casesptr->end(); ++site) {
    byCount.insert(make_pair(site->second,site->first));
  }
  for(multimap<uint32_t,string>::const_iterator entry=byCount.begin(); entry!= byCount.end(); ++entry)
    cout<<entry->first<<" times: \n"<<entry->second<<"\n";

  cout.flush();

map<string,uint32_t>* casesptr;

// Returns up to five backtrace frames, one per line
static string maketrace()
{
  void *frames[20]; //only care about last 17 functions (3 taken with tracing support)

  size_t depth = backtrace (frames, 5);
  char **symbols = backtrace_symbols (frames, depth); //Need -rdynamic gcc (linker) flag for this to work

  string trace;
  for (size_t n = 0; n < depth; n++) //skip useless functions
    trace+=string(symbols[n])+"\n";
  return trace;
}

extern "C" {

int gettimeofday (struct timeval *__restrict __tv,
                  __timezone_ptr_t __tz)
{
  static map<string, uint32_t> s_cases;
  casesptr=&s_cases;
  s_cases[maketrace()]++;
  __tv->tv_sec=time(0);
  return 0;
}

}
#endif
d6d5dea7 672
288f4aa9
BH
673int main(int argc, char **argv)
674{
8a63d3ce 675 reportBasicTypes();
ea634573 676
22030c37 677 int ret = EXIT_SUCCESS;
caa6eefa
BH
678#ifdef WIN32
679 WSADATA wsaData;
680 WSAStartup( MAKEWORD( 2, 0 ), &wsaData );
681#endif // WIN32
682
288f4aa9 683 try {
caa6eefa 684 Utility::srandom(time(0));
2e3d8a19
BH
685 ::arg().set("soa-minimum-ttl","Don't change")="0";
686 ::arg().set("soa-serial-offset","Don't change")="0";
687 ::arg().set("no-shuffle","Don't change")="off";
688 ::arg().set("aaaa-additional-processing","turn on to do AAAA additional processing (slow)")="off";
689 ::arg().set("local-port","port to listen on")="53";
690 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas")="0.0.0.0";
691 ::arg().set("trace","if we should output heaps of logging")="off";
692 ::arg().set("daemon","Operate as a daemon")="yes";
693 ::arg().set("chroot","switch to chroot jail")="";
694 ::arg().set("setgid","If set, change group id to this gid for more security")="";
695 ::arg().set("setuid","If set, change user id to this uid for more security")="";
696 ::arg().set("quiet","Suppress logging of questions and answers")="true";
697 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
698 ::arg().set("socket-dir","Where the controlsocket will live")=LOCALSTATEDIR;
699 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
700 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
701 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
702 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
703 ::arg().set("hint-file", "If set, load root hints from this file")="";
bec87d21 704 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="0";
c8ddb7c2 705 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")="";
4e120339 706 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
41f7a068 707 ::arg().set("fork", "If set, fork the daemon for possible double performance")="no";
2e3d8a19
BH
708
709 ::arg().setCmd("help","Provide a helpful message");
c75a6a9e 710 L.toConsole(Logger::Warning);
2e3d8a19 711 ::arg().laxParse(argc,argv); // do a lax parse
c75a6a9e 712
2e3d8a19 713 string configname=::arg()["config-dir"]+"/recursor.conf";
c75a6a9e
BH
714 cleanSlashes(configname);
715
2e3d8a19 716 if(!::arg().file(configname.c_str()))
c75a6a9e
BH
717 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
718
2e3d8a19 719 ::arg().parse(argc,argv);
c836dc19 720
2e3d8a19 721 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
562588a3 722
2e3d8a19 723 if(::arg().mustDo("help")) {
b636533b 724 cerr<<"syntax:"<<endl<<endl;
2e3d8a19 725 cerr<<::arg().helpstring(::arg()["help"])<<endl;
b636533b
BH
726 exit(99);
727 }
728
c8ddb7c2
BH
729 if(!::arg()["allow-from"].empty()) {
730 g_allowFrom=new NetmaskGroup;
731 vector<string> ips;
732 stringtok(ips, ::arg()["allow-from"], ", ");
733 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i)
734 g_allowFrom->addMask(*i);
735 }
736
c836dc19 737 L.setName("pdns_recursor");
288f4aa9 738
22c012a8 739 L<<Logger::Warning<<"PowerDNS recursor "<<VERSION<<" (C) 2001-2006 PowerDNS.COM BV ("<<__DATE__", "__TIME__;
0d189311
BH
740#ifdef __GNUC__
741 L<<", gcc "__VERSION__;
742#endif // add other compilers here
743 L<<") starting up"<<endl;
744
22c012a8 745 L<<Logger::Warning<<"Operating in "<<(sizeof(unsigned long)*8) <<" bits mode"<<endl;
0d189311
BH
746 L<<Logger::Warning<<"PowerDNS comes with ABSOLUTELY NO WARRANTY. "
747 "This is free software, and you are welcome to redistribute it "
748 "according to the terms of the GPL version 2."<<endl;
749
750
4d0217fc 751 g_quiet=::arg().mustDo("quiet");
2e3d8a19 752 if(::arg().mustDo("trace")) {
7b35aa49 753 SyncRes::setLog(true);
2e3d8a19 754 ::arg().set("quiet")="no";
1d5b3ce6 755 g_quiet=false;
878435ce 756 }
4d0217fc 757
f28307ad
BH
758 makeUDPServerSockets();
759 makeTCPServerSockets();
41f7a068
BH
760
761 if(::arg().mustDo("fork")) {
762 fork();
763 L<<Logger::Warning<<"This is forked pid "<<getpid()<<endl;
764 }
765 makeClientSocket();
766
1d5b3ce6
BH
767 makeControlChannelSocket();
768
fededf47 769 MT=new MTasker<PacketID,string>(100000);
562588a3 770
288f4aa9
BH
771 char data[1500];
772 struct sockaddr_in fromaddr;
773
774 PacketID pident;
bdf40704 775 primeHints();
c836dc19 776 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
caa6eefa 777#ifndef WIN32
2e3d8a19 778 if(::arg().mustDo("daemon")) {
c836dc19
BH
779 L.toConsole(Logger::Critical);
780 daemonize();
781 }
c75a6a9e 782 signal(SIGUSR1,usr1Handler);
9170fbaf 783 signal(SIGUSR2,usr2Handler);
4389619a 784 signal(SIGPIPE,SIG_IGN);
88def049
BH
785
786 writePid();
caa6eefa 787#endif
c75a6a9e 788
08efacea 789 int newgid=0;
2e3d8a19
BH
790 if(!::arg()["setgid"].empty())
791 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
08efacea 792 int newuid=0;
2e3d8a19
BH
793 if(!::arg()["setuid"].empty())
794 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
08efacea
BH
795
796
2e3d8a19
BH
797 if (!::arg()["chroot"].empty()) {
798 if (chroot(::arg()["chroot"].c_str())<0) {
799 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
08efacea
BH
800 exit(1);
801 }
802 }
803
804 Utility::dropPrivs(newuid, newgid);
805
9c495589 806 vector<TCPConnection> tcpconnections;
49f076e8 807 counter=0;
c9e9e5e0 808 struct timeval now;
2e3d8a19 809 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
4e120339
BH
810 int tcpTimeout=::arg().asNum("client-tcp-timeout");
811
812 unsigned int maxTCPPerClient=::arg().asNum("max-tcp-per-client");
813
288f4aa9 814 for(;;) {
fededf47 815 while(MT->schedule()); // housekeeping, let threads do their thing
288f4aa9 816
c9e9e5e0 817 if(!((counter++)%500))
d23a4bc7 818 MT->makeThread(houseKeeping,0);
f9f05db4 819 if(statsWanted) {
c75a6a9e 820 doStats();
f9f05db4 821 }
c836dc19 822
caa6eefa 823 Utility::socklen_t addrlen=sizeof(fromaddr);
288f4aa9 824 int d_len;
288f4aa9
BH
825
826 struct timeval tv;
827 tv.tv_sec=0;
828 tv.tv_usec=500000;
829
5c633640 830 fd_set readfds, writefds;
288f4aa9 831 FD_ZERO( &readfds );
5c633640 832 FD_ZERO( &writefds );
288f4aa9 833 FD_SET( d_clientsock, &readfds );
1d5b3ce6
BH
834 FD_SET( s_rcc.d_fd, &readfds);
835 int fdmax=max(d_clientsock, s_rcc.d_fd);
f28307ad 836
cd50f30d 837 if(!tcpconnections.empty())
c9e9e5e0 838 gettimeofday(&now, 0);
cd50f30d
BH
839
840 vector<TCPConnection> sweeped;
2e3d8a19 841
cd50f30d 842 for(vector<TCPConnection>::iterator i=tcpconnections.begin();i!=tcpconnections.end();++i) {
4e120339 843 if(now.tv_sec < i->startTime + tcpTimeout) {
cd50f30d
BH
844 FD_SET(i->fd, &readfds);
845 fdmax=max(fdmax,i->fd);
846 sweeped.push_back(*i);
847 }
848 else {
849 L<<Logger::Error<<"TCP timeout from client "<<inet_ntoa(i->remote.sin_addr)<<endl;
4e120339 850 i->closeAndCleanup();
cd50f30d 851 }
9c495589 852 }
cd50f30d
BH
853 sweeped.swap(tcpconnections);
854
f28307ad
BH
855 for(vector<int>::const_iterator i=d_udpserversocks.begin(); i!=d_udpserversocks.end(); ++i) {
856 FD_SET( *i, &readfds );
857 fdmax=max(fdmax,*i);
858 }
cd50f30d
BH
859 if(tcpconnections.size() < maxTcpClients)
860 for(tcpserversocks_t::const_iterator i=s_tcpserversocks.begin(); i!=s_tcpserversocks.end(); ++i) {
861 FD_SET(*i, &readfds );
862 fdmax=max(fdmax,*i);
863 }
864
5c633640
BH
865 for(map<int,PacketID>::const_iterator i=d_tcpclientreadsocks.begin(); i!=d_tcpclientreadsocks.end(); ++i) {
866 // cerr<<"Adding TCP socket "<<i->first<<" to read select set"<<endl;
867 FD_SET( i->first, &readfds );
868 fdmax=max(fdmax,i->first);
869 }
870
871 for(map<int,PacketID>::const_iterator i=d_tcpclientwritesocks.begin(); i!=d_tcpclientwritesocks.end(); ++i) {
872 // cerr<<"Adding TCP socket "<<i->first<<" to write select set"<<endl;
873 FD_SET( i->first, &writefds );
874 fdmax=max(fdmax,i->first);
875 }
8d022964 876
5c633640 877 int selret = select( fdmax + 1, &readfds, &writefds, NULL, &tv );
c9e9e5e0 878 gettimeofday(&now, 0);
c75a6a9e
BH
879 if(selret<=0)
880 if (selret == -1 && errno!=EINTR)
288f4aa9 881 throw AhuException("Select returned: "+stringerror());
c75a6a9e
BH
882 else
883 continue;
884
1d5b3ce6
BH
885 if(FD_ISSET(s_rcc.d_fd, &readfds)) {
886 string remote;
887 string msg=s_rcc.recv(&remote);
888 RecursorControlParser rcp;
aaacf7f2
BH
889 RecursorControlParser::func_t* command;
890 string answer=rcp.getAnswer(msg, &command);
891 s_rcc.send(answer, &remote);
892 command();
1d5b3ce6
BH
893 }
894
369369f6 895 if(FD_ISSET(d_clientsock,&readfds)) { // do we have a UDP question response?
9b356afc
BH
896 while((d_len=recvfrom(d_clientsock, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen)) >= 0) {
897 try {
898 DNSComboWriter dc(data, d_len, now);
899 dc.setRemote((struct sockaddr *)&fromaddr, addrlen);
900
901 if(dc.d_mdp.d_header.qr) {
902 pident.remote=fromaddr;
903 pident.id=dc.d_mdp.d_header.id;
904 string packet;
905 packet.assign(data, d_len);
906 MT->sendEvent(pident, &packet);
907 }
908 else
909 L<<Logger::Warning<<"Ignoring question on outgoing socket from "<<dc.getRemote()<<endl;
910 }
911 catch(MOADNSException& mde) {
e5986c84 912 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< sockAddrToString((struct sockaddr_in*) &fromaddr, addrlen) <<": "<<mde.what()<<endl;
288f4aa9 913 }
288f4aa9
BH
914 }
915 }
916
f28307ad
BH
917 for(vector<int>::const_iterator i=d_udpserversocks.begin(); i!=d_udpserversocks.end(); ++i) {
918 if(FD_ISSET(*i,&readfds)) { // do we have a new question on udp?
9b356afc
BH
919 while((d_len=recvfrom(*i, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen)) >= 0) {
920 g_stats.queryrate.pulse(now);
c8ddb7c2
BH
921 if(g_allowFrom && !g_allowFrom->match(&fromaddr)) {
922 g_stats.unauthorizedUDP++;
923 continue;
924 }
9b356afc
BH
925
926 try {
927 DNSComboWriter* dc = new DNSComboWriter(data, d_len, now);
928
929 dc->setRemote((struct sockaddr *)&fromaddr, addrlen);
930
931 if(dc->d_mdp.d_header.qr)
932 L<<Logger::Error<<"Ignoring answer on server socket!"<<endl;
933 else {
934 ++g_stats.qcounter;
935 dc->setSocket(*i);
936 dc->d_tcp=false;
d23a4bc7 937 MT->makeThread(startDoResolve, (void*) dc);
9b356afc
BH
938 }
939 }
940 catch(MOADNSException& mde) {
e5986c84 941 L<<Logger::Error<<"Unable to parse packet from remote udp client "<< sockAddrToString((struct sockaddr_in*) &fromaddr, addrlen) <<": "<<mde.what()<<endl;
de1890b2 942 }
9b356afc 943 }
288f4aa9
BH
944 }
945 }
9c495589 946
cd50f30d 947 for(tcpserversocks_t::const_iterator i=s_tcpserversocks.begin(); i!=s_tcpserversocks.end(); ++i) {
369369f6 948 if(FD_ISSET(*i ,&readfds)) { // do we have a new TCP connection?
f28307ad
BH
949 struct sockaddr_in addr;
950 socklen_t addrlen=sizeof(addr);
951 int newsock=accept(*i, (struct sockaddr*)&addr, &addrlen);
f28307ad 952 if(newsock>0) {
c8ddb7c2
BH
953 if(g_allowFrom && !g_allowFrom->match(&addr)) {
954 g_stats.unauthorizedTCP++;
955 close(newsock);
956 continue;
957 }
958
4e120339
BH
959 if(maxTCPPerClient && g_tcpClientCounts.count(addr.sin_addr.s_addr) && g_tcpClientCounts[addr.sin_addr.s_addr] >= maxTCPPerClient) {
960 g_stats.tcpClientOverflow++;
961 close(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
962 continue;
963 }
964 g_tcpClientCounts[addr.sin_addr.s_addr]++;
f28307ad
BH
965 Utility::setNonBlocking(newsock);
966 TCPConnection tc;
967 tc.fd=newsock;
968 tc.state=TCPConnection::BYTE0;
969 tc.remote=addr;
c9e9e5e0 970 tc.startTime=now.tv_sec;
f28307ad
BH
971 tcpconnections.push_back(tc);
972 }
9c495589
BH
973 }
974 }
975
369369f6 976 // have any question answers come in over TCP?
5c633640 977 for(map<int,PacketID>::iterator i=d_tcpclientreadsocks.begin(); i!=d_tcpclientreadsocks.end();) {
72df400f 978 bool haveErased=false;
5c633640 979 if(FD_ISSET(i->first, &readfds)) { // can we receive
72df400f
BH
980 shared_array<char> buffer(new char[i->second.inNeeded]);
981
982 int ret=read(i->first, buffer.get(), min(i->second.inNeeded,200));
5c633640
BH
983 // cerr<<"Read returned "<<ret<<endl;
984 if(ret > 0) {
72df400f 985 i->second.inMSG.append(&buffer[0], &buffer[ret]);
5c633640
BH
986 i->second.inNeeded-=ret;
987 if(!i->second.inNeeded) {
988 // cerr<<"Got entire load of "<<i->second.inMSG.size()<<" bytes"<<endl;
989 PacketID pid=i->second;
990 string msg=i->second.inMSG;
991
992 d_tcpclientreadsocks.erase((i++));
72df400f 993 haveErased=true;
5c633640
BH
994 MT->sendEvent(pid, &msg); // XXX DODGY
995 }
996 else {
997 // cerr<<"Still have "<<i->second.inNeeded<<" left to go"<<endl;
5c633640
BH
998 }
999 }
1000 else {
9170fbaf
BH
1001 // cerr<<"when reading ret="<<ret<<endl;
1002 // XXX FIXME I think some stuff needs to happen here - like send an EOF event
5c633640
BH
1003 }
1004 }
72df400f 1005 if(!haveErased)
5c633640 1006 ++i;
5c633640 1007 }
369369f6
BH
1008
1009 // is there data we can send to remote nameservers over TCP?
5c633640 1010 for(map<int,PacketID>::iterator i=d_tcpclientwritesocks.begin(); i!=d_tcpclientwritesocks.end(); ) {
72df400f 1011 bool haveErased=false;
5c633640
BH
1012 if(FD_ISSET(i->first, &writefds)) { // can we send over TCP
1013 // cerr<<"Socket "<<i->first<<" available for writing"<<endl;
1014 int ret=write(i->first, i->second.outMSG.c_str(), i->second.outMSG.size() - i->second.outPos);
1015 if(ret > 0) {
1016 i->second.outPos+=ret;
1017 if(i->second.outPos==i->second.outMSG.size()) {
1018 // cerr<<"Sent out entire load of "<<i->second.outMSG.size()<<" bytes"<<endl;
1019 PacketID pid=i->second;
369369f6 1020 d_tcpclientwritesocks.erase(i++); // erase!
72df400f 1021 haveErased=true;
369369f6 1022 MT->sendEvent(pid, 0);
5c633640 1023 }
72df400f 1024
5c633640
BH
1025 }
1026 else {
9170fbaf
BH
1027 // cerr<<"ret="<<ret<<" when writing"<<endl;
1028 // XXX FIXME I think some stuff needs to happen here - like send an EOF event
5c633640
BH
1029 }
1030 }
72df400f 1031 if(!haveErased)
5c633640
BH
1032 ++i;
1033 }
369369f6
BH
1034
1035 // very braindead TCP incoming question parser
9c495589
BH
1036 for(vector<TCPConnection>::iterator i=tcpconnections.begin();i!=tcpconnections.end();++i) {
1037 if(FD_ISSET(i->fd, &readfds)) {
1038 if(i->state==TCPConnection::BYTE0) {
1039 int bytes=read(i->fd,i->data,2);
1040 if(bytes==1)
1041 i->state=TCPConnection::BYTE1;
1042 if(bytes==2) {
1043 i->qlen=(i->data[0]<<8)+i->data[1];
1044 i->bytesread=0;
1045 i->state=TCPConnection::GETQUESTION;
1046 }
1047 if(!bytes || bytes < 0) {
4e120339 1048 i->closeAndCleanup();
9c495589
BH
1049 tcpconnections.erase(i);
1050 break;
1051 }
1052 }
1053 else if(i->state==TCPConnection::BYTE1) {
1054 int bytes=read(i->fd,i->data+1,1);
1055 if(bytes==1) {
1056 i->state=TCPConnection::GETQUESTION;
1057 i->qlen=(i->data[0]<<8)+i->data[1];
1058 i->bytesread=0;
1059 }
1060 if(!bytes || bytes < 0) {
1061 L<<Logger::Error<<"TCP Remote "<<sockAddrToString(&i->remote,sizeof(i->remote))<<" disconnected after first byte"<<endl;
4e120339 1062 i->closeAndCleanup();
9c495589
BH
1063 tcpconnections.erase(i);
1064 break;
1065 }
1066
1067 }
1068 else if(i->state==TCPConnection::GETQUESTION) {
1069 int bytes=read(i->fd,i->data + i->bytesread,i->qlen - i->bytesread);
1070 if(!bytes || bytes < 0) {
1071 L<<Logger::Error<<"TCP Remote "<<sockAddrToString(&i->remote,sizeof(i->remote))<<" disconnected while reading question body"<<endl;
4e120339 1072 i->closeAndCleanup();
9c495589
BH
1073 tcpconnections.erase(i);
1074 break;
1075 }
1076 i->bytesread+=bytes;
1077 if(i->bytesread==i->qlen) {
1078 i->state=TCPConnection::BYTE0;
a1754c6a 1079 DNSComboWriter* dc=0;
ea634573 1080 try {
c9e9e5e0 1081 dc=new DNSComboWriter(i->data, i->qlen, now);
ea634573
BH
1082 }
1083 catch(MOADNSException &mde) {
e5986c84 1084 L<<Logger::Error<<"Unable to parse packet from remote TCP client "<<sockAddrToString(&i->remote,sizeof(i->remote))<<endl;
4e120339 1085 i->closeAndCleanup();
9c495589
BH
1086 tcpconnections.erase(i);
1087 break;
1088 }
ea634573
BH
1089
1090 dc->setSocket(i->fd);
1091 dc->d_tcp=true;
1092 dc->setRemote((struct sockaddr *)&i->remote,sizeof(i->remote));
1093 if(dc->d_mdp.d_header.qr)
1094 L<<Logger::Error<<"Ignoring answer on server socket!"<<endl;
1095 else {
aaacf7f2
BH
1096 ++g_stats.qcounter;
1097 ++g_stats.tcpqcounter;
d23a4bc7 1098 MT->makeThread(startDoResolve, dc);
9c495589
BH
1099 }
1100 }
1101 }
1102 }
1103 }
288f4aa9
BH
1104 }
1105 }
1106 catch(AhuException &ae) {
c836dc19 1107 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
22030c37 1108 ret=EXIT_FAILURE;
288f4aa9
BH
1109 }
1110 catch(exception &e) {
c836dc19 1111 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
22030c37 1112 ret=EXIT_FAILURE;
288f4aa9
BH
1113 }
1114 catch(...) {
c836dc19 1115 L<<Logger::Error<<"any other exception in main: "<<endl;
22030c37 1116 ret=EXIT_FAILURE;
288f4aa9 1117 }
caa6eefa
BH
1118
1119#ifdef WIN32
1120 WSACleanup();
1121#endif // WIN32
1122
22030c37 1123 return ret;
288f4aa9 1124}