]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
Merge pull request #12130 from omoerbeek/auth-axfr-refresh-check
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "rec-main.hh"
24
25 #include "arguments.hh"
26 #include "dns_random.hh"
27 #include "ednsextendederror.hh"
28 #include "ednspadding.hh"
29 #include "query-local-address.hh"
30 #include "rec-taskqueue.hh"
31 #include "responsestats.hh"
32 #include "shuffle.hh"
33 #include "validate-recursor.hh"
34
35 #ifdef HAVE_SYSTEMD
36 #include <systemd/sd-daemon.h>
37 #endif
38
39 #ifdef NOD_ENABLED
40 #include "nod.hh"
41 #include "logging.hh"
42 #endif /* NOD_ENABLED */
43
// Per-thread state: everything declared thread_local (or __thread) below exists
// once per thread; the remaining variables are single process-wide objects.
44 thread_local std::shared_ptr<RecursorLua4> t_pdl;
45 thread_local std::shared_ptr<Regex> t_traceRegex;
46 thread_local ProtobufServersInfo t_protobufServers;
47 thread_local ProtobufServersInfo t_outgoingProtobufServers;
48
49 thread_local std::unique_ptr<MT_t> MT; // the big MTasker
// The record cache and negative cache are not thread_local: one instance for the whole process.
50 std::unique_ptr<MemRecursorCache> g_recCache;
51 std::unique_ptr<NegCache> g_negCache;
52
53 thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
54 thread_local std::unique_ptr<FDMultiplexer> t_fdm;
// Per-thread ring buffers of remote addresses / queries; updateResponseStats() only
// pushes into them when the pointer is set.
55 thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes;
56 thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t>>> t_queryring, t_servfailqueryring, t_bogusqueryring;
57 thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
58 thread_local std::shared_ptr<NetmaskGroup> t_allowNotifyFrom;
59 thread_local std::shared_ptr<notifyset_t> t_allowNotifyFor;
60 __thread struct timeval g_now; // timestamp, updated (too) frequently
61
62 typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
63
64 static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
65 static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism (without actually using sendfromto())
// NOTE(review): the g_* values below look like runtime settings filled in from
// configuration elsewhere -- confirm where each one is assigned.
66 NetmaskGroup g_paddingFrom;
67 size_t g_proxyProtocolMaximumSize;
68 size_t g_maxUDPQueriesPerRound;
69 unsigned int g_maxMThreads;
70 unsigned int g_paddingTag;
71 PaddingMode g_paddingMode;
72 uint16_t g_udpTruncationThreshold;
73 std::atomic<bool> g_quiet;
74 bool g_logCommonErrors;
75 bool g_reusePort{false};
76 bool g_gettagNeedsEDNSOptions{false};
77 bool g_useKernelTimestamp;
78 std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
// Source ports that makeClientSocket() must never pick; stored in a Boost flat_set
// when that header was detected at build time, in a std::set otherwise.
79 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
80 boost::container::flat_set<uint16_t> g_avoidUdpSourcePorts;
81 #else
82 std::set<uint16_t> g_avoidUdpSourcePorts;
83 #endif
// Inclusive range makeClientSocket() draws random source ports from.
84 uint16_t g_minUdpSourcePort;
85 uint16_t g_maxUdpSourcePort;
86 double g_balancingFactor;
87
88 RecursorStats g_stats;
89 bool g_lowercaseOutgoing;
90 unsigned int g_networkTimeoutMsec; // timeout handed to MT->waitEvent() by GenUDPQueryResponse() and arecvfrom()
91 uint16_t g_outgoingEDNSBufsize;
92
93 // Used in Syncres to count DNSSEC stats for names in a different "universe"
94 GlobalStateHolder<SuffixMatchNode> g_xdnssec;
95 // Used in the Syncres to not throttle certain servers
96 GlobalStateHolder<SuffixMatchNode> g_dontThrottleNames;
97 GlobalStateHolder<NetmaskGroup> g_dontThrottleNetmasks;
98 GlobalStateHolder<SuffixMatchNode> g_DoTToAuthNames;
99 uint64_t g_latencyStatSize;
101 LWResult::Result UDPClientSocks::getSocket(const ComboAddress& toaddr, int* fd)
102 {
103 *fd = makeClientSocket(toaddr.sin4.sin_family);
104 if (*fd < 0) { // temporary error - receive exception otherwise
105 return LWResult::Result::OSLimitError;
106 }
107
108 if (connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
109 int err = errno;
110 try {
111 closesocket(*fd);
112 }
113 catch (const PDNSException& e) {
114 SLOG(g_log << Logger::Error << "Error closing UDP socket after connect() failed: " << e.reason << endl,
115 g_slogout->error(Logr::Error, e.reason, "Error closing UDP socket after connect() failed", "exception", Logging::Loggable("PDNSException")));
116 }
117
118 if (err == ENETUNREACH) { // Seth "My Interfaces Are Like A Yo Yo" Arnold special
119 return LWResult::Result::OSLimitError;
120 }
121
122 return LWResult::Result::PermanentError;
123 }
124
125 d_numsocks++;
126 return LWResult::Result::Success;
127 }
128
129 // return a socket to the pool, or simply erase it
130 void UDPClientSocks::returnSocket(int fd)
131 {
132 try {
133 t_fdm->removeReadFD(fd);
134 }
135 catch (const FDMultiplexerException& e) {
136 // we sometimes return a socket that has not yet been assigned to t_fdm
137 }
138
139 try {
140 closesocket(fd);
141 }
142 catch (const PDNSException& e) {
143 SLOG(g_log << Logger::Error << "Error closing returned UDP socket: " << e.reason << endl,
144 g_slogout->error(Logr::Error, e.reason, "Error closing returned UDP socket", "exception", Logging::Loggable("PDNSException")));
145 }
146
147 --d_numsocks;
148 }
149
150 // returns -1 for errors which might go away, throws for ones that won't
151 int UDPClientSocks::makeClientSocket(int family)
152 {
153 int ret = socket(family, SOCK_DGRAM, 0); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
154
155 if (ret < 0 && errno == EMFILE) { // this is not a catastrophic error
156 return ret;
157 }
158 if (ret < 0) {
159 throw PDNSException("Making a socket for resolver (family = " + std::to_string(family) + "): " + stringerror());
160 }
161
162 // The loop below runs the body with [tries-1 tries-2 ... 1]. Last iteration with tries == 1 is special: it uses a kernel
163 // allocated UDP port.
164 #if !defined(__OpenBSD__)
165 int tries = 10;
166 #else
167 int tries = 2; // hit the reliable kernel random case for OpenBSD immediately (because it will match tries==1 below), using sysctl net.inet.udp.baddynamic to exclude ports
168 #endif
169 ComboAddress sin;
170 while (--tries) {
171 in_port_t port;
172
173 if (tries == 1) { // last iteration: fall back to kernel 'random'
174 port = 0;
175 }
176 else {
177 do {
178 port = g_minUdpSourcePort + dns_random(g_maxUdpSourcePort - g_minUdpSourcePort + 1);
179 } while (g_avoidUdpSourcePorts.count(port));
180 }
181
182 sin = pdns::getQueryLocalAddress(family, port); // does htons for us
183 if (::bind(ret, reinterpret_cast<struct sockaddr*>(&sin), sin.getSocklen()) >= 0)
184 break;
185 }
186
187 if (!tries) {
188 closesocket(ret);
189 throw PDNSException("Resolver binding to local query client socket on " + sin.toString() + ": " + stringerror());
190 }
191
192 try {
193 setReceiveSocketErrors(ret, family);
194 setNonBlocking(ret);
195 }
196 catch (...) {
197 closesocket(ret);
198 throw;
199 }
200 return ret;
201 }
202
203 static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
204 {
205 std::shared_ptr<PacketID> pident = boost::any_cast<std::shared_ptr<PacketID>>(var);
206 PacketBuffer resp;
207 resp.resize(512);
208 ComboAddress fromaddr;
209 socklen_t addrlen = sizeof(fromaddr);
210
211 ssize_t ret = recvfrom(fd, resp.data(), resp.size(), 0, (sockaddr*)&fromaddr, &addrlen);
212 if (fromaddr != pident->remote) {
213 SLOG(g_log << Logger::Notice << "Response received from the wrong remote host (" << fromaddr.toStringWithPort() << " instead of " << pident->remote.toStringWithPort() << "), discarding" << endl,
214 g_slog->withName("lua")->info(Logr::Notice, "Response received from the wrong remote host. discarding", "method", Logging::Loggable("GenUDPQueryResponse"), "fromaddr", Logging::Loggable(fromaddr), "expected", Logging::Loggable(pident->remote)));
215 }
216
217 t_fdm->removeReadFD(fd);
218 if (ret >= 0) {
219 resp.resize(ret);
220 MT->sendEvent(pident, &resp);
221 }
222 else {
223 PacketBuffer empty;
224 MT->sendEvent(pident, &empty);
225 // cerr<<"Had some kind of error: "<<ret<<", "<<stringerror()<<endl;
226 }
227 }
228
229 PacketBuffer GenUDPQueryResponse(const ComboAddress& dest, const string& query)
230 {
231 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
232 s.setNonBlocking();
233 ComboAddress local = pdns::getQueryLocalAddress(dest.sin4.sin_family, 0);
234
235 s.bind(local);
236 s.connect(dest);
237 s.send(query);
238
239 std::shared_ptr<PacketID> pident = std::make_shared<PacketID>();
240 pident->fd = s.getHandle();
241 pident->remote = dest;
242 pident->type = 0;
243 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
244
245 PacketBuffer data;
246 int ret = MT->waitEvent(pident, &data, g_networkTimeoutMsec);
247
248 if (!ret || ret == -1) { // timeout
249 t_fdm->removeReadFD(s.getHandle());
250 }
251 else if (data.empty()) { // error, EOF or other
252 // we could special case this
253 return data;
254 }
255 return data;
256 }
257
// Forward declaration of the read callback that asendto() registers with t_fdm
// for the sockets it sends queries on; it is defined elsewhere.
258 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
259
// Per-thread UDPClientSocks instance: asendto() obtains sockets from it, and
// asendto()/arecvfrom() hand them back through returnSocket().
260 thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
261
262 /* these two functions are used by LWRes */
263 LWResult::Result asendto(const char* data, size_t len, int flags,
264 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
265 {
266
267 auto pident = std::make_shared<PacketID>();
268 pident->domain = domain;
269 pident->remote = toaddr;
270 pident->type = qtype;
271
272 // see if there is an existing outstanding request we can chain on to, using partial equivalence function looking for the same
273 // query (qname and qtype) to the same host, but with a different message ID
274 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain = MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
275
276 for (; chain.first != chain.second; chain.first++) {
277 // Line below detected an issue with the two ways of ordering PackeIDs (birtday and non-birthday)
278 assert(chain.first->key->domain == pident->domain);
279 if (chain.first->key->fd > -1 && !chain.first->key->closed) { // don't chain onto existing chained waiter or a chain already processed
280 // cerr << "Insert " << id << ' ' << pident << " into chain for " << chain.first->key << endl;
281 chain.first->key->chain.insert(id); // we can chain
282 *fd = -1; // gets used in waitEvent / sendEvent later on
283 return LWResult::Result::Success;
284 }
285 }
286
287 auto ret = t_udpclientsocks->getSocket(toaddr, fd);
288 if (ret != LWResult::Result::Success) {
289 return ret;
290 }
291
292 pident->fd = *fd;
293 pident->id = id;
294
295 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
296 ssize_t sent = send(*fd, data, len, 0);
297
298 int tmp = errno;
299
300 if (sent < 0) {
301 t_udpclientsocks->returnSocket(*fd);
302 errno = tmp; // this is for logging purposes only
303 return LWResult::Result::PermanentError;
304 }
305
306 return LWResult::Result::Success;
307 }
308
309 LWResult::Result arecvfrom(PacketBuffer& packet, int flags, const ComboAddress& fromaddr, size_t* d_len,
310 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, const struct timeval* now)
311 {
312 static const unsigned int nearMissLimit = ::arg().asNum("spoof-nearmiss-max");
313
314 auto pident = std::make_shared<PacketID>();
315 pident->fd = fd;
316 pident->id = id;
317 pident->domain = domain;
318 pident->type = qtype;
319 pident->remote = fromaddr;
320
321 int ret = MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
322
323 /* -1 means error, 0 means timeout, 1 means a result from handleUDPServerResponse() which might still be an error */
324 if (ret > 0) {
325 /* handleUDPServerResponse() will close the socket for us no matter what */
326 if (packet.empty()) { // means "error"
327 return LWResult::Result::PermanentError;
328 }
329
330 *d_len = packet.size();
331
332 if (nearMissLimit > 0 && pident->nearMisses > nearMissLimit) {
333 /* we have received more than nearMissLimit answers on the right IP and port, from the right source (we are using connected sockets),
334 for the correct qname and qtype, but with an unexpected message ID. That looks like a spoofing attempt. */
335 SLOG(g_log << Logger::Error << "Too many (" << pident->nearMisses << " > " << nearMissLimit << ") answers with a wrong message ID for '" << domain << "' from " << fromaddr.toString() << ", assuming spoof attempt." << endl,
336 g_slogudpin->info(Logr::Error, "Too many answers with a wrong message ID, assuming spoofing attempt",
337 "nearmisses", Logging::Loggable(pident->nearMisses),
338 "nearmisslimit", Logging::Loggable(nearMissLimit),
339 "qname", Logging::Loggable(domain),
340 "from", Logging::Loggable(fromaddr)));
341 g_stats.spoofCount++;
342 return LWResult::Result::Spoofed;
343 }
344
345 return LWResult::Result::Success;
346 }
347 else {
348 /* getting there means error or timeout, it's up to us to close the socket */
349 if (fd >= 0) {
350 t_udpclientsocks->returnSocket(fd);
351 }
352 }
353
354 return ret == 0 ? LWResult::Result::Timeout : LWResult::Result::PermanentError;
355 }
356
357 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
358 static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
359 {
360 if (packetsize > 1000 && t_largeanswerremotes)
361 t_largeanswerremotes->push_back(remote);
362 switch (res) {
363 case RCode::ServFail:
364 if (t_servfailremotes) {
365 t_servfailremotes->push_back(remote);
366 if (query && t_servfailqueryring) // packet cache
367 t_servfailqueryring->push_back({*query, qtype});
368 }
369 g_stats.servFails++;
370 break;
371 case RCode::NXDomain:
372 g_stats.nxDomains++;
373 break;
374 case RCode::NoError:
375 g_stats.noErrors++;
376 break;
377 }
378 }
379
380 static string makeLoginfo(const std::unique_ptr<DNSComboWriter>& dc)
381 try {
382 return "(" + dc->d_mdp.d_qname.toLogString() + "/" + DNSRecordContent::NumberToType(dc->d_mdp.d_qtype) + " from " + (dc->getRemote()) + ")";
383 }
384 catch (...) {
385 return "Exception making error message for exception";
386 }
387
388 /**
389 * Chases the CNAME provided by the PolicyCustom RPZ policy.
390 *
391 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
392 * @param qtype: The QType of the original query
393 * @param sr: A SyncRes
394 * @param res: An integer that will contain the RCODE of the lookup we do
395 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
396 */
397 static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
398 {
399 if (spoofed.d_type == QType::CNAME) {
400 bool oldWantsRPZ = sr.getWantsRPZ();
401 sr.setWantsRPZ(false);
402 vector<DNSRecord> ans;
403 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, QClass::IN, ans);
404 for (const auto& rec : ans) {
405 if (rec.d_place == DNSResourceRecord::ANSWER) {
406 ret.push_back(rec);
407 }
408 }
409 // Reset the RPZ state of the SyncRes
410 sr.setWantsRPZ(oldWantsRPZ);
411 }
412 }
413
414 static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
415 {
416 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
417
418 if (rec.d_type != QType::OPT) // their TTL ain't real
419 minTTL = min(minTTL, rec.d_ttl);
420
421 rec.d_content->toPacket(pw);
422 if (pw.size() > static_cast<size_t>(maxAnswerSize)) {
423 pw.rollback();
424 if (rec.d_place != DNSResourceRecord::ADDITIONAL) {
425 pw.getHeader()->tc = 1;
426 pw.truncate();
427 }
428 return false;
429 }
430
431 return true;
432 }
433
434 /**
435 * A helper class that handles the TCP in-flight bookkeeping on
436 * destruct. This class ise used by startDoResolve() to not forget
437 * that. You can also signal that the TCP connection must be closed
438 * once the in-flight connections drop to zero.
439 **/
440 class RunningResolveGuard
441 {
442 public:
443 RunningResolveGuard(std::unique_ptr<DNSComboWriter>& dc) :
444 d_dc(dc)
445 {
446 if (d_dc->d_tcp && !d_dc->d_tcpConnection) {
447 throw std::runtime_error("incoming TCP case without TCP connection");
448 }
449 }
450 ~RunningResolveGuard()
451 {
452 if (!d_handled && d_dc->d_tcp) {
453 try {
454 finishTCPReply(d_dc, false, true);
455 }
456 catch (const FDMultiplexerException&) {
457 }
458 }
459 }
460 void setHandled()
461 {
462 d_handled = true;
463 }
464 void setDropOnIdle()
465 {
466 if (d_dc->d_tcp) {
467 d_dc->d_tcpConnection->setDropOnIdle();
468 }
469 }
470
471 private:
472 std::unique_ptr<DNSComboWriter>& d_dc;
473 bool d_handled{false};
474 };
475
// Outcome of handlePolicyHit(): tells the caller how to proceed after a filter
// policy has been looked at.
476 enum class PolicyResult : uint8_t
477 {
478 NoAction, // the policy did not decide the answer (e.g. PolicyKind::NoAction, or Truncate on a TCP query)
479 HaveAnswer, // the policy determined the answer; res (and possibly ret) were set accordingly
480 Drop // returned for PolicyKind::Drop
481 };
482
483 static PolicyResult handlePolicyHit(const DNSFilterEngine::Policy& appliedPolicy, const std::unique_ptr<DNSComboWriter>& dc, SyncRes& sr, int& res, vector<DNSRecord>& ret, DNSPacketWriter& pw, RunningResolveGuard& tcpGuard)
484 {
485 /* don't account truncate actions for TCP queries, since they are not applied */
486 if (appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::Truncate || !dc->d_tcp) {
487 ++g_stats.policyResults[appliedPolicy.d_kind];
488 ++(g_stats.policyHits.lock()->operator[](appliedPolicy.getName()));
489 }
490
491 if (sr.doLog() && appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) {
492 SLOG(g_log << Logger::Warning << dc->d_mdp.d_qname << "|" << QType(dc->d_mdp.d_qtype) << appliedPolicy.getLogString() << endl,
493 appliedPolicy.info(Logr::Warning, sr.d_slog));
494 }
495
496 if (appliedPolicy.d_zoneData && appliedPolicy.d_zoneData->d_extendedErrorCode) {
497 dc->d_extendedErrorCode = *appliedPolicy.d_zoneData->d_extendedErrorCode;
498 dc->d_extendedErrorExtra = appliedPolicy.d_zoneData->d_extendedErrorExtra;
499 }
500
501 switch (appliedPolicy.d_kind) {
502
503 case DNSFilterEngine::PolicyKind::NoAction:
504 return PolicyResult::NoAction;
505
506 case DNSFilterEngine::PolicyKind::Drop:
507 tcpGuard.setDropOnIdle();
508 ++g_stats.policyDrops;
509 return PolicyResult::Drop;
510
511 case DNSFilterEngine::PolicyKind::NXDOMAIN:
512 ret.clear();
513 res = RCode::NXDomain;
514 return PolicyResult::HaveAnswer;
515
516 case DNSFilterEngine::PolicyKind::NODATA:
517 ret.clear();
518 res = RCode::NoError;
519 return PolicyResult::HaveAnswer;
520
521 case DNSFilterEngine::PolicyKind::Truncate:
522 if (!dc->d_tcp) {
523 ret.clear();
524 res = RCode::NoError;
525 pw.getHeader()->tc = 1;
526 return PolicyResult::HaveAnswer;
527 }
528 return PolicyResult::NoAction;
529
530 case DNSFilterEngine::PolicyKind::Custom:
531 res = RCode::NoError;
532 {
533 auto spoofed = appliedPolicy.getCustomRecords(dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
534 for (auto& dr : spoofed) {
535 ret.push_back(dr);
536 try {
537 handleRPZCustom(dr, QType(dc->d_mdp.d_qtype), sr, res, ret);
538 }
539 catch (const ImmediateServFailException& e) {
540 if (g_logCommonErrors) {
541 SLOG(g_log << Logger::Notice << "Sending SERVFAIL to " << dc->getRemote() << " during resolve of the custom filter policy '" << appliedPolicy.getName() << "' while resolving '" << dc->d_mdp.d_qname << "' because: " << e.reason << endl,
542 sr.d_slog->error(Logr::Notice, e.reason, "Sending SERVFAIL during resolve of the custom filter policy",
543 "policyName", Logging::Loggable(appliedPolicy.getName()), "exception", Logging::Loggable("ImmediateServFailException")));
544 }
545 res = RCode::ServFail;
546 break;
547 }
548 catch (const PolicyHitException& e) {
549 if (g_logCommonErrors) {
550 SLOG(g_log << Logger::Notice << "Sending SERVFAIL to " << dc->getRemote() << " during resolve of the custom filter policy '" << appliedPolicy.getName() << "' while resolving '" << dc->d_mdp.d_qname << "' because another RPZ policy was hit" << endl,
551 sr.d_slog->info(Logr::Notice, "Sending SERVFAIL during resolve of the custom filter policy because another RPZ policy was hit",
552 "policyName", Logging::Loggable(appliedPolicy.getName()), "exception", Logging::Loggable("PolicyHitException")));
553 }
554 res = RCode::ServFail;
555 break;
556 }
557 }
558
559 return PolicyResult::HaveAnswer;
560 }
561 }
562
563 return PolicyResult::NoAction;
564 }
565
566 #ifdef NOD_ENABLED
567 static bool nodCheckNewDomain(Logr::log_t nodlogger, const DNSName& dname)
568 {
569 bool ret = false;
570 // First check the (sub)domain isn't ignored for NOD purposes
571 if (!g_nodDomainWL.check(dname)) {
572 // Now check the NODDB (note this is probabilistic so can have FNs/FPs)
573 if (t_nodDBp && t_nodDBp->isNewDomain(dname)) {
574 if (g_nodLog) {
575 // This should probably log to a dedicated log file
576 SLOG(g_log << Logger::Notice << "Newly observed domain nod=" << dname << endl,
577 nodlogger->info(Logr::Notice, "New domain observed"));
578 }
579 ret = true;
580 }
581 }
582 return ret;
583 }
584
585 static void sendNODLookup(Logr::log_t nodlogger, const DNSName& dname)
586 {
587 if (!(g_nodLookupDomain.isRoot())) {
588 // Send a DNS A query to <domain>.g_nodLookupDomain
589 DNSName qname;
590 try {
591 qname = dname + g_nodLookupDomain;
592 }
593 catch (const std::range_error& e) {
594 if (g_logCommonErrors) {
595 nodlogger->v(10)->error(Logr::Error, "DNSName too long", "Unable to send NOD lookup");
596 }
597 ++g_stats.nodLookupsDroppedOversize;
598 return;
599 }
600 nodlogger->v(10)->info(Logr::Debug, "Sending NOD lookup", "nodqname", Logging::Loggable(qname));
601 vector<DNSRecord> dummy;
602 directResolve(qname, QType::A, QClass::IN, dummy, nullptr, false, nodlogger);
603 }
604 }
605
606 static bool udrCheckUniqueDNSRecord(Logr::log_t nodlogger, const DNSName& dname, uint16_t qtype, const DNSRecord& record)
607 {
608 bool ret = false;
609 if (record.d_place == DNSResourceRecord::ANSWER || record.d_place == DNSResourceRecord::ADDITIONAL) {
610 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
611 std::stringstream ss;
612 ss << dname.toDNSStringLC() << ":" << qtype << ":" << qtype << ":" << record.d_type << ":" << record.d_name.toDNSStringLC() << ":" << record.d_content->getZoneRepresentation();
613 if (t_udrDBp && t_udrDBp->isUniqueResponse(ss.str())) {
614 if (g_udrLog) {
615 // This should also probably log to a dedicated file.
616 SLOG(g_log << Logger::Notice << "Unique response observed: qname=" << dname << " qtype=" << QType(qtype) << " rrtype=" << QType(record.d_type) << " rrname=" << record.d_name << " rrcontent=" << record.d_content->getZoneRepresentation() << endl,
617 nodlogger->info(Logr::Debug, "New response observed",
618 "qtype", Logging::Loggable(QType(qtype)),
619 "rrtype", Logging::Loggable(QType(record.d_type)),
620 "rrname", Logging::Loggable(record.d_name),
621 "rrcontent", Logging::Loggable(record.d_content->getZoneRepresentation())););
622 }
623 ret = true;
624 }
625 }
626 return ret;
627 }
628 #endif /* NOD_ENABLED */
629
630 static bool dns64Candidate(uint16_t requestedType, int rcode, const std::vector<DNSRecord>& records);
631
632 int followCNAMERecords(vector<DNSRecord>& ret, const QType qtype, int rcode)
633 {
634 vector<DNSRecord> resolved;
635 DNSName target;
636 for (const DNSRecord& rr : ret) {
637 if (rr.d_type == QType::CNAME) {
638 auto rec = getRR<CNAMERecordContent>(rr);
639 if (rec) {
640 target = rec->getTarget();
641 break;
642 }
643 }
644 }
645
646 if (target.empty()) {
647 return rcode;
648 }
649
650 auto log = g_slog->withName("lua")->withValues("method", Logging::Loggable("followCNAMERecords"));
651 rcode = directResolve(target, qtype, QClass::IN, resolved, t_pdl, log);
652
653 if (g_dns64Prefix && qtype == QType::AAAA && dns64Candidate(qtype, rcode, resolved)) {
654 rcode = getFakeAAAARecords(target, *g_dns64Prefix, resolved);
655 }
656
657 for (DNSRecord& rr : resolved) {
658 if (rr.d_place == DNSResourceRecord::ANSWER) {
659 ret.push_back(std::move(rr));
660 }
661 }
662 return rcode;
663 }
664
665 int getFakeAAAARecords(const DNSName& qname, ComboAddress prefix, vector<DNSRecord>& ret)
666 {
667 auto log = g_slog->withName("dns64")->withValues("method", Logging::Loggable("getAAAA"));
668 /* we pass a separate vector of records because we will be resolving the initial qname
669 again, possibly encountering the same CNAME(s), and we don't want to trigger the CNAME
670 loop detection. */
671 vector<DNSRecord> newRecords;
672 int rcode = directResolve(qname, QType::A, QClass::IN, newRecords, t_pdl, log);
673
674 ret.reserve(ret.size() + newRecords.size());
675 for (auto& record : newRecords) {
676 ret.push_back(std::move(record));
677 }
678
679 // Remove double CNAME records
680 std::set<DNSName> seenCNAMEs;
681 ret.erase(std::remove_if(
682 ret.begin(),
683 ret.end(),
684 [&seenCNAMEs](DNSRecord& rr) {
685 if (rr.d_type == QType::CNAME) {
686 auto target = getRR<CNAMERecordContent>(rr);
687 if (target == nullptr) {
688 return false;
689 }
690 if (seenCNAMEs.count(target->getTarget()) > 0) {
691 // We've had this CNAME before, remove it
692 return true;
693 }
694 seenCNAMEs.insert(target->getTarget());
695 }
696 return false;
697 }),
698 ret.end());
699
700 bool seenA = false;
701 for (DNSRecord& rr : ret) {
702 if (rr.d_type == QType::A && rr.d_place == DNSResourceRecord::ANSWER) {
703 if (auto rec = getRR<ARecordContent>(rr)) {
704 ComboAddress ipv4(rec->getCA());
705 memcpy(&prefix.sin6.sin6_addr.s6_addr[12], &ipv4.sin4.sin_addr.s_addr, sizeof(ipv4.sin4.sin_addr.s_addr));
706 rr.d_content = std::make_shared<AAAARecordContent>(prefix);
707 rr.d_type = QType::AAAA;
708 }
709 seenA = true;
710 }
711 }
712
713 if (seenA) {
714 // We've seen an A in the ANSWER section, so there is no need to keep any
715 // SOA in the AUTHORITY section as this is not a NODATA response.
716 ret.erase(std::remove_if(
717 ret.begin(),
718 ret.end(),
719 [](DNSRecord& rr) {
720 return (rr.d_type == QType::SOA && rr.d_place == DNSResourceRecord::AUTHORITY);
721 }),
722 ret.end());
723 }
724 g_stats.dns64prefixanswers++;
725 return rcode;
726 }
727
728 int getFakePTRRecords(const DNSName& qname, vector<DNSRecord>& ret)
729 {
730 /* qname has a reverse ordered IPv6 address, need to extract the underlying IPv4 address from it
731 and turn it into an IPv4 in-addr.arpa query */
732 ret.clear();
733 vector<string> parts = qname.getRawLabels();
734
735 if (parts.size() < 8) {
736 return -1;
737 }
738
739 string newquery;
740 for (int n = 0; n < 4; ++n) {
741 newquery += std::to_string(stoll(parts[n * 2], 0, 16) + 16 * stoll(parts[n * 2 + 1], 0, 16));
742 newquery.append(1, '.');
743 }
744 newquery += "in-addr.arpa.";
745
746 DNSRecord rr;
747 rr.d_name = qname;
748 rr.d_type = QType::CNAME;
749 rr.d_content = std::make_shared<CNAMERecordContent>(newquery);
750 ret.push_back(rr);
751
752 auto log = g_slog->withName("dns64")->withValues("method", Logging::Loggable("getPTR"));
753 int rcode = directResolve(DNSName(newquery), QType::PTR, QClass::IN, ret, t_pdl, log);
754
755 g_stats.dns64prefixanswers++;
756 return rcode;
757 }
758
759 static bool answerIsNOData(uint16_t requestedType, int rcode, const std::vector<DNSRecord>& records)
760 {
761 if (rcode != RCode::NoError) {
762 return false;
763 }
764 for (const auto& rec : records) {
765 if (rec.d_place != DNSResourceRecord::ANSWER) {
766 /* no records in the answer section */
767 return true;
768 }
769 if (rec.d_type == requestedType) {
770 /* we have a record, of the right type, in the right section */
771 return false;
772 }
773 }
774 return true;
775 }
776
777 // RFC 6147 section 5.1 all rcodes except NXDomain should be candidate for dns64
778 // for NoError, check if it is NoData
779 static bool dns64Candidate(uint16_t requestedType, int rcode, const std::vector<DNSRecord>& records)
780 {
781 if (rcode == RCode::NoError) {
782 return answerIsNOData(requestedType, rcode, records);
783 }
784 return rcode != RCode::NXDomain;
785 }
786
787 bool isAllowNotifyForZone(DNSName qname)
788 {
789 if (t_allowNotifyFor->empty()) {
790 return false;
791 }
792
793 notifyset_t::const_iterator ret;
794 do {
795 ret = t_allowNotifyFor->find(qname);
796 if (ret != t_allowNotifyFor->end())
797 return true;
798 } while (qname.chopOff());
799 return false;
800 }
801
802 #if defined(HAVE_FSTRM) && defined(NOD_ENABLED)
803 #include "dnstap.hh"
804 #include "fstrm_logger.hh"
805
806 static bool isEnabledForNODs(const std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>>& fstreamLoggers)
807 {
808 if (fstreamLoggers == nullptr) {
809 return false;
810 }
811 for (auto& logger : *fstreamLoggers) {
812 if (logger->logNODs()) {
813 return true;
814 }
815 }
816 return false;
817 }
818 static bool isEnabledForUDRs(const std::shared_ptr<std::vector<std::unique_ptr<FrameStreamLogger>>>& fstreamLoggers)
819 {
820 if (fstreamLoggers == nullptr) {
821 return false;
822 }
823 for (auto& logger : *fstreamLoggers) {
824 if (logger->logUDRs()) {
825 return true;
826 }
827 }
828 return false;
829 }
830 #endif // HAVE_FSTRM
831
832 void startDoResolve(void* p)
833 {
834 auto dc = std::unique_ptr<DNSComboWriter>(reinterpret_cast<DNSComboWriter*>(p));
835 SyncRes sr(dc->d_now);
836 try {
837 if (t_queryring)
838 t_queryring->push_back({dc->d_mdp.d_qname, dc->d_mdp.d_qtype});
839
840 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
841 EDNSOpts edo;
842 std::vector<pair<uint16_t, string>> ednsOpts;
843 bool variableAnswer = dc->d_variable;
844 bool haveEDNS = false;
845 bool paddingAllowed = false;
846 bool addPaddingToResponse = false;
847 #ifdef NOD_ENABLED
848 bool hasUDR = false;
849 std::shared_ptr<Logr::Logger> nodlogger{nullptr};
850 if (g_udrEnabled || g_nodEnabled) {
851 nodlogger = g_slog->withName("nod")->v(1)->withValues("qname", Logging::Loggable(dc->d_mdp.d_qname));
852 }
853 #endif /* NOD_ENABLED */
854 DNSPacketWriter::optvect_t returnedEdnsOptions; // Here we stuff all the options for the return packet
855 uint8_t ednsExtRCode = 0;
856 if (getEDNSOpts(dc->d_mdp, &edo)) {
857 haveEDNS = true;
858 if (edo.d_version != 0) {
859 ednsExtRCode = ERCode::BADVERS;
860 }
861
862 if (!dc->d_tcp) {
863 /* rfc6891 6.2.3:
864 "Values lower than 512 MUST be treated as equal to 512."
865 */
866 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
867 }
868 ednsOpts = edo.d_options;
869 maxanswersize -= 11; // EDNS header size
870
871 if (!dc->d_responsePaddingDisabled && g_paddingFrom.match(dc->d_remote)) {
872 paddingAllowed = true;
873 if (g_paddingMode == PaddingMode::Always) {
874 addPaddingToResponse = true;
875 }
876 }
877
878 for (const auto& o : edo.d_options) {
879 if (o.first == EDNSOptionCode::ECS && g_useIncomingECS && !dc->d_ecsParsed) {
880 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
881 }
882 else if (o.first == EDNSOptionCode::NSID) {
883 const static string mode_server_id = ::arg()["server-id"];
884 if (mode_server_id != "disabled" && !mode_server_id.empty() && maxanswersize > (EDNSOptionCodeSize + EDNSOptionLengthSize + mode_server_id.size())) {
885 returnedEdnsOptions.emplace_back(EDNSOptionCode::NSID, mode_server_id);
886 variableAnswer = true; // Can't packetcache an answer with NSID
887 maxanswersize -= EDNSOptionCodeSize + EDNSOptionLengthSize + mode_server_id.size();
888 }
889 }
890 else if (paddingAllowed && !addPaddingToResponse && g_paddingMode == PaddingMode::PaddedQueries && o.first == EDNSOptionCode::PADDING) {
891 addPaddingToResponse = true;
892 }
893 }
894 }
895
896 /* the lookup will be done _before_ knowing whether the query actually
897 has a padding option, so we need to use the separate tag even when the
898 query does not have padding, as long as it is from an allowed source */
899 if (paddingAllowed && dc->d_tag == 0) {
900 dc->d_tag = g_paddingTag;
901 }
902
903 /* perhaps there was no EDNS or no ECS but by now we looked */
904 dc->d_ecsParsed = true;
905 vector<DNSRecord> ret;
906 vector<uint8_t> packet;
907
908 auto luaconfsLocal = g_luaconfs.getLocal();
909 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
910 bool wantsRPZ(true);
911 RecursorPacketCache::OptPBData pbDataForCache;
912 pdns::ProtoZero::RecMessage pbMessage;
913 if (checkProtobufExport(luaconfsLocal)) {
914 pbMessage.reserve(128, 128); // It's a bit of a guess...
915 pbMessage.setResponse(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
916 pbMessage.setServerIdentity(SyncRes::s_serverID);
917
918 // RRSets added below
919 }
920 checkOutgoingProtobufExport(luaconfsLocal); // to pick up changed configs
921 #ifdef HAVE_FSTRM
922 checkFrameStreamExport(luaconfsLocal, luaconfsLocal->frameStreamExportConfig, t_frameStreamServersInfo);
923 checkFrameStreamExport(luaconfsLocal, luaconfsLocal->nodFrameStreamExportConfig, t_nodFrameStreamServersInfo);
924 #endif
925
926 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass, dc->d_mdp.d_header.opcode);
927
928 pw.getHeader()->aa = 0;
929 pw.getHeader()->ra = 1;
930 pw.getHeader()->qr = 1;
931 pw.getHeader()->tc = 0;
932 pw.getHeader()->id = dc->d_mdp.d_header.id;
933 pw.getHeader()->rd = dc->d_mdp.d_header.rd;
934 pw.getHeader()->cd = dc->d_mdp.d_header.cd;
935
936 /* This is the lowest TTL seen in the records of the response,
937 so we can't cache it for longer than this value.
938 If we have a TTL cap, this value can't be larger than the
939 cap no matter what. */
940 uint32_t minTTL = dc->d_ttlCap;
941
942 sr.d_eventTrace = std::move(dc->d_eventTrace);
943 sr.setId(MT->getTid());
944
945 bool DNSSECOK = false;
946 if (dc->d_luaContext) {
947 sr.setLuaEngine(dc->d_luaContext);
948 }
949 if (g_dnssecmode != DNSSECMode::Off) {
950 sr.setDoDNSSEC(true);
951
952 // Does the requestor want DNSSEC records?
953 if (edo.d_extFlags & EDNSOpts::DNSSECOK) {
954 DNSSECOK = true;
955 g_stats.dnssecQueries++;
956 }
957 if (dc->d_mdp.d_header.cd) {
958 /* Per rfc6840 section 5.9, "When processing a request with
959 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
960 to return all response data, even data that has failed DNSSEC
961 validation. */
962 ++g_stats.dnssecCheckDisabledQueries;
963 }
964 if (dc->d_mdp.d_header.ad) {
965 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
966 indicating that the requester understands and is interested in the
967 value of the AD bit in the response. This allows a requester to
968 indicate that it understands the AD bit without also requesting
969 DNSSEC data via the DO bit. */
970 ++g_stats.dnssecAuthenticDataQueries;
971 }
972 }
973 else {
974 // Ignore the client-set CD flag
975 pw.getHeader()->cd = 0;
976 }
977 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode == DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode == DNSSECMode::Process));
978
979 sr.setInitialRequestId(dc->d_uuid);
980 sr.setOutgoingProtobufServers(t_outgoingProtobufServers.servers);
981 #ifdef HAVE_FSTRM
982 sr.setFrameStreamServers(t_frameStreamServersInfo.servers);
983 #endif
984
985 bool useMapped = true;
986 // If proxy by table is active and had a match, we only want to use the mapped address if it also has a domain match
987 // (if a domain suffix match table is present in the config)
988 if (t_proxyMapping && dc->d_source != dc->d_mappedSource) {
989 if (auto it = t_proxyMapping->lookup(dc->d_source)) {
990 if (it->second.suffixMatchNode) {
991 if (!it->second.suffixMatchNode->check(dc->d_mdp.d_qname)) {
992 // No match in domains, use original source
993 useMapped = false;
994 }
995 else {
996 ++it->second.stats.suffixMatches;
997 }
998 }
999 // No suffix match node defined, use mapped address
1000 }
1001 // lookup failing cannot happen as dc->d_source != dc->d_mappedSource
1002 }
1003 sr.setQuerySource(useMapped ? dc->d_mappedSource : dc->d_source, g_useIncomingECS && !dc->d_ednssubnet.source.empty() ? boost::optional<const EDNSSubnetOpts&>(dc->d_ednssubnet) : boost::none);
1004
1005 sr.setQueryReceivedOverTCP(dc->d_tcp);
1006
1007 bool tracedQuery = false; // we could consider letting Lua know about this too
1008 bool shouldNotValidate = false;
1009
1010 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1011 int res = RCode::NoError;
1012
1013 DNSFilterEngine::Policy appliedPolicy;
1014 RecursorLua4::DNSQuestion dq(dc->d_source, dc->d_destination, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ, dc->d_logResponse, addPaddingToResponse, (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec != 0) ? dc->d_kernelTimestamp : dc->d_now);
1015 dq.ednsFlags = &edo.d_extFlags;
1016 dq.ednsOptions = &ednsOpts;
1017 dq.tag = dc->d_tag;
1018 dq.discardedPolicies = &sr.d_discardedPolicies;
1019 dq.policyTags = &dc->d_policyTags;
1020 dq.appliedPolicy = &appliedPolicy;
1021 dq.currentRecords = &ret;
1022 dq.dh = &dc->d_mdp.d_header;
1023 dq.data = dc->d_data;
1024 dq.requestorId = dc->d_requestorId;
1025 dq.deviceId = dc->d_deviceId;
1026 dq.deviceName = dc->d_deviceName;
1027 dq.proxyProtocolValues = &dc->d_proxyProtocolValues;
1028 dq.extendedErrorCode = &dc->d_extendedErrorCode;
1029 dq.extendedErrorExtra = &dc->d_extendedErrorExtra;
1030 dq.meta = std::move(dc->d_meta);
1031 dq.fromAuthIP = &sr.d_fromAuthIP;
1032
1033 sr.d_slog = sr.d_slog->withValues("qname", Logging::Loggable(dc->d_mdp.d_qname),
1034 "qtype", Logging::Loggable(QType(dc->d_mdp.d_qtype)),
1035 "remote", Logging::Loggable(dc->getRemote()),
1036 "proto", Logging::Loggable(dc->d_tcp ? "tcp" : "udp"),
1037 "ecs", Logging::Loggable(dc->d_ednssubnet.source.empty() ? "" : dc->d_ednssubnet.source.toString()),
1038 "mtid", Logging::Loggable(MT->getTid()));
1039 RunningResolveGuard tcpGuard(dc);
1040
1041 if (ednsExtRCode != 0 || dc->d_mdp.d_header.opcode == Opcode::Notify) {
1042 goto sendit;
1043 }
1044
1045 if (dc->d_mdp.d_qtype == QType::ANY && !dc->d_tcp && g_anyToTcp) {
1046 pw.getHeader()->tc = 1;
1047 res = 0;
1048 variableAnswer = true;
1049 goto sendit;
1050 }
1051
1052 if (t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
1053 sr.setLogMode(SyncRes::Store);
1054 tracedQuery = true;
1055 }
1056
1057 if (!g_quiet || tracedQuery) {
1058 if (!g_slogStructured) {
1059 g_log << Logger::Warning << RecThreadInfo::id() << " [" << MT->getTid() << "/" << MT->numProcesses() << "] " << (dc->d_tcp ? "TCP " : "") << "question for '" << dc->d_mdp.d_qname << "|"
1060 << QType(dc->d_mdp.d_qtype) << "' from " << dc->getRemote();
1061 if (!dc->d_ednssubnet.source.empty()) {
1062 g_log << " (ecs " << dc->d_ednssubnet.source.toString() << ")";
1063 }
1064 g_log << endl;
1065 }
1066 else {
1067 sr.d_slog->info(Logr::Info, "Question");
1068 }
1069 }
1070
1071 if (!dc->d_mdp.d_header.rd) {
1072 sr.setCacheOnly();
1073 }
1074
1075 if (dc->d_luaContext) {
1076 dc->d_luaContext->prerpz(dq, res, sr.d_eventTrace);
1077 }
1078
1079 // Check if the client has a policy attached to it
1080 if (wantsRPZ && !appliedPolicy.wasHit()) {
1081
1082 if (luaconfsLocal->dfe.getClientPolicy(dc->d_source, sr.d_discardedPolicies, appliedPolicy)) {
1083 mergePolicyTags(dc->d_policyTags, appliedPolicy.getTags());
1084 }
1085 }
1086
1087 /* If we already have an answer generated from gettag_ffi, let's see if the filtering policies
1088 should be applied to it */
1089 if (dc->d_rcode != boost::none) {
1090
1091 bool policyOverride = false;
1092 /* Unless we already matched on the client IP, time to check the qname.
1093 We normally check it in beginResolve() but it will be bypassed since we already have an answer */
1094 if (wantsRPZ && appliedPolicy.policyOverridesGettag()) {
1095 if (appliedPolicy.d_type != DNSFilterEngine::PolicyType::None) {
1096 // Client IP already matched
1097 }
1098 else {
1099 // no match on the client IP, check the qname
1100 if (luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, sr.d_discardedPolicies, appliedPolicy)) {
1101 // got a match
1102 mergePolicyTags(dc->d_policyTags, appliedPolicy.getTags());
1103 }
1104 }
1105
1106 if (appliedPolicy.wasHit()) {
1107 policyOverride = true;
1108 }
1109 }
1110
1111 if (!policyOverride) {
1112 /* No RPZ or gettag overrides it anyway */
1113 ret = std::move(dc->d_records);
1114 res = *dc->d_rcode;
1115 if (res == RCode::NoError && dc->d_followCNAMERecords) {
1116 res = followCNAMERecords(ret, QType(dc->d_mdp.d_qtype), res);
1117 }
1118 goto haveAnswer;
1119 }
1120 }
1121
1122 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
1123 if (!dc->d_luaContext || !dc->d_luaContext->preresolve(dq, res, sr.d_eventTrace)) {
1124
1125 if (!g_dns64PrefixReverse.empty() && dq.qtype == QType::PTR && dq.qname.isPartOf(g_dns64PrefixReverse)) {
1126 res = getFakePTRRecords(dq.qname, ret);
1127 goto haveAnswer;
1128 }
1129
1130 sr.setWantsRPZ(wantsRPZ);
1131
1132 if (wantsRPZ && appliedPolicy.d_kind != DNSFilterEngine::PolicyKind::NoAction) {
1133
1134 if (dc->d_luaContext && dc->d_luaContext->policyHitEventFilter(dc->d_source, dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_tcp, appliedPolicy, dc->d_policyTags, sr.d_discardedPolicies)) {
1135 /* reset to no match */
1136 appliedPolicy = DNSFilterEngine::Policy();
1137 }
1138 else {
1139 auto policyResult = handlePolicyHit(appliedPolicy, dc, sr, res, ret, pw, tcpGuard);
1140 if (policyResult == PolicyResult::HaveAnswer) {
1141 if (g_dns64Prefix && dq.qtype == QType::AAAA && dns64Candidate(dc->d_mdp.d_qtype, res, ret)) {
1142 res = getFakeAAAARecords(dq.qname, *g_dns64Prefix, ret);
1143 shouldNotValidate = true;
1144 }
1145 goto haveAnswer;
1146 }
1147 else if (policyResult == PolicyResult::Drop) {
1148 return;
1149 }
1150 }
1151 }
1152
1153 // Query did not get handled for Client IP or QNAME Policy reasons, now actually go out to find an answer
1154 try {
1155 sr.d_appliedPolicy = appliedPolicy;
1156 sr.d_policyTags = std::move(dc->d_policyTags);
1157
1158 if (!dc->d_routingTag.empty()) {
1159 sr.d_routingTag = dc->d_routingTag;
1160 }
1161
1162 ret.clear(); // policy might have filled it with custom records but we decided not to use them
1163 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
1164 shouldNotValidate = sr.wasOutOfBand();
1165 }
1166 catch (const ImmediateQueryDropException& e) {
1167 // XXX We need to export a protobuf message (and do a NOD lookup) if requested!
1168 g_stats.policyDrops++;
1169 SLOG(g_log << Logger::Debug << "Dropping query because of a filtering policy " << makeLoginfo(dc) << endl,
1170 sr.d_slog->info(Logr::Debug, "Dropping query because of a filtering policy"));
1171 return;
1172 }
1173 catch (const ImmediateServFailException& e) {
1174 if (g_logCommonErrors) {
1175 SLOG(g_log << Logger::Notice << "Sending SERVFAIL to " << dc->getRemote() << " during resolve of '" << dc->d_mdp.d_qname << "' because: " << e.reason << endl,
1176 sr.d_slog->error(Logr::Notice, e.reason, "Sending SERVFAIL during resolve"));
1177 }
1178 res = RCode::ServFail;
1179 }
1180 catch (const SendTruncatedAnswerException& e) {
1181 ret.clear();
1182 res = RCode::NoError;
1183 pw.getHeader()->tc = 1;
1184 }
1185 catch (const PolicyHitException& e) {
1186 res = -2;
1187 }
1188 dq.validationState = sr.getValidationState();
1189 appliedPolicy = sr.d_appliedPolicy;
1190 dc->d_policyTags = std::move(sr.d_policyTags);
1191
1192 if (appliedPolicy.d_type != DNSFilterEngine::PolicyType::None && appliedPolicy.d_zoneData && appliedPolicy.d_zoneData->d_extendedErrorCode) {
1193 dc->d_extendedErrorCode = *appliedPolicy.d_zoneData->d_extendedErrorCode;
1194 dc->d_extendedErrorExtra = appliedPolicy.d_zoneData->d_extendedErrorExtra;
1195 }
1196
1197 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1198 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1199 if (appliedPolicy.d_kind == DNSFilterEngine::PolicyKind::NoAction) {
1200 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1201 }
1202 auto policyResult = handlePolicyHit(appliedPolicy, dc, sr, res, ret, pw, tcpGuard);
1203 if (policyResult == PolicyResult::HaveAnswer) {
1204 goto haveAnswer;
1205 }
1206 else if (policyResult == PolicyResult::Drop) {
1207 return;
1208 }
1209 }
1210
1211 bool luaHookHandled = false;
1212 if (dc->d_luaContext) {
1213 PolicyResult policyResult = PolicyResult::NoAction;
1214 if (answerIsNOData(dc->d_mdp.d_qtype, res, ret)) {
1215 if (dc->d_luaContext->nodata(dq, res, sr.d_eventTrace)) {
1216 luaHookHandled = true;
1217 shouldNotValidate = true;
1218 policyResult = handlePolicyHit(appliedPolicy, dc, sr, res, ret, pw, tcpGuard);
1219 }
1220 }
1221 else if (res == RCode::NXDomain && dc->d_luaContext->nxdomain(dq, res, sr.d_eventTrace)) {
1222 luaHookHandled = true;
1223 shouldNotValidate = true;
1224 policyResult = handlePolicyHit(appliedPolicy, dc, sr, res, ret, pw, tcpGuard);
1225 }
1226 if (policyResult == PolicyResult::HaveAnswer) {
1227 goto haveAnswer;
1228 }
1229 else if (policyResult == PolicyResult::Drop) {
1230 return;
1231 }
1232 } // dc->d_luaContext
1233
1234 if (!luaHookHandled && g_dns64Prefix && dc->d_mdp.d_qtype == QType::AAAA && (shouldNotValidate || !sr.isDNSSECValidationRequested() || !vStateIsBogus(dq.validationState)) && dns64Candidate(dc->d_mdp.d_qtype, res, ret)) {
1235 res = getFakeAAAARecords(dq.qname, *g_dns64Prefix, ret);
1236 shouldNotValidate = true;
1237 }
1238
1239 if (dc->d_luaContext) {
1240 PolicyResult policyResult = PolicyResult::NoAction;
1241 if (dc->d_luaContext->d_postresolve_ffi) {
1242 RecursorLua4::PostResolveFFIHandle handle(dq);
1243 sr.d_eventTrace.add(RecEventTrace::LuaPostResolveFFI);
1244 bool pr = dc->d_luaContext->postresolve_ffi(handle);
1245 sr.d_eventTrace.add(RecEventTrace::LuaPostResolveFFI, pr, false);
1246 if (pr) {
1247 shouldNotValidate = true;
1248 policyResult = handlePolicyHit(appliedPolicy, dc, sr, res, ret, pw, tcpGuard);
1249 }
1250 }
1251 else if (dc->d_luaContext->postresolve(dq, res, sr.d_eventTrace)) {
1252 shouldNotValidate = true;
1253 policyResult = handlePolicyHit(appliedPolicy, dc, sr, res, ret, pw, tcpGuard);
1254 }
1255 if (policyResult == PolicyResult::HaveAnswer) {
1256 goto haveAnswer;
1257 }
1258 else if (policyResult == PolicyResult::Drop) {
1259 return;
1260 }
1261 } // dc->d_luaContext
1262 }
1263 else if (dc->d_luaContext) {
1264 // preresolve returned true
1265 shouldNotValidate = true;
1266 auto policyResult = handlePolicyHit(appliedPolicy, dc, sr, res, ret, pw, tcpGuard);
1267 // haveAnswer case redundant
1268 if (policyResult == PolicyResult::Drop) {
1269 return;
1270 }
1271 }
1272
1273 haveAnswer:;
1274 if (tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail) {
1275 string trace(sr.getTrace());
1276 if (!trace.empty()) {
1277 vector<string> lines;
1278 boost::split(lines, trace, boost::is_any_of("\n"));
1279 for (const string& line : lines) {
1280 if (!line.empty())
1281 g_log << Logger::Warning << line << endl;
1282 }
1283 }
1284 }
1285
1286 if (res == -1) {
1287 pw.getHeader()->rcode = RCode::ServFail;
1288 // no commit here, because no record
1289 g_stats.servFails++;
1290 }
1291 else {
1292 pw.getHeader()->rcode = res;
1293
1294 // Does the validation mode or query demand validation?
1295 if (!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1296 try {
1297 auto state = sr.getValidationState();
1298
1299 string x_marker;
1300 std::shared_ptr<Logr::Logger> log;
1301 if (sr.doLog() || vStateIsBogus(state)) {
1302 // Only create logging object if needed below, beware if you change the logging logic!
1303 log = sr.d_slog->withValues("vstate", Logging::Loggable(state));
1304 auto xdnssec = g_xdnssec.getLocal();
1305 if (xdnssec->check(dc->d_mdp.d_qname)) {
1306 log = log->withValues("in-x-dnssec-names", Logging::Loggable(1));
1307 x_marker = " [in x-dnssec-names]";
1308 }
1309 }
1310 if (state == vState::Secure) {
1311 if (sr.doLog()) {
1312 SLOG(g_log << Logger::Warning << "Answer to " << dc->d_mdp.d_qname << "|" << QType(dc->d_mdp.d_qtype) << x_marker << " for " << dc->getRemote() << " validates correctly" << endl,
1313 log->info(Logr::Info, "Validates Correctly"));
1314 }
1315
1316 // Is the query source interested in the value of the ad-bit?
1317 if (dc->d_mdp.d_header.ad || DNSSECOK)
1318 pw.getHeader()->ad = 1;
1319 }
1320 else if (state == vState::Insecure) {
1321 if (sr.doLog()) {
1322 SLOG(g_log << Logger::Warning << "Answer to " << dc->d_mdp.d_qname << "|" << QType(dc->d_mdp.d_qtype) << x_marker << " for " << dc->getRemote() << " validates as Insecure" << endl,
1323 log->info(Logr::Info, "Validates as Insecure"));
1324 }
1325
1326 pw.getHeader()->ad = 0;
1327 }
1328 else if (vStateIsBogus(state)) {
1329 if (t_bogusremotes)
1330 t_bogusremotes->push_back(dc->d_source);
1331 if (t_bogusqueryring)
1332 t_bogusqueryring->push_back({dc->d_mdp.d_qname, dc->d_mdp.d_qtype});
1333 if (g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
1334 SLOG(g_log << Logger::Warning << "Answer to " << dc->d_mdp.d_qname << "|" << QType(dc->d_mdp.d_qtype) << x_marker << " for " << dc->getRemote() << " validates as " << vStateToString(state) << endl,
1335 log->info(Logr::Notice, "Validates as Bogus"));
1336 }
1337
1338 // Does the query or the validation mode demand sending out a SERVFAIL on validation errors?
1339 if (!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
1340 if (sr.doLog()) {
1341 SLOG(g_log << Logger::Warning << "Sending out SERVFAIL for " << dc->d_mdp.d_qname << "|" << QType(dc->d_mdp.d_qtype) << " because recursor or query demands it for Bogus results" << endl,
1342 log->info(Logr::Notice, "Sending out SERVFAIL because recursor or query demands it for Bogus results"));
1343 }
1344
1345 pw.getHeader()->rcode = RCode::ServFail;
1346 goto sendit;
1347 }
1348 else {
1349 if (sr.doLog()) {
1350 SLOG(g_log << Logger::Warning << "Not sending out SERVFAIL for " << dc->d_mdp.d_qname << "|" << QType(dc->d_mdp.d_qtype) << x_marker << " Bogus validation since neither config nor query demands this" << endl,
1351 log->info(Logr::Notice, "Sending out SERVFAIL because recursor or query demands it for Bogus results"));
1352 }
1353 }
1354 }
1355 }
1356 catch (const ImmediateServFailException& e) {
1357 if (g_logCommonErrors)
1358 SLOG(g_log << Logger::Notice << "Sending SERVFAIL to " << dc->getRemote() << " during validation of '" << dc->d_mdp.d_qname << "|" << QType(dc->d_mdp.d_qtype) << "' because: " << e.reason << endl,
1359 sr.d_slog->error(Logr::Notice, e.reason, "Sending SERVFAIL during validation", "exception", Logging::Loggable("ImmediateServFailException")));
1360 goto sendit;
1361 }
1362 }
1363
1364 if (ret.size()) {
1365 pdns::orderAndShuffle(ret, false);
1366 if (auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_source)) {
1367 stable_sort(ret.begin(), ret.end(), *sl);
1368 variableAnswer = true;
1369 }
1370 }
1371
1372 bool needCommit = false;
1373 for (auto i = ret.cbegin(); i != ret.cend(); ++i) {
1374 if (!DNSSECOK && (i->d_type == QType::NSEC3 || ((i->d_type == QType::RRSIG || i->d_type == QType::NSEC) && ((dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY) || (i->d_place != DNSResourceRecord::ANSWER && i->d_place != DNSResourceRecord::ADDITIONAL))))) {
1375 continue;
1376 }
1377
1378 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
1379 needCommit = false;
1380 break;
1381 }
1382 needCommit = true;
1383
1384 bool udr = false;
1385 #ifdef NOD_ENABLED
1386 if (g_udrEnabled) {
1387 udr = udrCheckUniqueDNSRecord(nodlogger, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, *i);
1388 if (!hasUDR && udr) {
1389 hasUDR = true;
1390 }
1391 }
1392 #endif /* NOD ENABLED */
1393
1394 if (t_protobufServers.servers) {
1395 // Max size is 64k, but we're conservative here, as other fields are added after the answers have been added
1396 // If a single answer causes a too big protobuf message, it will be dropped by queueData()
1397 // But note addRR has code to prevent that
1398 if (pbMessage.size() < std::numeric_limits<uint16_t>::max() / 2) {
1399 pbMessage.addRR(*i, luaconfsLocal->protobufExportConfig.exportTypes, udr);
1400 }
1401 }
1402 }
1403 if (needCommit) {
1404 pw.commit();
1405 }
1406 #ifdef NOD_ENABLED
1407 #ifdef HAVE_FSTRM
1408 if (hasUDR) {
1409 if (isEnabledForUDRs(t_nodFrameStreamServersInfo.servers)) {
1410 struct timespec ts;
1411 std::string str;
1412 if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
1413 TIMEVAL_TO_TIMESPEC(&(dc->d_kernelTimestamp), &ts);
1414 }
1415 else {
1416 TIMEVAL_TO_TIMESPEC(&(dc->d_now), &ts);
1417 }
1418 DnstapMessage message(str, DnstapMessage::MessageType::resolver_response, SyncRes::s_serverID, &dc->d_source, &dc->d_destination, dc->d_tcp ? DnstapMessage::ProtocolType::DoTCP : DnstapMessage::ProtocolType::DoUDP, reinterpret_cast<const char*>(&*packet.begin()), packet.size(), &ts, nullptr, dc->d_mdp.d_qname);
1419
1420 for (auto& logger : *(t_nodFrameStreamServersInfo.servers)) {
1421 if (logger->logUDRs()) {
1422 remoteLoggerQueueData(*logger, str);
1423 }
1424 }
1425 }
1426 }
1427 #endif // HAVE_FSTRM
1428 #endif // NOD_ENABLED
1429 }
1430 sendit:;
1431
1432 if (g_useIncomingECS && dc->d_ecsFound && !sr.wasVariable() && !variableAnswer) {
1433 EDNSSubnetOpts eo;
1434 eo.source = dc->d_ednssubnet.source;
1435 ComboAddress sa;
1436 sa.reset();
1437 sa.sin4.sin_family = eo.source.getNetwork().sin4.sin_family;
1438 eo.scope = Netmask(sa, 0);
1439 auto ecsPayload = makeEDNSSubnetOptsString(eo);
1440
1441 // if we don't have enough space available let's just not set that scope of zero,
1442 // it will prevent some caching, mostly from dnsdist, but that's fine
1443 if (pw.size() < maxanswersize && (maxanswersize - pw.size()) >= (EDNSOptionCodeSize + EDNSOptionLengthSize + ecsPayload.size())) {
1444
1445 maxanswersize -= EDNSOptionCodeSize + EDNSOptionLengthSize + ecsPayload.size();
1446
1447 returnedEdnsOptions.emplace_back(EDNSOptionCode::ECS, std::move(ecsPayload));
1448 }
1449 }
1450
1451 if (haveEDNS && addPaddingToResponse) {
1452 size_t currentSize = pw.getSizeWithOpts(returnedEdnsOptions);
1453 /* we don't use maxanswersize because it accounts for some EDNS options, but
1454 not all of them (for example ECS) */
1455 size_t maxSize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
1456
1457 if (currentSize < (maxSize - 4)) {
1458 size_t remaining = maxSize - (currentSize + 4);
1459 /* from rfc8467, "4.1. Recommended Strategy: Block-Length Padding":
1460 If a server receives a query that includes the EDNS(0) "Padding"
1461 option, it MUST pad the corresponding response (see Section 4 of
1462 RFC 7830) and SHOULD pad the corresponding response to a
1463 multiple of 468 octets (see below).
1464 */
1465 const size_t blockSize = 468;
1466 size_t modulo = (currentSize + 4) % blockSize;
1467 size_t padSize = 0;
1468 if (modulo > 0) {
1469 padSize = std::min(blockSize - modulo, remaining);
1470 }
1471 returnedEdnsOptions.emplace_back(EDNSOptionCode::PADDING, makeEDNSPaddingOptString(padSize));
1472 }
1473 }
1474
1475 if (haveEDNS) {
1476 auto state = sr.getValidationState();
1477 if (dc->d_extendedErrorCode || (g_addExtendedResolutionDNSErrors && vStateIsBogus(state))) {
1478 EDNSExtendedError::code code;
1479 std::string extra;
1480
1481 if (dc->d_extendedErrorCode) {
1482 code = static_cast<EDNSExtendedError::code>(*dc->d_extendedErrorCode);
1483 extra = std::move(dc->d_extendedErrorExtra);
1484 }
1485 else {
1486 switch (state) {
1487 case vState::BogusNoValidDNSKEY:
1488 code = EDNSExtendedError::code::DNSKEYMissing;
1489 break;
1490 case vState::BogusInvalidDenial:
1491 code = EDNSExtendedError::code::NSECMissing;
1492 break;
1493 case vState::BogusUnableToGetDSs:
1494 code = EDNSExtendedError::code::DNSSECBogus;
1495 break;
1496 case vState::BogusUnableToGetDNSKEYs:
1497 code = EDNSExtendedError::code::DNSKEYMissing;
1498 break;
1499 case vState::BogusSelfSignedDS:
1500 code = EDNSExtendedError::code::DNSSECBogus;
1501 break;
1502 case vState::BogusNoRRSIG:
1503 code = EDNSExtendedError::code::RRSIGsMissing;
1504 break;
1505 case vState::BogusNoValidRRSIG:
1506 code = EDNSExtendedError::code::DNSSECBogus;
1507 break;
1508 case vState::BogusMissingNegativeIndication:
1509 code = EDNSExtendedError::code::NSECMissing;
1510 break;
1511 case vState::BogusSignatureNotYetValid:
1512 code = EDNSExtendedError::code::SignatureNotYetValid;
1513 break;
1514 case vState::BogusSignatureExpired:
1515 code = EDNSExtendedError::code::SignatureExpired;
1516 break;
1517 case vState::BogusUnsupportedDNSKEYAlgo:
1518 code = EDNSExtendedError::code::UnsupportedDNSKEYAlgorithm;
1519 break;
1520 case vState::BogusUnsupportedDSDigestType:
1521 code = EDNSExtendedError::code::UnsupportedDSDigestType;
1522 break;
1523 case vState::BogusNoZoneKeyBitSet:
1524 code = EDNSExtendedError::code::NoZoneKeyBitSet;
1525 break;
1526 case vState::BogusRevokedDNSKEY:
1527 code = EDNSExtendedError::code::DNSSECBogus;
1528 break;
1529 case vState::BogusInvalidDNSKEYProtocol:
1530 code = EDNSExtendedError::code::DNSSECBogus;
1531 break;
1532 default:
1533 throw std::runtime_error("Bogus validation state not handled: " + vStateToString(state));
1534 }
1535 }
1536
1537 EDNSExtendedError eee;
1538 eee.infoCode = static_cast<uint16_t>(code);
1539 eee.extraText = std::move(extra);
1540
1541 if (pw.size() < maxanswersize && (maxanswersize - pw.size()) >= (EDNSOptionCodeSize + EDNSOptionLengthSize + sizeof(eee.infoCode) + eee.extraText.size())) {
1542 returnedEdnsOptions.emplace_back(EDNSOptionCode::EXTENDEDERROR, makeEDNSExtendedErrorOptString(eee));
1543 }
1544 }
1545
1546 /* we try to add the EDNS OPT RR even for truncated answers,
1547 as rfc6891 states:
1548 "The minimal response MUST be the DNS header, question section, and an
1549 OPT record. This MUST also occur when a truncated response (using
1550 the DNS header's TC bit) is returned."
1551 */
1552 pw.addOpt(512, ednsExtRCode, DNSSECOK ? EDNSOpts::DNSSECOK : 0, returnedEdnsOptions);
1553 pw.commit();
1554 }
1555
1556 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), pw.getHeader()->rcode, !dc->d_tcp);
1557 updateResponseStats(res, dc->d_source, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1558 #ifdef NOD_ENABLED
1559 bool nod = false;
1560 if (g_nodEnabled) {
1561 if (nodCheckNewDomain(nodlogger, dc->d_mdp.d_qname)) {
1562 nod = true;
1563 #ifdef HAVE_FSTRM
1564 if (isEnabledForNODs(t_nodFrameStreamServersInfo.servers)) {
1565 struct timespec ts;
1566 std::string str;
1567 if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
1568 TIMEVAL_TO_TIMESPEC(&(dc->d_kernelTimestamp), &ts);
1569 }
1570 else {
1571 TIMEVAL_TO_TIMESPEC(&(dc->d_now), &ts);
1572 }
1573 DnstapMessage message(str, DnstapMessage::MessageType::client_query, SyncRes::s_serverID, &dc->d_source, &dc->d_destination, dc->d_tcp ? DnstapMessage::ProtocolType::DoTCP : DnstapMessage::ProtocolType::DoUDP, nullptr, 0, &ts, nullptr, dc->d_mdp.d_qname);
1574
1575 for (auto& logger : *(t_nodFrameStreamServersInfo.servers)) {
1576 if (logger->logNODs()) {
1577 remoteLoggerQueueData(*logger, str);
1578 }
1579 }
1580 }
1581 #endif // HAVE_FSTRM
1582 }
1583 }
1584 #endif /* NOD_ENABLED */
1585
1586 if (variableAnswer || sr.wasVariable()) {
1587 g_stats.variableResponses++;
1588 }
1589
1590 if (t_protobufServers.servers && !(luaconfsLocal->protobufExportConfig.taggedOnly && appliedPolicy.getName().empty() && dc->d_policyTags.empty())) {
1591 // Start constructing embedded DNSResponse object
1592 pbMessage.setResponseCode(pw.getHeader()->rcode);
1593 if (!appliedPolicy.getName().empty()) {
1594 pbMessage.setAppliedPolicy(appliedPolicy.getName());
1595 pbMessage.setAppliedPolicyType(appliedPolicy.d_type);
1596 pbMessage.setAppliedPolicyTrigger(appliedPolicy.d_trigger);
1597 pbMessage.setAppliedPolicyHit(appliedPolicy.d_hit);
1598 pbMessage.setAppliedPolicyKind(appliedPolicy.d_kind);
1599 }
1600 pbMessage.addPolicyTags(dc->d_policyTags);
1601 pbMessage.setInBytes(packet.size());
1602 pbMessage.setValidationState(sr.getValidationState());
1603
1604 // Take a snapshot of the current protobuf buffer state to store in the PC
1605 pbDataForCache = boost::make_optional(RecursorPacketCache::PBData{
1606 pbMessage.getMessageBuf(),
1607 pbMessage.getResponseBuf(),
1608 !appliedPolicy.getName().empty() || !dc->d_policyTags.empty()});
1609 #ifdef NOD_ENABLED
1610 // if (g_udrEnabled) ??
1611 pbMessage.clearUDR(pbDataForCache->d_response);
1612 #endif
1613 }
1614
1615 if (t_packetCache && !variableAnswer && !sr.wasVariable()) {
1616 const auto& hdr = pw.getHeader();
1617 if ((hdr->rcode != RCode::NoError && hdr->rcode != RCode::NXDomain) || (hdr->ancount == 0 && hdr->nscount == 0)) {
1618 minTTL = min(minTTL, SyncRes::s_packetcacheservfailttl);
1619 }
1620 minTTL = min(minTTL, SyncRes::s_packetcachettl);
1621 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, std::move(dc->d_query), dc->d_mdp.d_qname,
1622 dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
1623 string((const char*)&*packet.begin(), packet.size()),
1624 g_now.tv_sec,
1625 minTTL,
1626 dq.validationState,
1627 std::move(pbDataForCache), dc->d_tcp);
1628 }
1629 if (!dc->d_tcp) {
1630 struct msghdr msgh;
1631 struct iovec iov;
1632 cmsgbuf_aligned cbuf;
1633 fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
1634 msgh.msg_control = NULL;
1635
1636 if (g_fromtosockets.count(dc->d_socket)) {
1637 addCMsgSrcAddr(&msgh, &cbuf, &dc->d_local, 0);
1638 }
1639 int sendErr = sendOnNBSocket(dc->d_socket, &msgh);
1640 if (sendErr && g_logCommonErrors) {
1641 SLOG(g_log << Logger::Warning << "Sending UDP reply to client " << dc->getRemote() << " failed with: "
1642 << strerror(sendErr) << endl,
1643 g_slogudpin->error(Logr::Warning, sendErr, "Sending UDP reply to client failed"));
1644 }
1645 }
1646 else {
1647 bool hadError = sendResponseOverTCP(dc, packet);
1648 finishTCPReply(dc, hadError, true);
1649 tcpGuard.setHandled();
1650 }
1651
1652 sr.d_eventTrace.add(RecEventTrace::AnswerSent);
1653
1654 // Now do the per query changing part of the protobuf message
1655 if (t_protobufServers.servers && !(luaconfsLocal->protobufExportConfig.taggedOnly && appliedPolicy.getName().empty() && dc->d_policyTags.empty())) {
1656 // Below are the fields that are not stored in the packet cache and will be appended here and on a cache hit
1657 if (g_useKernelTimestamp && dc->d_kernelTimestamp.tv_sec) {
1658 pbMessage.setQueryTime(dc->d_kernelTimestamp.tv_sec, dc->d_kernelTimestamp.tv_usec);
1659 }
1660 else {
1661 pbMessage.setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1662 }
1663 pbMessage.setMessageIdentity(dc->d_uuid);
1664 pbMessage.setSocketProtocol(dc->d_tcp ? pdns::ProtoZero::Message::TransportProtocol::TCP : pdns::ProtoZero::Message::TransportProtocol::UDP);
1665
1666 if (!luaconfsLocal->protobufExportConfig.logMappedFrom) {
1667 pbMessage.setSocketFamily(dc->d_source.sin4.sin_family);
1668 Netmask requestorNM(dc->d_source, dc->d_source.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1669 ComboAddress requestor = requestorNM.getMaskedNetwork();
1670 pbMessage.setFrom(requestor);
1671 pbMessage.setFromPort(dc->d_source.getPort());
1672 }
1673 else {
1674 pbMessage.setSocketFamily(dc->d_mappedSource.sin4.sin_family);
1675 Netmask requestorNM(dc->d_mappedSource, dc->d_mappedSource.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1676 ComboAddress requestor = requestorNM.getMaskedNetwork();
1677 pbMessage.setFrom(requestor);
1678 pbMessage.setFromPort(dc->d_mappedSource.getPort());
1679 }
1680
1681 pbMessage.setTo(dc->d_destination);
1682 pbMessage.setId(dc->d_mdp.d_header.id);
1683
1684 pbMessage.setTime();
1685 pbMessage.setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIPv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1686 pbMessage.setRequestorId(dq.requestorId);
1687 pbMessage.setDeviceId(dq.deviceId);
1688 pbMessage.setDeviceName(dq.deviceName);
1689 pbMessage.setToPort(dc->d_destination.getPort());
1690
1691 for (const auto& m : dq.meta) {
1692 pbMessage.setMeta(m.first, m.second.stringVal, m.second.intVal);
1693 }
1694 #ifdef NOD_ENABLED
1695 if (g_nodEnabled) {
1696 if (nod) {
1697 pbMessage.setNewlyObservedDomain(true);
1698 pbMessage.addPolicyTag(g_nod_pbtag);
1699 }
1700 if (hasUDR) {
1701 pbMessage.addPolicyTag(g_udr_pbtag);
1702 }
1703 }
1704 #endif /* NOD_ENABLED */
1705 if (sr.d_eventTrace.enabled() && SyncRes::s_event_trace_enabled & SyncRes::event_trace_to_pb) {
1706 pbMessage.addEvents(sr.d_eventTrace);
1707 }
1708 if (dc->d_logResponse) {
1709 protobufLogResponse(pbMessage);
1710 }
1711 }
1712
1713 if (sr.d_eventTrace.enabled() && SyncRes::s_event_trace_enabled & SyncRes::event_trace_to_log) {
1714 SLOG(g_log << Logger::Info << sr.d_eventTrace.toString() << endl,
1715 sr.d_slog->info(Logr::Info, sr.d_eventTrace.toString())); // Maybe we want it to be more fancy?
1716 }
1717
1718 // Originally this code used a mix of floats, doubles, uint64_t with different units.
1719 // Now it always uses an integral number of microseconds, except for averages, which use doubles
1720 uint64_t spentUsec = uSec(sr.getNow() - dc->d_now);
1721 if (!g_quiet) {
1722 if (!g_slogStructured) {
1723 g_log << Logger::Error << RecThreadInfo::id() << " [" << MT->getTid() << "/" << MT->numProcesses() << "] answer to " << (dc->d_mdp.d_header.rd ? "" : "non-rd ") << "question '" << dc->d_mdp.d_qname << "|" << DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1724 g_log << "': " << ntohs(pw.getHeader()->ancount) << " answers, " << ntohs(pw.getHeader()->arcount) << " additional, took " << sr.d_outqueries << " packets, " << sr.d_totUsec / 1000.0 << " netw ms, " << spentUsec / 1000.0 << " tot ms, " << sr.d_throttledqueries << " throttled, " << sr.d_timeouts << " timeouts, " << sr.d_tcpoutqueries << "/" << sr.d_dotoutqueries << " tcp/dot connections, rcode=" << res;
1725
1726 if (!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1727 g_log << ", dnssec=" << sr.getValidationState();
1728 }
1729 g_log << endl;
1730 }
1731 else {
1732 sr.d_slog->info(Logr::Info, "Answer", "rd", Logging::Loggable(dc->d_mdp.d_header.rd),
1733 "answers", Logging::Loggable(ntohs(pw.getHeader()->ancount)),
1734 "additional", Logging::Loggable(ntohs(pw.getHeader()->arcount)),
1735 "outqueries", Logging::Loggable(sr.d_outqueries),
1736 "netms", Logging::Loggable(sr.d_totUsec / 1000.0),
1737 "totms", Logging::Loggable(spentUsec / 1000.0),
1738 "throttled", Logging::Loggable(sr.d_throttledqueries),
1739 "timeouts", Logging::Loggable(sr.d_timeouts),
1740 "tcpout", Logging::Loggable(sr.d_tcpoutqueries),
1741 "dotout", Logging::Loggable(sr.d_dotoutqueries),
1742 "rcode", Logging::Loggable(res),
1743 "validationState", Logging::Loggable(sr.getValidationState()));
1744 }
1745 }
1746
1747 if (dc->d_mdp.d_header.opcode == Opcode::Query) {
1748 if (sr.d_outqueries || sr.d_authzonequeries) {
1749 g_recCache->cacheMisses++;
1750 }
1751 else {
1752 g_recCache->cacheHits++;
1753 }
1754 }
1755
1756 g_stats.answers(spentUsec);
1757 g_stats.cumulativeAnswers(spentUsec);
1758
1759 double newLat = spentUsec;
1760 newLat = min(newLat, g_networkTimeoutMsec * 1000.0); // outliers of several minutes exist..
1761 g_stats.avgLatencyUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyUsec + newLat / g_latencyStatSize;
1762 // no worries, we do this for packet cache hits elsewhere
1763
1764 if (spentUsec >= sr.d_totUsec) {
1765 uint64_t ourtime = spentUsec - sr.d_totUsec;
1766 g_stats.ourtime(ourtime);
1767 newLat = ourtime; // usec
1768 g_stats.avgLatencyOursUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyOursUsec + newLat / g_latencyStatSize;
1769 }
1770
1771 #ifdef NOD_ENABLED
1772 if (nod) {
1773 sendNODLookup(nodlogger, dc->d_mdp.d_qname);
1774 }
1775 #endif /* NOD_ENABLED */
1776
1777 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1778 }
1779 catch (const PDNSException& ae) {
1780 SLOG(g_log << Logger::Error << "startDoResolve problem " << makeLoginfo(dc) << ": " << ae.reason << endl,
1781 sr.d_slog->error(Logr::Error, ae.reason, "startDoResolve problem", "exception", Logging::Loggable("PDNSException")));
1782 }
1783 catch (const MOADNSException& mde) {
1784 SLOG(g_log << Logger::Error << "DNS parser error " << makeLoginfo(dc) << ": " << dc->d_mdp.d_qname << ", " << mde.what() << endl,
1785 sr.d_slog->error(Logr::Error, mde.what(), "DNS parser error"));
1786 }
1787 catch (const std::exception& e) {
1788 SLOG(g_log << Logger::Error << "STL error " << makeLoginfo(dc) << ": " << e.what(),
1789 sr.d_slog->error(Logr::Error, e.what(), "Exception in resolver context ", "exception", Logging::Loggable("std::exception")));
1790
1791 // Luawrapper nests the exception from Lua, so we unnest it here
1792 try {
1793 std::rethrow_if_nested(e);
1794 }
1795 catch (const std::exception& ne) {
1796 SLOG(g_log << ". Extra info: " << ne.what(),
1797 sr.d_slog->error(Logr::Error, ne.what(), "Nested exception in resolver context", Logging::Loggable("std::exception")));
1798 }
1799 catch (...) {
1800 }
1801 if (!g_slogStructured) {
1802 g_log << endl;
1803 }
1804 }
1805 catch (...) {
1806 SLOG(g_log << Logger::Error << "Any other exception in a resolver context " << makeLoginfo(dc) << endl,
1807 sr.d_slog->info(Logr::Error, "Any other exception in a resolver context"));
1808 }
1809
1810 runTaskOnce(g_logCommonErrors);
1811
1812 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage.load());
1813 }
1814
1815 void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
1816 bool& foundECS, EDNSSubnetOpts* ednssubnet, EDNSOptionViewMap* options)
1817 {
1818 const bool lookForECS = ednssubnet != nullptr;
1819 const dnsheader_aligned dnshead(question.data());
1820 const dnsheader* dh = dnshead.get();
1821 size_t questionLen = question.length();
1822 unsigned int consumed = 0;
1823 *dnsname = DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1824
1825 size_t pos = sizeof(dnsheader) + consumed + 4;
1826 const size_t headerSize = /* root */ 1 + sizeof(dnsrecordheader);
1827 const uint16_t arcount = ntohs(dh->arcount);
1828
1829 for (uint16_t arpos = 0; arpos < arcount && questionLen > (pos + headerSize) && (lookForECS && !foundECS); arpos++) {
1830 if (question.at(pos) != 0) {
1831 /* not an OPT, bye. */
1832 return;
1833 }
1834
1835 pos += 1;
1836 const dnsrecordheader* drh = reinterpret_cast<const dnsrecordheader*>(&question.at(pos));
1837 pos += sizeof(dnsrecordheader);
1838
1839 if (pos >= questionLen) {
1840 return;
1841 }
1842
1843 /* OPT root label (1) followed by type (2) */
1844 if (lookForECS && ntohs(drh->d_type) == QType::OPT) {
1845 if (!options) {
1846 size_t ecsStartPosition = 0;
1847 size_t ecsLen = 0;
1848 /* we need to pass the record len */
1849 int res = getEDNSOption(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen))), questionLen - pos + sizeof(drh->d_clen), EDNSOptionCode::ECS, &ecsStartPosition, &ecsLen);
1850 if (res == 0 && ecsLen > 4) {
1851 EDNSSubnetOpts eso;
1852 if (getEDNSSubnetOptsFromString(&question.at(pos - sizeof(drh->d_clen) + ecsStartPosition + 4), ecsLen - 4, &eso)) {
1853 *ednssubnet = eso;
1854 foundECS = true;
1855 }
1856 }
1857 }
1858 else {
1859 /* we need to pass the record len */
1860 int res = getEDNSOptions(reinterpret_cast<const char*>(&question.at(pos - sizeof(drh->d_clen))), questionLen - pos + (sizeof(drh->d_clen)), *options);
1861 if (res == 0) {
1862 const auto& it = options->find(EDNSOptionCode::ECS);
1863 if (it != options->end() && !it->second.values.empty() && it->second.values.at(0).content != nullptr && it->second.values.at(0).size > 0) {
1864 EDNSSubnetOpts eso;
1865 if (getEDNSSubnetOptsFromString(it->second.values.at(0).content, it->second.values.at(0).size, &eso)) {
1866 *ednssubnet = eso;
1867 foundECS = true;
1868 }
1869 }
1870 }
1871 }
1872 }
1873
1874 pos += ntohs(drh->d_clen);
1875 }
1876 }
1877
1878 bool checkForCacheHit(bool qnameParsed, unsigned int tag, const string& data,
1879 DNSName& qname, uint16_t& qtype, uint16_t& qclass,
1880 const struct timeval& now,
1881 string& response, uint32_t& qhash,
1882 RecursorPacketCache::OptPBData& pbData, bool tcp, const ComboAddress& source, const ComboAddress& mappedSource)
1883 {
1884 if (!t_packetCache) {
1885 return false;
1886 }
1887 bool cacheHit = false;
1888 uint32_t age;
1889 vState valState;
1890
1891 if (qnameParsed) {
1892 cacheHit = t_packetCache->getResponsePacket(tag, data, qname, qtype, qclass, now.tv_sec, &response, &age, &valState, &qhash, &pbData, tcp);
1893 }
1894 else {
1895 cacheHit = t_packetCache->getResponsePacket(tag, data, qname, &qtype, &qclass, now.tv_sec, &response, &age, &valState, &qhash, &pbData, tcp);
1896 }
1897
1898 if (cacheHit) {
1899 if (vStateIsBogus(valState)) {
1900 if (t_bogusremotes) {
1901 t_bogusremotes->push_back(source);
1902 }
1903 if (t_bogusqueryring) {
1904 t_bogusqueryring->push_back({qname, qtype});
1905 }
1906 }
1907
1908 // This is only to get the proxyMapping suffixMatch stats right i the case of a PC hit
1909 if (t_proxyMapping && source != mappedSource) {
1910 if (const auto* found = t_proxyMapping->lookup(source)) {
1911 if (found->second.suffixMatchNode) {
1912 if (found->second.suffixMatchNode->check(qname)) {
1913 ++found->second.stats.suffixMatches;
1914 }
1915 }
1916 }
1917 }
1918
1919 g_stats.packetCacheHits++;
1920 SyncRes::s_queries++;
1921 ageDNSPacket(response, age);
1922 if (response.length() >= sizeof(struct dnsheader)) {
1923 const struct dnsheader* dh = reinterpret_cast<const dnsheader*>(response.data());
1924 updateResponseStats(dh->rcode, source, response.length(), 0, 0);
1925 }
1926 g_stats.avgLatencyUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1927 g_stats.avgLatencyOursUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1928 #if 0
1929 // XXX changes behaviour compared to old code!
1930 g_stats.answers(0);
1931 g_stats.ourtime(0);
1932 #endif
1933 }
1934
1935 return cacheHit;
1936 }
1937
1938 static void* pleaseWipeCaches(const DNSName& canon, bool subtree, uint16_t qtype)
1939 {
1940 auto res = wipeCaches(canon, subtree, qtype);
1941 SLOG(g_log << Logger::Info << "Wiped caches for " << canon << ": " << res.record_count << " records; " << res.negative_record_count << " negative records; " << res.packet_count << " packets" << endl,
1942 g_slog->withName("runtime")->info(Logr::Info, "Wiped cache", "qname", Logging::Loggable(canon), "records", Logging::Loggable(res.record_count), "negrecords", Logging::Loggable(res.negative_record_count), "packets", Logging::Loggable(res.packet_count)));
1943 return nullptr;
1944 }
1945
1946 void requestWipeCaches(const DNSName& canon)
1947 {
1948 // send a message to the handler thread asking it
1949 // to wipe all of the caches
1950 ThreadMSG* tmsg = new ThreadMSG();
1951 tmsg->func = [=] { return pleaseWipeCaches(canon, true, 0xffff); };
1952 tmsg->wantAnswer = false;
1953 if (write(RecThreadInfo::info(0).pipes.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
1954 delete tmsg;
1955
1956 unixDie("write to thread pipe returned wrong size or error");
1957 }
1958 // coverity[leaked_storage]
1959 }
1960
1961 bool expectProxyProtocol(const ComboAddress& from)
1962 {
1963 return g_proxyProtocolACL.match(from);
1964 }
1965
1966 // fromaddr: the address the query is coming from
1967 // destaddr: the address the query was received on
1968 // source: the address we assume the query is coming from, might be set by proxy protocol
1969 // destination: the address we assume the query was sent to, might be set by proxy protocol
1970 // mappedSource: the address we assume the query is coming from. Differs from source if table based mapping has been applied
1971 static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, ComboAddress source, ComboAddress destination, const ComboAddress& mappedSource, struct timeval tv, int fd, std::vector<ProxyProtocolValue>& proxyProtocolValues, RecEventTrace& eventTrace)
1972 {
1973 ++(RecThreadInfo::self().numberOfDistributedQueries);
1974 gettimeofday(&g_now, nullptr);
1975 if (tv.tv_sec) {
1976 struct timeval diff = g_now - tv;
1977 double delta = (diff.tv_sec * 1000 + diff.tv_usec / 1000.0);
1978
1979 if (delta > 1000.0) {
1980 g_stats.tooOldDrops++;
1981 return nullptr;
1982 }
1983 }
1984
1985 ++g_stats.qcounter;
1986 if (fromaddr.sin4.sin_family == AF_INET6)
1987 g_stats.ipv6qcounter++;
1988
1989 string response;
1990 const dnsheader_aligned headerdata(question.data());
1991 const dnsheader* dh = headerdata.get();
1992 unsigned int ctag = 0;
1993 uint32_t qhash = 0;
1994 bool needECS = false;
1995 std::unordered_set<std::string> policyTags;
1996 std::map<std::string, RecursorLua4::MetaValue> meta;
1997 LuaContext::LuaObject data;
1998 string requestorId;
1999 string deviceId;
2000 string deviceName;
2001 string routingTag;
2002 bool logQuery = false;
2003 bool logResponse = false;
2004 boost::uuids::uuid uniqueId;
2005 auto luaconfsLocal = g_luaconfs.getLocal();
2006 const auto pbExport = checkProtobufExport(luaconfsLocal);
2007 const auto outgoingbExport = checkOutgoingProtobufExport(luaconfsLocal);
2008 if (pbExport || outgoingbExport) {
2009 if (pbExport) {
2010 needECS = true;
2011 }
2012 uniqueId = getUniqueID();
2013 }
2014 logQuery = t_protobufServers.servers && luaconfsLocal->protobufExportConfig.logQueries;
2015 logResponse = t_protobufServers.servers && luaconfsLocal->protobufExportConfig.logResponses;
2016 #ifdef HAVE_FSTRM
2017 checkFrameStreamExport(luaconfsLocal, luaconfsLocal->frameStreamExportConfig, t_frameStreamServersInfo);
2018 #endif
2019 EDNSSubnetOpts ednssubnet;
2020 bool ecsFound = false;
2021 bool ecsParsed = false;
2022 std::vector<DNSRecord> records;
2023 std::string extendedErrorExtra;
2024 boost::optional<int> rcode = boost::none;
2025 boost::optional<uint16_t> extendedErrorCode{boost::none};
2026 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
2027 bool variable = false;
2028 bool followCNAMEs = false;
2029 bool responsePaddingDisabled = false;
2030 DNSName qname;
2031 try {
2032 uint16_t qtype = 0;
2033 uint16_t qclass = 0;
2034 bool qnameParsed = false;
2035 #ifdef MALLOC_TRACE
2036 /*
2037 static uint64_t last=0;
2038 if(!last)
2039 g_mtracer->clearAllocators();
2040 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2041 last=g_mtracer->getAllocs();
2042 cout<<g_mtracer->topAllocatorsString()<<endl;
2043 g_mtracer->clearAllocators();
2044 */
2045 #endif
2046
2047 // We do not have a SyncRes specific Lua context at this point yet, so ok to use t_pdl
2048 if (needECS || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi)) || dh->opcode == Opcode::Notify) {
2049 try {
2050 EDNSOptionViewMap ednsOptions;
2051
2052 ecsFound = false;
2053
2054 getQNameAndSubnet(question, &qname, &qtype, &qclass,
2055 ecsFound, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
2056
2057 qnameParsed = true;
2058 ecsParsed = true;
2059
2060 if (t_pdl) {
2061 try {
2062 if (t_pdl->d_gettag_ffi) {
2063 RecursorLua4::FFIParams params(qname, qtype, destination, source, ednssubnet.source, data, policyTags, records, ednsOptions, proxyProtocolValues, requestorId, deviceId, deviceName, routingTag, rcode, ttlCap, variable, false, logQuery, logResponse, followCNAMEs, extendedErrorCode, extendedErrorExtra, responsePaddingDisabled, meta);
2064
2065 eventTrace.add(RecEventTrace::LuaGetTagFFI);
2066 ctag = t_pdl->gettag_ffi(params);
2067 eventTrace.add(RecEventTrace::LuaGetTagFFI, ctag, false);
2068 }
2069 else if (t_pdl->d_gettag) {
2070 eventTrace.add(RecEventTrace::LuaGetTag);
2071 ctag = t_pdl->gettag(source, ednssubnet.source, destination, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, deviceName, routingTag, proxyProtocolValues);
2072 eventTrace.add(RecEventTrace::LuaGetTag, ctag, false);
2073 }
2074 }
2075 catch (const std::exception& e) {
2076 if (g_logCommonErrors) {
2077 SLOG(g_log << Logger::Warning << "Error parsing a query packet qname='" << qname << "' for tag determination, setting tag=0: " << e.what() << endl,
2078 g_slogudpin->error(Logr::Warning, e.what(), "Error parsing a query packet for tag determination, setting tag=0", "qname", Logging::Loggable(qname), "remote", Logging::Loggable(fromaddr), "exception", Logging::Loggable("std;:exception")));
2079 }
2080 }
2081 }
2082 }
2083 catch (const std::exception& e) {
2084 if (g_logCommonErrors) {
2085 SLOG(g_log << Logger::Warning << "Error parsing a query packet for tag determination, setting tag=0: " << e.what() << endl,
2086 g_slogudpin->error(Logr::Warning, e.what(), "Error parsing a query packet for tag determination, setting tag=0", "remote", Logging::Loggable(fromaddr), "exception", Logging::Loggable("std;:exception")));
2087 }
2088 }
2089 }
2090
2091 RecursorPacketCache::OptPBData pbData{boost::none};
2092 if (t_protobufServers.servers) {
2093 if (logQuery && !(luaconfsLocal->protobufExportConfig.taggedOnly && policyTags.empty())) {
2094 protobufLogQuery(luaconfsLocal, uniqueId, source, destination, mappedSource, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId, deviceName, meta);
2095 }
2096 }
2097
2098 if (ctag == 0 && !responsePaddingDisabled && g_paddingFrom.match(fromaddr)) {
2099 ctag = g_paddingTag;
2100 }
2101
2102 if (dh->opcode == Opcode::Query) {
2103 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2104 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2105 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
2106 eventTrace.add(RecEventTrace::PCacheCheck);
2107 bool cacheHit = checkForCacheHit(qnameParsed, ctag, question, qname, qtype, qclass, g_now, response, qhash, pbData, false, source, mappedSource);
2108 eventTrace.add(RecEventTrace::PCacheCheck, cacheHit, false);
2109 if (cacheHit) {
2110 if (!g_quiet) {
2111 SLOG(g_log << Logger::Notice << RecThreadInfo::id() << " question answered from packet cache tag=" << ctag << " from " << source.toStringWithPort() << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << endl,
2112 g_slogudpin->info(Logr::Notice, "Question answered from packet cache", "tag", Logging::Loggable(ctag),
2113 "qname", Logging::Loggable(qname), "qtype", Logging::Loggable(QType(qtype)),
2114 "source", Logging::Loggable(source), "remote", Logging::Loggable(fromaddr)));
2115 }
2116 struct msghdr msgh;
2117 struct iovec iov;
2118 cmsgbuf_aligned cbuf;
2119 fillMSGHdr(&msgh, &iov, &cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
2120 msgh.msg_control = NULL;
2121
2122 if (g_fromtosockets.count(fd)) {
2123 addCMsgSrcAddr(&msgh, &cbuf, &destaddr, 0);
2124 }
2125 int sendErr = sendOnNBSocket(fd, &msgh);
2126 eventTrace.add(RecEventTrace::AnswerSent);
2127
2128 if (t_protobufServers.servers && logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbData && !pbData->d_tagged)) {
2129 protobufLogResponse(dh, luaconfsLocal, pbData, tv, false, source, destination, mappedSource, ednssubnet, uniqueId, requestorId, deviceId, deviceName, meta, eventTrace);
2130 }
2131
2132 if (eventTrace.enabled() && SyncRes::s_event_trace_enabled & SyncRes::event_trace_to_log) {
2133 SLOG(g_log << Logger::Info << eventTrace.toString() << endl,
2134 g_slogudpin->info(Logr::Info, eventTrace.toString())); // Do we want more fancy logging here?
2135 }
2136 if (sendErr && g_logCommonErrors) {
2137 SLOG(g_log << Logger::Warning << "Sending UDP reply to client " << source.toStringWithPort()
2138 << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << " failed with: "
2139 << strerror(sendErr) << endl,
2140 g_slogudpin->error(Logr::Error, sendErr, "Sending UDP reply to client failed", "source", Logging::Loggable(source), "remote", Logging::Loggable(fromaddr)));
2141 }
2142 struct timeval now;
2143 Utility::gettimeofday(&now, nullptr);
2144 uint64_t spentUsec = uSec(now - tv);
2145 g_stats.cumulativeAnswers(spentUsec);
2146 return 0;
2147 }
2148 }
2149 }
2150 catch (const std::exception& e) {
2151 if (g_logCommonErrors) {
2152 SLOG(g_log << Logger::Error << "Error processing or aging answer packet: " << e.what() << endl,
2153 g_slogudpin->error(Logr::Error, e.what(), "Error processing or aging answer packet", "exception", Logging::Loggable("std::exception")));
2154 }
2155 return 0;
2156 }
2157
2158 if (t_pdl) {
2159 bool ipf = t_pdl->ipfilter(source, destination, *dh, eventTrace);
2160 if (ipf) {
2161 if (!g_quiet) {
2162 SLOG(g_log << Logger::Notice << RecThreadInfo::id() << " [" << MT->getTid() << "/" << MT->numProcesses() << "] DROPPED question from " << source.toStringWithPort() << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << " based on policy" << endl,
2163 g_slogudpin->info(Logr::Notice, "Dropped question based on policy", "source", Logging::Loggable(source), "remote", Logging::Loggable(fromaddr)));
2164 }
2165 g_stats.policyDrops++;
2166 return 0;
2167 }
2168 }
2169
2170 if (dh->opcode == Opcode::Notify) {
2171 if (!isAllowNotifyForZone(qname)) {
2172 if (!g_quiet) {
2173 SLOG(g_log << Logger::Error << "[" << MT->getTid() << "] dropping UDP NOTIFY from " << source.toStringWithPort() << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << ", for " << qname.toLogString() << ", zone not matched by allow-notify-for" << endl,
2174 g_slogudpin->info(Logr::Notice, "Dropping UDP NOTIFY, zone not matched by allow-notify-for", "source", Logging::Loggable(source), "remote", Logging::Loggable(fromaddr)));
2175 }
2176
2177 g_stats.zoneDisallowedNotify++;
2178 return 0;
2179 }
2180
2181 if (!g_quiet) {
2182 SLOG(g_log << Logger::Notice << RecThreadInfo::id() << " got NOTIFY for " << qname.toLogString() << " from " << source.toStringWithPort() << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << endl,
2183 g_slogudpin->info(Logr::Notice, "Got NOTIFY", "source", Logging::Loggable(source), "remote", Logging::Loggable(fromaddr), "qname", Logging::Loggable(qname)));
2184 }
2185
2186 requestWipeCaches(qname);
2187
2188 // the operation will now be treated as a Query, generating
2189 // a normal response, as the rest of the code does not
2190 // check dh->opcode, but we need to ensure that the response
2191 // to this request does not get put into the packet cache
2192 variable = true;
2193 }
2194
2195 if (MT->numProcesses() > g_maxMThreads) {
2196 if (!g_quiet)
2197 SLOG(g_log << Logger::Notice << RecThreadInfo::id() << " [" << MT->getTid() << "/" << MT->numProcesses() << "] DROPPED question from " << source.toStringWithPort() << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << ", over capacity" << endl,
2198 g_slogudpin->info(Logr::Notice, "Dropped question, over capacity", "source", Logging::Loggable(source), "remote", Logging::Loggable(fromaddr)));
2199
2200 g_stats.overCapacityDrops++;
2201 return 0;
2202 }
2203
2204 auto dc = std::make_unique<DNSComboWriter>(question, g_now, std::move(policyTags), t_pdl, std::move(data), std::move(records));
2205
2206 if (SyncRes::isUnsupported(dc->d_mdp.d_qtype)) {
2207 g_stats.ignoredCount++;
2208 if (!g_quiet) {
2209 SLOG(g_log << Logger::Notice << RecThreadInfo::id() << " Unsupported qtype " << dc->d_mdp.d_qtype << " from " << source.toStringWithPort() << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << endl,
2210 g_slogudpin->info(Logr::Notice, "Unsupported qtype", "qtype", Logging::Loggable(QType(dc->d_mdp.d_qtype)), "source", Logging::Loggable(source), "remote", Logging::Loggable(fromaddr)));
2211 }
2212
2213 return 0;
2214 }
2215
2216 dc->setSocket(fd);
2217 dc->d_tag = ctag;
2218 dc->d_qhash = qhash;
2219 dc->setRemote(fromaddr); // the address the query is coming from
2220 dc->setSource(source); // the address we assume the query is coming from, might be set by proxy protocol
2221 dc->setLocal(destaddr); // the address the query was received on
2222 dc->setDestination(destination); // the address we assume the query is sent to, might be set by proxy protocol
2223 dc->setMappedSource(mappedSource); // the address we assume the query is coming from. Differs from source if table-based mapping has been applied
2224 dc->d_tcp = false;
2225 dc->d_ecsFound = ecsFound;
2226 dc->d_ecsParsed = ecsParsed;
2227 dc->d_ednssubnet = ednssubnet;
2228 dc->d_ttlCap = ttlCap;
2229 dc->d_variable = variable;
2230 dc->d_followCNAMERecords = followCNAMEs;
2231 dc->d_rcode = rcode;
2232 dc->d_logResponse = logResponse;
2233 if (t_protobufServers.servers || t_outgoingProtobufServers.servers) {
2234 dc->d_uuid = std::move(uniqueId);
2235 }
2236 dc->d_requestorId = requestorId;
2237 dc->d_deviceId = deviceId;
2238 dc->d_deviceName = deviceName;
2239 dc->d_kernelTimestamp = tv;
2240 dc->d_proxyProtocolValues = std::move(proxyProtocolValues);
2241 dc->d_routingTag = std::move(routingTag);
2242 dc->d_extendedErrorCode = extendedErrorCode;
2243 dc->d_extendedErrorExtra = std::move(extendedErrorExtra);
2244 dc->d_responsePaddingDisabled = responsePaddingDisabled;
2245 dc->d_meta = std::move(meta);
2246
2247 dc->d_eventTrace = std::move(eventTrace);
2248 MT->makeThread(startDoResolve, (void*)dc.release()); // deletes dc
2249
2250 return 0;
2251 }
2252
2253 static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
2254 {
2255 ssize_t len;
2256 static const size_t maxIncomingQuerySize = g_proxyProtocolACL.empty() ? 512 : (512 + g_proxyProtocolMaximumSize);
2257 static thread_local std::string data;
2258 ComboAddress fromaddr; // the address the query is coming from
2259 ComboAddress source; // the address we assume the query is coming from, might be set by proxy protocol
2260 ComboAddress destination; // the address we assume the query was sent to, might be set by proxy protocol
2261 struct msghdr msgh;
2262 struct iovec iov;
2263 cmsgbuf_aligned cbuf;
2264 bool firstQuery = true;
2265 std::vector<ProxyProtocolValue> proxyProtocolValues;
2266 RecEventTrace eventTrace;
2267
2268 for (size_t queriesCounter = 0; queriesCounter < g_maxUDPQueriesPerRound; queriesCounter++) {
2269 bool proxyProto = false;
2270 proxyProtocolValues.clear();
2271 data.resize(maxIncomingQuerySize);
2272 fromaddr.sin6.sin6_family = AF_INET6; // this makes sure fromaddr is big enough
2273 fillMSGHdr(&msgh, &iov, &cbuf, sizeof(cbuf), &data[0], data.size(), &fromaddr);
2274
2275 if ((len = recvmsg(fd, &msgh, 0)) >= 0) {
2276 eventTrace.clear();
2277 eventTrace.setEnabled(SyncRes::s_event_trace_enabled);
2278 eventTrace.add(RecEventTrace::ReqRecv);
2279
2280 firstQuery = false;
2281
2282 if (msgh.msg_flags & MSG_TRUNC) {
2283 g_stats.truncatedDrops++;
2284 if (!g_quiet) {
2285 SLOG(g_log << Logger::Error << "Ignoring truncated query from " << fromaddr.toString() << endl,
2286 g_slogudpin->info(Logr::Error, "Ignoring truncated query", "remote", Logging::Loggable(fromaddr)));
2287 }
2288 return;
2289 }
2290
2291 data.resize(static_cast<size_t>(len));
2292
2293 if (expectProxyProtocol(fromaddr)) {
2294 bool tcp;
2295 ssize_t used = parseProxyHeader(data, proxyProto, source, destination, tcp, proxyProtocolValues);
2296 if (used <= 0) {
2297 ++g_stats.proxyProtocolInvalidCount;
2298 if (!g_quiet) {
2299 SLOG(g_log << Logger::Error << "Ignoring invalid proxy protocol (" << std::to_string(len) << ", " << std::to_string(used) << ") query from " << fromaddr.toStringWithPort() << endl,
2300 g_slogudpin->info(Logr::Error, "Ignoring invalid proxy protocol query", "length", Logging::Loggable(len),
2301 "used", Logging::Loggable(used), "remote", Logging::Loggable(fromaddr)));
2302 }
2303 return;
2304 }
2305 else if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
2306 if (g_quiet) {
2307 SLOG(g_log << Logger::Error << "Proxy protocol header in UDP packet from " << fromaddr.toStringWithPort() << " is larger than proxy-protocol-maximum-size (" << used << "), dropping" << endl,
2308 g_slogudpin->info(Logr::Error, "Proxy protocol header in UDP packet is larger than proxy-protocol-maximum-size",
2309 "used", Logging::Loggable(used), "remote", Logging::Loggable(fromaddr)));
2310 }
2311 ++g_stats.proxyProtocolInvalidCount;
2312 return;
2313 }
2314
2315 data.erase(0, used);
2316 }
2317 else if (len > 512) {
2318 /* we only allow UDP packets larger than 512 for those with a proxy protocol header */
2319 g_stats.truncatedDrops++;
2320 if (!g_quiet) {
2321 SLOG(g_log << Logger::Error << "Ignoring truncated query from " << fromaddr.toStringWithPort() << endl,
2322 g_slogudpin->info(Logr::Error, "Ignoring truncated query", "remote", Logging::Loggable(fromaddr)));
2323 }
2324 return;
2325 }
2326
2327 if (data.size() < sizeof(dnsheader)) {
2328 g_stats.ignoredCount++;
2329 if (!g_quiet) {
2330 SLOG(g_log << Logger::Error << "Ignoring too-short (" << std::to_string(data.size()) << ") query from " << fromaddr.toString() << endl,
2331 g_slogudpin->info(Logr::Error, "Ignoring too-short query", "length", Logging::Loggable(data.size()),
2332 "remote", Logging::Loggable(fromaddr)));
2333 }
2334 return;
2335 }
2336
2337 if (!proxyProto) {
2338 source = fromaddr;
2339 }
2340 ComboAddress mappedSource = source;
2341 if (t_proxyMapping) {
2342 if (auto it = t_proxyMapping->lookup(source)) {
2343 mappedSource = it->second.address;
2344 ++it->second.stats.netmaskMatches;
2345 }
2346 }
2347 if (t_remotes) {
2348 t_remotes->push_back(fromaddr);
2349 }
2350
2351 if (t_allowFrom && !t_allowFrom->match(&mappedSource)) {
2352 if (!g_quiet) {
2353 SLOG(g_log << Logger::Error << "[" << MT->getTid() << "] dropping UDP query from " << mappedSource.toString() << ", address not matched by allow-from" << endl,
2354 g_slogudpin->info(Logr::Error, "Dropping UDP query, address not matched by allow-from", "source", Logging::Loggable(mappedSource)));
2355 }
2356
2357 g_stats.unauthorizedUDP++;
2358 return;
2359 }
2360
2361 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
2362 if (!fromaddr.sin4.sin_port) { // also works for IPv6
2363 if (!g_quiet) {
2364 SLOG(g_log << Logger::Error << "[" << MT->getTid() << "] dropping UDP query from " << fromaddr.toStringWithPort() << ", can't deal with port 0" << endl,
2365 g_slogudpin->info(Logr::Error, "Dropping UDP query can't deal with port 0", "remote", Logging::Loggable(fromaddr)));
2366 }
2367
2368 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
2369 return;
2370 }
2371
2372 try {
2373 const dnsheader_aligned headerdata(data.data());
2374 const dnsheader* dh = headerdata.get();
2375
2376 if (dh->qr) {
2377 g_stats.ignoredCount++;
2378 if (g_logCommonErrors) {
2379 SLOG(g_log << Logger::Error << "Ignoring answer from " << fromaddr.toString() << " on server socket!" << endl,
2380 g_slogudpin->info(Logr::Error, "Ignoring answer on server socket", "remote", Logging::Loggable(fromaddr)));
2381 }
2382 }
2383 else if (dh->opcode != Opcode::Query && dh->opcode != Opcode::Notify) {
2384 g_stats.ignoredCount++;
2385 if (g_logCommonErrors) {
2386 SLOG(g_log << Logger::Error << "Ignoring unsupported opcode " << Opcode::to_s(dh->opcode) << " from " << fromaddr.toString() << " on server socket!" << endl,
2387 g_slogudpin->info(Logr::Error, "Ignoring unsupported opcode server socket", "remote", Logging::Loggable(fromaddr), "opcode", Logging::Loggable(Opcode::to_s(dh->opcode))));
2388 }
2389 }
2390 else if (dh->qdcount == 0) {
2391 g_stats.emptyQueriesCount++;
2392 if (g_logCommonErrors) {
2393 SLOG(g_log << Logger::Error << "Ignoring empty (qdcount == 0) query from " << fromaddr.toString() << " on server socket!" << endl,
2394 g_slogudpin->info(Logr::Error, "Ignoring empty (qdcount == 0) query on server socket!", "remote", Logging::Loggable(fromaddr)));
2395 }
2396 }
2397 else {
2398 if (dh->opcode == Opcode::Notify) {
2399 if (!t_allowNotifyFrom || !t_allowNotifyFrom->match(&mappedSource)) {
2400 if (!g_quiet) {
2401 SLOG(g_log << Logger::Error << "[" << MT->getTid() << "] dropping UDP NOTIFY from " << mappedSource.toString() << ", address not matched by allow-notify-from" << endl,
2402 g_slogudpin->info(Logr::Error, "Dropping UDP NOTIFY from address not matched by allow-notify-from",
2403 "source", Logging::Loggable(mappedSource)));
2404 }
2405
2406 g_stats.sourceDisallowedNotify++;
2407 return;
2408 }
2409 }
2410
2411 struct timeval tv = {0, 0};
2412 HarvestTimestamp(&msgh, &tv);
2413 ComboAddress dest; // the address the query was sent to to
2414 dest.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2415 auto loc = rplookup(g_listenSocketsAddresses, fd);
2416 if (HarvestDestinationAddress(&msgh, &dest)) {
2417 // but.. need to get port too
2418 if (loc) {
2419 dest.sin4.sin_port = loc->sin4.sin_port;
2420 }
2421 }
2422 else {
2423 if (loc) {
2424 dest = *loc;
2425 }
2426 else {
2427 dest.sin4.sin_family = fromaddr.sin4.sin_family;
2428 socklen_t slen = dest.getSocklen();
2429 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
2430 }
2431 }
2432 if (!proxyProto) {
2433 destination = dest;
2434 }
2435
2436 if (RecThreadInfo::weDistributeQueries()) {
2437 std::string localdata = data;
2438 distributeAsyncFunction(data, [localdata, fromaddr, dest, source, destination, mappedSource, tv, fd, proxyProtocolValues, eventTrace]() mutable {
2439 return doProcessUDPQuestion(localdata, fromaddr, dest, source, destination, mappedSource, tv, fd, proxyProtocolValues, eventTrace);
2440 });
2441 }
2442 else {
2443 doProcessUDPQuestion(data, fromaddr, dest, source, destination, mappedSource, tv, fd, proxyProtocolValues, eventTrace);
2444 }
2445 }
2446 }
2447 catch (const MOADNSException& mde) {
2448 g_stats.clientParseError++;
2449 if (g_logCommonErrors) {
2450 SLOG(g_log << Logger::Error << "Unable to parse packet from remote UDP client " << fromaddr.toString() << ": " << mde.what() << endl,
2451 g_slogudpin->error(Logr::Error, mde.what(), "Unable to parse packet from remote UDP client", "remote", Logging::Loggable(fromaddr), "exception", Logging::Loggable("MOADNSException")));
2452 }
2453 }
2454 catch (const std::runtime_error& e) {
2455 g_stats.clientParseError++;
2456 if (g_logCommonErrors) {
2457 SLOG(g_log << Logger::Error << "Unable to parse packet from remote UDP client " << fromaddr.toString() << ": " << e.what() << endl,
2458 g_slogudpin->error(Logr::Error, e.what(), "Unable to parse packet from remote UDP client", "remote", Logging::Loggable(fromaddr), "exception", Logging::Loggable("std::runtime_error")));
2459 }
2460 }
2461 }
2462 else {
2463 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2464 if (firstQuery && errno == EAGAIN) {
2465 g_stats.noPacketError++;
2466 }
2467
2468 break;
2469 }
2470 }
2471 }
2472
2473 void makeUDPServerSockets(deferredAdd_t& deferredAdds, Logr::log_t log)
2474 {
2475 int one = 1;
2476 vector<string> localAddresses;
2477 stringtok(localAddresses, ::arg()["local-address"], " ,");
2478
2479 if (localAddresses.empty()) {
2480 throw PDNSException("No local address specified");
2481 }
2482
2483 const uint16_t defaultLocalPort = ::arg().asNum("local-port");
2484 for (const auto& localAddress : localAddresses) {
2485 ComboAddress address{localAddress, defaultLocalPort};
2486 const int socketFd = socket(address.sin4.sin_family, SOCK_DGRAM, 0);
2487 if (socketFd < 0) {
2488 throw PDNSException("Making a UDP server socket for resolver: " + stringerror());
2489 }
2490 if (!setSocketTimestamps(socketFd)) {
2491 SLOG(g_log << Logger::Warning << "Unable to enable timestamp reporting for socket" << endl,
2492 log->info(Logr::Warning, "Unable to enable timestamp reporting for socket"));
2493 }
2494 if (IsAnyAddress(address)) {
2495 if (address.sin4.sin_family == AF_INET) {
2496 if (!setsockopt(socketFd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) { // linux supports this, so why not - might fail on other systems
2497 g_fromtosockets.insert(socketFd);
2498 }
2499 }
2500 #ifdef IPV6_RECVPKTINFO
2501 if (address.sin4.sin_family == AF_INET6) {
2502 if (!setsockopt(socketFd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one))) {
2503 g_fromtosockets.insert(socketFd);
2504 }
2505 }
2506 #endif
2507 if (address.sin6.sin6_family == AF_INET6 && setsockopt(socketFd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2508 int err = errno;
2509 SLOG(g_log << Logger::Error << "Failed to set IPv6 socket to IPv6 only, continuing anyhow: " << strerror(err) << endl,
2510 log->error(Logr::Error, "Failed to set IPv6 socket to IPv6 only, continuing anyhow"));
2511 }
2512 }
2513 if (::arg().mustDo("non-local-bind")) {
2514 Utility::setBindAny(AF_INET6, socketFd);
2515 }
2516
2517 setCloseOnExec(socketFd);
2518
2519 try {
2520 setSocketReceiveBuffer(socketFd, 250000);
2521 }
2522 catch (const std::exception& e) {
2523 SLOG(g_log << Logger::Error << e.what() << endl,
2524 log->error(Logr::Error, e.what(), "Exception while setting socket buffer size"));
2525 }
2526
2527 if (g_reusePort) {
2528 #if defined(SO_REUSEPORT_LB)
2529 try {
2530 SSetsockopt(fd, SOL_SOCKET, SO_REUSEPORT_LB, 1);
2531 }
2532 catch (const std::exception& e) {
2533 throw PDNSException(std::string("SO_REUSEPORT_LB: ") + e.what());
2534 }
2535 #elif defined(SO_REUSEPORT)
2536 try {
2537 SSetsockopt(socketFd, SOL_SOCKET, SO_REUSEPORT, 1);
2538 }
2539 catch (const std::exception& e) {
2540 throw PDNSException(std::string("SO_REUSEPORT: ") + e.what());
2541 }
2542 #endif
2543 }
2544
2545 try {
2546 setSocketIgnorePMTU(socketFd, address.sin4.sin_family);
2547 }
2548 catch (const std::exception& e) {
2549 SLOG(g_log << Logger::Warning << "Failed to set IP_MTU_DISCOVER on UDP server socket: " << e.what() << endl,
2550 log->error(Logr::Warning, e.what(), "Failed to set IP_MTU_DISCOVER on UDP server socket"));
2551 }
2552
2553 socklen_t socklen = address.getSocklen();
2554 if (::bind(socketFd, (struct sockaddr*)&address, socklen) < 0) {
2555 throw PDNSException("Resolver binding to server socket on " + address.toStringWithPort() + ": " + stringerror());
2556 }
2557
2558 setNonBlocking(socketFd);
2559
2560 deferredAdds.emplace_back(socketFd, handleNewUDPQuestion);
2561 g_listenSocketsAddresses[socketFd] = address; // this is written to only from the startup thread, not from the workers
2562 SLOG(g_log << Logger::Info << "Listening for UDP queries on " << address.toStringWithPort() << endl,
2563 log->info(Logr::Info, "Listening for queries", "proto", Logging::Loggable("UDP"), "address", Logging::Loggable(address)));
2564 }
2565 }
2566
2567 static bool trySendingQueryToWorker(unsigned int target, ThreadMSG* tmsg)
2568 {
2569 auto& targetInfo = RecThreadInfo::info(target);
2570 if (!targetInfo.isWorker()) {
2571 SLOG(g_log << Logger::Error << "distributeAsyncFunction() tried to assign a query to a non-worker thread" << endl,
2572 g_slog->withName("runtime")->info(Logr::Error, "distributeAsyncFunction() tried to assign a query to a non-worker thread"));
2573 _exit(1);
2574 }
2575
2576 const auto& tps = targetInfo.pipes;
2577
2578 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2579 if (written > 0) {
2580 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2581 delete tmsg;
2582 unixDie("write to thread pipe returned wrong size or error");
2583 }
2584 }
2585 else {
2586 int error = errno;
2587 if (error == EAGAIN || error == EWOULDBLOCK) {
2588 return false;
2589 }
2590 else {
2591 delete tmsg;
2592 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
2593 }
2594 }
2595
2596 return true;
2597 }
2598
2599 static unsigned int getWorkerLoad(size_t workerIdx)
2600 {
2601 const auto mt = RecThreadInfo::info(RecThreadInfo::numHandlers() + RecThreadInfo::numDistributors() + workerIdx).mt;
2602 if (mt != nullptr) {
2603 return mt->numProcesses();
2604 }
2605 return 0;
2606 }
2607
2608 static unsigned int selectWorker(unsigned int hash)
2609 {
2610 assert(RecThreadInfo::numWorkers() != 0);
2611 if (g_balancingFactor == 0) {
2612 return RecThreadInfo::numHandlers() + RecThreadInfo::numDistributors() + (hash % RecThreadInfo::numWorkers());
2613 }
2614
2615 /* we start with one, representing the query we are currently handling */
2616 double currentLoad = 1;
2617 std::vector<unsigned int> load(RecThreadInfo::numWorkers());
2618 for (size_t idx = 0; idx < RecThreadInfo::numWorkers(); idx++) {
2619 load[idx] = getWorkerLoad(idx);
2620 currentLoad += load[idx];
2621 }
2622
2623 double targetLoad = (currentLoad / RecThreadInfo::numWorkers()) * g_balancingFactor;
2624
2625 unsigned int worker = hash % RecThreadInfo::numWorkers();
2626 /* at least one server has to be at or below the average load */
2627 if (load[worker] > targetLoad) {
2628 ++g_stats.rebalancedQueries;
2629 do {
2630 worker = (worker + 1) % RecThreadInfo::numWorkers();
2631 } while (load[worker] > targetLoad);
2632 }
2633
2634 return RecThreadInfo::numHandlers() + RecThreadInfo::numDistributors() + worker;
2635 }
2636
2637 // This function is only called by the distributor threads, when pdns-distributes-queries is set
2638 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2639 {
2640 if (!RecThreadInfo::self().isDistributor()) {
2641 SLOG(g_log << Logger::Error << "distributeAsyncFunction() has been called by a worker (" << RecThreadInfo::id() << ")" << endl,
2642 g_slog->info(Logr::Error, "distributeAsyncFunction() has been called by a worker")); // tid will be added
2643 _exit(1);
2644 }
2645
2646 bool ok;
2647 unsigned int hash = hashQuestion(reinterpret_cast<const uint8_t*>(packet.data()), packet.length(), g_disthashseed, ok);
2648 if (!ok) {
2649 // hashQuestion does detect invalid names, so we might as well punt here instead of in the worker thread
2650 g_stats.ignoredCount++;
2651 throw MOADNSException("too-short (" + std::to_string(packet.length()) + ") or invalid name");
2652 }
2653 unsigned int target = selectWorker(hash);
2654
2655 ThreadMSG* tmsg = new ThreadMSG();
2656 tmsg->func = func;
2657 tmsg->wantAnswer = false;
2658
2659 if (!trySendingQueryToWorker(target, tmsg)) {
2660 /* if this function failed but did not raise an exception, it means that the pipe
2661 was full, let's try another one */
2662 unsigned int newTarget = 0;
2663 do {
2664 newTarget = RecThreadInfo::numHandlers() + RecThreadInfo::numDistributors() + dns_random(RecThreadInfo::numWorkers());
2665 } while (newTarget == target);
2666
2667 if (!trySendingQueryToWorker(newTarget, tmsg)) {
2668 g_stats.queryPipeFullDrops++;
2669 delete tmsg;
2670 }
2671 }
2672 // coverity[leaked_storage]
2673 }
2674
2675 // resend event to everybody chained onto it
2676 static void doResends(MT_t::waiters_t::iterator& iter, const std::shared_ptr<PacketID>& resend, const PacketBuffer& content)
2677 {
2678 // We close the chain for new entries, since they won't be processed anyway
2679 iter->key->closed = true;
2680
2681 if (iter->key->chain.empty())
2682 return;
2683 for (PacketID::chain_t::iterator i = iter->key->chain.begin(); i != iter->key->chain.end(); ++i) {
2684 auto r = std::make_shared<PacketID>(*resend);
2685 r->fd = -1;
2686 r->id = *i;
2687 MT->sendEvent(r, &content);
2688 g_stats.chainResends++;
2689 }
2690 }
2691
2692 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
2693 {
2694 std::shared_ptr<PacketID> pid = boost::any_cast<std::shared_ptr<PacketID>>(var);
2695 ssize_t len;
2696 PacketBuffer packet;
2697 packet.resize(g_outgoingEDNSBufsize);
2698 ComboAddress fromaddr;
2699 socklen_t addrlen = sizeof(fromaddr);
2700
2701 len = recvfrom(fd, &packet.at(0), packet.size(), 0, (sockaddr*)&fromaddr, &addrlen);
2702
2703 if (len < (ssize_t)sizeof(dnsheader)) {
2704 if (len < 0)
2705 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
2706 else {
2707 g_stats.serverParseError++;
2708 if (g_logCommonErrors)
2709 SLOG(g_log << Logger::Error << "Unable to parse packet from remote UDP server " << fromaddr.toString() << ": packet smaller than DNS header" << endl,
2710 g_slogout->info(Logr::Error, "Unable to parse packet from remote UDP server", "from", Logging::Loggable(fromaddr)));
2711 }
2712
2713 t_udpclientsocks->returnSocket(fd);
2714 PacketBuffer empty;
2715
2716 MT_t::waiters_t::iterator iter = MT->d_waiters.find(pid);
2717 if (iter != MT->d_waiters.end())
2718 doResends(iter, pid, empty);
2719
2720 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
2721 return;
2722 }
2723
2724 packet.resize(len);
2725 dnsheader dh;
2726 memcpy(&dh, &packet.at(0), sizeof(dh));
2727
2728 auto pident = std::make_shared<PacketID>();
2729 pident->remote = fromaddr;
2730 pident->id = dh.id;
2731 pident->fd = fd;
2732
2733 if (!dh.qr && g_logCommonErrors) {
2734 SLOG(g_log << Logger::Notice << "Not taking data from question on outgoing socket from " << fromaddr.toStringWithPort() << endl,
2735 g_slogout->info(Logr::Error, "Not taking data from question on outgoing socket", "from", Logging::Loggable(fromaddr)));
2736 }
2737
2738 if (!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2739 !dh.qr) { // one weird server
2740 pident->domain.clear();
2741 pident->type = 0;
2742 }
2743 else {
2744 try {
2745 if (len > 12)
2746 pident->domain = DNSName(reinterpret_cast<const char*>(packet.data()), len, 12, false, &pident->type); // don't copy this from above - we need to do the actual read
2747 }
2748 catch (std::exception& e) {
2749 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
2750 SLOG(g_log << Logger::Warning << "Error in packet from remote nameserver " << fromaddr.toStringWithPort() << ": " << e.what() << endl,
2751 g_slogudpin->error(Logr::Warning, "Error in packet from remote nameserver", "from", Logging::Loggable(fromaddr)));
2752 return;
2753 }
2754 }
2755
2756 MT_t::waiters_t::iterator iter = MT->d_waiters.find(pident);
2757 if (iter != MT->d_waiters.end()) {
2758 doResends(iter, pident, packet);
2759 }
2760
2761 retryWithName:
2762
2763 if (!MT->sendEvent(pident, &packet)) {
2764 /* we did not find a match for this response, something is wrong */
2765
2766 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2767 for (MT_t::waiters_t::iterator mthread = MT->d_waiters.begin(); mthread != MT->d_waiters.end(); ++mthread) {
2768 if (pident->fd == mthread->key->fd && mthread->key->remote == pident->remote && mthread->key->type == pident->type && pident->domain == mthread->key->domain) {
2769 /* we are expecting an answer from that exact source, on that exact port (since we are using connected sockets), for that qname/qtype,
2770 but with a different message ID. That smells like a spoofing attempt. For now we will just increase the counter and will deal with
2771 that later. */
2772 mthread->key->nearMisses++;
2773 }
2774
2775 // be a bit paranoid here since we're weakening our matching
2776 if (pident->domain.empty() && !mthread->key->domain.empty() && !pident->type && mthread->key->type && pident->id == mthread->key->id && mthread->key->remote == pident->remote) {
2777 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2778 pident->domain = mthread->key->domain;
2779 pident->type = mthread->key->type;
2780 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
2781 }
2782 }
2783 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2784 if (g_logCommonErrors) {
2785 SLOG(g_log << Logger::Warning << "Discarding unexpected packet from " << fromaddr.toStringWithPort() << ": " << (pident->domain.empty() ? "<empty>" : pident->domain.toString()) << ", " << pident->type << ", " << MT->d_waiters.size() << " waiters" << endl,
2786 g_slogudpin->info(Logr::Warning, "Discarding unexpected packet", "from", Logging::Loggable(fromaddr),
2787 "qname", Logging::Loggable(pident->domain),
2788 "qtype", Logging::Loggable(QType(pident->type)),
2789 "waiters", Logging::Loggable(MT->d_waiters.size())));
2790 }
2791 }
2792 else if (fd >= 0) {
2793 /* we either found a waiter (1) or encountered an issue (-1), it's up to us to clean the socket anyway */
2794 t_udpclientsocks->returnSocket(fd);
2795 }
2796 }