From: Neil Cook Date: Tue, 29 May 2018 17:51:35 +0000 (+0100) Subject: Newly observed domain support using a stable bloom filter to record previously seen... X-Git-Tag: dnsdist-1.3.3~185^2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=af1377b7f7339bf95fd1c0ae389c763064a7f73f;p=thirdparty%2Fpdns.git Newly observed domain support using a stable bloom filter to record previously seen domains. - Not enabled by default, must be enabled via configure - Enables new domains to be logged or sent via DNS lookup - New settings documented --- diff --git a/build-scripts/travis.sh b/build-scripts/travis.sh index ef4a0a6ce9..17bd22c69c 100755 --- a/build-scripts/travis.sh +++ b/build-scripts/travis.sh @@ -416,6 +416,7 @@ build_recursor() { --prefix=$PDNS_RECURSOR_DIR \ --enable-libsodium \ --enable-unit-tests \ + --enable-nod \ --disable-silent-rules" run "make -k -j3" run "make install" diff --git a/ext/probds/.gitignore b/ext/probds/.gitignore new file mode 100644 index 0000000000..282522db03 --- /dev/null +++ b/ext/probds/.gitignore @@ -0,0 +1,2 @@ +Makefile +Makefile.in diff --git a/ext/probds/Makefile.am b/ext/probds/Makefile.am new file mode 100644 index 0000000000..e7af3d0a27 --- /dev/null +++ b/ext/probds/Makefile.am @@ -0,0 +1,2 @@ +noinst_LTLIBRARIES = libprobds.la +libprobds_la_SOURCES = murmur3.cc murmur3.h diff --git a/ext/probds/murmur3.cc b/ext/probds/murmur3.cc new file mode 100644 index 0000000000..1e58c97fb4 --- /dev/null +++ b/ext/probds/murmur3.cc @@ -0,0 +1,84 @@ +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. 
You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. + +#include "murmur3.h" +#include +#include + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +uint32_t fmix32( uint32_t h ) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +//----------------------------------------------------------------------------- + + +void MurmurHash3_x86_32( const void * key, int len, uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 4; + int i; + + uint32_t h1 = seed; + + uint32_t c1 = 0xcc9e2d51; + uint32_t c2 = 0x1b873593; + + //---------- + // body + + const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); + + for(i = -nblocks; i; i++) + { + uint32_t k1 = getblock(blocks,i); + + k1 *= c1; + k1 = ROTL32(k1,15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1,13); + h1 = h1*5+0xe6546b64; + } + + //---------- + // tail + { + const uint8_t * tail = (const uint8_t*)(data + nblocks*4); + + uint32_t k1 = 0; + + switch(len & 3) + { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + }; + } + + //---------- + // finalization + + h1 ^= len; + + h1 = fmix32(h1); + + *(uint32_t*)out = h1; +} diff --git a/ext/probds/murmur3.h b/ext/probds/murmur3.h new file mode 100755 index 0000000000..5ed886bb59 --- /dev/null +++ b/ext/probds/murmur3.h @@ -0,0 +1,89 @@ +#pragma once +#ifndef MURMUR3_H +#define MURMUR3_H + +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. 
+ +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +typedef unsigned char uint8_t; +typedef unsigned long uint32_t; +typedef unsigned __int64 uint64_t; + +// Other compilers + +#else // defined(_MSC_VER) + +#include + +#endif // !defined(_MSC_VER) + +#define FORCE_INLINE __attribute__((always_inline)) + +inline uint32_t rotl32 ( uint32_t x, uint8_t r ) +{ + return (x << r) | (x >> (32 - r)); +} + +#define ROTL32(x,y) rotl32(x,y) + +#define BIG_CONSTANT(x) (x##LLU) + +/* NO-OP for little-endian platforms */ +#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) +# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define BYTESWAP(x) (x) +# endif +/* if __BYTE_ORDER__ is not predefined (like FreeBSD), use arch */ +#elif defined(__i386) || defined(__x86_64) \ + || defined(__alpha) || defined(__vax) + +# define BYTESWAP(x) (x) +/* use __builtin_bswap32 if available */ +#elif defined(__GNUC__) || defined(__clang__) +#ifdef __has_builtin +#if __has_builtin(__builtin_bswap32) +#define BYTESWAP(x) __builtin_bswap32(x) +#endif // __has_builtin(__builtin_bswap32) +#endif // __has_builtin +#endif // defined(__GNUC__) || defined(__clang__) +/* last resort (big-endian w/o __builtin_bswap) */ +#ifndef BYTESWAP +# define BYTESWAP(x) ((((x)&0xFF)<<24) \ + |(((x)>>24)&0xFF) \ + |(((x)&0x0000FF00)<<8) \ + |(((x)&0x00FF0000)>>8) ) +#endif + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +#define getblock(p, i) 
BYTESWAP(p[i]) + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +uint32_t fmix32( uint32_t h ); + +//----------------------------------------------------------------------------- + +#ifdef __cplusplus +extern "C" +#else +extern +#endif +void MurmurHash3_x86_32( const void * key, int len, uint32_t seed, void * out ); +#endif diff --git a/pdns/nod.cc b/pdns/nod.cc new file mode 100644 index 0000000000..287bd45f57 --- /dev/null +++ b/pdns/nod.cc @@ -0,0 +1,212 @@ +/* + * This file is part of PowerDNS or dnsdist. + * Copyright -- PowerDNS.COM B.V. and its contributors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In addition, for the avoidance of any doubt, permission is granted to + * link this program with OpenSSL and to (re)distribute the binaries + * produced as the result of such linking. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "nod.hh" +#include +#include "pdnsexception.hh" +#include +#include +#include +#include +#include +#include +#include "logger.hh" +#include "misc.hh" + +using namespace nod; +using namespace boost::filesystem; + +std::mutex NODDB::d_cachedir_mutex; + +// This looks for an old (per-thread) snapshot. The first one it finds, +// it restores from that. 
Then immediately snapshots with the current thread id,// before removing the old snapshot +// In this way, we can have per-thread SBFs, but still snapshot and restore. +// The mutex has to be static because we can't have multiple (i.e. per-thread) +// instances iterating and writing to the cache dir at the same time +bool NODDB::init(bool ignore_pid) { + if (d_init) + return false; + + std::lock_guard lock(NODDB::d_cachedir_mutex); + if (d_cachedir.length()) { + path p(d_cachedir); + try { + if (exists(p) && is_directory(p)) { + path newest_file; + std::time_t newest_time=time(nullptr); + Regex file_regex(".*\\." + bf_suffix + "$"); + for (directory_iterator i(p); i!=directory_iterator(); ++i) { + if (is_regular_file(i->path()) && + file_regex.match(i->path().filename().string())) { + if (ignore_pid || + (i->path().filename().string().find(std::to_string(getpid())) == std::string::npos)) { + // look for the newest file matching the regex + if ((last_write_time(i->path()) < newest_time) || + newest_file.empty()) { + newest_time = last_write_time(i->path()); + newest_file = i->path(); + } + } + } + } + if (exists(newest_file)) { + std::string filename = newest_file.string(); + std::ifstream infile; + try { + infile.open(filename, std::ios::in | std::ios::binary); + g_log << Logger::Warning << "Found SBF file " << filename << endl; + // read the file into the sbf + d_sbf.restore(infile); + infile.close(); + // now dump it out again with new thread id & process id + snapshotCurrent(); + // Remove the old file we just read to stop proliferation + remove(newest_file); + } + catch (const std::runtime_error& e) { + g_log< lock(NODDB::d_sbf_mutex); + // the result is always the inverse of what is returned by the SBF + return !d_sbf.testAndAdd(dname_lc); +} + +bool NODDB::isNewDomainWithParent(const std::string& domain, std::string& observed) +{ + DNSName dname(domain); + return isNewDomainWithParent(dname, observed); +} + +bool NODDB::isNewDomainWithParent(const DNSName& 
dname, std::string& observed) +{ + bool ret = isNewDomain(dname); + if (ret == true) { + DNSName mdname = dname; + while (mdname.chopOff()) { + if (!isNewDomain(mdname)) { + observed = mdname.toString(); + break; + } + } + } + return ret; +} + +void NODDB::addDomain(const DNSName& dname) +{ + // The only time this should block is when snapshotting from the + // housekeeping thread + std::string native_domain = dname.toDNSStringLC(); + std::lock_guard lock(NODDB::d_sbf_mutex); + d_sbf.add(native_domain); +} + +void NODDB::addDomain(const std::string& domain) +{ + DNSName dname(domain); + addDomain(dname); +} + +// Dump the SBF to a file +// To spend the least amount of time inside the mutex, we dump to an +// intermediate stringstream, otherwise the lock would be waiting for +// file IO to complete +bool NODDB::snapshotCurrent() +{ + if (d_cachedir.length()) { + path p(d_cachedir); + path f(d_cachedir); + std::stringstream ss; + ss << std::this_thread::get_id(); + f /= ss.str() + "_" + std::to_string(getpid()) + "." + bf_suffix; + if (exists(p) && is_directory(p)) { + try { + std::ofstream ofile; + std::stringstream ss; + ofile.open(f.string(), std::ios::out | std::ios::binary); + { + // only lock while dumping to a stringstream + std::lock_guard lock(NODDB::d_sbf_mutex); + d_sbf.dump(ss); + } + // Now write it out to the file + ofile << ss.str(); + + if (ofile.fail()) + throw std::runtime_error("Failed to write to file:" + f.string()); + return true; + } + catch (const std::runtime_error& e) { + g_log< +#include +#include "dnsname.hh" +#include "stable-bloom.hh" + +namespace nod { + const float fp_rate = 0.01; + const size_t num_cells = 67108864; + const uint8_t num_dec = 10; + const unsigned int snapshot_interval_default = 600; + const std::string bf_suffix = "bf"; + + // This class is not designed to be shared between threads + // Use a new instance per-thread, e.g. 
using thread local storage + // Synchronization (at the class level) is still needed for reading from + // and writing to the cache dir + // Synchronization (at the instance level) is needed when snapshotting + class NODDB { + public: + NODDB() {} + // Set ignore_pid to true if you don't mind loading files + // created by the current process + bool init(bool ignore_pid=false); // Initialize the NODDB + bool isNewDomain(const std::string& domain); // Returns true if newly observed domain + bool isNewDomain(const DNSName& dname); // As above + bool isNewDomainWithParent(const std::string& domain, std::string& observed); // Returns true if newly observed domain, in which case "observed" contains the parent domain which *was* observed (or "" if domain is . or no parent domains observed) + bool isNewDomainWithParent(const DNSName& dname, std::string& observed); // As above + void addDomain(const DNSName& dname); // You need to add this to refresh frequently used domains + void addDomain(const std::string& domain); // As above + void setSnapshotInterval(unsigned int secs) { d_snapshot_interval = secs; } + void setCacheDir(const std::string& cachedir); + bool snapshotCurrent(); // Write the current file out to disk + bool pruneCacheFiles(); // Remove oldest cache files + static void startHousekeepingThread(std::shared_ptr noddbp) { + noddbp->housekeepingThread(); + } + private: + void housekeepingThread(); + bool d_init{false}; + bf::stableBF d_sbf{fp_rate, num_cells, num_dec}; // Stable Bloom Filter + unsigned int d_snapshot_interval{snapshot_interval_default}; // Number seconds between snapshots + std::string d_cachedir; + std::mutex d_sbf_mutex; // Per-instance mutex for snapshots + static std::mutex d_cachedir_mutex; // One mutex for all instances of this class + }; + +} diff --git a/pdns/pdns_recursor.cc b/pdns/pdns_recursor.cc index cefafcf408..bf9cfcddf6 100644 --- a/pdns/pdns_recursor.cc +++ b/pdns/pdns_recursor.cc @@ -86,6 +86,9 @@ #include "rec-lua-conf.hh" 
#include "ednsoptions.hh" #include "gettime.hh" +#ifdef NOD_ENABLED +#include "nod.hh" +#endif /* NOD_ENABLED */ #include "rec-protobuf.hh" #include "rec-snmp.hh" @@ -119,6 +122,9 @@ thread_local std::shared_ptr t_allowFrom; #ifdef HAVE_PROTOBUF thread_local std::unique_ptr t_uuidGenerator; #endif +#ifdef NOD_ENABLED +thread_local std::shared_ptr t_nodDBp; +#endif /* NOD_ENABLED */ __thread struct timeval g_now; // timestamp, updated (too) frequently // for communicating with our threads @@ -175,6 +181,12 @@ static bool g_gettagNeedsEDNSOptions{false}; static time_t g_statisticsInterval; static bool g_useIncomingECS; std::atomic g_maxCacheEntries, g_maxPacketCacheEntries; +#ifdef NOD_ENABLED +static bool g_nodEnabled; +static DNSName g_nodLookupDomain; +static bool g_nodLog; +static SuffixMatchNode g_nodDomainWL; +#endif /* NOD_ENABLED */ #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP static boost::container::flat_set s_avoidUdpSourcePorts; #else @@ -861,6 +873,42 @@ static bool checkOutgoingProtobufExport(LocalStateHolder& luacon } #endif /* HAVE_PROTOBUF */ +#ifdef NOD_ENABLED +static void nodCheckNewDomain(const DNSName& dname) +{ + static const QType qt(QType::A); + static const uint16_t qc(QClass::IN); + // First check the (sub)domain isn't whitelisted for NOD purposes + if (!g_nodDomainWL.check(dname)) { + // Now check the NODDB (note this is probablistic so can have FNs/FPs) + if (t_nodDBp && t_nodDBp->isNewDomain(dname)) { + if (g_nodLog) { + // This should probably log to a dedicated log file + g_log<init()) { + g_log< parts; + stringtok(parts, wlist, ",; "); + for(const auto& a : parts) { + g_nodDomainWL.add(DNSName(a)); + } +} + +static void setupNODGlobal() +{ + // Setup NOD subsystem + g_nodEnabled = ::arg().mustDo("new-domain-tracking"); + g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]); + g_nodLog = ::arg().mustDo("new-domain-log"); + parseNODWhitelist(::arg()["new-domain-whitelist"]); +} +#endif /* NOD_ENABLED */ + static int serviceMain(int 
argc, char*argv[]) { g_log.setName(s_programname); @@ -3344,6 +3442,11 @@ static int serviceMain(int argc, char*argv[]) makeTCPServerSockets(0); } +#ifdef NOD_ENABLED + // Setup newly observed domain globals + setupNODGlobal(); +#endif /* NOD_ENABLED */ + int forks; for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) { if(!fork()) // we are child @@ -3496,6 +3599,10 @@ try #endif g_log<(); @@ -3805,7 +3912,13 @@ int main(int argc, char **argv) ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535"; ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211"; ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto"; - +#ifdef NOD_ENABLED + ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no"; + ::arg().set("new-domain-log", "Log newly observed domains.")="yes"; + ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")=""; + ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR)+"/nod"; + ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")=""; +#endif /* NOD_ENABLED */ ::arg().setCmd("help","Provide a helpful message"); ::arg().setCmd("version","Print version string"); ::arg().setCmd("config","Output blank configuration"); diff --git a/pdns/recursordist/Makefile.am b/pdns/recursordist/Makefile.am index f5e6afdd18..ed7366294d 100644 --- a/pdns/recursordist/Makefile.am +++ b/pdns/recursordist/Makefile.am @@ -1,4 +1,5 @@ JSON11_LIBS = $(top_srcdir)/ext/json11/libjson11.la +PROBDS_LIBS = $(top_srcdir)/ext/probds/libprobds.la AM_CPPFLAGS = $(LUA_CFLAGS) $(YAHTTP_CFLAGS) $(BOOST_CPPFLAGS) $(BOTAN_CFLAGS) $(LIBSODIUM_CFLAGS) $(NET_SNMP_CFLAGS) $(SANITIZER_FLAGS) -O3 -Wall 
-pthread -DSYSCONFDIR=\"${sysconfdir}\" $(SYSTEMD_CFLAGS) @@ -10,7 +11,8 @@ AM_CPPFLAGS += \ AM_CXXFLAGS = \ -DSYSCONFDIR=\"$(sysconfdir)\" \ -DPKGLIBDIR=\"$(pkglibdir)\" \ - -DLOCALSTATEDIR=\"$(socketdir)\" + -DLOCALSTATEDIR=\"$(socketdir)\" \ + -DNODCACHEDIR=\"$(nodcachedir)\" AM_LDFLAGS = \ $(PROGRAM_LDFLAGS) \ @@ -121,6 +123,7 @@ pdns_recursor_SOURCES = \ mtasker_context.cc mtasker_context.hh \ namespaces.hh \ negcache.hh negcache.cc \ + nod.hh nod.cc \ nsecrecords.cc \ opensslsigners.cc opensslsigners.hh \ packetcache.hh \ @@ -154,6 +157,7 @@ pdns_recursor_SOURCES = \ snmp-agent.hh snmp-agent.cc \ sortlist.cc sortlist.hh \ sstuff.hh \ + stable-bloom.hh \ syncres.cc syncres.hh \ tsigverifier.cc tsigverifier.hh \ ueberbackend.hh \ @@ -180,10 +184,14 @@ pdns_recursor_LDADD = \ $(BOOST_CONTEXT_LIBS) \ $(NET_SNMP_LIBS) \ $(SYSTEMD_LIBS) \ - $(RT_LIBS) + $(RT_LIBS) \ + $(BOOST_FILESYSTEM_LIBS) \ + $(BOOST_SYSTEM_LIBS) \ + $(PROBDS_LIBS) pdns_recursor_LDFLAGS = $(AM_LDFLAGS) \ - $(LIBCRYPTO_LDFLAGS) $(BOOST_CONTEXT_LDFLAGS) + $(LIBCRYPTO_LDFLAGS) $(BOOST_CONTEXT_LDFLAGS) \ + $(BOOST_FILESYSTEM_LDFLAGS) $(BOOST_SYSTEM_LDFLAGS) testrunner_SOURCES = \ arguments.cc \ @@ -211,6 +219,7 @@ testrunner_SOURCES = \ mtasker_context.cc \ negcache.hh negcache.cc \ namespaces.hh \ + nod.hh nod.cc \ nsecrecords.cc \ pdnsexception.hh \ opensslsigners.cc opensslsigners.hh \ @@ -225,6 +234,7 @@ testrunner_SOURCES = \ sillyrecords.cc \ sholder.hh \ sstuff.hh \ + stable-bloom.hh \ syncres.cc syncres.hh \ test-arguments_cc.cc \ test-base32_cc.cc \ @@ -242,6 +252,7 @@ testrunner_SOURCES = \ test-mtasker.cc \ test-nmtree.cc \ test-negcache_cc.cc \ + test-nod_cc.cc \ test-rcpgenerator_cc.cc \ test-recpacketcache_cc.cc \ test-recursorcache_cc.cc \ @@ -261,13 +272,18 @@ testrunner_LDFLAGS = \ $(AM_LDFLAGS) \ $(BOOST_CONTEXT_LDFLAGS) \ $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) \ - $(LIBCRYPTO_LDFLAGS) + $(LIBCRYPTO_LDFLAGS) \ + $(BOOST_FILESYSTEM_LDFLAGS) \ + $(BOOST_SYSTEM_LDFLAGS) 
testrunner_LDADD = \ $(BOOST_CONTEXT_LIBS) \ $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) \ $(LIBCRYPTO_LIBS) \ - $(RT_LIBS) + $(RT_LIBS) \ + $(BOOST_FILESYSTEM_LIBS) \ + $(BOOST_SYSTEM_LIBS) \ + $(PROBDS_LIBS) if BOTAN pdns_recursor_SOURCES += \ diff --git a/pdns/recursordist/configure.ac b/pdns/recursordist/configure.ac index 7312cb3610..1c54df3ca1 100644 --- a/pdns/recursordist/configure.ac +++ b/pdns/recursordist/configure.ac @@ -78,6 +78,9 @@ AC_DEFUN([PDNS_SELECT_CONTEXT_IMPL], [ fi ]) +BOOST_FILESYSTEM +AS_IF([test -z "$BOOST_FILESYSTEM_LIBS"], [ AC_MSG_ERROR([Boost filesystem library is not installed])]) + PDNS_CHECK_CLOCK_GETTIME boost_required_version=1.35 @@ -138,6 +141,15 @@ AC_ARG_WITH([socketdir], [socketdir="$withval"] ) +PDNS_ENABLE_NOD + +AC_SUBST([nodcachedir]) +nodcachedir='${prefix}/var/lib/pdns-recursor' +AC_ARG_WITH([nod-cache-dir], + [AS_HELP_STRING([--with-nodcachedir], [where newly observed domain cache files live @<:@default=PREFIX/var/lib/pdns-recursor@:>@])], + [nodcachedir="$withval"] +) + AC_MSG_CHECKING([whether we will enable compiler security checks]) AC_ARG_ENABLE([hardening], [AS_HELP_STRING([--disable-hardening], [disable compiler security checks @<:@default=no@:>@])], @@ -179,6 +191,7 @@ AC_SUBST([PROGRAM_LDFLAGS]) AC_CONFIG_FILES([Makefile ext/Makefile ext/json11/Makefile + ext/probds/Makefile ext/yahttp/Makefile ext/yahttp/yahttp/Makefile]) @@ -232,5 +245,9 @@ AS_IF([test "x$systemd" != "xn"], [AC_MSG_NOTICE([systemd: yes])], [AC_MSG_NOTICE([systemd: no])] ) +AS_IF([test "x$NOD_ENABLED" != "xn"], + [AC_MSG_NOTICE([nod: yes])], + [AC_MSG_NOTICE([nod: no])] +) AC_MSG_NOTICE([Context library: $pdns_context_library]) AC_MSG_NOTICE([]) diff --git a/pdns/recursordist/docs/settings.rst b/pdns/recursordist/docs/settings.rst index edf2a0855c..a92db4f1d8 100644 --- a/pdns/recursordist/docs/settings.rst +++ b/pdns/recursordist/docs/settings.rst @@ -807,6 +807,79 @@ This setting artificially raises all TTLs to be at least this long. 
While this is a gross hack, and violates RFCs, under conditions of DoS, it may enable you to continue serving your customers. Can be set at runtime using ``rec_control set-minimum-ttl 3600``. +.. _setting-new-domain-tracking: + +``new-domain-tracking`` +----------------------- +- Boolean +- Default: no (disabled) + +Whether to track newly observed domains, i.e. never seen before. This +is a probabilistic algorithm, using a stable bloom filter to store +records of previously seen domains. When enabled for the first time, +all domains will appear to be newly observed, so the feature is best +left enabled for e.g. a week or longer before using the results. Note +that this feature is optional and must be enabled at compile-time, +thus it may not be available in all pre-built packages. + +.. _setting-new-domain-log: + +``new-domain-log`` +------------------ +- Boolean +- Default: yes (enabled) + +If a newly observed domain is detected, log that domain in the +recursor log file. The log line looks something like: + +Jul 18 11:31:25 Newly observed domain nod=sdfoijdfio.com + +.. _setting-new-domain-lookup: + +``new-domain-lookup`` +--------------------- +- Domain Name +- Example: nod.powerdns.com + +If a domain is specified, then each time a newly observed domain is +detected, the recursor will perform an A record lookup of "<newly observed domain>.<lookup domain>". For example if 'new-domain-lookup' +is configured as 'nod.powerdns.com', and a new domain 'xyz123.tv' is +detected, then an A record lookup will be made for +'xyz123.tv.nod.powerdns.com'. This feature gives a way to share the +newly observed domain with partners, vendors or security teams. The +result of the DNS lookup will be ignored by the recursor. + +.. _setting-new-domain-history-dir: + +``new-domain-history-dir`` +-------------------------- +- Path +- Default: /var/lib/pdns-recursor/nod + +This setting controls which directory is used to store the on-disk +cache of previously observed domains. 
+ +The newly observed domain feature uses a stable bloom filter to store +a history of previously observed domains. The data structure is +synchronized to disk every 10 minutes, and is also initialized from +disk on startup. This ensures that previously observed domains are +preserved across recursor restarts. + +.. _setting-new-domain-whitelist: + +``new-domain-whitelist`` +------------------------ +- List of Domain Names, comma separated +- Example: xyz.com, abc.com + +This setting is a list of all domains (and implicitly all subdomains) +that will never be considered a new domain. For example, if the domain +'xyz123.tv' is in the list, then 'foo.bar.xyz123.tv' will never be +considered a new domain. One use-case for the whitelist is to never +reveal details of internal subdomains via the new-domain-lookup +feature. + .. _setting-network-timeout: ``network-timeout`` diff --git a/pdns/recursordist/ext/Makefile.am b/pdns/recursordist/ext/Makefile.am index ab63843f7d..b4b000d059 100644 --- a/pdns/recursordist/ext/Makefile.am +++ b/pdns/recursordist/ext/Makefile.am @@ -1,12 +1,14 @@ SUBDIRS = \ yahttp \ - json11 + json11 \ + probds DIST_SUBDIRS = \ yahttp \ - json11 + json11 \ + probds EXTRA_DIST = \ luawrapper/include/LuaContext.hpp \ yahttp/LICENSE \ - json11/LICENSE.txt incbin/incbin.h + json11/LICENSE.txt incbin/incbin.h diff --git a/pdns/recursordist/ext/probds/.gitignore b/pdns/recursordist/ext/probds/.gitignore new file mode 120000 index 0000000000..1daf4da78d --- /dev/null +++ b/pdns/recursordist/ext/probds/.gitignore @@ -0,0 +1 @@ +../../../../ext/probds/.gitignore \ No newline at end of file diff --git a/pdns/recursordist/ext/probds/Makefile.am b/pdns/recursordist/ext/probds/Makefile.am new file mode 120000 index 0000000000..1a662e28e5 --- /dev/null +++ b/pdns/recursordist/ext/probds/Makefile.am @@ -0,0 +1 @@ +../../../../ext/probds/Makefile.am \ No newline at end of file diff --git a/pdns/recursordist/ext/probds/murmur3.cc 
b/pdns/recursordist/ext/probds/murmur3.cc new file mode 120000 index 0000000000..736b5472b3 --- /dev/null +++ b/pdns/recursordist/ext/probds/murmur3.cc @@ -0,0 +1 @@ +../../../../ext/probds/murmur3.cc \ No newline at end of file diff --git a/pdns/recursordist/ext/probds/murmur3.h b/pdns/recursordist/ext/probds/murmur3.h new file mode 120000 index 0000000000..8a18737d51 --- /dev/null +++ b/pdns/recursordist/ext/probds/murmur3.h @@ -0,0 +1 @@ +../../../../ext/probds/murmur3.h \ No newline at end of file diff --git a/pdns/recursordist/m4/pdns_enable_nod.m4 b/pdns/recursordist/m4/pdns_enable_nod.m4 new file mode 100644 index 0000000000..8584e3cf64 --- /dev/null +++ b/pdns/recursordist/m4/pdns_enable_nod.m4 @@ -0,0 +1,17 @@ +AC_DEFUN([PDNS_ENABLE_NOD],[ + AC_MSG_CHECKING([whether to enable newly observed domain checking]) + + AC_ARG_ENABLE([nod], + AS_HELP_STRING([--enable-nod], + [enable newly observed domains @<:@default=no@:>@] + ), + [enable_nod=$enableval], + [enable_nod=no] + ) + + AS_IF([test "x$enable_nod" != "xno"], + [AC_DEFINE([NOD_ENABLED], [1], [Define to 1 if nod is enabled])] + ) + + AC_MSG_RESULT([$enable_nod]) +]) diff --git a/pdns/recursordist/make-ext-symlinks.py b/pdns/recursordist/make-ext-symlinks.py index 403ae425d0..f0ee8a02b7 100755 --- a/pdns/recursordist/make-ext-symlinks.py +++ b/pdns/recursordist/make-ext-symlinks.py @@ -4,7 +4,7 @@ import os import shutil import os.path -for extdir in ['yahttp', 'json11']: +for extdir in ['yahttp', 'json11', 'probds']: try: shutil.rmtree(os.path.join('ext', extdir)) except OSError: diff --git a/pdns/recursordist/nod.cc b/pdns/recursordist/nod.cc new file mode 120000 index 0000000000..fde5c091f0 --- /dev/null +++ b/pdns/recursordist/nod.cc @@ -0,0 +1 @@ +../nod.cc \ No newline at end of file diff --git a/pdns/recursordist/nod.hh b/pdns/recursordist/nod.hh new file mode 120000 index 0000000000..a56167aa87 --- /dev/null +++ b/pdns/recursordist/nod.hh @@ -0,0 +1 @@ +../nod.hh \ No newline at end of file diff 
--git a/pdns/recursordist/stable-bloom.hh b/pdns/recursordist/stable-bloom.hh new file mode 100644 index 0000000000..835f689795 --- /dev/null +++ b/pdns/recursordist/stable-bloom.hh @@ -0,0 +1,142 @@ +/* + * This file is part of PowerDNS or dnsdist. + * Copyright -- PowerDNS.COM B.V. and its contributors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In addition, for the avoidance of any doubt, permission is granted to + * link this program with OpenSSL and to (re)distribute the binaries + * produced as the result of such linking. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include "misc.hh" +#include "ext/probds/murmur3.h" + +namespace bf +{ + // Based on http://webdocs.cs.ualberta.ca/~drafiei/papers/DupDetExt.pdf + // Max is always 1 in this implementation, which is best for streaming data + // This also means we can use a bitset for storing values which is very + // efficient + class stableBF { + public: + stableBF(float fp_rate, uint32_t num_cells, uint8_t p): d_k(optimalK(fp_rate)), d_num_cells(num_cells), d_p(p), d_cells(num_cells), d_gen(std::random_device()()), d_dis(0, num_cells) {} + stableBF(uint8_t k, uint32_t num_cells, uint8_t p, const std::string& bitstr): d_k(k), d_num_cells(num_cells), d_p(p), d_cells(bitstr), d_gen(std::random_device()()), d_dis(0, num_cells) {} + void add(const std::string& data) { + decrement(); + auto hashes = hash(data); + for (auto& i : hashes) { + d_cells.set(i % d_num_cells); + } + } + bool test(const std::string& data) { + auto hashes = hash(data); + for (auto& i : hashes) { + if (d_cells.test(i % d_num_cells) == false) + return false; + } + return true; + } + bool testAndAdd(const std::string& data) { + auto hashes = hash(data); + bool retval = true; + for (auto& i : hashes) { + if (d_cells.test(i % d_num_cells) == false) { + retval = false; + break; + } + } + decrement(); + for (auto& i : hashes) { + d_cells.set(i % d_num_cells); + } + return retval; + } + void dump(std::ostream& os) { + os.write((char*)&d_k, sizeof(d_k)); + uint32_t nint = htonl(d_num_cells); + os.write((char*)&nint, sizeof(nint)); + os.write((char*)&d_p, sizeof(d_p)); + std::string temp_str; + boost::to_string(d_cells, temp_str); + uint32_t bitstr_length = htonl((uint32_t)temp_str.length()); + os.write((char*)&bitstr_length, sizeof(bitstr_length)); + os.write((char*)temp_str.c_str(), temp_str.length()); + if (os.fail()) { + throw std::runtime_error("SBF: Failed to dump"); + } + } + void restore(std::istream& is) { + uint8_t k, p; + uint32_t 
num_cells, bitstr_len; + is.read((char*)&k, sizeof(k)); + is.read((char*)&num_cells, sizeof(num_cells)); + num_cells = ntohl(num_cells); + is.read((char*)&p, sizeof(p)); + is.read((char*)&bitstr_len, sizeof(bitstr_len)); + bitstr_len = ntohl(bitstr_len); + char* bitcstr = new char[bitstr_len]; + is.read((char*)bitcstr, bitstr_len); + std::string bitstr(bitcstr, bitstr_len); + delete[] bitcstr; + stableBF tempbf(k, num_cells, p, bitstr); + swap(tempbf); + } + private: + unsigned int optimalK(float fp_rate) { + return std::ceil(std::log2(1/fp_rate)); + } + void decrement() { + // Choose a random cell then decrement the next p-1 + // The stable bloom algorithm described in the paper says + // to choose p independent positions, but that is much slower + // and this shouldn't change the properties of the SBF + size_t r = d_dis(d_gen); + for (uint64_t i=0; i hash(const std::string& data) { + uint32_t h1, h2; + MurmurHash3_x86_32(data.c_str(), data.length(), 1, (void*)&h1); + MurmurHash3_x86_32(data.c_str(), data.length(), 2, (void*)&h2); + std::vector ret_hashes(d_k); + for (size_t i=0; i < d_k; ++i) { + ret_hashes[i] = h1 + i * h2; + } + return ret_hashes; + } + uint8_t d_k; + uint32_t d_num_cells; + uint8_t d_p; + boost::dynamic_bitset<> d_cells; + std::mt19937 d_gen; + std::uniform_int_distribution<> d_dis; + }; +} diff --git a/pdns/recursordist/test-nod_cc.cc b/pdns/recursordist/test-nod_cc.cc new file mode 100644 index 0000000000..6a07a46d76 --- /dev/null +++ b/pdns/recursordist/test-nod_cc.cc @@ -0,0 +1,62 @@ +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_NO_MAIN +#include +#include "nod.hh" +#include "pdnsexception.hh" +using namespace boost; +using std::string; +using namespace nod; + +BOOST_AUTO_TEST_SUITE(nod_cc) + +bool pdns_exception( PDNSException const& ex ) { return true; } + +BOOST_AUTO_TEST_CASE(test_basic) { + DNSName new_domain1("abc.com."), new_domain2("xyz.com."); + + { + NODDB noddb; + + BOOST_CHECK_EXCEPTION( noddb.setCacheDir("/xyz/abc"), 
PDNSException, pdns_exception); + + noddb.setCacheDir("/tmp"); + + BOOST_CHECK_EQUAL(noddb.init(), true); + + BOOST_CHECK_EQUAL(noddb.isNewDomain(new_domain1), true); + BOOST_CHECK_EQUAL(noddb.isNewDomain(new_domain1), false); + BOOST_CHECK_EQUAL(noddb.isNewDomain(new_domain2), true); + BOOST_CHECK_EQUAL(noddb.isNewDomain(new_domain1), false); + + for (int i=0; i<1000000; ++i) { + noddb.isNewDomain("foo.com."); + } + + noddb.addDomain("abc.com."); + DNSName new_subdomain("foo.abc.com."); + std::string parent; + bool res = noddb.isNewDomainWithParent(new_subdomain, parent); + + BOOST_CHECK_EQUAL(res, true); + BOOST_CHECK_EQUAL(parent, string("abc.com.")); + } + { + NODDB newnod; + newnod.setCacheDir("."); + BOOST_CHECK_EQUAL(newnod.init(), true); + BOOST_CHECK_EQUAL(newnod.isNewDomain(new_domain1), true); + BOOST_CHECK_EQUAL(newnod.isNewDomain(new_domain2), true); + BOOST_CHECK_EQUAL(newnod.isNewDomain(new_domain1), false); + BOOST_CHECK_EQUAL(newnod.isNewDomain(new_domain2), false); + BOOST_CHECK_EQUAL(newnod.snapshotCurrent(), true); + } + { + NODDB newnod; + newnod.setCacheDir("."); + BOOST_CHECK_EQUAL(newnod.init(true), true); + BOOST_CHECK_EQUAL(newnod.isNewDomain(new_domain2), false); + BOOST_CHECK_EQUAL(newnod.isNewDomain(new_domain1), false); + } +} + +BOOST_AUTO_TEST_SUITE_END()