From 6a85d81ed3c66b4fbe1757ae70a638e0ba72f471 Mon Sep 17 00:00:00 2001 From: Mark Zealey Date: Fri, 14 Feb 2014 17:06:16 +0200 Subject: [PATCH] LMDB-based very high performance backend --- .travis.yml | 5 +- configure.ac | 4 + m4/pdns_with_lmdb.m4 | 8 + modules/Makefile.am | 2 +- modules/lmdbbackend/Makefile.am | 8 + modules/lmdbbackend/OBJECTFILES | 1 + modules/lmdbbackend/OBJECTLIBS | 1 + modules/lmdbbackend/dumpdb.pl | 36 +++ modules/lmdbbackend/lmdb-example.pl | 63 +++++ modules/lmdbbackend/lmdbbackend.cc | 370 ++++++++++++++++++++++++++++ modules/lmdbbackend/lmdbbackend.hh | 50 ++++ pdns/docs/pdns.xml | 174 +++++++++++++ 12 files changed, 720 insertions(+), 2 deletions(-) create mode 100644 m4/pdns_with_lmdb.m4 create mode 100644 modules/lmdbbackend/Makefile.am create mode 100644 modules/lmdbbackend/OBJECTFILES create mode 100644 modules/lmdbbackend/OBJECTLIBS create mode 100644 modules/lmdbbackend/dumpdb.pl create mode 100644 modules/lmdbbackend/lmdb-example.pl create mode 100644 modules/lmdbbackend/lmdbbackend.cc create mode 100644 modules/lmdbbackend/lmdbbackend.hh diff --git a/.travis.yml b/.travis.yml index 02a3f46581..cb541b611f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,9 @@ before_script: - sudo rm /etc/apt/sources.list.d/travis_ci_zeromq3-source.list - sudo apt-get update - sudo apt-get install libboost-all-dev libtolua-dev bc libcdb-dev libnet-dns-perl unbound-host ldnsutils dnsutils bind9utils libtool libcdb-dev xmlto dblatex links asciidoc ruby-json ruby-sqlite3 rubygems libcurl4-openssl-dev ruby1.9.1 socat time libzmq1 libzmq-dev pkg-config daemontools authbind liblua5.1-posix1 libopendbx1-dev libopendbx1-sqlite3 python-virtualenv libldap2-dev + - sudo sh -c 'sed s/precise/trusty/g /etc/apt/sources.list > /etc/apt/sources.list.d/trusty.list' + - sudo apt-get update + - sudo apt-get install liblmdb0 liblmdb-dev lmdb-utils - sudo update-alternatives --set ruby /usr/bin/ruby1.9.1 - sudo touch /etc/authbind/byport/53 - sudo chmod 755 
/etc/authbind/byport/53 @@ -20,7 +23,7 @@ before_script: - cd ../.. script: - ./bootstrap - - ./configure --with-modules='bind gmysql gpgsql gsqlite3 mydns tinydns remote random opendbx ldap' --enable-unit-tests --enable-remotebackend-http --enable-tools --enable-remotebackend-zeromq + - ./configure --with-modules='bind gmysql gpgsql gsqlite3 mydns tinydns remote random opendbx ldap lmdb' --enable-unit-tests --enable-remotebackend-http --enable-tools --enable-remotebackend-zeromq - make dist - make -j 4 - make -j 4 check diff --git a/configure.ac b/configure.ac index ac0640959e..3433fd9c13 100644 --- a/configure.ac +++ b/configure.ac @@ -240,6 +240,9 @@ for a in $modules $dynmodules; do PDNS_WITH_ORACLE needoracle=yes ;; + lmdb) + PDNS_WITH_LMDB + ;; mydns|gmysql|pdns) PDNS_WITH_MYSQL ;; @@ -328,6 +331,7 @@ AC_CONFIG_FILES([ modules/randombackend/Makefile modules/remotebackend/Makefile modules/tinydnsbackend/Makefile + modules/lmdbbackend/Makefile ]) AC_OUTPUT diff --git a/m4/pdns_with_lmdb.m4 b/m4/pdns_with_lmdb.m4 new file mode 100644 index 0000000000..00156fdac0 --- /dev/null +++ b/m4/pdns_with_lmdb.m4 @@ -0,0 +1,8 @@ +AC_DEFUN([PDNS_WITH_LMDB],[ + AC_CHECK_HEADERS([lmdb.h], , [AC_MSG_ERROR([lmdb header (lmdb.h) not found])]) + AC_SUBST([LIBLMDB]) + AC_CHECK_LIB( + [lmdb], [mdb_env_create], + [AC_DEFINE([HAVE_LIBLMDB], 1, [Have -llmdb]) LIBLMDB="lmdb"] + ) +]) diff --git a/modules/Makefile.am b/modules/Makefile.am index 089b4c3b3c..1a544a9fbd 100644 --- a/modules/Makefile.am +++ b/modules/Makefile.am @@ -1,2 +1,2 @@ SUBDIRS=@moduledirs@ -DIST_SUBDIRS=bindbackend db2backend geobackend gmysqlbackend goraclebackend gpgsqlbackend gsqlite3backend ldapbackend luabackend mydnsbackend opendbxbackend oraclebackend pipebackend tinydnsbackend remotebackend randombackend +DIST_SUBDIRS=bindbackend db2backend geobackend gmysqlbackend goraclebackend gpgsqlbackend gsqlite3backend ldapbackend luabackend mydnsbackend opendbxbackend oraclebackend pipebackend tinydnsbackend 
remotebackend randombackend lmdbbackend diff --git a/modules/lmdbbackend/Makefile.am b/modules/lmdbbackend/Makefile.am new file mode 100644 index 0000000000..2aac8f8d66 --- /dev/null +++ b/modules/lmdbbackend/Makefile.am @@ -0,0 +1,8 @@ +AM_CPPFLAGS=@THREADFLAGS@ $(BOOST_CPPFLAGS) +lib_LTLIBRARIES = liblmdbbackend.la + +EXTRA_DIST=OBJECTFILES OBJECTLIBS + +liblmdbbackend_la_SOURCES=lmdbbackend.cc lmdbbackend.hh +liblmdbbackend_la_LDFLAGS=-module -avoid-version +liblmdbbackend_la_LIBADD=-l@LIBLMDB@ diff --git a/modules/lmdbbackend/OBJECTFILES b/modules/lmdbbackend/OBJECTFILES new file mode 100644 index 0000000000..10d9763cc2 --- /dev/null +++ b/modules/lmdbbackend/OBJECTFILES @@ -0,0 +1 @@ +lmdbbackend.o diff --git a/modules/lmdbbackend/OBJECTLIBS b/modules/lmdbbackend/OBJECTLIBS new file mode 100644 index 0000000000..0c0c430855 --- /dev/null +++ b/modules/lmdbbackend/OBJECTLIBS @@ -0,0 +1 @@ +-l$(LIBLMDB) diff --git a/modules/lmdbbackend/dumpdb.pl b/modules/lmdbbackend/dumpdb.pl new file mode 100644 index 0000000000..a682b6c032 --- /dev/null +++ b/modules/lmdbbackend/dumpdb.pl @@ -0,0 +1,36 @@ +#!/usr/bin/perl +use strict; +use warnings; + +use LMDB_File qw( :dbflags :envflags :cursor_op :writeflags ); + +my ($path, $dbname, $searchkey) = @ARGV; +die unless -d $path; + +my $env = LMDB::Env->new( $path, { + mapsize => 1024*1024*1024, + maxdbs => 3, + flags => MDB_RDONLY, +}); +my $txn = LMDB::Txn->new( $env, MDB_RDONLY ); +my $db = $txn->OpenDB( $dbname, MDB_DUPSORT ); +my $c = $db->Cursor; +my ($k, $v); +if( $searchkey ) { + $c->get( $k = $searchkey, $v, MDB_SET_RANGE ); +} else { + $c->get( $k, $v, MDB_FIRST ); +} + +print "key: $k; value: $v\n"; + +while(1) { + eval { + $c->get( $k, $v, MDB_NEXT ); + }; + if( $@ =~ /MDB_NOTFOUND/ ) { + exit; + } + die $@ if $@; + print "key: $k; value: $v\n"; +} diff --git a/modules/lmdbbackend/lmdb-example.pl b/modules/lmdbbackend/lmdb-example.pl new file mode 100644 index 0000000000..15563bce47 --- /dev/null +++ 
b/modules/lmdbbackend/lmdb-example.pl @@ -0,0 +1,63 @@ +#!/usr/bin/perl +# An example script to generate files for the PowerDNS LMDB high performance +# backend + +use LMDB_File 0.04 qw( :dbflags :envflags :cursor_op :writeflags ); + +use strict; +use warnings; + +my $HOME = "/var/tmp/lmdb"; + +mkdir $HOME unless -d $HOME; +my $env = LMDB::Env->new( $HOME, { + mapsize => 100*1024*1024*1024, + maxdbs => 3, +}); +my $txn = LMDB::Txn->new( $env, 0 ); +my $dns_zone = $txn->OpenDB( 'zone', MDB_CREATE ); + +my $zone = 'example.com'; +my $zone_id = 1; +my $zone_ttl = 300; +my $soa_entry = "ns.$zone. hostmaster.$zone. 2012021101 86400 7200 604800 86400"; +# XXX $zone length MUST be less than 500 bytes +$dns_zone->put( scalar reverse(lc $zone), join("\t", $zone_id, $zone_ttl, $soa_entry) ); + +my $dns_data = $txn->OpenDB( 'data', MDB_CREATE | MDB_DUPSORT ); +my $dns_extended_data = $txn->OpenDB( 'extended_data', MDB_CREATE ); +my @entries = ( + # host type data + [ $zone, 'NS', "ns.$zone" ], + # MX/SRV put priority data + [ $zone, 'MX', "10 mail.hotmail.com" ], + # No SOA records + [ "test.$zone", 'A', '1.2.3.4' ], + [ "text.$zone", 'TXT', "test\n123" ], + [ "longtext.$zone", 'TXT', "A" x 550 ], + +); + +my $extended_ref = 0; +for my $row (@entries) { + my ($host, $type, $data) = @$row; + + # Don't ever allow these characters as they break powerdns + $data =~ tr/"\\//d; + + if( $type eq 'TXT' ) { + $data =~ s/([^ -~])/sprintf '\\%03d', ord $1/eg; + } + + my $key = join( "\t", scalar reverse(lc $host), $type ); # XXX must be less than 500 bytes + my $val = join( "\t", $zone_id, $zone_ttl, $data); + if( length $val > 500 ) { + $dns_data->put( $key, "REF\t" . 
++$extended_ref ); + $dns_extended_data->put( $extended_ref, $val ); + # Extended data record storage as DUPSORT can only store up to 500 bytes of data unfortunately + } else { + $dns_data->put( $key, $val ); + } +} + +$txn->commit; diff --git a/modules/lmdbbackend/lmdbbackend.cc b/modules/lmdbbackend/lmdbbackend.cc new file mode 100644 index 0000000000..2e7a32261d --- /dev/null +++ b/modules/lmdbbackend/lmdbbackend.cc @@ -0,0 +1,370 @@ +/* + * LMDBBackend - a high performance LMDB based backend for PowerDNS written by + * Mark Zealey, 2013 + * + * This was originally going to be a backend using BerkeleyDB 5 for high + * performance DNS over massive (millions of zones) databases. However, + * BerkeleyDB had a number of issues to do with locking, contention and + * corruption which made it unsuitable for use. Instead, we use LMDB to perform + * very fast lookups. + * + * See the documentation for more details, and lmdb-example.pl for an example + * script which generates a simple zone. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "lmdbbackend.hh" +#include + +#if 0 +#define DEBUGLOG(msg) L<parts; + stringtok(parts,data,"\t"); + + if(parts.size() != 3 ) + throw PDNSException("Invalid record in zone table: " + data ); + + fillSOAData( parts[2], soa ); + + soa.domain_id = atoi( parts[0].c_str() ); + soa.ttl = atoi( parts[1].c_str() ); + + soa.scopeMask = 0; + soa.db = this; + + return true; +} + +// Called to start an AXFR then ->get() is called. Return true if the domain exists +bool LMDBBackend::list(const string &target, int zoneId, bool include_disabled) { + DEBUGLOG("list() requested for " < keyparts, valparts; + + stringtok(keyparts,cur_key,"\t"); + stringtok(valparts,cur_value,"\t"); + + if( valparts.size() == 2 && valparts[0] == "REF" ) { + MDB_val extended_key, extended_val; + + // XXX parse into an int and have extended table as MDB_INTEGER to have + // a bit better performance/smaller space? 
+ extended_key.mv_data = (char *)valparts[1].c_str(); + extended_key.mv_size = valparts[1].length(); + + if( int rc = mdb_cursor_get( data_extended_cursor, &extended_key, &extended_val, MDB_SET_KEY ) ) + throw PDNSException("Record " + cur_key + " references extended record " + cur_value + " but this doesn't exist: " + mdb_strerror( rc )); + + cur_value.assign((const char *)extended_val.mv_data, extended_val.mv_size); + valparts.clear(); + stringtok(valparts, cur_value, "\t"); + } + + if( keyparts.size() != 2 || valparts.size() != 3 ) + throw PDNSException("Invalid record in record table: key: '" + cur_key + "'; value: "+ cur_value); + + string compare_string = cur_key.substr(0, d_searchkey.length()); + DEBUGLOG( "searchkey: " << d_searchkey << "; compare: " << compare_string << ";" << endl); + + // If we're onto records not beginning with this search prefix, then we + // must be past the end + if( compare_string.compare( d_searchkey ) ) + return get_finished(); + + int domain_id = atoi( valparts[0].c_str() ); + + // If we are doing an AXFR and the record fetched has been outside of our domain then end the transfer + if( is_axfr ) { + // Check it's not a subdomain ie belongs to this record + if( domain_id != d_domain_id ) + goto next_record; + + // If it's under the main domain then append the . 
to the comparison to + // ensure items outside our zone don't enter + if( keyparts[0].length() > d_querykey.length() ) { + string test = d_querykey; + test.append("."); + + compare_string = cur_key.substr(0, d_querykey.length() + 1); + + DEBUGLOG("test: " << test << "; compare: " << compare_string << ";" << endl); + + if( test.compare( compare_string ) ) + goto next_record; + } + + // We need to maintain query casing so strip off domain (less dot) and append originial query + string sub = keyparts[0].substr( d_origdomain.length(), string::npos ); + rr.qname = string( sub.rbegin(), sub.rend() ) + d_origdomain; + } else + rr.qname = d_origdomain; // use cached and original casing + + DEBUGLOG("Found record: " < +#include + +class LMDBBackend : public DNSReversedBackend +{ +private: + + MDB_env *env; + MDB_dbi data_db, zone_db, data_extended_db; + MDB_txn *txn; + MDB_cursor *data_cursor, *zone_cursor, *data_extended_cursor; + + // Domain that we are querying for in list()/lookup()/get(). In original case and direction. + string d_origdomain; + + // Current QType being queried for + QType d_curqtype; + + // Is this the first call to ::get() ? + bool d_first; + + // Current domain ID being queried for + int d_domain_id; + + // The reversed and lowercase key that we are querying in the database. Set after the first ::get() call. 
+ string d_querykey; + + // d_querykey with some additional bits potentially tacked on to make searching faster + string d_searchkey; + + void open_db(); + void close_db(); + inline bool get_finished(); + +public: + LMDBBackend(const string &suffix=""); + ~LMDBBackend(); + bool list(const string &target, int id, bool include_disabled=false); + void lookup(const QType &type, const string &qdomain, DNSPacket *p, int zoneId); + void reload(); + bool get(DNSResourceRecord &rr); + + bool getAuthZone( string &rev_zone ); + bool getAuthData( SOAData &, DNSPacket *); +}; diff --git a/pdns/docs/pdns.xml b/pdns/docs/pdns.xml index bf62b4485e..0d3b030bb4 100644 --- a/pdns/docs/pdns.xml +++ b/pdns/docs/pdns.xml @@ -19579,6 +19579,180 @@ VALUES (:zoneid, :ip) + LMDB (high performance) backend + + + LMDB backend capabilities + + + NativeYes + MasterNo + SlaveNo + SuperslaveNo + AutoserialNo + DNSSECNo + Module namelmdb + Launchlmdb + + +
+
+ + Based on the LMDB key-value + database, the LMDB backend turns powerdns into a very high + performance and DDOS-resilient authoritative DNS server. Testing on a + 32-core server shows the ability to answer up to 400,000 queries per second + with instant startup and real-time updates independent of database size. + + + + + lmdb-datapath= + + + Location of the database to load + + + + + + + Operation + + Unlike other backends LMDB does not require any special configuration. + New or updated zones are available from the next query after the update + transaction is committed. If the underlying database is removed or + recreated then the reload command should be sent through to powerdns to + get it to close and reopen the database. + + + Database Format + + A full example script for generating a database can be found in + pdns/modules/lmdbbackend/lmdb-example.pl. Basically the database + environment consists of three databases to store the data: + + zone database + + Each key in the zone database is the reversed lower-cased name of + the zone without + leading or trailing dots (i.e. for example.com the key would be moc.elpmaxe). + + + Each value in the database must contain the following data (tab-separated): + + + Zone ID + + The Zone's unique integer ID in ASCII (32-bit) + + + + TTL + + The TTL for the SOA record + + + + SOA data + + space-separated SOA data e.g. + + ns.foo.com. hostmaster.foo.com. <serial> <refresh> <retry> <expire> <minimum> + + If refresh, retry, expire or minimum are not specified then the powerdns defaults will be used + + + + + + + data database + + This database is required to have been created with the MDB_DUPSORT flag enabled. It stores the records for each domain. + Each key must contain the following data (tab-separated): + + + Record name + + The reversed lower-cased name of the record and zone without leading or trailing dots + + + + Record type + + The type of record (A, NS, PTR etc.). 
SOA is not allowed as it is automatically created from the zone database records. + + + + + + The value for each entry must contain the following data + (tab-separated). If the length of this record is greater than the + LMDB limit of 510 bytes (for DUPSORT databases), the value must instead be an entry of "REF" + followed by the tab character and a unique 32-bit ASCII integer + which is a reference into the extended_data database. + + + Zone ID + + The Zone's unique integer ID in ASCII (32-bit) + + + + TTL + + The TTL for the record + + + + Record data + + + The record's data entry. For MX/SRV records the + priority is the first field and space-separated from the rest + of the data. Care must be taken to escape the data + appropriately for PowerDNS. As in the Pipe backend " and \ + characters are not allowed, and it is advised that any + characters outside of ASCII 32-126 are escaped using the \ + character. + + + + + + + extended_data database + + If the length of the value that you wish to insert into the data database is longer than 510 bytes you need to create the + REF entry as described above linked in to this table. The key is a + unique 32-bit integer value formatted in ASCII and the value is in the + exact same format as it would have been in the data database + but can be however long you require. + + + + Example database structure + + (as output by the pdns/modules/lmdbbackend/lmdb-example.pl example script and shown by pdns/modules/lmdbbackend/dumpdb.pl) + + # perl dumpdb.pl /var/tmp/lmdb zone + key: moc.elpmaxe; value: 1 300 ns.example.com. hostmaster.example.com. 
2012021101 86400 7200 604800 86400 + # perl dumpdb.pl /var/tmp/lmdb data + key: moc.elpmaxe MX; value: 1 300 10 mail.hotmail.com + key: moc.elpmaxe NS; value: 1 300 ns.example.com + key: moc.elpmaxe.tset A; value: 1 300 1.2.3.4 + key: moc.elpmaxe.txet TXT; value: 1 300 test\010123 + key: moc.elpmaxe.txetgnol TXT; value: REF 1 + # perl dumpdb.pl /var/tmp/lmdb extended_data + key: 1; value: 1 300 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + + + + +
+ ODBC backend -- 2.47.2