]> git.ipfire.org Git - thirdparty/pdns.git/blob - modules/lmdbbackend/lmdbbackend.cc
Merge pull request #1914 from rubenk/make-mysql-flags-consistent
[thirdparty/pdns.git] / modules / lmdbbackend / lmdbbackend.cc
1 /*
2 * LMDBBackend - a high performance LMDB based backend for PowerDNS written by
3 * Mark Zealey, 2013
4 *
5 * This was originally going to be a backend using BerkeleyDB 5 for high
6 * performance DNS over massive (millions of zones) databases. However,
7 * BerkeleyDB had a number of issues to do with locking, contention and
8 * corruption which made it unsuitable for use. Instead, we use LMDB to perform
9 * very fast lookups.
10 *
11 * See the documentation for more details, and lmdb-example.pl for an example
12 * script which generates a simple zone.
13 */
14
15 #include "pdns/utility.hh"
16 #include "pdns/dnsbackend.hh"
17 #include "pdns/dns.hh"
18 #include "pdns/dnspacket.hh"
19 #include "pdns/pdnsexception.hh"
20 #include "pdns/logger.hh"
21 #include <signal.h>
22 #include "lmdbbackend.hh"
23 #include "pdns/arguments.hh"
24 #include "pdns/base32.hh"
25 #include "pdns/lock.hh"
26
27 #if 0
28 #define DEBUGLOG(msg) L<<Logger::Error<<msg
29 #else
30 #define DEBUGLOG(msg) do {} while(0)
31 #endif
32
33 int LMDBBackend::s_reloadcount=0;
34 pthread_mutex_t LMDBBackend::s_initlock = PTHREAD_MUTEX_INITIALIZER;
35
36 LMDBBackend::LMDBBackend(const string &suffix)
37 {
38 setArgPrefix("lmdb"+suffix);
39 try {
40 d_doDnssec = mustDo("experimental-dnssec");
41 }
42 catch (ArgException e) {
43 d_doDnssec = false;
44 }
45 d_lastreload = s_reloadcount;
46 open_db();
47 }
48
49 void LMDBBackend::open_db() {
50 L<<Logger::Error<<"Loading LMDB database " << getArg("datapath") << endl;
51
52 string path = getArg("datapath");
53 int rc;
54 int major, minor, patch;
55
56 string verstring( mdb_version( &major, &minor, &patch ) );
57 if( MDB_VERINT( major, minor, patch ) < MDB_VERINT( 0, 9, 8 ) )
58 throw PDNSException( "LMDB Library version too old (" + verstring + "). Needs to be 0.9.8 or greater" );
59
60 Lock l(&s_initlock);
61
62 if( (rc = mdb_env_create(&env)) )
63 throw PDNSException("Couldn't open LMDB database " + path + ": mdb_env_create() returned " + mdb_strerror(rc));
64
65 if( (rc = mdb_env_set_maxdbs( env, d_doDnssec ? 5 : 3)) )
66 throw PDNSException("Couldn't open LMDB database " + path + ": mdb_env_set_maxdbs() returned " + mdb_strerror(rc));
67
68 if( (rc = mdb_env_open(env, path.c_str(), MDB_RDONLY, 0)) )
69 throw PDNSException("Couldn't open LMDB database " + path + ": mdb_env_open() returned " + mdb_strerror(rc));
70
71 if( (rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn) ))
72 throw PDNSException("Couldn't start LMDB txn " + path + ": mdb_txn_begin() returned " + mdb_strerror(rc));
73
74 if( (rc = mdb_dbi_open(txn, "zone", 0, &zone_db) ) )
75 throw PDNSException("Couldn't open LMDB zone database " + path + ": mdb_dbi_open() returned " + mdb_strerror(rc));
76 if( (rc = mdb_cursor_open(txn, zone_db, &zone_cursor) ))
77 throw PDNSException("Couldn't open cursor on LMDB zone database " + path + ": mdb_cursor_open() returned " + mdb_strerror(rc));
78
79 if( (rc = mdb_dbi_open(txn, "data", MDB_DUPSORT, &data_db) ))
80 throw PDNSException("Couldn't open LMDB data database " + path + ": mdb_dbi_open() returned " + mdb_strerror(rc));
81 if( (rc = mdb_cursor_open(txn, data_db, &data_cursor) ))
82 throw PDNSException("Couldn't open cursor on LMDB data database " + path + ": mdb_cursor_open() returned " + mdb_strerror(rc));
83
84 if( (rc = mdb_dbi_open(txn, "extended_data", 0, &data_extended_db) ))
85 throw PDNSException("Couldn't open LMDB extended_data database " + path + ": mdb_dbi_open() returned " + mdb_strerror(rc));
86 if( ( rc = mdb_cursor_open(txn, data_extended_db, &data_extended_cursor)) )
87 throw PDNSException("Couldn't open cursor on LMDB data_extended database " + path + ": mdb_cursor_open() returned " + mdb_strerror(rc));
88
89 if(d_doDnssec) {
90 DEBUGLOG("Experimental dnssec support enabled"<<endl);
91 if( (rc = mdb_dbi_open(txn, "rrsig", MDB_DUPSORT, &rrsig_db) ))
92 throw PDNSException("Couldn't open LMDB rrsig database " + path + ": mdb_dbi_open() returned " + mdb_strerror(rc));
93 if( ( rc = mdb_cursor_open(txn, rrsig_db, &rrsig_cursor)) )
94 throw PDNSException("Couldn't open cursor on LMDB rrsig database " + path + ": mdb_cursor_open() returned " + mdb_strerror(rc));
95
96 if( (rc = mdb_dbi_open(txn, "nsecx", 0, &nsecx_db) ))
97 throw PDNSException("Couldn't open LMDB nsecx database " + path + ": mdb_dbi_open() returned " + mdb_strerror(rc));
98 if( ( rc = mdb_cursor_open(txn, nsecx_db, &nsecx_cursor)) )
99 throw PDNSException("Couldn't open cursor on LMDB nsecx database " + path + ": mdb_cursor_open() returned " + mdb_strerror(rc));
100 }
101 }
102
103 void LMDBBackend::close_db() {
104 L<<Logger::Error<<"Closing LMDB database"<< endl;
105
106 mdb_cursor_close(data_cursor);
107 mdb_cursor_close(zone_cursor);
108 mdb_cursor_close(data_extended_cursor);
109 mdb_dbi_close(env, data_db);
110 mdb_dbi_close(env, zone_db);
111 mdb_dbi_close(env, data_extended_db);
112 if (d_doDnssec) {
113 mdb_cursor_close(rrsig_cursor);
114 mdb_cursor_close(nsecx_cursor);
115 mdb_dbi_close(env, rrsig_db);
116 mdb_dbi_close(env, nsecx_db);
117 }
118 mdb_txn_abort(txn);
119 mdb_env_close(env);
120 }
121
122 LMDBBackend::~LMDBBackend()
123 {
124 close_db();
125 }
126
127 void LMDBBackend::reload() {
128 ++s_reloadcount;
129 }
130
131 void LMDBBackend::needReload() {
132 if (s_reloadcount > d_lastreload) {
133 d_lastreload = s_reloadcount;
134 close_db();
135 open_db();
136 }
137 }
138
139 bool LMDBBackend::getDomainMetadata(const string& name, const std::string& kind, std::vector<std::string>& meta)
140 {
141 if (!d_doDnssec)
142 return false;
143
144 needReload();
145
146 if (kind == "PRESIGNED" || kind == "NSEC3PARAM") {
147 int rc;
148 MDB_val key, data;
149 string key_str, cur_value;
150 vector<string> valparts;
151
152 key_str=bitFlip(labelReverse(toLower(name)))+"\xff";
153 key.mv_data = (char *)key_str.c_str();
154 key.mv_size = key_str.length();
155
156 if ((rc = mdb_cursor_get(zone_cursor, &key, &data, MDB_SET_KEY)) == 0) {
157 cur_value.assign((const char *)data.mv_data, data.mv_size);
158 stringtok(valparts,cur_value,"\t");
159
160 if (valparts.size() == 4) {
161 if (kind == "PRESIGNED")
162 meta.push_back("1");
163 else if (valparts[3] != "1")
164 meta.push_back(valparts[3]);
165 }
166 }
167
168 if (rc == MDB_NOTFOUND)
169 DEBUGLOG("Metadata records for zone: '"<<name<<"'' not found. This is impossible !!!"<<endl);
170 }
171
172 return true;
173 }
174
175 bool LMDBBackend::getDirectNSECx(uint32_t id, const string &hashed, const QType &qtype, string &before, DNSResourceRecord &rr)
176 {
177 if (!d_doDnssec)
178 return false;
179
180 needReload();
181
182 MDB_val key, data;
183 string key_str, cur_key, cur_value;
184 vector<string> keyparts, valparts;
185
186 if (qtype == QType::NSEC)
187 key_str=itoa(id)+"\t"+bitFlip(hashed)+"\xff";
188 else
189 key_str=itoa(id)+"\t"+toBase32Hex(bitFlip(hashed));
190 key.mv_data = (char *)key_str.c_str();
191 key.mv_size = key_str.length();
192
193 before.clear();
194 if(!mdb_cursor_get(nsecx_cursor, &key, &data, MDB_SET_RANGE)) {
195 cur_key.assign((const char *)key.mv_data, key.mv_size);
196 cur_value.assign((const char *)data.mv_data, data.mv_size);
197 stringtok(keyparts,cur_key,"\t");
198 stringtok(valparts,cur_value,"\t");
199
200 if( keyparts.size() != 2 || valparts.size() != 4 ) {
201 throw PDNSException("Invalid record in nsecx table: key: '" + cur_key + "'; value: "+ cur_value);
202 }
203
204 // is the key a full match or does the id part match our zone?
205 // if it does we have a valid answer.
206 if (!key_str.compare(cur_key) || atoi(keyparts[0].c_str()) == (int) id) // FIXME we need atoui
207 goto hasnsecx;
208 }
209 // no match, now we look for the last record in the NSECx chain.
210 key_str=itoa(id)+"\t";
211 key.mv_data = (char *)key_str.c_str();
212 key.mv_size = key_str.length();
213
214 if(!mdb_cursor_get(nsecx_cursor, &key, &data, MDB_NEXT_NODUP )) {
215 cur_key.assign((const char *)key.mv_data, key.mv_size);
216 cur_value.assign((const char *)data.mv_data, data.mv_size);
217 stringtok(keyparts,cur_key,"\t");
218 stringtok(valparts,cur_value,"\t");
219
220 if( keyparts.size() != 2 || valparts.size() != 4 ) {
221 throw PDNSException("Invalid record in nsecx table: key: '" + cur_key + "'; value: "+ cur_value);
222 }
223
224 if (!key_str.compare(cur_key) || atoi(keyparts[0].c_str()) == (int) id) // FIXME we need atoui
225 goto hasnsecx;
226 }
227
228 DEBUGLOG("NSECx record for '"<<toBase32Hex(bitFlip(hashed))<<"'' in zone '"<<id<<"' not found"<<endl);
229 return true;
230
231 hasnsecx:
232 if (qtype == QType::NSEC)
233 before=bitFlip(keyparts[1]).c_str();
234 else
235 before=bitFlip(fromBase32Hex(keyparts[1]));
236 rr.qname=valparts[0];
237 rr.ttl=atoi(valparts[1].c_str());
238 rr.qtype=DNSRecordContent::TypeToNumber(valparts[2]);
239 rr.content=valparts[3];
240 rr.d_place=DNSResourceRecord::AUTHORITY;
241 rr.domain_id=id;
242 rr.auth=true;
243
244 return true;
245 }
246
247 bool LMDBBackend::getDirectRRSIGs(const string &signer, const string &qname, const QType &qtype, vector<DNSResourceRecord> &rrsigs)
248 {
249 if (!d_doDnssec)
250 return false;
251
252 needReload();
253
254 int rc;
255 MDB_val key, data;
256 string key_str, cur_value;
257 vector<string> valparts;
258
259 key_str=signer+"\t"+makeRelative(qname, signer)+"\t"+qtype.getName();
260 key.mv_data = (char *)key_str.c_str();
261 key.mv_size = key_str.length();
262
263 if ((rc = mdb_cursor_get(rrsig_cursor, &key, &data, MDB_SET_KEY)) == 0) {
264 DNSResourceRecord rr;
265 rr.qname=qname;
266 rr.qtype=QType::RRSIG;
267 //rr.d_place = (DNSResourceRecord::Place) signPlace;
268 rr.auth=false;
269
270 do {
271 cur_value.assign((const char *)data.mv_data, data.mv_size);
272 stringtok(valparts,cur_value,"\t");
273
274 if( valparts.size() != 2 ) {
275 throw PDNSException("Invalid record in rrsig table: qname: '" + qname + "'; value: "+ cur_value);
276 }
277
278 rr.ttl=atoi(valparts[0].c_str());
279 rr.content = valparts[1];
280 rrsigs.push_back(rr);
281
282 } while (mdb_cursor_get(rrsig_cursor, &key, &data, MDB_NEXT_DUP) == 0);
283 }
284
285 if (rc == MDB_NOTFOUND)
286 DEBUGLOG("RRSIG records for qname: '"<<qname<<"'' with type: '"<<qtype.getName()<<"' not found"<<endl);
287
288 return true;
289 }
290
291 // Get the zone name of the requested zone (labelReversed) OR the name of the closest parrent zone
292 bool LMDBBackend::getAuthZone( string &rev_zone )
293 {
294 needReload();
295
296 MDB_val key, data;
297 // XXX can do this just using char *
298
299 string key_str=bitFlip(rev_zone+" ");
300 key.mv_data = (char *)key_str.c_str();
301 key.mv_size = key_str.length();
302
303 // Release our transaction and cursors in order to get latest data
304 mdb_txn_reset( txn );
305 mdb_txn_renew( txn );
306 mdb_cursor_renew( txn, zone_cursor );
307 mdb_cursor_renew( txn, data_cursor );
308 mdb_cursor_renew( txn, data_extended_cursor );
309 if (d_doDnssec) {
310 mdb_cursor_renew( txn, rrsig_cursor );
311 mdb_cursor_renew( txn, nsecx_cursor );
312 }
313
314 // Find the best record
315 if( mdb_cursor_get( zone_cursor, &key, &data, MDB_SET_RANGE ) == 0 && key.mv_size <= key_str.length() ) {
316 // Found a shorter match. Now look if the zones are equal up to key-length-1. If they are check
317 // if position key-length in key_str is a label separator. If all this is true we have a match.
318 if( key_str.compare( 0, key.mv_size-1, (const char *) key.mv_data, key.mv_size-1 ) == 0 && key.mv_size && key_str[key.mv_size-1] == ~' ') {
319 rev_zone.resize( key.mv_size-1 );
320
321 DEBUGLOG("Auth key: " << rev_zone <<endl);
322
323 return true;
324 }
325 }
326
327 //reset the cursor the data in it is invallid
328 mdb_cursor_renew( txn, zone_cursor );
329
330 return false;
331 }
332
333 bool LMDBBackend::getAuthData( SOAData &soa, DNSPacket *p )
334 {
335 needReload();
336
337 MDB_val key, value;
338 if( mdb_cursor_get(zone_cursor, &key, &value, MDB_GET_CURRENT) )
339 return false;
340
341 string data( (const char *)value.mv_data, value.mv_size );
342 DEBUGLOG("Auth record data " << data<<endl);
343
344 // XXX do this in C too
345 vector<string>parts;
346 stringtok(parts,data,"\t");
347
348 if(parts.size() < 3)
349 throw PDNSException("Invalid record in zone table: " + data );
350
351 fillSOAData( parts[2], soa );
352
353 soa.domain_id = atoi( parts[0].c_str() );
354 soa.ttl = atoi( parts[1].c_str() );
355
356 soa.scopeMask = 0;
357 soa.db = this;
358
359 return true;
360 }
361
362 // Called to start an AXFR then ->get() is called. Return true if the domain exists
363 bool LMDBBackend::list(const string &target, int zoneId, bool include_disabled) {
364 DEBUGLOG("list() requested for " <<target << endl);
365 d_first = true;
366 d_origdomain = target;
367 d_domain_id = zoneId;
368 d_curqtype = QType::AXFR;
369
370 // getSOA will have been called first to ensure the domain exists so if
371 // that's the case then there's no reason we can't AXFR it.
372
373 return true;
374 }
375
376 void LMDBBackend::lookup(const QType &type, const string &inQdomain, DNSPacket *p, int zoneId)
377 {
378 DEBUGLOG("lookup: " <<inQdomain << " " << type.getName() << endl);
379
380 needReload();
381
382 d_first = true;
383 d_origdomain = inQdomain;
384 d_curqtype = type;
385 }
386
387 inline bool LMDBBackend::get_finished()
388 {
389 d_origdomain = "";
390
391 return false;
392 }
393
394 bool LMDBBackend::get(DNSResourceRecord &rr)
395 {
396 MDB_val key, value;
397 bool is_axfr = (d_curqtype == QType::AXFR);
398 bool is_full_key = ( ! is_axfr && d_curqtype != QType::ANY );
399
400 DEBUGLOG("get : " <<d_origdomain<< endl);
401 if( !d_origdomain.length() )
402 return false;
403
404 DEBUGLOG("Starting Q " << d_first<< endl);
405
406 if( d_first ) {
407 d_first = false;
408
409 // Reverse the query string
410 string lowerq = toLower( d_origdomain );
411 d_querykey = string( lowerq.rbegin(), lowerq.rend() );
412 d_searchkey = d_querykey;
413
414 // For normal queries ensure that we are only trying to get the exact
415 // record and also try to specify the type too to make negatives a lot
416 // quicker
417 if( ! is_axfr ) {
418 d_searchkey += "\t";
419
420 // Search by query type too to easily exclude anything that doesn't
421 // belong to us
422 if( is_full_key )
423 d_searchkey += d_curqtype.getName();
424 }
425
426 key.mv_size = d_searchkey.length();
427 key.mv_data = (char *)d_searchkey.c_str();
428 if( mdb_cursor_get(data_cursor, &key, &value, is_full_key ? MDB_SET_KEY : MDB_SET_RANGE ) )
429 return get_finished();
430 } else {
431 next_record:
432 key.mv_size = 0;
433 if( mdb_cursor_get(data_cursor, &key, &value, is_full_key ? MDB_NEXT_DUP : MDB_NEXT ) )
434 return get_finished();
435 }
436
437 // Some buggy versions of lmdb will do this. Should be caught in opendb above though.
438 if( key.mv_size == 0 ) {
439 DEBUGLOG("No key returned. Error" << endl);
440 return get_finished();
441 }
442
443 string cur_value((const char *)value.mv_data, value.mv_size);
444 string cur_key((const char *)key.mv_data, key.mv_size);
445
446 DEBUGLOG("querykey: " << d_querykey << "; cur_key: " <<cur_key<< "; cur_value: '" << cur_value << "'" << endl);
447
448 vector<string> keyparts, valparts;
449
450 stringtok(keyparts,cur_key,"\t");
451 stringtok(valparts,cur_value,"\t");
452
453 if( valparts.size() == 2 && valparts[0] == "REF" ) {
454 MDB_val extended_key, extended_val;
455
456 // XXX parse into an int and have extended table as MDB_INTEGER to have
457 // a bit better performance/smaller space?
458 extended_key.mv_data = (char *)valparts[1].c_str();
459 extended_key.mv_size = valparts[1].length();
460
461 if( int rc = mdb_cursor_get( data_extended_cursor, &extended_key, &extended_val, MDB_SET_KEY ) )
462 throw PDNSException("Record " + cur_key + " references extended record " + cur_value + " but this doesn't exist: " + mdb_strerror( rc ));
463
464 cur_value.assign((const char *)extended_val.mv_data, extended_val.mv_size);
465 valparts.clear();
466 stringtok(valparts, cur_value, "\t");
467 }
468
469 if (valparts.size() != 3) // FIXME
470 valparts.push_back(".");
471
472 if( keyparts.size() != 2 || valparts.size() != 3 )
473 throw PDNSException("Invalid record in record table: key: '" + cur_key + "'; value: '"+ cur_value+"'");
474
475 string compare_string = cur_key.substr(0, d_searchkey.length());
476 DEBUGLOG( "searchkey: " << d_searchkey << "; compare: " << compare_string << ";" << endl);
477
478 // If we're onto records not beginning with this search prefix, then we
479 // must be past the end
480 if( compare_string.compare( d_searchkey ) )
481 return get_finished();
482
483 int domain_id = atoi( valparts[0].c_str() );
484
485 // If we are doing an AXFR and the record fetched has been outside of our domain then end the transfer
486 if( is_axfr ) {
487 // Check it's not a subdomain ie belongs to this record
488 if( domain_id != d_domain_id )
489 goto next_record;
490
491 // If it's under the main domain then append the . to the comparison to
492 // ensure items outside our zone don't enter
493 if( keyparts[0].length() > d_querykey.length() ) {
494 string test = d_querykey;
495 test.append(".");
496
497 compare_string = cur_key.substr(0, d_querykey.length() + 1);
498
499 DEBUGLOG("test: " << test << "; compare: " << compare_string << ";" << endl);
500
501 if( test.compare( compare_string ) )
502 goto next_record;
503 }
504
505 // We need to maintain query casing so strip off domain (less dot) and append originial query
506 string sub = keyparts[0].substr( d_origdomain.length(), string::npos );
507 rr.qname = string( sub.rbegin(), sub.rend() ) + d_origdomain;
508 } else
509 rr.qname = d_origdomain; // use cached and original casing
510
511 DEBUGLOG("Found record: " <<cur_key << ": "<<valparts.size() << endl);
512
513 DEBUGLOG("pass! " << rr.qname << ";" << endl);
514 rr.qtype = keyparts[1];
515
516 /* Filter records to only match query type */
517 if( d_curqtype != QType::ANY && !is_axfr && rr.qtype != d_curqtype )
518 goto next_record;
519
520 DEBUGLOG("Correct record type" << endl);
521 rr.auth = 1;
522
523 rr.domain_id = domain_id;
524 rr.ttl = atoi( valparts[1].c_str() );
525 rr.content = valparts[2];
526
527 return true;
528 }
529
530 class LMDBFactory : public BackendFactory
531 {
532 public:
533 LMDBFactory() : BackendFactory("lmdb") {}
534 void declareArguments(const string &suffix="")
535 {
536 declare(suffix,"datapath","Path to the directory containing the lmdb files","/etc/pdns/data");
537 declare(suffix,"experimental-dnssec","Enable experimental DNSSEC processing","no");
538 }
539 DNSBackend *make(const string &suffix="")
540 {
541 return new LMDBBackend(suffix);
542 }
543 };
544
545 /* THIRD PART */
546
547 class LMDBLoader
548 {
549 public:
550 LMDBLoader()
551 {
552 BackendMakers().report(new LMDBFactory);
553 L << Logger::Info << "[lmdbbackend] This is the lmdb backend version " VERSION " (" __DATE__ ", " __TIME__ ") reporting" << endl;
554 }
555 };
556
557 static LMDBLoader lmdbLoader;
558