Add database enumerator interface to perform iterative searches
[people/ms/libloc.git] / src / database.c
CommitLineData
2601e83e
MT
1/*
2 libloc - A library to determine the location of someone on the Internet
3
4 Copyright (C) 2017 IPFire Development Team <info@ipfire.org>
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15*/
16
2a30e4de 17#include <arpa/inet.h>
0676cd80 18#include <endian.h>
2601e83e 19#include <errno.h>
10778041 20#include <netinet/in.h>
2601e83e
MT
21#include <stddef.h>
22#include <stdint.h>
23#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
c182393f 26#include <sys/mman.h>
2601e83e 27#include <sys/types.h>
96ea74a5 28#include <time.h>
3f35869a 29#include <unistd.h>
2601e83e
MT
30
31#include <loc/libloc.h>
9fc7f001
MT
32#include <loc/as.h>
33#include <loc/database.h>
a5db3e49 34#include <loc/format.h>
10778041 35#include <loc/network.h>
9fc7f001
MT
36#include <loc/private.h>
37#include <loc/stringpool.h>
2601e83e
MT
38
39struct loc_database {
40 struct loc_ctx* ctx;
41 int refcount;
42
43 unsigned int version;
96ea74a5 44 time_t created_at;
2601e83e
MT
45 off_t vendor;
46 off_t description;
4bf49d00 47 off_t license;
2601e83e 48
a5db3e49 49 // ASes in the database
c182393f 50 struct loc_database_as_v0* as_v0;
a5db3e49
MT
51 size_t as_count;
52
f66b7b09
MT
53 // Network tree
54 struct loc_database_network_node_v0* network_nodes_v0;
55 size_t network_nodes_count;
56
a735a563
MT
57 // Networks
58 struct loc_database_network_v0* networks_v0;
59 size_t networks_count;
60
2601e83e
MT
61 struct loc_stringpool* pool;
62};
63
7e13db74
MT
64struct loc_database_enumerator {
65 struct loc_ctx* ctx;
66 struct loc_database* db;
67 int refcount;
68};
69
a7431f1a 70static int loc_database_read_magic(struct loc_database* db, FILE* f) {
2601e83e
MT
71 struct loc_database_magic magic;
72
73 // Read from file
a7431f1a 74 size_t bytes_read = fread(&magic, 1, sizeof(magic), f);
2601e83e
MT
75
76 // Check if we have been able to read enough data
77 if (bytes_read < sizeof(magic)) {
78 ERROR(db->ctx, "Could not read enough data to validate magic bytes\n");
79 DEBUG(db->ctx, "Read %zu bytes, but needed %zu\n", bytes_read, sizeof(magic));
80 return -ENOMSG;
81 }
82
83 // Compare magic bytes
84 if (memcmp(LOC_DATABASE_MAGIC, magic.magic, strlen(LOC_DATABASE_MAGIC)) == 0) {
85 DEBUG(db->ctx, "Magic value matches\n");
86
87 // Parse version
0676cd80 88 db->version = be16toh(magic.version);
2601e83e
MT
89 DEBUG(db->ctx, "Database version is %u\n", db->version);
90
91 return 0;
92 }
93
94 ERROR(db->ctx, "Database format is not compatible\n");
95
96 // Return an error
97 return 1;
98}
99
a5db3e49 100static int loc_database_read_as_section_v0(struct loc_database* db,
edb4ba7c
MT
101 FILE* f, const struct loc_database_header_v0* header) {
102 off_t as_offset = be32toh(header->as_offset);
103 size_t as_length = be32toh(header->as_length);
104
c182393f 105 DEBUG(db->ctx, "Reading AS section from %jd (%zu bytes)\n", as_offset, as_length);
a5db3e49 106
c182393f
MT
107 if (as_length > 0) {
108 db->as_v0 = mmap(NULL, as_length, PROT_READ,
a7431f1a 109 MAP_SHARED, fileno(f), as_offset);
a5db3e49 110
c182393f
MT
111 if (db->as_v0 == MAP_FAILED)
112 return -errno;
a5db3e49
MT
113 }
114
c182393f
MT
115 db->as_count = as_length / sizeof(*db->as_v0);
116
a5db3e49
MT
117 INFO(db->ctx, "Read %zu ASes from the database\n", db->as_count);
118
119 return 0;
120}
121
f66b7b09 122static int loc_database_read_network_nodes_section_v0(struct loc_database* db,
edb4ba7c
MT
123 FILE* f, const struct loc_database_header_v0* header) {
124 off_t network_nodes_offset = be32toh(header->network_tree_offset);
125 size_t network_nodes_length = be32toh(header->network_tree_length);
126
f66b7b09
MT
127 DEBUG(db->ctx, "Reading network nodes section from %jd (%zu bytes)\n",
128 network_nodes_offset, network_nodes_length);
129
130 if (network_nodes_length > 0) {
131 db->network_nodes_v0 = mmap(NULL, network_nodes_length, PROT_READ,
132 MAP_SHARED, fileno(f), network_nodes_offset);
133
134 if (db->network_nodes_v0 == MAP_FAILED)
135 return -errno;
136 }
137
138 db->network_nodes_count = network_nodes_length / sizeof(*db->network_nodes_v0);
139
140 INFO(db->ctx, "Read %zu network nodes from the database\n", db->network_nodes_count);
141
142 return 0;
143}
144
a735a563
MT
145static int loc_database_read_networks_section_v0(struct loc_database* db,
146 FILE* f, const struct loc_database_header_v0* header) {
147 off_t networks_offset = be32toh(header->network_data_offset);
148 size_t networks_length = be32toh(header->network_data_length);
149
150 DEBUG(db->ctx, "Reading networks section from %jd (%zu bytes)\n",
151 networks_offset, networks_length);
152
153 if (networks_length > 0) {
154 db->networks_v0 = mmap(NULL, networks_length, PROT_READ,
155 MAP_SHARED, fileno(f), networks_offset);
156
157 if (db->networks_v0 == MAP_FAILED)
158 return -errno;
159 }
160
161 db->networks_count = networks_length / sizeof(*db->networks_v0);
162
163 INFO(db->ctx, "Read %zu networks from the database\n", db->networks_count);
164
165 return 0;
166}
167
a7431f1a 168static int loc_database_read_header_v0(struct loc_database* db, FILE* f) {
2601e83e
MT
169 struct loc_database_header_v0 header;
170
171 // Read from file
a7431f1a 172 size_t size = fread(&header, 1, sizeof(header), f);
2601e83e
MT
173
174 if (size < sizeof(header)) {
175 ERROR(db->ctx, "Could not read enough data for header\n");
176 return -ENOMSG;
177 }
178
179 // Copy over data
96ea74a5 180 db->created_at = be64toh(header.created_at);
0676cd80
MT
181 db->vendor = be32toh(header.vendor);
182 db->description = be32toh(header.description);
4bf49d00 183 db->license = be32toh(header.license);
2601e83e
MT
184
185 // Open pool
0676cd80
MT
186 off_t pool_offset = be32toh(header.pool_offset);
187 size_t pool_length = be32toh(header.pool_length);
2601e83e 188
0e974d4b 189 int r = loc_stringpool_open(db->ctx, &db->pool,
a7431f1a 190 f, pool_length, pool_offset);
2601e83e
MT
191 if (r)
192 return r;
193
a5db3e49 194 // AS section
edb4ba7c 195 r = loc_database_read_as_section_v0(db, f, &header);
a5db3e49
MT
196 if (r)
197 return r;
198
f66b7b09 199 // Network Nodes
edb4ba7c 200 r = loc_database_read_network_nodes_section_v0(db, f, &header);
f66b7b09
MT
201 if (r)
202 return r;
203
a735a563
MT
204 // Networks
205 r = loc_database_read_networks_section_v0(db, f, &header);
206 if (r)
207 return r;
208
2601e83e
MT
209 return 0;
210}
211
a7431f1a 212static int loc_database_read_header(struct loc_database* db, FILE* f) {
2601e83e
MT
213 switch (db->version) {
214 case 0:
a7431f1a 215 return loc_database_read_header_v0(db, f);
2601e83e
MT
216
217 default:
218 ERROR(db->ctx, "Incompatible database version: %u\n", db->version);
219 return 1;
220 }
221}
222
a7431f1a 223static int loc_database_read(struct loc_database* db, FILE* f) {
02879100
MT
224 clock_t start = clock();
225
226 // Read magic bytes
a7431f1a 227 int r = loc_database_read_magic(db, f);
02879100
MT
228 if (r)
229 return r;
230
231 // Read the header
a7431f1a 232 r = loc_database_read_header(db, f);
02879100
MT
233 if (r)
234 return r;
235
236 clock_t end = clock();
237
238 INFO(db->ctx, "Opened database in %.8fs\n",
239 (double)(end - start) / CLOCKS_PER_SEC);
240
241 return 0;
242}
243
c182393f 244LOC_EXPORT int loc_database_new(struct loc_ctx* ctx, struct loc_database** database, FILE* f) {
a7431f1a
MT
245 // Fail on invalid file handle
246 if (!f)
247 return -EINVAL;
248
c182393f
MT
249 struct loc_database* db = calloc(1, sizeof(*db));
250 if (!db)
251 return -ENOMEM;
252
253 // Reference context
254 db->ctx = loc_ref(ctx);
255 db->refcount = 1;
256
257 DEBUG(db->ctx, "Database object allocated at %p\n", db);
258
a7431f1a 259 int r = loc_database_read(db, f);
02879100
MT
260 if (r) {
261 loc_database_unref(db);
2601e83e 262 return r;
02879100 263 }
2601e83e 264
c182393f
MT
265 *database = db;
266
2601e83e 267 return 0;
2601e83e
MT
268}
269
c182393f
MT
270LOC_EXPORT struct loc_database* loc_database_ref(struct loc_database* db) {
271 db->refcount++;
272
273 return db;
8f5b676a
MT
274}
275
c182393f 276static void loc_database_free(struct loc_database* db) {
f10ebc2d
MT
277 int r;
278
c182393f 279 DEBUG(db->ctx, "Releasing database %p\n", db);
c34e76f1 280
c182393f
MT
281 // Removing all ASes
282 if (db->as_v0) {
f10ebc2d 283 r = munmap(db->as_v0, db->as_count * sizeof(*db->as_v0));
c182393f
MT
284 if (r)
285 ERROR(db->ctx, "Could not unmap AS section: %s\n", strerror(errno));
286 }
c34e76f1 287
f10ebc2d
MT
288 // Remove mapped network sections
289 if (db->networks_v0) {
290 r = munmap(db->networks_v0, db->networks_count * sizeof(*db->networks_v0));
291 if (r)
292 ERROR(db->ctx, "Could not unmap networks section: %s\n", strerror(errno));
293 }
294
295 // Remove mapped network nodes section
296 if (db->network_nodes_v0) {
297 r = munmap(db->network_nodes_v0, db->network_nodes_count * sizeof(*db->network_nodes_v0));
298 if (r)
299 ERROR(db->ctx, "Could not unmap network nodes section: %s\n", strerror(errno));
300 }
301
c182393f 302 loc_stringpool_unref(db->pool);
c34e76f1 303
c182393f
MT
304 loc_unref(db->ctx);
305 free(db);
c34e76f1
MT
306}
307
c182393f
MT
308LOC_EXPORT struct loc_database* loc_database_unref(struct loc_database* db) {
309 if (--db->refcount > 0)
310 return NULL;
78ace4ed 311
c182393f
MT
312 loc_database_free(db);
313 return NULL;
314}
78ace4ed 315
c182393f
MT
316LOC_EXPORT time_t loc_database_created_at(struct loc_database* db) {
317 return db->created_at;
318}
78ace4ed 319
c182393f
MT
320LOC_EXPORT const char* loc_database_get_vendor(struct loc_database* db) {
321 return loc_stringpool_get(db->pool, db->vendor);
322}
78ace4ed 323
c182393f
MT
324LOC_EXPORT const char* loc_database_get_description(struct loc_database* db) {
325 return loc_stringpool_get(db->pool, db->description);
326}
78ace4ed 327
4bf49d00
MT
328LOC_EXPORT const char* loc_database_get_license(struct loc_database* db) {
329 return loc_stringpool_get(db->pool, db->license);
330}
331
c182393f
MT
332LOC_EXPORT size_t loc_database_count_as(struct loc_database* db) {
333 return db->as_count;
78ace4ed
MT
334}
335
c182393f
MT
336// Returns the AS at position pos
337static int loc_database_fetch_as(struct loc_database* db, struct loc_as** as, off_t pos) {
338 if ((size_t)pos >= db->as_count)
339 return -EINVAL;
2601e83e 340
c182393f 341 DEBUG(db->ctx, "Fetching AS at position %jd\n", pos);
2601e83e
MT
342
343 int r;
c182393f
MT
344 switch (db->version) {
345 case 0:
346 r = loc_as_new_from_database_v0(db->ctx, db->pool, as, db->as_v0 + pos);
347 break;
2601e83e 348
c182393f
MT
349 default:
350 return -1;
351 }
2601e83e 352
c182393f
MT
353 if (r == 0) {
354 DEBUG(db->ctx, "Got AS%u\n", loc_as_get_number(*as));
2601e83e 355 }
2601e83e 356
c182393f
MT
357 return r;
358}
c34e76f1 359
c182393f
MT
360// Performs a binary search to find the AS in the list
361LOC_EXPORT int loc_database_get_as(struct loc_database* db, struct loc_as** as, uint32_t number) {
362 off_t lo = 0;
363 off_t hi = db->as_count - 1;
c34e76f1 364
8f3e2a06
MT
365 // Save start time
366 clock_t start = clock();
367
c182393f
MT
368 while (lo <= hi) {
369 off_t i = (lo + hi) / 2;
8f5b676a 370
c182393f
MT
371 // Fetch AS in the middle between lo and hi
372 int r = loc_database_fetch_as(db, as, i);
373 if (r)
374 return r;
a5db3e49 375
c182393f
MT
376 // Check if this is a match
377 uint32_t as_number = loc_as_get_number(*as);
8f3e2a06
MT
378 if (as_number == number) {
379 clock_t end = clock();
380
381 // Log how fast this has been
382 DEBUG(db->ctx, "Found AS%u in %.8fs\n", as_number,
383 (double)(end - start) / CLOCKS_PER_SEC);
384
c182393f 385 return 0;
8f3e2a06 386 }
c182393f
MT
387
388 // If it wasn't, we release the AS and
389 // adjust our search pointers
390 loc_as_unref(*as);
391
392 if (as_number < number) {
393 lo = i + 1;
394 } else
395 hi = i - 1;
396 }
2601e83e 397
c182393f
MT
398 // Nothing found
399 *as = NULL;
2601e83e 400
8f3e2a06 401 return 1;
2601e83e 402}
10778041
MT
403
404// Returns the network at position pos
405static int loc_database_fetch_network(struct loc_database* db, struct loc_network** network, struct in6_addr* address, off_t pos) {
406 if ((size_t)pos >= db->networks_count)
407 return -EINVAL;
408
409 DEBUG(db->ctx, "Fetching network at position %jd\n", pos);
410
411 int r;
412 switch (db->version) {
413 case 0:
414 r = loc_network_new_from_database_v0(db->ctx, network, address, db->networks_v0 + pos);
415 break;
416
417 default:
418 return -1;
419 }
420
421 if (r == 0) {
422 char* string = loc_network_str(*network);
423 DEBUG(db->ctx, "Got network %s\n", string);
424 free(string);
425 }
426
427 return r;
428}
2a30e4de 429
025ef489
MT
430static int __loc_database_node_is_leaf(const struct loc_database_network_node_v0* node) {
431 return (node->zero == htobe32(0xffffffff));
432}
433
434static int __loc_database_lookup_handle_leaf(struct loc_database* db, const struct in6_addr* address,
2a30e4de
MT
435 struct loc_network** network, struct in6_addr* network_address,
436 const struct loc_database_network_node_v0* node) {
025ef489 437 DEBUG(db->ctx, "Handling leaf node at %jd\n", node - db->network_nodes_v0);
2a30e4de
MT
438
439 // Fetch the network
440 int r = loc_database_fetch_network(db, network,
441 network_address, be32toh(node->one));
442 if (r)
443 return r;
444
445 // Check if the given IP address is inside the network
446 r = loc_network_match_address(*network, address);
447 if (r) {
448 DEBUG(db->ctx, "Searched address is not part of the network\n");
449
450 loc_network_unref(*network);
451 *network = NULL;
452 return 1;
453 }
454
455 // A network was found and the IP address matches
456 return 0;
457}
458
459// Returns the highest result available
460static int __loc_database_lookup_max(struct loc_database* db, const struct in6_addr* address,
461 struct loc_network** network, struct in6_addr* network_address,
f66f15e1 462 const struct loc_database_network_node_v0* node, unsigned int level) {
2a30e4de 463 // If the node is a leaf node, we end here
025ef489
MT
464 if (__loc_database_node_is_leaf(node))
465 return __loc_database_lookup_handle_leaf(db, address, network, network_address, node);
2a30e4de 466
025ef489 467 int r;
2a30e4de
MT
468 off_t node_index;
469
470 // Try to go down the ones path first
471 if (node->one) {
472 node_index = be32toh(node->one);
473 in6_addr_set_bit(network_address, level, 1);
474
475 // Check boundaries
476 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
477 r = __loc_database_lookup_max(db, address, network, network_address,
478 db->network_nodes_v0 + node_index, level + 1);
479
480 // Abort when match was found or error
481 if (r <= 0)
482 return r;
483 }
484 }
485
486 // ... and if that fails, we try to go down one step on a zero
487 // branch and then try the ones again...
488 if (node->zero) {
489 node_index = be32toh(node->zero);
490 in6_addr_set_bit(network_address, level, 0);
491
492 // Check boundaries
493 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
494 r = __loc_database_lookup_max(db, address, network, network_address,
495 db->network_nodes_v0 + node_index, level + 1);
496
497 // Abort when match was found or error
498 if (r <= 0)
499 return r;
500 }
501 }
502
503 // End of path
504 return 1;
505}
506
507// Searches for an exact match along the path
508static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* address,
509 struct loc_network** network, struct in6_addr* network_address,
f66f15e1 510 const struct loc_database_network_node_v0* node, unsigned int level) {
2a30e4de 511 // If the node is a leaf node, we end here
025ef489
MT
512 if (__loc_database_node_is_leaf(node))
513 return __loc_database_lookup_handle_leaf(db, address, network, network_address, node);
2a30e4de 514
025ef489 515 int r;
2a30e4de
MT
516 off_t node_index;
517
518 // Follow the path
519 int bit = in6_addr_get_bit(address, level);
520 in6_addr_set_bit(network_address, level, bit);
521
522 if (bit == 0)
523 node_index = be32toh(node->zero);
524 else
525 node_index = be32toh(node->one);
526
527 // If we point back to root, the path ends here
528 if (node_index == 0) {
529 DEBUG(db->ctx, "Tree ends here\n");
530 return 1;
531 }
532
533 // Check boundaries
534 if ((size_t)node_index >= db->network_nodes_count)
535 return -EINVAL;
536
537 // Move on to the next node
538 r = __loc_database_lookup(db, address, network, network_address,
539 db->network_nodes_v0 + node_index, level + 1);
540
541 // End here if a result was found
542 if (r == 0)
543 return r;
544
545 // Raise any errors
546 else if (r < 0)
547 return r;
548
549 DEBUG(db->ctx, "Could not find an exact match at %u\n", level);
550
551 // If nothing was found, we have to search for an inexact match
552 return __loc_database_lookup_max(db, address, network, network_address, node, level);
553}
554
555LOC_EXPORT int loc_database_lookup(struct loc_database* db,
556 struct in6_addr* address, struct loc_network** network) {
557 struct in6_addr network_address;
558 memset(&network_address, 0, sizeof(network_address));
559
560 *network = NULL;
561
562 // Save start time
563 clock_t start = clock();
564
565 int r = __loc_database_lookup(db, address, network, &network_address,
566 db->network_nodes_v0, 0);
567
568 clock_t end = clock();
569
570 // Log how fast this has been
571 DEBUG(db->ctx, "Executed network search in %.8fs\n",
572 (double)(end - start) / CLOCKS_PER_SEC);
573
574 return r;
575}
576
577LOC_EXPORT int loc_database_lookup_from_string(struct loc_database* db,
578 const char* string, struct loc_network** network) {
579 struct in6_addr address;
580
581 int r = loc_parse_address(db->ctx, string, &address);
582 if (r)
583 return r;
584
585 return loc_database_lookup(db, &address, network);
586}
7e13db74
MT
587
588// Enumerator
589
590LOC_EXPORT int loc_database_enumerator_new(struct loc_database_enumerator** enumerator, struct loc_database* db) {
591 struct loc_database_enumerator* e = calloc(1, sizeof(*e));
592 if (!e)
593 return -ENOMEM;
594
595 // Reference context
596 e->ctx = loc_ref(db->ctx);
597 e->db = loc_database_ref(db);
598 e->refcount = 1;
599
600 DEBUG(e->ctx, "Database enumerator object allocated at %p\n", e);
601
602 *enumerator = e;
603 return 0;
604}
605
606LOC_EXPORT struct loc_database_enumerator* loc_database_enumerator_ref(struct loc_database_enumerator* enumerator) {
607 enumerator->refcount++;
608
609 return enumerator;
610}
611
612static void loc_database_enumerator_free(struct loc_database_enumerator* enumerator) {
613 DEBUG(enumerator->ctx, "Releasing database enumerator %p\n", enumerator);
614
615 // Release all references
616 loc_database_unref(enumerator->db);
617 loc_unref(enumerator->ctx);
618
619 free(enumerator);
620}
621
622LOC_EXPORT struct loc_database_enumerator* loc_database_enumerator_unref(struct loc_database_enumerator* enumerator) {
623 if (!enumerator)
624 return NULL;
625
626 if (--enumerator->refcount > 0)
627 return enumerator;
628
629 loc_database_enumerator_free(enumerator);
630 return NULL;
631}