]> git.ipfire.org Git - people/ms/libloc.git/blob - src/database.c
database: Encode prefix length into tree
[people/ms/libloc.git] / src / database.c
1 /*
2 libloc - A library to determine the location of someone on the Internet
3
4 Copyright (C) 2017 IPFire Development Team <info@ipfire.org>
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15 */
16
17 #include <arpa/inet.h>
18 #include <ctype.h>
19 #include <endian.h>
20 #include <errno.h>
21 #include <netinet/in.h>
22 #include <stddef.h>
23 #include <stdint.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <sys/mman.h>
28 #include <sys/types.h>
29 #include <time.h>
30 #include <unistd.h>
31
32 #include <loc/libloc.h>
33 #include <loc/as.h>
34 #include <loc/database.h>
35 #include <loc/format.h>
36 #include <loc/network.h>
37 #include <loc/private.h>
38 #include <loc/stringpool.h>
39
40 struct loc_database {
41 struct loc_ctx* ctx;
42 int refcount;
43
44 unsigned int version;
45 time_t created_at;
46 off_t vendor;
47 off_t description;
48 off_t license;
49
50 // ASes in the database
51 struct loc_database_as_v0* as_v0;
52 size_t as_count;
53
54 // Network tree
55 struct loc_database_network_node_v0* network_nodes_v0;
56 size_t network_nodes_count;
57
58 // Networks
59 struct loc_database_network_v0* networks_v0;
60 size_t networks_count;
61
62 struct loc_stringpool* pool;
63 };
64
65 struct loc_database_enumerator {
66 struct loc_ctx* ctx;
67 struct loc_database* db;
68 int refcount;
69
70 // Search string
71 char* string;
72
73 // Index of the AS we are looking at
74 unsigned int as_index;
75 };
76
77 static int loc_database_read_magic(struct loc_database* db, FILE* f) {
78 struct loc_database_magic magic;
79
80 // Read from file
81 size_t bytes_read = fread(&magic, 1, sizeof(magic), f);
82
83 // Check if we have been able to read enough data
84 if (bytes_read < sizeof(magic)) {
85 ERROR(db->ctx, "Could not read enough data to validate magic bytes\n");
86 DEBUG(db->ctx, "Read %zu bytes, but needed %zu\n", bytes_read, sizeof(magic));
87 return -ENOMSG;
88 }
89
90 // Compare magic bytes
91 if (memcmp(LOC_DATABASE_MAGIC, magic.magic, strlen(LOC_DATABASE_MAGIC)) == 0) {
92 DEBUG(db->ctx, "Magic value matches\n");
93
94 // Parse version
95 db->version = be16toh(magic.version);
96 DEBUG(db->ctx, "Database version is %u\n", db->version);
97
98 return 0;
99 }
100
101 ERROR(db->ctx, "Database format is not compatible\n");
102
103 // Return an error
104 return 1;
105 }
106
107 static int loc_database_read_as_section_v0(struct loc_database* db,
108 FILE* f, const struct loc_database_header_v0* header) {
109 off_t as_offset = be32toh(header->as_offset);
110 size_t as_length = be32toh(header->as_length);
111
112 DEBUG(db->ctx, "Reading AS section from %jd (%zu bytes)\n", as_offset, as_length);
113
114 if (as_length > 0) {
115 db->as_v0 = mmap(NULL, as_length, PROT_READ,
116 MAP_SHARED, fileno(f), as_offset);
117
118 if (db->as_v0 == MAP_FAILED)
119 return -errno;
120 }
121
122 db->as_count = as_length / sizeof(*db->as_v0);
123
124 INFO(db->ctx, "Read %zu ASes from the database\n", db->as_count);
125
126 return 0;
127 }
128
129 static int loc_database_read_network_nodes_section_v0(struct loc_database* db,
130 FILE* f, const struct loc_database_header_v0* header) {
131 off_t network_nodes_offset = be32toh(header->network_tree_offset);
132 size_t network_nodes_length = be32toh(header->network_tree_length);
133
134 DEBUG(db->ctx, "Reading network nodes section from %jd (%zu bytes)\n",
135 network_nodes_offset, network_nodes_length);
136
137 if (network_nodes_length > 0) {
138 db->network_nodes_v0 = mmap(NULL, network_nodes_length, PROT_READ,
139 MAP_SHARED, fileno(f), network_nodes_offset);
140
141 if (db->network_nodes_v0 == MAP_FAILED)
142 return -errno;
143 }
144
145 db->network_nodes_count = network_nodes_length / sizeof(*db->network_nodes_v0);
146
147 INFO(db->ctx, "Read %zu network nodes from the database\n", db->network_nodes_count);
148
149 return 0;
150 }
151
152 static int loc_database_read_networks_section_v0(struct loc_database* db,
153 FILE* f, const struct loc_database_header_v0* header) {
154 off_t networks_offset = be32toh(header->network_data_offset);
155 size_t networks_length = be32toh(header->network_data_length);
156
157 DEBUG(db->ctx, "Reading networks section from %jd (%zu bytes)\n",
158 networks_offset, networks_length);
159
160 if (networks_length > 0) {
161 db->networks_v0 = mmap(NULL, networks_length, PROT_READ,
162 MAP_SHARED, fileno(f), networks_offset);
163
164 if (db->networks_v0 == MAP_FAILED)
165 return -errno;
166 }
167
168 db->networks_count = networks_length / sizeof(*db->networks_v0);
169
170 INFO(db->ctx, "Read %zu networks from the database\n", db->networks_count);
171
172 return 0;
173 }
174
175 static int loc_database_read_header_v0(struct loc_database* db, FILE* f) {
176 struct loc_database_header_v0 header;
177
178 // Read from file
179 size_t size = fread(&header, 1, sizeof(header), f);
180
181 if (size < sizeof(header)) {
182 ERROR(db->ctx, "Could not read enough data for header\n");
183 return -ENOMSG;
184 }
185
186 // Copy over data
187 db->created_at = be64toh(header.created_at);
188 db->vendor = be32toh(header.vendor);
189 db->description = be32toh(header.description);
190 db->license = be32toh(header.license);
191
192 // Open pool
193 off_t pool_offset = be32toh(header.pool_offset);
194 size_t pool_length = be32toh(header.pool_length);
195
196 int r = loc_stringpool_open(db->ctx, &db->pool,
197 f, pool_length, pool_offset);
198 if (r)
199 return r;
200
201 // AS section
202 r = loc_database_read_as_section_v0(db, f, &header);
203 if (r)
204 return r;
205
206 // Network Nodes
207 r = loc_database_read_network_nodes_section_v0(db, f, &header);
208 if (r)
209 return r;
210
211 // Networks
212 r = loc_database_read_networks_section_v0(db, f, &header);
213 if (r)
214 return r;
215
216 return 0;
217 }
218
219 static int loc_database_read_header(struct loc_database* db, FILE* f) {
220 switch (db->version) {
221 case 0:
222 return loc_database_read_header_v0(db, f);
223
224 default:
225 ERROR(db->ctx, "Incompatible database version: %u\n", db->version);
226 return 1;
227 }
228 }
229
230 static int loc_database_read(struct loc_database* db, FILE* f) {
231 clock_t start = clock();
232
233 // Read magic bytes
234 int r = loc_database_read_magic(db, f);
235 if (r)
236 return r;
237
238 // Read the header
239 r = loc_database_read_header(db, f);
240 if (r)
241 return r;
242
243 clock_t end = clock();
244
245 INFO(db->ctx, "Opened database in %.8fs\n",
246 (double)(end - start) / CLOCKS_PER_SEC);
247
248 return 0;
249 }
250
251 LOC_EXPORT int loc_database_new(struct loc_ctx* ctx, struct loc_database** database, FILE* f) {
252 // Fail on invalid file handle
253 if (!f)
254 return -EINVAL;
255
256 struct loc_database* db = calloc(1, sizeof(*db));
257 if (!db)
258 return -ENOMEM;
259
260 // Reference context
261 db->ctx = loc_ref(ctx);
262 db->refcount = 1;
263
264 DEBUG(db->ctx, "Database object allocated at %p\n", db);
265
266 int r = loc_database_read(db, f);
267 if (r) {
268 loc_database_unref(db);
269 return r;
270 }
271
272 *database = db;
273
274 return 0;
275 }
276
277 LOC_EXPORT struct loc_database* loc_database_ref(struct loc_database* db) {
278 db->refcount++;
279
280 return db;
281 }
282
283 static void loc_database_free(struct loc_database* db) {
284 int r;
285
286 DEBUG(db->ctx, "Releasing database %p\n", db);
287
288 // Removing all ASes
289 if (db->as_v0) {
290 r = munmap(db->as_v0, db->as_count * sizeof(*db->as_v0));
291 if (r)
292 ERROR(db->ctx, "Could not unmap AS section: %s\n", strerror(errno));
293 }
294
295 // Remove mapped network sections
296 if (db->networks_v0) {
297 r = munmap(db->networks_v0, db->networks_count * sizeof(*db->networks_v0));
298 if (r)
299 ERROR(db->ctx, "Could not unmap networks section: %s\n", strerror(errno));
300 }
301
302 // Remove mapped network nodes section
303 if (db->network_nodes_v0) {
304 r = munmap(db->network_nodes_v0, db->network_nodes_count * sizeof(*db->network_nodes_v0));
305 if (r)
306 ERROR(db->ctx, "Could not unmap network nodes section: %s\n", strerror(errno));
307 }
308
309 loc_stringpool_unref(db->pool);
310
311 loc_unref(db->ctx);
312 free(db);
313 }
314
315 LOC_EXPORT struct loc_database* loc_database_unref(struct loc_database* db) {
316 if (--db->refcount > 0)
317 return NULL;
318
319 loc_database_free(db);
320 return NULL;
321 }
322
323 LOC_EXPORT time_t loc_database_created_at(struct loc_database* db) {
324 return db->created_at;
325 }
326
327 LOC_EXPORT const char* loc_database_get_vendor(struct loc_database* db) {
328 return loc_stringpool_get(db->pool, db->vendor);
329 }
330
331 LOC_EXPORT const char* loc_database_get_description(struct loc_database* db) {
332 return loc_stringpool_get(db->pool, db->description);
333 }
334
335 LOC_EXPORT const char* loc_database_get_license(struct loc_database* db) {
336 return loc_stringpool_get(db->pool, db->license);
337 }
338
339 LOC_EXPORT size_t loc_database_count_as(struct loc_database* db) {
340 return db->as_count;
341 }
342
343 // Returns the AS at position pos
344 static int loc_database_fetch_as(struct loc_database* db, struct loc_as** as, off_t pos) {
345 if ((size_t)pos >= db->as_count)
346 return -EINVAL;
347
348 DEBUG(db->ctx, "Fetching AS at position %jd\n", pos);
349
350 int r;
351 switch (db->version) {
352 case 0:
353 r = loc_as_new_from_database_v0(db->ctx, db->pool, as, db->as_v0 + pos);
354 break;
355
356 default:
357 return -1;
358 }
359
360 if (r == 0) {
361 DEBUG(db->ctx, "Got AS%u\n", loc_as_get_number(*as));
362 }
363
364 return r;
365 }
366
367 // Performs a binary search to find the AS in the list
368 LOC_EXPORT int loc_database_get_as(struct loc_database* db, struct loc_as** as, uint32_t number) {
369 off_t lo = 0;
370 off_t hi = db->as_count - 1;
371
372 // Save start time
373 clock_t start = clock();
374
375 while (lo <= hi) {
376 off_t i = (lo + hi) / 2;
377
378 // Fetch AS in the middle between lo and hi
379 int r = loc_database_fetch_as(db, as, i);
380 if (r)
381 return r;
382
383 // Check if this is a match
384 uint32_t as_number = loc_as_get_number(*as);
385 if (as_number == number) {
386 clock_t end = clock();
387
388 // Log how fast this has been
389 DEBUG(db->ctx, "Found AS%u in %.8fs\n", as_number,
390 (double)(end - start) / CLOCKS_PER_SEC);
391
392 return 0;
393 }
394
395 // If it wasn't, we release the AS and
396 // adjust our search pointers
397 loc_as_unref(*as);
398
399 if (as_number < number) {
400 lo = i + 1;
401 } else
402 hi = i - 1;
403 }
404
405 // Nothing found
406 *as = NULL;
407
408 return 1;
409 }
410
411 // Returns the network at position pos
412 static int loc_database_fetch_network(struct loc_database* db, struct loc_network** network,
413 struct in6_addr* address, unsigned int prefix, off_t pos) {
414 if ((size_t)pos >= db->networks_count)
415 return -EINVAL;
416
417 DEBUG(db->ctx, "Fetching network at position %jd\n", pos);
418
419 int r;
420 switch (db->version) {
421 case 0:
422 r = loc_network_new_from_database_v0(db->ctx, network,
423 address, prefix, db->networks_v0 + pos);
424 break;
425
426 default:
427 return -1;
428 }
429
430 if (r == 0) {
431 char* string = loc_network_str(*network);
432 DEBUG(db->ctx, "Got network %s\n", string);
433 free(string);
434 }
435
436 return r;
437 }
438
439 static int __loc_database_node_is_leaf(const struct loc_database_network_node_v0* node) {
440 return (node->network != htobe32(0xffffffff));
441 }
442
443 static int __loc_database_lookup_handle_leaf(struct loc_database* db, const struct in6_addr* address,
444 struct loc_network** network, struct in6_addr* network_address, unsigned int prefix,
445 const struct loc_database_network_node_v0* node) {
446 off_t network_index = be32toh(node->network);
447
448 DEBUG(db->ctx, "Handling leaf node at %jd (%jd)\n", node - db->network_nodes_v0, network_index);
449
450 // Fetch the network
451 int r = loc_database_fetch_network(db, network,
452 network_address, prefix, network_index);
453 if (r)
454 return r;
455
456 char* s = loc_network_str(*network);
457 DEBUG(db->ctx, "Got network %s\n", s);
458 free(s);
459
460 // Check if the given IP address is inside the network
461 r = loc_network_match_address(*network, address);
462 if (r) {
463 DEBUG(db->ctx, "Searched address is not part of the network\n");
464
465 loc_network_unref(*network);
466 *network = NULL;
467 return 1;
468 }
469
470 // A network was found and the IP address matches
471 return 0;
472 }
473
474 // Returns the highest result available
475 static int __loc_database_lookup_max(struct loc_database* db, const struct in6_addr* address,
476 struct loc_network** network, struct in6_addr* network_address,
477 const struct loc_database_network_node_v0* node, unsigned int level) {
478 int r;
479 off_t node_index;
480
481 // If the node is a leaf node, we end here
482 if (__loc_database_node_is_leaf(node)) {
483 r = __loc_database_lookup_handle_leaf(db, address, network, network_address, level, node);
484 if (r <= 0)
485 return r;
486 }
487
488 // Try to go down the ones path first
489 if (node->one) {
490 node_index = be32toh(node->one);
491 in6_addr_set_bit(network_address, level, 1);
492
493 // Check boundaries
494 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
495 r = __loc_database_lookup_max(db, address, network, network_address,
496 db->network_nodes_v0 + node_index, level + 1);
497
498 // Abort when match was found or error
499 if (r <= 0)
500 return r;
501 }
502 }
503
504 // ... and if that fails, we try to go down one step on a zero
505 // branch and then try the ones again...
506 if (node->zero) {
507 node_index = be32toh(node->zero);
508 in6_addr_set_bit(network_address, level, 0);
509
510 // Check boundaries
511 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
512 r = __loc_database_lookup_max(db, address, network, network_address,
513 db->network_nodes_v0 + node_index, level + 1);
514
515 // Abort when match was found or error
516 if (r <= 0)
517 return r;
518 }
519 }
520
521 // End of path
522 return 1;
523 }
524
525 // Searches for an exact match along the path
526 static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* address,
527 struct loc_network** network, struct in6_addr* network_address,
528 const struct loc_database_network_node_v0* node, unsigned int level) {
529 int r;
530 off_t node_index;
531
532 // If the node is a leaf node, we end here
533 if (__loc_database_node_is_leaf(node)) {
534 r = __loc_database_lookup_handle_leaf(db, address, network, network_address, level, node);
535 if (r <= 0)
536 return r;
537 }
538
539 // Follow the path
540 int bit = in6_addr_get_bit(address, level);
541 in6_addr_set_bit(network_address, level, bit);
542
543 if (bit == 0)
544 node_index = be32toh(node->zero);
545 else
546 node_index = be32toh(node->one);
547
548 // If we point back to root, the path ends here
549 if (node_index == 0) {
550 DEBUG(db->ctx, "Tree ends here\n");
551 return 1;
552 }
553
554 // Check boundaries
555 if ((size_t)node_index >= db->network_nodes_count)
556 return -EINVAL;
557
558 // Move on to the next node
559 r = __loc_database_lookup(db, address, network, network_address,
560 db->network_nodes_v0 + node_index, level + 1);
561
562 // End here if a result was found
563 if (r == 0)
564 return r;
565
566 // Raise any errors
567 else if (r < 0)
568 return r;
569
570 DEBUG(db->ctx, "Could not find an exact match at %u\n", level);
571
572 // If nothing was found, we have to search for an inexact match
573 return __loc_database_lookup_max(db, address, network, network_address, node, level);
574 }
575
576 LOC_EXPORT int loc_database_lookup(struct loc_database* db,
577 struct in6_addr* address, struct loc_network** network) {
578 struct in6_addr network_address;
579 memset(&network_address, 0, sizeof(network_address));
580
581 *network = NULL;
582
583 // Save start time
584 clock_t start = clock();
585
586 int r = __loc_database_lookup(db, address, network, &network_address,
587 db->network_nodes_v0, 0);
588
589 clock_t end = clock();
590
591 // Log how fast this has been
592 DEBUG(db->ctx, "Executed network search in %.8fs\n",
593 (double)(end - start) / CLOCKS_PER_SEC);
594
595 return r;
596 }
597
598 LOC_EXPORT int loc_database_lookup_from_string(struct loc_database* db,
599 const char* string, struct loc_network** network) {
600 struct in6_addr address;
601
602 int r = loc_parse_address(db->ctx, string, &address);
603 if (r)
604 return r;
605
606 return loc_database_lookup(db, &address, network);
607 }
608
609 // Enumerator
610
611 LOC_EXPORT int loc_database_enumerator_new(struct loc_database_enumerator** enumerator, struct loc_database* db) {
612 struct loc_database_enumerator* e = calloc(1, sizeof(*e));
613 if (!e)
614 return -ENOMEM;
615
616 // Reference context
617 e->ctx = loc_ref(db->ctx);
618 e->db = loc_database_ref(db);
619 e->refcount = 1;
620
621 DEBUG(e->ctx, "Database enumerator object allocated at %p\n", e);
622
623 *enumerator = e;
624 return 0;
625 }
626
627 LOC_EXPORT struct loc_database_enumerator* loc_database_enumerator_ref(struct loc_database_enumerator* enumerator) {
628 enumerator->refcount++;
629
630 return enumerator;
631 }
632
633 static void loc_database_enumerator_free(struct loc_database_enumerator* enumerator) {
634 DEBUG(enumerator->ctx, "Releasing database enumerator %p\n", enumerator);
635
636 // Release all references
637 loc_database_unref(enumerator->db);
638 loc_unref(enumerator->ctx);
639
640 if (enumerator->string)
641 free(enumerator->string);
642
643 free(enumerator);
644 }
645
646 LOC_EXPORT struct loc_database_enumerator* loc_database_enumerator_unref(struct loc_database_enumerator* enumerator) {
647 if (!enumerator)
648 return NULL;
649
650 if (--enumerator->refcount > 0)
651 return enumerator;
652
653 loc_database_enumerator_free(enumerator);
654 return NULL;
655 }
656
657 LOC_EXPORT int loc_database_enumerator_set_string(struct loc_database_enumerator* enumerator, const char* string) {
658 enumerator->string = strdup(string);
659
660 // Make the string lowercase
661 for (char *p = enumerator->string; *p; p++)
662 *p = tolower(*p);
663
664 return 0;
665 }
666
667 LOC_EXPORT struct loc_as* loc_database_enumerator_next_as(struct loc_database_enumerator* enumerator) {
668 struct loc_database* db = enumerator->db;
669 struct loc_as* as;
670
671 while (enumerator->as_index < db->as_count) {
672 // Fetch the next AS
673 int r = loc_database_fetch_as(db, &as, enumerator->as_index++);
674 if (r)
675 return NULL;
676
677 r = loc_as_match_string(as, enumerator->string);
678 if (r == 0) {
679 DEBUG(enumerator->ctx, "AS%d (%s) matches %s\n",
680 loc_as_get_number(as), loc_as_get_name(as), enumerator->string);
681
682 return as;
683 }
684
685 // No match
686 loc_as_unref(as);
687 }
688
689 // Reset the index
690 enumerator->as_index = 0;
691
692 // We have searched through all of them
693 return NULL;
694 }