]> git.ipfire.org Git - people/ms/libloc.git/blame_incremental - src/database.c
Implement filtering networks for the ASN they belong to
[people/ms/libloc.git] / src / database.c
... / ...
CommitLineData
1/*
2 libloc - A library to determine the location of someone on the Internet
3
4 Copyright (C) 2017 IPFire Development Team <info@ipfire.org>
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15*/
16
17#include <arpa/inet.h>
18#include <ctype.h>
19#include <endian.h>
20#include <errno.h>
21#include <netinet/in.h>
22#include <stddef.h>
23#include <stdint.h>
24#include <stdio.h>
25#include <stdlib.h>
26#include <string.h>
27#include <sys/mman.h>
28#include <sys/types.h>
29#include <time.h>
30#include <unistd.h>
31
32#include <loc/libloc.h>
33#include <loc/as.h>
34#include <loc/database.h>
35#include <loc/format.h>
36#include <loc/network.h>
37#include <loc/private.h>
38#include <loc/stringpool.h>
39
40struct loc_database {
41 struct loc_ctx* ctx;
42 int refcount;
43
44 unsigned int version;
45 time_t created_at;
46 off_t vendor;
47 off_t description;
48 off_t license;
49
50 // ASes in the database
51 struct loc_database_as_v0* as_v0;
52 size_t as_count;
53
54 // Network tree
55 struct loc_database_network_node_v0* network_nodes_v0;
56 size_t network_nodes_count;
57
58 // Networks
59 struct loc_database_network_v0* networks_v0;
60 size_t networks_count;
61
62 struct loc_stringpool* pool;
63};
64
65#define MAX_STACK_DEPTH 256
66
67struct loc_node_stack {
68 off_t offset;
69 int i; // Is this node 0 or 1?
70 int depth;
71};
72
73struct loc_database_enumerator {
74 struct loc_ctx* ctx;
75 struct loc_database* db;
76 int refcount;
77
78 // Search string
79 char* string;
80 char country_code[3];
81 uint32_t asn;
82
83 // Index of the AS we are looking at
84 unsigned int as_index;
85
86 // Network state
87 struct in6_addr network_address;
88 struct loc_node_stack network_stack[MAX_STACK_DEPTH];
89 int network_stack_depth;
90 unsigned int* networks_visited;
91};
92
93static int loc_database_read_magic(struct loc_database* db, FILE* f) {
94 struct loc_database_magic magic;
95
96 // Read from file
97 size_t bytes_read = fread(&magic, 1, sizeof(magic), f);
98
99 // Check if we have been able to read enough data
100 if (bytes_read < sizeof(magic)) {
101 ERROR(db->ctx, "Could not read enough data to validate magic bytes\n");
102 DEBUG(db->ctx, "Read %zu bytes, but needed %zu\n", bytes_read, sizeof(magic));
103 return -ENOMSG;
104 }
105
106 // Compare magic bytes
107 if (memcmp(LOC_DATABASE_MAGIC, magic.magic, strlen(LOC_DATABASE_MAGIC)) == 0) {
108 DEBUG(db->ctx, "Magic value matches\n");
109
110 // Parse version
111 db->version = be16toh(magic.version);
112 DEBUG(db->ctx, "Database version is %u\n", db->version);
113
114 return 0;
115 }
116
117 ERROR(db->ctx, "Database format is not compatible\n");
118
119 // Return an error
120 return 1;
121}
122
123static int loc_database_read_as_section_v0(struct loc_database* db,
124 FILE* f, const struct loc_database_header_v0* header) {
125 off_t as_offset = be32toh(header->as_offset);
126 size_t as_length = be32toh(header->as_length);
127
128 DEBUG(db->ctx, "Reading AS section from %jd (%zu bytes)\n", as_offset, as_length);
129
130 if (as_length > 0) {
131 db->as_v0 = mmap(NULL, as_length, PROT_READ,
132 MAP_SHARED, fileno(f), as_offset);
133
134 if (db->as_v0 == MAP_FAILED)
135 return -errno;
136 }
137
138 db->as_count = as_length / sizeof(*db->as_v0);
139
140 INFO(db->ctx, "Read %zu ASes from the database\n", db->as_count);
141
142 return 0;
143}
144
145static int loc_database_read_network_nodes_section_v0(struct loc_database* db,
146 FILE* f, const struct loc_database_header_v0* header) {
147 off_t network_nodes_offset = be32toh(header->network_tree_offset);
148 size_t network_nodes_length = be32toh(header->network_tree_length);
149
150 DEBUG(db->ctx, "Reading network nodes section from %jd (%zu bytes)\n",
151 network_nodes_offset, network_nodes_length);
152
153 if (network_nodes_length > 0) {
154 db->network_nodes_v0 = mmap(NULL, network_nodes_length, PROT_READ,
155 MAP_SHARED, fileno(f), network_nodes_offset);
156
157 if (db->network_nodes_v0 == MAP_FAILED)
158 return -errno;
159 }
160
161 db->network_nodes_count = network_nodes_length / sizeof(*db->network_nodes_v0);
162
163 INFO(db->ctx, "Read %zu network nodes from the database\n", db->network_nodes_count);
164
165 return 0;
166}
167
168static int loc_database_read_networks_section_v0(struct loc_database* db,
169 FILE* f, const struct loc_database_header_v0* header) {
170 off_t networks_offset = be32toh(header->network_data_offset);
171 size_t networks_length = be32toh(header->network_data_length);
172
173 DEBUG(db->ctx, "Reading networks section from %jd (%zu bytes)\n",
174 networks_offset, networks_length);
175
176 if (networks_length > 0) {
177 db->networks_v0 = mmap(NULL, networks_length, PROT_READ,
178 MAP_SHARED, fileno(f), networks_offset);
179
180 if (db->networks_v0 == MAP_FAILED)
181 return -errno;
182 }
183
184 db->networks_count = networks_length / sizeof(*db->networks_v0);
185
186 INFO(db->ctx, "Read %zu networks from the database\n", db->networks_count);
187
188 return 0;
189}
190
191static int loc_database_read_header_v0(struct loc_database* db, FILE* f) {
192 struct loc_database_header_v0 header;
193
194 // Read from file
195 size_t size = fread(&header, 1, sizeof(header), f);
196
197 if (size < sizeof(header)) {
198 ERROR(db->ctx, "Could not read enough data for header\n");
199 return -ENOMSG;
200 }
201
202 // Copy over data
203 db->created_at = be64toh(header.created_at);
204 db->vendor = be32toh(header.vendor);
205 db->description = be32toh(header.description);
206 db->license = be32toh(header.license);
207
208 // Open pool
209 off_t pool_offset = be32toh(header.pool_offset);
210 size_t pool_length = be32toh(header.pool_length);
211
212 int r = loc_stringpool_open(db->ctx, &db->pool,
213 f, pool_length, pool_offset);
214 if (r)
215 return r;
216
217 // AS section
218 r = loc_database_read_as_section_v0(db, f, &header);
219 if (r)
220 return r;
221
222 // Network Nodes
223 r = loc_database_read_network_nodes_section_v0(db, f, &header);
224 if (r)
225 return r;
226
227 // Networks
228 r = loc_database_read_networks_section_v0(db, f, &header);
229 if (r)
230 return r;
231
232 return 0;
233}
234
235static int loc_database_read_header(struct loc_database* db, FILE* f) {
236 switch (db->version) {
237 case 0:
238 return loc_database_read_header_v0(db, f);
239
240 default:
241 ERROR(db->ctx, "Incompatible database version: %u\n", db->version);
242 return 1;
243 }
244}
245
246static int loc_database_read(struct loc_database* db, FILE* f) {
247 clock_t start = clock();
248
249 // Read magic bytes
250 int r = loc_database_read_magic(db, f);
251 if (r)
252 return r;
253
254 // Read the header
255 r = loc_database_read_header(db, f);
256 if (r)
257 return r;
258
259 clock_t end = clock();
260
261 INFO(db->ctx, "Opened database in %.8fs\n",
262 (double)(end - start) / CLOCKS_PER_SEC);
263
264 return 0;
265}
266
267LOC_EXPORT int loc_database_new(struct loc_ctx* ctx, struct loc_database** database, FILE* f) {
268 // Fail on invalid file handle
269 if (!f)
270 return -EINVAL;
271
272 struct loc_database* db = calloc(1, sizeof(*db));
273 if (!db)
274 return -ENOMEM;
275
276 // Reference context
277 db->ctx = loc_ref(ctx);
278 db->refcount = 1;
279
280 DEBUG(db->ctx, "Database object allocated at %p\n", db);
281
282 int r = loc_database_read(db, f);
283 if (r) {
284 loc_database_unref(db);
285 return r;
286 }
287
288 *database = db;
289
290 return 0;
291}
292
293LOC_EXPORT struct loc_database* loc_database_ref(struct loc_database* db) {
294 db->refcount++;
295
296 return db;
297}
298
299static void loc_database_free(struct loc_database* db) {
300 int r;
301
302 DEBUG(db->ctx, "Releasing database %p\n", db);
303
304 // Removing all ASes
305 if (db->as_v0) {
306 r = munmap(db->as_v0, db->as_count * sizeof(*db->as_v0));
307 if (r)
308 ERROR(db->ctx, "Could not unmap AS section: %s\n", strerror(errno));
309 }
310
311 // Remove mapped network sections
312 if (db->networks_v0) {
313 r = munmap(db->networks_v0, db->networks_count * sizeof(*db->networks_v0));
314 if (r)
315 ERROR(db->ctx, "Could not unmap networks section: %s\n", strerror(errno));
316 }
317
318 // Remove mapped network nodes section
319 if (db->network_nodes_v0) {
320 r = munmap(db->network_nodes_v0, db->network_nodes_count * sizeof(*db->network_nodes_v0));
321 if (r)
322 ERROR(db->ctx, "Could not unmap network nodes section: %s\n", strerror(errno));
323 }
324
325 loc_stringpool_unref(db->pool);
326
327 loc_unref(db->ctx);
328 free(db);
329}
330
331LOC_EXPORT struct loc_database* loc_database_unref(struct loc_database* db) {
332 if (--db->refcount > 0)
333 return NULL;
334
335 loc_database_free(db);
336 return NULL;
337}
338
339LOC_EXPORT time_t loc_database_created_at(struct loc_database* db) {
340 return db->created_at;
341}
342
343LOC_EXPORT const char* loc_database_get_vendor(struct loc_database* db) {
344 return loc_stringpool_get(db->pool, db->vendor);
345}
346
347LOC_EXPORT const char* loc_database_get_description(struct loc_database* db) {
348 return loc_stringpool_get(db->pool, db->description);
349}
350
351LOC_EXPORT const char* loc_database_get_license(struct loc_database* db) {
352 return loc_stringpool_get(db->pool, db->license);
353}
354
355LOC_EXPORT size_t loc_database_count_as(struct loc_database* db) {
356 return db->as_count;
357}
358
359// Returns the AS at position pos
360static int loc_database_fetch_as(struct loc_database* db, struct loc_as** as, off_t pos) {
361 if ((size_t)pos >= db->as_count)
362 return -EINVAL;
363
364 DEBUG(db->ctx, "Fetching AS at position %jd\n", pos);
365
366 int r;
367 switch (db->version) {
368 case 0:
369 r = loc_as_new_from_database_v0(db->ctx, db->pool, as, db->as_v0 + pos);
370 break;
371
372 default:
373 return -1;
374 }
375
376 if (r == 0) {
377 DEBUG(db->ctx, "Got AS%u\n", loc_as_get_number(*as));
378 }
379
380 return r;
381}
382
383// Performs a binary search to find the AS in the list
384LOC_EXPORT int loc_database_get_as(struct loc_database* db, struct loc_as** as, uint32_t number) {
385 off_t lo = 0;
386 off_t hi = db->as_count - 1;
387
388 // Save start time
389 clock_t start = clock();
390
391 while (lo <= hi) {
392 off_t i = (lo + hi) / 2;
393
394 // Fetch AS in the middle between lo and hi
395 int r = loc_database_fetch_as(db, as, i);
396 if (r)
397 return r;
398
399 // Check if this is a match
400 uint32_t as_number = loc_as_get_number(*as);
401 if (as_number == number) {
402 clock_t end = clock();
403
404 // Log how fast this has been
405 DEBUG(db->ctx, "Found AS%u in %.8fs\n", as_number,
406 (double)(end - start) / CLOCKS_PER_SEC);
407
408 return 0;
409 }
410
411 // If it wasn't, we release the AS and
412 // adjust our search pointers
413 loc_as_unref(*as);
414
415 if (as_number < number) {
416 lo = i + 1;
417 } else
418 hi = i - 1;
419 }
420
421 // Nothing found
422 *as = NULL;
423
424 return 1;
425}
426
427// Returns the network at position pos
428static int loc_database_fetch_network(struct loc_database* db, struct loc_network** network,
429 struct in6_addr* address, unsigned int prefix, off_t pos) {
430 if ((size_t)pos >= db->networks_count)
431 return -EINVAL;
432
433 DEBUG(db->ctx, "Fetching network at position %jd\n", pos);
434
435 int r;
436 switch (db->version) {
437 case 0:
438 r = loc_network_new_from_database_v0(db->ctx, network,
439 address, prefix, db->networks_v0 + pos);
440 break;
441
442 default:
443 return -1;
444 }
445
446 if (r == 0) {
447 char* string = loc_network_str(*network);
448 DEBUG(db->ctx, "Got network %s\n", string);
449 free(string);
450 }
451
452 return r;
453}
454
455static int __loc_database_node_is_leaf(const struct loc_database_network_node_v0* node) {
456 return (node->network != htobe32(0xffffffff));
457}
458
459static int __loc_database_lookup_handle_leaf(struct loc_database* db, const struct in6_addr* address,
460 struct loc_network** network, struct in6_addr* network_address, unsigned int prefix,
461 const struct loc_database_network_node_v0* node) {
462 off_t network_index = be32toh(node->network);
463
464 DEBUG(db->ctx, "Handling leaf node at %jd (%jd)\n", node - db->network_nodes_v0, network_index);
465
466 // Fetch the network
467 int r = loc_database_fetch_network(db, network,
468 network_address, prefix, network_index);
469 if (r) {
470 ERROR(db->ctx, "Could not fetch network %jd from database\n", network_index);
471 return r;
472 }
473
474 // Check if the given IP address is inside the network
475 r = loc_network_match_address(*network, address);
476 if (r) {
477 DEBUG(db->ctx, "Searched address is not part of the network\n");
478
479 loc_network_unref(*network);
480 *network = NULL;
481 return 1;
482 }
483
484 // A network was found and the IP address matches
485 return 0;
486}
487
488// Searches for an exact match along the path
489static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* address,
490 struct loc_network** network, struct in6_addr* network_address,
491 const struct loc_database_network_node_v0* node, unsigned int level) {
492 int r;
493 off_t node_index;
494
495 // Follow the path
496 int bit = in6_addr_get_bit(address, level);
497 in6_addr_set_bit(network_address, level, bit);
498
499 if (bit == 0)
500 node_index = be32toh(node->zero);
501 else
502 node_index = be32toh(node->one);
503
504 // If the node index is zero, the tree ends here
505 // and we cannot descend any further
506 if (node_index > 0) {
507 // Check boundaries
508 if ((size_t)node_index >= db->network_nodes_count)
509 return -EINVAL;
510
511 // Move on to the next node
512 r = __loc_database_lookup(db, address, network, network_address,
513 db->network_nodes_v0 + node_index, level + 1);
514
515 // End here if a result was found
516 if (r == 0)
517 return r;
518
519 // Raise any errors
520 else if (r < 0)
521 return r;
522
523 DEBUG(db->ctx, "No match found below level %u\n", level);
524 } else {
525 DEBUG(db->ctx, "Tree ended at level %u\n", level);
526 }
527
528 // If this node has a leaf, we will check if it matches
529 if (__loc_database_node_is_leaf(node)) {
530 r = __loc_database_lookup_handle_leaf(db, address, network, network_address, level, node);
531 if (r <= 0)
532 return r;
533 }
534
535 return 1;
536}
537
538LOC_EXPORT int loc_database_lookup(struct loc_database* db,
539 struct in6_addr* address, struct loc_network** network) {
540 struct in6_addr network_address;
541 memset(&network_address, 0, sizeof(network_address));
542
543 *network = NULL;
544
545 // Save start time
546 clock_t start = clock();
547
548 int r = __loc_database_lookup(db, address, network, &network_address,
549 db->network_nodes_v0, 0);
550
551 clock_t end = clock();
552
553 // Log how fast this has been
554 DEBUG(db->ctx, "Executed network search in %.8fs\n",
555 (double)(end - start) / CLOCKS_PER_SEC);
556
557 return r;
558}
559
560LOC_EXPORT int loc_database_lookup_from_string(struct loc_database* db,
561 const char* string, struct loc_network** network) {
562 struct in6_addr address;
563
564 int r = loc_parse_address(db->ctx, string, &address);
565 if (r)
566 return r;
567
568 return loc_database_lookup(db, &address, network);
569}
570
571// Enumerator
572
573LOC_EXPORT int loc_database_enumerator_new(struct loc_database_enumerator** enumerator, struct loc_database* db) {
574 struct loc_database_enumerator* e = calloc(1, sizeof(*e));
575 if (!e)
576 return -ENOMEM;
577
578 // Reference context
579 e->ctx = loc_ref(db->ctx);
580 e->db = loc_database_ref(db);
581 e->refcount = 1;
582
583 // Initialise graph search
584 //e->network_stack[++e->network_stack_depth] = 0;
585 e->network_stack_depth = 1;
586 e->networks_visited = calloc(db->network_nodes_count, sizeof(*e->networks_visited));
587
588 DEBUG(e->ctx, "Database enumerator object allocated at %p\n", e);
589
590 *enumerator = e;
591 return 0;
592}
593
594LOC_EXPORT struct loc_database_enumerator* loc_database_enumerator_ref(struct loc_database_enumerator* enumerator) {
595 enumerator->refcount++;
596
597 return enumerator;
598}
599
600static void loc_database_enumerator_free(struct loc_database_enumerator* enumerator) {
601 DEBUG(enumerator->ctx, "Releasing database enumerator %p\n", enumerator);
602
603 // Release all references
604 loc_database_unref(enumerator->db);
605 loc_unref(enumerator->ctx);
606
607 if (enumerator->string)
608 free(enumerator->string);
609
610 // Free network search
611 free(enumerator->networks_visited);
612
613 free(enumerator);
614}
615
616LOC_EXPORT struct loc_database_enumerator* loc_database_enumerator_unref(struct loc_database_enumerator* enumerator) {
617 if (!enumerator)
618 return NULL;
619
620 if (--enumerator->refcount > 0)
621 return enumerator;
622
623 loc_database_enumerator_free(enumerator);
624 return NULL;
625}
626
627LOC_EXPORT int loc_database_enumerator_set_string(struct loc_database_enumerator* enumerator, const char* string) {
628 enumerator->string = strdup(string);
629
630 // Make the string lowercase
631 for (char *p = enumerator->string; *p; p++)
632 *p = tolower(*p);
633
634 return 0;
635}
636
637LOC_EXPORT int loc_database_enumerator_set_country_code(struct loc_database_enumerator* enumerator, const char* country_code) {
638 // Set empty country code
639 if (!country_code || !*country_code) {
640 *enumerator->country_code = '\0';
641 return 0;
642 }
643
644 // Country codes must be two characters
645 if (strlen(country_code) != 2)
646 return -EINVAL;
647
648 for (unsigned int i = 0; i < 3; i++) {
649 enumerator->country_code[i] = country_code[i];
650 }
651
652 return 0;
653}
654
655LOC_EXPORT int loc_database_enumerator_set_asn(
656 struct loc_database_enumerator* enumerator, unsigned int asn) {
657 enumerator->asn = asn;
658
659 return 0;
660}
661
662LOC_EXPORT struct loc_as* loc_database_enumerator_next_as(struct loc_database_enumerator* enumerator) {
663 struct loc_database* db = enumerator->db;
664 struct loc_as* as;
665
666 while (enumerator->as_index < db->as_count) {
667 // Fetch the next AS
668 int r = loc_database_fetch_as(db, &as, enumerator->as_index++);
669 if (r)
670 return NULL;
671
672 r = loc_as_match_string(as, enumerator->string);
673 if (r == 1) {
674 DEBUG(enumerator->ctx, "AS%d (%s) matches %s\n",
675 loc_as_get_number(as), loc_as_get_name(as), enumerator->string);
676
677 return as;
678 }
679
680 // No match
681 loc_as_unref(as);
682 }
683
684 // Reset the index
685 enumerator->as_index = 0;
686
687 // We have searched through all of them
688 return NULL;
689}
690
691static int loc_database_enumerator_stack_push_node(
692 struct loc_database_enumerator* e, off_t offset, int i, int depth) {
693 // Do not add empty nodes
694 if (!offset)
695 return 0;
696
697 // Check if there is any space left on the stack
698 if (e->network_stack_depth >= MAX_STACK_DEPTH) {
699 ERROR(e->ctx, "Maximum stack size reached: %d\n", e->network_stack_depth);
700 return -1;
701 }
702
703 // Increase stack size
704 int s = ++e->network_stack_depth;
705
706 DEBUG(e->ctx, "Added node %jd to stack (%d)\n", offset, depth);
707
708 e->network_stack[s].offset = offset;
709 e->network_stack[s].i = i;
710 e->network_stack[s].depth = depth;
711
712 return 0;
713}
714
715static int loc_database_enumerator_network_depth_first_search(
716 struct loc_database_enumerator* e, struct loc_network** network) {
717 // Reset network
718 *network = NULL;
719 int r;
720
721 DEBUG(e->ctx, "Called with a stack of %u nodes\n", e->network_stack_depth);
722
723 // Perform DFS
724 while (e->network_stack_depth > 0) {
725 DEBUG(e->ctx, "Stack depth: %u\n", e->network_stack_depth);
726
727 // Get object from top of the stack
728 struct loc_node_stack* node = &e->network_stack[e->network_stack_depth];
729
730 // Remove the node from the stack if we have already visited it
731 if (e->networks_visited[node->offset]) {
732 e->network_stack_depth--;
733 continue;
734 }
735
736 // Mark the bits on the path correctly
737 in6_addr_set_bit(&e->network_address,
738 (node->depth > 0) ? node->depth - 1 : 0, node->i);
739
740 DEBUG(e->ctx, "Looking at node %jd\n", node->offset);
741 e->networks_visited[node->offset]++;
742
743 // Pop node from top of the stack
744 struct loc_database_network_node_v0* n =
745 e->db->network_nodes_v0 + node->offset;
746
747 // Add edges to stack
748 r = loc_database_enumerator_stack_push_node(e,
749 be32toh(n->one), 1, node->depth + 1);
750
751 if (r)
752 return r;
753
754 r = loc_database_enumerator_stack_push_node(e,
755 be32toh(n->zero), 0, node->depth + 1);
756
757 if (r)
758 return r;
759
760 // Check if this node is a leaf and has a network object
761 if (__loc_database_node_is_leaf(n)) {
762 off_t network_index = be32toh(n->network);
763
764 DEBUG(e->ctx, "Node has a network at %jd\n", network_index);
765
766 // Fetch the network object
767 r = loc_database_fetch_network(e->db, network,
768 &e->network_address, node->depth, network_index);
769
770 // Break on any errors
771 if (r)
772 return r;
773
774 // Check if we are interested in this network
775
776 // Skip if the country code does not match
777 if (e->country_code && !loc_network_match_country_code(*network, e->country_code)) {
778 loc_network_unref(*network);
779 continue;
780 }
781
782 // Skip if the ASN does not match
783 if (e->asn && !loc_network_match_asn(*network, e->asn)) {
784 loc_network_unref(*network);
785 continue;
786 }
787
788 return 0;
789 }
790 }
791
792 // Reached the end of the search
793
794 // Mark all nodes as non-visited
795 for (unsigned int i = 0; i < e->db->network_nodes_count; i++)
796 e->networks_visited[i] = 0;
797
798 return 0;
799}
800
801LOC_EXPORT struct loc_network* loc_database_enumerator_next_network(
802 struct loc_database_enumerator* enumerator) {
803 struct loc_network* network = NULL;
804
805 int r = loc_database_enumerator_network_depth_first_search(enumerator, &network);
806 if (r) {
807 return NULL;
808 }
809
810 return network;
811}