]> git.ipfire.org Git - people/ms/libloc.git/blob - src/database.c
37a44e5e37aa21a14db8e75ed1374c203078a951
[people/ms/libloc.git] / src / database.c
1 /*
2 libloc - A library to determine the location of someone on the Internet
3
4 Copyright (C) 2017 IPFire Development Team <info@ipfire.org>
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15 */
16
17 #include <arpa/inet.h>
18 #include <ctype.h>
19 #include <endian.h>
20 #include <errno.h>
21 #include <netinet/in.h>
22 #include <stddef.h>
23 #include <stdint.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <sys/mman.h>
28 #include <sys/types.h>
29 #include <time.h>
30 #include <unistd.h>
31
32 #include <loc/libloc.h>
33 #include <loc/as.h>
34 #include <loc/database.h>
35 #include <loc/format.h>
36 #include <loc/network.h>
37 #include <loc/private.h>
38 #include <loc/stringpool.h>
39
40 struct loc_database {
41 struct loc_ctx* ctx;
42 int refcount;
43
44 unsigned int version;
45 time_t created_at;
46 off_t vendor;
47 off_t description;
48 off_t license;
49
50 // ASes in the database
51 struct loc_database_as_v0* as_v0;
52 size_t as_count;
53
54 // Network tree
55 struct loc_database_network_node_v0* network_nodes_v0;
56 size_t network_nodes_count;
57
58 // Networks
59 struct loc_database_network_v0* networks_v0;
60 size_t networks_count;
61
62 struct loc_stringpool* pool;
63 };
64
65 #define MAX_STACK_DEPTH 256
66
67 struct loc_node_stack {
68 off_t offset;
69 int i; // Is this node 0 or 1?
70 int depth;
71 };
72
73 struct loc_database_enumerator {
74 struct loc_ctx* ctx;
75 struct loc_database* db;
76 int refcount;
77
78 // Search string
79 char* string;
80 char country_code[3];
81
82 // Index of the AS we are looking at
83 unsigned int as_index;
84
85 // Network state
86 struct in6_addr network_address;
87 struct loc_node_stack network_stack[MAX_STACK_DEPTH];
88 int network_stack_depth;
89 unsigned int* networks_visited;
90 };
91
92 static int loc_database_read_magic(struct loc_database* db, FILE* f) {
93 struct loc_database_magic magic;
94
95 // Read from file
96 size_t bytes_read = fread(&magic, 1, sizeof(magic), f);
97
98 // Check if we have been able to read enough data
99 if (bytes_read < sizeof(magic)) {
100 ERROR(db->ctx, "Could not read enough data to validate magic bytes\n");
101 DEBUG(db->ctx, "Read %zu bytes, but needed %zu\n", bytes_read, sizeof(magic));
102 return -ENOMSG;
103 }
104
105 // Compare magic bytes
106 if (memcmp(LOC_DATABASE_MAGIC, magic.magic, strlen(LOC_DATABASE_MAGIC)) == 0) {
107 DEBUG(db->ctx, "Magic value matches\n");
108
109 // Parse version
110 db->version = be16toh(magic.version);
111 DEBUG(db->ctx, "Database version is %u\n", db->version);
112
113 return 0;
114 }
115
116 ERROR(db->ctx, "Database format is not compatible\n");
117
118 // Return an error
119 return 1;
120 }
121
122 static int loc_database_read_as_section_v0(struct loc_database* db,
123 FILE* f, const struct loc_database_header_v0* header) {
124 off_t as_offset = be32toh(header->as_offset);
125 size_t as_length = be32toh(header->as_length);
126
127 DEBUG(db->ctx, "Reading AS section from %jd (%zu bytes)\n", as_offset, as_length);
128
129 if (as_length > 0) {
130 db->as_v0 = mmap(NULL, as_length, PROT_READ,
131 MAP_SHARED, fileno(f), as_offset);
132
133 if (db->as_v0 == MAP_FAILED)
134 return -errno;
135 }
136
137 db->as_count = as_length / sizeof(*db->as_v0);
138
139 INFO(db->ctx, "Read %zu ASes from the database\n", db->as_count);
140
141 return 0;
142 }
143
144 static int loc_database_read_network_nodes_section_v0(struct loc_database* db,
145 FILE* f, const struct loc_database_header_v0* header) {
146 off_t network_nodes_offset = be32toh(header->network_tree_offset);
147 size_t network_nodes_length = be32toh(header->network_tree_length);
148
149 DEBUG(db->ctx, "Reading network nodes section from %jd (%zu bytes)\n",
150 network_nodes_offset, network_nodes_length);
151
152 if (network_nodes_length > 0) {
153 db->network_nodes_v0 = mmap(NULL, network_nodes_length, PROT_READ,
154 MAP_SHARED, fileno(f), network_nodes_offset);
155
156 if (db->network_nodes_v0 == MAP_FAILED)
157 return -errno;
158 }
159
160 db->network_nodes_count = network_nodes_length / sizeof(*db->network_nodes_v0);
161
162 INFO(db->ctx, "Read %zu network nodes from the database\n", db->network_nodes_count);
163
164 return 0;
165 }
166
167 static int loc_database_read_networks_section_v0(struct loc_database* db,
168 FILE* f, const struct loc_database_header_v0* header) {
169 off_t networks_offset = be32toh(header->network_data_offset);
170 size_t networks_length = be32toh(header->network_data_length);
171
172 DEBUG(db->ctx, "Reading networks section from %jd (%zu bytes)\n",
173 networks_offset, networks_length);
174
175 if (networks_length > 0) {
176 db->networks_v0 = mmap(NULL, networks_length, PROT_READ,
177 MAP_SHARED, fileno(f), networks_offset);
178
179 if (db->networks_v0 == MAP_FAILED)
180 return -errno;
181 }
182
183 db->networks_count = networks_length / sizeof(*db->networks_v0);
184
185 INFO(db->ctx, "Read %zu networks from the database\n", db->networks_count);
186
187 return 0;
188 }
189
190 static int loc_database_read_header_v0(struct loc_database* db, FILE* f) {
191 struct loc_database_header_v0 header;
192
193 // Read from file
194 size_t size = fread(&header, 1, sizeof(header), f);
195
196 if (size < sizeof(header)) {
197 ERROR(db->ctx, "Could not read enough data for header\n");
198 return -ENOMSG;
199 }
200
201 // Copy over data
202 db->created_at = be64toh(header.created_at);
203 db->vendor = be32toh(header.vendor);
204 db->description = be32toh(header.description);
205 db->license = be32toh(header.license);
206
207 // Open pool
208 off_t pool_offset = be32toh(header.pool_offset);
209 size_t pool_length = be32toh(header.pool_length);
210
211 int r = loc_stringpool_open(db->ctx, &db->pool,
212 f, pool_length, pool_offset);
213 if (r)
214 return r;
215
216 // AS section
217 r = loc_database_read_as_section_v0(db, f, &header);
218 if (r)
219 return r;
220
221 // Network Nodes
222 r = loc_database_read_network_nodes_section_v0(db, f, &header);
223 if (r)
224 return r;
225
226 // Networks
227 r = loc_database_read_networks_section_v0(db, f, &header);
228 if (r)
229 return r;
230
231 return 0;
232 }
233
234 static int loc_database_read_header(struct loc_database* db, FILE* f) {
235 switch (db->version) {
236 case 0:
237 return loc_database_read_header_v0(db, f);
238
239 default:
240 ERROR(db->ctx, "Incompatible database version: %u\n", db->version);
241 return 1;
242 }
243 }
244
245 static int loc_database_read(struct loc_database* db, FILE* f) {
246 clock_t start = clock();
247
248 // Read magic bytes
249 int r = loc_database_read_magic(db, f);
250 if (r)
251 return r;
252
253 // Read the header
254 r = loc_database_read_header(db, f);
255 if (r)
256 return r;
257
258 clock_t end = clock();
259
260 INFO(db->ctx, "Opened database in %.8fs\n",
261 (double)(end - start) / CLOCKS_PER_SEC);
262
263 return 0;
264 }
265
266 LOC_EXPORT int loc_database_new(struct loc_ctx* ctx, struct loc_database** database, FILE* f) {
267 // Fail on invalid file handle
268 if (!f)
269 return -EINVAL;
270
271 struct loc_database* db = calloc(1, sizeof(*db));
272 if (!db)
273 return -ENOMEM;
274
275 // Reference context
276 db->ctx = loc_ref(ctx);
277 db->refcount = 1;
278
279 DEBUG(db->ctx, "Database object allocated at %p\n", db);
280
281 int r = loc_database_read(db, f);
282 if (r) {
283 loc_database_unref(db);
284 return r;
285 }
286
287 *database = db;
288
289 return 0;
290 }
291
292 LOC_EXPORT struct loc_database* loc_database_ref(struct loc_database* db) {
293 db->refcount++;
294
295 return db;
296 }
297
298 static void loc_database_free(struct loc_database* db) {
299 int r;
300
301 DEBUG(db->ctx, "Releasing database %p\n", db);
302
303 // Removing all ASes
304 if (db->as_v0) {
305 r = munmap(db->as_v0, db->as_count * sizeof(*db->as_v0));
306 if (r)
307 ERROR(db->ctx, "Could not unmap AS section: %s\n", strerror(errno));
308 }
309
310 // Remove mapped network sections
311 if (db->networks_v0) {
312 r = munmap(db->networks_v0, db->networks_count * sizeof(*db->networks_v0));
313 if (r)
314 ERROR(db->ctx, "Could not unmap networks section: %s\n", strerror(errno));
315 }
316
317 // Remove mapped network nodes section
318 if (db->network_nodes_v0) {
319 r = munmap(db->network_nodes_v0, db->network_nodes_count * sizeof(*db->network_nodes_v0));
320 if (r)
321 ERROR(db->ctx, "Could not unmap network nodes section: %s\n", strerror(errno));
322 }
323
324 loc_stringpool_unref(db->pool);
325
326 loc_unref(db->ctx);
327 free(db);
328 }
329
330 LOC_EXPORT struct loc_database* loc_database_unref(struct loc_database* db) {
331 if (--db->refcount > 0)
332 return NULL;
333
334 loc_database_free(db);
335 return NULL;
336 }
337
338 LOC_EXPORT time_t loc_database_created_at(struct loc_database* db) {
339 return db->created_at;
340 }
341
342 LOC_EXPORT const char* loc_database_get_vendor(struct loc_database* db) {
343 return loc_stringpool_get(db->pool, db->vendor);
344 }
345
346 LOC_EXPORT const char* loc_database_get_description(struct loc_database* db) {
347 return loc_stringpool_get(db->pool, db->description);
348 }
349
350 LOC_EXPORT const char* loc_database_get_license(struct loc_database* db) {
351 return loc_stringpool_get(db->pool, db->license);
352 }
353
354 LOC_EXPORT size_t loc_database_count_as(struct loc_database* db) {
355 return db->as_count;
356 }
357
358 // Returns the AS at position pos
359 static int loc_database_fetch_as(struct loc_database* db, struct loc_as** as, off_t pos) {
360 if ((size_t)pos >= db->as_count)
361 return -EINVAL;
362
363 DEBUG(db->ctx, "Fetching AS at position %jd\n", pos);
364
365 int r;
366 switch (db->version) {
367 case 0:
368 r = loc_as_new_from_database_v0(db->ctx, db->pool, as, db->as_v0 + pos);
369 break;
370
371 default:
372 return -1;
373 }
374
375 if (r == 0) {
376 DEBUG(db->ctx, "Got AS%u\n", loc_as_get_number(*as));
377 }
378
379 return r;
380 }
381
382 // Performs a binary search to find the AS in the list
383 LOC_EXPORT int loc_database_get_as(struct loc_database* db, struct loc_as** as, uint32_t number) {
384 off_t lo = 0;
385 off_t hi = db->as_count - 1;
386
387 // Save start time
388 clock_t start = clock();
389
390 while (lo <= hi) {
391 off_t i = (lo + hi) / 2;
392
393 // Fetch AS in the middle between lo and hi
394 int r = loc_database_fetch_as(db, as, i);
395 if (r)
396 return r;
397
398 // Check if this is a match
399 uint32_t as_number = loc_as_get_number(*as);
400 if (as_number == number) {
401 clock_t end = clock();
402
403 // Log how fast this has been
404 DEBUG(db->ctx, "Found AS%u in %.8fs\n", as_number,
405 (double)(end - start) / CLOCKS_PER_SEC);
406
407 return 0;
408 }
409
410 // If it wasn't, we release the AS and
411 // adjust our search pointers
412 loc_as_unref(*as);
413
414 if (as_number < number) {
415 lo = i + 1;
416 } else
417 hi = i - 1;
418 }
419
420 // Nothing found
421 *as = NULL;
422
423 return 1;
424 }
425
426 // Returns the network at position pos
427 static int loc_database_fetch_network(struct loc_database* db, struct loc_network** network,
428 struct in6_addr* address, unsigned int prefix, off_t pos) {
429 if ((size_t)pos >= db->networks_count)
430 return -EINVAL;
431
432 DEBUG(db->ctx, "Fetching network at position %jd\n", pos);
433
434 int r;
435 switch (db->version) {
436 case 0:
437 r = loc_network_new_from_database_v0(db->ctx, network,
438 address, prefix, db->networks_v0 + pos);
439 break;
440
441 default:
442 return -1;
443 }
444
445 if (r == 0) {
446 char* string = loc_network_str(*network);
447 DEBUG(db->ctx, "Got network %s\n", string);
448 free(string);
449 }
450
451 return r;
452 }
453
454 static int __loc_database_node_is_leaf(const struct loc_database_network_node_v0* node) {
455 return (node->network != htobe32(0xffffffff));
456 }
457
458 static int __loc_database_lookup_handle_leaf(struct loc_database* db, const struct in6_addr* address,
459 struct loc_network** network, struct in6_addr* network_address, unsigned int prefix,
460 const struct loc_database_network_node_v0* node) {
461 off_t network_index = be32toh(node->network);
462
463 DEBUG(db->ctx, "Handling leaf node at %jd (%jd)\n", node - db->network_nodes_v0, network_index);
464
465 // Fetch the network
466 int r = loc_database_fetch_network(db, network,
467 network_address, prefix, network_index);
468 if (r) {
469 ERROR(db->ctx, "Could not fetch network %jd from database\n", network_index);
470 return r;
471 }
472
473 // Check if the given IP address is inside the network
474 r = loc_network_match_address(*network, address);
475 if (r) {
476 DEBUG(db->ctx, "Searched address is not part of the network\n");
477
478 loc_network_unref(*network);
479 *network = NULL;
480 return 1;
481 }
482
483 // A network was found and the IP address matches
484 return 0;
485 }
486
487 // Searches for an exact match along the path
488 static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* address,
489 struct loc_network** network, struct in6_addr* network_address,
490 const struct loc_database_network_node_v0* node, unsigned int level) {
491 int r;
492 off_t node_index;
493
494 // Follow the path
495 int bit = in6_addr_get_bit(address, level);
496 in6_addr_set_bit(network_address, level, bit);
497
498 if (bit == 0)
499 node_index = be32toh(node->zero);
500 else
501 node_index = be32toh(node->one);
502
503 // If the node index is zero, the tree ends here
504 // and we cannot descend any further
505 if (node_index > 0) {
506 // Check boundaries
507 if ((size_t)node_index >= db->network_nodes_count)
508 return -EINVAL;
509
510 // Move on to the next node
511 r = __loc_database_lookup(db, address, network, network_address,
512 db->network_nodes_v0 + node_index, level + 1);
513
514 // End here if a result was found
515 if (r == 0)
516 return r;
517
518 // Raise any errors
519 else if (r < 0)
520 return r;
521
522 DEBUG(db->ctx, "No match found below level %u\n", level);
523 } else {
524 DEBUG(db->ctx, "Tree ended at level %u\n", level);
525 }
526
527 // If this node has a leaf, we will check if it matches
528 if (__loc_database_node_is_leaf(node)) {
529 r = __loc_database_lookup_handle_leaf(db, address, network, network_address, level, node);
530 if (r <= 0)
531 return r;
532 }
533
534 return 1;
535 }
536
537 LOC_EXPORT int loc_database_lookup(struct loc_database* db,
538 struct in6_addr* address, struct loc_network** network) {
539 struct in6_addr network_address;
540 memset(&network_address, 0, sizeof(network_address));
541
542 *network = NULL;
543
544 // Save start time
545 clock_t start = clock();
546
547 int r = __loc_database_lookup(db, address, network, &network_address,
548 db->network_nodes_v0, 0);
549
550 clock_t end = clock();
551
552 // Log how fast this has been
553 DEBUG(db->ctx, "Executed network search in %.8fs\n",
554 (double)(end - start) / CLOCKS_PER_SEC);
555
556 return r;
557 }
558
559 LOC_EXPORT int loc_database_lookup_from_string(struct loc_database* db,
560 const char* string, struct loc_network** network) {
561 struct in6_addr address;
562
563 int r = loc_parse_address(db->ctx, string, &address);
564 if (r)
565 return r;
566
567 return loc_database_lookup(db, &address, network);
568 }
569
570 // Enumerator
571
572 LOC_EXPORT int loc_database_enumerator_new(struct loc_database_enumerator** enumerator, struct loc_database* db) {
573 struct loc_database_enumerator* e = calloc(1, sizeof(*e));
574 if (!e)
575 return -ENOMEM;
576
577 // Reference context
578 e->ctx = loc_ref(db->ctx);
579 e->db = loc_database_ref(db);
580 e->refcount = 1;
581
582 // Initialise graph search
583 //e->network_stack[++e->network_stack_depth] = 0;
584 e->network_stack_depth = 1;
585 e->networks_visited = calloc(db->network_nodes_count, sizeof(*e->networks_visited));
586
587 DEBUG(e->ctx, "Database enumerator object allocated at %p\n", e);
588
589 *enumerator = e;
590 return 0;
591 }
592
593 LOC_EXPORT struct loc_database_enumerator* loc_database_enumerator_ref(struct loc_database_enumerator* enumerator) {
594 enumerator->refcount++;
595
596 return enumerator;
597 }
598
599 static void loc_database_enumerator_free(struct loc_database_enumerator* enumerator) {
600 DEBUG(enumerator->ctx, "Releasing database enumerator %p\n", enumerator);
601
602 // Release all references
603 loc_database_unref(enumerator->db);
604 loc_unref(enumerator->ctx);
605
606 if (enumerator->string)
607 free(enumerator->string);
608
609 // Free network search
610 free(enumerator->networks_visited);
611
612 free(enumerator);
613 }
614
615 LOC_EXPORT struct loc_database_enumerator* loc_database_enumerator_unref(struct loc_database_enumerator* enumerator) {
616 if (!enumerator)
617 return NULL;
618
619 if (--enumerator->refcount > 0)
620 return enumerator;
621
622 loc_database_enumerator_free(enumerator);
623 return NULL;
624 }
625
626 LOC_EXPORT int loc_database_enumerator_set_string(struct loc_database_enumerator* enumerator, const char* string) {
627 enumerator->string = strdup(string);
628
629 // Make the string lowercase
630 for (char *p = enumerator->string; *p; p++)
631 *p = tolower(*p);
632
633 return 0;
634 }
635
636 LOC_EXPORT int loc_database_enumerator_set_country_code(struct loc_database_enumerator* enumerator, const char* country_code) {
637 // Set empty country code
638 if (!country_code || !*country_code) {
639 *enumerator->country_code = '\0';
640 return 0;
641 }
642
643 // Country codes must be two characters
644 if (strlen(country_code) != 2)
645 return -EINVAL;
646
647 for (unsigned int i = 0; i < 3; i++) {
648 enumerator->country_code[i] = country_code[i];
649 }
650
651 return 0;
652 }
653
654 LOC_EXPORT struct loc_as* loc_database_enumerator_next_as(struct loc_database_enumerator* enumerator) {
655 struct loc_database* db = enumerator->db;
656 struct loc_as* as;
657
658 while (enumerator->as_index < db->as_count) {
659 // Fetch the next AS
660 int r = loc_database_fetch_as(db, &as, enumerator->as_index++);
661 if (r)
662 return NULL;
663
664 r = loc_as_match_string(as, enumerator->string);
665 if (r == 1) {
666 DEBUG(enumerator->ctx, "AS%d (%s) matches %s\n",
667 loc_as_get_number(as), loc_as_get_name(as), enumerator->string);
668
669 return as;
670 }
671
672 // No match
673 loc_as_unref(as);
674 }
675
676 // Reset the index
677 enumerator->as_index = 0;
678
679 // We have searched through all of them
680 return NULL;
681 }
682
683 static int loc_database_enumerator_stack_push_node(
684 struct loc_database_enumerator* e, off_t offset, int i, int depth) {
685 // Do not add empty nodes
686 if (!offset)
687 return 0;
688
689 // Check if there is any space left on the stack
690 if (e->network_stack_depth >= MAX_STACK_DEPTH) {
691 ERROR(e->ctx, "Maximum stack size reached: %d\n", e->network_stack_depth);
692 return -1;
693 }
694
695 // Increase stack size
696 int s = ++e->network_stack_depth;
697
698 DEBUG(e->ctx, "Added node %jd to stack (%d)\n", offset, depth);
699
700 e->network_stack[s].offset = offset;
701 e->network_stack[s].i = i;
702 e->network_stack[s].depth = depth;
703
704 return 0;
705 }
706
707 static int loc_database_enumerator_network_depth_first_search(
708 struct loc_database_enumerator* e, struct loc_network** network) {
709 // Reset network
710 *network = NULL;
711 int r;
712
713 DEBUG(e->ctx, "Called with a stack of %u nodes\n", e->network_stack_depth);
714
715 // Perform DFS
716 while (e->network_stack_depth > 0) {
717 DEBUG(e->ctx, "Stack depth: %u\n", e->network_stack_depth);
718
719 // Get object from top of the stack
720 struct loc_node_stack* node = &e->network_stack[e->network_stack_depth];
721
722 // Remove the node from the stack if we have already visited it
723 if (e->networks_visited[node->offset]) {
724 e->network_stack_depth--;
725 continue;
726 }
727
728 in6_addr_set_bit(&e->network_address,
729 (node->depth > 0) ? node->depth - 1 : 0, node->i);
730
731 //for (unsigned int i = stack->depth + 1; i < 128; i++)
732 // in6_addr_set_bit(&e->network_address, i, 0);
733
734 DEBUG(e->ctx, "Looking at node %jd\n", node->offset);
735 e->networks_visited[node->offset]++;
736
737 // Pop node from top of the stack
738 struct loc_database_network_node_v0* n =
739 e->db->network_nodes_v0 + node->offset;
740
741 // Add edges to stack
742 r = loc_database_enumerator_stack_push_node(e,
743 be32toh(n->one), 1, node->depth + 1);
744
745 if (r)
746 return r;
747
748 r = loc_database_enumerator_stack_push_node(e,
749 be32toh(n->zero), 0, node->depth + 1);
750
751 if (r)
752 return r;
753
754 // Check if this node is a leaf and has a network object
755 if (__loc_database_node_is_leaf(n)) {
756 off_t network_index = be32toh(n->network);
757
758 DEBUG(e->ctx, "Node has a network at %jd\n", network_index);
759
760 // Fetch the network object
761 r = loc_database_fetch_network(e->db, network,
762 &e->network_address, node->depth, network_index);
763
764 // Break on any errors
765 if (r)
766 return r;
767
768 // Check if we are interested in this network
769
770 // Skip if the country code does not match
771 if (e->country_code && !loc_network_match_country_code(*network, e->country_code)) {
772 loc_network_unref(*network);
773 continue;
774 }
775
776 return 0;
777 }
778 }
779
780 // Reached the end of the search
781 // TODO cleanup
782
783 return 0;
784 }
785
786 LOC_EXPORT struct loc_network* loc_database_enumerator_next_network(
787 struct loc_database_enumerator* enumerator) {
788 struct loc_network* network = NULL;
789
790 int r = loc_database_enumerator_network_depth_first_search(enumerator, &network);
791 if (r) {
792 return NULL;
793 }
794
795 return network;
796 }