]> git.ipfire.org Git - people/ms/libloc.git/blob - src/database.c
Add database enumerator interface to perform iterative searches
[people/ms/libloc.git] / src / database.c
1 /*
2 libloc - A library to determine the location of someone on the Internet
3
4 Copyright (C) 2017 IPFire Development Team <info@ipfire.org>
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15 */
16
17 #include <arpa/inet.h>
18 #include <endian.h>
19 #include <errno.h>
20 #include <netinet/in.h>
21 #include <stddef.h>
22 #include <stdint.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mman.h>
27 #include <sys/types.h>
28 #include <time.h>
29 #include <unistd.h>
30
31 #include <loc/libloc.h>
32 #include <loc/as.h>
33 #include <loc/database.h>
34 #include <loc/format.h>
35 #include <loc/network.h>
36 #include <loc/private.h>
37 #include <loc/stringpool.h>
38
39 struct loc_database {
40 struct loc_ctx* ctx;
41 int refcount;
42
43 unsigned int version;
44 time_t created_at;
45 off_t vendor;
46 off_t description;
47 off_t license;
48
49 // ASes in the database
50 struct loc_database_as_v0* as_v0;
51 size_t as_count;
52
53 // Network tree
54 struct loc_database_network_node_v0* network_nodes_v0;
55 size_t network_nodes_count;
56
57 // Networks
58 struct loc_database_network_v0* networks_v0;
59 size_t networks_count;
60
61 struct loc_stringpool* pool;
62 };
63
64 struct loc_database_enumerator {
65 struct loc_ctx* ctx;
66 struct loc_database* db;
67 int refcount;
68 };
69
70 static int loc_database_read_magic(struct loc_database* db, FILE* f) {
71 struct loc_database_magic magic;
72
73 // Read from file
74 size_t bytes_read = fread(&magic, 1, sizeof(magic), f);
75
76 // Check if we have been able to read enough data
77 if (bytes_read < sizeof(magic)) {
78 ERROR(db->ctx, "Could not read enough data to validate magic bytes\n");
79 DEBUG(db->ctx, "Read %zu bytes, but needed %zu\n", bytes_read, sizeof(magic));
80 return -ENOMSG;
81 }
82
83 // Compare magic bytes
84 if (memcmp(LOC_DATABASE_MAGIC, magic.magic, strlen(LOC_DATABASE_MAGIC)) == 0) {
85 DEBUG(db->ctx, "Magic value matches\n");
86
87 // Parse version
88 db->version = be16toh(magic.version);
89 DEBUG(db->ctx, "Database version is %u\n", db->version);
90
91 return 0;
92 }
93
94 ERROR(db->ctx, "Database format is not compatible\n");
95
96 // Return an error
97 return 1;
98 }
99
100 static int loc_database_read_as_section_v0(struct loc_database* db,
101 FILE* f, const struct loc_database_header_v0* header) {
102 off_t as_offset = be32toh(header->as_offset);
103 size_t as_length = be32toh(header->as_length);
104
105 DEBUG(db->ctx, "Reading AS section from %jd (%zu bytes)\n", as_offset, as_length);
106
107 if (as_length > 0) {
108 db->as_v0 = mmap(NULL, as_length, PROT_READ,
109 MAP_SHARED, fileno(f), as_offset);
110
111 if (db->as_v0 == MAP_FAILED)
112 return -errno;
113 }
114
115 db->as_count = as_length / sizeof(*db->as_v0);
116
117 INFO(db->ctx, "Read %zu ASes from the database\n", db->as_count);
118
119 return 0;
120 }
121
122 static int loc_database_read_network_nodes_section_v0(struct loc_database* db,
123 FILE* f, const struct loc_database_header_v0* header) {
124 off_t network_nodes_offset = be32toh(header->network_tree_offset);
125 size_t network_nodes_length = be32toh(header->network_tree_length);
126
127 DEBUG(db->ctx, "Reading network nodes section from %jd (%zu bytes)\n",
128 network_nodes_offset, network_nodes_length);
129
130 if (network_nodes_length > 0) {
131 db->network_nodes_v0 = mmap(NULL, network_nodes_length, PROT_READ,
132 MAP_SHARED, fileno(f), network_nodes_offset);
133
134 if (db->network_nodes_v0 == MAP_FAILED)
135 return -errno;
136 }
137
138 db->network_nodes_count = network_nodes_length / sizeof(*db->network_nodes_v0);
139
140 INFO(db->ctx, "Read %zu network nodes from the database\n", db->network_nodes_count);
141
142 return 0;
143 }
144
145 static int loc_database_read_networks_section_v0(struct loc_database* db,
146 FILE* f, const struct loc_database_header_v0* header) {
147 off_t networks_offset = be32toh(header->network_data_offset);
148 size_t networks_length = be32toh(header->network_data_length);
149
150 DEBUG(db->ctx, "Reading networks section from %jd (%zu bytes)\n",
151 networks_offset, networks_length);
152
153 if (networks_length > 0) {
154 db->networks_v0 = mmap(NULL, networks_length, PROT_READ,
155 MAP_SHARED, fileno(f), networks_offset);
156
157 if (db->networks_v0 == MAP_FAILED)
158 return -errno;
159 }
160
161 db->networks_count = networks_length / sizeof(*db->networks_v0);
162
163 INFO(db->ctx, "Read %zu networks from the database\n", db->networks_count);
164
165 return 0;
166 }
167
168 static int loc_database_read_header_v0(struct loc_database* db, FILE* f) {
169 struct loc_database_header_v0 header;
170
171 // Read from file
172 size_t size = fread(&header, 1, sizeof(header), f);
173
174 if (size < sizeof(header)) {
175 ERROR(db->ctx, "Could not read enough data for header\n");
176 return -ENOMSG;
177 }
178
179 // Copy over data
180 db->created_at = be64toh(header.created_at);
181 db->vendor = be32toh(header.vendor);
182 db->description = be32toh(header.description);
183 db->license = be32toh(header.license);
184
185 // Open pool
186 off_t pool_offset = be32toh(header.pool_offset);
187 size_t pool_length = be32toh(header.pool_length);
188
189 int r = loc_stringpool_open(db->ctx, &db->pool,
190 f, pool_length, pool_offset);
191 if (r)
192 return r;
193
194 // AS section
195 r = loc_database_read_as_section_v0(db, f, &header);
196 if (r)
197 return r;
198
199 // Network Nodes
200 r = loc_database_read_network_nodes_section_v0(db, f, &header);
201 if (r)
202 return r;
203
204 // Networks
205 r = loc_database_read_networks_section_v0(db, f, &header);
206 if (r)
207 return r;
208
209 return 0;
210 }
211
212 static int loc_database_read_header(struct loc_database* db, FILE* f) {
213 switch (db->version) {
214 case 0:
215 return loc_database_read_header_v0(db, f);
216
217 default:
218 ERROR(db->ctx, "Incompatible database version: %u\n", db->version);
219 return 1;
220 }
221 }
222
223 static int loc_database_read(struct loc_database* db, FILE* f) {
224 clock_t start = clock();
225
226 // Read magic bytes
227 int r = loc_database_read_magic(db, f);
228 if (r)
229 return r;
230
231 // Read the header
232 r = loc_database_read_header(db, f);
233 if (r)
234 return r;
235
236 clock_t end = clock();
237
238 INFO(db->ctx, "Opened database in %.8fs\n",
239 (double)(end - start) / CLOCKS_PER_SEC);
240
241 return 0;
242 }
243
244 LOC_EXPORT int loc_database_new(struct loc_ctx* ctx, struct loc_database** database, FILE* f) {
245 // Fail on invalid file handle
246 if (!f)
247 return -EINVAL;
248
249 struct loc_database* db = calloc(1, sizeof(*db));
250 if (!db)
251 return -ENOMEM;
252
253 // Reference context
254 db->ctx = loc_ref(ctx);
255 db->refcount = 1;
256
257 DEBUG(db->ctx, "Database object allocated at %p\n", db);
258
259 int r = loc_database_read(db, f);
260 if (r) {
261 loc_database_unref(db);
262 return r;
263 }
264
265 *database = db;
266
267 return 0;
268 }
269
270 LOC_EXPORT struct loc_database* loc_database_ref(struct loc_database* db) {
271 db->refcount++;
272
273 return db;
274 }
275
276 static void loc_database_free(struct loc_database* db) {
277 int r;
278
279 DEBUG(db->ctx, "Releasing database %p\n", db);
280
281 // Removing all ASes
282 if (db->as_v0) {
283 r = munmap(db->as_v0, db->as_count * sizeof(*db->as_v0));
284 if (r)
285 ERROR(db->ctx, "Could not unmap AS section: %s\n", strerror(errno));
286 }
287
288 // Remove mapped network sections
289 if (db->networks_v0) {
290 r = munmap(db->networks_v0, db->networks_count * sizeof(*db->networks_v0));
291 if (r)
292 ERROR(db->ctx, "Could not unmap networks section: %s\n", strerror(errno));
293 }
294
295 // Remove mapped network nodes section
296 if (db->network_nodes_v0) {
297 r = munmap(db->network_nodes_v0, db->network_nodes_count * sizeof(*db->network_nodes_v0));
298 if (r)
299 ERROR(db->ctx, "Could not unmap network nodes section: %s\n", strerror(errno));
300 }
301
302 loc_stringpool_unref(db->pool);
303
304 loc_unref(db->ctx);
305 free(db);
306 }
307
308 LOC_EXPORT struct loc_database* loc_database_unref(struct loc_database* db) {
309 if (--db->refcount > 0)
310 return NULL;
311
312 loc_database_free(db);
313 return NULL;
314 }
315
316 LOC_EXPORT time_t loc_database_created_at(struct loc_database* db) {
317 return db->created_at;
318 }
319
320 LOC_EXPORT const char* loc_database_get_vendor(struct loc_database* db) {
321 return loc_stringpool_get(db->pool, db->vendor);
322 }
323
324 LOC_EXPORT const char* loc_database_get_description(struct loc_database* db) {
325 return loc_stringpool_get(db->pool, db->description);
326 }
327
328 LOC_EXPORT const char* loc_database_get_license(struct loc_database* db) {
329 return loc_stringpool_get(db->pool, db->license);
330 }
331
332 LOC_EXPORT size_t loc_database_count_as(struct loc_database* db) {
333 return db->as_count;
334 }
335
336 // Returns the AS at position pos
337 static int loc_database_fetch_as(struct loc_database* db, struct loc_as** as, off_t pos) {
338 if ((size_t)pos >= db->as_count)
339 return -EINVAL;
340
341 DEBUG(db->ctx, "Fetching AS at position %jd\n", pos);
342
343 int r;
344 switch (db->version) {
345 case 0:
346 r = loc_as_new_from_database_v0(db->ctx, db->pool, as, db->as_v0 + pos);
347 break;
348
349 default:
350 return -1;
351 }
352
353 if (r == 0) {
354 DEBUG(db->ctx, "Got AS%u\n", loc_as_get_number(*as));
355 }
356
357 return r;
358 }
359
360 // Performs a binary search to find the AS in the list
361 LOC_EXPORT int loc_database_get_as(struct loc_database* db, struct loc_as** as, uint32_t number) {
362 off_t lo = 0;
363 off_t hi = db->as_count - 1;
364
365 // Save start time
366 clock_t start = clock();
367
368 while (lo <= hi) {
369 off_t i = (lo + hi) / 2;
370
371 // Fetch AS in the middle between lo and hi
372 int r = loc_database_fetch_as(db, as, i);
373 if (r)
374 return r;
375
376 // Check if this is a match
377 uint32_t as_number = loc_as_get_number(*as);
378 if (as_number == number) {
379 clock_t end = clock();
380
381 // Log how fast this has been
382 DEBUG(db->ctx, "Found AS%u in %.8fs\n", as_number,
383 (double)(end - start) / CLOCKS_PER_SEC);
384
385 return 0;
386 }
387
388 // If it wasn't, we release the AS and
389 // adjust our search pointers
390 loc_as_unref(*as);
391
392 if (as_number < number) {
393 lo = i + 1;
394 } else
395 hi = i - 1;
396 }
397
398 // Nothing found
399 *as = NULL;
400
401 return 1;
402 }
403
404 // Returns the network at position pos
405 static int loc_database_fetch_network(struct loc_database* db, struct loc_network** network, struct in6_addr* address, off_t pos) {
406 if ((size_t)pos >= db->networks_count)
407 return -EINVAL;
408
409 DEBUG(db->ctx, "Fetching network at position %jd\n", pos);
410
411 int r;
412 switch (db->version) {
413 case 0:
414 r = loc_network_new_from_database_v0(db->ctx, network, address, db->networks_v0 + pos);
415 break;
416
417 default:
418 return -1;
419 }
420
421 if (r == 0) {
422 char* string = loc_network_str(*network);
423 DEBUG(db->ctx, "Got network %s\n", string);
424 free(string);
425 }
426
427 return r;
428 }
429
430 static int __loc_database_node_is_leaf(const struct loc_database_network_node_v0* node) {
431 return (node->zero == htobe32(0xffffffff));
432 }
433
434 static int __loc_database_lookup_handle_leaf(struct loc_database* db, const struct in6_addr* address,
435 struct loc_network** network, struct in6_addr* network_address,
436 const struct loc_database_network_node_v0* node) {
437 DEBUG(db->ctx, "Handling leaf node at %jd\n", node - db->network_nodes_v0);
438
439 // Fetch the network
440 int r = loc_database_fetch_network(db, network,
441 network_address, be32toh(node->one));
442 if (r)
443 return r;
444
445 // Check if the given IP address is inside the network
446 r = loc_network_match_address(*network, address);
447 if (r) {
448 DEBUG(db->ctx, "Searched address is not part of the network\n");
449
450 loc_network_unref(*network);
451 *network = NULL;
452 return 1;
453 }
454
455 // A network was found and the IP address matches
456 return 0;
457 }
458
459 // Returns the highest result available
460 static int __loc_database_lookup_max(struct loc_database* db, const struct in6_addr* address,
461 struct loc_network** network, struct in6_addr* network_address,
462 const struct loc_database_network_node_v0* node, unsigned int level) {
463 // If the node is a leaf node, we end here
464 if (__loc_database_node_is_leaf(node))
465 return __loc_database_lookup_handle_leaf(db, address, network, network_address, node);
466
467 int r;
468 off_t node_index;
469
470 // Try to go down the ones path first
471 if (node->one) {
472 node_index = be32toh(node->one);
473 in6_addr_set_bit(network_address, level, 1);
474
475 // Check boundaries
476 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
477 r = __loc_database_lookup_max(db, address, network, network_address,
478 db->network_nodes_v0 + node_index, level + 1);
479
480 // Abort when match was found or error
481 if (r <= 0)
482 return r;
483 }
484 }
485
486 // ... and if that fails, we try to go down one step on a zero
487 // branch and then try the ones again...
488 if (node->zero) {
489 node_index = be32toh(node->zero);
490 in6_addr_set_bit(network_address, level, 0);
491
492 // Check boundaries
493 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
494 r = __loc_database_lookup_max(db, address, network, network_address,
495 db->network_nodes_v0 + node_index, level + 1);
496
497 // Abort when match was found or error
498 if (r <= 0)
499 return r;
500 }
501 }
502
503 // End of path
504 return 1;
505 }
506
507 // Searches for an exact match along the path
508 static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* address,
509 struct loc_network** network, struct in6_addr* network_address,
510 const struct loc_database_network_node_v0* node, unsigned int level) {
511 // If the node is a leaf node, we end here
512 if (__loc_database_node_is_leaf(node))
513 return __loc_database_lookup_handle_leaf(db, address, network, network_address, node);
514
515 int r;
516 off_t node_index;
517
518 // Follow the path
519 int bit = in6_addr_get_bit(address, level);
520 in6_addr_set_bit(network_address, level, bit);
521
522 if (bit == 0)
523 node_index = be32toh(node->zero);
524 else
525 node_index = be32toh(node->one);
526
527 // If we point back to root, the path ends here
528 if (node_index == 0) {
529 DEBUG(db->ctx, "Tree ends here\n");
530 return 1;
531 }
532
533 // Check boundaries
534 if ((size_t)node_index >= db->network_nodes_count)
535 return -EINVAL;
536
537 // Move on to the next node
538 r = __loc_database_lookup(db, address, network, network_address,
539 db->network_nodes_v0 + node_index, level + 1);
540
541 // End here if a result was found
542 if (r == 0)
543 return r;
544
545 // Raise any errors
546 else if (r < 0)
547 return r;
548
549 DEBUG(db->ctx, "Could not find an exact match at %u\n", level);
550
551 // If nothing was found, we have to search for an inexact match
552 return __loc_database_lookup_max(db, address, network, network_address, node, level);
553 }
554
555 LOC_EXPORT int loc_database_lookup(struct loc_database* db,
556 struct in6_addr* address, struct loc_network** network) {
557 struct in6_addr network_address;
558 memset(&network_address, 0, sizeof(network_address));
559
560 *network = NULL;
561
562 // Save start time
563 clock_t start = clock();
564
565 int r = __loc_database_lookup(db, address, network, &network_address,
566 db->network_nodes_v0, 0);
567
568 clock_t end = clock();
569
570 // Log how fast this has been
571 DEBUG(db->ctx, "Executed network search in %.8fs\n",
572 (double)(end - start) / CLOCKS_PER_SEC);
573
574 return r;
575 }
576
577 LOC_EXPORT int loc_database_lookup_from_string(struct loc_database* db,
578 const char* string, struct loc_network** network) {
579 struct in6_addr address;
580
581 int r = loc_parse_address(db->ctx, string, &address);
582 if (r)
583 return r;
584
585 return loc_database_lookup(db, &address, network);
586 }
587
588 // Enumerator
589
590 LOC_EXPORT int loc_database_enumerator_new(struct loc_database_enumerator** enumerator, struct loc_database* db) {
591 struct loc_database_enumerator* e = calloc(1, sizeof(*e));
592 if (!e)
593 return -ENOMEM;
594
595 // Reference context
596 e->ctx = loc_ref(db->ctx);
597 e->db = loc_database_ref(db);
598 e->refcount = 1;
599
600 DEBUG(e->ctx, "Database enumerator object allocated at %p\n", e);
601
602 *enumerator = e;
603 return 0;
604 }
605
606 LOC_EXPORT struct loc_database_enumerator* loc_database_enumerator_ref(struct loc_database_enumerator* enumerator) {
607 enumerator->refcount++;
608
609 return enumerator;
610 }
611
612 static void loc_database_enumerator_free(struct loc_database_enumerator* enumerator) {
613 DEBUG(enumerator->ctx, "Releasing database enumerator %p\n", enumerator);
614
615 // Release all references
616 loc_database_unref(enumerator->db);
617 loc_unref(enumerator->ctx);
618
619 free(enumerator);
620 }
621
622 LOC_EXPORT struct loc_database_enumerator* loc_database_enumerator_unref(struct loc_database_enumerator* enumerator) {
623 if (!enumerator)
624 return NULL;
625
626 if (--enumerator->refcount > 0)
627 return enumerator;
628
629 loc_database_enumerator_free(enumerator);
630 return NULL;
631 }