]> git.ipfire.org Git - people/ms/libloc.git/blame - src/database.c
python: Save path when opening the database
[people/ms/libloc.git] / src / database.c
CommitLineData
2601e83e
MT
1/*
2 libloc - A library to determine the location of someone on the Internet
3
4 Copyright (C) 2017 IPFire Development Team <info@ipfire.org>
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15*/
16
2a30e4de 17#include <arpa/inet.h>
0676cd80 18#include <endian.h>
2601e83e 19#include <errno.h>
10778041 20#include <netinet/in.h>
2601e83e
MT
21#include <stddef.h>
22#include <stdint.h>
23#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
c182393f 26#include <sys/mman.h>
2601e83e 27#include <sys/types.h>
96ea74a5 28#include <time.h>
3f35869a 29#include <unistd.h>
2601e83e
MT
30
31#include <loc/libloc.h>
9fc7f001
MT
32#include <loc/as.h>
33#include <loc/database.h>
a5db3e49 34#include <loc/format.h>
10778041 35#include <loc/network.h>
9fc7f001
MT
36#include <loc/private.h>
37#include <loc/stringpool.h>
2601e83e
MT
38
struct loc_database {
	// Library context (referenced in loc_database_new()) and the
	// reference counter of this object
	struct loc_ctx* ctx;
	int refcount;

	// Format version as read from the magic block
	unsigned int version;

	// Creation time as read from the header
	time_t created_at;

	// Values that are passed to the string pool to fetch
	// the vendor and description strings
	off_t vendor;
	off_t description;

	// Memory-mapped AS section and the number of entries in it
	struct loc_database_as_v0* as_v0;
	size_t as_count;

	// Memory-mapped nodes of the network tree and their number
	struct loc_database_network_node_v0* network_nodes_v0;
	size_t network_nodes_count;

	// Memory-mapped network entries and their number
	struct loc_database_network_v0* networks_v0;
	size_t networks_count;

	// String pool that belongs to this database
	struct loc_stringpool* pool;
};
62
a7431f1a 63static int loc_database_read_magic(struct loc_database* db, FILE* f) {
2601e83e
MT
64 struct loc_database_magic magic;
65
66 // Read from file
a7431f1a 67 size_t bytes_read = fread(&magic, 1, sizeof(magic), f);
2601e83e
MT
68
69 // Check if we have been able to read enough data
70 if (bytes_read < sizeof(magic)) {
71 ERROR(db->ctx, "Could not read enough data to validate magic bytes\n");
72 DEBUG(db->ctx, "Read %zu bytes, but needed %zu\n", bytes_read, sizeof(magic));
73 return -ENOMSG;
74 }
75
76 // Compare magic bytes
77 if (memcmp(LOC_DATABASE_MAGIC, magic.magic, strlen(LOC_DATABASE_MAGIC)) == 0) {
78 DEBUG(db->ctx, "Magic value matches\n");
79
80 // Parse version
0676cd80 81 db->version = be16toh(magic.version);
2601e83e
MT
82 DEBUG(db->ctx, "Database version is %u\n", db->version);
83
84 return 0;
85 }
86
87 ERROR(db->ctx, "Database format is not compatible\n");
88
89 // Return an error
90 return 1;
91}
92
a5db3e49 93static int loc_database_read_as_section_v0(struct loc_database* db,
edb4ba7c
MT
94 FILE* f, const struct loc_database_header_v0* header) {
95 off_t as_offset = be32toh(header->as_offset);
96 size_t as_length = be32toh(header->as_length);
97
c182393f 98 DEBUG(db->ctx, "Reading AS section from %jd (%zu bytes)\n", as_offset, as_length);
a5db3e49 99
c182393f
MT
100 if (as_length > 0) {
101 db->as_v0 = mmap(NULL, as_length, PROT_READ,
a7431f1a 102 MAP_SHARED, fileno(f), as_offset);
a5db3e49 103
c182393f
MT
104 if (db->as_v0 == MAP_FAILED)
105 return -errno;
a5db3e49
MT
106 }
107
c182393f
MT
108 db->as_count = as_length / sizeof(*db->as_v0);
109
a5db3e49
MT
110 INFO(db->ctx, "Read %zu ASes from the database\n", db->as_count);
111
112 return 0;
113}
114
f66b7b09 115static int loc_database_read_network_nodes_section_v0(struct loc_database* db,
edb4ba7c
MT
116 FILE* f, const struct loc_database_header_v0* header) {
117 off_t network_nodes_offset = be32toh(header->network_tree_offset);
118 size_t network_nodes_length = be32toh(header->network_tree_length);
119
f66b7b09
MT
120 DEBUG(db->ctx, "Reading network nodes section from %jd (%zu bytes)\n",
121 network_nodes_offset, network_nodes_length);
122
123 if (network_nodes_length > 0) {
124 db->network_nodes_v0 = mmap(NULL, network_nodes_length, PROT_READ,
125 MAP_SHARED, fileno(f), network_nodes_offset);
126
127 if (db->network_nodes_v0 == MAP_FAILED)
128 return -errno;
129 }
130
131 db->network_nodes_count = network_nodes_length / sizeof(*db->network_nodes_v0);
132
133 INFO(db->ctx, "Read %zu network nodes from the database\n", db->network_nodes_count);
134
135 return 0;
136}
137
a735a563
MT
138static int loc_database_read_networks_section_v0(struct loc_database* db,
139 FILE* f, const struct loc_database_header_v0* header) {
140 off_t networks_offset = be32toh(header->network_data_offset);
141 size_t networks_length = be32toh(header->network_data_length);
142
143 DEBUG(db->ctx, "Reading networks section from %jd (%zu bytes)\n",
144 networks_offset, networks_length);
145
146 if (networks_length > 0) {
147 db->networks_v0 = mmap(NULL, networks_length, PROT_READ,
148 MAP_SHARED, fileno(f), networks_offset);
149
150 if (db->networks_v0 == MAP_FAILED)
151 return -errno;
152 }
153
154 db->networks_count = networks_length / sizeof(*db->networks_v0);
155
156 INFO(db->ctx, "Read %zu networks from the database\n", db->networks_count);
157
158 return 0;
159}
160
a7431f1a 161static int loc_database_read_header_v0(struct loc_database* db, FILE* f) {
2601e83e
MT
162 struct loc_database_header_v0 header;
163
164 // Read from file
a7431f1a 165 size_t size = fread(&header, 1, sizeof(header), f);
2601e83e
MT
166
167 if (size < sizeof(header)) {
168 ERROR(db->ctx, "Could not read enough data for header\n");
169 return -ENOMSG;
170 }
171
172 // Copy over data
96ea74a5 173 db->created_at = be64toh(header.created_at);
0676cd80
MT
174 db->vendor = be32toh(header.vendor);
175 db->description = be32toh(header.description);
2601e83e
MT
176
177 // Open pool
0676cd80
MT
178 off_t pool_offset = be32toh(header.pool_offset);
179 size_t pool_length = be32toh(header.pool_length);
2601e83e 180
0e974d4b 181 int r = loc_stringpool_open(db->ctx, &db->pool,
a7431f1a 182 f, pool_length, pool_offset);
2601e83e
MT
183 if (r)
184 return r;
185
a5db3e49 186 // AS section
edb4ba7c 187 r = loc_database_read_as_section_v0(db, f, &header);
a5db3e49
MT
188 if (r)
189 return r;
190
f66b7b09 191 // Network Nodes
edb4ba7c 192 r = loc_database_read_network_nodes_section_v0(db, f, &header);
f66b7b09
MT
193 if (r)
194 return r;
195
a735a563
MT
196 // Networks
197 r = loc_database_read_networks_section_v0(db, f, &header);
198 if (r)
199 return r;
200
2601e83e
MT
201 return 0;
202}
203
a7431f1a 204static int loc_database_read_header(struct loc_database* db, FILE* f) {
2601e83e
MT
205 switch (db->version) {
206 case 0:
a7431f1a 207 return loc_database_read_header_v0(db, f);
2601e83e
MT
208
209 default:
210 ERROR(db->ctx, "Incompatible database version: %u\n", db->version);
211 return 1;
212 }
213}
214
a7431f1a 215static int loc_database_read(struct loc_database* db, FILE* f) {
02879100
MT
216 clock_t start = clock();
217
218 // Read magic bytes
a7431f1a 219 int r = loc_database_read_magic(db, f);
02879100
MT
220 if (r)
221 return r;
222
223 // Read the header
a7431f1a 224 r = loc_database_read_header(db, f);
02879100
MT
225 if (r)
226 return r;
227
228 clock_t end = clock();
229
230 INFO(db->ctx, "Opened database in %.8fs\n",
231 (double)(end - start) / CLOCKS_PER_SEC);
232
233 return 0;
234}
235
c182393f 236LOC_EXPORT int loc_database_new(struct loc_ctx* ctx, struct loc_database** database, FILE* f) {
a7431f1a
MT
237 // Fail on invalid file handle
238 if (!f)
239 return -EINVAL;
240
c182393f
MT
241 struct loc_database* db = calloc(1, sizeof(*db));
242 if (!db)
243 return -ENOMEM;
244
245 // Reference context
246 db->ctx = loc_ref(ctx);
247 db->refcount = 1;
248
249 DEBUG(db->ctx, "Database object allocated at %p\n", db);
250
a7431f1a 251 int r = loc_database_read(db, f);
02879100
MT
252 if (r) {
253 loc_database_unref(db);
2601e83e 254 return r;
02879100 255 }
2601e83e 256
c182393f
MT
257 *database = db;
258
2601e83e 259 return 0;
2601e83e
MT
260}
261
c182393f
MT
262LOC_EXPORT struct loc_database* loc_database_ref(struct loc_database* db) {
263 db->refcount++;
264
265 return db;
8f5b676a
MT
266}
267
c182393f 268static void loc_database_free(struct loc_database* db) {
f10ebc2d
MT
269 int r;
270
c182393f 271 DEBUG(db->ctx, "Releasing database %p\n", db);
c34e76f1 272
c182393f
MT
273 // Removing all ASes
274 if (db->as_v0) {
f10ebc2d 275 r = munmap(db->as_v0, db->as_count * sizeof(*db->as_v0));
c182393f
MT
276 if (r)
277 ERROR(db->ctx, "Could not unmap AS section: %s\n", strerror(errno));
278 }
c34e76f1 279
f10ebc2d
MT
280 // Remove mapped network sections
281 if (db->networks_v0) {
282 r = munmap(db->networks_v0, db->networks_count * sizeof(*db->networks_v0));
283 if (r)
284 ERROR(db->ctx, "Could not unmap networks section: %s\n", strerror(errno));
285 }
286
287 // Remove mapped network nodes section
288 if (db->network_nodes_v0) {
289 r = munmap(db->network_nodes_v0, db->network_nodes_count * sizeof(*db->network_nodes_v0));
290 if (r)
291 ERROR(db->ctx, "Could not unmap network nodes section: %s\n", strerror(errno));
292 }
293
c182393f 294 loc_stringpool_unref(db->pool);
c34e76f1 295
c182393f
MT
296 loc_unref(db->ctx);
297 free(db);
c34e76f1
MT
298}
299
c182393f
MT
300LOC_EXPORT struct loc_database* loc_database_unref(struct loc_database* db) {
301 if (--db->refcount > 0)
302 return NULL;
78ace4ed 303
c182393f
MT
304 loc_database_free(db);
305 return NULL;
306}
78ace4ed 307
c182393f
MT
308LOC_EXPORT time_t loc_database_created_at(struct loc_database* db) {
309 return db->created_at;
310}
78ace4ed 311
c182393f
MT
312LOC_EXPORT const char* loc_database_get_vendor(struct loc_database* db) {
313 return loc_stringpool_get(db->pool, db->vendor);
314}
78ace4ed 315
c182393f
MT
316LOC_EXPORT const char* loc_database_get_description(struct loc_database* db) {
317 return loc_stringpool_get(db->pool, db->description);
318}
78ace4ed 319
c182393f
MT
320LOC_EXPORT size_t loc_database_count_as(struct loc_database* db) {
321 return db->as_count;
78ace4ed
MT
322}
323
c182393f
MT
324// Returns the AS at position pos
325static int loc_database_fetch_as(struct loc_database* db, struct loc_as** as, off_t pos) {
326 if ((size_t)pos >= db->as_count)
327 return -EINVAL;
2601e83e 328
c182393f 329 DEBUG(db->ctx, "Fetching AS at position %jd\n", pos);
2601e83e
MT
330
331 int r;
c182393f
MT
332 switch (db->version) {
333 case 0:
334 r = loc_as_new_from_database_v0(db->ctx, db->pool, as, db->as_v0 + pos);
335 break;
2601e83e 336
c182393f
MT
337 default:
338 return -1;
339 }
2601e83e 340
c182393f
MT
341 if (r == 0) {
342 DEBUG(db->ctx, "Got AS%u\n", loc_as_get_number(*as));
2601e83e 343 }
2601e83e 344
c182393f
MT
345 return r;
346}
c34e76f1 347
c182393f
MT
348// Performs a binary search to find the AS in the list
349LOC_EXPORT int loc_database_get_as(struct loc_database* db, struct loc_as** as, uint32_t number) {
350 off_t lo = 0;
351 off_t hi = db->as_count - 1;
c34e76f1 352
8f3e2a06
MT
353 // Save start time
354 clock_t start = clock();
355
c182393f
MT
356 while (lo <= hi) {
357 off_t i = (lo + hi) / 2;
8f5b676a 358
c182393f
MT
359 // Fetch AS in the middle between lo and hi
360 int r = loc_database_fetch_as(db, as, i);
361 if (r)
362 return r;
a5db3e49 363
c182393f
MT
364 // Check if this is a match
365 uint32_t as_number = loc_as_get_number(*as);
8f3e2a06
MT
366 if (as_number == number) {
367 clock_t end = clock();
368
369 // Log how fast this has been
370 DEBUG(db->ctx, "Found AS%u in %.8fs\n", as_number,
371 (double)(end - start) / CLOCKS_PER_SEC);
372
c182393f 373 return 0;
8f3e2a06 374 }
c182393f
MT
375
376 // If it wasn't, we release the AS and
377 // adjust our search pointers
378 loc_as_unref(*as);
379
380 if (as_number < number) {
381 lo = i + 1;
382 } else
383 hi = i - 1;
384 }
2601e83e 385
c182393f
MT
386 // Nothing found
387 *as = NULL;
2601e83e 388
8f3e2a06 389 return 1;
2601e83e 390}
10778041
MT
391
392// Returns the network at position pos
393static int loc_database_fetch_network(struct loc_database* db, struct loc_network** network, struct in6_addr* address, off_t pos) {
394 if ((size_t)pos >= db->networks_count)
395 return -EINVAL;
396
397 DEBUG(db->ctx, "Fetching network at position %jd\n", pos);
398
399 int r;
400 switch (db->version) {
401 case 0:
402 r = loc_network_new_from_database_v0(db->ctx, network, address, db->networks_v0 + pos);
403 break;
404
405 default:
406 return -1;
407 }
408
409 if (r == 0) {
410 char* string = loc_network_str(*network);
411 DEBUG(db->ctx, "Got network %s\n", string);
412 free(string);
413 }
414
415 return r;
416}
2a30e4de
MT
417
418static int __loc_database_lookup_leaf_node(struct loc_database* db, const struct in6_addr* address,
419 struct loc_network** network, struct in6_addr* network_address,
420 const struct loc_database_network_node_v0* node) {
421 // Check if this node is a leaf node
422 if (node->zero != htobe32(0xffffffff))
423 return 1;
424
425 DEBUG(db->ctx, "Node is a leaf: %jd\n", node - db->network_nodes_v0);
426
427 // Fetch the network
428 int r = loc_database_fetch_network(db, network,
429 network_address, be32toh(node->one));
430 if (r)
431 return r;
432
433 // Check if the given IP address is inside the network
434 r = loc_network_match_address(*network, address);
435 if (r) {
436 DEBUG(db->ctx, "Searched address is not part of the network\n");
437
438 loc_network_unref(*network);
439 *network = NULL;
440 return 1;
441 }
442
443 // A network was found and the IP address matches
444 return 0;
445}
446
447// Returns the highest result available
448static int __loc_database_lookup_max(struct loc_database* db, const struct in6_addr* address,
449 struct loc_network** network, struct in6_addr* network_address,
450 const struct loc_database_network_node_v0* node, int level) {
451
452 // If the node is a leaf node, we end here
453 int r = __loc_database_lookup_leaf_node(db, address, network, network_address, node);
454 if (r <= 0)
455 return r;
456
457 off_t node_index;
458
459 // Try to go down the ones path first
460 if (node->one) {
461 node_index = be32toh(node->one);
462 in6_addr_set_bit(network_address, level, 1);
463
464 // Check boundaries
465 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
466 r = __loc_database_lookup_max(db, address, network, network_address,
467 db->network_nodes_v0 + node_index, level + 1);
468
469 // Abort when match was found or error
470 if (r <= 0)
471 return r;
472 }
473 }
474
475 // ... and if that fails, we try to go down one step on a zero
476 // branch and then try the ones again...
477 if (node->zero) {
478 node_index = be32toh(node->zero);
479 in6_addr_set_bit(network_address, level, 0);
480
481 // Check boundaries
482 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
483 r = __loc_database_lookup_max(db, address, network, network_address,
484 db->network_nodes_v0 + node_index, level + 1);
485
486 // Abort when match was found or error
487 if (r <= 0)
488 return r;
489 }
490 }
491
492 // End of path
493 return 1;
494}
495
496// Searches for an exact match along the path
497static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* address,
498 struct loc_network** network, struct in6_addr* network_address,
499 const struct loc_database_network_node_v0* node, int level) {
500 // If the node is a leaf node, we end here
501 int r = __loc_database_lookup_leaf_node(db, address, network, network_address, node);
502 if (r <= 0)
503 return r;
504
505 off_t node_index;
506
507 // Follow the path
508 int bit = in6_addr_get_bit(address, level);
509 in6_addr_set_bit(network_address, level, bit);
510
511 if (bit == 0)
512 node_index = be32toh(node->zero);
513 else
514 node_index = be32toh(node->one);
515
516 // If we point back to root, the path ends here
517 if (node_index == 0) {
518 DEBUG(db->ctx, "Tree ends here\n");
519 return 1;
520 }
521
522 // Check boundaries
523 if ((size_t)node_index >= db->network_nodes_count)
524 return -EINVAL;
525
526 // Move on to the next node
527 r = __loc_database_lookup(db, address, network, network_address,
528 db->network_nodes_v0 + node_index, level + 1);
529
530 // End here if a result was found
531 if (r == 0)
532 return r;
533
534 // Raise any errors
535 else if (r < 0)
536 return r;
537
538 DEBUG(db->ctx, "Could not find an exact match at %u\n", level);
539
540 // If nothing was found, we have to search for an inexact match
541 return __loc_database_lookup_max(db, address, network, network_address, node, level);
542}
543
544LOC_EXPORT int loc_database_lookup(struct loc_database* db,
545 struct in6_addr* address, struct loc_network** network) {
546 struct in6_addr network_address;
547 memset(&network_address, 0, sizeof(network_address));
548
549 *network = NULL;
550
551 // Save start time
552 clock_t start = clock();
553
554 int r = __loc_database_lookup(db, address, network, &network_address,
555 db->network_nodes_v0, 0);
556
557 clock_t end = clock();
558
559 // Log how fast this has been
560 DEBUG(db->ctx, "Executed network search in %.8fs\n",
561 (double)(end - start) / CLOCKS_PER_SEC);
562
563 return r;
564}
565
566LOC_EXPORT int loc_database_lookup_from_string(struct loc_database* db,
567 const char* string, struct loc_network** network) {
568 struct in6_addr address;
569
570 int r = loc_parse_address(db->ctx, string, &address);
571 if (r)
572 return r;
573
574 return loc_database_lookup(db, &address, network);
575}