python: Make lookup function available
[people/ms/libloc.git] / src / database.c
CommitLineData
2601e83e
MT
1/*
2 libloc - A library to determine the location of someone on the Internet
3
4 Copyright (C) 2017 IPFire Development Team <info@ipfire.org>
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15*/
16
2a30e4de 17#include <arpa/inet.h>
0676cd80 18#include <endian.h>
2601e83e 19#include <errno.h>
10778041 20#include <netinet/in.h>
2601e83e
MT
21#include <stddef.h>
22#include <stdint.h>
23#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
c182393f 26#include <sys/mman.h>
2601e83e 27#include <sys/types.h>
96ea74a5 28#include <time.h>
3f35869a 29#include <unistd.h>
2601e83e
MT
30
31#include <loc/libloc.h>
9fc7f001
MT
32#include <loc/as.h>
33#include <loc/database.h>
a5db3e49 34#include <loc/format.h>
10778041 35#include <loc/network.h>
9fc7f001
MT
36#include <loc/private.h>
37#include <loc/stringpool.h>
2601e83e
MT
38
39struct loc_database {
40 struct loc_ctx* ctx;
41 int refcount;
42
43 unsigned int version;
96ea74a5 44 time_t created_at;
2601e83e
MT
45 off_t vendor;
46 off_t description;
47
a5db3e49 48 // ASes in the database
c182393f 49 struct loc_database_as_v0* as_v0;
a5db3e49
MT
50 size_t as_count;
51
f66b7b09
MT
52 // Network tree
53 struct loc_database_network_node_v0* network_nodes_v0;
54 size_t network_nodes_count;
55
a735a563
MT
56 // Networks
57 struct loc_database_network_v0* networks_v0;
58 size_t networks_count;
59
2601e83e
MT
60 struct loc_stringpool* pool;
61};
62
a7431f1a 63static int loc_database_read_magic(struct loc_database* db, FILE* f) {
2601e83e
MT
64 struct loc_database_magic magic;
65
66 // Read from file
a7431f1a 67 size_t bytes_read = fread(&magic, 1, sizeof(magic), f);
2601e83e
MT
68
69 // Check if we have been able to read enough data
70 if (bytes_read < sizeof(magic)) {
71 ERROR(db->ctx, "Could not read enough data to validate magic bytes\n");
72 DEBUG(db->ctx, "Read %zu bytes, but needed %zu\n", bytes_read, sizeof(magic));
73 return -ENOMSG;
74 }
75
76 // Compare magic bytes
77 if (memcmp(LOC_DATABASE_MAGIC, magic.magic, strlen(LOC_DATABASE_MAGIC)) == 0) {
78 DEBUG(db->ctx, "Magic value matches\n");
79
80 // Parse version
0676cd80 81 db->version = be16toh(magic.version);
2601e83e
MT
82 DEBUG(db->ctx, "Database version is %u\n", db->version);
83
84 return 0;
85 }
86
87 ERROR(db->ctx, "Database format is not compatible\n");
88
89 // Return an error
90 return 1;
91}
92
a5db3e49 93static int loc_database_read_as_section_v0(struct loc_database* db,
edb4ba7c
MT
94 FILE* f, const struct loc_database_header_v0* header) {
95 off_t as_offset = be32toh(header->as_offset);
96 size_t as_length = be32toh(header->as_length);
97
c182393f 98 DEBUG(db->ctx, "Reading AS section from %jd (%zu bytes)\n", as_offset, as_length);
a5db3e49 99
c182393f
MT
100 if (as_length > 0) {
101 db->as_v0 = mmap(NULL, as_length, PROT_READ,
a7431f1a 102 MAP_SHARED, fileno(f), as_offset);
a5db3e49 103
c182393f
MT
104 if (db->as_v0 == MAP_FAILED)
105 return -errno;
a5db3e49
MT
106 }
107
c182393f
MT
108 db->as_count = as_length / sizeof(*db->as_v0);
109
a5db3e49
MT
110 INFO(db->ctx, "Read %zu ASes from the database\n", db->as_count);
111
112 return 0;
113}
114
f66b7b09 115static int loc_database_read_network_nodes_section_v0(struct loc_database* db,
edb4ba7c
MT
116 FILE* f, const struct loc_database_header_v0* header) {
117 off_t network_nodes_offset = be32toh(header->network_tree_offset);
118 size_t network_nodes_length = be32toh(header->network_tree_length);
119
f66b7b09
MT
120 DEBUG(db->ctx, "Reading network nodes section from %jd (%zu bytes)\n",
121 network_nodes_offset, network_nodes_length);
122
123 if (network_nodes_length > 0) {
124 db->network_nodes_v0 = mmap(NULL, network_nodes_length, PROT_READ,
125 MAP_SHARED, fileno(f), network_nodes_offset);
126
127 if (db->network_nodes_v0 == MAP_FAILED)
128 return -errno;
129 }
130
131 db->network_nodes_count = network_nodes_length / sizeof(*db->network_nodes_v0);
132
133 INFO(db->ctx, "Read %zu network nodes from the database\n", db->network_nodes_count);
134
135 return 0;
136}
137
a735a563
MT
138static int loc_database_read_networks_section_v0(struct loc_database* db,
139 FILE* f, const struct loc_database_header_v0* header) {
140 off_t networks_offset = be32toh(header->network_data_offset);
141 size_t networks_length = be32toh(header->network_data_length);
142
143 DEBUG(db->ctx, "Reading networks section from %jd (%zu bytes)\n",
144 networks_offset, networks_length);
145
146 if (networks_length > 0) {
147 db->networks_v0 = mmap(NULL, networks_length, PROT_READ,
148 MAP_SHARED, fileno(f), networks_offset);
149
150 if (db->networks_v0 == MAP_FAILED)
151 return -errno;
152 }
153
154 db->networks_count = networks_length / sizeof(*db->networks_v0);
155
156 INFO(db->ctx, "Read %zu networks from the database\n", db->networks_count);
157
158 return 0;
159}
160
a7431f1a 161static int loc_database_read_header_v0(struct loc_database* db, FILE* f) {
2601e83e
MT
162 struct loc_database_header_v0 header;
163
164 // Read from file
a7431f1a 165 size_t size = fread(&header, 1, sizeof(header), f);
2601e83e
MT
166
167 if (size < sizeof(header)) {
168 ERROR(db->ctx, "Could not read enough data for header\n");
169 return -ENOMSG;
170 }
171
172 // Copy over data
96ea74a5 173 db->created_at = be64toh(header.created_at);
0676cd80
MT
174 db->vendor = be32toh(header.vendor);
175 db->description = be32toh(header.description);
2601e83e
MT
176
177 // Open pool
0676cd80
MT
178 off_t pool_offset = be32toh(header.pool_offset);
179 size_t pool_length = be32toh(header.pool_length);
2601e83e 180
0e974d4b 181 int r = loc_stringpool_open(db->ctx, &db->pool,
a7431f1a 182 f, pool_length, pool_offset);
2601e83e
MT
183 if (r)
184 return r;
185
a5db3e49 186 // AS section
edb4ba7c 187 r = loc_database_read_as_section_v0(db, f, &header);
a5db3e49
MT
188 if (r)
189 return r;
190
f66b7b09 191 // Network Nodes
edb4ba7c 192 r = loc_database_read_network_nodes_section_v0(db, f, &header);
f66b7b09
MT
193 if (r)
194 return r;
195
a735a563
MT
196 // Networks
197 r = loc_database_read_networks_section_v0(db, f, &header);
198 if (r)
199 return r;
200
2601e83e
MT
201 return 0;
202}
203
a7431f1a 204static int loc_database_read_header(struct loc_database* db, FILE* f) {
2601e83e
MT
205 switch (db->version) {
206 case 0:
a7431f1a 207 return loc_database_read_header_v0(db, f);
2601e83e
MT
208
209 default:
210 ERROR(db->ctx, "Incompatible database version: %u\n", db->version);
211 return 1;
212 }
213}
214
a7431f1a 215static int loc_database_read(struct loc_database* db, FILE* f) {
02879100
MT
216 clock_t start = clock();
217
218 // Read magic bytes
a7431f1a 219 int r = loc_database_read_magic(db, f);
02879100
MT
220 if (r)
221 return r;
222
223 // Read the header
a7431f1a 224 r = loc_database_read_header(db, f);
02879100
MT
225 if (r)
226 return r;
227
228 clock_t end = clock();
229
230 INFO(db->ctx, "Opened database in %.8fs\n",
231 (double)(end - start) / CLOCKS_PER_SEC);
232
233 return 0;
234}
235
c182393f 236LOC_EXPORT int loc_database_new(struct loc_ctx* ctx, struct loc_database** database, FILE* f) {
a7431f1a
MT
237 // Fail on invalid file handle
238 if (!f)
239 return -EINVAL;
240
c182393f
MT
241 struct loc_database* db = calloc(1, sizeof(*db));
242 if (!db)
243 return -ENOMEM;
244
245 // Reference context
246 db->ctx = loc_ref(ctx);
247 db->refcount = 1;
248
249 DEBUG(db->ctx, "Database object allocated at %p\n", db);
250
a7431f1a 251 int r = loc_database_read(db, f);
02879100
MT
252 if (r) {
253 loc_database_unref(db);
2601e83e 254 return r;
02879100 255 }
2601e83e 256
c182393f
MT
257 *database = db;
258
2601e83e 259 return 0;
2601e83e
MT
260}
261
c182393f
MT
262LOC_EXPORT struct loc_database* loc_database_ref(struct loc_database* db) {
263 db->refcount++;
264
265 return db;
8f5b676a
MT
266}
267
c182393f
MT
268static void loc_database_free(struct loc_database* db) {
269 DEBUG(db->ctx, "Releasing database %p\n", db);
c34e76f1 270
c182393f
MT
271 // Removing all ASes
272 if (db->as_v0) {
273 int r = munmap(db->as_v0, db->as_count * sizeof(*db->as_v0));
274 if (r)
275 ERROR(db->ctx, "Could not unmap AS section: %s\n", strerror(errno));
276 }
c34e76f1 277
c182393f 278 loc_stringpool_unref(db->pool);
c34e76f1 279
c182393f
MT
280 loc_unref(db->ctx);
281 free(db);
c34e76f1
MT
282}
283
c182393f
MT
284LOC_EXPORT struct loc_database* loc_database_unref(struct loc_database* db) {
285 if (--db->refcount > 0)
286 return NULL;
78ace4ed 287
c182393f
MT
288 loc_database_free(db);
289 return NULL;
290}
78ace4ed 291
c182393f
MT
292LOC_EXPORT time_t loc_database_created_at(struct loc_database* db) {
293 return db->created_at;
294}
78ace4ed 295
c182393f
MT
296LOC_EXPORT const char* loc_database_get_vendor(struct loc_database* db) {
297 return loc_stringpool_get(db->pool, db->vendor);
298}
78ace4ed 299
c182393f
MT
300LOC_EXPORT const char* loc_database_get_description(struct loc_database* db) {
301 return loc_stringpool_get(db->pool, db->description);
302}
78ace4ed 303
c182393f
MT
304LOC_EXPORT size_t loc_database_count_as(struct loc_database* db) {
305 return db->as_count;
78ace4ed
MT
306}
307
c182393f
MT
308// Returns the AS at position pos
309static int loc_database_fetch_as(struct loc_database* db, struct loc_as** as, off_t pos) {
310 if ((size_t)pos >= db->as_count)
311 return -EINVAL;
2601e83e 312
c182393f 313 DEBUG(db->ctx, "Fetching AS at position %jd\n", pos);
2601e83e
MT
314
315 int r;
c182393f
MT
316 switch (db->version) {
317 case 0:
318 r = loc_as_new_from_database_v0(db->ctx, db->pool, as, db->as_v0 + pos);
319 break;
2601e83e 320
c182393f
MT
321 default:
322 return -1;
323 }
2601e83e 324
c182393f
MT
325 if (r == 0) {
326 DEBUG(db->ctx, "Got AS%u\n", loc_as_get_number(*as));
2601e83e 327 }
2601e83e 328
c182393f
MT
329 return r;
330}
c34e76f1 331
c182393f
MT
332// Performs a binary search to find the AS in the list
333LOC_EXPORT int loc_database_get_as(struct loc_database* db, struct loc_as** as, uint32_t number) {
334 off_t lo = 0;
335 off_t hi = db->as_count - 1;
c34e76f1 336
8f3e2a06
MT
337 // Save start time
338 clock_t start = clock();
339
c182393f
MT
340 while (lo <= hi) {
341 off_t i = (lo + hi) / 2;
8f5b676a 342
c182393f
MT
343 // Fetch AS in the middle between lo and hi
344 int r = loc_database_fetch_as(db, as, i);
345 if (r)
346 return r;
a5db3e49 347
c182393f
MT
348 // Check if this is a match
349 uint32_t as_number = loc_as_get_number(*as);
8f3e2a06
MT
350 if (as_number == number) {
351 clock_t end = clock();
352
353 // Log how fast this has been
354 DEBUG(db->ctx, "Found AS%u in %.8fs\n", as_number,
355 (double)(end - start) / CLOCKS_PER_SEC);
356
c182393f 357 return 0;
8f3e2a06 358 }
c182393f
MT
359
360 // If it wasn't, we release the AS and
361 // adjust our search pointers
362 loc_as_unref(*as);
363
364 if (as_number < number) {
365 lo = i + 1;
366 } else
367 hi = i - 1;
368 }
2601e83e 369
c182393f
MT
370 // Nothing found
371 *as = NULL;
2601e83e 372
8f3e2a06 373 return 1;
2601e83e 374}
10778041
MT
375
376// Returns the network at position pos
377static int loc_database_fetch_network(struct loc_database* db, struct loc_network** network, struct in6_addr* address, off_t pos) {
378 if ((size_t)pos >= db->networks_count)
379 return -EINVAL;
380
381 DEBUG(db->ctx, "Fetching network at position %jd\n", pos);
382
383 int r;
384 switch (db->version) {
385 case 0:
386 r = loc_network_new_from_database_v0(db->ctx, network, address, db->networks_v0 + pos);
387 break;
388
389 default:
390 return -1;
391 }
392
393 if (r == 0) {
394 char* string = loc_network_str(*network);
395 DEBUG(db->ctx, "Got network %s\n", string);
396 free(string);
397 }
398
399 return r;
400}
2a30e4de
MT
401
402static int __loc_database_lookup_leaf_node(struct loc_database* db, const struct in6_addr* address,
403 struct loc_network** network, struct in6_addr* network_address,
404 const struct loc_database_network_node_v0* node) {
405 // Check if this node is a leaf node
406 if (node->zero != htobe32(0xffffffff))
407 return 1;
408
409 DEBUG(db->ctx, "Node is a leaf: %jd\n", node - db->network_nodes_v0);
410
411 // Fetch the network
412 int r = loc_database_fetch_network(db, network,
413 network_address, be32toh(node->one));
414 if (r)
415 return r;
416
417 // Check if the given IP address is inside the network
418 r = loc_network_match_address(*network, address);
419 if (r) {
420 DEBUG(db->ctx, "Searched address is not part of the network\n");
421
422 loc_network_unref(*network);
423 *network = NULL;
424 return 1;
425 }
426
427 // A network was found and the IP address matches
428 return 0;
429}
430
431// Returns the highest result available
432static int __loc_database_lookup_max(struct loc_database* db, const struct in6_addr* address,
433 struct loc_network** network, struct in6_addr* network_address,
434 const struct loc_database_network_node_v0* node, int level) {
435
436 // If the node is a leaf node, we end here
437 int r = __loc_database_lookup_leaf_node(db, address, network, network_address, node);
438 if (r <= 0)
439 return r;
440
441 off_t node_index;
442
443 // Try to go down the ones path first
444 if (node->one) {
445 node_index = be32toh(node->one);
446 in6_addr_set_bit(network_address, level, 1);
447
448 // Check boundaries
449 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
450 r = __loc_database_lookup_max(db, address, network, network_address,
451 db->network_nodes_v0 + node_index, level + 1);
452
453 // Abort when match was found or error
454 if (r <= 0)
455 return r;
456 }
457 }
458
459 // ... and if that fails, we try to go down one step on a zero
460 // branch and then try the ones again...
461 if (node->zero) {
462 node_index = be32toh(node->zero);
463 in6_addr_set_bit(network_address, level, 0);
464
465 // Check boundaries
466 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
467 r = __loc_database_lookup_max(db, address, network, network_address,
468 db->network_nodes_v0 + node_index, level + 1);
469
470 // Abort when match was found or error
471 if (r <= 0)
472 return r;
473 }
474 }
475
476 // End of path
477 return 1;
478}
479
480// Searches for an exact match along the path
481static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* address,
482 struct loc_network** network, struct in6_addr* network_address,
483 const struct loc_database_network_node_v0* node, int level) {
484 // If the node is a leaf node, we end here
485 int r = __loc_database_lookup_leaf_node(db, address, network, network_address, node);
486 if (r <= 0)
487 return r;
488
489 off_t node_index;
490
491 // Follow the path
492 int bit = in6_addr_get_bit(address, level);
493 in6_addr_set_bit(network_address, level, bit);
494
495 if (bit == 0)
496 node_index = be32toh(node->zero);
497 else
498 node_index = be32toh(node->one);
499
500 // If we point back to root, the path ends here
501 if (node_index == 0) {
502 DEBUG(db->ctx, "Tree ends here\n");
503 return 1;
504 }
505
506 // Check boundaries
507 if ((size_t)node_index >= db->network_nodes_count)
508 return -EINVAL;
509
510 // Move on to the next node
511 r = __loc_database_lookup(db, address, network, network_address,
512 db->network_nodes_v0 + node_index, level + 1);
513
514 // End here if a result was found
515 if (r == 0)
516 return r;
517
518 // Raise any errors
519 else if (r < 0)
520 return r;
521
522 DEBUG(db->ctx, "Could not find an exact match at %u\n", level);
523
524 // If nothing was found, we have to search for an inexact match
525 return __loc_database_lookup_max(db, address, network, network_address, node, level);
526}
527
528LOC_EXPORT int loc_database_lookup(struct loc_database* db,
529 struct in6_addr* address, struct loc_network** network) {
530 struct in6_addr network_address;
531 memset(&network_address, 0, sizeof(network_address));
532
533 *network = NULL;
534
535 // Save start time
536 clock_t start = clock();
537
538 int r = __loc_database_lookup(db, address, network, &network_address,
539 db->network_nodes_v0, 0);
540
541 clock_t end = clock();
542
543 // Log how fast this has been
544 DEBUG(db->ctx, "Executed network search in %.8fs\n",
545 (double)(end - start) / CLOCKS_PER_SEC);
546
547 return r;
548}
549
550LOC_EXPORT int loc_database_lookup_from_string(struct loc_database* db,
551 const char* string, struct loc_network** network) {
552 struct in6_addr address;
553
554 int r = loc_parse_address(db->ctx, string, &address);
555 if (r)
556 return r;
557
558 return loc_database_lookup(db, &address, network);
559}