When formatting an IP address only pass the actual IP address
[people/ms/libloc.git] / src / database.c
CommitLineData
2601e83e
MT
1/*
2 libloc - A library to determine the location of someone on the Internet
3
4 Copyright (C) 2017 IPFire Development Team <info@ipfire.org>
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15*/
16
2a30e4de 17#include <arpa/inet.h>
0676cd80 18#include <endian.h>
2601e83e 19#include <errno.h>
10778041 20#include <netinet/in.h>
2601e83e
MT
21#include <stddef.h>
22#include <stdint.h>
23#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
c182393f 26#include <sys/mman.h>
2601e83e 27#include <sys/types.h>
96ea74a5 28#include <time.h>
3f35869a 29#include <unistd.h>
2601e83e
MT
30
31#include <loc/libloc.h>
9fc7f001
MT
32#include <loc/as.h>
33#include <loc/database.h>
a5db3e49 34#include <loc/format.h>
10778041 35#include <loc/network.h>
9fc7f001
MT
36#include <loc/private.h>
37#include <loc/stringpool.h>
2601e83e
MT
38
39struct loc_database {
40 struct loc_ctx* ctx;
41 int refcount;
42
43 unsigned int version;
96ea74a5 44 time_t created_at;
2601e83e
MT
45 off_t vendor;
46 off_t description;
4bf49d00 47 off_t license;
2601e83e 48
a5db3e49 49 // ASes in the database
c182393f 50 struct loc_database_as_v0* as_v0;
a5db3e49
MT
51 size_t as_count;
52
f66b7b09
MT
53 // Network tree
54 struct loc_database_network_node_v0* network_nodes_v0;
55 size_t network_nodes_count;
56
a735a563
MT
57 // Networks
58 struct loc_database_network_v0* networks_v0;
59 size_t networks_count;
60
2601e83e
MT
61 struct loc_stringpool* pool;
62};
63
a7431f1a 64static int loc_database_read_magic(struct loc_database* db, FILE* f) {
2601e83e
MT
65 struct loc_database_magic magic;
66
67 // Read from file
a7431f1a 68 size_t bytes_read = fread(&magic, 1, sizeof(magic), f);
2601e83e
MT
69
70 // Check if we have been able to read enough data
71 if (bytes_read < sizeof(magic)) {
72 ERROR(db->ctx, "Could not read enough data to validate magic bytes\n");
73 DEBUG(db->ctx, "Read %zu bytes, but needed %zu\n", bytes_read, sizeof(magic));
74 return -ENOMSG;
75 }
76
77 // Compare magic bytes
78 if (memcmp(LOC_DATABASE_MAGIC, magic.magic, strlen(LOC_DATABASE_MAGIC)) == 0) {
79 DEBUG(db->ctx, "Magic value matches\n");
80
81 // Parse version
0676cd80 82 db->version = be16toh(magic.version);
2601e83e
MT
83 DEBUG(db->ctx, "Database version is %u\n", db->version);
84
85 return 0;
86 }
87
88 ERROR(db->ctx, "Database format is not compatible\n");
89
90 // Return an error
91 return 1;
92}
93
a5db3e49 94static int loc_database_read_as_section_v0(struct loc_database* db,
edb4ba7c
MT
95 FILE* f, const struct loc_database_header_v0* header) {
96 off_t as_offset = be32toh(header->as_offset);
97 size_t as_length = be32toh(header->as_length);
98
c182393f 99 DEBUG(db->ctx, "Reading AS section from %jd (%zu bytes)\n", as_offset, as_length);
a5db3e49 100
c182393f
MT
101 if (as_length > 0) {
102 db->as_v0 = mmap(NULL, as_length, PROT_READ,
a7431f1a 103 MAP_SHARED, fileno(f), as_offset);
a5db3e49 104
c182393f
MT
105 if (db->as_v0 == MAP_FAILED)
106 return -errno;
a5db3e49
MT
107 }
108
c182393f
MT
109 db->as_count = as_length / sizeof(*db->as_v0);
110
a5db3e49
MT
111 INFO(db->ctx, "Read %zu ASes from the database\n", db->as_count);
112
113 return 0;
114}
115
f66b7b09 116static int loc_database_read_network_nodes_section_v0(struct loc_database* db,
edb4ba7c
MT
117 FILE* f, const struct loc_database_header_v0* header) {
118 off_t network_nodes_offset = be32toh(header->network_tree_offset);
119 size_t network_nodes_length = be32toh(header->network_tree_length);
120
f66b7b09
MT
121 DEBUG(db->ctx, "Reading network nodes section from %jd (%zu bytes)\n",
122 network_nodes_offset, network_nodes_length);
123
124 if (network_nodes_length > 0) {
125 db->network_nodes_v0 = mmap(NULL, network_nodes_length, PROT_READ,
126 MAP_SHARED, fileno(f), network_nodes_offset);
127
128 if (db->network_nodes_v0 == MAP_FAILED)
129 return -errno;
130 }
131
132 db->network_nodes_count = network_nodes_length / sizeof(*db->network_nodes_v0);
133
134 INFO(db->ctx, "Read %zu network nodes from the database\n", db->network_nodes_count);
135
136 return 0;
137}
138
a735a563
MT
139static int loc_database_read_networks_section_v0(struct loc_database* db,
140 FILE* f, const struct loc_database_header_v0* header) {
141 off_t networks_offset = be32toh(header->network_data_offset);
142 size_t networks_length = be32toh(header->network_data_length);
143
144 DEBUG(db->ctx, "Reading networks section from %jd (%zu bytes)\n",
145 networks_offset, networks_length);
146
147 if (networks_length > 0) {
148 db->networks_v0 = mmap(NULL, networks_length, PROT_READ,
149 MAP_SHARED, fileno(f), networks_offset);
150
151 if (db->networks_v0 == MAP_FAILED)
152 return -errno;
153 }
154
155 db->networks_count = networks_length / sizeof(*db->networks_v0);
156
157 INFO(db->ctx, "Read %zu networks from the database\n", db->networks_count);
158
159 return 0;
160}
161
a7431f1a 162static int loc_database_read_header_v0(struct loc_database* db, FILE* f) {
2601e83e
MT
163 struct loc_database_header_v0 header;
164
165 // Read from file
a7431f1a 166 size_t size = fread(&header, 1, sizeof(header), f);
2601e83e
MT
167
168 if (size < sizeof(header)) {
169 ERROR(db->ctx, "Could not read enough data for header\n");
170 return -ENOMSG;
171 }
172
173 // Copy over data
96ea74a5 174 db->created_at = be64toh(header.created_at);
0676cd80
MT
175 db->vendor = be32toh(header.vendor);
176 db->description = be32toh(header.description);
4bf49d00 177 db->license = be32toh(header.license);
2601e83e
MT
178
179 // Open pool
0676cd80
MT
180 off_t pool_offset = be32toh(header.pool_offset);
181 size_t pool_length = be32toh(header.pool_length);
2601e83e 182
0e974d4b 183 int r = loc_stringpool_open(db->ctx, &db->pool,
a7431f1a 184 f, pool_length, pool_offset);
2601e83e
MT
185 if (r)
186 return r;
187
a5db3e49 188 // AS section
edb4ba7c 189 r = loc_database_read_as_section_v0(db, f, &header);
a5db3e49
MT
190 if (r)
191 return r;
192
f66b7b09 193 // Network Nodes
edb4ba7c 194 r = loc_database_read_network_nodes_section_v0(db, f, &header);
f66b7b09
MT
195 if (r)
196 return r;
197
a735a563
MT
198 // Networks
199 r = loc_database_read_networks_section_v0(db, f, &header);
200 if (r)
201 return r;
202
2601e83e
MT
203 return 0;
204}
205
a7431f1a 206static int loc_database_read_header(struct loc_database* db, FILE* f) {
2601e83e
MT
207 switch (db->version) {
208 case 0:
a7431f1a 209 return loc_database_read_header_v0(db, f);
2601e83e
MT
210
211 default:
212 ERROR(db->ctx, "Incompatible database version: %u\n", db->version);
213 return 1;
214 }
215}
216
a7431f1a 217static int loc_database_read(struct loc_database* db, FILE* f) {
02879100
MT
218 clock_t start = clock();
219
220 // Read magic bytes
a7431f1a 221 int r = loc_database_read_magic(db, f);
02879100
MT
222 if (r)
223 return r;
224
225 // Read the header
a7431f1a 226 r = loc_database_read_header(db, f);
02879100
MT
227 if (r)
228 return r;
229
230 clock_t end = clock();
231
232 INFO(db->ctx, "Opened database in %.8fs\n",
233 (double)(end - start) / CLOCKS_PER_SEC);
234
235 return 0;
236}
237
c182393f 238LOC_EXPORT int loc_database_new(struct loc_ctx* ctx, struct loc_database** database, FILE* f) {
a7431f1a
MT
239 // Fail on invalid file handle
240 if (!f)
241 return -EINVAL;
242
c182393f
MT
243 struct loc_database* db = calloc(1, sizeof(*db));
244 if (!db)
245 return -ENOMEM;
246
247 // Reference context
248 db->ctx = loc_ref(ctx);
249 db->refcount = 1;
250
251 DEBUG(db->ctx, "Database object allocated at %p\n", db);
252
a7431f1a 253 int r = loc_database_read(db, f);
02879100
MT
254 if (r) {
255 loc_database_unref(db);
2601e83e 256 return r;
02879100 257 }
2601e83e 258
c182393f
MT
259 *database = db;
260
2601e83e 261 return 0;
2601e83e
MT
262}
263
c182393f
MT
264LOC_EXPORT struct loc_database* loc_database_ref(struct loc_database* db) {
265 db->refcount++;
266
267 return db;
8f5b676a
MT
268}
269
c182393f 270static void loc_database_free(struct loc_database* db) {
f10ebc2d
MT
271 int r;
272
c182393f 273 DEBUG(db->ctx, "Releasing database %p\n", db);
c34e76f1 274
c182393f
MT
275 // Removing all ASes
276 if (db->as_v0) {
f10ebc2d 277 r = munmap(db->as_v0, db->as_count * sizeof(*db->as_v0));
c182393f
MT
278 if (r)
279 ERROR(db->ctx, "Could not unmap AS section: %s\n", strerror(errno));
280 }
c34e76f1 281
f10ebc2d
MT
282 // Remove mapped network sections
283 if (db->networks_v0) {
284 r = munmap(db->networks_v0, db->networks_count * sizeof(*db->networks_v0));
285 if (r)
286 ERROR(db->ctx, "Could not unmap networks section: %s\n", strerror(errno));
287 }
288
289 // Remove mapped network nodes section
290 if (db->network_nodes_v0) {
291 r = munmap(db->network_nodes_v0, db->network_nodes_count * sizeof(*db->network_nodes_v0));
292 if (r)
293 ERROR(db->ctx, "Could not unmap network nodes section: %s\n", strerror(errno));
294 }
295
c182393f 296 loc_stringpool_unref(db->pool);
c34e76f1 297
c182393f
MT
298 loc_unref(db->ctx);
299 free(db);
c34e76f1
MT
300}
301
c182393f
MT
302LOC_EXPORT struct loc_database* loc_database_unref(struct loc_database* db) {
303 if (--db->refcount > 0)
304 return NULL;
78ace4ed 305
c182393f
MT
306 loc_database_free(db);
307 return NULL;
308}
78ace4ed 309
c182393f
MT
310LOC_EXPORT time_t loc_database_created_at(struct loc_database* db) {
311 return db->created_at;
312}
78ace4ed 313
c182393f
MT
314LOC_EXPORT const char* loc_database_get_vendor(struct loc_database* db) {
315 return loc_stringpool_get(db->pool, db->vendor);
316}
78ace4ed 317
c182393f
MT
318LOC_EXPORT const char* loc_database_get_description(struct loc_database* db) {
319 return loc_stringpool_get(db->pool, db->description);
320}
78ace4ed 321
4bf49d00
MT
322LOC_EXPORT const char* loc_database_get_license(struct loc_database* db) {
323 return loc_stringpool_get(db->pool, db->license);
324}
325
c182393f
MT
326LOC_EXPORT size_t loc_database_count_as(struct loc_database* db) {
327 return db->as_count;
78ace4ed
MT
328}
329
c182393f
MT
330// Returns the AS at position pos
331static int loc_database_fetch_as(struct loc_database* db, struct loc_as** as, off_t pos) {
332 if ((size_t)pos >= db->as_count)
333 return -EINVAL;
2601e83e 334
c182393f 335 DEBUG(db->ctx, "Fetching AS at position %jd\n", pos);
2601e83e
MT
336
337 int r;
c182393f
MT
338 switch (db->version) {
339 case 0:
340 r = loc_as_new_from_database_v0(db->ctx, db->pool, as, db->as_v0 + pos);
341 break;
2601e83e 342
c182393f
MT
343 default:
344 return -1;
345 }
2601e83e 346
c182393f
MT
347 if (r == 0) {
348 DEBUG(db->ctx, "Got AS%u\n", loc_as_get_number(*as));
2601e83e 349 }
2601e83e 350
c182393f
MT
351 return r;
352}
c34e76f1 353
c182393f
MT
354// Performs a binary search to find the AS in the list
355LOC_EXPORT int loc_database_get_as(struct loc_database* db, struct loc_as** as, uint32_t number) {
356 off_t lo = 0;
357 off_t hi = db->as_count - 1;
c34e76f1 358
8f3e2a06
MT
359 // Save start time
360 clock_t start = clock();
361
c182393f
MT
362 while (lo <= hi) {
363 off_t i = (lo + hi) / 2;
8f5b676a 364
c182393f
MT
365 // Fetch AS in the middle between lo and hi
366 int r = loc_database_fetch_as(db, as, i);
367 if (r)
368 return r;
a5db3e49 369
c182393f
MT
370 // Check if this is a match
371 uint32_t as_number = loc_as_get_number(*as);
8f3e2a06
MT
372 if (as_number == number) {
373 clock_t end = clock();
374
375 // Log how fast this has been
376 DEBUG(db->ctx, "Found AS%u in %.8fs\n", as_number,
377 (double)(end - start) / CLOCKS_PER_SEC);
378
c182393f 379 return 0;
8f3e2a06 380 }
c182393f
MT
381
382 // If it wasn't, we release the AS and
383 // adjust our search pointers
384 loc_as_unref(*as);
385
386 if (as_number < number) {
387 lo = i + 1;
388 } else
389 hi = i - 1;
390 }
2601e83e 391
c182393f
MT
392 // Nothing found
393 *as = NULL;
2601e83e 394
8f3e2a06 395 return 1;
2601e83e 396}
10778041
MT
397
398// Returns the network at position pos
399static int loc_database_fetch_network(struct loc_database* db, struct loc_network** network, struct in6_addr* address, off_t pos) {
400 if ((size_t)pos >= db->networks_count)
401 return -EINVAL;
402
403 DEBUG(db->ctx, "Fetching network at position %jd\n", pos);
404
405 int r;
406 switch (db->version) {
407 case 0:
408 r = loc_network_new_from_database_v0(db->ctx, network, address, db->networks_v0 + pos);
409 break;
410
411 default:
412 return -1;
413 }
414
415 if (r == 0) {
416 char* string = loc_network_str(*network);
417 DEBUG(db->ctx, "Got network %s\n", string);
418 free(string);
419 }
420
421 return r;
422}
2a30e4de 423
025ef489
MT
424static int __loc_database_node_is_leaf(const struct loc_database_network_node_v0* node) {
425 return (node->zero == htobe32(0xffffffff));
426}
427
428static int __loc_database_lookup_handle_leaf(struct loc_database* db, const struct in6_addr* address,
2a30e4de
MT
429 struct loc_network** network, struct in6_addr* network_address,
430 const struct loc_database_network_node_v0* node) {
025ef489 431 DEBUG(db->ctx, "Handling leaf node at %jd\n", node - db->network_nodes_v0);
2a30e4de
MT
432
433 // Fetch the network
434 int r = loc_database_fetch_network(db, network,
435 network_address, be32toh(node->one));
436 if (r)
437 return r;
438
439 // Check if the given IP address is inside the network
440 r = loc_network_match_address(*network, address);
441 if (r) {
442 DEBUG(db->ctx, "Searched address is not part of the network\n");
443
444 loc_network_unref(*network);
445 *network = NULL;
446 return 1;
447 }
448
449 // A network was found and the IP address matches
450 return 0;
451}
452
453// Returns the highest result available
454static int __loc_database_lookup_max(struct loc_database* db, const struct in6_addr* address,
455 struct loc_network** network, struct in6_addr* network_address,
f66f15e1 456 const struct loc_database_network_node_v0* node, unsigned int level) {
2a30e4de 457 // If the node is a leaf node, we end here
025ef489
MT
458 if (__loc_database_node_is_leaf(node))
459 return __loc_database_lookup_handle_leaf(db, address, network, network_address, node);
2a30e4de 460
025ef489 461 int r;
2a30e4de
MT
462 off_t node_index;
463
464 // Try to go down the ones path first
465 if (node->one) {
466 node_index = be32toh(node->one);
467 in6_addr_set_bit(network_address, level, 1);
468
469 // Check boundaries
470 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
471 r = __loc_database_lookup_max(db, address, network, network_address,
472 db->network_nodes_v0 + node_index, level + 1);
473
474 // Abort when match was found or error
475 if (r <= 0)
476 return r;
477 }
478 }
479
480 // ... and if that fails, we try to go down one step on a zero
481 // branch and then try the ones again...
482 if (node->zero) {
483 node_index = be32toh(node->zero);
484 in6_addr_set_bit(network_address, level, 0);
485
486 // Check boundaries
487 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
488 r = __loc_database_lookup_max(db, address, network, network_address,
489 db->network_nodes_v0 + node_index, level + 1);
490
491 // Abort when match was found or error
492 if (r <= 0)
493 return r;
494 }
495 }
496
497 // End of path
498 return 1;
499}
500
501// Searches for an exact match along the path
502static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* address,
503 struct loc_network** network, struct in6_addr* network_address,
f66f15e1 504 const struct loc_database_network_node_v0* node, unsigned int level) {
2a30e4de 505 // If the node is a leaf node, we end here
025ef489
MT
506 if (__loc_database_node_is_leaf(node))
507 return __loc_database_lookup_handle_leaf(db, address, network, network_address, node);
2a30e4de 508
025ef489 509 int r;
2a30e4de
MT
510 off_t node_index;
511
512 // Follow the path
513 int bit = in6_addr_get_bit(address, level);
514 in6_addr_set_bit(network_address, level, bit);
515
516 if (bit == 0)
517 node_index = be32toh(node->zero);
518 else
519 node_index = be32toh(node->one);
520
521 // If we point back to root, the path ends here
522 if (node_index == 0) {
523 DEBUG(db->ctx, "Tree ends here\n");
524 return 1;
525 }
526
527 // Check boundaries
528 if ((size_t)node_index >= db->network_nodes_count)
529 return -EINVAL;
530
531 // Move on to the next node
532 r = __loc_database_lookup(db, address, network, network_address,
533 db->network_nodes_v0 + node_index, level + 1);
534
535 // End here if a result was found
536 if (r == 0)
537 return r;
538
539 // Raise any errors
540 else if (r < 0)
541 return r;
542
543 DEBUG(db->ctx, "Could not find an exact match at %u\n", level);
544
545 // If nothing was found, we have to search for an inexact match
546 return __loc_database_lookup_max(db, address, network, network_address, node, level);
547}
548
549LOC_EXPORT int loc_database_lookup(struct loc_database* db,
550 struct in6_addr* address, struct loc_network** network) {
551 struct in6_addr network_address;
552 memset(&network_address, 0, sizeof(network_address));
553
554 *network = NULL;
555
556 // Save start time
557 clock_t start = clock();
558
559 int r = __loc_database_lookup(db, address, network, &network_address,
560 db->network_nodes_v0, 0);
561
562 clock_t end = clock();
563
564 // Log how fast this has been
565 DEBUG(db->ctx, "Executed network search in %.8fs\n",
566 (double)(end - start) / CLOCKS_PER_SEC);
567
568 return r;
569}
570
571LOC_EXPORT int loc_database_lookup_from_string(struct loc_database* db,
572 const char* string, struct loc_network** network) {
573 struct in6_addr address;
574
575 int r = loc_parse_address(db->ctx, string, &address);
576 if (r)
577 return r;
578
579 return loc_database_lookup(db, &address, network);
580}