]> git.ipfire.org Git - people/ms/libloc.git/blob - src/database.c
database: Stop walking down the tree when we hit a leaf node
[people/ms/libloc.git] / src / database.c
1 /*
2 libloc - A library to determine the location of someone on the Internet
3
4 Copyright (C) 2017 IPFire Development Team <info@ipfire.org>
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15 */
16
17 #include <arpa/inet.h>
18 #include <endian.h>
19 #include <errno.h>
20 #include <netinet/in.h>
21 #include <stddef.h>
22 #include <stdint.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mman.h>
27 #include <sys/types.h>
28 #include <time.h>
29 #include <unistd.h>
30
31 #include <loc/libloc.h>
32 #include <loc/as.h>
33 #include <loc/database.h>
34 #include <loc/format.h>
35 #include <loc/network.h>
36 #include <loc/private.h>
37 #include <loc/stringpool.h>
38
39 struct loc_database {
40 struct loc_ctx* ctx;
41 int refcount;
42
43 unsigned int version;
44 time_t created_at;
45 off_t vendor;
46 off_t description;
47 off_t license;
48
49 // ASes in the database
50 struct loc_database_as_v0* as_v0;
51 size_t as_count;
52
53 // Network tree
54 struct loc_database_network_node_v0* network_nodes_v0;
55 size_t network_nodes_count;
56
57 // Networks
58 struct loc_database_network_v0* networks_v0;
59 size_t networks_count;
60
61 struct loc_stringpool* pool;
62 };
63
64 static int loc_database_read_magic(struct loc_database* db, FILE* f) {
65 struct loc_database_magic magic;
66
67 // Read from file
68 size_t bytes_read = fread(&magic, 1, sizeof(magic), f);
69
70 // Check if we have been able to read enough data
71 if (bytes_read < sizeof(magic)) {
72 ERROR(db->ctx, "Could not read enough data to validate magic bytes\n");
73 DEBUG(db->ctx, "Read %zu bytes, but needed %zu\n", bytes_read, sizeof(magic));
74 return -ENOMSG;
75 }
76
77 // Compare magic bytes
78 if (memcmp(LOC_DATABASE_MAGIC, magic.magic, strlen(LOC_DATABASE_MAGIC)) == 0) {
79 DEBUG(db->ctx, "Magic value matches\n");
80
81 // Parse version
82 db->version = be16toh(magic.version);
83 DEBUG(db->ctx, "Database version is %u\n", db->version);
84
85 return 0;
86 }
87
88 ERROR(db->ctx, "Database format is not compatible\n");
89
90 // Return an error
91 return 1;
92 }
93
94 static int loc_database_read_as_section_v0(struct loc_database* db,
95 FILE* f, const struct loc_database_header_v0* header) {
96 off_t as_offset = be32toh(header->as_offset);
97 size_t as_length = be32toh(header->as_length);
98
99 DEBUG(db->ctx, "Reading AS section from %jd (%zu bytes)\n", as_offset, as_length);
100
101 if (as_length > 0) {
102 db->as_v0 = mmap(NULL, as_length, PROT_READ,
103 MAP_SHARED, fileno(f), as_offset);
104
105 if (db->as_v0 == MAP_FAILED)
106 return -errno;
107 }
108
109 db->as_count = as_length / sizeof(*db->as_v0);
110
111 INFO(db->ctx, "Read %zu ASes from the database\n", db->as_count);
112
113 return 0;
114 }
115
116 static int loc_database_read_network_nodes_section_v0(struct loc_database* db,
117 FILE* f, const struct loc_database_header_v0* header) {
118 off_t network_nodes_offset = be32toh(header->network_tree_offset);
119 size_t network_nodes_length = be32toh(header->network_tree_length);
120
121 DEBUG(db->ctx, "Reading network nodes section from %jd (%zu bytes)\n",
122 network_nodes_offset, network_nodes_length);
123
124 if (network_nodes_length > 0) {
125 db->network_nodes_v0 = mmap(NULL, network_nodes_length, PROT_READ,
126 MAP_SHARED, fileno(f), network_nodes_offset);
127
128 if (db->network_nodes_v0 == MAP_FAILED)
129 return -errno;
130 }
131
132 db->network_nodes_count = network_nodes_length / sizeof(*db->network_nodes_v0);
133
134 INFO(db->ctx, "Read %zu network nodes from the database\n", db->network_nodes_count);
135
136 return 0;
137 }
138
139 static int loc_database_read_networks_section_v0(struct loc_database* db,
140 FILE* f, const struct loc_database_header_v0* header) {
141 off_t networks_offset = be32toh(header->network_data_offset);
142 size_t networks_length = be32toh(header->network_data_length);
143
144 DEBUG(db->ctx, "Reading networks section from %jd (%zu bytes)\n",
145 networks_offset, networks_length);
146
147 if (networks_length > 0) {
148 db->networks_v0 = mmap(NULL, networks_length, PROT_READ,
149 MAP_SHARED, fileno(f), networks_offset);
150
151 if (db->networks_v0 == MAP_FAILED)
152 return -errno;
153 }
154
155 db->networks_count = networks_length / sizeof(*db->networks_v0);
156
157 INFO(db->ctx, "Read %zu networks from the database\n", db->networks_count);
158
159 return 0;
160 }
161
162 static int loc_database_read_header_v0(struct loc_database* db, FILE* f) {
163 struct loc_database_header_v0 header;
164
165 // Read from file
166 size_t size = fread(&header, 1, sizeof(header), f);
167
168 if (size < sizeof(header)) {
169 ERROR(db->ctx, "Could not read enough data for header\n");
170 return -ENOMSG;
171 }
172
173 // Copy over data
174 db->created_at = be64toh(header.created_at);
175 db->vendor = be32toh(header.vendor);
176 db->description = be32toh(header.description);
177 db->license = be32toh(header.license);
178
179 // Open pool
180 off_t pool_offset = be32toh(header.pool_offset);
181 size_t pool_length = be32toh(header.pool_length);
182
183 int r = loc_stringpool_open(db->ctx, &db->pool,
184 f, pool_length, pool_offset);
185 if (r)
186 return r;
187
188 // AS section
189 r = loc_database_read_as_section_v0(db, f, &header);
190 if (r)
191 return r;
192
193 // Network Nodes
194 r = loc_database_read_network_nodes_section_v0(db, f, &header);
195 if (r)
196 return r;
197
198 // Networks
199 r = loc_database_read_networks_section_v0(db, f, &header);
200 if (r)
201 return r;
202
203 return 0;
204 }
205
206 static int loc_database_read_header(struct loc_database* db, FILE* f) {
207 switch (db->version) {
208 case 0:
209 return loc_database_read_header_v0(db, f);
210
211 default:
212 ERROR(db->ctx, "Incompatible database version: %u\n", db->version);
213 return 1;
214 }
215 }
216
217 static int loc_database_read(struct loc_database* db, FILE* f) {
218 clock_t start = clock();
219
220 // Read magic bytes
221 int r = loc_database_read_magic(db, f);
222 if (r)
223 return r;
224
225 // Read the header
226 r = loc_database_read_header(db, f);
227 if (r)
228 return r;
229
230 clock_t end = clock();
231
232 INFO(db->ctx, "Opened database in %.8fs\n",
233 (double)(end - start) / CLOCKS_PER_SEC);
234
235 return 0;
236 }
237
238 LOC_EXPORT int loc_database_new(struct loc_ctx* ctx, struct loc_database** database, FILE* f) {
239 // Fail on invalid file handle
240 if (!f)
241 return -EINVAL;
242
243 struct loc_database* db = calloc(1, sizeof(*db));
244 if (!db)
245 return -ENOMEM;
246
247 // Reference context
248 db->ctx = loc_ref(ctx);
249 db->refcount = 1;
250
251 DEBUG(db->ctx, "Database object allocated at %p\n", db);
252
253 int r = loc_database_read(db, f);
254 if (r) {
255 loc_database_unref(db);
256 return r;
257 }
258
259 *database = db;
260
261 return 0;
262 }
263
264 LOC_EXPORT struct loc_database* loc_database_ref(struct loc_database* db) {
265 db->refcount++;
266
267 return db;
268 }
269
270 static void loc_database_free(struct loc_database* db) {
271 int r;
272
273 DEBUG(db->ctx, "Releasing database %p\n", db);
274
275 // Removing all ASes
276 if (db->as_v0) {
277 r = munmap(db->as_v0, db->as_count * sizeof(*db->as_v0));
278 if (r)
279 ERROR(db->ctx, "Could not unmap AS section: %s\n", strerror(errno));
280 }
281
282 // Remove mapped network sections
283 if (db->networks_v0) {
284 r = munmap(db->networks_v0, db->networks_count * sizeof(*db->networks_v0));
285 if (r)
286 ERROR(db->ctx, "Could not unmap networks section: %s\n", strerror(errno));
287 }
288
289 // Remove mapped network nodes section
290 if (db->network_nodes_v0) {
291 r = munmap(db->network_nodes_v0, db->network_nodes_count * sizeof(*db->network_nodes_v0));
292 if (r)
293 ERROR(db->ctx, "Could not unmap network nodes section: %s\n", strerror(errno));
294 }
295
296 loc_stringpool_unref(db->pool);
297
298 loc_unref(db->ctx);
299 free(db);
300 }
301
302 LOC_EXPORT struct loc_database* loc_database_unref(struct loc_database* db) {
303 if (--db->refcount > 0)
304 return NULL;
305
306 loc_database_free(db);
307 return NULL;
308 }
309
310 LOC_EXPORT time_t loc_database_created_at(struct loc_database* db) {
311 return db->created_at;
312 }
313
314 LOC_EXPORT const char* loc_database_get_vendor(struct loc_database* db) {
315 return loc_stringpool_get(db->pool, db->vendor);
316 }
317
318 LOC_EXPORT const char* loc_database_get_description(struct loc_database* db) {
319 return loc_stringpool_get(db->pool, db->description);
320 }
321
322 LOC_EXPORT const char* loc_database_get_license(struct loc_database* db) {
323 return loc_stringpool_get(db->pool, db->license);
324 }
325
326 LOC_EXPORT size_t loc_database_count_as(struct loc_database* db) {
327 return db->as_count;
328 }
329
330 // Returns the AS at position pos
331 static int loc_database_fetch_as(struct loc_database* db, struct loc_as** as, off_t pos) {
332 if ((size_t)pos >= db->as_count)
333 return -EINVAL;
334
335 DEBUG(db->ctx, "Fetching AS at position %jd\n", pos);
336
337 int r;
338 switch (db->version) {
339 case 0:
340 r = loc_as_new_from_database_v0(db->ctx, db->pool, as, db->as_v0 + pos);
341 break;
342
343 default:
344 return -1;
345 }
346
347 if (r == 0) {
348 DEBUG(db->ctx, "Got AS%u\n", loc_as_get_number(*as));
349 }
350
351 return r;
352 }
353
354 // Performs a binary search to find the AS in the list
355 LOC_EXPORT int loc_database_get_as(struct loc_database* db, struct loc_as** as, uint32_t number) {
356 off_t lo = 0;
357 off_t hi = db->as_count - 1;
358
359 // Save start time
360 clock_t start = clock();
361
362 while (lo <= hi) {
363 off_t i = (lo + hi) / 2;
364
365 // Fetch AS in the middle between lo and hi
366 int r = loc_database_fetch_as(db, as, i);
367 if (r)
368 return r;
369
370 // Check if this is a match
371 uint32_t as_number = loc_as_get_number(*as);
372 if (as_number == number) {
373 clock_t end = clock();
374
375 // Log how fast this has been
376 DEBUG(db->ctx, "Found AS%u in %.8fs\n", as_number,
377 (double)(end - start) / CLOCKS_PER_SEC);
378
379 return 0;
380 }
381
382 // If it wasn't, we release the AS and
383 // adjust our search pointers
384 loc_as_unref(*as);
385
386 if (as_number < number) {
387 lo = i + 1;
388 } else
389 hi = i - 1;
390 }
391
392 // Nothing found
393 *as = NULL;
394
395 return 1;
396 }
397
398 // Returns the network at position pos
399 static int loc_database_fetch_network(struct loc_database* db, struct loc_network** network, struct in6_addr* address, off_t pos) {
400 if ((size_t)pos >= db->networks_count)
401 return -EINVAL;
402
403 DEBUG(db->ctx, "Fetching network at position %jd\n", pos);
404
405 int r;
406 switch (db->version) {
407 case 0:
408 r = loc_network_new_from_database_v0(db->ctx, network, address, db->networks_v0 + pos);
409 break;
410
411 default:
412 return -1;
413 }
414
415 if (r == 0) {
416 char* string = loc_network_str(*network);
417 DEBUG(db->ctx, "Got network %s\n", string);
418 free(string);
419 }
420
421 return r;
422 }
423
424 static int __loc_database_node_is_leaf(const struct loc_database_network_node_v0* node) {
425 return (node->zero == htobe32(0xffffffff));
426 }
427
428 static int __loc_database_lookup_handle_leaf(struct loc_database* db, const struct in6_addr* address,
429 struct loc_network** network, struct in6_addr* network_address,
430 const struct loc_database_network_node_v0* node) {
431 DEBUG(db->ctx, "Handling leaf node at %jd\n", node - db->network_nodes_v0);
432
433 // Fetch the network
434 int r = loc_database_fetch_network(db, network,
435 network_address, be32toh(node->one));
436 if (r)
437 return r;
438
439 // Check if the given IP address is inside the network
440 r = loc_network_match_address(*network, address);
441 if (r) {
442 DEBUG(db->ctx, "Searched address is not part of the network\n");
443
444 loc_network_unref(*network);
445 *network = NULL;
446 return 1;
447 }
448
449 // A network was found and the IP address matches
450 return 0;
451 }
452
453 // Returns the highest result available
454 static int __loc_database_lookup_max(struct loc_database* db, const struct in6_addr* address,
455 struct loc_network** network, struct in6_addr* network_address,
456 const struct loc_database_network_node_v0* node, int level) {
457 // If the node is a leaf node, we end here
458 if (__loc_database_node_is_leaf(node))
459 return __loc_database_lookup_handle_leaf(db, address, network, network_address, node);
460
461 int r;
462 off_t node_index;
463
464 // Try to go down the ones path first
465 if (node->one) {
466 node_index = be32toh(node->one);
467 in6_addr_set_bit(network_address, level, 1);
468
469 // Check boundaries
470 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
471 r = __loc_database_lookup_max(db, address, network, network_address,
472 db->network_nodes_v0 + node_index, level + 1);
473
474 // Abort when match was found or error
475 if (r <= 0)
476 return r;
477 }
478 }
479
480 // ... and if that fails, we try to go down one step on a zero
481 // branch and then try the ones again...
482 if (node->zero) {
483 node_index = be32toh(node->zero);
484 in6_addr_set_bit(network_address, level, 0);
485
486 // Check boundaries
487 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
488 r = __loc_database_lookup_max(db, address, network, network_address,
489 db->network_nodes_v0 + node_index, level + 1);
490
491 // Abort when match was found or error
492 if (r <= 0)
493 return r;
494 }
495 }
496
497 // End of path
498 return 1;
499 }
500
501 // Searches for an exact match along the path
502 static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* address,
503 struct loc_network** network, struct in6_addr* network_address,
504 const struct loc_database_network_node_v0* node, int level) {
505 // If the node is a leaf node, we end here
506 if (__loc_database_node_is_leaf(node))
507 return __loc_database_lookup_handle_leaf(db, address, network, network_address, node);
508
509 int r;
510 off_t node_index;
511
512 // Follow the path
513 int bit = in6_addr_get_bit(address, level);
514 in6_addr_set_bit(network_address, level, bit);
515
516 if (bit == 0)
517 node_index = be32toh(node->zero);
518 else
519 node_index = be32toh(node->one);
520
521 // If we point back to root, the path ends here
522 if (node_index == 0) {
523 DEBUG(db->ctx, "Tree ends here\n");
524 return 1;
525 }
526
527 // Check boundaries
528 if ((size_t)node_index >= db->network_nodes_count)
529 return -EINVAL;
530
531 // Move on to the next node
532 r = __loc_database_lookup(db, address, network, network_address,
533 db->network_nodes_v0 + node_index, level + 1);
534
535 // End here if a result was found
536 if (r == 0)
537 return r;
538
539 // Raise any errors
540 else if (r < 0)
541 return r;
542
543 DEBUG(db->ctx, "Could not find an exact match at %u\n", level);
544
545 // If nothing was found, we have to search for an inexact match
546 return __loc_database_lookup_max(db, address, network, network_address, node, level);
547 }
548
549 LOC_EXPORT int loc_database_lookup(struct loc_database* db,
550 struct in6_addr* address, struct loc_network** network) {
551 struct in6_addr network_address;
552 memset(&network_address, 0, sizeof(network_address));
553
554 *network = NULL;
555
556 // Save start time
557 clock_t start = clock();
558
559 int r = __loc_database_lookup(db, address, network, &network_address,
560 db->network_nodes_v0, 0);
561
562 clock_t end = clock();
563
564 // Log how fast this has been
565 DEBUG(db->ctx, "Executed network search in %.8fs\n",
566 (double)(end - start) / CLOCKS_PER_SEC);
567
568 return r;
569 }
570
571 LOC_EXPORT int loc_database_lookup_from_string(struct loc_database* db,
572 const char* string, struct loc_network** network) {
573 struct in6_addr address;
574
575 int r = loc_parse_address(db->ctx, string, &address);
576 if (r)
577 return r;
578
579 return loc_database_lookup(db, &address, network);
580 }