]> git.ipfire.org Git - people/ms/libloc.git/blob - src/database.c
73d23945203b0d2d2b4168f7ecf7903b410d6a0b
[people/ms/libloc.git] / src / database.c
1 /*
2 libloc - A library to determine the location of someone on the Internet
3
4 Copyright (C) 2017 IPFire Development Team <info@ipfire.org>
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15 */
16
17 #include <arpa/inet.h>
18 #include <endian.h>
19 #include <errno.h>
20 #include <netinet/in.h>
21 #include <stddef.h>
22 #include <stdint.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mman.h>
27 #include <sys/types.h>
28 #include <time.h>
29 #include <unistd.h>
30
31 #include <loc/libloc.h>
32 #include <loc/as.h>
33 #include <loc/database.h>
34 #include <loc/format.h>
35 #include <loc/network.h>
36 #include <loc/private.h>
37 #include <loc/stringpool.h>
38
39 struct loc_database {
40 struct loc_ctx* ctx;
41 int refcount;
42
43 unsigned int version;
44 time_t created_at;
45 off_t vendor;
46 off_t description;
47
48 // ASes in the database
49 struct loc_database_as_v0* as_v0;
50 size_t as_count;
51
52 // Network tree
53 struct loc_database_network_node_v0* network_nodes_v0;
54 size_t network_nodes_count;
55
56 // Networks
57 struct loc_database_network_v0* networks_v0;
58 size_t networks_count;
59
60 struct loc_stringpool* pool;
61 };
62
63 static int loc_database_read_magic(struct loc_database* db, FILE* f) {
64 struct loc_database_magic magic;
65
66 // Read from file
67 size_t bytes_read = fread(&magic, 1, sizeof(magic), f);
68
69 // Check if we have been able to read enough data
70 if (bytes_read < sizeof(magic)) {
71 ERROR(db->ctx, "Could not read enough data to validate magic bytes\n");
72 DEBUG(db->ctx, "Read %zu bytes, but needed %zu\n", bytes_read, sizeof(magic));
73 return -ENOMSG;
74 }
75
76 // Compare magic bytes
77 if (memcmp(LOC_DATABASE_MAGIC, magic.magic, strlen(LOC_DATABASE_MAGIC)) == 0) {
78 DEBUG(db->ctx, "Magic value matches\n");
79
80 // Parse version
81 db->version = be16toh(magic.version);
82 DEBUG(db->ctx, "Database version is %u\n", db->version);
83
84 return 0;
85 }
86
87 ERROR(db->ctx, "Database format is not compatible\n");
88
89 // Return an error
90 return 1;
91 }
92
93 static int loc_database_read_as_section_v0(struct loc_database* db,
94 FILE* f, const struct loc_database_header_v0* header) {
95 off_t as_offset = be32toh(header->as_offset);
96 size_t as_length = be32toh(header->as_length);
97
98 DEBUG(db->ctx, "Reading AS section from %jd (%zu bytes)\n", as_offset, as_length);
99
100 if (as_length > 0) {
101 db->as_v0 = mmap(NULL, as_length, PROT_READ,
102 MAP_SHARED, fileno(f), as_offset);
103
104 if (db->as_v0 == MAP_FAILED)
105 return -errno;
106 }
107
108 db->as_count = as_length / sizeof(*db->as_v0);
109
110 INFO(db->ctx, "Read %zu ASes from the database\n", db->as_count);
111
112 return 0;
113 }
114
115 static int loc_database_read_network_nodes_section_v0(struct loc_database* db,
116 FILE* f, const struct loc_database_header_v0* header) {
117 off_t network_nodes_offset = be32toh(header->network_tree_offset);
118 size_t network_nodes_length = be32toh(header->network_tree_length);
119
120 DEBUG(db->ctx, "Reading network nodes section from %jd (%zu bytes)\n",
121 network_nodes_offset, network_nodes_length);
122
123 if (network_nodes_length > 0) {
124 db->network_nodes_v0 = mmap(NULL, network_nodes_length, PROT_READ,
125 MAP_SHARED, fileno(f), network_nodes_offset);
126
127 if (db->network_nodes_v0 == MAP_FAILED)
128 return -errno;
129 }
130
131 db->network_nodes_count = network_nodes_length / sizeof(*db->network_nodes_v0);
132
133 INFO(db->ctx, "Read %zu network nodes from the database\n", db->network_nodes_count);
134
135 return 0;
136 }
137
138 static int loc_database_read_networks_section_v0(struct loc_database* db,
139 FILE* f, const struct loc_database_header_v0* header) {
140 off_t networks_offset = be32toh(header->network_data_offset);
141 size_t networks_length = be32toh(header->network_data_length);
142
143 DEBUG(db->ctx, "Reading networks section from %jd (%zu bytes)\n",
144 networks_offset, networks_length);
145
146 if (networks_length > 0) {
147 db->networks_v0 = mmap(NULL, networks_length, PROT_READ,
148 MAP_SHARED, fileno(f), networks_offset);
149
150 if (db->networks_v0 == MAP_FAILED)
151 return -errno;
152 }
153
154 db->networks_count = networks_length / sizeof(*db->networks_v0);
155
156 INFO(db->ctx, "Read %zu networks from the database\n", db->networks_count);
157
158 return 0;
159 }
160
161 static int loc_database_read_header_v0(struct loc_database* db, FILE* f) {
162 struct loc_database_header_v0 header;
163
164 // Read from file
165 size_t size = fread(&header, 1, sizeof(header), f);
166
167 if (size < sizeof(header)) {
168 ERROR(db->ctx, "Could not read enough data for header\n");
169 return -ENOMSG;
170 }
171
172 // Copy over data
173 db->created_at = be64toh(header.created_at);
174 db->vendor = be32toh(header.vendor);
175 db->description = be32toh(header.description);
176
177 // Open pool
178 off_t pool_offset = be32toh(header.pool_offset);
179 size_t pool_length = be32toh(header.pool_length);
180
181 int r = loc_stringpool_open(db->ctx, &db->pool,
182 f, pool_length, pool_offset);
183 if (r)
184 return r;
185
186 // AS section
187 r = loc_database_read_as_section_v0(db, f, &header);
188 if (r)
189 return r;
190
191 // Network Nodes
192 r = loc_database_read_network_nodes_section_v0(db, f, &header);
193 if (r)
194 return r;
195
196 // Networks
197 r = loc_database_read_networks_section_v0(db, f, &header);
198 if (r)
199 return r;
200
201 return 0;
202 }
203
204 static int loc_database_read_header(struct loc_database* db, FILE* f) {
205 switch (db->version) {
206 case 0:
207 return loc_database_read_header_v0(db, f);
208
209 default:
210 ERROR(db->ctx, "Incompatible database version: %u\n", db->version);
211 return 1;
212 }
213 }
214
215 static int loc_database_read(struct loc_database* db, FILE* f) {
216 clock_t start = clock();
217
218 // Read magic bytes
219 int r = loc_database_read_magic(db, f);
220 if (r)
221 return r;
222
223 // Read the header
224 r = loc_database_read_header(db, f);
225 if (r)
226 return r;
227
228 clock_t end = clock();
229
230 INFO(db->ctx, "Opened database in %.8fs\n",
231 (double)(end - start) / CLOCKS_PER_SEC);
232
233 return 0;
234 }
235
236 LOC_EXPORT int loc_database_new(struct loc_ctx* ctx, struct loc_database** database, FILE* f) {
237 // Fail on invalid file handle
238 if (!f)
239 return -EINVAL;
240
241 struct loc_database* db = calloc(1, sizeof(*db));
242 if (!db)
243 return -ENOMEM;
244
245 // Reference context
246 db->ctx = loc_ref(ctx);
247 db->refcount = 1;
248
249 DEBUG(db->ctx, "Database object allocated at %p\n", db);
250
251 int r = loc_database_read(db, f);
252 if (r) {
253 loc_database_unref(db);
254 return r;
255 }
256
257 *database = db;
258
259 return 0;
260 }
261
262 LOC_EXPORT struct loc_database* loc_database_ref(struct loc_database* db) {
263 db->refcount++;
264
265 return db;
266 }
267
268 static void loc_database_free(struct loc_database* db) {
269 int r;
270
271 DEBUG(db->ctx, "Releasing database %p\n", db);
272
273 // Removing all ASes
274 if (db->as_v0) {
275 r = munmap(db->as_v0, db->as_count * sizeof(*db->as_v0));
276 if (r)
277 ERROR(db->ctx, "Could not unmap AS section: %s\n", strerror(errno));
278 }
279
280 // Remove mapped network sections
281 if (db->networks_v0) {
282 r = munmap(db->networks_v0, db->networks_count * sizeof(*db->networks_v0));
283 if (r)
284 ERROR(db->ctx, "Could not unmap networks section: %s\n", strerror(errno));
285 }
286
287 // Remove mapped network nodes section
288 if (db->network_nodes_v0) {
289 r = munmap(db->network_nodes_v0, db->network_nodes_count * sizeof(*db->network_nodes_v0));
290 if (r)
291 ERROR(db->ctx, "Could not unmap network nodes section: %s\n", strerror(errno));
292 }
293
294 loc_stringpool_unref(db->pool);
295
296 loc_unref(db->ctx);
297 free(db);
298 }
299
300 LOC_EXPORT struct loc_database* loc_database_unref(struct loc_database* db) {
301 if (--db->refcount > 0)
302 return NULL;
303
304 loc_database_free(db);
305 return NULL;
306 }
307
308 LOC_EXPORT time_t loc_database_created_at(struct loc_database* db) {
309 return db->created_at;
310 }
311
312 LOC_EXPORT const char* loc_database_get_vendor(struct loc_database* db) {
313 return loc_stringpool_get(db->pool, db->vendor);
314 }
315
316 LOC_EXPORT const char* loc_database_get_description(struct loc_database* db) {
317 return loc_stringpool_get(db->pool, db->description);
318 }
319
320 LOC_EXPORT size_t loc_database_count_as(struct loc_database* db) {
321 return db->as_count;
322 }
323
324 // Returns the AS at position pos
325 static int loc_database_fetch_as(struct loc_database* db, struct loc_as** as, off_t pos) {
326 if ((size_t)pos >= db->as_count)
327 return -EINVAL;
328
329 DEBUG(db->ctx, "Fetching AS at position %jd\n", pos);
330
331 int r;
332 switch (db->version) {
333 case 0:
334 r = loc_as_new_from_database_v0(db->ctx, db->pool, as, db->as_v0 + pos);
335 break;
336
337 default:
338 return -1;
339 }
340
341 if (r == 0) {
342 DEBUG(db->ctx, "Got AS%u\n", loc_as_get_number(*as));
343 }
344
345 return r;
346 }
347
348 // Performs a binary search to find the AS in the list
349 LOC_EXPORT int loc_database_get_as(struct loc_database* db, struct loc_as** as, uint32_t number) {
350 off_t lo = 0;
351 off_t hi = db->as_count - 1;
352
353 // Save start time
354 clock_t start = clock();
355
356 while (lo <= hi) {
357 off_t i = (lo + hi) / 2;
358
359 // Fetch AS in the middle between lo and hi
360 int r = loc_database_fetch_as(db, as, i);
361 if (r)
362 return r;
363
364 // Check if this is a match
365 uint32_t as_number = loc_as_get_number(*as);
366 if (as_number == number) {
367 clock_t end = clock();
368
369 // Log how fast this has been
370 DEBUG(db->ctx, "Found AS%u in %.8fs\n", as_number,
371 (double)(end - start) / CLOCKS_PER_SEC);
372
373 return 0;
374 }
375
376 // If it wasn't, we release the AS and
377 // adjust our search pointers
378 loc_as_unref(*as);
379
380 if (as_number < number) {
381 lo = i + 1;
382 } else
383 hi = i - 1;
384 }
385
386 // Nothing found
387 *as = NULL;
388
389 return 1;
390 }
391
392 // Returns the network at position pos
393 static int loc_database_fetch_network(struct loc_database* db, struct loc_network** network, struct in6_addr* address, off_t pos) {
394 if ((size_t)pos >= db->networks_count)
395 return -EINVAL;
396
397 DEBUG(db->ctx, "Fetching network at position %jd\n", pos);
398
399 int r;
400 switch (db->version) {
401 case 0:
402 r = loc_network_new_from_database_v0(db->ctx, network, address, db->networks_v0 + pos);
403 break;
404
405 default:
406 return -1;
407 }
408
409 if (r == 0) {
410 char* string = loc_network_str(*network);
411 DEBUG(db->ctx, "Got network %s\n", string);
412 free(string);
413 }
414
415 return r;
416 }
417
418 static int __loc_database_lookup_leaf_node(struct loc_database* db, const struct in6_addr* address,
419 struct loc_network** network, struct in6_addr* network_address,
420 const struct loc_database_network_node_v0* node) {
421 // Check if this node is a leaf node
422 if (node->zero != htobe32(0xffffffff))
423 return 1;
424
425 DEBUG(db->ctx, "Node is a leaf: %jd\n", node - db->network_nodes_v0);
426
427 // Fetch the network
428 int r = loc_database_fetch_network(db, network,
429 network_address, be32toh(node->one));
430 if (r)
431 return r;
432
433 // Check if the given IP address is inside the network
434 r = loc_network_match_address(*network, address);
435 if (r) {
436 DEBUG(db->ctx, "Searched address is not part of the network\n");
437
438 loc_network_unref(*network);
439 *network = NULL;
440 return 1;
441 }
442
443 // A network was found and the IP address matches
444 return 0;
445 }
446
447 // Returns the highest result available
448 static int __loc_database_lookup_max(struct loc_database* db, const struct in6_addr* address,
449 struct loc_network** network, struct in6_addr* network_address,
450 const struct loc_database_network_node_v0* node, int level) {
451
452 // If the node is a leaf node, we end here
453 int r = __loc_database_lookup_leaf_node(db, address, network, network_address, node);
454 if (r <= 0)
455 return r;
456
457 off_t node_index;
458
459 // Try to go down the ones path first
460 if (node->one) {
461 node_index = be32toh(node->one);
462 in6_addr_set_bit(network_address, level, 1);
463
464 // Check boundaries
465 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
466 r = __loc_database_lookup_max(db, address, network, network_address,
467 db->network_nodes_v0 + node_index, level + 1);
468
469 // Abort when match was found or error
470 if (r <= 0)
471 return r;
472 }
473 }
474
475 // ... and if that fails, we try to go down one step on a zero
476 // branch and then try the ones again...
477 if (node->zero) {
478 node_index = be32toh(node->zero);
479 in6_addr_set_bit(network_address, level, 0);
480
481 // Check boundaries
482 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
483 r = __loc_database_lookup_max(db, address, network, network_address,
484 db->network_nodes_v0 + node_index, level + 1);
485
486 // Abort when match was found or error
487 if (r <= 0)
488 return r;
489 }
490 }
491
492 // End of path
493 return 1;
494 }
495
496 // Searches for an exact match along the path
497 static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* address,
498 struct loc_network** network, struct in6_addr* network_address,
499 const struct loc_database_network_node_v0* node, int level) {
500 // If the node is a leaf node, we end here
501 int r = __loc_database_lookup_leaf_node(db, address, network, network_address, node);
502 if (r <= 0)
503 return r;
504
505 off_t node_index;
506
507 // Follow the path
508 int bit = in6_addr_get_bit(address, level);
509 in6_addr_set_bit(network_address, level, bit);
510
511 if (bit == 0)
512 node_index = be32toh(node->zero);
513 else
514 node_index = be32toh(node->one);
515
516 // If we point back to root, the path ends here
517 if (node_index == 0) {
518 DEBUG(db->ctx, "Tree ends here\n");
519 return 1;
520 }
521
522 // Check boundaries
523 if ((size_t)node_index >= db->network_nodes_count)
524 return -EINVAL;
525
526 // Move on to the next node
527 r = __loc_database_lookup(db, address, network, network_address,
528 db->network_nodes_v0 + node_index, level + 1);
529
530 // End here if a result was found
531 if (r == 0)
532 return r;
533
534 // Raise any errors
535 else if (r < 0)
536 return r;
537
538 DEBUG(db->ctx, "Could not find an exact match at %u\n", level);
539
540 // If nothing was found, we have to search for an inexact match
541 return __loc_database_lookup_max(db, address, network, network_address, node, level);
542 }
543
544 LOC_EXPORT int loc_database_lookup(struct loc_database* db,
545 struct in6_addr* address, struct loc_network** network) {
546 struct in6_addr network_address;
547 memset(&network_address, 0, sizeof(network_address));
548
549 *network = NULL;
550
551 // Save start time
552 clock_t start = clock();
553
554 int r = __loc_database_lookup(db, address, network, &network_address,
555 db->network_nodes_v0, 0);
556
557 clock_t end = clock();
558
559 // Log how fast this has been
560 DEBUG(db->ctx, "Executed network search in %.8fs\n",
561 (double)(end - start) / CLOCKS_PER_SEC);
562
563 return r;
564 }
565
566 LOC_EXPORT int loc_database_lookup_from_string(struct loc_database* db,
567 const char* string, struct loc_network** network) {
568 struct in6_addr address;
569
570 int r = loc_parse_address(db->ctx, string, &address);
571 if (r)
572 return r;
573
574 return loc_database_lookup(db, &address, network);
575 }