]>
Commit | Line | Data |
---|---|---|
2601e83e MT |
1 | /* |
2 | libloc - A library to determine the location of someone on the Internet | |
3 | ||
4 | Copyright (C) 2017 IPFire Development Team <info@ipfire.org> | |
5 | ||
6 | This library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | This library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | */ | |
16 | ||
2a30e4de | 17 | #include <arpa/inet.h> |
0676cd80 | 18 | #include <endian.h> |
2601e83e | 19 | #include <errno.h> |
10778041 | 20 | #include <netinet/in.h> |
2601e83e MT |
21 | #include <stddef.h> |
22 | #include <stdint.h> | |
23 | #include <stdio.h> | |
24 | #include <stdlib.h> | |
25 | #include <string.h> | |
c182393f | 26 | #include <sys/mman.h> |
2601e83e | 27 | #include <sys/types.h> |
96ea74a5 | 28 | #include <time.h> |
3f35869a | 29 | #include <unistd.h> |
2601e83e MT |
30 | |
31 | #include <loc/libloc.h> | |
9fc7f001 MT |
32 | #include <loc/as.h> |
33 | #include <loc/database.h> | |
a5db3e49 | 34 | #include <loc/format.h> |
10778041 | 35 | #include <loc/network.h> |
9fc7f001 MT |
36 | #include <loc/private.h> |
37 | #include <loc/stringpool.h> | |
2601e83e MT |
38 | |
39 | struct loc_database { | |
40 | struct loc_ctx* ctx; | |
41 | int refcount; | |
42 | ||
43 | unsigned int version; | |
96ea74a5 | 44 | time_t created_at; |
2601e83e MT |
45 | off_t vendor; |
46 | off_t description; | |
47 | ||
a5db3e49 | 48 | // ASes in the database |
c182393f | 49 | struct loc_database_as_v0* as_v0; |
a5db3e49 MT |
50 | size_t as_count; |
51 | ||
f66b7b09 MT |
52 | // Network tree |
53 | struct loc_database_network_node_v0* network_nodes_v0; | |
54 | size_t network_nodes_count; | |
55 | ||
a735a563 MT |
56 | // Networks |
57 | struct loc_database_network_v0* networks_v0; | |
58 | size_t networks_count; | |
59 | ||
2601e83e MT |
60 | struct loc_stringpool* pool; |
61 | }; | |
62 | ||
a7431f1a | 63 | static int loc_database_read_magic(struct loc_database* db, FILE* f) { |
2601e83e MT |
64 | struct loc_database_magic magic; |
65 | ||
66 | // Read from file | |
a7431f1a | 67 | size_t bytes_read = fread(&magic, 1, sizeof(magic), f); |
2601e83e MT |
68 | |
69 | // Check if we have been able to read enough data | |
70 | if (bytes_read < sizeof(magic)) { | |
71 | ERROR(db->ctx, "Could not read enough data to validate magic bytes\n"); | |
72 | DEBUG(db->ctx, "Read %zu bytes, but needed %zu\n", bytes_read, sizeof(magic)); | |
73 | return -ENOMSG; | |
74 | } | |
75 | ||
76 | // Compare magic bytes | |
77 | if (memcmp(LOC_DATABASE_MAGIC, magic.magic, strlen(LOC_DATABASE_MAGIC)) == 0) { | |
78 | DEBUG(db->ctx, "Magic value matches\n"); | |
79 | ||
80 | // Parse version | |
0676cd80 | 81 | db->version = be16toh(magic.version); |
2601e83e MT |
82 | DEBUG(db->ctx, "Database version is %u\n", db->version); |
83 | ||
84 | return 0; | |
85 | } | |
86 | ||
87 | ERROR(db->ctx, "Database format is not compatible\n"); | |
88 | ||
89 | // Return an error | |
90 | return 1; | |
91 | } | |
92 | ||
a5db3e49 | 93 | static int loc_database_read_as_section_v0(struct loc_database* db, |
edb4ba7c MT |
94 | FILE* f, const struct loc_database_header_v0* header) { |
95 | off_t as_offset = be32toh(header->as_offset); | |
96 | size_t as_length = be32toh(header->as_length); | |
97 | ||
c182393f | 98 | DEBUG(db->ctx, "Reading AS section from %jd (%zu bytes)\n", as_offset, as_length); |
a5db3e49 | 99 | |
c182393f MT |
100 | if (as_length > 0) { |
101 | db->as_v0 = mmap(NULL, as_length, PROT_READ, | |
a7431f1a | 102 | MAP_SHARED, fileno(f), as_offset); |
a5db3e49 | 103 | |
c182393f MT |
104 | if (db->as_v0 == MAP_FAILED) |
105 | return -errno; | |
a5db3e49 MT |
106 | } |
107 | ||
c182393f MT |
108 | db->as_count = as_length / sizeof(*db->as_v0); |
109 | ||
a5db3e49 MT |
110 | INFO(db->ctx, "Read %zu ASes from the database\n", db->as_count); |
111 | ||
112 | return 0; | |
113 | } | |
114 | ||
f66b7b09 | 115 | static int loc_database_read_network_nodes_section_v0(struct loc_database* db, |
edb4ba7c MT |
116 | FILE* f, const struct loc_database_header_v0* header) { |
117 | off_t network_nodes_offset = be32toh(header->network_tree_offset); | |
118 | size_t network_nodes_length = be32toh(header->network_tree_length); | |
119 | ||
f66b7b09 MT |
120 | DEBUG(db->ctx, "Reading network nodes section from %jd (%zu bytes)\n", |
121 | network_nodes_offset, network_nodes_length); | |
122 | ||
123 | if (network_nodes_length > 0) { | |
124 | db->network_nodes_v0 = mmap(NULL, network_nodes_length, PROT_READ, | |
125 | MAP_SHARED, fileno(f), network_nodes_offset); | |
126 | ||
127 | if (db->network_nodes_v0 == MAP_FAILED) | |
128 | return -errno; | |
129 | } | |
130 | ||
131 | db->network_nodes_count = network_nodes_length / sizeof(*db->network_nodes_v0); | |
132 | ||
133 | INFO(db->ctx, "Read %zu network nodes from the database\n", db->network_nodes_count); | |
134 | ||
135 | return 0; | |
136 | } | |
137 | ||
a735a563 MT |
138 | static int loc_database_read_networks_section_v0(struct loc_database* db, |
139 | FILE* f, const struct loc_database_header_v0* header) { | |
140 | off_t networks_offset = be32toh(header->network_data_offset); | |
141 | size_t networks_length = be32toh(header->network_data_length); | |
142 | ||
143 | DEBUG(db->ctx, "Reading networks section from %jd (%zu bytes)\n", | |
144 | networks_offset, networks_length); | |
145 | ||
146 | if (networks_length > 0) { | |
147 | db->networks_v0 = mmap(NULL, networks_length, PROT_READ, | |
148 | MAP_SHARED, fileno(f), networks_offset); | |
149 | ||
150 | if (db->networks_v0 == MAP_FAILED) | |
151 | return -errno; | |
152 | } | |
153 | ||
154 | db->networks_count = networks_length / sizeof(*db->networks_v0); | |
155 | ||
156 | INFO(db->ctx, "Read %zu networks from the database\n", db->networks_count); | |
157 | ||
158 | return 0; | |
159 | } | |
160 | ||
a7431f1a | 161 | static int loc_database_read_header_v0(struct loc_database* db, FILE* f) { |
2601e83e MT |
162 | struct loc_database_header_v0 header; |
163 | ||
164 | // Read from file | |
a7431f1a | 165 | size_t size = fread(&header, 1, sizeof(header), f); |
2601e83e MT |
166 | |
167 | if (size < sizeof(header)) { | |
168 | ERROR(db->ctx, "Could not read enough data for header\n"); | |
169 | return -ENOMSG; | |
170 | } | |
171 | ||
172 | // Copy over data | |
96ea74a5 | 173 | db->created_at = be64toh(header.created_at); |
0676cd80 MT |
174 | db->vendor = be32toh(header.vendor); |
175 | db->description = be32toh(header.description); | |
2601e83e MT |
176 | |
177 | // Open pool | |
0676cd80 MT |
178 | off_t pool_offset = be32toh(header.pool_offset); |
179 | size_t pool_length = be32toh(header.pool_length); | |
2601e83e | 180 | |
0e974d4b | 181 | int r = loc_stringpool_open(db->ctx, &db->pool, |
a7431f1a | 182 | f, pool_length, pool_offset); |
2601e83e MT |
183 | if (r) |
184 | return r; | |
185 | ||
a5db3e49 | 186 | // AS section |
edb4ba7c | 187 | r = loc_database_read_as_section_v0(db, f, &header); |
a5db3e49 MT |
188 | if (r) |
189 | return r; | |
190 | ||
f66b7b09 | 191 | // Network Nodes |
edb4ba7c | 192 | r = loc_database_read_network_nodes_section_v0(db, f, &header); |
f66b7b09 MT |
193 | if (r) |
194 | return r; | |
195 | ||
a735a563 MT |
196 | // Networks |
197 | r = loc_database_read_networks_section_v0(db, f, &header); | |
198 | if (r) | |
199 | return r; | |
200 | ||
2601e83e MT |
201 | return 0; |
202 | } | |
203 | ||
a7431f1a | 204 | static int loc_database_read_header(struct loc_database* db, FILE* f) { |
2601e83e MT |
205 | switch (db->version) { |
206 | case 0: | |
a7431f1a | 207 | return loc_database_read_header_v0(db, f); |
2601e83e MT |
208 | |
209 | default: | |
210 | ERROR(db->ctx, "Incompatible database version: %u\n", db->version); | |
211 | return 1; | |
212 | } | |
213 | } | |
214 | ||
a7431f1a | 215 | static int loc_database_read(struct loc_database* db, FILE* f) { |
02879100 MT |
216 | clock_t start = clock(); |
217 | ||
218 | // Read magic bytes | |
a7431f1a | 219 | int r = loc_database_read_magic(db, f); |
02879100 MT |
220 | if (r) |
221 | return r; | |
222 | ||
223 | // Read the header | |
a7431f1a | 224 | r = loc_database_read_header(db, f); |
02879100 MT |
225 | if (r) |
226 | return r; | |
227 | ||
228 | clock_t end = clock(); | |
229 | ||
230 | INFO(db->ctx, "Opened database in %.8fs\n", | |
231 | (double)(end - start) / CLOCKS_PER_SEC); | |
232 | ||
233 | return 0; | |
234 | } | |
235 | ||
c182393f | 236 | LOC_EXPORT int loc_database_new(struct loc_ctx* ctx, struct loc_database** database, FILE* f) { |
a7431f1a MT |
237 | // Fail on invalid file handle |
238 | if (!f) | |
239 | return -EINVAL; | |
240 | ||
c182393f MT |
241 | struct loc_database* db = calloc(1, sizeof(*db)); |
242 | if (!db) | |
243 | return -ENOMEM; | |
244 | ||
245 | // Reference context | |
246 | db->ctx = loc_ref(ctx); | |
247 | db->refcount = 1; | |
248 | ||
249 | DEBUG(db->ctx, "Database object allocated at %p\n", db); | |
250 | ||
a7431f1a | 251 | int r = loc_database_read(db, f); |
02879100 MT |
252 | if (r) { |
253 | loc_database_unref(db); | |
2601e83e | 254 | return r; |
02879100 | 255 | } |
2601e83e | 256 | |
c182393f MT |
257 | *database = db; |
258 | ||
2601e83e | 259 | return 0; |
2601e83e MT |
260 | } |
261 | ||
c182393f MT |
262 | LOC_EXPORT struct loc_database* loc_database_ref(struct loc_database* db) { |
263 | db->refcount++; | |
264 | ||
265 | return db; | |
8f5b676a MT |
266 | } |
267 | ||
c182393f MT |
268 | static void loc_database_free(struct loc_database* db) { |
269 | DEBUG(db->ctx, "Releasing database %p\n", db); | |
c34e76f1 | 270 | |
c182393f MT |
271 | // Removing all ASes |
272 | if (db->as_v0) { | |
273 | int r = munmap(db->as_v0, db->as_count * sizeof(*db->as_v0)); | |
274 | if (r) | |
275 | ERROR(db->ctx, "Could not unmap AS section: %s\n", strerror(errno)); | |
276 | } | |
c34e76f1 | 277 | |
c182393f | 278 | loc_stringpool_unref(db->pool); |
c34e76f1 | 279 | |
c182393f MT |
280 | loc_unref(db->ctx); |
281 | free(db); | |
c34e76f1 MT |
282 | } |
283 | ||
c182393f MT |
284 | LOC_EXPORT struct loc_database* loc_database_unref(struct loc_database* db) { |
285 | if (--db->refcount > 0) | |
286 | return NULL; | |
78ace4ed | 287 | |
c182393f MT |
288 | loc_database_free(db); |
289 | return NULL; | |
290 | } | |
78ace4ed | 291 | |
c182393f MT |
292 | LOC_EXPORT time_t loc_database_created_at(struct loc_database* db) { |
293 | return db->created_at; | |
294 | } | |
78ace4ed | 295 | |
c182393f MT |
296 | LOC_EXPORT const char* loc_database_get_vendor(struct loc_database* db) { |
297 | return loc_stringpool_get(db->pool, db->vendor); | |
298 | } | |
78ace4ed | 299 | |
c182393f MT |
300 | LOC_EXPORT const char* loc_database_get_description(struct loc_database* db) { |
301 | return loc_stringpool_get(db->pool, db->description); | |
302 | } | |
78ace4ed | 303 | |
c182393f MT |
304 | LOC_EXPORT size_t loc_database_count_as(struct loc_database* db) { |
305 | return db->as_count; | |
78ace4ed MT |
306 | } |
307 | ||
c182393f MT |
308 | // Returns the AS at position pos |
309 | static int loc_database_fetch_as(struct loc_database* db, struct loc_as** as, off_t pos) { | |
310 | if ((size_t)pos >= db->as_count) | |
311 | return -EINVAL; | |
2601e83e | 312 | |
c182393f | 313 | DEBUG(db->ctx, "Fetching AS at position %jd\n", pos); |
2601e83e MT |
314 | |
315 | int r; | |
c182393f MT |
316 | switch (db->version) { |
317 | case 0: | |
318 | r = loc_as_new_from_database_v0(db->ctx, db->pool, as, db->as_v0 + pos); | |
319 | break; | |
2601e83e | 320 | |
c182393f MT |
321 | default: |
322 | return -1; | |
323 | } | |
2601e83e | 324 | |
c182393f MT |
325 | if (r == 0) { |
326 | DEBUG(db->ctx, "Got AS%u\n", loc_as_get_number(*as)); | |
2601e83e | 327 | } |
2601e83e | 328 | |
c182393f MT |
329 | return r; |
330 | } | |
c34e76f1 | 331 | |
c182393f MT |
332 | // Performs a binary search to find the AS in the list |
333 | LOC_EXPORT int loc_database_get_as(struct loc_database* db, struct loc_as** as, uint32_t number) { | |
334 | off_t lo = 0; | |
335 | off_t hi = db->as_count - 1; | |
c34e76f1 | 336 | |
8f3e2a06 MT |
337 | // Save start time |
338 | clock_t start = clock(); | |
339 | ||
c182393f MT |
340 | while (lo <= hi) { |
341 | off_t i = (lo + hi) / 2; | |
8f5b676a | 342 | |
c182393f MT |
343 | // Fetch AS in the middle between lo and hi |
344 | int r = loc_database_fetch_as(db, as, i); | |
345 | if (r) | |
346 | return r; | |
a5db3e49 | 347 | |
c182393f MT |
348 | // Check if this is a match |
349 | uint32_t as_number = loc_as_get_number(*as); | |
8f3e2a06 MT |
350 | if (as_number == number) { |
351 | clock_t end = clock(); | |
352 | ||
353 | // Log how fast this has been | |
354 | DEBUG(db->ctx, "Found AS%u in %.8fs\n", as_number, | |
355 | (double)(end - start) / CLOCKS_PER_SEC); | |
356 | ||
c182393f | 357 | return 0; |
8f3e2a06 | 358 | } |
c182393f MT |
359 | |
360 | // If it wasn't, we release the AS and | |
361 | // adjust our search pointers | |
362 | loc_as_unref(*as); | |
363 | ||
364 | if (as_number < number) { | |
365 | lo = i + 1; | |
366 | } else | |
367 | hi = i - 1; | |
368 | } | |
2601e83e | 369 | |
c182393f MT |
370 | // Nothing found |
371 | *as = NULL; | |
2601e83e | 372 | |
8f3e2a06 | 373 | return 1; |
2601e83e | 374 | } |
10778041 MT |
375 | |
376 | // Returns the network at position pos | |
377 | static int loc_database_fetch_network(struct loc_database* db, struct loc_network** network, struct in6_addr* address, off_t pos) { | |
378 | if ((size_t)pos >= db->networks_count) | |
379 | return -EINVAL; | |
380 | ||
381 | DEBUG(db->ctx, "Fetching network at position %jd\n", pos); | |
382 | ||
383 | int r; | |
384 | switch (db->version) { | |
385 | case 0: | |
386 | r = loc_network_new_from_database_v0(db->ctx, network, address, db->networks_v0 + pos); | |
387 | break; | |
388 | ||
389 | default: | |
390 | return -1; | |
391 | } | |
392 | ||
393 | if (r == 0) { | |
394 | char* string = loc_network_str(*network); | |
395 | DEBUG(db->ctx, "Got network %s\n", string); | |
396 | free(string); | |
397 | } | |
398 | ||
399 | return r; | |
400 | } | |
2a30e4de MT |
401 | |
402 | static int __loc_database_lookup_leaf_node(struct loc_database* db, const struct in6_addr* address, | |
403 | struct loc_network** network, struct in6_addr* network_address, | |
404 | const struct loc_database_network_node_v0* node) { | |
405 | // Check if this node is a leaf node | |
406 | if (node->zero != htobe32(0xffffffff)) | |
407 | return 1; | |
408 | ||
409 | DEBUG(db->ctx, "Node is a leaf: %jd\n", node - db->network_nodes_v0); | |
410 | ||
411 | // Fetch the network | |
412 | int r = loc_database_fetch_network(db, network, | |
413 | network_address, be32toh(node->one)); | |
414 | if (r) | |
415 | return r; | |
416 | ||
417 | // Check if the given IP address is inside the network | |
418 | r = loc_network_match_address(*network, address); | |
419 | if (r) { | |
420 | DEBUG(db->ctx, "Searched address is not part of the network\n"); | |
421 | ||
422 | loc_network_unref(*network); | |
423 | *network = NULL; | |
424 | return 1; | |
425 | } | |
426 | ||
427 | // A network was found and the IP address matches | |
428 | return 0; | |
429 | } | |
430 | ||
431 | // Returns the highest result available | |
432 | static int __loc_database_lookup_max(struct loc_database* db, const struct in6_addr* address, | |
433 | struct loc_network** network, struct in6_addr* network_address, | |
434 | const struct loc_database_network_node_v0* node, int level) { | |
435 | ||
436 | // If the node is a leaf node, we end here | |
437 | int r = __loc_database_lookup_leaf_node(db, address, network, network_address, node); | |
438 | if (r <= 0) | |
439 | return r; | |
440 | ||
441 | off_t node_index; | |
442 | ||
443 | // Try to go down the ones path first | |
444 | if (node->one) { | |
445 | node_index = be32toh(node->one); | |
446 | in6_addr_set_bit(network_address, level, 1); | |
447 | ||
448 | // Check boundaries | |
449 | if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) { | |
450 | r = __loc_database_lookup_max(db, address, network, network_address, | |
451 | db->network_nodes_v0 + node_index, level + 1); | |
452 | ||
453 | // Abort when match was found or error | |
454 | if (r <= 0) | |
455 | return r; | |
456 | } | |
457 | } | |
458 | ||
459 | // ... and if that fails, we try to go down one step on a zero | |
460 | // branch and then try the ones again... | |
461 | if (node->zero) { | |
462 | node_index = be32toh(node->zero); | |
463 | in6_addr_set_bit(network_address, level, 0); | |
464 | ||
465 | // Check boundaries | |
466 | if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) { | |
467 | r = __loc_database_lookup_max(db, address, network, network_address, | |
468 | db->network_nodes_v0 + node_index, level + 1); | |
469 | ||
470 | // Abort when match was found or error | |
471 | if (r <= 0) | |
472 | return r; | |
473 | } | |
474 | } | |
475 | ||
476 | // End of path | |
477 | return 1; | |
478 | } | |
479 | ||
480 | // Searches for an exact match along the path | |
481 | static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* address, | |
482 | struct loc_network** network, struct in6_addr* network_address, | |
483 | const struct loc_database_network_node_v0* node, int level) { | |
484 | // If the node is a leaf node, we end here | |
485 | int r = __loc_database_lookup_leaf_node(db, address, network, network_address, node); | |
486 | if (r <= 0) | |
487 | return r; | |
488 | ||
489 | off_t node_index; | |
490 | ||
491 | // Follow the path | |
492 | int bit = in6_addr_get_bit(address, level); | |
493 | in6_addr_set_bit(network_address, level, bit); | |
494 | ||
495 | if (bit == 0) | |
496 | node_index = be32toh(node->zero); | |
497 | else | |
498 | node_index = be32toh(node->one); | |
499 | ||
500 | // If we point back to root, the path ends here | |
501 | if (node_index == 0) { | |
502 | DEBUG(db->ctx, "Tree ends here\n"); | |
503 | return 1; | |
504 | } | |
505 | ||
506 | // Check boundaries | |
507 | if ((size_t)node_index >= db->network_nodes_count) | |
508 | return -EINVAL; | |
509 | ||
510 | // Move on to the next node | |
511 | r = __loc_database_lookup(db, address, network, network_address, | |
512 | db->network_nodes_v0 + node_index, level + 1); | |
513 | ||
514 | // End here if a result was found | |
515 | if (r == 0) | |
516 | return r; | |
517 | ||
518 | // Raise any errors | |
519 | else if (r < 0) | |
520 | return r; | |
521 | ||
522 | DEBUG(db->ctx, "Could not find an exact match at %u\n", level); | |
523 | ||
524 | // If nothing was found, we have to search for an inexact match | |
525 | return __loc_database_lookup_max(db, address, network, network_address, node, level); | |
526 | } | |
527 | ||
528 | LOC_EXPORT int loc_database_lookup(struct loc_database* db, | |
529 | struct in6_addr* address, struct loc_network** network) { | |
530 | struct in6_addr network_address; | |
531 | memset(&network_address, 0, sizeof(network_address)); | |
532 | ||
533 | *network = NULL; | |
534 | ||
535 | // Save start time | |
536 | clock_t start = clock(); | |
537 | ||
538 | int r = __loc_database_lookup(db, address, network, &network_address, | |
539 | db->network_nodes_v0, 0); | |
540 | ||
541 | clock_t end = clock(); | |
542 | ||
543 | // Log how fast this has been | |
544 | DEBUG(db->ctx, "Executed network search in %.8fs\n", | |
545 | (double)(end - start) / CLOCKS_PER_SEC); | |
546 | ||
547 | return r; | |
548 | } | |
549 | ||
550 | LOC_EXPORT int loc_database_lookup_from_string(struct loc_database* db, | |
551 | const char* string, struct loc_network** network) { | |
552 | struct in6_addr address; | |
553 | ||
554 | int r = loc_parse_address(db->ctx, string, &address); | |
555 | if (r) | |
556 | return r; | |
557 | ||
558 | return loc_database_lookup(db, &address, network); | |
559 | } |