]> git.ipfire.org Git - location/libloc.git/blob - src/database.c
database: Implement lookup
[location/libloc.git] / src / database.c
1 /*
2 libloc - A library to determine the location of someone on the Internet
3
4 Copyright (C) 2017 IPFire Development Team <info@ipfire.org>
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15 */
16
17 #include <arpa/inet.h>
18 #include <endian.h>
19 #include <errno.h>
20 #include <netinet/in.h>
21 #include <stddef.h>
22 #include <stdint.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mman.h>
27 #include <sys/types.h>
28 #include <time.h>
29 #include <unistd.h>
30
31 #include <loc/libloc.h>
32 #include <loc/as.h>
33 #include <loc/database.h>
34 #include <loc/format.h>
35 #include <loc/network.h>
36 #include <loc/private.h>
37 #include <loc/stringpool.h>
38
39 struct loc_database {
40 struct loc_ctx* ctx;
41 int refcount;
42
43 unsigned int version;
44 time_t created_at;
45 off_t vendor;
46 off_t description;
47
48 // ASes in the database
49 struct loc_database_as_v0* as_v0;
50 size_t as_count;
51
52 // Network tree
53 struct loc_database_network_node_v0* network_nodes_v0;
54 size_t network_nodes_count;
55
56 // Networks
57 struct loc_database_network_v0* networks_v0;
58 size_t networks_count;
59
60 struct loc_stringpool* pool;
61 };
62
63 static int loc_database_read_magic(struct loc_database* db, FILE* f) {
64 struct loc_database_magic magic;
65
66 // Read from file
67 size_t bytes_read = fread(&magic, 1, sizeof(magic), f);
68
69 // Check if we have been able to read enough data
70 if (bytes_read < sizeof(magic)) {
71 ERROR(db->ctx, "Could not read enough data to validate magic bytes\n");
72 DEBUG(db->ctx, "Read %zu bytes, but needed %zu\n", bytes_read, sizeof(magic));
73 return -ENOMSG;
74 }
75
76 // Compare magic bytes
77 if (memcmp(LOC_DATABASE_MAGIC, magic.magic, strlen(LOC_DATABASE_MAGIC)) == 0) {
78 DEBUG(db->ctx, "Magic value matches\n");
79
80 // Parse version
81 db->version = be16toh(magic.version);
82 DEBUG(db->ctx, "Database version is %u\n", db->version);
83
84 return 0;
85 }
86
87 ERROR(db->ctx, "Database format is not compatible\n");
88
89 // Return an error
90 return 1;
91 }
92
93 static int loc_database_read_as_section_v0(struct loc_database* db,
94 FILE* f, const struct loc_database_header_v0* header) {
95 off_t as_offset = be32toh(header->as_offset);
96 size_t as_length = be32toh(header->as_length);
97
98 DEBUG(db->ctx, "Reading AS section from %jd (%zu bytes)\n", as_offset, as_length);
99
100 if (as_length > 0) {
101 db->as_v0 = mmap(NULL, as_length, PROT_READ,
102 MAP_SHARED, fileno(f), as_offset);
103
104 if (db->as_v0 == MAP_FAILED)
105 return -errno;
106 }
107
108 db->as_count = as_length / sizeof(*db->as_v0);
109
110 INFO(db->ctx, "Read %zu ASes from the database\n", db->as_count);
111
112 return 0;
113 }
114
115 static int loc_database_read_network_nodes_section_v0(struct loc_database* db,
116 FILE* f, const struct loc_database_header_v0* header) {
117 off_t network_nodes_offset = be32toh(header->network_tree_offset);
118 size_t network_nodes_length = be32toh(header->network_tree_length);
119
120 DEBUG(db->ctx, "Reading network nodes section from %jd (%zu bytes)\n",
121 network_nodes_offset, network_nodes_length);
122
123 if (network_nodes_length > 0) {
124 db->network_nodes_v0 = mmap(NULL, network_nodes_length, PROT_READ,
125 MAP_SHARED, fileno(f), network_nodes_offset);
126
127 if (db->network_nodes_v0 == MAP_FAILED)
128 return -errno;
129 }
130
131 db->network_nodes_count = network_nodes_length / sizeof(*db->network_nodes_v0);
132
133 INFO(db->ctx, "Read %zu network nodes from the database\n", db->network_nodes_count);
134
135 return 0;
136 }
137
138 static int loc_database_read_networks_section_v0(struct loc_database* db,
139 FILE* f, const struct loc_database_header_v0* header) {
140 off_t networks_offset = be32toh(header->network_data_offset);
141 size_t networks_length = be32toh(header->network_data_length);
142
143 DEBUG(db->ctx, "Reading networks section from %jd (%zu bytes)\n",
144 networks_offset, networks_length);
145
146 if (networks_length > 0) {
147 db->networks_v0 = mmap(NULL, networks_length, PROT_READ,
148 MAP_SHARED, fileno(f), networks_offset);
149
150 if (db->networks_v0 == MAP_FAILED)
151 return -errno;
152 }
153
154 db->networks_count = networks_length / sizeof(*db->networks_v0);
155
156 INFO(db->ctx, "Read %zu networks from the database\n", db->networks_count);
157
158 return 0;
159 }
160
161 static int loc_database_read_header_v0(struct loc_database* db, FILE* f) {
162 struct loc_database_header_v0 header;
163
164 // Read from file
165 size_t size = fread(&header, 1, sizeof(header), f);
166
167 if (size < sizeof(header)) {
168 ERROR(db->ctx, "Could not read enough data for header\n");
169 return -ENOMSG;
170 }
171
172 // Copy over data
173 db->created_at = be64toh(header.created_at);
174 db->vendor = be32toh(header.vendor);
175 db->description = be32toh(header.description);
176
177 // Open pool
178 off_t pool_offset = be32toh(header.pool_offset);
179 size_t pool_length = be32toh(header.pool_length);
180
181 int r = loc_stringpool_open(db->ctx, &db->pool,
182 f, pool_length, pool_offset);
183 if (r)
184 return r;
185
186 // AS section
187 r = loc_database_read_as_section_v0(db, f, &header);
188 if (r)
189 return r;
190
191 // Network Nodes
192 r = loc_database_read_network_nodes_section_v0(db, f, &header);
193 if (r)
194 return r;
195
196 // Networks
197 r = loc_database_read_networks_section_v0(db, f, &header);
198 if (r)
199 return r;
200
201 return 0;
202 }
203
204 static int loc_database_read_header(struct loc_database* db, FILE* f) {
205 switch (db->version) {
206 case 0:
207 return loc_database_read_header_v0(db, f);
208
209 default:
210 ERROR(db->ctx, "Incompatible database version: %u\n", db->version);
211 return 1;
212 }
213 }
214
215 static int loc_database_read(struct loc_database* db, FILE* f) {
216 clock_t start = clock();
217
218 // Read magic bytes
219 int r = loc_database_read_magic(db, f);
220 if (r)
221 return r;
222
223 // Read the header
224 r = loc_database_read_header(db, f);
225 if (r)
226 return r;
227
228 clock_t end = clock();
229
230 INFO(db->ctx, "Opened database in %.8fs\n",
231 (double)(end - start) / CLOCKS_PER_SEC);
232
233 return 0;
234 }
235
236 LOC_EXPORT int loc_database_new(struct loc_ctx* ctx, struct loc_database** database, FILE* f) {
237 // Fail on invalid file handle
238 if (!f)
239 return -EINVAL;
240
241 struct loc_database* db = calloc(1, sizeof(*db));
242 if (!db)
243 return -ENOMEM;
244
245 // Reference context
246 db->ctx = loc_ref(ctx);
247 db->refcount = 1;
248
249 DEBUG(db->ctx, "Database object allocated at %p\n", db);
250
251 int r = loc_database_read(db, f);
252 if (r) {
253 loc_database_unref(db);
254 return r;
255 }
256
257 *database = db;
258
259 return 0;
260 }
261
262 LOC_EXPORT struct loc_database* loc_database_ref(struct loc_database* db) {
263 db->refcount++;
264
265 return db;
266 }
267
268 static void loc_database_free(struct loc_database* db) {
269 DEBUG(db->ctx, "Releasing database %p\n", db);
270
271 // Removing all ASes
272 if (db->as_v0) {
273 int r = munmap(db->as_v0, db->as_count * sizeof(*db->as_v0));
274 if (r)
275 ERROR(db->ctx, "Could not unmap AS section: %s\n", strerror(errno));
276 }
277
278 loc_stringpool_unref(db->pool);
279
280 loc_unref(db->ctx);
281 free(db);
282 }
283
284 LOC_EXPORT struct loc_database* loc_database_unref(struct loc_database* db) {
285 if (--db->refcount > 0)
286 return NULL;
287
288 loc_database_free(db);
289 return NULL;
290 }
291
292 LOC_EXPORT time_t loc_database_created_at(struct loc_database* db) {
293 return db->created_at;
294 }
295
296 LOC_EXPORT const char* loc_database_get_vendor(struct loc_database* db) {
297 return loc_stringpool_get(db->pool, db->vendor);
298 }
299
300 LOC_EXPORT const char* loc_database_get_description(struct loc_database* db) {
301 return loc_stringpool_get(db->pool, db->description);
302 }
303
304 LOC_EXPORT size_t loc_database_count_as(struct loc_database* db) {
305 return db->as_count;
306 }
307
308 // Returns the AS at position pos
309 static int loc_database_fetch_as(struct loc_database* db, struct loc_as** as, off_t pos) {
310 if ((size_t)pos >= db->as_count)
311 return -EINVAL;
312
313 DEBUG(db->ctx, "Fetching AS at position %jd\n", pos);
314
315 int r;
316 switch (db->version) {
317 case 0:
318 r = loc_as_new_from_database_v0(db->ctx, db->pool, as, db->as_v0 + pos);
319 break;
320
321 default:
322 return -1;
323 }
324
325 if (r == 0) {
326 DEBUG(db->ctx, "Got AS%u\n", loc_as_get_number(*as));
327 }
328
329 return r;
330 }
331
332 // Performs a binary search to find the AS in the list
333 LOC_EXPORT int loc_database_get_as(struct loc_database* db, struct loc_as** as, uint32_t number) {
334 off_t lo = 0;
335 off_t hi = db->as_count - 1;
336
337 // Save start time
338 clock_t start = clock();
339
340 while (lo <= hi) {
341 off_t i = (lo + hi) / 2;
342
343 // Fetch AS in the middle between lo and hi
344 int r = loc_database_fetch_as(db, as, i);
345 if (r)
346 return r;
347
348 // Check if this is a match
349 uint32_t as_number = loc_as_get_number(*as);
350 if (as_number == number) {
351 clock_t end = clock();
352
353 // Log how fast this has been
354 DEBUG(db->ctx, "Found AS%u in %.8fs\n", as_number,
355 (double)(end - start) / CLOCKS_PER_SEC);
356
357 return 0;
358 }
359
360 // If it wasn't, we release the AS and
361 // adjust our search pointers
362 loc_as_unref(*as);
363
364 if (as_number < number) {
365 lo = i + 1;
366 } else
367 hi = i - 1;
368 }
369
370 // Nothing found
371 *as = NULL;
372
373 return 1;
374 }
375
376 // Returns the network at position pos
377 static int loc_database_fetch_network(struct loc_database* db, struct loc_network** network, struct in6_addr* address, off_t pos) {
378 if ((size_t)pos >= db->networks_count)
379 return -EINVAL;
380
381 DEBUG(db->ctx, "Fetching network at position %jd\n", pos);
382
383 int r;
384 switch (db->version) {
385 case 0:
386 r = loc_network_new_from_database_v0(db->ctx, network, address, db->networks_v0 + pos);
387 break;
388
389 default:
390 return -1;
391 }
392
393 if (r == 0) {
394 char* string = loc_network_str(*network);
395 DEBUG(db->ctx, "Got network %s\n", string);
396 free(string);
397 }
398
399 return r;
400 }
401
402 static int __loc_database_lookup_leaf_node(struct loc_database* db, const struct in6_addr* address,
403 struct loc_network** network, struct in6_addr* network_address,
404 const struct loc_database_network_node_v0* node) {
405 // Check if this node is a leaf node
406 if (node->zero != htobe32(0xffffffff))
407 return 1;
408
409 DEBUG(db->ctx, "Node is a leaf: %jd\n", node - db->network_nodes_v0);
410
411 // Fetch the network
412 int r = loc_database_fetch_network(db, network,
413 network_address, be32toh(node->one));
414 if (r)
415 return r;
416
417 // Check if the given IP address is inside the network
418 r = loc_network_match_address(*network, address);
419 if (r) {
420 DEBUG(db->ctx, "Searched address is not part of the network\n");
421
422 loc_network_unref(*network);
423 *network = NULL;
424 return 1;
425 }
426
427 // A network was found and the IP address matches
428 return 0;
429 }
430
431 // Returns the highest result available
432 static int __loc_database_lookup_max(struct loc_database* db, const struct in6_addr* address,
433 struct loc_network** network, struct in6_addr* network_address,
434 const struct loc_database_network_node_v0* node, int level) {
435
436 // If the node is a leaf node, we end here
437 int r = __loc_database_lookup_leaf_node(db, address, network, network_address, node);
438 if (r <= 0)
439 return r;
440
441 off_t node_index;
442
443 // Try to go down the ones path first
444 if (node->one) {
445 node_index = be32toh(node->one);
446 in6_addr_set_bit(network_address, level, 1);
447
448 // Check boundaries
449 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
450 r = __loc_database_lookup_max(db, address, network, network_address,
451 db->network_nodes_v0 + node_index, level + 1);
452
453 // Abort when match was found or error
454 if (r <= 0)
455 return r;
456 }
457 }
458
459 // ... and if that fails, we try to go down one step on a zero
460 // branch and then try the ones again...
461 if (node->zero) {
462 node_index = be32toh(node->zero);
463 in6_addr_set_bit(network_address, level, 0);
464
465 // Check boundaries
466 if (node_index > 0 && (size_t)node_index <= db->network_nodes_count) {
467 r = __loc_database_lookup_max(db, address, network, network_address,
468 db->network_nodes_v0 + node_index, level + 1);
469
470 // Abort when match was found or error
471 if (r <= 0)
472 return r;
473 }
474 }
475
476 // End of path
477 return 1;
478 }
479
480 // Searches for an exact match along the path
481 static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* address,
482 struct loc_network** network, struct in6_addr* network_address,
483 const struct loc_database_network_node_v0* node, int level) {
484 // If the node is a leaf node, we end here
485 int r = __loc_database_lookup_leaf_node(db, address, network, network_address, node);
486 if (r <= 0)
487 return r;
488
489 off_t node_index;
490
491 // Follow the path
492 int bit = in6_addr_get_bit(address, level);
493 in6_addr_set_bit(network_address, level, bit);
494
495 if (bit == 0)
496 node_index = be32toh(node->zero);
497 else
498 node_index = be32toh(node->one);
499
500 // If we point back to root, the path ends here
501 if (node_index == 0) {
502 DEBUG(db->ctx, "Tree ends here\n");
503 return 1;
504 }
505
506 // Check boundaries
507 if ((size_t)node_index >= db->network_nodes_count)
508 return -EINVAL;
509
510 // Move on to the next node
511 r = __loc_database_lookup(db, address, network, network_address,
512 db->network_nodes_v0 + node_index, level + 1);
513
514 // End here if a result was found
515 if (r == 0)
516 return r;
517
518 // Raise any errors
519 else if (r < 0)
520 return r;
521
522 DEBUG(db->ctx, "Could not find an exact match at %u\n", level);
523
524 // If nothing was found, we have to search for an inexact match
525 return __loc_database_lookup_max(db, address, network, network_address, node, level);
526 }
527
528 LOC_EXPORT int loc_database_lookup(struct loc_database* db,
529 struct in6_addr* address, struct loc_network** network) {
530 struct in6_addr network_address;
531 memset(&network_address, 0, sizeof(network_address));
532
533 *network = NULL;
534
535 // Save start time
536 clock_t start = clock();
537
538 int r = __loc_database_lookup(db, address, network, &network_address,
539 db->network_nodes_v0, 0);
540
541 clock_t end = clock();
542
543 // Log how fast this has been
544 DEBUG(db->ctx, "Executed network search in %.8fs\n",
545 (double)(end - start) / CLOCKS_PER_SEC);
546
547 return r;
548 }
549
550 LOC_EXPORT int loc_database_lookup_from_string(struct loc_database* db,
551 const char* string, struct loc_network** network) {
552 struct in6_addr address;
553
554 int r = loc_parse_address(db->ctx, string, &address);
555 if (r)
556 return r;
557
558 return loc_database_lookup(db, &address, network);
559 }