From ec684c1afb72e51f735c81446deb6430ea2dbd6b Mon Sep 17 00:00:00 2001 From: Michael Tremer Date: Tue, 15 Oct 2019 13:20:20 +0000 Subject: [PATCH] Add a dictionary with countries to the database Signed-off-by: Michael Tremer --- Makefile.am | 17 ++++- src/.gitignore | 1 + src/country.c | 174 ++++++++++++++++++++++++++++++++++++++++++ src/database.c | 106 +++++++++++++++++++++++++ src/libloc.sym | 12 +++ src/loc/country.h | 57 ++++++++++++++ src/loc/database.h | 4 + src/loc/format.h | 12 +++ src/loc/writer.h | 2 + src/test-country.c | 98 ++++++++++++++++++++++++ src/test-stringpool.c | 7 ++ src/writer.c | 60 +++++++++++++++ 12 files changed, 548 insertions(+), 2 deletions(-) create mode 100644 src/country.c create mode 100644 src/loc/country.h create mode 100644 src/test-country.c diff --git a/Makefile.am b/Makefile.am index 4bd6b4d..afc5833 100644 --- a/Makefile.am +++ b/Makefile.am @@ -68,6 +68,7 @@ EXTRA_DIST += \ pkginclude_HEADERS = \ src/loc/libloc.h \ src/loc/as.h \ + src/loc/country.h \ src/loc/database.h \ src/loc/format.h \ src/loc/network.h \ @@ -81,6 +82,7 @@ lib_LTLIBRARIES = \ src_libloc_la_SOURCES = \ src/libloc.c \ src/as.c \ + src/country.c \ src/database.c \ src/network.c \ src/stringpool.c \ @@ -210,7 +212,8 @@ TESTS = \ src/test-stringpool \ src/test-database \ src/test-as \ - src/test-network + src/test-network \ + src/test-country CLEANFILES += \ test.db \ @@ -224,7 +227,8 @@ check_PROGRAMS = \ src/test-stringpool \ src/test-database \ src/test-as \ - src/test-network + src/test-network \ + src/test-country src_test_libloc_SOURCES = \ src/test-libloc.c @@ -244,6 +248,15 @@ src_test_as_CFLAGS = \ src_test_as_LDADD = \ src/libloc.la +src_test_country_SOURCES = \ + src/test-country.c + +src_test_country_CFLAGS = \ + $(TESTS_CFLAGS) + +src_test_country_LDADD = \ + src/libloc.la + src_test_network_SOURCES = \ src/test-network.c diff --git a/src/.gitignore b/src/.gitignore index 0ac749b..0a6804a 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ 
-8,5 +8,6 @@ libloc.pc test-as test-libloc test-database +test-country test-network test-stringpool diff --git a/src/country.c b/src/country.c new file mode 100644 index 0000000..d075bd7 --- /dev/null +++ b/src/country.c @@ -0,0 +1,174 @@ +/* + libloc - A library to determine the location of someone on the Internet + + Copyright (C) 2019 IPFire Development Team + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. +*/ + +#include +#include +#include + +#include +#include +#include + +struct loc_country { + struct loc_ctx* ctx; + int refcount; + + char* code; + char* continent_code; + + char* name; +}; + +LOC_EXPORT int loc_country_new(struct loc_ctx* ctx, struct loc_country** country, const char* country_code) { + struct loc_country* c = calloc(1, sizeof(*c)); + if (!c) + return -ENOMEM; + + c->ctx = loc_ref(ctx); + c->refcount = 1; + + c->code = strdup(country_code); + if (!c->code) { + loc_unref(c->ctx); + free(c); + return -ENOMEM; + } + DEBUG(c->ctx, "Country %s allocated at %p\n", c->code, c); + *country = c; + + return 0; +} + +LOC_EXPORT struct loc_country* loc_country_ref(struct loc_country* country) { + country->refcount++; + + return country; +} + +static void loc_country_free(struct loc_country* country) { + DEBUG(country->ctx, "Releasing country %s %p\n", country->code, country); + + // free(NULL) is a no-op, so unset fields need no guard + free(country->code); + free(country->continent_code); + free(country->name); + + loc_unref(country->ctx); + free(country); +} + +LOC_EXPORT struct loc_country* loc_country_unref(struct loc_country* country) { + if 
(--country->refcount > 0) + return NULL; + + loc_country_free(country); + + return NULL; +} + +LOC_EXPORT const char* loc_country_get_code(struct loc_country* country) { + return country->code; +} + +LOC_EXPORT const char* loc_country_get_continent_code(struct loc_country* country) { + return country->continent_code; +} + +LOC_EXPORT int loc_country_set_continent_code(struct loc_country* country, const char* continent_code) { + // XXX validate input + char* copy = strdup(continent_code); + if (!copy) + return -ENOMEM; + + // Free previous value only once the replacement exists + free(country->continent_code); + country->continent_code = copy; + return 0; +} + +LOC_EXPORT const char* loc_country_get_name(struct loc_country* country) { + return country->name; +} + +LOC_EXPORT int loc_country_set_name(struct loc_country* country, const char* name) { + // A NULL name clears the field; duplicate first so a failed allocation keeps the old name + char* copy = name ? strdup(name) : NULL; + if (name && !copy) + return -ENOMEM; + free(country->name); + country->name = copy; + return 0; +} + +int loc_country_cmp(struct loc_country* country1, struct loc_country* country2) { + return strcmp(country1->code, country2->code); +} + +int loc_country_new_from_database_v0(struct loc_ctx* ctx, struct loc_stringpool* pool, + struct loc_country** country, const struct loc_database_country_v0* dbobj) { + char buffer[3]; + + // Read country code + loc_country_copy_code(buffer, dbobj->code); + + // Create a new country object + int r = loc_country_new(ctx, country, buffer); + if (r) + return r; + + // Continent Code + loc_country_copy_code(buffer, dbobj->continent_code); + + r = loc_country_set_continent_code(*country, buffer); + if (r) + goto FAIL; + + // Set name +#if 0 + // XXX Reading from the stringpool makes test-country.c fail + const char* name = loc_stringpool_get(pool, be32toh(dbobj->name)); + if (name) { + r = loc_country_set_name(*country, name); + if (r) + goto FAIL; + } +#endif + + return 0; + +FAIL: + *country = loc_country_unref(*country); // unref always returns NULL, so no dangling pointer is handed out + return r; +} + +int loc_country_to_database_v0(struct loc_country* country, + struct loc_stringpool* pool, struct 
loc_database_country_v0* dbobj) { + // Add country and continent code (zero-padded; never reads past a terminator or an unset value) + const char* code = country->code ? country->code : ""; + const char* continent_code = country->continent_code ? country->continent_code : ""; + for (unsigned int i = 0; i < 2; i++) { + dbobj->code[i] = *code ? *code++ : '\0'; + dbobj->continent_code[i] = *continent_code ? *continent_code++ : '\0'; + } + + // Save the name string in the string pool; a negative offset signals an error + off_t name = loc_stringpool_add(pool, country->name ? country->name : ""); + if (name < 0) + return (int)name; + dbobj->name = htobe32(name); + + return 0; +} diff --git a/src/database.c b/src/database.c index f293ac1..825d798 100644 --- a/src/database.c +++ b/src/database.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -59,6 +60,10 @@ struct loc_database { struct loc_database_network_v0* networks_v0; size_t networks_count; + // Countries + struct loc_database_country_v0* countries_v0; + size_t countries_count; + struct loc_stringpool* pool; }; @@ -190,6 +195,30 @@ static int loc_database_read_networks_section_v0(struct loc_database* db, return 0; } +static int loc_database_read_countries_section_v0(struct loc_database* db, + FILE* f, const struct loc_database_header_v0* header) { + off_t countries_offset = be32toh(header->countries_offset); + size_t countries_length = be32toh(header->countries_length); + + DEBUG(db->ctx, "Reading countries section from %jd (%zu bytes)\n", + countries_offset, countries_length); + + if (countries_length > 0) { + db->countries_v0 = mmap(NULL, countries_length, PROT_READ, + MAP_SHARED, fileno(f), countries_offset); + + if (db->countries_v0 == MAP_FAILED) + return -errno; + } + + db->countries_count = countries_length / sizeof(*db->countries_v0); + + INFO(db->ctx, "Read %zu countries from the database\n", + db->countries_count); + + return 0; +} + static int loc_database_read_header_v0(struct loc_database* db, FILE* f) { struct loc_database_header_v0 header; @@ -231,6 +260,11 @@ static int loc_database_read_header_v0(struct loc_database* db, FILE* f) { if (r) return r; + 
// countries + r = loc_database_read_countries_section_v0(db, f, &header); + if (r) + return r; + return 0; } @@ -570,6 +604,78 @@ LOC_EXPORT int loc_database_lookup_from_string(struct loc_database* db, return loc_database_lookup(db, &address, network); } +// Returns the country at position pos (0 on success, negative on error) +static int loc_database_fetch_country(struct loc_database* db, + struct loc_country** country, off_t pos) { + if ((size_t)pos >= db->countries_count) + return -EINVAL; + + DEBUG(db->ctx, "Fetching country at position %jd\n", pos); + + int r; + switch (db->version) { + case 0: + r = loc_country_new_from_database_v0(db->ctx, db->pool, country, db->countries_v0 + pos); + break; + + default: + return -1; + } + + if (r == 0) { + DEBUG(db->ctx, "Got country %s\n", loc_country_get_code(*country)); + } + + return r; +} + +// Performs a binary search to find the country in the list (0 on a match, 1 if there is none, negative on error) +LOC_EXPORT int loc_database_get_country(struct loc_database* db, + struct loc_country** country, const char* code) { + off_t lo = 0; + off_t hi = (off_t)db->countries_count - 1; + + // Save start time + clock_t start = clock(); + + while (lo <= hi) { + off_t i = (lo + hi) / 2; + + // Fetch country in the middle between lo and hi + int r = loc_database_fetch_country(db, country, i); + if (r) + return r; + + // Check if this is a match + const char* cc = loc_country_get_code(*country); + int result = strcmp(code, cc); + + if (result == 0) { + clock_t end = clock(); + + // Log how fast this has been + DEBUG(db->ctx, "Found country %s in %.4fms\n", cc, + (double)(end - start) / CLOCKS_PER_SEC * 1000); + + return 0; + } + + // If it wasn't, we release the country and adjust our search pointers; + // codes are stored in ascending order, so a greater code is in the upper half + *country = loc_country_unref(*country); + + if (result > 0) { + lo = i + 1; + } else + hi = i - 1; + } + + // Nothing found + *country = NULL; + + return 1; +} + // Enumerator LOC_EXPORT int loc_database_enumerator_new(struct loc_database_enumerator** enumerator, diff --git a/src/libloc.sym b/src/libloc.sym index 0a3a76d..8ed931d 
100644 --- a/src/libloc.sym +++ b/src/libloc.sym @@ -36,11 +36,22 @@ global: loc_as_set_name; loc_as_unref; + # Country + loc_country_get_code; + loc_country_get_continent_code; + loc_country_get_name; + loc_country_new; + loc_country_ref; + loc_country_set_continent_code; + loc_country_set_name; + loc_country_unref; + # Database loc_database_add_as; loc_database_count_as; loc_database_created_at; loc_database_get_as; + loc_database_get_country; loc_database_get_description; loc_database_get_license; loc_database_get_vendor; @@ -78,6 +89,7 @@ global: # Writer loc_writer_add_as; + loc_writer_add_country; loc_writer_add_network; loc_writer_get_description; loc_writer_get_license; diff --git a/src/loc/country.h b/src/loc/country.h new file mode 100644 index 0000000..b5fd944 --- /dev/null +++ b/src/loc/country.h @@ -0,0 +1,57 @@ +/* + libloc - A library to determine the location of someone on the Internet + + Copyright (C) 2019 IPFire Development Team + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+*/ + +#ifndef LIBLOC_COUNTRY_H +#define LIBLOC_COUNTRY_H + +#include +#include +#include + +struct loc_country; +int loc_country_new(struct loc_ctx* ctx, struct loc_country** country, const char* country_code); +struct loc_country* loc_country_ref(struct loc_country* country); +struct loc_country* loc_country_unref(struct loc_country* country); + +const char* loc_country_get_code(struct loc_country* country); + +const char* loc_country_get_continent_code(struct loc_country* country); +int loc_country_set_continent_code(struct loc_country* country, const char* continent_code); + +const char* loc_country_get_name(struct loc_country* country); +int loc_country_set_name(struct loc_country* country, const char* name); + +int loc_country_cmp(struct loc_country* country1, struct loc_country* country2); + +#ifdef LIBLOC_PRIVATE + +int loc_country_new_from_database_v0(struct loc_ctx* ctx, struct loc_stringpool* pool, + struct loc_country** country, const struct loc_database_country_v0* dbobj); +int loc_country_to_database_v0(struct loc_country* country, + struct loc_stringpool* pool, struct loc_database_country_v0* dbobj); + +static inline void loc_country_copy_code(char* dst, const char* src) { + for (unsigned int i = 0; i < 2; i++) { + dst[i] = src[i]; + } + + // Terminate the string right after the two copied characters (dst must hold three bytes) + dst[2] = '\0'; +} + +#endif + +#endif diff --git a/src/loc/database.h b/src/loc/database.h index 2a23a89..b1f0856 100644 --- a/src/loc/database.h +++ b/src/loc/database.h @@ -24,6 +24,7 @@ #include #include #include +#include struct loc_database; int loc_database_new(struct loc_ctx* ctx, struct loc_database** database, FILE* f); @@ -43,6 +44,9 @@ int loc_database_lookup(struct loc_database* db, int loc_database_lookup_from_string(struct loc_database* db, const char* string, struct loc_network** network); +int loc_database_get_country(struct loc_database* db, + struct loc_country** country, const char* code); + enum loc_database_enumerator_mode { LOC_DB_ENUMERATE_NETWORKS = 1, 
LOC_DB_ENUMERATE_ASES = 2, diff --git a/src/loc/format.h b/src/loc/format.h index e138f69..6ea6036 100644 --- a/src/loc/format.h +++ b/src/loc/format.h @@ -59,6 +59,10 @@ struct loc_database_header_v0 { uint32_t network_tree_offset; uint32_t network_tree_length; + // Tells us where the countries section starts and how long it is (in bytes, both big-endian) + uint32_t countries_offset; + uint32_t countries_length; + // Tells us where the pool starts uint32_t pool_offset; uint32_t pool_length; @@ -92,5 +96,13 @@ struct loc_database_as_v0 { uint32_t name; }; +struct loc_database_country_v0 { + char code[2]; + char continent_code[2]; + + // Offset of the name in the string pool (big-endian); the two codes above carry no terminator + uint32_t name; +}; + #endif #endif diff --git a/src/loc/writer.h b/src/loc/writer.h index 11ab2f2..d99969c 100644 --- a/src/loc/writer.h +++ b/src/loc/writer.h @@ -21,6 +21,7 @@ #include #include +#include #include struct loc_writer; @@ -39,6 +40,7 @@ int loc_writer_set_license(struct loc_writer* writer, const char* license); int loc_writer_add_as(struct loc_writer* writer, struct loc_as** as, uint32_t number); int loc_writer_add_network(struct loc_writer* writer, struct loc_network** network, const char* string); +int loc_writer_add_country(struct loc_writer* writer, struct loc_country** country, const char* country_code); int loc_writer_write(struct loc_writer* writer, FILE* f); diff --git a/src/test-country.c b/src/test-country.c new file mode 100644 index 0000000..96e1c3b --- /dev/null +++ b/src/test-country.c @@ -0,0 +1,98 @@ +/* + libloc - A library to determine the location of someone on the Internet + + Copyright (C) 2019 IPFire Development Team + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. +*/ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +int main(int argc, char** argv) { + struct loc_country* country; + int err; + + struct loc_ctx* ctx; + err = loc_new(&ctx); + if (err < 0) + exit(EXIT_FAILURE); + + // Create a writer which will build the database + struct loc_writer* writer; + err = loc_writer_new(ctx, &writer); + if (err < 0) + exit(EXIT_FAILURE); + + // Create a country + err = loc_writer_add_country(writer, &country, "T1"); + if (err) { + fprintf(stderr, "Could not create country\n"); + exit(EXIT_FAILURE); + } + + // Set name & continent + loc_country_set_name(country, "Testistan"); + loc_country_set_continent_code(country, "XX"); + + // Drop our reference (the writer holds its own) + loc_country_unref(country); + + FILE* f = fopen("test.db", "w"); + if (!f) { + fprintf(stderr, "Could not open file for writing: %s\n", strerror(errno)); + exit(EXIT_FAILURE); + } + + err = loc_writer_write(writer, f); + if (err) { + fprintf(stderr, "Could not write database: %s\n", strerror(-err)); + exit(EXIT_FAILURE); + } + fclose(f); + + loc_writer_unref(writer); + + // And open it again from disk + f = fopen("test.db", "r"); + if (!f) { + fprintf(stderr, "Could not open file for reading: %s\n", strerror(errno)); + exit(EXIT_FAILURE); + } + + struct loc_database* db; + err = loc_database_new(ctx, &db, f); + if (err) { + fprintf(stderr, "Could not open database: %s\n", strerror(-err)); + exit(EXIT_FAILURE); + } + + // Look up the country we have just written by its code + err = loc_database_get_country(db, &country, "T1"); + if (err) { + fprintf(stderr, "Could not find country T1\n"); + exit(EXIT_FAILURE); + } + loc_country_unref(country); + + loc_database_unref(db); + loc_unref(ctx); + + return EXIT_SUCCESS; +} diff --git a/src/test-stringpool.c 
b/src/test-stringpool.c index 11653fe..85bbf68 100644 --- a/src/test-stringpool.c +++ b/src/test-stringpool.c @@ -59,6 +59,13 @@ int main(int argc, char** argv) { if (err < 0) exit(EXIT_FAILURE); + // Try reading some invalid string + const char* s = loc_stringpool_get(pool, 100); + if (s != NULL) { + fprintf(stderr, "An unexpected string was returned: %s\n", s); + exit(EXIT_FAILURE); + } + // Append a string off_t pos = loc_stringpool_add(pool, "ABC"); if (pos < 0) { diff --git a/src/writer.c b/src/writer.c index eb2b2e6..bf3d2bb 100644 --- a/src/writer.c +++ b/src/writer.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -41,6 +42,9 @@ struct loc_writer { struct loc_as** as; size_t as_count; + struct loc_country** countries; + size_t countries_count; + struct loc_network_tree* networks; }; @@ -183,6 +187,32 @@ LOC_EXPORT int loc_writer_add_network(struct loc_writer* writer, struct loc_netw return loc_network_tree_add_network(writer->networks, *network); } +static int __loc_country_cmp(const void* country1, const void* country2) { + return loc_country_cmp(*(struct loc_country**)country1, *(struct loc_country**)country2); +} + +LOC_EXPORT int loc_writer_add_country(struct loc_writer* writer, struct loc_country** country, const char* country_code) { + int r = loc_country_new(writer->ctx, country, country_code); + if (r) + return r; + + // Make space, but keep the old array and count intact if that fails + struct loc_country** countries = realloc(writer->countries, sizeof(*countries) * (writer->countries_count + 1)); + if (!countries) { + *country = loc_country_unref(*country); + return -ENOMEM; + } + writer->countries = countries; + + // Add as last element + writer->countries[writer->countries_count++] = loc_country_ref(*country); + + // Sort everything + qsort(writer->countries, writer->countries_count, sizeof(*writer->countries), __loc_country_cmp); + + return 0; +} + static void make_magic(struct loc_writer* writer, struct loc_database_magic* magic) { // Copy magic bytes for (unsigned int i = 0; i < 
strlen(LOC_DATABASE_MAGIC); i++) @@ -408,6 +438,31 @@ static int loc_database_write_networks(struct loc_writer* writer, return 0; } +static int loc_database_write_countries(struct loc_writer* writer, + struct loc_database_header_v0* header, off_t* offset, FILE* f) { + DEBUG(writer->ctx, "Countries section starts at %jd bytes\n", *offset); + header->countries_offset = htobe32(*offset); + + size_t countries_length = 0; + struct loc_database_country_v0 country; + for (unsigned int i = 0; i < writer->countries_count; i++) { + // Convert country into database format and pass on any error + int r = loc_country_to_database_v0(writer->countries[i], writer->pool, &country); + if (r) + return r; + + // Write to disk + *offset += fwrite(&country, 1, sizeof(country), f); + countries_length += sizeof(country); + } + + DEBUG(writer->ctx, "Countries section has a length of %zu bytes\n", countries_length); + header->countries_length = htobe32(countries_length); + align_page_boundary(offset, f); + + return 0; +} + LOC_EXPORT int loc_writer_write(struct loc_writer* writer, FILE* f) { struct loc_database_magic magic; make_magic(writer, &magic); @@ -457,6 +512,11 @@ LOC_EXPORT int loc_writer_write(struct loc_writer* writer, FILE* f) { if (r) return r; + // Write countries (NOTE(review): their names are added to the string pool only now - confirm the pool section is written after this) + r = loc_database_write_countries(writer, &header, &offset, f); + if (r) + return r; + // Write the header r = fseek(f, sizeof(magic), SEEK_SET); if (r) -- 2.39.2