From: Michael Tremer Date: Tue, 23 Aug 2022 12:56:10 +0000 (+0000) Subject: database: Read all data from the large mmap() X-Git-Tag: 0.9.15~15 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=975f280ff590d9ec3a76d9f1710145b871c0ae6c;p=location%2Flibloc.git database: Read all data from the large mmap() Signed-off-by: Michael Tremer --- diff --git a/src/database.c b/src/database.c index d58700e..a76afad 100644 --- a/src/database.c +++ b/src/database.c @@ -142,35 +142,53 @@ struct loc_database_enumerator { struct in6_addr gap4_start; }; -#define loc_database_read_object(db, buffer, objects, pos) \ - __loc_database_read_object(db, buffer, sizeof(*buffer), objects, pos) - /* - Reads an object into memory (used when mmap() isn't available) + Checks if it is safe to read the buffer of size length starting at p. */ -static void* __loc_database_read_object(struct loc_database* db, void* buffer, - const size_t length, const struct loc_database_objects* objects, const off_t pos) { - // Calculate offset - const off_t offset = pos * length; +#define loc_database_check_boundaries(db, p) \ + __loc_database_check_boundaries(db, (const char*)p, sizeof(*p)) - // Map the object first if possible - if (objects->data) - return objects->data + offset; +static int __loc_database_check_boundaries(struct loc_database* db, + const char* p, const size_t length) { + size_t offset = p - db->data; - // Otherwise fall back and read the object into the buffer - const int fd = fileno(db->f); + // Return if everything is within the boundary + if (offset <= db->length - length) + return 1; - // Read object - ssize_t bytes_read = pread(fd, buffer, length, objects->offset + offset); + DEBUG(db->ctx, "Database read check failed at %p for %zu byte(s)\n", p, length); + DEBUG(db->ctx, " p = %p (offset = %jd, length = %zu)\n", p, offset, length); + DEBUG(db->ctx, " data = %p (length = %zu)\n", db->data, db->length); + DEBUG(db->ctx, " end = %p\n", db->data + db->length); + DEBUG(db->ctx, " overflow 
of %zu byte(s)\n", offset + length - db->length); - // Check if we could read what we wanted - if (bytes_read < (ssize_t)length) { - ERROR(db->ctx, "Error reading object from database: %m\n"); + // Otherwise raise EFAULT + errno = EFAULT; + return 0; +} + +/* + Returns a pointer to the n-th object +*/ +static char* loc_database_object(struct loc_database* db, + const struct loc_database_objects* objects, const size_t length, const off_t n) { + // Return NULL if objects were not initialized + if (!objects->data) { + errno = EFAULT; return NULL; } - // Success! - return buffer; + // Calculate offset + const off_t offset = n * length; + + // Return a pointer to where the object lies + char* object = objects->data + offset; + + // Check if the object is part of the memory + if (!__loc_database_check_boundaries(db, object, length)) + return NULL; + + return object; } static int loc_database_version_supported(struct loc_database* db, uint8_t version) { @@ -285,25 +303,6 @@ static int loc_database_read_signature(struct loc_database* db, return 0; } -/* - Checks if it is safe to read the buffer of size length starting at p. 
-*/ -#define loc_database_check_boundaries(db, p) \ - __loc_database_check_boundaries(db, (const char*)p, sizeof(*p)) - -static int __loc_database_check_boundaries(struct loc_database* db, - const char* p, const size_t length) { - size_t offset = p - db->data; - - // Return if everything is within the boundary - if (offset < db->length - length) - return 1; - - // Otherwise raise EFAULT - errno = EFAULT; - return 0; -} - static int loc_database_read_header_v1(struct loc_database* db) { const struct loc_database_header_v1* header = (const struct loc_database_header_v1*)(db->data + LOC_DATABASE_MAGIC_SIZE); @@ -337,9 +336,15 @@ static int loc_database_read_header_v1(struct loc_database* db) { if (r) return r; + const char* stringpool_start = db->data + be32toh(header->pool_offset); + size_t stringpool_length = be32toh(header->pool_length); + + // Check if the stringpool is part of the mapped area + if (!__loc_database_check_boundaries(db, stringpool_start, stringpool_length)) + return 1; + // Open the stringpool - r = loc_stringpool_open(db->ctx, &db->pool, db->f, - be32toh(header->pool_length), be32toh(header->pool_offset)); + r = loc_stringpool_open(db->ctx, &db->pool, stringpool_start, stringpool_length); if (r) return r; @@ -709,8 +714,7 @@ LOC_EXPORT size_t loc_database_count_as(struct loc_database* db) { // Returns the AS at position pos static int loc_database_fetch_as(struct loc_database* db, struct loc_as** as, off_t pos) { - struct loc_database_as_v1 as_v1; - struct loc_database_as_v1* p_v1; + struct loc_database_as_v1* as_v1 = NULL; int r; if ((size_t)pos >= db->as_objects.count) { @@ -722,12 +726,13 @@ static int loc_database_fetch_as(struct loc_database* db, struct loc_as** as, of switch (db->version) { case LOC_DATABASE_VERSION_1: - // Read the object - p_v1 = loc_database_read_object(db, &as_v1, &db->as_objects, pos); - if (!p_v1) + // Find the object + as_v1 = (struct loc_database_as_v1*)loc_database_object(db, + &db->as_objects, sizeof(*as_v1), pos); 
+ if (!as_v1) return 1; - r = loc_as_new_from_database_v1(db->ctx, db->pool, as, p_v1); + r = loc_as_new_from_database_v1(db->ctx, db->pool, as, as_v1); break; default: @@ -792,8 +797,7 @@ LOC_EXPORT int loc_database_get_as(struct loc_database* db, struct loc_as** as, // Returns the network at position pos static int loc_database_fetch_network(struct loc_database* db, struct loc_network** network, struct in6_addr* address, unsigned int prefix, off_t pos) { - struct loc_database_network_v1 network_v1; - struct loc_database_network_v1* p_v1; + struct loc_database_network_v1* network_v1 = NULL; int r; if ((size_t)pos >= db->network_objects.count) { @@ -808,11 +812,12 @@ static int loc_database_fetch_network(struct loc_database* db, struct loc_networ switch (db->version) { case LOC_DATABASE_VERSION_1: // Read the object - p_v1 = loc_database_read_object(db, &network_v1, &db->network_objects, pos); - if (!p_v1) + network_v1 = (struct loc_database_network_v1*)loc_database_object(db, + &db->network_objects, sizeof(*network_v1), pos); + if (!network_v1) return 1; - r = loc_network_new_from_database_v1(db->ctx, network, address, prefix, p_v1); + r = loc_network_new_from_database_v1(db->ctx, network, address, prefix, network_v1); break; default: @@ -862,14 +867,14 @@ static int __loc_database_lookup_handle_leaf(struct loc_database* db, const stru static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* address, struct loc_network** network, struct in6_addr* network_address, off_t node_index, unsigned int level) { - struct loc_database_network_node_v1 node_v1; - struct loc_database_network_node_v1* p_v1; + struct loc_database_network_node_v1* node_v1 = NULL; int r; // Fetch the next node - p_v1 = loc_database_read_object(db, &node_v1, &db->network_node_objects, node_index); - if (!p_v1) + node_v1 = (struct loc_database_network_node_v1*)loc_database_object(db, + &db->network_node_objects, sizeof(*node_v1), node_index); + if (!node_v1) return 1; // 
Follow the path @@ -877,9 +882,9 @@ static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* loc_address_set_bit(network_address, level, bit); if (bit == 0) - node_index = be32toh(p_v1->zero); + node_index = be32toh(node_v1->zero); else - node_index = be32toh(p_v1->one); + node_index = be32toh(node_v1->one); // If the node index is zero, the tree ends here // and we cannot descend any further @@ -907,8 +912,8 @@ static int __loc_database_lookup(struct loc_database* db, const struct in6_addr* } // If this node has a leaf, we will check if it matches - if (__loc_database_node_is_leaf(p_v1)) { - r = __loc_database_lookup_handle_leaf(db, address, network, network_address, level, p_v1); + if (__loc_database_node_is_leaf(node_v1)) { + r = __loc_database_lookup_handle_leaf(db, address, network, network_address, level, node_v1); if (r <= 0) return r; } @@ -955,8 +960,7 @@ LOC_EXPORT int loc_database_lookup_from_string(struct loc_database* db, // Returns the country at position pos static int loc_database_fetch_country(struct loc_database* db, struct loc_country** country, off_t pos) { - struct loc_database_country_v1 country_v1; - struct loc_database_country_v1* p_v1; + struct loc_database_country_v1* country_v1 = NULL; int r; // Check if the country is within range @@ -970,11 +974,12 @@ static int loc_database_fetch_country(struct loc_database* db, switch (db->version) { case LOC_DATABASE_VERSION_1: // Read the object - p_v1 = loc_database_read_object(db, &country_v1, &db->country_objects, pos); - if (!p_v1) + country_v1 = (struct loc_database_country_v1*)loc_database_object(db, + &db->country_objects, sizeof(*country_v1), pos); + if (!country_v1) return 1; - r = loc_country_new_from_database_v1(db->ctx, db->pool, country, p_v1); + r = loc_country_new_from_database_v1(db->ctx, db->pool, country, country_v1); break; default: @@ -1315,8 +1320,6 @@ static int loc_database_enumerator_match_network( static int __loc_database_enumerator_next_network( 
struct loc_database_enumerator* enumerator, struct loc_network** network, int filter) { - struct loc_database_network_node_v1 node_v1; - // Return top element from the stack while (1) { *network = loc_network_list_pop_first(enumerator->stack); @@ -1360,8 +1363,9 @@ static int __loc_database_enumerator_next_network( enumerator->networks_visited[node->offset]++; // Pop node from top of the stack - struct loc_database_network_node_v1* n = loc_database_read_object(enumerator->db, - &node_v1, &enumerator->db->network_node_objects, node->offset); + struct loc_database_network_node_v1* n = + (struct loc_database_network_node_v1*)loc_database_object(enumerator->db, + &enumerator->db->network_node_objects, sizeof(*n), node->offset); if (!n) return 1; diff --git a/src/libloc/stringpool.h b/src/libloc/stringpool.h index 932aad7..c6bd216 100644 --- a/src/libloc/stringpool.h +++ b/src/libloc/stringpool.h @@ -27,7 +27,7 @@ struct loc_stringpool; int loc_stringpool_new(struct loc_ctx* ctx, struct loc_stringpool** pool); int loc_stringpool_open(struct loc_ctx* ctx, struct loc_stringpool** pool, - FILE* f, size_t length, off_t offset); + const char* data, const size_t length); struct loc_stringpool* loc_stringpool_ref(struct loc_stringpool* pool); struct loc_stringpool* loc_stringpool_unref(struct loc_stringpool* pool); diff --git a/src/stringpool.c b/src/stringpool.c index d617807..9986a61 100644 --- a/src/stringpool.c +++ b/src/stringpool.c @@ -27,89 +27,36 @@ #include #include +#define LOC_STRINGPOOL_BLOCK_SIZE (512 * 1024) + struct loc_stringpool { struct loc_ctx* ctx; int refcount; - // A file descriptor when we open an existing stringpool - int fd; - - off_t offset; + // Reference to any mapped data + const char* data; ssize_t length; - // Mapped data (from mmap()) - char* mmapped_data; - - char* data; - char* pos; - - char buffer[LOC_DATABASE_PAGE_SIZE]; + // Reference to own storage + char* blocks; + size_t size; }; -static off_t loc_stringpool_get_offset(struct 
loc_stringpool* pool, const char* pos) { - if (pos < pool->data) { - errno = EFAULT; - return -1; - } - - if (pos > (pool->data + pool->length)) { - errno = EFAULT; - return -1; - } +static int loc_stringpool_grow(struct loc_stringpool* pool, const size_t size) { + DEBUG(pool->ctx, "Growing string pool by %zu byte(s)\n", size); - return pos - pool->data; -} - -static char* __loc_stringpool_get(struct loc_stringpool* pool, off_t offset) { - ssize_t bytes_read; - - // Check boundaries - if (offset < 0 || offset >= pool->length) { - errno = ERANGE; - return NULL; - } + // Increment size + pool->size += size; - // Return any data that we have in memory - if (pool->data) - return pool->data + offset; - - // Otherwise read a block from file - bytes_read = pread(pool->fd, pool->buffer, sizeof(pool->buffer), - pool->offset + offset); - - // Break on error - if (bytes_read < 0) { - ERROR(pool->ctx, "Could not read from string pool: %m\n"); - return NULL; - } - - // It is okay, if we did not read as much as we wanted, since we might be reading - // the last block which might be of an unknown size. - - // Search for a complete string. If there is no NULL byte, the block is garbage. 
- char* end = memchr(pool->buffer, bytes_read, '\0'); - if (!end) - return NULL; - - // Return what's in the buffer - return pool->buffer; -} - -static int loc_stringpool_grow(struct loc_stringpool* pool, size_t length) { - DEBUG(pool->ctx, "Growing string pool to %zu bytes\n", length); - - // Save pos pointer - off_t pos = loc_stringpool_get_offset(pool, pool->pos); - - // Reallocate data section - pool->data = realloc(pool->data, length); - if (!pool->data) + // Reallocate blocks + pool->blocks = realloc(pool->blocks, pool->size); + if (!pool->blocks) { + ERROR(pool->ctx, "Could not grow string pool: %m\n"); return 1; + } - pool->length = length; - - // Restore pos - pool->pos = __loc_stringpool_get(pool, pos); + // Update data pointer + pool->data = pool->blocks; return 0; } @@ -122,44 +69,33 @@ static off_t loc_stringpool_append(struct loc_stringpool* pool, const char* stri DEBUG(pool->ctx, "Appending '%s' to string pool at %p\n", string, pool); + // How much space do we need? + const size_t length = strlen(string) + 1; + // Make sure we have enough space - int r = loc_stringpool_grow(pool, pool->length + strlen(string) + 1); - if (r) - return -1; + if (pool->length + length > pool->size) { + int r = loc_stringpool_grow(pool, LOC_STRINGPOOL_BLOCK_SIZE); + if (r) + return r; + } - off_t offset = loc_stringpool_get_offset(pool, pool->pos); + off_t offset = pool->length; - // Copy string byte by byte - while (*string) - *pool->pos++ = *string++; + // Copy the string + memcpy(pool->blocks + offset, string, length); - // Terminate the string - *pool->pos++ = '\0'; + // Update the length of the pool + pool->length += length; return offset; } static void loc_stringpool_free(struct loc_stringpool* pool) { DEBUG(pool->ctx, "Releasing string pool %p\n", pool); - int r; - - // Close file - if (pool->fd > 0) - close(pool->fd); - - // Unmap any mapped memory - if (pool->mmapped_data) { - r = munmap(pool->mmapped_data, pool->length); - if (r) - ERROR(pool->ctx, "Error 
unmapping string pool: %m\n"); - - if (pool->mmapped_data == pool->data) - pool->data = NULL; - } // Free any data - if (pool->data) - free(pool->data); + if (pool->blocks) + free(pool->blocks); loc_unref(pool->ctx); free(pool); @@ -178,27 +114,8 @@ int loc_stringpool_new(struct loc_ctx* ctx, struct loc_stringpool** pool) { return 0; } -static int loc_stringpool_mmap(struct loc_stringpool* pool) { - // Try mmap() - char* p = mmap(NULL, pool->length, PROT_READ, MAP_PRIVATE, pool->fd, pool->offset); - - if (p == MAP_FAILED) { - // Ignore if data hasn't been aligned correctly - if (errno == EINVAL) - return 0; - - ERROR(pool->ctx, "Could not mmap stringpool: %m\n"); - return 1; - } - - // Store mapped memory area - pool->data = pool->mmapped_data = pool->pos = p; - - return 0; -} - int loc_stringpool_open(struct loc_ctx* ctx, struct loc_stringpool** pool, - FILE* f, size_t length, off_t offset) { + const char* data, const size_t length) { struct loc_stringpool* p = NULL; // Allocate a new stringpool @@ -206,29 +123,11 @@ int loc_stringpool_open(struct loc_ctx* ctx, struct loc_stringpool** pool, if (r) goto ERROR; - // Store offset and length - p->offset = offset; + // Store data and length + p->data = data; p->length = length; - DEBUG(p->ctx, "Reading string pool starting from %jd (%zu bytes)\n", - (intmax_t)p->offset, p->length); - - int fd = fileno(f); - - // Copy the file descriptor - p->fd = dup(fd); - if (p->fd < 0) { - ERROR(ctx, "Could not duplicate file the file descriptor: %m\n"); - r = 1; - goto ERROR; - } - - // Map data into memory - if (p->length > 0) { - r = loc_stringpool_mmap(p); - if (r) - goto ERROR; - } + DEBUG(p->ctx, "Opened string pool at %p (%zu bytes)\n", p->data, p->length); *pool = p; return 0; @@ -256,11 +155,18 @@ struct loc_stringpool* loc_stringpool_unref(struct loc_stringpool* pool) { } const char* loc_stringpool_get(struct loc_stringpool* pool, off_t offset) { - return __loc_stringpool_get(pool, offset); + // Check boundaries + if 
(offset < 0 || offset >= pool->length) { + errno = ERANGE; + return NULL; + } + + // Return any data that we have in memory + return pool->data + offset; } size_t loc_stringpool_get_size(struct loc_stringpool* pool) { - return loc_stringpool_get_offset(pool, pool->pos); + return pool->length; } static off_t loc_stringpool_find(struct loc_stringpool* pool, const char* s) {