From: Michael Tremer Date: Thu, 7 Dec 2017 12:36:11 +0000 (+0000) Subject: Add deduplicated and memory-efficient string pool X-Git-Tag: 0.9.0~181 X-Git-Url: http://git.ipfire.org/?p=people%2Fms%2Flibloc.git;a=commitdiff_plain;h=62b83e6d9749b1b1d04bef39e16ce95a4792c7e4 Add deduplicated and memory-efficient string pool Signed-off-by: Michael Tremer --- diff --git a/Makefile.am b/Makefile.am index 24ae1e7..d516901 100644 --- a/Makefile.am +++ b/Makefile.am @@ -43,7 +43,9 @@ lib_LTLIBRARIES = \ src_libloc_la_SOURCES =\ src/libloc-private.h \ - src/libloc.c + src/libloc.c \ + src/stringpool.c \ + src/stringpool.h EXTRA_DIST += src/libloc.sym @@ -65,13 +67,21 @@ CLEANFILES += \ src/libloc.pc TESTS = \ - src/test-libloc + src/test-libloc \ + src/test-stringpool check_PROGRAMS = \ - src/test-libloc + src/test-libloc \ + src/test-stringpool src_test_libloc_SOURCES = \ src/test-libloc.c src_test_libloc_LDADD = \ src/libloc.la + +src_test_stringpool_SOURCES = \ + src/test-stringpool.c + +src_test_stringpool_LDADD = \ + src/libloc.la diff --git a/src/.gitignore b/src/.gitignore index a948ad3..9df8d54 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -6,3 +6,4 @@ *.trs libloc.pc test-libloc +test-stringpool diff --git a/src/libloc.sym b/src/libloc.sym index 14dc126..faf3edd 100644 --- a/src/libloc.sym +++ b/src/libloc.sym @@ -1,3 +1,14 @@ +LIBLOC_PRIVATE { +global: + # String Pool + loc_stringpool_add; + loc_stringpool_dump; + loc_stringpool_get; + loc_stringpool_new; + loc_stringpool_ref; + loc_stringpool_unref; +}; + LIBLOC_1 { global: loc_ref; diff --git a/src/stringpool.c b/src/stringpool.c new file mode 100644 index 0000000..299816c --- /dev/null +++ b/src/stringpool.c @@ -0,0 +1,186 @@ +/* + libloc - A library to determine the location of someone on the Internet + + Copyright (C) 2017 IPFire Development Team + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. +*/ + +#include +#include +#include +#include +#include + +#include +#include "libloc-private.h" +#include "stringpool.h" + +struct loc_stringpool { + struct loc_ctx* ctx; + + int refcount; + char* data; + char* pos; + + ssize_t max_length; +}; + +LOC_EXPORT int loc_stringpool_new(struct loc_ctx* ctx, struct loc_stringpool** pool, size_t max_length) { + if (max_length <= 0) + return -EINVAL; + + struct loc_stringpool* p = calloc(1, sizeof(*p)); + if (!p) + return -ENOMEM; + + p->ctx = loc_ref(ctx); + p->refcount = 1; + p->max_length = max_length; + + // Allocate the data section + p->data = malloc(p->max_length); + p->pos = p->data; + + DEBUG(p->ctx, "String pool allocated at %p\n", p); + DEBUG(p->ctx, " Maximum size: %zu bytes\n", p->max_length); + *pool = p; + + return 0; +} + +LOC_EXPORT struct loc_stringpool* loc_stringpool_ref(struct loc_stringpool* pool) { + pool->refcount++; + + return pool; +} + +static void loc_stringpool_free(struct loc_stringpool* pool) { + DEBUG(pool->ctx, "Releasing string pool %p\n", pool); + + loc_unref(pool->ctx); + + free(pool->data); + free(pool); +} + +LOC_EXPORT struct loc_stringpool* loc_stringpool_unref(struct loc_stringpool* pool) { + if (--pool->refcount > 0) + return NULL; + + loc_stringpool_free(pool); + + return NULL; +} + +static off_t loc_stringpool_get_offset(struct loc_stringpool* pool, const char* pos) { + if (pos < pool->data) + return -EFAULT; + + if (pos > (pool->data + pool->max_length)) + return -EFAULT; + + return pos - pool->data; +} + +static off_t loc_stringpool_get_next_offset(struct loc_stringpool* pool, off_t offset) { + const char* string = loc_stringpool_get(pool, offset); + + return offset + strlen(string) + 1; +} + +static size_t loc_stringpool_space_left(struct loc_stringpool* pool) { + return pool->max_length - loc_stringpool_get_offset(pool, pool->pos); +} + +LOC_EXPORT const char* loc_stringpool_get(struct loc_stringpool* pool, off_t offset) { + if (offset >= (ssize_t)pool->max_length) + return NULL; + + const char* string = pool->data + offset; + + // If the string is empty, we have reached the end + if (!*string) + return NULL; + + return string; +} + +static off_t loc_stringpool_find(struct loc_stringpool* pool, const char* s) { + if (!s || !*s) + return -EINVAL; + + off_t offset = 0; + while (offset < pool->max_length) { + const char* string = loc_stringpool_get(pool, offset); + if (!string) + break; + + int r = strcmp(s, string); + if (r == 0) + return offset; + + offset = loc_stringpool_get_next_offset(pool, offset); + } + + return -ENOENT; +} + +static off_t loc_stringpool_append(struct loc_stringpool* pool, const char* string) { + if (!string || !*string) + return -EINVAL; + + DEBUG(pool->ctx, "Appending '%s' to string pool at %p\n", string, pool); + + // Check if we have enough space left + size_t l = strlen(string) + 1; + if (l > loc_stringpool_space_left(pool)) { + DEBUG(pool->ctx, "Not enough space to append '%s'\n", string); + DEBUG(pool->ctx, " Need %zu bytes but only have %zu\n", l, loc_stringpool_space_left(pool)); + return -ENOSPC; + } + + off_t offset = loc_stringpool_get_offset(pool, pool->pos); + + // Copy string byte by byte + while (*string && loc_stringpool_space_left(pool) > 1) { + *pool->pos++ = *string++; + } + + // Terminate the string + *pool->pos++ = '\0'; + + return offset; +} + +LOC_EXPORT off_t loc_stringpool_add(struct loc_stringpool* pool, const char* string) { + off_t offset = loc_stringpool_find(pool, string); + if (offset >= 0) { + DEBUG(pool->ctx, "Found '%s' at position %jd\n", string, offset); + return offset; + } + + return loc_stringpool_append(pool, string); +} + +LOC_EXPORT void loc_stringpool_dump(struct loc_stringpool* pool) { + off_t offset = 0; + + while (offset < pool->max_length) { + const char* string = loc_stringpool_get(pool, offset); + if (!string) + break; + + printf("%jd (%zu): %s\n", offset, strlen(string), string); + + offset = loc_stringpool_get_next_offset(pool, offset); + } +} diff --git a/src/stringpool.h b/src/stringpool.h new file mode 100644 index 0000000..a07cd5c --- /dev/null +++ b/src/stringpool.h @@ -0,0 +1,33 @@ +/* + libloc - A library to determine the location of someone on the Internet + + Copyright (C) 2017 IPFire Development Team + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. +*/ + +#ifndef LIBLOC_STRINGPOOL_H +#define LIBLOC_STRINGPOOL_H + +#include + +#include + +struct loc_stringpool; +int loc_stringpool_new(struct loc_ctx* ctx, struct loc_stringpool** pool, size_t max_length); +struct loc_stringpool* loc_stringpool_ref(struct loc_stringpool* pool); +struct loc_stringpool* loc_stringpool_unref(struct loc_stringpool* pool); + +const char* loc_stringpool_get(struct loc_stringpool* pool, off_t offset); +off_t loc_stringpool_add(struct loc_stringpool* pool, const char* string); +void loc_stringpool_dump(struct loc_stringpool* pool); + +#endif diff --git a/src/test-stringpool.c b/src/test-stringpool.c new file mode 100644 index 0000000..169554a --- /dev/null +++ b/src/test-stringpool.c @@ -0,0 +1,111 @@ +/* + libloc - A library to determine the location of someone on the Internet + + Copyright (C) 2017 IPFire Development Team + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "stringpool.h" + +static const char* characters = "012345789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + +static char* random_string(size_t size) { + char* string = malloc(size + 1); + + char* p = string; + for (unsigned int i = 0; i < size; i++) { + *p++ = characters[rand() % strlen(characters)]; + } + *p = '\0'; + + return string; +} + +int main(int argc, char** argv) { + // Initialize the RNG + time_t now = time(NULL); + srand(now); + + int err; + + struct loc_ctx* ctx; + err = loc_new(&ctx); + if (err < 0) + exit(EXIT_FAILURE); + + // Create the stringpool + struct loc_stringpool* pool; + err = loc_stringpool_new(ctx, &pool, 10002 * 4); + if (err < 0) + exit(EXIT_FAILURE); + + // Append a string + off_t pos = loc_stringpool_add(pool, "ABC"); + if (pos < 0) { + fprintf(stderr, "Could not add string: %s\n", strerror(-pos)); + exit(EXIT_FAILURE); + } + + printf("Added string at %jd\n", pos); + + // Must start at first byte + if (pos != 0) { + fprintf(stderr, "First string didn't start at the first byte\n"); + exit(EXIT_FAILURE); + } + + // Append the same string again + pos = loc_stringpool_add(pool, "ABC"); + if (pos != 0) { + fprintf(stderr, "Same string was added at a different position again\n"); + exit(EXIT_FAILURE); + } + + // Append another string + pos = loc_stringpool_add(pool, "DEF"); + if (pos == 0) { + fprintf(stderr, "Second string was added at the first address\n"); + exit(EXIT_FAILURE); + } + + // Add 10000 random strings + for (unsigned int i = 0; i < 10000; i++) { + char* string = random_string(3); + + pos = loc_stringpool_add(pool, string); + free(string); + + if (pos < 0) { + fprintf(stderr, "Could not add string %d: %s\n", i, strerror(-pos)); + exit(EXIT_FAILURE); + } + } + + // Dump pool + loc_stringpool_dump(pool); + + loc_stringpool_unref(pool); + loc_unref(ctx); + + return EXIT_SUCCESS; +}