From 9c76637fff162478ebd9ad4b705ca0745110a98a Mon Sep 17 00:00:00 2001
From: Erwan Le Goas
Date: Wed, 14 Sep 2022 17:40:24 +0200
Subject: [PATCH] MINOR: anon: add new macros and functions to anonymize contents

These macros and functions will be used to anonymize strings by producing a
short hash. This will make it possible to match config elements against dump
elements without revealing the original data. This will later be used to
anonymize configuration parts and CLI commands output. For now only strings,
identifiers and addresses are supported, but the model is easily extensible.
---
 include/haproxy/tools.h |  20 ++++++++
 src/tools.c             | 106 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+)

diff --git a/include/haproxy/tools.h b/include/haproxy/tools.h
index 0e719832c9..d3bf1cf571 100644
--- a/include/haproxy/tools.h
+++ b/include/haproxy/tools.h
@@ -45,6 +45,7 @@
 #include
 #include
 #include
+#include
 
 /****** string-specific macros and functions ******/
 /* if a > max, then bound to . The macro returns the new */
@@ -55,6 +56,19 @@
 
 #define SWAP(a, b) do { typeof(a) t; t = a; a = b; b = t; } while(0)
 
+/* return the 24-bit hash of a string and length for a given key. All keys are valid. */
+#define HA_ANON(key, str, len) (XXH32(str, len, key) & 0xFFFFFF)
+
+/* use if you want to return a simple hash. Key 0 doesn't hash. */
+#define HA_ANON_STR(key, str) hash_anon(key, str, "", "")
+
+/* use if you want to return a hash like : ID('hash'). Key 0 doesn't hash. */
+#define HA_ANON_ID(key, str) hash_anon(key, str, "ID(", ")")
+
+/* use if you want to return a hash like : PATH('hash'). Key 0 doesn't hash. */
+#define HA_ANON_PATH(key, str) hash_anon(key, str, "PATH(", ")")
+
+
 /*
  * copies at most chars from to . Last char is always
  * set to 0, unless is 0. The number of chars copied is returned
@@ -480,6 +494,12 @@ unsigned int inetaddr_host(const char *text);
 unsigned int inetaddr_host_lim(const char *text, const char *stop);
 unsigned int inetaddr_host_lim_ret(char *text, char *stop, char **ret);
 
+/* Function that hashes or not a string according to the anonymizing key (scramble). */
+const char *hash_anon(uint32_t scramble, const char *string2hash, const char *prefix, const char *suffix);
+
+/* Function that hashes or not an IP address according to the anonymizing key (scramble) and the ipstring entered */
+const char *hash_ipanon(uint32_t scramble, char *ipstring);
+
 static inline char *cut_crlf(char *s) {
 
 	while (*s != '\r' && *s != '\n') {
diff --git a/src/tools.c b/src/tools.c
index 2311f725a6..34b1ab099c 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -76,6 +76,9 @@ extern void *__elf_aux_vector;
  */
 #define RET0_UNLESS(__x) do { if (!(__x)) return 0; } while (0)
 
+/* Number of slots (lines) of the hash_word table */
+#define NB_L_HASH_WORD 7
+
 /* enough to store NB_ITOA_STR integers of :
  * 2^64-1 = 18446744073709551615 or
  * -2^63 = -9223372036854775808
@@ -102,6 +105,10 @@ THREAD_LOCAL unsigned int statistical_prng_state = 2463534242U;
 /* set to true if this is a static build */
 int build_is_static = 0;
 
+/* THREAD_LOCAL ring of slots storing hashed words, and index of the last slot handed out */
+static THREAD_LOCAL char hash_word[NB_L_HASH_WORD][20];
+static THREAD_LOCAL int index_hash = 0;
+
 /*
  * unsigned long long ASCII representation
  *
@@ -5874,6 +5881,105 @@ void update_word_fingerprint(uint8_t *fp, const char *word)
 	fp[32 * from + to]++;
 }
 
+/* Anonymizes string2hash with the key scramble. When scramble is not zero,
+ * returns prefix, then the HA_ANON() hash of the string printed as 6 hex
+ * digits, then suffix. The string itself is returned when scramble is zero,
+ * or when the string is empty or made of a single space. Hashed results are
+ * stored in a ring of NB_L_HASH_WORD THREAD_LOCAL slots shared with
+ * hash_ipanon(): a returned pointer is overwritten once NB_L_HASH_WORD more
+ * calls to these functions were made, so never use more than NB_L_HASH_WORD
+ * results at once. A result which does not fit in a slot is truncated by
+ * snprintf().
+ */
+const char *hash_anon(uint32_t scramble, const char *string2hash, const char *prefix, const char *suffix)
+{
+	/* move to the next slot, wrapping before going past the last one */
+	index_hash++;
+	if (index_hash >= NB_L_HASH_WORD)
+		index_hash = 0;
+
+	/* don't hash empty strings */
+	if (!string2hash[0] || (string2hash[0] == ' ' && string2hash[1] == 0))
+		return string2hash;
+
+	/* a null key disables anonymization */
+	if (scramble == 0)
+		return string2hash;
+
+	snprintf(hash_word[index_hash], sizeof(hash_word[index_hash]), "%s%06x%s",
+	         prefix, HA_ANON(scramble, string2hash, strlen(string2hash)), suffix);
+	return hash_word[index_hash];
+}
+
+/* Anonymizes the address ipstring with the key scramble. Returns
+ * "IPV4(hash)" or "IPV6(hash)", followed by ":port" when the port is not
+ * zero, where hash is HA_ANON() of the address as printed by addr_to_str().
+ * ipstring itself is returned when there is nothing to hash: scramble is
+ * zero, the string is "localhost", str2sa_range() cannot parse it, the family
+ * is neither AF_INET nor AF_INET6, the IPv4 address starts with "127", "255"
+ * or "0", or the IPv6 address is "::1". Hashed results use the same slots as
+ * hash_anon() and have the same lifetime.
+ */
+const char *hash_ipanon(uint32_t scramble, char *ipstring)
+{
+	char *errmsg = NULL;
+	struct sockaddr_storage *sa;
+	const char *family;
+	unsigned int hash;
+	char addr[46];
+	int port;
+
+	/* move to the next slot, wrapping before going past the last one */
+	index_hash++;
+	if (index_hash >= NB_L_HASH_WORD)
+		index_hash = 0;
+
+	/* a null key disables anonymization, as in hash_anon() */
+	if (scramble == 0)
+		return ipstring;
+
+	/* compare the whole word, not only its first character */
+	if (strcmp(ipstring, "localhost") == 0)
+		return ipstring;
+
+	sa = str2sa_range(ipstring, NULL, NULL, NULL, NULL, NULL, &errmsg, NULL, NULL,
+	                  PA_O_PORT_OK | PA_O_STREAM | PA_O_XPRT | PA_O_CONNECT | PA_O_PORT_RANGE);
+
+	/* the message is never reported; release it (assumed heap-allocated by str2sa_range()) */
+	free(errmsg);
+
+	if (sa == NULL)
+		return ipstring;
+
+	addr_to_str(sa, addr, sizeof(addr));
+	port = get_host_port(sa);
+
+	switch (sa->ss_family) {
+	case AF_INET:
+		/* addresses whose text starts with 127, 255 or 0 are kept as-is */
+		if (strncmp(addr, "127", 3) == 0 || strncmp(addr, "255", 3) == 0 || strncmp(addr, "0", 1) == 0)
+			return ipstring;
+		family = "IPV4";
+		break;
+
+	case AF_INET6:
+		if (strcmp(addr, "::1") == 0)
+			return ipstring;
+		family = "IPV6";
+		break;
+
+	default:
+		return ipstring;
+	}
+
+	hash = HA_ANON(scramble, addr, strlen(addr));
+	if (port != 0)
+		snprintf(hash_word[index_hash], sizeof(hash_word[index_hash]), "%s(%06x):%d", family, hash, port);
+	else
+		snprintf(hash_word[index_hash], sizeof(hash_word[index_hash]), "%s(%06x)", family, hash);
+	return hash_word[index_hash];
+}
+
 /* Initialize array with the fingerprint of word by counting the
  * transitions between characters. is a 1024-entries array indexed as
  * 32*from+to. Positions for 'from' and 'to' are:
-- 
2.39.5