From: Willy Tarreau Date: Tue, 24 Jun 2025 15:14:47 +0000 (+0200) Subject: MINOR: tools: add support for ist to the word fingerprinting functions X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a4d78dd4f5882176e4259baa6f973f3abe6ffe1f;p=thirdparty%2Fhaproxy.git MINOR: tools: add support for ist to the word fingerprinting functions The word fingerprinting functions are used to compare similar words to suggest a correctly spelled one that looks like what the user proposed. Currently the functions only support const char*, but there's no reason for this, and it would be convenient to support substrings extracted from random pieces of configurations. Here we're adding new variants "_with_len" that take these ISTs and which are in fact a slight change of the original ones that the old ones now rely on. --- diff --git a/include/haproxy/tools.h b/include/haproxy/tools.h index 160d5f3ac..931f71506 100644 --- a/include/haproxy/tools.h +++ b/include/haproxy/tools.h @@ -1028,7 +1028,9 @@ int is_dir_present(const char *path_fmt, ...); uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbargs, uint32_t opts, const char **errptr); ssize_t read_line_to_trash(const char *path_fmt, ...); size_t sanitize_for_printing(char *line, size_t pos, size_t width); +void update_word_fingerprint_with_len(uint8_t *fp, struct ist word); void update_word_fingerprint(uint8_t *fp, const char *word); +void make_word_fingerprint_with_len(uint8_t *fp, struct ist word); void make_word_fingerprint(uint8_t *fp, const char *word); int word_fingerprint_distance(const uint8_t *fp1, const uint8_t *fp2); diff --git a/src/tools.c b/src/tools.c index 7f29e807b..093e691b9 100644 --- a/src/tools.c +++ b/src/tools.c @@ -6627,20 +6627,20 @@ size_t sanitize_for_printing(char *line, size_t pos, size_t width) return pos - shift; } -/* Update array <fp> with the fingerprint of word <word> by counting the - * transitions between characters. <fp> is a 1024-entries array indexed as - * 32*from+to. 
Positions for 'from' and 'to' are: +/* Update array <fp> with the fingerprint of word <word> for up to <len> chars + * by counting the transitions between characters. <fp> is a 1024-entries array + * indexed as 32*from+to. Positions for 'from' and 'to' are: * 1..26=letter, 27=digit, 28=other/begin/end. * Row "from=0" is used to mark the character's presence. Others unused. */ -void update_word_fingerprint(uint8_t *fp, const char *word) +void update_word_fingerprint_with_len(uint8_t *fp, struct ist word) { const char *p; int from, to; int c; from = 28; // begin - for (p = word; *p; p++) { + for (p = word.ptr; p < word.ptr + word.len; p++) { c = tolower((unsigned char)*p); switch(c) { case 'a'...'z': to = c - 'a' + 1; break; @@ -6656,6 +6656,17 @@ void update_word_fingerprint(uint8_t *fp, const char *word) fp[32 * from + to]++; } +/* Update array <fp> with the fingerprint of word <word> by counting the + * transitions between characters. <fp> is a 1024-entries array indexed as + * 32*from+to. Positions for 'from' and 'to' are: + * 1..26=letter, 27=digit, 28=other/begin/end. + * Row "from=0" is used to mark the character's presence. Others unused. + */ +void update_word_fingerprint(uint8_t *fp, const char *word) +{ + return update_word_fingerprint_with_len(fp, ist(word)); +} + /* This function hashes a word, scramble is the anonymizing key, returns * the hashed word when the key (scramble) != 0, else returns the word. * This function can be called NB_L_HASH_WORD times in a row, don't call @@ -6785,6 +6796,17 @@ void make_word_fingerprint(uint8_t *fp, const char *word) update_word_fingerprint(fp, word); } +/* Initialize array <fp> with the fingerprint of word <word> by counting the + * transitions between characters. <fp> is a 1024-entries array indexed as + * 32*from+to. Positions for 'from' and 'to' are: + * 0..25=letter, 26=digit, 27=other, 28=begin, 29=end, others unused. 
+ */ +void make_word_fingerprint_with_len(uint8_t *fp, struct ist word) +{ + memset(fp, 0, 1024); + update_word_fingerprint_with_len(fp, word); +} + /* Return the distance between two word fingerprints created by function * make_word_fingerprint(). It's a positive integer calculated as the sum of * the differences between each location.