MINOR: tools: improve word fingerprinting by counting presence

author Willy Tarreau <w@1wt.eu>

Mon, 15 Mar 2021 08:34:27 +0000 (09:34 +0100)

committer Willy Tarreau <w@1wt.eu>

Mon, 15 Mar 2021 08:38:42 +0000 (09:38 +0100)
author Willy Tarreau <w@1wt.eu>
Mon, 15 Mar 2021 08:34:27 +0000 (09:34 +0100)
committer Willy Tarreau <w@1wt.eu>
Mon, 15 Mar 2021 08:38:42 +0000 (09:38 +0100)
diff --git a/include/haproxy/tools.h b/include/haproxy/tools.h

index 901dca0bb9682da11daf83f5bf87ff44998942c5..3121fea51a09d444d8bfbbd9e7a4e990754a70fe 100644 (file)
--- a/include/haproxy/tools.h
+++ b/include/haproxy/tools.h
@@ -1077,28 +1077,30 @@ static inline unsigned int statistical_prng()
   * is zero, it's assumed that <curr> is the first character. If <curr> is zero
   * it's assumed to mark the end. Both may be zero. <fp> is a 1024-entries array
   * indexed as 32*from+to. Positions for 'from' and 'to' are:
- *   0..25=letter, 26=digit, 27=other, 28=begin, 29=end, others unused.
+ *   1..26=letter, 27=digit, 28=other/begin/end.
+ * Row "from=0" is used to mark the character's presence. Others unused.
   */
  static inline void update_char_fingerprint(uint8_t *fp, char prev, char curr)
  {
         int from, to;
  
         switch (prev) {
-       case 0:         from = 26; break; // begin
-       case 'a'...'z': from = prev - 'a'; break;
-       case 'A'...'Z': from = tolower(prev) - 'a'; break;
-       case '0'...'9': from = 26; break;
-       default:        from = 27; break;
+       case 0:         from = 28; break; // begin
+       case 'a'...'z': from = prev - 'a' + 1; break;
+       case 'A'...'Z': from = tolower(prev) - 'a' + 1; break;
+       case '0'...'9': from = 27; break;
+       default:        from = 28; break;
         }
  
         switch (curr) {
         case 0:         to = 28; break; // end
-       case 'a'...'z': to = curr - 'a'; break;
-       case 'A'...'Z': to = tolower(curr) - 'a'; break;
-       case '0'...'9': to = 26; break;
-       default:        to = 27; break;
+       case 'a'...'z': to = curr - 'a' + 1; break;
+       case 'A'...'Z': to = tolower(curr) - 'a' + 1; break;
+       case '0'...'9': to = 27; break;
+       default:        to = 28; break;
         }
-
+       if (curr)
+               fp[to] = 1;
         fp[32 * from + to]++;
  }
  
diff --git a/src/tools.c b/src/tools.c

index 1255e748b84afdc2f47bc9608358e11560464ae8..ffd167a24ff2420964fe1abccfb93a03fa88fdb7 100644 (file)
--- a/src/tools.c
+++ b/src/tools.c
@@ -5372,7 +5372,8 @@ size_t sanitize_for_printing(char *line, size_t pos, size_t width)
  /* Update array <fp> with the fingerprint of word <word> by counting the
   * transitions between characters. <fp> is a 1024-entries array indexed as
   * 32*from+to. Positions for 'from' and 'to' are:
- *   0..25=letter, 26=digit, 27=other, 28=begin, 29=end, others unused.
+ *   1..26=letter, 27=digit, 28=other/begin/end.
+ * Row "from=0" is used to mark the character's presence. Others unused.
   */
  void update_word_fingerprint(uint8_t *fp, const char *word)
  {
@@ -5384,11 +5385,12 @@ void update_word_fingerprint(uint8_t *fp, const char *word)
         for (p = word; *p; p++) {
                 c = tolower(*p);
                 switch(c) {
-               case 'a'...'z': to = c - 'a'; break;
-               case 'A'...'Z': to = tolower(c) - 'a'; break;
-               case '0'...'9': to = 26; break;
-               default: to = 27; break;
+               case 'a'...'z': to = c - 'a' + 1; break;
+               case 'A'...'Z': to = tolower(c) - 'a' + 1; break;
+               case '0'...'9': to = 27; break;
+               default:        to = 28; break;
                 }
+               fp[to] = 1;
                 fp[32 * from + to]++;
                 from = to;
         }
author	Willy Tarreau <w@1wt.eu>
	Mon, 15 Mar 2021 08:34:27 +0000 (09:34 +0100)
committer	Willy Tarreau <w@1wt.eu>
	Mon, 15 Mar 2021 08:38:42 +0000 (09:38 +0100)
include/haproxy/tools.h		patch | blob | blame | history
src/tools.c		patch | blob | blame | history