uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbargs, uint32_t opts, const char **errptr);
ssize_t read_line_to_trash(const char *path_fmt, ...);
size_t sanitize_for_printing(char *line, size_t pos, size_t width);
+void update_word_fingerprint_with_len(uint8_t *fp, struct ist word);
void update_word_fingerprint(uint8_t *fp, const char *word);
+void make_word_fingerprint_with_len(uint8_t *fp, struct ist word);
void make_word_fingerprint(uint8_t *fp, const char *word);
int word_fingerprint_distance(const uint8_t *fp1, const uint8_t *fp2);
return pos - shift;
}
-/* Update array <fp> with the fingerprint of word <word> by counting the
- * transitions between characters. <fp> is a 1024-entries array indexed as
- * 32*from+to. Positions for 'from' and 'to' are:
+/* Update array <fp> with the fingerprint of the <word.len> characters of word
+ * <word> by counting the transitions between characters. <fp> is a 1024-entry
+ * array indexed as 32*from+to. Positions for 'from' and 'to' are:
* 1..26=letter, 27=digit, 28=other/begin/end.
* Row "from=0" is used to mark the character's presence. Others unused.
*/
-void update_word_fingerprint(uint8_t *fp, const char *word)
+void update_word_fingerprint_with_len(uint8_t *fp, struct ist word)
{
const char *p;
int from, to;
int c;
from = 28; // begin
- for (p = word; *p; p++) {
+ for (p = word.ptr; p < word.ptr + word.len; p++) {
c = tolower((unsigned char)*p);
switch(c) {
case 'a'...'z': to = c - 'a' + 1; break;
fp[32 * from + to]++;
}
+/* Update array <fp> with the fingerprint of word <word>, passed as a C string,
+ * by counting the transitions between characters. <fp> is a 1024-entry array
+ * indexed as 32*from+to. Positions for 'from' and 'to' are:
+ *   1..26=letter, 27=digit, 28=other/begin/end.
+ * Row "from=0" is used to mark the character's presence. Others unused.
+ * This is a thin wrapper: <word> is converted with ist() and handed over to
+ * update_word_fingerprint_with_len(). Nothing is returned.
+ */
+void update_word_fingerprint(uint8_t *fp, const char *word)
+{
+	/* plain call, no "return": a void function may not return an expression */
+	update_word_fingerprint_with_len(fp, ist(word));
+}
+
/* This function hashes a word, scramble is the anonymizing key, returns
* the hashed word when the key (scramble) != 0, else returns the word.
* This function can be called NB_L_HASH_WORD times in a row, don't call
update_word_fingerprint(fp, word);
}
+/* Initialize array <fp> with the fingerprint of word <word>: the 1024 bytes of
+ * <fp> are first cleared, then update_word_fingerprint_with_len() is called
+ * to count the transitions between characters of <word>. <fp> is indexed as
+ * 32*from+to, using that function's positions for 'from' and 'to':
+ *   1..26=letter, 27=digit, 28=other/begin/end.
+ * Row "from=0" is used to mark the character's presence. Others unused.
+ */
+void make_word_fingerprint_with_len(uint8_t *fp, struct ist word)
+{
+ memset(fp, 0, 1024);
+ update_word_fingerprint_with_len(fp, word);
+}
+
/* Return the distance between two word fingerprints created by function
* make_word_fingerprint(). It's a positive integer calculated as the sum of
* the differences between each location.