git.ipfire.org Git - thirdparty/systemd.git/commitdiff
string-util: add a function to determine levenshtein distance of two strings
author Lennart Poettering <lennart@poettering.net>
Mon, 21 Aug 2023 11:38:03 +0000 (13:38 +0200)
committer Lennart Poettering <lennart@poettering.net>
Mon, 21 Aug 2023 21:19:49 +0000 (23:19 +0200)
src/basic/string-util.c
src/basic/string-util.h
src/test/test-string-util.c

index 7cddec712b0ca4cc36930dbd8f8c724c6f7b6039..854cf963acb1edcacd0a01d8de8ca8f9660cbcac 100644 (file)
@@ -1446,3 +1446,67 @@ bool version_is_valid_versionspec(const char *s) {
 
         return true;
 }
+
+ssize_t strlevenshtein(const char *x, const char *y) {
+        _cleanup_free_ size_t *t0 = NULL, *t1 = NULL, *t2 = NULL; /* three rolling rows of the distance table */
+        size_t xl, yl;
+
+        /* Computes the edit distance between the strings x and y. Substituting, deleting or inserting a
+         * single character each cost 1, and so does swapping two adjacent characters. Returns the distance
+         * (>= 0) on success, -E2BIG if either string is longer than SSIZE_MAX, and -ENOMEM if one of the
+         * work rows cannot be allocated.
+         *
+         * NOTE(review): NULL arguments appear to be accepted and handled like "" by strlen_ptr()/isempty();
+         * confirm against those helpers.
+         *
+         * Only three rows of the table are kept at any time: t0 is the row two steps back (needed for the
+         * swap case), t1 the previous row, and t2 the row currently being filled in.
+         *
+         * This is inspired by the Linux kernel's Levenshtein implementation */
+
+        if (streq_ptr(x, y)) /* equal inputs need no edits */
+                return 0;
+
+        xl = strlen_ptr(x);
+        if (xl > SSIZE_MAX) /* a length may end up being the result, so it has to fit the ssize_t return type */
+                return -E2BIG;
+
+        yl = strlen_ptr(y);
+        if (yl > SSIZE_MAX)
+                return -E2BIG;
+
+        if (isempty(x)) /* nothing to start from: the distance is the length of the other string */
+                return yl;
+        if (isempty(y))
+                return xl;
+
+        t0 = new0(size_t, yl + 1); /* every row has yl + 1 cells, one per prefix length 0..yl of y */
+        if (!t0)
+                return -ENOMEM;
+        t1 = new0(size_t, yl + 1);
+        if (!t1)
+                return -ENOMEM;
+        t2 = new0(size_t, yl + 1);
+        if (!t2)
+                return -ENOMEM;
+
+        for (size_t i = 0; i <= yl; i++) /* row for the empty prefix of x: i insertions produce the first i chars of y */
+                t1[i] = i;
+
+        for (size_t i = 0; i < xl; i++) {
+                t2[0] = i + 1; /* against the empty prefix of y all i + 1 characters of x seen so far are deleted */
+
+                for (size_t j = 0; j < yl; j++) {
+                        /* Substitution: start from the diagonal cell, at no cost if both characters match */
+                        t2[j+1] = t1[j] + (x[i] != y[j]);
+
+                        /* Swap: the last two characters of both prefixes are the same pair in reverse order,
+                         * hence go back two rows and two columns and pay 1 for the transposition, if cheaper */
+                        if (i > 0 && j > 0 && x[i-1] == y[j] && x[i] == y[j-1] && t2[j+1] > t0[j-1] + 1)
+                                t2[j+1] = t0[j-1] + 1;
+
+                        /* Deletion: take the cell directly above (previous row, same column) plus 1, if cheaper */
+                        if (t2[j+1] > t1[j+1] + 1)
+                                t2[j+1] = t1[j+1] + 1;
+
+                        /* Insertion: take the cell to the left (same row, previous column) plus 1, if cheaper */
+                        if (t2[j+1] > t2[j] + 1)
+                                t2[j+1] = t2[j] + 1;
+                }
+
+                size_t *dummy = t0; /* rotate the rows: t1 becomes t0, the finished t2 becomes t1, old t0 is recycled */
+                t0 = t1;
+                t1 = t2;
+                t2 = dummy;
+        }
+
+        return t1[yl]; /* after the final rotation t1 is the row for all of x; its last cell covers all of y */
+}
index f473946864e748054d2a2abf7b9869e24e67661c..73d586d4c54c849687bb4f0cff489b68b5c30135 100644 (file)
@@ -284,3 +284,5 @@ char *startswith_strv(const char *string, char **strv);
 bool version_is_valid(const char *s);
 
 bool version_is_valid_versionspec(const char *s);
+
+ssize_t strlevenshtein(const char *x, const char *y); /* edit distance of x and y, or negative -E2BIG/-ENOMEM on failure */
index 6ec70054e785c2340a977e54cf7d4fc5373be427..b5f0008d76bef481b49198af55205965f2c8ec46 100644 (file)
@@ -1292,4 +1292,28 @@ TEST(strextendn) {
         x = mfree(x);
 }
 
+TEST(strlevenshtein) {
+        assert_se(strlevenshtein(NULL, NULL) == 0); /* NULL and "" in any combination are at distance 0 */
+        assert_se(strlevenshtein("", "") == 0);
+        assert_se(strlevenshtein("", NULL) == 0);
+        assert_se(strlevenshtein(NULL, "") == 0);
+
+        assert_se(strlevenshtein("a", "a") == 0); /* single characters: equal, substituted (both ways), removed, added */
+        assert_se(strlevenshtein("a", "b") == 1);
+        assert_se(strlevenshtein("b", "a") == 1);
+        assert_se(strlevenshtein("a", "") == 1);
+        assert_se(strlevenshtein("", "a") == 1);
+
+        assert_se(strlevenshtein("xxx", "xxx") == 0); /* repeated characters: full replacement, then pure length differences */
+        assert_se(strlevenshtein("xxx", "yyy") == 3);
+        assert_se(strlevenshtein("yyy", "xxx") == 3);
+        assert_se(strlevenshtein("xx", "xxx") == 1);
+        assert_se(strlevenshtein("xxx", "xx") == 1);
+        assert_se(strlevenshtein("x", "xxx") == 2);
+        assert_se(strlevenshtein("xxx", "x") == 2);
+
+        assert_se(strlevenshtein("sitting", "kitten") == 3); /* mixed edits: s->k, i->e, drop the trailing g */
+        assert_se(strlevenshtein("sunday", "saturday") == 3); /* mixed edits: insert "a" and "t", n->r */
+}
+
 DEFINE_TEST_MAIN(LOG_DEBUG);