From: Douglas Bagnall Date: Fri, 10 May 2024 03:43:36 +0000 (+1200) Subject: ldb: remove old ldb_comparison_fold_utf8_broken() X-Git-Tag: tdb-1.4.11~558 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=42ae85d70af8da1aecbf45f5fb6e7d7ee1c379fb;p=thirdparty%2Fsamba.git ldb: remove old ldb_comparison_fold_utf8_broken() There are no callers. Signed-off-by: Douglas Bagnall Reviewed-by: Andrew Bartlett --- diff --git a/lib/ldb/common/attrib_handlers.c b/lib/ldb/common/attrib_handlers.c index 1c37b606060..145ff487310 100644 --- a/lib/ldb/common/attrib_handlers.c +++ b/lib/ldb/common/attrib_handlers.c @@ -326,150 +326,6 @@ int ldb_comparison_binary(struct ldb_context *ldb, void *mem_ctx, return memcmp(v1->data, v2->data, v1->length); } -/* - compare two case insensitive strings, ignoring multiple whitespaces - and leading and trailing whitespaces - see rfc2252 section 8.1 - - try to optimize for the ascii case, - but if we find out an utf8 codepoint revert to slower but correct function -*/ -__attribute__ ((unused)) -static int ldb_comparison_fold_utf8_broken(struct ldb_context *ldb, void *mem_ctx, - const struct ldb_val *v1, const struct ldb_val *v2) -{ - const char *s1=(const char *)v1->data, *s2=(const char *)v2->data; - size_t n1 = v1->length, n2 = v2->length; - char *b1, *b2; - const char *u1, *u2; - int ret; - - while (n1 && *s1 == ' ') { s1++; n1--; }; - while (n2 && *s2 == ' ') { s2++; n2--; }; - - while (n1 && n2 && *s1 && *s2) { - /* the first 127 (0x7F) chars are ascii and utf8 guarantees they - * never appear in multibyte sequences */ - if (((unsigned char)s1[0]) & 0x80) goto utf8str; - if (((unsigned char)s2[0]) & 0x80) goto utf8str; - if (ldb_ascii_toupper(*s1) != ldb_ascii_toupper(*s2)) { - break; - } - if (*s1 == ' ') { - while (n1 > 1 && s1[0] == s1[1]) { s1++; n1--; } - while (n2 > 1 && s2[0] == s2[1]) { s2++; n2--; } - } - s1++; s2++; - n1--; n2--; - } - - /* check for trailing spaces only if the other pointers has - * reached the end of the strings otherwise we can - * mistakenly match. ex. "domain users" <-> - * "domainUpdates" - */ - if (n1 && *s1 == ' ' && (!n2 || !*s2)) { - while (n1 && *s1 == ' ') { s1++; n1--; } - } - if (n2 && *s2 == ' ' && (!n1 || !*s1)) { - while (n2 && *s2 == ' ') { s2++; n2--; } - } - if (n1 == 0 && n2 != 0) { - return -(int)ldb_ascii_toupper(*s2); - } - if (n2 == 0 && n1 != 0) { - return (int)ldb_ascii_toupper(*s1); - } - if (n1 == 0 && n2 == 0) { - return 0; - } - return (int)ldb_ascii_toupper(*s1) - (int)ldb_ascii_toupper(*s2); - -utf8str: - /* - * No need to recheck from the start, just from the first utf8 charu - * found. Note that the callback of ldb_casefold() needs to be ascii - * compatible. - * - * Probably ldb_casefold() is wrap_casefold() which wraps - * strupper_talloc_n(). - */ - b1 = ldb_casefold(ldb, mem_ctx, s1, n1); - b2 = ldb_casefold(ldb, mem_ctx, s2, n2); - - if (!b1 || !b2) { - /* - * One of the strings was not UTF8, so we have no - * options but to do a binary compare. - * - * FIXME: this can be non-transitive. - * - * consider { - * CA 8A "ʊ" - * C6 B1 "Ʊ" - * C8 FE invalid utf-8 - * } - * - * The byte "0xfe" is always invalid in utf-8, so the - * comparisons against that string end up coming this way, - * while the "Ʊ" vs "ʊ" comparison goes via the ldb_casefold - * branch. Then: - * - * "ʊ" == "Ʊ" by casefold. - * "ʊ" > {c8 fe} by byte comparison. - * "Ʊ" < {c8 fe} by byte comparison. - * - * In many cases there are no invalid encodings between the - * upper and lower case letters, but the string as a whole - * might also compare differently due to the space-eating in - * the other branch. - */ - talloc_free(b1); - talloc_free(b2); - ret = memcmp(s1, s2, MIN(n1, n2)); - if (ret == 0) { - if (n1 == n2) { - return 0; - } - if (n1 > n2) { - if (s1[n2] == '\0') { - return 0; - } - return 1; - } else { - if (s2[n1] == '\0') { - return 0; - } - return -1; - } - } - return ret; - } - - u1 = b1; - u2 = b2; - - while (*u1 & *u2) { - if (*u1 != *u2) - break; - if (*u1 == ' ') { - while (u1[0] == u1[1]) u1++; - while (u2[0] == u2[1]) u2++; - } - u1++; u2++; - } - if (! (*u1 && *u2)) { - while (*u1 == ' ') u1++; - while (*u2 == ' ') u2++; - } - ret = NUMERIC_CMP(*u1, *u2); - - talloc_free(b1); - talloc_free(b2); - - return ret; -} - /* * ldb_comparison_fold is a schema syntax comparison_fn for utf-8 strings that * collapse multiple spaces into one (e.g. "Directory String" syntax).