ldb: remove old ldb_comparison_fold_utf8_broken()

author Douglas Bagnall <douglas.bagnall@catalyst.net.nz>

Fri, 10 May 2024 03:43:36 +0000 (15:43 +1200)

committer Andrew Bartlett <abartlet@samba.org>

Wed, 22 May 2024 23:12:32 +0000 (23:12 +0000)
author Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Fri, 10 May 2024 03:43:36 +0000 (15:43 +1200)
committer Andrew Bartlett <abartlet@samba.org>
Wed, 22 May 2024 23:12:32 +0000 (23:12 +0000)
diff --git a/lib/ldb/common/attrib_handlers.c b/lib/ldb/common/attrib_handlers.c

index 1c37b6060601374fff1b0491ce02fcbec68605d3..145ff487310b6b8c73fa338e4658945cf25e3678 100644 (file)
--- a/lib/ldb/common/attrib_handlers.c
+++ b/lib/ldb/common/attrib_handlers.c
@@ -326,150 +326,6 @@ int ldb_comparison_binary(struct ldb_context *ldb, void *mem_ctx,
         return memcmp(v1->data, v2->data, v1->length);
  }
  
-/*
-  compare two case insensitive strings, ignoring multiple whitespaces
-  and leading and trailing whitespaces
-  see rfc2252 section 8.1
-
-  try to optimize for the ascii case,
-  but if we find out an utf8 codepoint revert to slower but correct function
-*/
-__attribute__ ((unused))
-static int ldb_comparison_fold_utf8_broken(struct ldb_context *ldb, void *mem_ctx,
-                                          const struct ldb_val *v1, const struct ldb_val *v2)
-{
-       const char *s1=(const char *)v1->data, *s2=(const char *)v2->data;
-       size_t n1 = v1->length, n2 = v2->length;
-       char *b1, *b2;
-       const char *u1, *u2;
-       int ret;
-
-       while (n1 && *s1 == ' ') { s1++; n1--; };
-       while (n2 && *s2 == ' ') { s2++; n2--; };
-
-       while (n1 && n2 && *s1 && *s2) {
-               /* the first 127 (0x7F) chars are ascii and utf8 guarantees they
-                * never appear in multibyte sequences */
-               if (((unsigned char)s1[0]) & 0x80) goto utf8str;
-               if (((unsigned char)s2[0]) & 0x80) goto utf8str;
-               if (ldb_ascii_toupper(*s1) != ldb_ascii_toupper(*s2)) {
-                       break;
-               }
-               if (*s1 == ' ') {
-                       while (n1 > 1 && s1[0] == s1[1]) { s1++; n1--; }
-                       while (n2 > 1 && s2[0] == s2[1]) { s2++; n2--; }
-               }
-               s1++; s2++;
-               n1--; n2--;
-       }
-
-       /* check for trailing spaces only if the other pointers has
-        * reached the end of the strings otherwise we can
-        * mistakenly match.  ex. "domain users" <->
-        * "domainUpdates"
-        */
-       if (n1 && *s1 == ' ' && (!n2 || !*s2)) {
-               while (n1 && *s1 == ' ') { s1++; n1--; }
-       }
-       if (n2 && *s2 == ' ' && (!n1 || !*s1)) {
-               while (n2 && *s2 == ' ') { s2++; n2--; }
-       }
-       if (n1 == 0 && n2 != 0) {
-               return -(int)ldb_ascii_toupper(*s2);
-       }
-       if (n2 == 0 && n1 != 0) {
-               return (int)ldb_ascii_toupper(*s1);
-       }
-       if (n1 == 0 && n2 == 0) {
-               return 0;
-       }
-       return (int)ldb_ascii_toupper(*s1) - (int)ldb_ascii_toupper(*s2);
-
-utf8str:
-       /*
-        * No need to recheck from the start, just from the first utf8 charu
-        * found. Note that the callback of ldb_casefold() needs to be ascii
-        * compatible.
-        *
-        * Probably ldb_casefold() is wrap_casefold() which wraps
-        * strupper_talloc_n().
-        */
-       b1 = ldb_casefold(ldb, mem_ctx, s1, n1);
-       b2 = ldb_casefold(ldb, mem_ctx, s2, n2);
-
-       if (!b1 || !b2) {
-               /*
-                * One of the strings was not UTF8, so we have no
-                * options but to do a binary compare.
-                *
-                * FIXME: this can be non-transitive.
-                *
-                * consider {
-                *           CA 8A  "ʊ"
-                *           C6 B1  "Ʊ"
-                *           C8 FE  invalid utf-8
-                *          }
-                *
-                * The byte "0xfe" is always invalid in utf-8, so the
-                * comparisons against that string end up coming this way,
-                * while the "Ʊ" vs "ʊ" comparison goes via the ldb_casefold
-                * branch. Then:
-                *
-                *  "ʊ" == "Ʊ"     by casefold.
-                *  "ʊ" > {c8 fe}  by byte comparison.
-                *  "Ʊ" < {c8 fe}  by byte comparison.
-                *
-                * In many cases there are no invalid encodings between the
-                * upper and lower case letters, but the string as a whole
-                * might also compare differently due to the space-eating in
-                * the other branch.
-                */
-               talloc_free(b1);
-               talloc_free(b2);
-               ret = memcmp(s1, s2, MIN(n1, n2));
-               if (ret == 0) {
-                       if (n1 == n2) {
-                               return 0;
-                       }
-                       if (n1 > n2) {
-                               if (s1[n2] == '\0') {
-                                       return 0;
-                               }
-                               return 1;
-                       } else {
-                               if (s2[n1] == '\0') {
-                                       return 0;
-                               }
-                               return -1;
-                       }
-               }
-               return ret;
-       }
-
-       u1 = b1;
-       u2 = b2;
-
-       while (*u1 & *u2) {
-               if (*u1 != *u2)
-                       break;
-               if (*u1 == ' ') {
-                       while (u1[0] == u1[1]) u1++;
-                       while (u2[0] == u2[1]) u2++;
-               }
-               u1++; u2++;
-       }
-       if (! (*u1 && *u2)) {
-               while (*u1 == ' ') u1++;
-               while (*u2 == ' ') u2++;
-       }
-       ret = NUMERIC_CMP(*u1, *u2);
-
-       talloc_free(b1);
-       talloc_free(b2);
-
-       return ret;
-}
-
  /*
   * ldb_comparison_fold is a schema syntax comparison_fn for utf-8 strings that
   * collapse multiple spaces into one (e.g. "Directory String" syntax).
author	Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
	Fri, 10 May 2024 03:43:36 +0000 (15:43 +1200)
committer	Andrew Bartlett <abartlet@samba.org>
	Wed, 22 May 2024 23:12:32 +0000 (23:12 +0000)