return memcmp(v1->data, v2->data, v1->length);
}
-/*
- compare two strings case insensitively, treating a run of spaces as a
- single space and ignoring leading and trailing spaces
- see rfc2252 section 8.1
-
- try to optimize for the ascii case,
- but if we find a byte with the high bit set (i.e. part of a multibyte
- utf8 sequence) switch to the slower ldb_casefold() based comparison for
- the remainder of both values
-
- returns 0 when the values are considered equal, a negative or positive
- value otherwise
-
- mem_ctx is only passed on to ldb_casefold(); both folded buffers are
- released with talloc_free() before this function returns
-
- NOTE(review): the "_broken" suffix and the unused attribute suggest this
- is not meant to be called; the FIXME further down describes an ordering
- that is non-transitive when one of the values is not valid utf8
-*/
-__attribute__ ((unused))
-static int ldb_comparison_fold_utf8_broken(struct ldb_context *ldb, void *mem_ctx,
- const struct ldb_val *v1, const struct ldb_val *v2)
-{
- const char *s1=(const char *)v1->data, *s2=(const char *)v2->data;
- size_t n1 = v1->length, n2 = v2->length; /* bytes still unread in s1 / s2 */
- char *b1, *b2; /* results of ldb_casefold() on the two remainders */
- const char *u1, *u2; /* read cursors into b1 / b2 */
- int ret;
-
- while (n1 && *s1 == ' ') { s1++; n1--; }; /* skip leading spaces of v1 */
- while (n2 && *s2 == ' ') { s2++; n2--; }; /* skip leading spaces of v2 */
-
- while (n1 && n2 && *s1 && *s2) {
- /* the first 127 (0x7F) chars are ascii and utf8 guarantees they
- * never appear in multibyte sequences */
- if (((unsigned char)s1[0]) & 0x80) goto utf8str;
- if (((unsigned char)s2[0]) & 0x80) goto utf8str;
- if (ldb_ascii_toupper(*s1) != ldb_ascii_toupper(*s2)) {
- break;
- }
- if (*s1 == ' ') {
- /* advance each side to the last byte of a run of identical
- * bytes, so that a run of spaces is consumed as one */
- while (n1 > 1 && s1[0] == s1[1]) { s1++; n1--; }
- while (n2 > 1 && s2[0] == s2[1]) { s2++; n2--; }
- }
- s1++; s2++;
- n1--; n2--;
- }
-
- /* check for trailing spaces only if the other pointer has
- * reached the end of its string, otherwise we can
- * mistakenly match. ex. "domain users" <->
- * "domainUpdates"
- */
- if (n1 && *s1 == ' ' && (!n2 || !*s2)) {
- while (n1 && *s1 == ' ') { s1++; n1--; }
- }
- if (n2 && *s2 == ' ' && (!n1 || !*s1)) {
- while (n2 && *s2 == ' ') { s2++; n2--; }
- }
- if (n1 == 0 && n2 != 0) { /* v1 is used up, v2 still has bytes */
- return -(int)ldb_ascii_toupper(*s2);
- }
- if (n2 == 0 && n1 != 0) { /* v2 is used up, v1 still has bytes */
- return (int)ldb_ascii_toupper(*s1);
- }
- if (n1 == 0 && n2 == 0) { /* both used up at the same time */
- return 0;
- }
- return (int)ldb_ascii_toupper(*s1) - (int)ldb_ascii_toupper(*s2); /* both have bytes left: difference of the current folded bytes */
-
-utf8str:
- /*
- * No need to recheck from the start, just from the first utf8 char
- * found. Note that the callback of ldb_casefold() needs to be ascii
- * compatible.
- *
- * Probably ldb_casefold() is wrap_casefold() which wraps
- * strupper_talloc_n().
- */
- b1 = ldb_casefold(ldb, mem_ctx, s1, n1);
- b2 = ldb_casefold(ldb, mem_ctx, s2, n2);
-
- if (!b1 || !b2) {
- /*
- * One of the strings was not UTF8, so we have no
- * options but to do a binary compare.
- *
- * FIXME: this can be non-transitive.
- *
- * consider {
- * CA 8A "ʊ"
- * C6 B1 "Ʊ"
- * C8 FE invalid utf-8
- * }
- *
- * The byte "0xfe" is always invalid in utf-8, so the
- * comparisons against that string end up coming this way,
- * while the "Ʊ" vs "ʊ" comparison goes via the ldb_casefold
- * branch. Then:
- *
- * "ʊ" == "Ʊ" by casefold.
- * "ʊ" > {c8 fe} by byte comparison.
- * "Ʊ" < {c8 fe} by byte comparison.
- *
- * In many cases there are no invalid encodings between the
- * upper and lower case letters, but the string as a whole
- * might also compare differently due to the space-eating in
- * the other branch.
- */
- talloc_free(b1);
- talloc_free(b2);
- ret = memcmp(s1, s2, MIN(n1, n2)); /* compare the common-length prefix of the two remainders */
- if (ret == 0) {
- if (n1 == n2) {
- return 0;
- }
- if (n1 > n2) {
- if (s1[n2] == '\0') { /* a NUL right after the common prefix is treated as the end of s1 */
- return 0;
- }
- return 1;
- } else {
- if (s2[n1] == '\0') { /* likewise for s2 */
- return 0;
- }
- return -1;
- }
- }
- return ret;
- }
-
- u1 = b1;
- u2 = b2;
-
- while (*u1 & *u2) { /* NOTE(review): bitwise '&', not '&&'. It is zero
- * when either byte is NUL and non-zero for two equal
- * non-NUL bytes; it can also be zero for two different
- * non-NUL bytes, in which case the loop ends exactly
- * where the != test below would have ended it. */
- if (*u1 != *u2)
- break;
- if (*u1 == ' ') {
- while (u1[0] == u1[1]) u1++;
- while (u2[0] == u2[1]) u2++;
- }
- u1++; u2++;
- }
- if (! (*u1 && *u2)) { /* at least one side is at its terminator: skip spaces on both */
- while (*u1 == ' ') u1++;
- while (*u2 == ' ') u2++;
- }
- ret = NUMERIC_CMP(*u1, *u2);
-
- talloc_free(b1);
- talloc_free(b2);
-
- return ret;
-}
-
/*
* ldb_comparison_fold is a schema syntax comparison_fn for utf-8 strings that
* collapse multiple spaces into one (e.g. "Directory String" syntax).