From: Douglas Bagnall Date: Thu, 16 May 2024 23:34:35 +0000 (+1200) Subject: ldb: add ldb_set_utf8_functions() for setting casefold functions X-Git-Tag: tdb-1.4.11~569 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ae7ca36830be7823dde17bcaeae74b5f46b1aa3d;p=thirdparty%2Fsamba.git ldb: add ldb_set_utf8_functions() for setting casefold functions This replaces ldb_set_utf8_fns(), which will be deprecated really soon. The reason for this, as shown in surrounding commits, is that without an explicit case-insensitive comparison we need to rely on the casefold, and if the casefold can fail (because, e.g. bad utf-8) the comparison ends up being a bit chaotic. The strings being compared are generally user controlled, and a malicious user might find ways of hiding values or perhaps fooling a binary search. A case-insensitive comparison that works gradually through the string without an all-at-once casefold is better placed to deal with problems where they happen, and we are able to separately specialise for the ASCII case (used by SSSD) and the UTF-8 case (Samba). 
Signed-off-by: Douglas Bagnall Reviewed-by: Andrew Bartlett --- diff --git a/lib/ldb/ABI/ldb-2.10.0.sigs b/lib/ldb/ABI/ldb-2.10.0.sigs index 2266387cd60..18b2d5f3526 100644 --- a/lib/ldb/ABI/ldb-2.10.0.sigs +++ b/lib/ldb/ABI/ldb-2.10.0.sigs @@ -275,6 +275,7 @@ ldb_set_timeout: int (struct ldb_context *, struct ldb_request *, int) ldb_set_timeout_from_prev_req: int (struct ldb_context *, struct ldb_request *, struct ldb_request *) ldb_set_utf8_default: void (struct ldb_context *) ldb_set_utf8_fns: void (struct ldb_context *, void *, char *(*)(void *, void *, const char *, size_t)) +ldb_set_utf8_functions: void (struct ldb_context *, void *, char *(*)(void *, void *, const char *, size_t), int (*)(void *, const struct ldb_val *, const struct ldb_val *)) ldb_setup_wellknown_attributes: int (struct ldb_context *) ldb_should_b64_encode: int (struct ldb_context *, const struct ldb_val *) ldb_standard_syntax_by_name: const struct ldb_schema_syntax *(struct ldb_context *, const char *) diff --git a/lib/ldb/common/ldb_utf8.c b/lib/ldb/common/ldb_utf8.c index 178bdd86de1..a59c20c5827 100644 --- a/lib/ldb/common/ldb_utf8.c +++ b/lib/ldb/common/ldb_utf8.c @@ -34,6 +34,24 @@ #include "ldb_private.h" #include "system/locale.h" +/* + * Set functions for comparing and case-folding case-insensitive ldb val + * strings. 
+ */ +void ldb_set_utf8_functions(struct ldb_context *ldb, + void *context, + char *(*casefold)(void *, void *, const char *, size_t), + int (*casecmp)(void *ctx, + const struct ldb_val *v1, + const struct ldb_val *v2)) +{ + if (context) + ldb->utf8_fns.context = context; + if (casefold) + ldb->utf8_fns.casefold = casefold; + if (casecmp) + ldb->utf8_fns.casecmp = casecmp; +} /* this allow the user to pass in a caseless comparison @@ -43,12 +61,10 @@ void ldb_set_utf8_fns(struct ldb_context *ldb, void *context, char *(*casefold)(void *, void *, const char *, size_t)) { - if (context) - ldb->utf8_fns.context = context; - if (casefold) - ldb->utf8_fns.casefold = casefold; + ldb_set_utf8_functions(ldb, context, casefold, NULL); } + /* a simple case folding function NOTE: does not handle UTF8 diff --git a/lib/ldb/include/ldb.h b/lib/ldb/include/ldb.h index a3436359559..61651f4e54d 100644 --- a/lib/ldb/include/ldb.h +++ b/lib/ldb/include/ldb.h @@ -2205,10 +2205,34 @@ int ldb_set_debug(struct ldb_context *ldb, void *context); /** - this allows the user to set custom utf8 function for error reporting. make - sure it is able to handle ASCII first, so it prevents issues with dotted - languages. -*/ + * This allows the user to set custom utf8 functions. + * + * Be aware that casefold in some locales will break ldb expectations. In + * particular, if 'i' is uppercased to 'İ' (a capital I with a dot, used in + * some languages), the string '