From: Arran Cudbard-Bell Date: Mon, 27 May 2024 15:35:06 +0000 (-0400) Subject: Add str.utf8 validation function X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1476187cb4df90a66d0f8b01d47d198cf78705b5;p=thirdparty%2Ffreeradius-server.git Add str.utf8 validation function --- diff --git a/src/lib/unlang/xlat_builtin.c b/src/lib/unlang/xlat_builtin.c index b77483f0644..1b37edf1d56 100644 --- a/src/lib/unlang/xlat_builtin.c +++ b/src/lib/unlang/xlat_builtin.c @@ -24,6 +24,7 @@ * @copyright 2000 Alan DeKok (aland@freeradius.org) */ + RCSID("$Id$") /** @@ -45,6 +46,7 @@ RCSID("$Id$") #include #include #include +#include #include #include #include @@ -2973,6 +2975,41 @@ static xlat_action_t xlat_func_strlen(TALLOC_CTX *ctx, fr_dcursor_t *out, return XLAT_ACTION_DONE; } +static xlat_arg_parser_t const xlat_func_str_utf8_arg[] = { + { .concat = true, .type = FR_TYPE_STRING }, + XLAT_ARG_PARSER_TERMINATOR +}; + +/** Return whether a string is valid UTF-8 + * + * This function returns true if the input string is valid UTF-8, false otherwise. 
+ * + * Example: +@verbatim +%str.utf8(🍉🥝🍓) == true +%str.utf8(🍉\xff🍓) == false +@endverbatim + * + * @ingroup xlat_functions + */ +static xlat_action_t xlat_func_str_utf8(TALLOC_CTX *ctx, fr_dcursor_t *out, + UNUSED xlat_ctx_t const *xctx, + UNUSED request_t *request, fr_value_box_list_t *args) +{ + fr_value_box_t *vb; + fr_value_box_t *in_head; + + XLAT_ARGS(args, &in_head); + + MEM(vb = fr_value_box_alloc(ctx, FR_TYPE_BOOL, NULL)); + vb->vb_bool = (fr_utf8_str((uint8_t const *)in_head->vb_strvalue, + in_head->vb_length) >= 0); + + fr_dcursor_append(out, vb); + + return XLAT_ACTION_DONE; +} + static xlat_arg_parser_t const xlat_func_substr_args[] = { { .single = true, .required = true, .type = FR_TYPE_VOID }, { .single = true, .required = true, .type = FR_TYPE_INT32 }, @@ -4054,6 +4091,7 @@ do { \ XLAT_REGISTER_PURE("string", xlat_func_string, FR_TYPE_STRING, xlat_func_string_arg); XLAT_REGISTER_PURE("strlen", xlat_func_strlen, FR_TYPE_SIZE, xlat_func_strlen_arg); + XLAT_REGISTER_PURE("str.utf8", xlat_func_str_utf8, FR_TYPE_BOOL, xlat_func_str_utf8_arg); XLAT_REGISTER_PURE("tolower", xlat_func_tolower, FR_TYPE_STRING, xlat_change_case_arg); XLAT_REGISTER_PURE("toupper", xlat_func_toupper, FR_TYPE_STRING, xlat_change_case_arg); XLAT_REGISTER_PURE("urlquote", xlat_func_urlquote, FR_TYPE_STRING, xlat_func_urlquote_arg); diff --git a/src/lib/unlang/xlat_eval.c b/src/lib/unlang/xlat_eval.c index 88b803e1af4..1c5452c7bea 100644 --- a/src/lib/unlang/xlat_eval.c +++ b/src/lib/unlang/xlat_eval.c @@ -82,7 +82,7 @@ static fr_slen_t xlat_fmt_print(fr_sbuff_t *out, xlat_exp_t const *node) case XLAT_BOX: case XLAT_GROUP: fr_assert(node->fmt != NULL); - return fr_sbuff_in_strcpy(out, node->fmt); + return fr_sbuff_in_sprintf(out, "%pV", fr_box_strvalue_buffer(node->fmt)); case XLAT_ONE_LETTER: fr_assert(node->fmt != NULL); @@ -93,7 +93,7 @@ static fr_slen_t xlat_fmt_print(fr_sbuff_t *out, xlat_exp_t const *node) if (tmpl_is_attr(node->vpt) && (node->fmt[0] == '&')) { return 
fr_sbuff_in_strcpy(out, node->fmt); } else { - return fr_sbuff_in_sprintf(out, "%%{%s}", node->fmt); + return fr_sbuff_in_sprintf(out, "%%{%pV}", fr_box_strvalue_buffer(node->fmt)); } case XLAT_VIRTUAL: diff --git a/src/lib/util/print.c b/src/lib/util/print.c index ae8a8f6668b..e9fd3b0645e 100644 --- a/src/lib/util/print.c +++ b/src/lib/util/print.c @@ -42,9 +42,7 @@ inline size_t fr_utf8_char(uint8_t const *str, ssize_t inlen) if (inlen < 0) inlen = 4; /* longest char */ - if (*str < 0x20) return 0; - - if (*str <= 0x7e) return 1; /* 1 */ + if (*str <= 0x7f) return 1; /* 1 */ if (*str <= 0xc1) return 0; @@ -139,10 +137,10 @@ inline size_t fr_utf8_char(uint8_t const *str, ssize_t inlen) * @param[in] inlen length of input string. May be -1 if str * is \0 terminated. * @return The number of bytes validated. If ret == inlen the entire - * string is valid. Else ret gives the offset at which the - * first invalid byte sequence was found. + * string is valid. Else ret gives the negative offset at + * which the first invalid byte sequence was found. */ -ssize_t fr_utf8_str(uint8_t const *str, ssize_t inlen) +fr_slen_t fr_utf8_str(uint8_t const *str, ssize_t inlen) { uint8_t const *p, *end; size_t len; @@ -156,7 +154,7 @@ ssize_t fr_utf8_str(uint8_t const *str, ssize_t inlen) size_t clen; clen = fr_utf8_char(p, end - p); - if (clen == 0) return end - p; + if (clen == 0) return p - end; p += clen; } while (p < end); diff --git a/src/tests/keywords/xlat-utf8 b/src/tests/keywords/xlat-utf8 new file mode 100644 index 00000000000..03bab555722 --- /dev/null +++ b/src/tests/keywords/xlat-utf8 @@ -0,0 +1,26 @@ +# +# PRE: if +# +string valid_utf8 +string invalid_utf8 + +&valid_utf8 = "🍩abcdef🍩\n\t" +&invalid_utf8 = "🍩\x80" + +if (%str.utf8("\xFE")) { + test_fail +} + +if (!%str.utf8(%{valid_utf8})) { + test_fail +} + +if (!%str.utf8("abcdefghijklmnopqrstuvwxyz")) { + test_fail +} + +if (%str.utf8(%{invalid_utf8})) { + test_fail +} + +success