From: Arran Cudbard-Bell <a.cudbardb@freeradius.org>
Date: Mon, 27 May 2024 15:35:06 +0000 (-0400)
Subject: Add str.utf8 validation function
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1476187cb4df90a66d0f8b01d47d198cf78705b5;p=thirdparty%2Ffreeradius-server.git

Add str.utf8 validation function
---

diff --git a/src/lib/unlang/xlat_builtin.c b/src/lib/unlang/xlat_builtin.c
index b77483f0644..1b37edf1d56 100644
--- a/src/lib/unlang/xlat_builtin.c
+++ b/src/lib/unlang/xlat_builtin.c
@@ -24,6 +24,7 @@
  * @copyright 2000 Alan DeKok (aland@freeradius.org)
  */
 
+
 RCSID("$Id$")
 
 /**
@@ -45,6 +46,7 @@ RCSID("$Id$")
 #include <freeradius-devel/util/dlist.h>
 #include <freeradius-devel/util/md5.h>
 #include <freeradius-devel/util/misc.h>
+#include <freeradius-devel/util/print.h>
 #include <freeradius-devel/util/rand.h>
 #include <freeradius-devel/util/regex.h>
 #include <freeradius-devel/util/sbuff.h>
@@ -2973,6 +2975,41 @@ static xlat_action_t xlat_func_strlen(TALLOC_CTX *ctx, fr_dcursor_t *out,
 	return XLAT_ACTION_DONE;
 }
 
+static xlat_arg_parser_t const xlat_func_str_utf8_arg[] = {
+	{ .concat = true, .type = FR_TYPE_STRING },
+	XLAT_ARG_PARSER_TERMINATOR
+};
+
+/** Return whether a string is valid UTF-8
+ *
+ * This function returns true if the input string is valid UTF-8, false otherwise.
+ *
+ * Example:
+@verbatim
+%str.utf8(🍉🥝🍓) == true
+%str.utf8(🍉\xff🍓) == false
+@endverbatim
+ *
+ * @ingroup xlat_functions
+ */
+static xlat_action_t xlat_func_str_utf8(TALLOC_CTX *ctx, fr_dcursor_t *out,
+				        UNUSED xlat_ctx_t const *xctx,
+					UNUSED request_t *request, fr_value_box_list_t *args)
+{
+	fr_value_box_t	*vb;
+	fr_value_box_t	*in_head;
+
+	XLAT_ARGS(args, &in_head);
+
+	MEM(vb = fr_value_box_alloc(ctx, FR_TYPE_BOOL, NULL));
+	vb->vb_bool = (fr_utf8_str((uint8_t const *)in_head->vb_strvalue,
+				   in_head->vb_length) >= 0);
+
+	fr_dcursor_append(out, vb);
+
+	return XLAT_ACTION_DONE;
+}
+
 static xlat_arg_parser_t const xlat_func_substr_args[] = {
 	{ .single = true, .required = true, .type = FR_TYPE_VOID },
 	{ .single = true, .required = true, .type = FR_TYPE_INT32 },
@@ -4054,6 +4091,7 @@ do { \
 
 	XLAT_REGISTER_PURE("string", xlat_func_string, FR_TYPE_STRING, xlat_func_string_arg);
 	XLAT_REGISTER_PURE("strlen", xlat_func_strlen, FR_TYPE_SIZE, xlat_func_strlen_arg);
+	XLAT_REGISTER_PURE("str.utf8", xlat_func_str_utf8, FR_TYPE_BOOL, xlat_func_str_utf8_arg);
 	XLAT_REGISTER_PURE("tolower", xlat_func_tolower, FR_TYPE_STRING, xlat_change_case_arg);
 	XLAT_REGISTER_PURE("toupper", xlat_func_toupper, FR_TYPE_STRING, xlat_change_case_arg);
 	XLAT_REGISTER_PURE("urlquote", xlat_func_urlquote, FR_TYPE_STRING, xlat_func_urlquote_arg);
diff --git a/src/lib/unlang/xlat_eval.c b/src/lib/unlang/xlat_eval.c
index 88b803e1af4..1c5452c7bea 100644
--- a/src/lib/unlang/xlat_eval.c
+++ b/src/lib/unlang/xlat_eval.c
@@ -82,7 +82,7 @@ static fr_slen_t xlat_fmt_print(fr_sbuff_t *out, xlat_exp_t const *node)
 	case XLAT_BOX:
 	case XLAT_GROUP:
 		fr_assert(node->fmt != NULL);
-		return fr_sbuff_in_strcpy(out, node->fmt);
+		return fr_sbuff_in_sprintf(out, "%pV", fr_box_strvalue_buffer(node->fmt));
 
 	case XLAT_ONE_LETTER:
 		fr_assert(node->fmt != NULL);
@@ -93,7 +93,7 @@ static fr_slen_t xlat_fmt_print(fr_sbuff_t *out, xlat_exp_t const *node)
 		if (tmpl_is_attr(node->vpt) && (node->fmt[0] == '&')) {
 			return fr_sbuff_in_strcpy(out, node->fmt);
 		} else {
-			return fr_sbuff_in_sprintf(out, "%%{%s}", node->fmt);
+			return fr_sbuff_in_sprintf(out, "%%{%pV}", fr_box_strvalue_buffer(node->fmt));
 		}
 
 	case XLAT_VIRTUAL:
diff --git a/src/lib/util/print.c b/src/lib/util/print.c
index ae8a8f6668b..e9fd3b0645e 100644
--- a/src/lib/util/print.c
+++ b/src/lib/util/print.c
@@ -42,9 +42,7 @@ inline size_t fr_utf8_char(uint8_t const *str, ssize_t inlen)
 
 	if (inlen < 0) inlen = 4;	/* longest char */
 
-	if (*str < 0x20) return 0;
-
-	if (*str <= 0x7e) return 1;	/* 1 */
+	if (*str <= 0x7f) return 1;	/* 1 */
 
 	if (*str <= 0xc1) return 0;
 
@@ -139,10 +137,10 @@ inline size_t fr_utf8_char(uint8_t const *str, ssize_t inlen)
  * @param[in] inlen	length of input string.  May be -1 if str
  *			is \0 terminated.
  * @return The number of bytes validated.  If ret == inlen the entire
- *	   string is valid.  Else ret gives the offset at which the
- *	   first invalid byte sequence was found.
+ *	   string is valid.  Else ret is negative, its magnitude being the
+ *	   number of bytes from the first invalid sequence to the end.
  */
-ssize_t fr_utf8_str(uint8_t const *str, ssize_t inlen)
+fr_slen_t fr_utf8_str(uint8_t const *str, ssize_t inlen)
 {
 	uint8_t const *p, *end;
 	size_t len;
@@ -156,7 +154,7 @@ ssize_t fr_utf8_str(uint8_t const *str, ssize_t inlen)
 		size_t clen;
 
 		clen = fr_utf8_char(p, end - p);
-		if (clen == 0) return end - p;
+		if (clen == 0) return p - end;
 		p += clen;
 	} while (p < end);
 
diff --git a/src/tests/keywords/xlat-utf8 b/src/tests/keywords/xlat-utf8
new file mode 100644
index 00000000000..03bab555722
--- /dev/null
+++ b/src/tests/keywords/xlat-utf8
@@ -0,0 +1,26 @@
+#
+#  PRE: if
+#
+string valid_utf8
+string invalid_utf8
+
+&valid_utf8 = "ð©abcdefð©\n\t"
+&invalid_utf8 = "ð©\x80"
+
+if (%str.utf8("\xFE")) {
+	test_fail
+}
+
+if (!%str.utf8(%{valid_utf8})) {
+	test_fail
+}
+
+if (!%str.utf8("abcdefghijklmnopqrstuvwxyz")) {
+	test_fail
+}
+
+if (%str.utf8(%{invalid_utf8})) {
+	test_fail
+}
+
+success