Add str.utf8 validation function

author Arran Cudbard-Bell <a.cudbardb@freeradius.org>

Mon, 27 May 2024 15:35:06 +0000 (11:35 -0400)

committer Arran Cudbard-Bell <a.cudbardb@freeradius.org>

Mon, 27 May 2024 23:28:32 +0000 (19:28 -0400)
author Arran Cudbard-Bell <a.cudbardb@freeradius.org>
Mon, 27 May 2024 15:35:06 +0000 (11:35 -0400)
committer Arran Cudbard-Bell <a.cudbardb@freeradius.org>
Mon, 27 May 2024 23:28:32 +0000 (19:28 -0400)
diff --git a/src/lib/unlang/xlat_builtin.c b/src/lib/unlang/xlat_builtin.c

index b77483f0644a2e04bda2532264149b927bdbb45c..1b37edf1d56127f9ff825405d379dda2d61f8a1c 100644 (file)
--- a/src/lib/unlang/xlat_builtin.c
+++ b/src/lib/unlang/xlat_builtin.c
@@ -24,6 +24,7 @@
   * @copyright 2000 Alan DeKok (aland@freeradius.org)
   */
  
+
  RCSID("$Id$")
  
  /**
@@ -45,6 +46,7 @@ RCSID("$Id$")
  #include <freeradius-devel/util/dlist.h>
  #include <freeradius-devel/util/md5.h>
  #include <freeradius-devel/util/misc.h>
+#include <freeradius-devel/util/print.h>
  #include <freeradius-devel/util/rand.h>
  #include <freeradius-devel/util/regex.h>
  #include <freeradius-devel/util/sbuff.h>
@@ -2973,6 +2975,41 @@ static xlat_action_t xlat_func_strlen(TALLOC_CTX *ctx, fr_dcursor_t *out,
         return XLAT_ACTION_DONE;
  }
  
+static xlat_arg_parser_t const xlat_func_str_utf8_arg[] = {
+       { .concat = true, .type = FR_TYPE_STRING },
+       XLAT_ARG_PARSER_TERMINATOR
+};
+
+/** Return whether a string is valid UTF-8
+ *
+ * This function returns true if the input string is valid UTF-8, false otherwise.
+ *
+ * Example:
+@verbatim
+%str.utf8(🍉🥝🍓) == true
+%str.utf8(🍉\xff🍓) == false
+@endverbatim
+ *
+ * @ingroup xlat_functions
+ */
+static xlat_action_t xlat_func_str_utf8(TALLOC_CTX *ctx, fr_dcursor_t *out,
+                                       UNUSED xlat_ctx_t const *xctx,
+                                       UNUSED request_t *request, fr_value_box_list_t *args)
+{
+       fr_value_box_t  *vb;
+       fr_value_box_t  *in_head;
+
+       XLAT_ARGS(args, &in_head);
+
+       MEM(vb = fr_value_box_alloc(ctx, FR_TYPE_BOOL, NULL));
+       vb->vb_bool = (fr_utf8_str((uint8_t const *)in_head->vb_strvalue,
+                                  in_head->vb_length) >= 0);
+
+       fr_dcursor_append(out, vb);
+
+       return XLAT_ACTION_DONE;
+}
+
  static xlat_arg_parser_t const xlat_func_substr_args[] = {
         { .single = true, .required = true, .type = FR_TYPE_VOID },
         { .single = true, .required = true, .type = FR_TYPE_INT32 },
@@ -4054,6 +4091,7 @@ do { \
  
         XLAT_REGISTER_PURE("string", xlat_func_string, FR_TYPE_STRING, xlat_func_string_arg);
         XLAT_REGISTER_PURE("strlen", xlat_func_strlen, FR_TYPE_SIZE, xlat_func_strlen_arg);
+       XLAT_REGISTER_PURE("str.utf8", xlat_func_str_utf8, FR_TYPE_BOOL, xlat_func_str_utf8_arg);
         XLAT_REGISTER_PURE("tolower", xlat_func_tolower, FR_TYPE_STRING, xlat_change_case_arg);
         XLAT_REGISTER_PURE("toupper", xlat_func_toupper, FR_TYPE_STRING, xlat_change_case_arg);
         XLAT_REGISTER_PURE("urlquote", xlat_func_urlquote, FR_TYPE_STRING, xlat_func_urlquote_arg);
diff --git a/src/lib/unlang/xlat_eval.c b/src/lib/unlang/xlat_eval.c

index 88b803e1af4ad0eb0a4d50ddf1d2b0c797e6ea4a..1c5452c7bea3d8f1881f5fbc0067e525381e5f81 100644 (file)
--- a/src/lib/unlang/xlat_eval.c
+++ b/src/lib/unlang/xlat_eval.c
@@ -82,7 +82,7 @@ static fr_slen_t xlat_fmt_print(fr_sbuff_t *out, xlat_exp_t const *node)
         case XLAT_BOX:
         case XLAT_GROUP:
                 fr_assert(node->fmt != NULL);
-               return fr_sbuff_in_strcpy(out, node->fmt);
+               return fr_sbuff_in_sprintf(out, "%pV", fr_box_strvalue_buffer(node->fmt));
  
         case XLAT_ONE_LETTER:
                 fr_assert(node->fmt != NULL);
@@ -93,7 +93,7 @@ static fr_slen_t xlat_fmt_print(fr_sbuff_t *out, xlat_exp_t const *node)
                 if (tmpl_is_attr(node->vpt) && (node->fmt[0] == '&')) {
                         return fr_sbuff_in_strcpy(out, node->fmt);
                 } else {
-                       return fr_sbuff_in_sprintf(out, "%%{%s}", node->fmt);
+                       return fr_sbuff_in_sprintf(out, "%%{%pV}", fr_box_strvalue_buffer(node->fmt));
                 }
  
         case XLAT_VIRTUAL:
diff --git a/src/lib/util/print.c b/src/lib/util/print.c

index ae8a8f6668b6d155854898ffd05d932a6c8b564d..e9fd3b0645e98181df408fd187d6e3ec460cfeab 100644 (file)
--- a/src/lib/util/print.c
+++ b/src/lib/util/print.c
@@ -42,9 +42,7 @@ inline size_t fr_utf8_char(uint8_t const *str, ssize_t inlen)
  
         if (inlen < 0) inlen = 4;       /* longest char */
  
-       if (*str < 0x20) return 0;
-
-       if (*str <= 0x7e) return 1;     /* 1 */
+       if (*str <= 0x7f) return 1;     /* 1 */
  
         if (*str <= 0xc1) return 0;
  
@@ -139,10 +137,10 @@ inline size_t fr_utf8_char(uint8_t const *str, ssize_t inlen)
   * @param[in] inlen    length of input string.  May be -1 if str
   *                     is \0 terminated.
   * @return The number of bytes validated.  If ret == inlen the entire
- *        string is valid.  Else ret gives the offset at which the
- *        first invalid byte sequence was found.
+ *        string is valid.  Else ret is negative: minus the number of
+ *        bytes left from the first invalid byte sequence (p - end).
   */
-ssize_t fr_utf8_str(uint8_t const *str, ssize_t inlen)
+fr_slen_t fr_utf8_str(uint8_t const *str, ssize_t inlen)
  {
         uint8_t const *p, *end;
         size_t len;
@@ -156,7 +154,7 @@ ssize_t fr_utf8_str(uint8_t const *str, ssize_t inlen)
                 size_t clen;
  
                 clen = fr_utf8_char(p, end - p);
-               if (clen == 0) return end - p;
+               if (clen == 0) return p - end;
                 p += clen;
         } while (p < end);
  
diff --git a/src/tests/keywords/xlat-utf8 b/src/tests/keywords/xlat-utf8

new file mode 100644 (file)

index 0000000..03bab55
--- /dev/null
+++ b/src/tests/keywords/xlat-utf8
@@ -0,0 +1,26 @@
+#
+#  PRE: if
+#
+string valid_utf8
+string invalid_utf8
+
+&valid_utf8 = "🍩abcdef🍩\n\t"
+&invalid_utf8 = "🍩\x80"
+
+if (%str.utf8("\xFE")) {
+       test_fail
+}
+
+if (!%str.utf8(%{valid_utf8})) {
+       test_fail
+}
+
+if (!%str.utf8("abcdefghijklmnopqrstuvwxyz")) {
+       test_fail
+}
+
+if (%str.utf8(%{invalid_utf8})) {
+       test_fail
+}
+
+success
author	Arran Cudbard-Bell <a.cudbardb@freeradius.org>
	Mon, 27 May 2024 15:35:06 +0000 (11:35 -0400)
committer	Arran Cudbard-Bell <a.cudbardb@freeradius.org>
	Mon, 27 May 2024 23:28:32 +0000 (19:28 -0400)
src/lib/unlang/xlat_builtin.c		patch \| blob \| blame \| history
src/lib/unlang/xlat_eval.c		patch \| blob \| blame \| history
src/lib/util/print.c		patch \| blob \| blame \| history
src/tests/keywords/xlat-utf8	[new file with mode: 0644]	patch \| blob