From: Alan T. DeKok <aland@freeradius.org>
Date: Tue, 21 Jun 2022 23:16:24 +0000 (-0400)
Subject: break out tokenize regex RHS into its own function
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d7b0bbebf344788386a7bb8e1dd77ca571e10902;p=thirdparty%2Ffreeradius-server.git

break out tokenize regex RHS into its own function
---

diff --git a/src/lib/unlang/xlat_expr.c b/src/lib/unlang/xlat_expr.c
index 115396e3537..73bbd96c642 100644
--- a/src/lib/unlang/xlat_expr.c
+++ b/src/lib/unlang/xlat_expr.c
@@ -1446,7 +1446,7 @@ static ssize_t tokenize_expression(xlat_exp_head_t *head, xlat_exp_t **out, fr_s
 
 static ssize_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuff_t *in,
 			      fr_sbuff_parse_rules_t const *p_rules, tmpl_rules_t const *t_rules,
-			      fr_sbuff_parse_rules_t const *bracket_rules, bool expect_regex);
+			      fr_sbuff_parse_rules_t const *bracket_rules);
 
 static fr_table_num_sorted_t const expr_quote_table[] = {
 	{ L("\""),	T_DOUBLE_QUOTED_STRING	},	/* Don't re-order, backslash throws off ordering */
@@ -1528,7 +1528,7 @@ static ssize_t tokenize_unary(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuff_
 	 *	that we return that, and not the child node
 	 */
 	if (!func) {
-		return tokenize_field(head, out, in, p_rules, t_rules, bracket_rules, false);
+		return tokenize_field(head, out, in, p_rules, t_rules, bracket_rules);
 	}
 	
 	MEM(unary = xlat_exp_alloc(head, XLAT_FUNC, func->name, strlen(func->name)));
@@ -1537,7 +1537,7 @@ static ssize_t tokenize_unary(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuff_
 	unary->call.func = func;
 	unary->flags = func->flags;
 
-	slen = tokenize_field(unary->call.args, &node, &our_in, p_rules, t_rules, bracket_rules, false);
+	slen = tokenize_field(unary->call.args, &node, &our_in, p_rules, t_rules, bracket_rules);
 	if (slen < 0) {
 		talloc_free(unary);
 		FR_SBUFF_ERROR_RETURN_ADJ(&our_in, slen);
@@ -1649,12 +1649,115 @@ static ssize_t expr_cast_from_substr(fr_type_t *cast, fr_sbuff_t *in)
 	return fr_sbuff_set(in, &our_in);
 }
 
+/*
+ *	Tokenize the RHS of a regular expression.
+ */
+static ssize_t tokenize_regex_rhs(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuff_t *in,
+				  tmpl_rules_t const *t_rules,
+				  fr_sbuff_parse_rules_t const *bracket_rules)
+{
+	ssize_t			slen;
+	xlat_exp_t		*node = NULL;
+	fr_sbuff_t		our_in = FR_SBUFF(in);
+	fr_sbuff_marker_t	opand_m;
+	tmpl_t			*vpt;
+
+	XLAT_DEBUG("REGEX_RHS <-- %pV", fr_box_strvalue_len(fr_sbuff_current(in), fr_sbuff_remaining(in)));
+
+	fr_sbuff_skip_whitespace(&our_in);
+
+	/*
+	 *	Record where the operand begins for better error offsets later
+	 */
+	fr_sbuff_marker(&opand_m, &our_in);
+
+	/*
+	 *	Regexes cannot have casts or sub-expressions.
+	 */
+	if (!fr_sbuff_next_if_char(&our_in, '/')) {
+		fr_strerror_const("Expected regular expression");
+		goto error;
+	}
+
+	/*
+	 *	Allocate the xlat node now so the talloc hierarchy is correct
+	 */
+	MEM(node = xlat_exp_alloc_null(head));
+	xlat_exp_set_type(node, XLAT_TMPL);
+
+	/*
+	 *	tmpl_afrom_substr does pretty much all the work of
+	 *	parsing the operand.
+	 */
+	slen = tmpl_afrom_substr(node, &vpt, &our_in, T_SOLIDUS_QUOTED_STRING, value_parse_rules_quoted[T_SOLIDUS_QUOTED_STRING], t_rules);
+	if (!vpt) {
+	error:
+		talloc_free(node);
+		fr_sbuff_set(&our_in, &opand_m);
+		return -fr_sbuff_used(&our_in);
+	}
+
+	/*
+	 *	@todo - allow for the RHS to be an attribute, too?
+	 */
+	fr_assert(tmpl_contains_regex(vpt));
+
+	/*
+	 *	It would be nice if tmpl_afrom_substr() did this :(
+	 */
+	if (!fr_sbuff_next_if_char(&our_in, '/')) {
+		fr_strerror_const("Unterminated regular expression");
+		goto error;
+	}
+
+	slen = tmpl_regex_flags_substr(vpt, &our_in, bracket_rules->terminals);
+	if (slen < 0) {
+		talloc_free(node);
+		FR_SBUFF_ERROR_RETURN_ADJ(&our_in, -slen - 2); /* account for // */
+	}
+
+	node->vpt = vpt;
+	node->quote = T_SOLIDUS_QUOTED_STRING;
+	node->fmt = vpt->name;
+
+	/*
+	 *	Resolve things if we can.
+	 */
+	if (tmpl_is_unresolved(node->vpt) && (tmpl_resolve(node->vpt, NULL) < 0)) goto error;
+
+	node->flags.pure = tmpl_is_data(node->vpt);
+	node->flags.needs_resolving = tmpl_needs_resolving(node->vpt);
+	xlat_flags_merge(&head->flags, &node->flags);
+
+	fr_sbuff_skip_whitespace(&our_in);
+
+	/*
+	 *	Try to compile regular expressions, but only if
+	 *	they're not being dynamically expanded.
+	 */
+	if (tmpl_is_regex_uncompiled(node->vpt) && !tmpl_is_regex_xlat(node->vpt)) {
+		slen = tmpl_regex_compile(node->vpt, true);
+		if (slen <= 0) goto error;
+	}
+
+#ifdef __clang_analyzer__
+	if (!node) return 0;	/* shut up stupid analyzer */
+#else
+	fr_assert(node != NULL);
+#endif
+
+	*out = node;
+
+	return fr_sbuff_set(in, &our_in);
+}
+
+
 /*
  *	Tokenize a field without unary operators.
  */
 static ssize_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuff_t *in,
 			      fr_sbuff_parse_rules_t const *p_rules, tmpl_rules_t const *t_rules,
-			      fr_sbuff_parse_rules_t const *bracket_rules, bool expect_regex)
+			      fr_sbuff_parse_rules_t const *bracket_rules)
 {
 	ssize_t			slen;
 	xlat_exp_t		*node = NULL;
@@ -1666,11 +1769,6 @@ static ssize_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuff_
 
 	XLAT_DEBUG("FIELD <-- %pV", fr_box_strvalue_len(fr_sbuff_current(in), fr_sbuff_remaining(in)));
 
-	/*
-	 *	Regexes cannot have casts or subgroups.
-	 */
-	if (expect_regex && !fr_sbuff_is_char(&our_in, '/')) goto expected_regex_error;
-
 	/*
 	 *	Allow for explicit casts.  Non-leaf types are forbidden.
 	 */
@@ -1741,12 +1839,9 @@ static ssize_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuff_
 		break;
 
 	case T_SOLIDUS_QUOTED_STRING:
-		if (!expect_regex) {
-			fr_strerror_const("Unexpected regular expression");
-			fr_sbuff_set(&our_in, &opand_m);	/* Error points to the quoting char at the start of the string */
-			goto error;
-		}
-		FALL_THROUGH;
+		fr_strerror_const("Unexpected regular expression");
+		fr_sbuff_set(&our_in, &opand_m);	/* Error points to the quoting char at the start of the string */
+		goto error;
 
 	case T_BACK_QUOTED_STRING:
 	case T_DOUBLE_QUOTED_STRING:
@@ -1775,23 +1870,6 @@ static ssize_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuff_
 		return -fr_sbuff_used(&our_in);
 	}
 
-	/*
-	 *	If there's a regex, we have to expect it.
-	 */
-	if (expect_regex) {
-		if (quote != T_SOLIDUS_QUOTED_STRING) {
-			fr_sbuff_advance(&our_in, -(slen + 1)); /* account for quote */
-		expected_regex_error:
-			fr_strerror_const("Expected regular expression");
-			goto error;
-		}
-
-		/*
-		 *	@todo - allow for the RHS to be an attribute, too?
-		 */
-		fr_assert(tmpl_contains_regex(vpt));
-	}
-
 	/*
 	 *	It would be nice if tmpl_afrom_substr() did this :(
 	 */
@@ -1826,30 +1904,10 @@ static ssize_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuff_
 		if (tmpl_resolve(node->vpt, NULL) < 0) return -1;
 	}
 
-	node->flags.pure = tmpl_is_data(node->vpt);
-	node->flags.needs_resolving = tmpl_needs_resolving(node->vpt);
-
-	/*
-	 *	Parse the regex flags if necessary.
-	 */
-	if (quote == T_SOLIDUS_QUOTED_STRING) {
-		slen = tmpl_regex_flags_substr(node->vpt, &our_in, bracket_rules->terminals);
-		if (slen < 0) goto error;
-	}
-
-	*out = node;
 	node->flags.pure = tmpl_is_data(node->vpt);
 	node->flags.needs_resolving = tmpl_needs_resolving(node->vpt);
 	xlat_flags_merge(&head->flags, &node->flags);
 
-	/* don't merge flags.  That will happen when the node is added to the head */
-
-	/*
-	 *	Don't call tmpl_resolve() here, it should be called
-	 *	in pass2 or later during tokenization if we've managed
-	 *	to resolve all the operands in the expression.
-	 */
-
 	fr_sbuff_skip_whitespace(&our_in);
 
 	/*
@@ -1897,14 +1955,7 @@ static ssize_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuff_
 		}
 	}
 
-	/*
-	 *	Try to compile regular expressions, but only if
-	 *	they're not being dynamically expanded.
-	 */
-	if (tmpl_is_regex_uncompiled(node->vpt) && !tmpl_is_regex_xlat(node->vpt)) {
-		slen = tmpl_regex_compile(node->vpt, true);
-		if (slen <= 0) goto error;
-	}
+	fr_assert(!tmpl_contains_regex(vpt));
 
 done:
 #ifdef __clang_analyzer__
@@ -2175,7 +2226,7 @@ redo:
 	 */
 	XLAT_DEBUG("    recurse RHS <-- %pV", fr_box_strvalue_len(fr_sbuff_current(&our_in), fr_sbuff_remaining(&our_in)));
 	if ((op == T_OP_REG_EQ) || (op == T_OP_REG_NE)) {
-		slen = tokenize_field(head, &rhs, &our_in, p_rules, t_rules, bracket_rules, true);
+		slen = tokenize_regex_rhs(head, &rhs, &our_in, t_rules, bracket_rules);
 	} else {
 		slen = tokenize_expression(head, &rhs, &our_in, p_rules, t_rules, op, bracket_rules, input_rules);
 	}
diff --git a/src/tests/unit/xlat/cond_regex.txt b/src/tests/unit/xlat/cond_regex.txt
index 7b859067221..b15c784357c 100644
--- a/src/tests/unit/xlat/cond_regex.txt
+++ b/src/tests/unit/xlat/cond_regex.txt
@@ -35,10 +35,10 @@ xlat_purify &User-Name =~ /bar/ima
 match ERROR offset 19: Unsupported regex flag 'a'
 
 xlat_purify &User-Name =~ /bar/ii
-match ERROR offset 19: Duplicate regex flag 'i'
+match ERROR offset 20: Duplicate regex flag 'i'
 
 xlat_purify &User-Name =~ /bar/iia
-match ERROR offset 19: Duplicate regex flag 'i'
+match ERROR offset 20: Duplicate regex flag 'i'
 
 #
 #  Escape the backslashes correctly