From: Alan T. DeKok Date: Sun, 26 Jan 2025 15:38:03 +0000 (-0500) Subject: cleanups of expression and condition parsing X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a99e5db92dda45b3929ed07ba441a02ab13b33af;p=thirdparty%2Ffreeradius-server.git cleanups of expression and condition parsing we need quotes around bare words in more places. Any explicit cast is NOT passed down when parsing the next thing. Instead, the next thing is parsed as-is, and then the cast is applied by the current function. This cleans up a lot of odd cases. Also add more checks for different tmpl types when casting things Add '#if 0' out code to complain on unresolved data when parsing. Changing that will require a bunch of other updates, to add quotes around bare words. the tmpl_resolve() function would treat unresolved data as either enums or strings. That will be changing to require either '::' prefix on enums, OR quotes around non-attribute bare words. So (ippadr)* is now invalid, as "*" can't be parsed by tmpl_afrom_substr(). Instead, we must use (ipaddr)'*' --- diff --git a/src/lib/unlang/xlat_expr.c b/src/lib/unlang/xlat_expr.c index bd64e7eefa1..b2a03af5614 100644 --- a/src/lib/unlang/xlat_expr.c +++ b/src/lib/unlang/xlat_expr.c @@ -466,11 +466,33 @@ static fr_slen_t xlat_expr_print_regex(fr_sbuff_t *out, xlat_exp_t const *node, * A space is printed after the first argument only if * there's a second one. So add one if we "ate" the second argument. */ - FR_SBUFF_IN_CHAR_RETURN(out, ' '); + if (inst->xlat) FR_SBUFF_IN_CHAR_RETURN(out, ' '); FR_SBUFF_IN_STRCPY_RETURN(out, fr_tokens[node->call.func->token]); FR_SBUFF_IN_CHAR_RETURN(out, ' '); + /* + * Regexes which aren't instantiated: only for unit tests. + */ + if (!inst->xlat) { + child = xlat_exp_next(node->call.args, child); + fr_assert(!xlat_exp_next(node->call.args, child)); + fr_assert(child->type == XLAT_GROUP); + + FR_SBUFF_IN_CHAR_RETURN(out, '/'); + FR_SBUFF_IN_STRCPY_RETURN(out, child->fmt); + FR_SBUFF_IN_CHAR_RETURN(out, '/'); + + child = xlat_exp_head(child->group); + fr_assert(child->type == XLAT_TMPL); + + /* + * The RHS may be a group + */ + FR_SBUFF_RETURN(regex_flags_print, out, tmpl_regex_flags(child->vpt)); + goto done; + } + fr_assert(tmpl_contains_regex(inst->xlat->vpt)); if (inst->xlat->quote == T_SINGLE_QUOTED_STRING) FR_SBUFF_IN_CHAR_RETURN(out, 'm'); @@ -480,6 +502,7 @@ static fr_slen_t xlat_expr_print_regex(fr_sbuff_t *out, xlat_exp_t const *node, FR_SBUFF_RETURN(regex_flags_print, out, inst->regex_flags); +done: FR_SBUFF_IN_CHAR_RETURN(out, ')'); return fr_sbuff_used_total(out) - at_in; @@ -2410,12 +2433,12 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf xlat_exp_t *node = NULL; fr_sbuff_t our_in = FR_SBUFF(in); fr_sbuff_marker_t opand_m; - tmpl_rules_t our_t_rules = *t_rules; + tmpl_rules_t our_t_rules; tmpl_t *vpt = NULL; fr_token_t quote; int triple = 1; - fr_type_t cast_type = FR_TYPE_NULL; - xlat_exp_t *cast = NULL; + fr_type_t cast_type; + fr_dict_attr_t const *enumv; XLAT_DEBUG("FIELD <-- %pV", fr_box_strvalue_len(fr_sbuff_current(in), fr_sbuff_remaining(in))); @@ -2424,6 +2447,21 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf */ if (expr_cast_from_substr(&cast_type, &our_in) < 0) return -1; + /* + * Do NOT pass the cast down to the next set of parsing routines. Instead, let the next data be + * parsed as whatever, and then add a cast, or cast in place as necessary. + */ + our_t_rules = *t_rules; + if (cast_type == FR_TYPE_NULL) { + cast_type = our_t_rules.cast; + enumv = our_t_rules.enumv; + } else { + enumv = NULL; + } + + our_t_rules.cast = FR_TYPE_NULL; + our_t_rules.enumv = NULL; + /* * If we still have '(', then recurse for other expressions * @@ -2460,21 +2498,6 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf goto done; } - /* - * If there is a cast, try to pass it recursively to the parser. This allows us to set default - * data types, etc. - * - * We may end up removing the cast later, if for example the tmpl is an attribute whose data type - * matches the cast. - * - * This ifdef isn't defined anywhere, and is just a reminder to check this code when we move to - * tmpl_require_enum_prefix=yes. This cast has to be deleted. - */ - if (!tmpl_require_enum_prefix && (cast_type != FR_TYPE_NULL)) { - our_t_rules.cast = cast_type; - our_t_rules.enumv = NULL; - } - /* * Record where the operand begins for better error offsets later */ @@ -2511,11 +2534,6 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf FALL_THROUGH; case T_BACK_QUOTED_STRING: - /* - * Don't put the cast in the tmpl, but put it instead in the expression. - */ - if (cast_type != FR_TYPE_NULL) our_t_rules.cast = FR_TYPE_NULL; - p_rules = value_parse_rules_quoted[quote]; /* @@ -2608,16 +2626,25 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf fr_sbuff_skip_whitespace(&our_in); +#if 0 /* - * The tmpl has a cast, and it's the same as the explicit cast we were given, we can discard the - * explicit cast. - * - * Also do this for tmpls which are attributes, and which have the same data type as the cast. - * - * This work isn't _strictly_ necessary, but it avoids duplicate casts at run-time. + * A bare word which is NOT a known attribute. That's an error. + */ + if (tmpl_is_data_unresolved(vpt)) { + fr_assert(quote == T_BARE_WORD); + fr_strerror_const("Failed parsing input"); + fr_sbuff_set(&our_in, &opand_m); + goto error; + } +#endif + + /* + * The tmpl has a cast, and it's the same as the explicit cast we were given, we can sometimes + * discard the explicit cast. */ if (cast_type != FR_TYPE_NULL) { if (tmpl_rules_cast(vpt) == cast_type) { + fr_assert(0); cast_type = FR_TYPE_NULL; } else if (tmpl_is_attr(vpt)) { @@ -2628,13 +2655,19 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf da = tmpl_attr_tail_da(vpt); /* could be a list! */ /* - * Omit the cast if the da type matches our cast. BUT don't do this for enums! In that - * case, the cast will convert the value-box to one _without_ an enumv entry, which means - * that the value will get printed as its underlying data type, and not as the enum name. + * Set the cast for attributes. Note that tmpl_cast_set() will take care of + * suppressing redundant casts. But it still allows (uint32)&Service-Type, + * which means "return the raw value", and not "return enum name". */ - if (da && (da->type == cast_type)) { - tmpl_cast_set(vpt, cast_type); - cast_type = FR_TYPE_NULL; + if (da) { + if (tmpl_cast_set(vpt, cast_type) < 0) { + fr_sbuff_set(&our_in, &opand_m); + goto error; + } else { + cast_type = FR_TYPE_NULL; + } + } else { /* it's something like &reply. */ + fr_assert(0); } } else if (tmpl_is_data(vpt)) { @@ -2650,43 +2683,54 @@ static fr_slen_t tokenize_field(xlat_exp_head_t *head, xlat_exp_t **out, fr_sbuf if (tmpl_value_type(vpt) == cast_type) { cast_type = FR_TYPE_NULL; + } else if (tmpl_cast_in_place(vpt, cast_type, enumv) < 0) { + fr_sbuff_set(&our_in, &opand_m); + goto error; + } else { /* - * Cast it now, and remove the cast type. + * We've parsed the data as the new data type, so we don't need any more + * casting. */ - if (tmpl_cast_in_place(vpt, cast_type, NULL) < 0) { - fr_sbuff_set(&our_in, &opand_m); - goto error; - } - cast_type = FR_TYPE_NULL; } - } - /* - * Cast a string to a string means "no cast". - */ - if ((cast_type == FR_TYPE_STRING) && (vpt->quote != T_BARE_WORD)) { - tmpl_cast_set(vpt, FR_TYPE_NULL); - cast_type = FR_TYPE_NULL; - } + } else if (tmpl_contains_xlat(vpt)) { + /* + * (string) "foo %{...}" is redundant. Drop the cast. + */ + if ((cast_type == FR_TYPE_STRING) && (vpt->quote != T_BARE_WORD)) { + tmpl_cast_set(vpt, FR_TYPE_NULL); + cast_type = FR_TYPE_NULL; - /* - * Push the cast down. - * - * But if we're casting to string, and the RHS is already a string, we don't need to cast - * it. We can just discard the cast. - */ - if ((cast_type != FR_TYPE_NULL) && (tmpl_rules_cast(vpt) == FR_TYPE_NULL)) { - if ((cast_type != FR_TYPE_STRING) || (vpt->quote == T_BARE_WORD)) { + } else { + /* + * Push the cast to the tmpl. + */ tmpl_cast_set(vpt, cast_type); + cast_type = FR_TYPE_NULL; } - cast_type = FR_TYPE_NULL; + + } else if (tmpl_is_data_unresolved(vpt)) { + /* + * A bare word which is NOT a known attribute. That's an error. + */ + fr_assert(quote == T_BARE_WORD); + fr_strerror_const("Failed parsing input"); + fr_sbuff_set(&our_in, &opand_m); + goto error; + + } else { + /* + * Regex? Or something else weird? + */ + tmpl_debug(vpt); + fr_assert(0); } } /* - * Else we're not hoisting, set the node to the VPT + * Assign the tmpl to the node. */ node->vpt = vpt; node->quote = quote; @@ -2731,6 +2775,8 @@ done: * If there is a cast, then reparent the node with a cast wrapper. */ if (cast_type != FR_TYPE_NULL) { + xlat_exp_t *cast; + MEM(cast = expr_cast_alloc(head, cast_type)); xlat_func_append_arg(cast, node, false); node = cast; @@ -2943,30 +2989,30 @@ redo: fr_sbuff_skip_whitespace(&our_in); fr_sbuff_marker(&m_rhs, &our_in); -#if 0 /* - * If LHS is attr && structural, allow only == and !=, then check that the RHS is {}. - * - * However, we don't want the LHS evaluated, so just re-write it as an "exists" xlat? - * - * @todo - check lists for equality? + * We now parse the RHS, allowing a (perhaps different) cast on the RHS. */ - if ((lhs->type == XLAT_TMPL) && tmpl_is_attr(lhs->vpt) && fr_type_is_structural(tmpl_attr_tail_da(lhs->vpt)->type)) { - if ((op != T_OP_CMP_EQ) && (op != T_OP_NE)) { - fr_strerror_printf("Invalid operator '%s' for left hand side structural attribute", fr_tokens[op]); - fr_sbuff_set(&our_in, &m_op); + XLAT_DEBUG(" recurse RHS <-- %pV", fr_box_strvalue_len(fr_sbuff_current(&our_in), fr_sbuff_remaining(&our_in))); + if ((op == T_OP_REG_EQ) || (op == T_OP_REG_NE)) { + fr_type_t type; + + /* + * @todo - LHS shouldn't be anything else. + */ + fr_assert(lhs->type == XLAT_TMPL); + + type = tmpl_cast_get(lhs->vpt); + if ((type != FR_TYPE_NULL) && (type != FR_TYPE_STRING)) { + fr_strerror_const("Casts cannot be used with regular expressions"); + fr_sbuff_set(&our_in, &m_lhs); FR_SBUFF_ERROR_RETURN(&our_in); } - fr_assert(0); - } -#endif + /* + * Cast the LHS to a string, if it's not already one! + */ + if (lhs->vpt->quote == T_BARE_WORD) tmpl_cast_set(lhs->vpt, FR_TYPE_STRING); - /* - * We now parse the RHS, allowing a (perhaps different) cast on the RHS. - */ - XLAT_DEBUG(" recurse RHS <-- %pV", fr_box_strvalue_len(fr_sbuff_current(&our_in), fr_sbuff_remaining(&our_in))); - if ((op == T_OP_REG_EQ) || (op == T_OP_REG_NE)) { slen = tokenize_regex_rhs(head, &rhs, &our_in, t_rules, bracket_rules); } else { slen = tokenize_expression(head, &rhs, &our_in, p_rules, t_rules, op, bracket_rules, input_rules, cond); diff --git a/src/tests/unit/condition/base.txt b/src/tests/unit/condition/base.txt index d924cfc6111..5904d27c64f 100644 --- a/src/tests/unit/condition/base.txt +++ b/src/tests/unit/condition/base.txt @@ -232,7 +232,7 @@ condition &Event-Timestamp == 'January 1, 2012' # literals are parsed when the conditions are parsed condition (integer)X == 1 -match ERROR offset 10: Failed parsing string as type 'uint32' +match ERROR offset 10: Failed parsing input # # @todo - parsing - there is no enum named "X" @@ -266,7 +266,7 @@ condition (ether) 00:11:22:33:44:55 == "%md4('00:11:22:33:44:55')" match (00:11:22:33:44:55 == "%md4('00:11:22:33:44:55')") condition (ether) 00:XX:22:33:44:55 == 00:11:22:33:44:55 -match ERROR offset 12: Missing separator, expected ':' +match ERROR offset 9: enum values must contain at least one alpha character # # Tests for boolean data types. @@ -347,7 +347,7 @@ condition (ipaddr)127.0.0.1/32 == 127.0.0.1 match true condition (ipaddr)127.0.0.1/327 == 127.0.0.1 -match ERROR offset 9: Invalid IPv4 mask length "/327". Should be between 0-32 +match ERROR offset 9: enum values must contain at least one alpha character condition (ipaddr)127.0.0.1/32 == 127.0.0.1 match true @@ -420,7 +420,6 @@ match ERROR offset 2: Invalid data type 'vsa' in cast condition (string)"foo" == &User-Name match ("foo" == &User-Name) -# This used to be expr, but expr isn't a builtin, so it failed... condition (integer)"%md4(' 1 + 1')" < &NAS-Port match ((uint32)"%md4(' 1 + 1')" < &NAS-Port) diff --git a/src/tests/unit/condition/ip.txt b/src/tests/unit/condition/ip.txt index e78c093fabb..e8055f56538 100644 --- a/src/tests/unit/condition/ip.txt +++ b/src/tests/unit/condition/ip.txt @@ -10,7 +10,7 @@ proto-dictionary radius condition &NAS-IPv6-Address == :: match (&NAS-IPv6-Address == ::) -condition &NAS-IP-Address == (ipaddr)* +condition &NAS-IP-Address == (ipaddr)'*' match (&NAS-IP-Address == 0.0.0.0) condition &NAS-IP-Address == 0.0.0.0 diff --git a/src/tests/unit/condition/regex.ignore b/src/tests/unit/condition/regex.ignore deleted file mode 100644 index e19df078b6f..00000000000 --- a/src/tests/unit/condition/regex.ignore +++ /dev/null @@ -1,88 +0,0 @@ -proto-dictionary radius - -condition &User-Name !~ /^foo\nbar$/ -match !&User-Name =~ /^foo\nbar$/ - -condition (ok =~ handled) -match ERROR offset 5: Invalid location for operator - -condition (ok == /foo/) -match ERROR offset 5: Invalid location for operator - -# -# bare words are cast to strings -# -condition foo =~ /bar/ -match 'foo' =~ /bar/ - -# -# Convert !~ to !(COND) for regex -# -condition foo !~ /bar/ -match !'foo' =~ /bar/ - -condition !foo !~ /bar/ -match 'foo' =~ /bar/ - -# -# Flags -# -condition foo =~ /bar/i -match 'foo' =~ /bar/i - -condition foo =~ /bar/m -match 'foo' =~ /bar/m - -condition foo =~ /bar/im -match 'foo' =~ /bar/im - -condition foo =~ /bar/ima -match ERROR offset 15: Unsupported regex flag 'a' - -condition foo =~ /bar/ii -match ERROR offset 14: Duplicate regex flag 'i' - -condition foo =~ /bar/iia -match ERROR offset 14: Duplicate regex flag 'i' - -# -# Escape the backslashes correctly -# And print them correctly -# - -condition &User-Name =~ /@|./ -match &User-Name =~ /@|./ - -condition &User-Name =~ /@|\\/ -match &User-Name =~ /@|\\/ - -condition &User-Name =~ /^([^\\]*)\\(.*)$/ -match &User-Name =~ /^([^\\]*)\\(.*)$/ - -# -# Non-integer types get cast to string. -# -condition &NAS-Port =~ /%{NAS-Port}/ -match &NAS-Port =~ /%{NAS-Port}/ - -# -# Cannot add a bad cast -# -condition &Filter-Id =~ /foo/ -match ERROR offset 10: Casts cannot be used with regular expressions - -condition &Filter-Id =~ /foo/ -match ERROR offset 28: Casts cannot be used with regular expressions - - -xlat %{1} -match %{1} - -xlat %{33} -match ERROR offset 3: Invalid regex reference. Must be in range 0-32 - -condition &User-Name == /foo/ -match ERROR offset 15: Unexpected regular expression - -count -match 43 diff --git a/src/tests/unit/condition/regex.txt b/src/tests/unit/condition/regex.txt new file mode 100644 index 00000000000..ce9b81e367d --- /dev/null +++ b/src/tests/unit/condition/regex.txt @@ -0,0 +1,97 @@ +# +# Tests for parsing conditional expressions. +# +# $Id$ +# + +proto-dictionary radius +tmpl-rules allow_unresolved=yes allow_unknown=yes + +condition &User-Name =~ (uint32) /foo/ +match ERROR offset 15: Expected regular expression + +condition &User-Name !~ /^foo\nbar$/ +match (&User-Name !~ /^foo\nbar$/) + +condition ('ok' =~ 'handled') +match ERROR offset 10: Expected regular expression + +condition ('ok' == /foo/) +match ERROR offset 10: Unexpected regular expression + +condition 'foo' =~ /bar/ +match ('foo' =~ /bar/) + +condition 'foo' !~ /bar/ +match ('foo' !~ /bar/) + +condition !foo !~ /bar/ +match ERROR offset 1: Operator '!' is only applied to the left hand side of the '!~' operation, add (..) to evaluate the operation first + +condition !('foo' !~ /bar/) +match !('foo' !~ /bar/) + +# +# Flags +# +condition 'foo' =~ /bar/i +match ('foo' =~ /bar/i) + +condition 'foo' =~ /bar/m +match ('foo' =~ /bar/m) + +condition 'foo' =~ /bar/im +match ('foo' =~ /bar/im) + +condition 'foo' =~ /bar/ima +match ERROR offset 17: Unsupported regex flag 'a' + +condition 'foo' =~ /bar/ii +match ERROR offset 16: Duplicate regex flag 'i' + +condition 'foo' =~ /bar/iia +match ERROR offset 16: Duplicate regex flag 'i' + +# +# Escape the backslashes correctly +# And print them correctly +# + +condition &User-Name =~ /@|./ +match (&User-Name =~ /@|./) + +condition &User-Name =~ /@|\\/ +match (&User-Name =~ /@|\\/) + +condition &User-Name =~ /^([^\\]*)\\(.*)$/ +match (&User-Name =~ /^([^\\]*)\\(.*)$/) + +# +# Non-integer types get cast to string. +# +condition &NAS-Port =~ /%{NAS-Port}/ +match ((string)&NAS-Port =~ /%{NAS-Port}/) + +# +# Cannot add a bad cast +# +condition (uint32)&Filter-Id =~ /foo/ +match ERROR offset 1: Casts cannot be used with regular expressions + +condition &Filter-Id =~ (uint32)/foo/ +match ERROR offset 15: Expected regular expression + +xlat %{1} +match %{1} + +xlat %{33} +match ERROR offset 3: Invalid regex reference. Must be in range 0-32 + +condition &User-Name == /foo/ +match ERROR offset 15: Unexpected regular expression + +condition %md5("foo") =~ /foo/ +match ((string)%md5("foo") =~ /foo/) + +count +match 50 diff --git a/src/tests/unit/xlat/cond_base.txt b/src/tests/unit/xlat/cond_base.txt index 7bf0329a380..4177ae67ce1 100644 --- a/src/tests/unit/xlat/cond_base.txt +++ b/src/tests/unit/xlat/cond_base.txt @@ -230,7 +230,7 @@ xlat_purify &Event-Timestamp == 'January 1 2012' # literals are parsed when the conditions are parsed xlat_purify (integer)X == 1 -match ERROR offset 10: Failed parsing string as type 'uint32' +match ERROR offset 10: Failed parsing input # # @todo - parsing - resolution is delayed, so we don't know where in the input @@ -271,7 +271,7 @@ xlat_purify (ether)00:11:22:33:44:55 == "%md4('00:11:22:33:44:55')" match (00:11:22:33:44:55 == "%md4('00:11:22:33:44:55')") xlat_purify (ether) 00:XX:22:33:44:55 == 00:11:22:33:44:55 -match ERROR offset 12: Missing separator, expected ':' +match ERROR offset 9: enum values must contain at least one alpha character # # Tests for boolean data types. @@ -349,7 +349,7 @@ xlat_purify (ipaddr)127.0.0.1/32 == 127.0.0.1 match true xlat_purify (ipaddr)127.0.0.1/327 == 127.0.0.1 -match ERROR offset 9: Invalid IPv4 mask length "/327". Should be between 0-32 +match ERROR offset 9: enum values must contain at least one alpha character xlat_purify (ipaddr)127.0.0.1/32 == 127.0.0.1 match true diff --git a/src/tests/unit/xlat/cond_regex.txt b/src/tests/unit/xlat/cond_regex.txt index 64ca851a305..68ab6da0f6e 100644 --- a/src/tests/unit/xlat/cond_regex.txt +++ b/src/tests/unit/xlat/cond_regex.txt @@ -54,14 +54,14 @@ xlat_purify &User-Name =~ /^([^\\]*)\\(.*)$/ match (&User-Name =~ /^([^\\]*)\\(.*)$/) xlat_purify &NAS-Port =~ /%{Login-TCP-Port} foo/ -match (&NAS-Port =~ /%{Login-TCP-Port} foo/) +match ((string)&NAS-Port =~ /%{Login-TCP-Port} foo/) # # If they're dumb enough to add a cast, then it will be just cast again # to "string" before the regular expression is evaluated. # xlat_purify (integer)&Filter-Id =~ /foo/ -match ((uint32)&Filter-Id =~ /foo/) +match ERROR offset 1: Casts cannot be used with regular expressions xlat_purify &Filter-Id =~ /foo/ match ERROR offset 15: Expected regular expression