]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Rework] Rework expressions processing
authorVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 16 Jun 2020 20:30:34 +0000 (21:30 +0100)
committerVsevolod Stakhov <vsevolod@highsecure.ru>
Tue, 16 Jun 2020 20:30:34 +0000 (21:30 +0100)
src/libutil/expression.c

index b7dc0cc9610703cc9d88b8b8f7de88e68d857099..4c07643aeaaad2ece57b9813affacc0e72c6f1c3 100644 (file)
@@ -40,19 +40,23 @@ enum rspamd_expression_elt_type {
 enum rspamd_expression_op_flag {
        RSPAMD_EXPRESSION_UNARY = 1u << 0u,
        RSPAMD_EXPRESSION_BINARY = 1u << 1u,
-       RSPAMD_EXPRESSION_NARY = 1u << 2u
+       RSPAMD_EXPRESSION_NARY = 1u << 2u,
+       RSPAMD_EXPRESSION_ARITHMETIC = 1u << 3u,
+       RSPAMD_EXPRESSION_LOGICAL = 1u << 4u,
+       RSPAMD_EXPRESSION_COMPARISON = 1u << 5u,
 };
 
 struct rspamd_expression_operation {
        enum rspamd_expression_op op;
        guint logical_priority;
+       guint op_flags;
 };
 
 struct rspamd_expression_elt {
        enum rspamd_expression_elt_type type;
        union {
                rspamd_expression_atom_t *atom;
-               enum rspamd_expression_op op;
+               struct rspamd_expression_operation op;
                gdouble lim;
        } p;
 
@@ -241,6 +245,48 @@ rspamd_expr_logic_priority (enum rspamd_expression_op op)
        return ret;
 }
 
+static guint
+rspamd_expr_op_flags (enum rspamd_expression_op op)
+{
+       guint ret = 0;
+
+       switch (op) {
+       case OP_NOT:
+               ret |= RSPAMD_EXPRESSION_UNARY|RSPAMD_EXPRESSION_LOGICAL;
+               break;
+       case OP_MULT:
+               ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_ARITHMETIC;
+               break;
+       case OP_DIVIDE:
+               ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_ARITHMETIC;
+               break;
+       case OP_PLUS:
+               ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_ARITHMETIC;
+               break;
+       case OP_MINUS:
+               ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_ARITHMETIC;
+               break;
+       case OP_GE:
+       case OP_GT:
+       case OP_LE:
+       case OP_LT:
+               ret |= RSPAMD_EXPRESSION_BINARY|RSPAMD_EXPRESSION_COMPARISON;
+               break;
+       case OP_AND:
+               ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_LOGICAL;
+               break;
+       case OP_OR:
+               ret |= RSPAMD_EXPRESSION_NARY|RSPAMD_EXPRESSION_LOGICAL;
+               break;
+       case OP_OBRACE:
+       case OP_CBRACE:
+       case OP_INVALID:
+               break;
+       }
+
+       return ret;
+}
+
 /*
  * Return FALSE if symbol is not operation symbol (operand)
  * Return TRUE if symbol is operation symbol
@@ -428,14 +474,14 @@ rspamd_ast_add_node (GPtrArray *operands, struct rspamd_expression_elt *op,
 
        g_assert (op->type == ELT_OP);
 
-       if (op->p.op == OP_NOT) {
+       if (op->p.op.op_flags & RSPAMD_EXPRESSION_UNARY) {
                /* Unary operator */
                res = g_node_new (op);
                a1 = rspamd_expr_stack_elt_pop (operands);
 
                if (a1 == NULL) {
                        g_set_error (err, rspamd_expr_quark(), EINVAL, "no operand to "
-                                       "unary '%s' operation", rspamd_expr_op_to_str (op->p.op));
+                                       "unary '%s' operation", rspamd_expr_op_to_str (op->p.op.op));
                        g_node_destroy (res);
 
                        return FALSE;
@@ -449,44 +495,54 @@ rspamd_ast_add_node (GPtrArray *operands, struct rspamd_expression_elt *op,
                }
        }
        else {
-               /* For binary operators we might want to examine chains */
+               /* For binary/nary operators we might want to examine chains */
                a2 = rspamd_expr_stack_elt_pop (operands);
                a1 = rspamd_expr_stack_elt_pop (operands);
 
                if (a2 == NULL) {
                        g_set_error (err, rspamd_expr_quark(), EINVAL, "no left operand to "
-                                       "'%s' operation", rspamd_expr_op_to_str (op->p.op));
+                                       "'%s' operation", rspamd_expr_op_to_str (op->p.op.op));
                        return FALSE;
                }
+
                if (a1 == NULL) {
                        g_set_error (err, rspamd_expr_quark(), EINVAL, "no right operand to "
-                                       "'%s' operation", rspamd_expr_op_to_str (op->p.op));
+                                       "'%s' operation", rspamd_expr_op_to_str (op->p.op.op));
                        return FALSE;
                }
 
-               /* First try with a1 */
-               test = a1;
-               test_elt = test->data;
+               /* Nary stuff */
+               if (op->p.op.op_flags & RSPAMD_EXPRESSION_NARY) {
+                       /*
+                        * We convert a set of ops like X + Y + Z to a nary tree like
+                        * X Y Z +
+                        * for the longest possible prefix of atoms/limits
+                        */
 
-               if (test_elt->type == ELT_OP && test_elt->p.op == op->p.op) {
-                       /* Add children */
-                       g_node_append (test, a2);
-                       rspamd_expr_stack_elt_push (operands, a1);
-                       return TRUE;
-               }
+                       /* First try with a1 */
+                       test = a1;
+                       test_elt = test->data;
 
-               /* Now test a2 */
-               test = a2;
-               test_elt = test->data;
+                       if (test_elt->type == ELT_OP && test_elt->p.op.op == op->p.op.op) {
+                               /* Add children */
+                               g_node_append (test, a2);
+                               rspamd_expr_stack_elt_push (operands, a1);
+                               return TRUE;
+                       }
+
+                       /* Now test a2 */
+                       test = a2;
+                       test_elt = test->data;
 
-               if (test_elt->type == ELT_OP && test_elt->p.op == op->p.op) {
-                       /* Add children */
-                       g_node_prepend (test, a1);
-                       rspamd_expr_stack_elt_push (operands, a2);
-                       return TRUE;
+                       if (test_elt->type == ELT_OP && test_elt->p.op.op == op->p.op.op) {
+                               /* Add children */
+                               g_node_prepend (test, a1);
+                               rspamd_expr_stack_elt_push (operands, a2);
+                               return TRUE;
+                       }
                }
 
-               /* No optimizations possible, so create new level */
+               /* No optimizations possible, so create new level */
                res = g_node_new (op);
                g_node_append (res, a1);
                g_node_append (res, a2);
@@ -558,10 +614,10 @@ rspamd_ast_priority_cmp (GNode *a, GNode *b)
        gdouble w1, w2;
 
        if (ea->type == ELT_LIMIT) {
-               return -1;
+               return 1;
        }
        else if (eb->type == ELT_LIMIT) {
-               return 1;
+               return -1;
        }
 
        /* Special logic for atoms */
@@ -584,17 +640,26 @@ static gboolean
 rspamd_ast_resort_traverse (GNode *node, gpointer unused)
 {
        GNode *children, *last;
+       struct rspamd_expression_elt *elt;
 
-       if (node->children) {
+       elt = (struct rspamd_expression_elt *)node->data;
 
-               children = node->children;
-               last = g_node_last_sibling (children);
-               /* Needed for utlist compatibility */
-               children->prev = last;
-               DL_SORT (node->children, rspamd_ast_priority_cmp);
-               /* Restore GLIB compatibility */
-               children = node->children;
-               children->prev = NULL;
+       /*
+        * We sort merely logical operations, everything else is dangerous
+        */
+       if (elt->type == ELT_OP && elt->p.op.op_flags & RSPAMD_EXPRESSION_LOGICAL) {
+
+               if (node->children) {
+
+                       children = node->children;
+                       last = g_node_last_sibling (children);
+                       /* Needed for utlist compatibility */
+                       children->prev = last;
+                       DL_SORT (node->children, rspamd_ast_priority_cmp);
+                       /* Restore GLIB compatibility */
+                       children = node->children;
+                       children->prev = NULL;
+               }
        }
 
        return FALSE;
@@ -854,10 +919,15 @@ rspamd_parse_expression (const gchar *line, gsize len,
                                                goto err;
                                        }
 
+                                       guint op_priority = rspamd_expr_logic_priority (op);
+
                                        if (op != OP_OBRACE) {
                                                elt.type = ELT_OP;
-                                               elt.p.op = op;
+                                               elt.p.op.op = op;
+                                               elt.p.op.op_flags = rspamd_expr_op_flags (op);
+                                               elt.p.op.logical_priority = op_priority;
                                                g_array_append_val (e->expressions, elt);
+
                                                if (!rspamd_ast_add_node (operand_stack,
                                                                rspamd_expr_dup_elt (pool, &elt), err)) {
                                                        goto err;
@@ -890,12 +960,17 @@ rspamd_parse_expression (const gchar *line, gsize len,
                                        }
 
                                        /* We ignore associativity for now */
+                                       guint op_priority = rspamd_expr_logic_priority (op);
+
                                        if (op_stack != OP_OBRACE &&
-                                                       rspamd_expr_logic_priority (op) <
-                                                       rspamd_expr_logic_priority (op_stack)) {
+                                                       op_priority < rspamd_expr_logic_priority (op_stack)) {
                                                elt.type = ELT_OP;
-                                               elt.p.op = op_stack;
+                                               elt.p.op.op = op_stack;
+                                               elt.p.op.op_flags = rspamd_expr_op_flags (op);
+                                               elt.p.op.logical_priority = op_priority;
+
                                                g_array_append_val (e->expressions, elt);
+
                                                if (!rspamd_ast_add_node (operand_stack,
                                                                rspamd_expr_dup_elt (pool, &elt), err)) {
                                                        goto err;
@@ -933,7 +1008,10 @@ rspamd_parse_expression (const gchar *line, gsize len,
                        != OP_INVALID) {
                if (op_stack != OP_OBRACE) {
                        elt.type = ELT_OP;
-                       elt.p.op = op_stack;
+                       elt.p.op.op = op_stack;
+                       elt.p.op.op_flags = rspamd_expr_op_flags (op_stack);
+                       elt.p.op.logical_priority = rspamd_expr_logic_priority (op_stack);
+
                        g_array_append_val (e->expressions, elt);
                        if (!rspamd_ast_add_node (operand_stack,
                                        rspamd_expr_dup_elt (pool, &elt), err)) {
@@ -991,52 +1069,16 @@ err:
  */
 static gboolean
 rspamd_ast_node_done (struct rspamd_expression_elt *elt,
-               struct rspamd_expression_elt *parelt, gdouble acc, gdouble lim)
+               struct rspamd_expression_elt *parelt, gdouble acc)
 {
        gboolean ret = FALSE;
 
        g_assert (elt->type == ELT_OP);
 
-       switch (elt->p.op) {
+       switch (elt->p.op.op) {
        case OP_NOT:
                ret = TRUE;
                break;
-       case OP_PLUS:
-               if (parelt && lim > 0) {
-                       g_assert (parelt->type == ELT_OP);
-
-                       switch (parelt->p.op) {
-                       case OP_GE:
-                               ret = acc >= lim;
-                               break;
-                       case OP_GT:
-                               ret = acc > lim;
-                               break;
-                       case OP_LE:
-                               ret = acc <= lim;
-                               break;
-                       case OP_LT:
-                               ret = acc < lim;
-                               break;
-                       default:
-                               ret = FALSE;
-                               break;
-                       }
-               }
-               break;
-       case OP_GE:
-               ret = acc >= lim;
-               break;
-       case OP_GT:
-               ret = acc > lim;
-               break;
-       case OP_LE:
-               ret = acc <= lim;
-               break;
-       case OP_LT:
-               ret = acc < lim;
-               break;
-       case OP_MULT:
        case OP_AND:
                ret = acc == 0;
                break;
@@ -1044,7 +1086,6 @@ rspamd_ast_node_done (struct rspamd_expression_elt *elt,
                ret = acc != 0;
                break;
        default:
-               g_assert (0);
                break;
        }
 
@@ -1052,36 +1093,28 @@ rspamd_ast_node_done (struct rspamd_expression_elt *elt,
 }
 
 static gdouble
-rspamd_ast_do_op (struct rspamd_expression_elt *elt, gdouble val,
-               gdouble acc, gdouble lim)
+rspamd_ast_do_op (struct rspamd_expression_elt *elt, gdouble val, gdouble acc)
 {
        gdouble ret = val;
 
        g_assert (elt->type == ELT_OP);
 
        if (isnan (acc)) {
-               switch (elt->p.op) {
+               switch (elt->p.op.op) {
                case OP_PLUS:
                case OP_MINUS:
                case OP_OR:
                case OP_AND:
                case OP_MULT:
                case OP_DIVIDE:
-                       /* Hack for arithmetics */
-                       if (!isnan (lim)) {
-                               acc = lim;
-                       }
-                       else {
-                               return val;
-                       }
-                       break;
+                       return val;
                default:
                        acc = val;
                        break;
                }
        }
 
-       switch (elt->p.op) {
+       switch (elt->p.op.op) {
        case OP_NOT:
                ret = fabs (val) > DOUBLE_EPSILON ? 0.0 : 1.0;
                break;
@@ -1098,16 +1131,16 @@ rspamd_ast_do_op (struct rspamd_expression_elt *elt, gdouble val,
                ret = acc / val;
                break;
        case OP_GE:
-               ret = acc >= lim;
+               ret = acc >= val;
                break;
        case OP_GT:
-               ret = acc > lim;
+               ret = acc > val;
                break;
        case OP_LE:
-               ret = acc <= lim;
+               ret = acc <= val;
                break;
        case OP_LT:
-               ret = acc < lim;
+               ret = acc < val;
                break;
        case OP_AND:
                ret = (acc * val);
@@ -1129,7 +1162,7 @@ rspamd_ast_process_node (struct rspamd_expression *e, GNode *node,
 {
        struct rspamd_expression_elt *elt, *celt, *parelt = NULL;
        GNode *cld;
-       gdouble acc = NAN, lim = NAN;
+       gdouble acc = NAN;
        gdouble t1, t2, val;
        gboolean calc_ticks = FALSE;
 
@@ -1181,33 +1214,19 @@ rspamd_ast_process_node (struct rspamd_expression *e, GNode *node,
        case ELT_OP:
                g_assert (node->children != NULL);
 
-               /* Try to find limit at the parent node */
-               if (node->parent) {
-                       parelt = node->parent->data;
-                       celt = node->parent->children->data;
-
-                       if (celt->type == ELT_LIMIT) {
-                               lim = celt->p.lim;
-                       }
-               }
-
                DL_FOREACH (node->children, cld) {
                        celt = cld->data;
 
-                       /* Save limit if we've found it */
-                       if (celt->type == ELT_LIMIT) {
-                               lim = celt->p.lim;
-                               continue;
-                       }
-
                        val = rspamd_ast_process_node (e, cld, process_data);
-                       msg_debug_expression ("before op: op=%s; lim=%.1f; acc=%.1f; val = %.2f", rspamd_expr_op_to_str(elt->p.op), lim, acc, val);
-                       acc = rspamd_ast_do_op (elt, val, acc, lim);
-                       msg_debug_expression ("after op: op=%s; lim=%.1f; acc=%.1f; val = %.2f", rspamd_expr_op_to_str(elt->p.op), lim, acc, val);
+                       msg_debug_expression ("before op: op=%s; acc=%.1f; val = %.2f", rspamd_expr_op_to_str(elt->p.op.op),
+                                       acc, val);
+                       acc = rspamd_ast_do_op (elt, val, acc);
+                       msg_debug_expression ("after op: op=%s; acc=%.1f; val = %.2f", rspamd_expr_op_to_str(elt->p.op.op),
+                                       acc, val);
 
                        /* Check if we need to process further */
                        if (!(process_data->flags & RSPAMD_EXPRESSION_FLAG_NOOPT)) {
-                               if (rspamd_ast_node_done (elt, parelt, acc, lim)) {
+                               if (rspamd_ast_node_done (elt, parelt, acc)) {
                                        msg_debug_expression ("optimizer: done");
                                        return acc;
                                }
@@ -1319,7 +1338,7 @@ rspamd_ast_string_traverse (GNode *n, gpointer d)
                }
        }
        else {
-               op_str = rspamd_expr_op_to_str (elt->p.op);
+               op_str = rspamd_expr_op_to_str (elt->p.op.op);
                g_string_append (res, op_str);
 
                if (n->children) {
@@ -1401,7 +1420,7 @@ rspamd_expression_node_is_op (GNode *node, enum rspamd_expression_op op)
 
        elt = node->data;
 
-       if (elt->type == ELT_OP && elt->p.op == op) {
+       if (elt->type == ELT_OP && elt->p.op.op == op) {
                return TRUE;
        }