/* int dict_flags;
/* DESCRIPTION
/* dict_pcre_open() opens the named file and compiles the contained
-/* regular expressions.
+/* regular expressions. The result object can be used to match strings
+/* against the table.
/* SEE ALSO
/* dict(3) generic dictionary manager
/* AUTHOR(S)
#include "dict.h"
#include "dict_pcre.h"
#include "mac_parse.h"
-
-/* PCRE library */
-
#include "pcre.h"
-#define PCRE_MAX_CAPTURE 99 /* Max strings captured by regexp - */
- /* essentially the max number of (..) */
+ /*
+ * Support for IF/ENDIF based on an idea by Bert Driehuis.
+ */
+#define DICT_PCRE_OP_MATCH 1 /* Match this regexp */
+#define DICT_PCRE_OP_IF 2 /* Increase if/endif nesting on match */
+#define DICT_PCRE_OP_ENDIF 3 /* Decrease if/endif nesting on match */
+
+ /*
+ * Max strings captured by regexp - essentially the max number of (..)
+ */
+#define PCRE_MAX_CAPTURE 99
+
+ /*
+ * Regular expression before and after compilation.
+ */
+typedef struct {
+ char *regexp; /* regular expression */
+ int options; /* options */
+} DICT_PCRE_REGEXP;
+
+typedef struct {
+ pcre *pattern; /* the compiled pattern */
+ pcre_extra *hints; /* hints to speed pattern execution */
+} DICT_PCRE_ENGINE;
+
+ /*
+ * Compiled generic rule, and subclasses that derive from it.
+ */
+typedef struct DICT_PCRE_RULE {
+ int op; /* DICT_PCRE_OP_MATCH/IF/ENDIF */
+ int nesting; /* level of IF/ENDIF nesting */
+ int lineno; /* source file line number */
+ struct DICT_PCRE_RULE *next; /* next rule in dict */
+} DICT_PCRE_RULE;
-struct dict_pcre_list {
- pcre *pattern; /* The compiled pattern */
- pcre_extra *hints; /* Hints to speed pattern execution */
- char *replace; /* Replacement string */
- int lineno; /* Source file line number */
- struct dict_pcre_list *next; /* Next regexp in dict */
-};
+typedef struct {
+ DICT_PCRE_RULE rule; /* generic part */
+ pcre *pattern; /* compiled pattern */
+ pcre_extra *hints; /* hints to speed pattern execution */
+ char *replacement; /* replacement string */
+} DICT_PCRE_MATCH_RULE;
+typedef struct {
+ DICT_PCRE_RULE rule; /* generic members */
+ pcre *pattern; /* compiled pattern */
+ pcre_extra *hints; /* hints to speed pattern execution */
+} DICT_PCRE_IF_RULE;
+
+ /*
+ * PCRE map.
+ */
typedef struct {
DICT dict; /* generic members */
- struct dict_pcre_list *head;
+ DICT_PCRE_RULE *head;
} DICT_PCRE;
-static dict_pcre_init = 0; /* flag need to init pcre library */
+static int dict_pcre_init = 0; /* flag need to init pcre library */
/*
- * Context for macro expansion callback.
+ * Context for $number expansion callback.
*/
-struct dict_pcre_context {
- const char *dict_name; /* source dict name */
+typedef struct {
+ const char *mapname; /* source dict name */
int lineno; /* source file line number */
- VSTRING *buf; /* target string buffer */
- const char *subject; /* str against which we match */
+ VSTRING *expansion_buf; /* target string buffer */
+ const char *lookup_string; /* string against which we match */
int offsets[PCRE_MAX_CAPTURE * 3]; /* Cut substrings */
int matches; /* Count of cuts */
-};
-
-/*
- * Macro expansion callback - replace $0-${99} with strings cut from
- * matched string.
- */
-static int dict_pcre_action(int type, VSTRING *buf, char *ptr)
+} DICT_PCRE_EXPAND_CONTEXT;
+
+ /*
+ * Compatibility.
+ */
+#ifndef MAC_PARSE_OK
+#define MAC_PARSE_OK 0
+#endif
+
+ /*
+ * Macros to make dense code more accessible.
+ */
+#define NULL_STARTOFFSET (0)
+#define NULL_EXEC_OPTIONS (0)
+#define NULL_OVECTOR ((int *) 0)
+#define NULL_OVECTOR_LENGTH (0)
+
+/* dict_pcre_expand - replace $number with matched text */
+
+ /*
+ * Callback handed to mac_parse() by dict_pcre_lookup(). "type" tells
+ * whether "buf" holds a $number reference (MAC_PARSE_VARNAME) or literal
+ * text; "ptr" is the DICT_PCRE_EXPAND_CONTEXT of the current match. Output
+ * is appended to the context's expansion_buf. Returns MAC_PARSE_UNDEF when
+ * the referenced substring is empty and MAC_PARSE_OK otherwise; a failing
+ * pcre_get_substring() call is reported with msg_fatal().
+ */
+static int dict_pcre_expand(int type, VSTRING *buf, char *ptr)
{
- struct dict_pcre_context *ctxt = (struct dict_pcre_context *) ptr;
+ DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr;
const char *pp;
- int n,
- ret;
+ int n;
+ int ret;
+ /*
+ * Replace $0-${99} with strings cut from matched text.
+ */
if (type == MAC_PARSE_VARNAME) {
n = atoi(vstring_str(buf));
- ret = pcre_get_substring(ctxt->subject, ctxt->offsets, ctxt->matches,
- n, &pp);
+ ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets,
+ ctxt->matches, n, &pp);
if (ret < 0) {
if (ret == PCRE_ERROR_NOSUBSTRING)
msg_fatal("regexp %s, line %d: replace index out of range",
- ctxt->dict_name, ctxt->lineno);
+ ctxt->mapname, ctxt->lineno);
else
msg_fatal("regexp %s, line %d: pcre_get_substring error: %d",
- ctxt->dict_name, ctxt->lineno, ret);
+ ctxt->mapname, ctxt->lineno, ret);
}
+ /*
+ * An empty substring is reported as undefined instead of being
+ * appended; the substring copy is released on both paths.
+ */
if (*pp == 0) {
myfree((char *) pp);
return (MAC_PARSE_UNDEF);
}
- vstring_strcat(ctxt->buf, pp);
+ vstring_strcat(ctxt->expansion_buf, pp);
myfree((char *) pp);
- return (0);
- } else
- /* Straight text - duplicate with no substitution */
- vstring_strcat(ctxt->buf, vstring_str(buf));
+ return (MAC_PARSE_OK);
+ }
- return (0);
+ /*
+ * Straight text - duplicate with no substitution.
+ */
+ else {
+ vstring_strcat(ctxt->expansion_buf, vstring_str(buf));
+ return (MAC_PARSE_OK);
+ }
}
-/*
- * Look up regexp dict and perform string substitution on matched
- * strings.
- */
-static const char *dict_pcre_lookup(DICT *dict, const char *name)
+/* dict_pcre_exec_error - report matching error */
+
+/*
+ * Translates a pcre_exec() result that is neither a match nor
+ * PCRE_ERROR_NOMATCH into a log message for the given map and line.
+ */
+static void dict_pcre_exec_error(const char *mapname, int lineno, int errval)
+{
+
+    /*
+     * An errval of zero is the only case that is merely warned about; the
+     * function then returns to its caller.
+     */
+    if (errval == 0) {
+        msg_warn("pcre map %s, line %d: too many (...)",
+                 mapname, lineno);
+        return;
+    }
+
+    /*
+     * Every other value is reported with msg_fatal().
+     */
+    switch (errval) {
+    case PCRE_ERROR_NULL:
+    case PCRE_ERROR_BADOPTION:
+        msg_fatal("pcre map %s, line %d: bad args to re_exec",
+                  mapname, lineno);
+    case PCRE_ERROR_BADMAGIC:
+    case PCRE_ERROR_UNKNOWN_NODE:
+        msg_fatal("pcre map %s, line %d: corrupt compiled regexp",
+                  mapname, lineno);
+    default:
+        msg_fatal("pcre map %s, line %d: unknown re_exec error: %d",
+                  mapname, lineno, errval);
+    }
+}
+
+/* dict_pcre_lookup - match string and perform optional substitution */
+
+ /*
+ * Walks the rule list in order. Returns the expanded replacement text of
+ * the first MATCH rule whose pattern matches lookup_string, or 0 when no
+ * rule produces a result. The returned text lives in a static buffer that
+ * is reset at the start of the next successful match. "nesting" counts the
+ * IF rules that matched so far; a rule is skipped when it was parsed at a
+ * deeper nesting level than that.
+ */
+static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string)
{
DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
- struct dict_pcre_list *pcre_list;
- int name_len = strlen(name);
- struct dict_pcre_context ctxt;
- static VSTRING *buf;
+ DICT_PCRE_RULE *rule;
+ DICT_PCRE_IF_RULE *if_rule;
+ DICT_PCRE_MATCH_RULE *match_rule;
+ int lookup_len = strlen(lookup_string);
+ DICT_PCRE_EXPAND_CONTEXT ctxt;
+ static VSTRING *expansion_buf;
+ int nesting = 0;
dict_errno = 0;
if (msg_verbose)
- msg_info("dict_pcre_lookup: %s: %s", dict_pcre->dict.name, name);
-
- /* Search for a matching expression */
- ctxt.matches = 0;
- for (pcre_list = dict_pcre->head; pcre_list; pcre_list = pcre_list->next) {
- if (pcre_list->pattern) {
- ctxt.matches = pcre_exec(pcre_list->pattern, pcre_list->hints,
- name, name_len, 0, 0, ctxt.offsets, PCRE_MAX_CAPTURE * 3);
- if (ctxt.matches != PCRE_ERROR_NOMATCH) {
- if (ctxt.matches > 0)
- break; /* Got a match! */
- else {
- /* An error */
- switch (ctxt.matches) {
- case 0:
- msg_warn("pcre map %s, line %d: too many (...)",
- dict_pcre->dict.name, pcre_list->lineno);
- break;
- case PCRE_ERROR_NULL:
- case PCRE_ERROR_BADOPTION:
- msg_fatal("pcre map %s, line %d: bad args to re_exec",
- dict_pcre->dict.name, pcre_list->lineno);
- break;
- case PCRE_ERROR_BADMAGIC:
- case PCRE_ERROR_UNKNOWN_NODE:
- msg_fatal("pcre map %s, line %d: corrupt compiled regexp",
- dict_pcre->dict.name, pcre_list->lineno);
- break;
- default:
- msg_fatal("pcre map %s, line %d: unknown re_exec error: %d",
- dict_pcre->dict.name, pcre_list->lineno, ctxt.matches);
- break;
- }
- return ((char *) 0);
- }
+ msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string);
+
+ for (rule = dict_pcre->head; rule; rule = rule->next) {
+
+ /*
+ * Skip rules inside failed IF/ENDIF.
+ */
+ if (nesting < rule->nesting)
+ continue;
+
+ switch (rule->op) {
+
+ /*
+ * Search for a matching expression.
+ */
+ case DICT_PCRE_OP_MATCH:
+ match_rule = (DICT_PCRE_MATCH_RULE *) rule;
+ ctxt.matches = pcre_exec(match_rule->pattern, match_rule->hints,
+ lookup_string, lookup_len,
+ NULL_STARTOFFSET, NULL_EXEC_OPTIONS,
+ ctxt.offsets, PCRE_MAX_CAPTURE * 3);
+ if (ctxt.matches == PCRE_ERROR_NOMATCH)
+ continue;
+ /*
+ * Zero or any other negative result: report it and, if the
+ * report returns, try the next rule.
+ */
+ if (ctxt.matches <= 0) {
+ dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches);
+ continue;
}
- }
- }
- /* If we've got a match, */
- if (ctxt.matches > 0) {
- /* Then perform substitution on replacement string */
- if (buf == 0)
- buf = vstring_alloc(10);
- VSTRING_RESET(buf);
- ctxt.buf = buf;
- ctxt.subject = name;
- ctxt.dict_name = dict_pcre->dict.name;
- ctxt.lineno = pcre_list->lineno;
-
- if (mac_parse(pcre_list->replace, dict_pcre_action, (char *) &ctxt) & MAC_PARSE_ERROR)
- msg_fatal("pcre map %s, line %d: bad replacement syntax",
- dict_pcre->dict.name, pcre_list->lineno);
-
- VSTRING_TERMINATE(buf);
- return (vstring_str(buf));
+ /*
+ * We've got a match. Perform substitution on replacement string.
+ */
+ if (expansion_buf == 0)
+ expansion_buf = vstring_alloc(10);
+ VSTRING_RESET(expansion_buf);
+ ctxt.expansion_buf = expansion_buf;
+ ctxt.lookup_string = lookup_string;
+ ctxt.mapname = dict->name;
+ ctxt.lineno = rule->lineno;
+
+ if (mac_parse(match_rule->replacement, dict_pcre_expand,
+ (char *) &ctxt) & MAC_PARSE_ERROR)
+ msg_fatal("pcre map %s, line %d: bad replacement syntax",
+ dict->name, rule->lineno);
+
+ VSTRING_TERMINATE(expansion_buf);
+ return (vstring_str(expansion_buf));
+
+ /*
+ * Conditional. XXX We provide space for matched substring info
+ * because PCRE uses part of it as workspace for backtracking.
+ * PCRE will allocate memory if it runs out of backtracking
+ * storage.
+ */
+ case DICT_PCRE_OP_IF:
+ if_rule = (DICT_PCRE_IF_RULE *) rule;
+ ctxt.matches = pcre_exec(if_rule->pattern, if_rule->hints,
+ lookup_string, lookup_len,
+ NULL_STARTOFFSET, NULL_EXEC_OPTIONS,
+ ctxt.offsets, PCRE_MAX_CAPTURE * 3);
+ if (ctxt.matches == PCRE_ERROR_NOMATCH)
+ continue;
+ if (ctxt.matches <= 0) {
+ dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches);
+ continue;
+ }
+ /*
+ * The condition holds: rules one level deeper become eligible.
+ */
+ nesting++;
+ continue;
+
+ /*
+ * ENDIF after successful IF.
+ */
+ case DICT_PCRE_OP_ENDIF:
+ nesting--;
+ continue;
+
+ default:
+ msg_panic("dict_pcre_lookup: impossible operation %d", rule->op);
+ }
}
- return ((char *) 0);
+ return (0);
}
/* dict_pcre_close - close pcre dictionary */
+ /*
+ * Releases every rule on the list and then the dictionary itself. What a
+ * rule owns depends on its operation: a MATCH rule has a compiled pattern,
+ * study hints and replacement text; an IF rule has a compiled pattern and
+ * study hints; an ENDIF rule has only the generic part. Each member is
+ * tested before it is handed to myfree().
+ */
static void dict_pcre_close(DICT *dict)
{
DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
- struct dict_pcre_list *pcre_list;
- struct dict_pcre_list *next;
-
- for (pcre_list = dict_pcre->head; pcre_list; pcre_list = next) {
- next = pcre_list->next;
- if (pcre_list->pattern)
- myfree((char *) pcre_list->pattern);
- if (pcre_list->hints)
- myfree((char *) pcre_list->hints);
- if (pcre_list->replace)
- myfree((char *) pcre_list->replace);
- myfree((char *) pcre_list);
+ DICT_PCRE_RULE *rule;
+ DICT_PCRE_RULE *next;
+ DICT_PCRE_MATCH_RULE *match_rule;
+ DICT_PCRE_IF_RULE *if_rule;
+
+ for (rule = dict_pcre->head; rule; rule = next) {
+ /* Remember the successor before this rule is freed. */
+ next = rule->next;
+ switch (rule->op) {
+ case DICT_PCRE_OP_MATCH:
+ match_rule = (DICT_PCRE_MATCH_RULE *) rule;
+ if (match_rule->pattern)
+ myfree((char *) match_rule->pattern);
+ if (match_rule->hints)
+ myfree((char *) match_rule->hints);
+ if (match_rule->replacement)
+ myfree((char *) match_rule->replacement);
+ break;
+ case DICT_PCRE_OP_IF:
+ if_rule = (DICT_PCRE_IF_RULE *) rule;
+ if (if_rule->pattern)
+ myfree((char *) if_rule->pattern);
+ if (if_rule->hints)
+ myfree((char *) if_rule->hints);
+ break;
+ case DICT_PCRE_OP_ENDIF:
+ break;
+ default:
+ /* Name this function, not its dict_regexp counterpart. */
+ msg_panic("dict_pcre_close: unknown operation %d", rule->op);
+ }
+ myfree((char *) rule);
}
dict_free(dict);
}
-/*
- * dict_pcre_open - load and compile a file containing regular expressions
- */
-DICT *dict_pcre_open(const char *map, int unused_flags, int dict_flags)
+/* dict_pcre_get_pattern - extract pattern from rule */
+
+/*
+ * Parses one delimited regular expression and its trailing option letters,
+ * starting at *bufp. The first character is taken as the delimiter. The
+ * closing delimiter is overwritten with a null byte, so the caller's buffer
+ * is modified and pattern->regexp points into it. Options start out as
+ * PCRE_CASELESS | PCRE_DOTALL and each option letter toggles one flag.
+ *
+ * Returns 1 and advances *bufp past the options on success. Returns 0 after
+ * logging a warning when there is no pattern, no closing delimiter, or an
+ * unknown option letter; *bufp is left unchanged in that case.
+ */
+static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp,
+                                 DICT_PCRE_REGEXP *pattern)
+{
+    char   *p = *bufp;
+    char    re_delimiter;
+
+    /*
+     * Refuse empty input. Without this test the string terminator would be
+     * consumed as the delimiter and the scan below would start one byte
+     * past the end of the text (for example on a line that holds only "IF").
+     */
+    if (*p == 0) {
+        msg_warn("pcre map %s, line %d: no regexp: skipping this rule",
+                 mapname, lineno);
+        return (0);
+    }
+    re_delimiter = *p++;
+    pattern->regexp = p;
+
+    /*
+     * Search for second delimiter, handling backslash escape.
+     */
+    while (*p) {
+        if (*p == '\\') {
+            ++p;
+            if (*p == 0)
+                break;
+        } else if (*p == re_delimiter)
+            break;
+        ++p;
+    }
+
+    if (!*p) {
+        msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": "
+                 "ignoring this rule", mapname, lineno, re_delimiter);
+        return (0);
+    }
+    *p++ = 0;                           /* Null term the regexp */
+
+    /*
+     * Parse any regexp options. They run up to the next whitespace
+     * character or the end of the text.
+     */
+    pattern->options = PCRE_CASELESS | PCRE_DOTALL;
+    while (*p && !ISSPACE(*p)) {
+        switch (*p) {
+        case 'i':
+            pattern->options ^= PCRE_CASELESS;
+            break;
+        case 'm':
+            pattern->options ^= PCRE_MULTILINE;
+            break;
+        case 's':
+            pattern->options ^= PCRE_DOTALL;
+            break;
+        case 'x':
+            pattern->options ^= PCRE_EXTENDED;
+            break;
+        case 'A':
+            pattern->options ^= PCRE_ANCHORED;
+            break;
+        case 'E':
+            pattern->options ^= PCRE_DOLLAR_ENDONLY;
+            break;
+        case 'U':
+            pattern->options ^= PCRE_UNGREEDY;
+            break;
+        case 'X':
+            pattern->options ^= PCRE_EXTRA;
+            break;
+        default:
+            msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": "
+                     "skipping this rule", mapname, lineno, *p);
+            return (0);
+        }
+        ++p;
+    }
+    *bufp = p;
+    return (1);
+}
+
+/* dict_pcre_compile - compile pattern */
+
+/*
+ * Compiles pattern->regexp with pattern->options and studies the result,
+ * storing both outcomes in *engine. Returns 1 on success. On a compile or
+ * study error a warning is logged and 0 is returned; after a study error
+ * the compiled pattern has already been freed.
+ */
+static int dict_pcre_compile(const char *mapname, int lineno,
+                             DICT_PCRE_REGEXP *pattern,
+                             DICT_PCRE_ENGINE *engine)
+{
+    const char *why;
+    int     where;
+
+    engine->pattern = pcre_compile(pattern->regexp, pattern->options,
+                                   &why, &where, NULL);
+    if (engine->pattern != 0) {
+        engine->hints = pcre_study(engine->pattern, 0, &why);
+        if (why == 0)
+            return (1);
+
+        /*
+         * Study failed: the compiled pattern is of no use to the caller.
+         */
+        msg_warn("pcre map %s, line %d: error while studying regex: %s",
+                 mapname, lineno, why);
+        myfree((char *) engine->pattern);
+        return (0);
+    }
+    msg_warn("pcre map %s, line %d: error in regex at offset %d: %s",
+             mapname, lineno, where, why);
+    return (0);
+}
+
+/* dict_pcre_rule_alloc - fill in a generic rule structure */
+
+/*
+ * Allocates "size" bytes with mymalloc() and initializes the generic rule
+ * members at the start of that memory. "size" is the size of the complete
+ * rule type the caller wants; members beyond the generic part are left for
+ * the caller to fill in.
+ */
+static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int nesting,
+                                            int lineno,
+                                            size_t size)
+{
+    DICT_PCRE_RULE *generic = (DICT_PCRE_RULE *) mymalloc(size);
+
+    generic->next = 0;
+    generic->lineno = lineno;
+    generic->nesting = nesting;
+    generic->op = op;
+    return (generic);
+}
+
+/* dict_pcre_parse_rule - parse and compile one rule */
+
+/*
+ * Turns one table line into a rule. The caller, dict_pcre_open(), passes
+ * lines with trailing blanks removed and never passes an empty line.
+ * "nesting" is the IF/ENDIF depth at this point of the table; it is stored
+ * in the rule. The line is modified in place while the pattern is extracted.
+ *
+ * Three forms are recognized: "/pattern/flags replacement" (any line that
+ * does not start with a letter or digit), "IF /pattern/flags" and "ENDIF".
+ * Returns the newly allocated rule, or 0 after logging a warning when the
+ * line is to be ignored.
+ */
+static DICT_PCRE_RULE *dict_pcre_parse_rule(const char *mapname, int lineno,
+                                            char *line, int nesting)
+{
+    char   *p;
+
+    p = line;
+
+    /*
+     * An ordinary match rule takes one pattern and replacement text.
+     */
+    if (!ISALNUM(*p)) {
+        DICT_PCRE_REGEXP regexp;
+        DICT_PCRE_ENGINE engine;
+        DICT_PCRE_MATCH_RULE *match_rule;
+
+        /*
+         * Get the pattern string and options.
+         */
+        if (dict_pcre_get_pattern(mapname, lineno, &p, &regexp) == 0)
+            return (0);
+
+        /*
+         * Get the replacement text. A missing replacement is not an error:
+         * p then points at the terminator and an empty string is stored.
+         */
+        while (*p && ISSPACE(*p))
+            ++p;
+        if (!*p)
+            msg_warn("pcre map %s, line %d: no replacement text: "
+                     "using empty string", mapname, lineno);
+
+        /*
+         * Compile the pattern.
+         */
+        if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
+            return (0);
+
+        /*
+         * Save the result.
+         */
+        match_rule = (DICT_PCRE_MATCH_RULE *)
+            dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, nesting, lineno,
+                                 sizeof(DICT_PCRE_MATCH_RULE));
+        match_rule->replacement = mystrdup(p);
+        match_rule->pattern = engine.pattern;
+        match_rule->hints = engine.hints;
+        return ((DICT_PCRE_RULE *) match_rule);
+    }
+
+    /*
+     * The IF operator takes one pattern but no replacement text.
+     */
+    else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
+        DICT_PCRE_REGEXP regexp;
+        DICT_PCRE_ENGINE engine;
+        DICT_PCRE_IF_RULE *if_rule;
+
+        p += 2;
+
+        /*
+         * Get the pattern.
+         */
+        while (*p && ISSPACE(*p))
+            p++;
+        if (!dict_pcre_get_pattern(mapname, lineno, &p, &regexp))
+            return (0);
+
+        /*
+         * Warn about out-of-place text.
+         */
+        if (*p)
+            msg_warn("pcre map %s, line %d: ignoring extra text after IF",
+                     mapname, lineno);
+
+        /*
+         * Compile the pattern.
+         */
+        if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
+            return (0);
+
+        /*
+         * Save the result.
+         */
+        if_rule = (DICT_PCRE_IF_RULE *)
+            dict_pcre_rule_alloc(DICT_PCRE_OP_IF, nesting, lineno,
+                                 sizeof(DICT_PCRE_IF_RULE));
+        if_rule->pattern = engine.pattern;
+        if_rule->hints = engine.hints;
+        return ((DICT_PCRE_RULE *) if_rule);
+    }
+
+    /*
+     * The ENDIF operator takes no patterns and no replacement text.
+     */
+    else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
+        DICT_PCRE_RULE *rule;
+
+        p += 5;
+
+        /*
+         * Warn about out-of-place ENDIFs.
+         */
+        if (nesting == 0) {
+            msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF",
+                     mapname, lineno);
+            return (0);
+        }
+
+        /*
+         * Warn about out-of-place text.
+         */
+        if (*p)
+            msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF",
+                     mapname, lineno);
+
+        /*
+         * Save the result. An ENDIF rule has no members beyond the generic
+         * part.
+         */
+        rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, nesting, lineno,
+                                    sizeof(DICT_PCRE_RULE));
+        return (rule);
+    }
+
+    /*
+     * Unrecognized input.
+     */
+    else {
+        msg_warn("pcre map %s, line %d: ignoring unrecognized request",
+                 mapname, lineno);
+        return (0);
+    }
+}
+
+/* dict_pcre_open - load and compile a file containing regular expressions */
+
+ /*
+ * Reads the file named by mapname, hands every non-empty line to
+ * dict_pcre_parse_rule() and appends each resulting rule to the rule list
+ * of the new dictionary. Lines for which the parser returns 0 are skipped.
+ * "nesting" tracks the IF/ENDIF depth while parsing and is passed to the
+ * parser for every line. A file that cannot be opened is a fatal error.
+ * NOTE(review): nothing here checks that nesting is back at zero at end
+ * of file, so an IF without ENDIF goes unreported - confirm whether that
+ * is intended.
+ */
+DICT *dict_pcre_open(const char *mapname, int unused_flags, int dict_flags)
{
DICT_PCRE *dict_pcre;
VSTREAM *map_fp;
VSTRING *line_buffer;
- struct dict_pcre_list *pcre_list = NULL,
- *pl;
+ DICT_PCRE_RULE *last_rule = 0;
+ DICT_PCRE_RULE *rule;
int lineno = 0;
- char *regexp,
- *p,
- re_delimiter;
- int re_options;
- pcre *pattern;
- pcre_extra *hints;
- const char *error;
- int errptr;
+ int nesting = 0;
+ char *p;
line_buffer = vstring_alloc(100);
- dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, map,
+ dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname,
sizeof(*dict_pcre));
dict_pcre->dict.lookup = dict_pcre_lookup;
dict_pcre->dict.close = dict_pcre_close;
dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN;
- dict_pcre->head = NULL;
+ dict_pcre->head = 0;
+ /*
+ * One-time setup: make the PCRE library allocate and free memory
+ * through mymalloc() and myfree().
+ */
if (dict_pcre_init == 0) {
pcre_malloc = (void *(*) (size_t)) mymalloc;
pcre_free = (void (*) (void *)) myfree;
dict_pcre_init = 1;
}
- if ((map_fp = vstream_fopen(map, O_RDONLY, 0)) == 0) {
- msg_fatal("open %s: %m", map);
- }
- while (readlline(line_buffer, map_fp, &lineno)) {
+ /*
+ * Parse the pcre table.
+ */
+ if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0)
+ msg_fatal("open %s: %m", mapname);
+
+ while (readlline(line_buffer, map_fp, &lineno)) {
p = vstring_str(line_buffer);
trimblanks(p, 0)[0] = 0; /* Trim space at end */
- re_delimiter = *p++;
- regexp = p;
-
- /* Search for second delimiter, handling backslash escape */
- while (*p) {
- if (*p == '\\') {
- ++p;
- if (*p == 0)
- break;
- } else if (*p == re_delimiter)
- break;
- ++p;
- }
-
- if (!*p) {
- msg_warn("%s, line %d: no closing regexp delimiter: %c",
- VSTREAM_PATH(map_fp), lineno, re_delimiter);
- continue;
- }
- *p++ = '\0'; /* Null term the regexp */
-
- /* Now parse any regexp options */
- re_options = PCRE_CASELESS | PCRE_DOTALL;
- while (*p && !ISSPACE(*p)) {
- switch (*p) {
- case 'i':
- re_options ^= PCRE_CASELESS;
- break;
- case 'm':
- re_options ^= PCRE_MULTILINE;
- break;
- case 's':
- re_options ^= PCRE_DOTALL;
- break;
- case 'x':
- re_options ^= PCRE_EXTENDED;
- break;
- case 'A':
- re_options ^= PCRE_ANCHORED;
- break;
- case 'E':
- re_options ^= PCRE_DOLLAR_ENDONLY;
- break;
- case 'U':
- re_options ^= PCRE_UNGREEDY;
- break;
- case 'X':
- re_options ^= PCRE_EXTRA;
- break;
- default:
- msg_warn("%s, line %d: unknown regexp option '%c'",
- VSTREAM_PATH(map_fp), lineno, *p);
- }
- ++p;
- }
-
- while (*p && ISSPACE(*p))
- ++p;
-
- if (!*p) {
- msg_warn("%s, line %d: no replacement text",
- VSTREAM_PATH(map_fp), lineno);
- p = "";
- }
- /* Compile the patern */
- pattern = pcre_compile(regexp, re_options, &error, &errptr, NULL);
- if (pattern == NULL) {
- msg_warn("%s, line %d: error in regex at offset %d: %s",
- VSTREAM_PATH(map_fp), lineno, errptr, error);
+ if (*p == 0)
continue;
- }
- hints = pcre_study(pattern, 0, &error);
- if (error != NULL) {
- msg_warn("%s, line %d: error while studying regex: %s",
- VSTREAM_PATH(map_fp), lineno, error);
- myfree((char *) pattern);
+ rule = dict_pcre_parse_rule(mapname, lineno, p, nesting);
+ if (rule == 0)
continue;
+ /*
+ * The rule was parsed at the current depth; only rules after it see
+ * the depth change caused by an IF or ENDIF.
+ */
+ if (rule->op == DICT_PCRE_OP_IF) {
+ nesting++;
+ } else if (rule->op == DICT_PCRE_OP_ENDIF) {
+ nesting--;
}
- /* Add it to the list */
- pl = (struct dict_pcre_list *) mymalloc(sizeof(struct dict_pcre_list));
-
- /* Save the replacement string (if any) */
- pl->replace = mystrdup(p);
- pl->pattern = pattern;
- pl->hints = hints;
- pl->next = NULL;
- pl->lineno = lineno;
-
- if (pcre_list == NULL)
- dict_pcre->head = pl;
+ if (last_rule == 0)
+ dict_pcre->head = rule;
else
- pcre_list->next = pl;
- pcre_list = pl;
+ last_rule->next = rule;
+ last_rule = rule;
}
vstring_free(line_buffer);
vstream_fclose(map_fp);
- return (DICT_DEBUG(&dict_pcre->dict));
+ return (DICT_DEBUG (&dict_pcre->dict));
}
#endif /* HAS_PCRE */
#include "mac_parse.h"
/*
- * Support for IF/ENDIF based on code by Bert Driehuis.
+ * Support for IF/ENDIF based on an idea by Bert Driehuis.
*/
-#define REGEXP_OP_MATCH 1 /* Match this regexp */
-#define REGEXP_OP_IF 2 /* Increase if/endif nesting on match */
-#define REGEXP_OP_ENDIF 3 /* Decrease if/endif nesting */
+#define DICT_REGEXP_OP_MATCH 1 /* Match this regexp */
+#define DICT_REGEXP_OP_IF 2 /* Increase if/endif nesting on match */
+#define DICT_REGEXP_OP_ENDIF 3 /* Decrease if/endif nesting on match */
/*
* Regular expression before compiling.
typedef struct {
char *regexp; /* regular expression */
int options; /* regcomp() options */
+ int match; /* positive or negative match */
} DICT_REGEXP_PATTERN;
/*
- * Compiled regexp rule with replacement text.
+ * Compiled generic rule, and subclasses that derive from it.
*/
-typedef struct dict_regexp_list {
- struct dict_regexp_list *next; /* next regexp in dict */
- regex_t *primary_exp; /* compiled primary pattern */
- regex_t *negated_exp; /* compiled negated pattern */
- char *replacement; /* replacement text */
- size_t max_nsub; /* largest replacement $number */
- int lineno; /* source file line number */
+typedef struct DICT_REGEXP_RULE {
+ int op; /* DICT_REGEXP_OP_MATCH/IF/ENDIF */
int nesting; /* Level of search nesting */
- int op; /* REGEXP_OP_MATCH, OP_IF, OP_ENDIF */
+ int lineno; /* source file line number */
+ struct DICT_REGEXP_RULE *next; /* next rule in dict */
} DICT_REGEXP_RULE;
+typedef struct {
+ DICT_REGEXP_RULE rule; /* generic part */
+ regex_t *first_exp; /* compiled primary pattern */
+ int first_match; /* positive or negative match */
+ regex_t *second_exp; /* compiled secondary pattern */
+ int second_match; /* positive or negative match */
+ char *replacement; /* replacement text */
+ size_t max_sub; /* largest $number in replacement */
+} DICT_REGEXP_MATCH_RULE;
+
+typedef struct {
+ DICT_REGEXP_RULE rule; /* generic members */
+ regex_t *expr; /* the condition */
+ int match; /* positive or negative match */
+} DICT_REGEXP_IF_RULE;
+
/*
* Regexp map.
*/
typedef struct {
DICT dict; /* generic members */
- regmatch_t *pmatch; /* replacement substring storage */
+ regmatch_t *pmatch; /* matched substring info */
DICT_REGEXP_RULE *head; /* first rule */
} DICT_REGEXP;
+ /*
+ * Macros to make dense code more readable.
+ */
+#define NULL_SUBSTITUTIONS (0)
+#define NULL_MATCH_RESULT ((regmatch_t *) 0)
+
/*
* Context for $number expansion callback.
*/
typedef struct {
- DICT_REGEXP *dict; /* the dictionary entry */
- DICT_REGEXP_RULE *rule; /* the rule we matched */
- VSTRING *buf; /* target string buffer */
- const char *subject; /* matched text */
+ DICT_REGEXP *dict_regexp; /* the dictionary entry */
+ DICT_REGEXP_MATCH_RULE *match_rule; /* the rule we matched */
+ const char *lookup_string; /* matched text */
+ VSTRING *expansion_buf; /* buffer for $number expansion */
} DICT_REGEXP_EXPAND_CONTEXT;
/*
* Context for $number pre-scan callback.
*/
typedef struct {
- const char *map; /* name of regexp map */
+ const char *mapname; /* name of regexp map */
int lineno; /* where in file */
- size_t max_nsub; /* largest $number seen */
+ size_t max_sub; /* largest $number seen */
} DICT_REGEXP_PRESCAN_CONTEXT;
/*
static int dict_regexp_expand(int type, VSTRING *buf, char *ptr)
{
DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr;
- DICT_REGEXP_RULE *rule = ctxt->rule;
- DICT_REGEXP *dict = ctxt->dict;
+ DICT_REGEXP_MATCH_RULE *match_rule = ctxt->match_rule;
+ DICT_REGEXP *dict_regexp = ctxt->dict_regexp;
+ regmatch_t *pmatch;
size_t n;
/*
*/
if (type == MAC_PARSE_VARNAME) {
n = atoi(vstring_str(buf));
- if (n < 1 || n > rule->max_nsub)
+ if (n < 1 || n > match_rule->max_sub)
msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"",
- dict->dict.name, rule->lineno, vstring_str(buf));
- if (dict->pmatch[n].rm_so < 0 ||
- dict->pmatch[n].rm_so == dict->pmatch[n].rm_eo) {
+ dict_regexp->dict.name, match_rule->rule.lineno,
+ vstring_str(buf));
+ pmatch = dict_regexp->pmatch + n;
+ if (pmatch->rm_so < 0 || pmatch->rm_so == pmatch->rm_eo)
return (MAC_PARSE_UNDEF); /* empty or not matched */
- }
- vstring_strncat(ctxt->buf, ctxt->subject + dict->pmatch[n].rm_so,
- dict->pmatch[n].rm_eo - dict->pmatch[n].rm_so);
+ vstring_strncat(ctxt->expansion_buf,
+ ctxt->lookup_string + pmatch->rm_so,
+ pmatch->rm_eo - pmatch->rm_so);
+ return (MAC_PARSE_OK);
}
/*
* Straight text - duplicate with no substitution.
*/
- else
- vstring_strcat(ctxt->buf, vstring_str(buf));
-
- return (MAC_PARSE_OK);
+ else {
+ vstring_strcat(ctxt->expansion_buf, vstring_str(buf));
+ return (MAC_PARSE_OK);
+ }
}
/* dict_regexp_regerror - report regexp compile/execute error */
+ /*
+ * Converts the regcomp()/regexec() error code for "expr" into text with
+ * regerror() and logs it as a warning that names the map and line. The
+ * text is limited to the size of the local 256-byte buffer.
+ */
-static void dict_regexp_regerror(const char *map, int lineno, int error,
- const regex_t * expr)
+static void dict_regexp_regerror(const char *mapname, int lineno, int error,
+ const regex_t *expr)
{
char errbuf[256];
(void) regerror(error, expr, errbuf, sizeof(errbuf));
- msg_warn("regexp map %s, line %d: %s", map, lineno, errbuf);
+ msg_warn("regexp map %s, line %d: %s", mapname, lineno, errbuf);
}
-/* dict_regexp_lookup - match string and perform substitution */
+ /*
+ * Inlined to reduce function call overhead in the time-critical loop.
+ */
+#define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \
+ ((err) = regexec((expr), (str), (nsub), (pmatch), 0), \
+ ((err) == REG_NOMATCH ? !(match) : \
+ (err) == 0 ? (match) : \
+ (dict_regexp_regerror((map), (line), (err), (expr)), 0)))
+
+/* dict_regexp_lookup - match string and perform optional substitution */
+ /*
+ * Walks the rule list in order and returns the result of the first MATCH
+ * rule whose first pattern, and second pattern if present, give the
+ * required positive or negative outcome. When the replacement contains no
+ * $number references (max_sub is 0) the stored replacement text itself is
+ * returned; otherwise the expanded text is returned from a static buffer
+ * that is reset on the next expansion. Returns 0 when no rule applies.
+ * "nesting" counts the IF rules that held so far; a rule is skipped when
+ * its recorded nesting level is deeper than that.
+ */
-static const char *dict_regexp_lookup(DICT *dict, const char *name)
+static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string)
{
DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
DICT_REGEXP_RULE *rule;
- DICT_REGEXP_EXPAND_CONTEXT ctxt;
- static VSTRING *buf;
+ DICT_REGEXP_IF_RULE *if_rule;
+ DICT_REGEXP_MATCH_RULE *match_rule;
+ DICT_REGEXP_EXPAND_CONTEXT expand_context;
+ static VSTRING *expansion_buf;
int error;
int nesting = 0;
dict_errno = 0;
if (msg_verbose)
- msg_info("dict_regexp_lookup: %s: %s", dict_regexp->dict.name, name);
+ msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string);
- /*
- * Search for the first matching primary expression. Limit the overhead
- * for substring substitution to the bare minimum.
- */
for (rule = dict_regexp->head; rule; rule = rule->next) {
- if (nesting < rule->nesting) /* inside false IF/ENDIF */
- continue;
- if (rule->op == REGEXP_OP_ENDIF) {
- nesting--;
- continue;
- }
- error = regexec(rule->primary_exp, name, rule->max_nsub + 1,
- rule->max_nsub ? dict_regexp->pmatch :
- (regmatch_t *) 0, 0);
- switch (error) {
- case REG_NOMATCH:
- continue;
- default:
- dict_regexp_regerror(dict_regexp->dict.name, rule->lineno,
- error, rule->primary_exp);
- continue;
- case 0:
- break;
- }
/*
- * Primary expression match found. Require a negative match on the
- * optional negated expression. In this case we're never going to do
- * any string substitution.
+ * Skip rules inside failed IF/ENDIF.
*/
- if (rule->negated_exp) {
- error = regexec(rule->negated_exp, name, 0, (regmatch_t *) 0, 0);
- switch (error) {
- case 0:
+ if (nesting < rule->nesting)
+ continue;
+
+ switch (rule->op) {
+
+ /*
+ * Search for the first matching primary expression. Limit the
+ * overhead for substring substitution to the bare minimum.
+ */
+ case DICT_REGEXP_OP_MATCH:
+ match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
+ /*
+ * Substring info is requested only when the replacement text
+ * refers to $number strings.
+ */
+ if (!DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
+ match_rule->first_exp,
+ match_rule->first_match,
+ lookup_string,
+ match_rule->max_sub > 0 ?
+ match_rule->max_sub + 1 : 0,
+ dict_regexp->pmatch))
continue;
- default:
- dict_regexp_regerror(dict_regexp->dict.name, rule->lineno,
- error, rule->negated_exp);
+ if (match_rule->second_exp
+ && !DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
+ match_rule->second_exp,
+ match_rule->second_match,
+ lookup_string,
+ NULL_SUBSTITUTIONS,
+ NULL_MATCH_RESULT))
continue;
- case REG_NOMATCH:
- break;
- }
- }
- /*
- * Match found.
- */
- if (rule->op == REGEXP_OP_IF) {
- nesting++;
+ /*
+ * Skip $number substitutions when the replacement text contains
+ * no $number strings (as learned during the pre-scan).
+ */
+ if (match_rule->max_sub == 0)
+ return (match_rule->replacement);
+
+ /*
+ * Perform $number substitutions on the replacement text. We
+ * pre-scanned the replacement text at compile time. Any macro
+ * expansion errors at this point mean something impossible has
+ * happened.
+ */
+ if (!expansion_buf)
+ expansion_buf = vstring_alloc(10);
+ VSTRING_RESET(expansion_buf);
+ expand_context.expansion_buf = expansion_buf;
+ expand_context.lookup_string = lookup_string;
+ expand_context.match_rule = match_rule;
+ expand_context.dict_regexp = dict_regexp;
+
+ if (mac_parse(match_rule->replacement, dict_regexp_expand,
+ (char *) &expand_context) & MAC_PARSE_ERROR)
+ msg_panic("regexp map %s, line %d: bad replacement syntax",
+ dict->name, rule->lineno);
+ VSTRING_TERMINATE(expansion_buf);
+ return (vstring_str(expansion_buf));
+
+ /*
+ * Conditional.
+ */
+ case DICT_REGEXP_OP_IF:
+ if_rule = (DICT_REGEXP_IF_RULE *) rule;
+ if (DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
+ if_rule->expr, if_rule->match, lookup_string,
+ NULL_SUBSTITUTIONS, NULL_MATCH_RESULT))
+ nesting++;
continue;
- }
- /*
- * Skip $number substitutions when the replacement text contains no
- * $number strings (as learned during the pre-scan).
- */
- if (rule->max_nsub == 0)
- return (rule->replacement);
+ /*
+ * ENDIF after successful IF.
+ */
+ case DICT_REGEXP_OP_ENDIF:
+ nesting--;
+ continue;
- /*
- * Perform $number substitutions on the replacement text. We
- * pre-scanned the replacement text at compile time. Any macro
- * expansion errors at this point mean something impossible has
- * happened.
- */
- if (!buf)
- buf = vstring_alloc(10);
- VSTRING_RESET(buf);
- ctxt.buf = buf;
- ctxt.subject = name;
- ctxt.rule = rule;
- ctxt.dict = dict_regexp;
-
- if (mac_parse(rule->replacement, dict_regexp_expand, (char *) &ctxt) & MAC_PARSE_ERROR)
- msg_panic("regexp map %s, line %d: bad replacement syntax",
- dict_regexp->dict.name, rule->lineno);
- VSTRING_TERMINATE(buf);
- return (vstring_str(buf));
+ default:
+ msg_panic("dict_regexp_lookup: impossible operation %d", rule->op);
+ }
}
return (0);
}
DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
DICT_REGEXP_RULE *rule;
DICT_REGEXP_RULE *next;
+ DICT_REGEXP_MATCH_RULE *match_rule;
+ DICT_REGEXP_IF_RULE *if_rule;
for (rule = dict_regexp->head; rule; rule = next) {
next = rule->next;
- if (rule->primary_exp) {
- regfree(rule->primary_exp);
- myfree((char *) rule->primary_exp);
- }
- if (rule->negated_exp) {
- regfree(rule->negated_exp);
- myfree((char *) rule->negated_exp);
+ switch (rule->op) {
+ case DICT_REGEXP_OP_MATCH:
+ match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
+ if (match_rule->first_exp) {
+ regfree(match_rule->first_exp);
+ myfree((char *) match_rule->first_exp);
+ }
+ if (match_rule->second_exp) {
+ regfree(match_rule->second_exp);
+ myfree((char *) match_rule->second_exp);
+ }
+ if (match_rule->replacement)
+ myfree((char *) match_rule->replacement);
+ break;
+ case DICT_REGEXP_OP_IF:
+ if_rule = (DICT_REGEXP_IF_RULE *) rule;
+ if (if_rule->expr) {
+ regfree(if_rule->expr);
+ myfree((char *) if_rule->expr);
+ }
+ break;
+ case DICT_REGEXP_OP_ENDIF:
+ break;
+ default:
+ msg_panic("dict_regexp_close: unknown operation %d", rule->op);
}
- myfree((char *) rule->replacement);
myfree((char *) rule);
}
if (dict_regexp->pmatch)
dict_free(dict);
}
-/* dict_regexp_get_pattern - extract one pattern with options from rule */
+/* dict_regexp_get_pat - extract one pattern with options from rule */
-static int dict_regexp_get_pattern(const char *map, int lineno, char **bufp,
- DICT_REGEXP_PATTERN *pat)
+static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp,
+ DICT_REGEXP_PATTERN *pat)
{
char *p = *bufp;
char re_delim;
- re_delim = *p++;
- pat->regexp = p;
+ /*
+ * Process negation operators.
+ */
+ pat->match = 1;
+ while (*p == '!') {
+ pat->match = !pat->match;
+ p++;
+ }
+
+ /*
+ * Grr...aceful handling of whitespace after '!'.
+ */
+ while (*p && ISSPACE(*p))
+ p++;
+ if (*p == 0) {
+ msg_warn("regexp map %s, line %d: no regexp: skipping this rule",
+ mapname, lineno);
+ return (0);
+ }
/*
* Search for the closing delimiter, handling backslash escape.
*/
+ re_delim = *p++;
+ pat->regexp = p;
while (*p) {
if (*p == '\\') {
if (p[1])
}
if (!*p) {
msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": "
- "skipping this rule", map, lineno, re_delim);
+ "skipping this rule", mapname, lineno, re_delim);
return (0);
}
*p++ = 0; /* null terminate */
break;
default:
msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": "
- "skipping this rule", map, lineno, *p);
+ "skipping this rule", mapname, lineno, *p);
return (0);
}
++p;
return (1);
}
-/* dict_regexp_compile - compile one pattern */
+/* dict_regexp_get_pats - get the primary and second patterns and flags
+ *
+ * Hands p to dict_regexp_get_pat() for the first pattern and, when the
+ * text that follows starts with '!', once more for the second pattern.
+ * Returns 1 on success, 0 when dict_regexp_get_pat() rejects a pattern.
+ * Without a second pattern, second_pat->regexp is set to 0 and
+ * second_pat->match is given a defined value, so that the caller can
+ * copy the field without reading an indeterminate object.
+ */
-static regex_t *dict_regexp_compile(const char *map, int lineno,
- DICT_REGEXP_PATTERN *pat)
+static int dict_regexp_get_pats(const char *mapname, int lineno, char **p,
+ DICT_REGEXP_PATTERN *first_pat,
+ DICT_REGEXP_PATTERN *second_pat)
{
- int error;
- regex_t *expr;
- expr = (regex_t *) mymalloc(sizeof(*expr));
- error = regcomp(expr, pat->regexp, pat->options);
- if (error != 0) {
- dict_regexp_regerror(map, lineno, error, expr);
- myfree((char *) expr);
+ /*
+ * Get the primary and optional secondary patterns and their flags.
+ */
+ if (dict_regexp_get_pat(mapname, lineno, p, first_pat) == 0)
return (0);
+ /*
+ * The '!' is not skipped here: dict_regexp_get_pat() consumes it as a
+ * negation operator, so the second pattern comes back with match == 0.
+ */
+ if (**p == '!') {
+#if 0
+ msg_warn("regexp file %s, line %d: /pattern1/!/pattern2/ goes away, "
+ "use \"if /!pattern2/ ... /pattern1/ ... endif\" instead",
+ mapname, lineno);
+#endif
+ if (dict_regexp_get_pat(mapname, lineno, p, second_pat) == 0)
+ return (0);
+ } else {
+ second_pat->regexp = 0;
+ second_pat->match = 1; /* defined default; the caller copies it */
}
- return (expr);
+ return (1);
}
/* dict_regexp_prescan - find largest $number in replacement text */
if (type == MAC_PARSE_VARNAME) {
if (!alldig(vstring_str(buf))) {
msg_warn("regexp map %s, line %d: non-numeric replacement macro name \"%s\"",
- ctxt->map, ctxt->lineno, vstring_str(buf));
+ ctxt->mapname, ctxt->lineno, vstring_str(buf));
return (MAC_PARSE_ERROR);
}
n = atoi(vstring_str(buf));
- if (n > ctxt->max_nsub)
- ctxt->max_nsub = n;
+ if (n > ctxt->max_sub)
+ ctxt->max_sub = n;
}
return (MAC_PARSE_OK);
}
-/* dict_regexp_patterns - get the primary and negated patterns and flags */
+/* dict_regexp_compile_pat - compile one pattern
+ *
+ * Allocates a regex_t with mymalloc() and compiles pat->regexp with
+ * pat->options. When regcomp() fails, the error is reported through
+ * dict_regexp_regerror(), the regex_t is released with myfree(), and
+ * the result is 0. Otherwise the result is the compiled expression.
+ */
-static int dict_regexp_patterns(const char *map, int lineno, char **p,
- DICT_REGEXP_PATTERN *primary_pat,
- DICT_REGEXP_PATTERN *negated_pat)
+static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno,
+ DICT_REGEXP_PATTERN *pat)
{
+ int error; /* regcomp() result, non-zero on failure */
+ regex_t *expr; /* the expression being compiled */
- /*
- * Get the primary and optional negated patterns and their flags.
- */
- if (dict_regexp_get_pattern(map, lineno, p, primary_pat) == 0)
+ expr = (regex_t *) mymalloc(sizeof(*expr));
+ error = regcomp(expr, pat->regexp, pat->options);
+ if (error != 0) {
+ dict_regexp_regerror(mapname, lineno, error, expr);
+ myfree((char *) expr); /* no regfree(): compilation did not succeed */
return (0);
- if (*(*p) == '!' && (*p)[1] && !ISSPACE((*p)[1])) {
- (*p)++;
- if (dict_regexp_get_pattern(map, lineno, p, negated_pat) == 0)
- return (0);
- } else {
- negated_pat->regexp = 0;
}
- return (1);
+ return (expr);
+}
+
+/* dict_regexp_rule_alloc - fill in a generic rule structure
+ *
+ * Allocates size bytes with mymalloc() and initializes only the generic
+ * DICT_REGEXP_RULE members (op, nesting, lineno, next). Callers in this
+ * file pass the sizeof() of the rule structure they need and fill in
+ * any remaining members themselves.
+ */
+
+static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int nesting,
+ int lineno,
+ size_t size)
+{
+ DICT_REGEXP_RULE *rule;
+
+ rule = (DICT_REGEXP_RULE *) mymalloc(size);
+ rule->op = op;
+ rule->nesting = nesting;
+ rule->lineno = lineno;
+ rule->next = 0; /* not linked into a list yet */
+
+ return (rule);
}
/* dict_regexp_parseline - parse one rule */
-static DICT_REGEXP_RULE *dict_regexp_parseline(const char *map, int lineno,
+static DICT_REGEXP_RULE *dict_regexp_parseline(const char *mapname, int lineno,
char *line, int nesting)
{
- DICT_REGEXP_RULE *rule;
char *p;
- regex_t *primary_exp;
- regex_t *negated_exp;
- DICT_REGEXP_PATTERN primary_pat;
- DICT_REGEXP_PATTERN negated_pat;
- DICT_REGEXP_PRESCAN_CONTEXT ctxt;
- int op = REGEXP_OP_MATCH;
p = line;
/*
- * The MATCH operator takes both patterns and replacement text.
+ * An ordinary rule takes one or two patterns and replacement text.
+ * Any line that does not start with an alphanumeric character is
+ * parsed as an ordinary rule.
*/
if (!ISALNUM(*p)) {
- op = REGEXP_OP_MATCH;
- if (!dict_regexp_patterns(map, lineno, &p, &primary_pat, &negated_pat))
+ DICT_REGEXP_PATTERN first_pat;
+ DICT_REGEXP_PATTERN second_pat;
+ DICT_REGEXP_PRESCAN_CONTEXT prescan_context;
+ regex_t *first_exp;
+ regex_t *second_exp;
+ DICT_REGEXP_MATCH_RULE *match_rule;
+
+ /*
+ * Get the primary and the optional secondary patterns.
+ */
+ if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat))
return (0);
/*
* Get the replacement text.
*/
- if (!ISSPACE(*p)) {
- msg_warn("regexp map %s, line %d: invalid expression: "
- "skipping this rule", map, lineno);
- return (0);
- }
while (*p && ISSPACE(*p))
++p;
if (!*p) {
msg_warn("regexp map %s, line %d: using empty replacement string",
- map, lineno);
+ mapname, lineno);
+ } /* warning only: the rule is still created below */
+
+ /*
+ * Find the highest-numbered $number in the replacement text. We can
+ * speed up pattern matching 1) by passing hints to the regexp
+ * compiler, setting the REG_NOSUB flag when the replacement text
+ * contains no $number string; 2) by passing hints to the regexp
+ * execution code, limiting the amount of text that is made available
+ * for substitution.
+ */
+ prescan_context.mapname = mapname;
+ prescan_context.lineno = lineno;
+ prescan_context.max_sub = 0;
+
+ if (mac_parse(p, dict_regexp_prescan, (char *) &prescan_context)
+ & MAC_PARSE_ERROR) {
+ msg_warn("regexp map %s, line %d: bad replacement syntax: "
+ "skipping this rule", mapname, lineno);
+ return (0);
+ }
+
+ /*
+ * Compile the primary and the optional secondary pattern. Speed up
+ * execution when no matched text needs to be substituted into the
+ * result string, or when the highest numbered substring is less than
+ * the total number of () subpatterns.
+ */
+#define FREE_EXPR_AND_RETURN(expr, rval) \
+ { regfree(expr); myfree((char *) (expr)); return (rval); } /* release a compiled pattern, then return */
+
+ if (prescan_context.max_sub == 0)
+ first_pat.options |= REG_NOSUB;
+ if ((first_exp = dict_regexp_compile_pat(mapname, lineno,
+ &first_pat)) == 0)
+ return (0);
+ if (prescan_context.max_sub > first_exp->re_nsub) {
+ msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": "
+ "skipping this rule", mapname, lineno,
+ prescan_context.max_sub);
+ FREE_EXPR_AND_RETURN(first_exp, 0);
+ }
+ if (second_pat.regexp != 0) {
+ second_pat.options |= REG_NOSUB; /* $number is checked against first_exp only */
+ if ((second_exp = dict_regexp_compile_pat(mapname, lineno,
+ &second_pat)) == 0)
+ FREE_EXPR_AND_RETURN(first_exp, 0);
+ } else {
+ second_exp = 0;
}
+ match_rule = (DICT_REGEXP_MATCH_RULE *)
+ dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, nesting, lineno,
+ sizeof(DICT_REGEXP_MATCH_RULE));
+ match_rule->first_exp = first_exp;
+ match_rule->first_match = first_pat.match;
+ match_rule->max_sub =
+ (prescan_context.max_sub > 0 ? prescan_context.max_sub + 1 : 0); /* presumably +1 for the whole-match slot 0 -- confirm in lookup */
+ match_rule->second_exp = second_exp;
+ match_rule->second_match = second_pat.match;
+ match_rule->replacement = mystrdup(p); /* private copy: p points into line */
+ return ((DICT_REGEXP_RULE *) match_rule);
}
/*
- * The IF operator takes patterns but no replacement text.
+ * The IF operator takes one pattern but no replacement text.
+ * Text after the pattern is reported and otherwise ignored.
*/
else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
- op = REGEXP_OP_IF;
+ DICT_REGEXP_PATTERN pattern;
+ regex_t *expr;
+ DICT_REGEXP_IF_RULE *if_rule;
+
p += 2;
while (*p && ISSPACE(*p))
p++;
- if (!dict_regexp_patterns(map, lineno, &p, &primary_pat, &negated_pat))
+ if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern))
return (0);
if (*p)
- msg_warn("%s, line %d: ignoring extra text after IF", map, lineno);
+ msg_warn("regexp map %s, line %d: ignoring extra text after IF",
+ mapname, lineno);
+ if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0)
+ return (0);
+ if_rule = (DICT_REGEXP_IF_RULE *)
+ dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, nesting, lineno,
+ sizeof(DICT_REGEXP_IF_RULE));
+ if_rule->expr = expr;
+ if_rule->match = pattern.match; /* 0 when the pattern was negated with '!' */
+ return ((DICT_REGEXP_RULE *) if_rule);
}
/*
* The ENDIF operator takes no patterns and no replacement text.
*/
else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
- op = REGEXP_OP_ENDIF;
+ DICT_REGEXP_RULE *rule;
+
p += 5;
- if (*p)
- msg_warn("%s, line %d: ignoring extra text after ENDIF",
- map, lineno);
if (nesting == 0) {
- msg_warn("%s, line %d: ignoring ENDIF without matching IF",
- map, lineno);
+ msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF",
+ mapname, lineno);
return (0);
}
- primary_pat.regexp = negated_pat.regexp = 0;
+ if (*p)
+ msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF",
+ mapname, lineno);
+ rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, nesting, lineno,
+ sizeof(DICT_REGEXP_RULE)); /* generic part only */
+ return (rule);
}
/*
- * Unrecognized request.
+ * Unrecognized input.
*/
else {
msg_warn("regexp map %s, line %d: ignoring unrecognized request",
- map, lineno);
+ mapname, lineno);
return (0);
}
-
- /*
- * Do some compile-time optimizations to speed up pattern matches.
- */
- if (primary_pat.regexp) {
- ctxt.map = map;
- ctxt.lineno = lineno;
- ctxt.max_nsub = 0;
-
- /*
- * Find the highest-numbered $number substitution string. We can
- * speed up processing 1) by passing hints to the regexp compiler,
- * setting the REG_NOSUB flag when the replacement text contains no
- * $number string; 2) by passing hints to the regexp execution code,
- * limiting the amount of text that is made available for
- * substitution.
- */
- if (mac_parse(p, dict_regexp_prescan, (char *) &ctxt) & MAC_PARSE_ERROR) {
- msg_warn("regexp map %s, line %d: bad replacement syntax: "
- "skipping this rule", map, lineno);
- return (0);
- }
-
- /*
- * Compile the primary and the optional negated pattern. Speed up
- * execution when no matched text needs to be substituted into the
- * result string, or when the highest numbered substring is less than
- * the total number of () subpatterns.
- */
- if (ctxt.max_nsub == 0)
- primary_pat.options |= REG_NOSUB;
- if ((primary_exp = dict_regexp_compile(map, lineno, &primary_pat)) == 0)
- return (0);
- if (ctxt.max_nsub > primary_exp->re_nsub) {
- msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": "
- "skipping this rule", map, lineno, ctxt.max_nsub);
- regfree(primary_exp);
- myfree((char *) primary_exp);
- return (0);
- }
- if (negated_pat.regexp != 0) {
- negated_pat.options |= REG_NOSUB;
- if ((negated_exp = dict_regexp_compile(map, lineno, &negated_pat)) == 0) {
- regfree(primary_exp);
- myfree((char *) primary_exp);
- return (0);
- }
- } else
- negated_exp = 0;
- } else {
- primary_exp = negated_exp = 0;
- ctxt.max_nsub = 0;
- }
-
- /*
- * Package up the result.
- */
- rule = (DICT_REGEXP_RULE *) mymalloc(sizeof(DICT_REGEXP_RULE));
- rule->primary_exp = primary_exp;
- rule->negated_exp = negated_exp;
- rule->replacement = mystrdup(p);
- rule->max_nsub = ctxt.max_nsub;
- rule->lineno = lineno;
- rule->op = op;
- rule->nesting = nesting;
- rule->next = 0;
- return (rule);
}
/* dict_regexp_open - load and compile a file containing regular expressions */
-DICT *dict_regexp_open(const char *map, int unused_flags, int dict_flags)
+DICT *dict_regexp_open(const char *mapname, int unused_flags, int dict_flags)
{
DICT_REGEXP *dict_regexp;
VSTREAM *map_fp;
DICT_REGEXP_RULE *rule;
DICT_REGEXP_RULE *last_rule = 0;
int lineno = 0;
- size_t max_nsub = 0;
+ size_t max_sub = 0;
int nesting = 0;
char *p;
line_buffer = vstring_alloc(100);
- dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, map,
+ dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname,
sizeof(*dict_regexp));
dict_regexp->dict.lookup = dict_regexp_lookup;
dict_regexp->dict.close = dict_regexp_close;
/*
* Parse the regexp table.
*/
- if ((map_fp = vstream_fopen(map, O_RDONLY, 0)) == 0)
- msg_fatal("open %s: %m", map);
+ if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0)
+ msg_fatal("open %s: %m", mapname);
while (readlline(line_buffer, map_fp, &lineno)) {
p = vstring_str(line_buffer);
trimblanks(p, 0)[0] = 0;
if (*p == 0)
continue;
- rule = dict_regexp_parseline(map, lineno, p, nesting);
- if (rule) {
- if (rule->max_nsub > max_nsub)
- max_nsub = rule->max_nsub;
- if (rule->op == REGEXP_OP_IF)
- nesting++;
- if (rule->op == REGEXP_OP_ENDIF)
- nesting--;
- if (last_rule == 0)
- dict_regexp->head = rule;
- else
- last_rule->next = rule;
- last_rule = rule;
+ rule = dict_regexp_parseline(mapname, lineno, p, nesting);
+ if (rule == 0)
+ continue;
+ if (rule->op == DICT_REGEXP_OP_MATCH) {
+ if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub)
+ max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub;
+ } else if (rule->op == DICT_REGEXP_OP_IF) {
+ nesting++;
+ } else if (rule->op == DICT_REGEXP_OP_ENDIF) {
+ nesting--;
}
+ if (last_rule == 0)
+ dict_regexp->head = rule;
+ else
+ last_rule->next = rule;
+ last_rule = rule;
}
if (nesting)
- msg_warn("%s, line %d: more IFs than ENDIFs", map, lineno);
+ msg_warn("regexp map %s, line %d: more IFs than ENDIFs",
+ mapname, lineno);
/*
* Allocate space for only as many matched substrings as used in the
* replacement text.
*/
- if (max_nsub > 0)
+ if (max_sub > 0)
dict_regexp->pmatch =
- (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_nsub + 1));
+ (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1));
/*
* Clean up.