From: Thierry Fournier Date: Wed, 10 Feb 2016 21:55:20 +0000 (+0100) Subject: MINOR: map: Add regex matching replacement X-Git-Tag: v1.7-dev2~107 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8feaa661b6150f33f04af8159c28e24e62dce1d0;p=thirdparty%2Fhaproxy.git MINOR: map: Add regex matching replacement This patch declares a new map which provides a string based on a string with back references replaced by the content matched by the regex. --- diff --git a/doc/configuration.txt b/doc/configuration.txt index 7dd5744d75..25b94afaaf 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -11815,13 +11815,19 @@ map__([,]) -----------+--------------+-----------------+-----------------+--------------- str | end | map_end | map_end_int | map_end_ip -----------+--------------+-----------------+-----------------+--------------- - str | reg | map_reg | map_reg_int | map_reg_ip + | | map_reg | | + str | reg +-----------------+ map_reg_int | map_reg_ip + | | map_regm | | -----------+--------------+-----------------+-----------------+--------------- int | int | map_int | map_int_int | map_int_ip -----------+--------------+-----------------+-----------------+--------------- ip | ip | map_ip | map_ip_int | map_ip_ip -----------+--------------+-----------------+-----------------+--------------- + The special map called "map_regm" expects matching zones in the regular + expression and modifies the output, replacing back references (like "\1") with + the corresponding matched text. + The file contains one key + value per line. Lines which start with '#' are ignored, just like empty lines. Leading tabs and spaces are stripped. 
The key is then the first "word" (series of non-space/tabs characters), and the value diff --git a/include/proto/pattern.h b/include/proto/pattern.h index 0a132f2482..9c93db93d2 100644 --- a/include/proto/pattern.h +++ b/include/proto/pattern.h @@ -67,6 +67,7 @@ int pat_idx_list_val(struct pattern_expr *expr, struct pattern *pat, char **err) int pat_idx_list_ptr(struct pattern_expr *expr, struct pattern *pat, char **err); int pat_idx_list_str(struct pattern_expr *expr, struct pattern *pat, char **err); int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err); +int pat_idx_list_regm(struct pattern_expr *expr, struct pattern *pat, char **err); int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err); int pat_idx_tree_str(struct pattern_expr *expr, struct pattern *pat, char **err); int pat_idx_tree_pfx(struct pattern_expr *expr, struct pattern *pat, char **err); @@ -174,6 +175,7 @@ struct pattern *pat_match_ip(struct sample *smp, struct pattern_expr *expr, int * and restores the previous character when leaving. */ struct pattern *pat_match_reg(struct sample *smp, struct pattern_expr *expr, int fill); +struct pattern *pat_match_regm(struct sample *smp, struct pattern_expr *expr, int fill); /* * pattern_ref manipulation. 
diff --git a/include/types/pattern.h b/include/types/pattern.h index a71c3434fe..912e086b78 100644 --- a/include/types/pattern.h +++ b/include/types/pattern.h @@ -87,6 +87,7 @@ enum { PAT_MATCH_DOM, /* domain-like sub-string (str) */ PAT_MATCH_END, /* end of string (str) */ PAT_MATCH_REG, /* regex (str -> reg) */ + PAT_MATCH_REGM, /* regex (str -> reg) with match zones */ /* keep this one last */ PAT_MATCH_NUM }; diff --git a/src/map.c b/src/map.c index 35feea983b..a28cedae92 100644 --- a/src/map.c +++ b/src/map.c @@ -163,6 +163,7 @@ static int sample_conv_map(const struct arg *arg_p, struct sample *smp, void *pr { struct map_descriptor *desc; struct pattern *pat; + struct chunk *str; /* get config */ desc = arg_p[0].data.map; @@ -172,8 +173,19 @@ static int sample_conv_map(const struct arg *arg_p, struct sample *smp, void *pr /* Match case. */ if (pat) { - /* Copy sample. */ if (pat->data) { + /* In the regm case, merge the sample with the input. */ + if ((long)private == PAT_MATCH_REGM) { + str = get_trash_chunk(); + str->len = exp_replace(str->str, str->size, smp->data.u.str.str, + pat->data->u.str.str, + (regmatch_t *)smp->ctx.a[0]); + if (str->len == -1) + return 0; + smp->data.u.str = *str; + return 1; + } + /* Copy sample. 
*/ smp->data = *pat->data; smp->flags |= SMP_F_CONST; return 1; @@ -242,6 +254,7 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, { { "map_dom", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_DOM }, { "map_end", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_END }, { "map_reg", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_REG }, + { "map_regm", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_REGM}, { "map_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_SINT, SMP_T_STR, (void *)PAT_MATCH_INT }, { "map_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_ADDR, SMP_T_STR, (void *)PAT_MATCH_IP }, diff --git a/src/pattern.c b/src/pattern.c index 254c10650e..b4cb8e9ea6 100644 --- a/src/pattern.c +++ b/src/pattern.c @@ -41,6 +41,7 @@ char *pat_match_names[PAT_MATCH_NUM] = { [PAT_MATCH_DOM] = "dom", [PAT_MATCH_END] = "end", [PAT_MATCH_REG] = "reg", + [PAT_MATCH_REGM] = "regm", }; int (*pat_parse_fcts[PAT_MATCH_NUM])(const char *, struct pattern *, int, char **) = { @@ -57,6 +58,7 @@ int (*pat_parse_fcts[PAT_MATCH_NUM])(const char *, struct pattern *, int, char * [PAT_MATCH_DOM] = pat_parse_str, [PAT_MATCH_END] = pat_parse_str, [PAT_MATCH_REG] = pat_parse_reg, + [PAT_MATCH_REGM] = pat_parse_reg, }; int (*pat_index_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pattern *, char **) = { @@ -73,6 +75,7 @@ int (*pat_index_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pattern *, ch [PAT_MATCH_DOM] = pat_idx_list_str, [PAT_MATCH_END] = pat_idx_list_str, [PAT_MATCH_REG] = pat_idx_list_reg, + [PAT_MATCH_REGM] = pat_idx_list_regm, }; void (*pat_delete_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pat_ref_elt *) = { @@ -89,6 +92,7 @@ void (*pat_delete_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pat_ref_elt [PAT_MATCH_DOM] = pat_del_list_ptr, 
[PAT_MATCH_END] = pat_del_list_ptr, [PAT_MATCH_REG] = pat_del_list_reg, + [PAT_MATCH_REGM] = pat_del_list_reg, }; void (*pat_prune_fcts[PAT_MATCH_NUM])(struct pattern_expr *) = { @@ -105,6 +109,7 @@ void (*pat_prune_fcts[PAT_MATCH_NUM])(struct pattern_expr *) = { [PAT_MATCH_DOM] = pat_prune_ptr, [PAT_MATCH_END] = pat_prune_ptr, [PAT_MATCH_REG] = pat_prune_reg, + [PAT_MATCH_REGM] = pat_prune_reg, }; struct pattern *(*pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern_expr *, int) = { @@ -121,6 +126,7 @@ struct pattern *(*pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern [PAT_MATCH_DOM] = pat_match_dom, [PAT_MATCH_END] = pat_match_end, [PAT_MATCH_REG] = pat_match_reg, + [PAT_MATCH_REGM] = pat_match_regm, }; /* Just used for checking configuration compatibility */ @@ -138,6 +144,7 @@ int pat_match_types[PAT_MATCH_NUM] = { [PAT_MATCH_DOM] = SMP_T_STR, [PAT_MATCH_END] = SMP_T_STR, [PAT_MATCH_REG] = SMP_T_STR, + [PAT_MATCH_REGM] = SMP_T_STR, }; /* this struct is used to return information */ @@ -539,6 +546,30 @@ struct pattern *pat_match_bin(struct sample *smp, struct pattern_expr *expr, int return ret; } +/* Executes a regex. It temporarily changes the data to add a trailing zero, + * and restores the previous character when leaving. This function fills + * a matching array. + */ +struct pattern *pat_match_regm(struct sample *smp, struct pattern_expr *expr, int fill) +{ + struct pattern_list *lst; + struct pattern *pattern; + struct pattern *ret = NULL; + + list_for_each_entry(lst, &expr->patterns, list) { + pattern = &lst->pat; + + if (regex_exec_match2(pattern->ptr.reg, smp->data.u.str.str, smp->data.u.str.len, + MAX_MATCH, pmatch, 0)) { + ret = pattern; + smp->ctx.a[0] = pmatch; + break; + } + } + + return ret; +} + /* Executes a regex. It temporarily changes the data to add a trailing zero, * and restores the previous character when leaving. 
*/ @@ -1146,7 +1177,7 @@ int pat_idx_list_str(struct pattern_expr *expr, struct pattern *pat, char **err) return 1; } -int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err) +int pat_idx_list_reg_cap(struct pattern_expr *expr, struct pattern *pat, int cap, char **err) { struct pattern_list *patl; @@ -1169,7 +1200,8 @@ int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err) } /* compile regex */ - if (!regex_comp(pat->ptr.str, patl->pat.ptr.reg, !(expr->mflags & PAT_MF_IGNORE_CASE), 0, err)) { + if (!regex_comp(pat->ptr.str, patl->pat.ptr.reg, + !(expr->mflags & PAT_MF_IGNORE_CASE), cap, err)) { free(patl->pat.ptr.reg); free(patl); return 0; @@ -1183,6 +1215,16 @@ int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err) return 1; } +int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err) +{ + return pat_idx_list_reg_cap(expr, pat, 0, err); +} + +int pat_idx_list_regm(struct pattern_expr *expr, struct pattern *pat, char **err) +{ + return pat_idx_list_reg_cap(expr, pat, 1, err); +} + int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err) { unsigned int mask;