From: Michael Tremer
Date: Wed, 3 Mar 2021 14:33:41 +0000 (+0000)
Subject: parser: Use PCRE2 for regex matching
X-Git-Tag: 0.9.28~1285^2~653
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c178c275fd2887ddaf7cf3f0fea3f76be454978a;p=pakfire.git

parser: Use PCRE2 for regex matching

The regular expression is only compiled once per parser when it is
being needed and not every time we are searching for variables.

Signed-off-by: Michael Tremer
---

Review notes (reviewer commentary, not part of the commit):

  * pakfire_parser_expand(): the `goto ERROR` taken when
    pakfire_parser_compile_regex() fails runs before `match` has been
    assigned, yet the OUT label passes `match` to
    pcre2_match_data_free(). The statements under ERROR: lie outside
    these hunks; if ERROR falls through to OUT (as the label order
    suggests), `match` is read uninitialised on that path. Declaring
    `pcre2_match_data* match = NULL;` next to `variable`/`pattern`
    would close it.
  * The `int r = pcre2_match(...)` inside the loop shadows the outer
    `r` declared for pakfire_parser_compile_regex().
  * Only PCRE2_ERROR_NOMATCH ends the loop; any other negative
    pcre2_match() result continues on to
    pcre2_substring_get_bynumber(). The return value of
    pcre2_match_data_create_from_pattern() is not checked for NULL.
  * The lazy compile in pakfire_parser_compile_regex() relies on
    `regex_variable` starting out NULL. The parser constructor is not
    part of this patch -- confirm that it zero-initialises the struct.

diff --git a/src/libpakfire/parser.c b/src/libpakfire/parser.c
index a8a238e5d..addc44ad6 100644
--- a/src/libpakfire/parser.c
+++ b/src/libpakfire/parser.c
@@ -23,6 +23,9 @@
 #include
 #include
 
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include
+
 #include
 #include
 #include
@@ -30,8 +33,6 @@
 #include
 #include
 
-#define VARIABLE_PATTERN "%\\{([A-Za-z0-9_\\-]+)\\}"
-
 struct _PakfireParser {
 	Pakfire pakfire;
 	struct _PakfireParser* parent;
@@ -42,8 +43,33 @@ struct _PakfireParser {
 
 	struct pakfire_parser_declaration** declarations;
 	size_t num_declarations;
+
+	// Regular expressions
+	pcre2_code* regex_variable;
 };
 
+static int pakfire_parser_compile_regex(PakfireParser parser) {
+	int pcre2_errno;
+	size_t pcre2_offset;
+	PCRE2_UCHAR errmsg[256];
+
+	if (!parser->regex_variable) {
+		parser->regex_variable = pcre2_compile((PCRE2_SPTR)"%\\{([A-Za-z0-9_\\-]+)\\}",
+			PCRE2_ZERO_TERMINATED, 0, &pcre2_errno, &pcre2_offset, NULL);
+
+		if (!parser->regex_variable) {
+			pcre2_get_error_message(pcre2_errno, errmsg, sizeof(errmsg));
+			ERROR(parser->pakfire, "PCRE2 compilation failed at offset %zu: %s\n",
+				pcre2_offset, errmsg);
+
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+
 static char* pakfire_parser_make_canonical_name(const char* namespace, const char* name) {
 	char* buffer = NULL;
 
@@ -119,6 +145,10 @@ static void pakfire_parser_free_declarations(PakfireParser parser) {
 static void pakfire_parser_free(PakfireParser parser) {
 	DEBUG(parser->pakfire, "Releasing parser at %p\n", parser);
 
+	// Release regular expressions
+	if (parser->regex_variable)
+		pcre2_code_free(parser->regex_variable);
+
 	pakfire_parser_free_declarations(parser);
 
 	if (parser->namespace)
@@ -319,27 +349,17 @@ static struct pakfire_parser_declaration* pakfire_parser_find_declaration(
 	return d;
 }
 
-static char* extract_string(const char* buffer, const regmatch_t* match) {
-	// Determine the length of the string
-	size_t l = match->rm_eo - match->rm_so;
-
-	// Allocate sufficient memory
-	char* s = malloc(l + 1);
-	if (!s)
-		return NULL;
-
-	// Copy string
-	snprintf(s, l + 1, "%s", buffer + match->rm_so);
-
-	return s;
-}
-
 PAKFIRE_EXPORT char* pakfire_parser_expand(PakfireParser parser,
 		const char* namespace, const char* value) {
 	// Return NULL when the value is NULL
 	if (!value)
 		return NULL;
 
+	PCRE2_UCHAR* variable = NULL;
+	PCRE2_SIZE variable_length;
+	PCRE2_UCHAR* pattern = NULL;
+	PCRE2_SIZE pattern_length;
+
 	// Create a working copy of the string we are expanding
 	char* buffer = strdup(value);
 
@@ -348,45 +368,35 @@ PAKFIRE_EXPORT char* pakfire_parser_expand(PakfireParser parser,
 	if (!pos)
 		return buffer;
 
-	// Compile the regular expression
-	regex_t preg;
-	int r = regcomp(&preg, VARIABLE_PATTERN, REG_EXTENDED);
-	if (r) {
-		char error[1024];
-		regerror(r, &preg, error, sizeof(error));
-
-		ERROR(parser->pakfire, "Could not compile regular expression (%s): %s",
-			VARIABLE_PATTERN, error);
-
-		return NULL;
-	}
+	// Compile all regular expressions
+	int r = pakfire_parser_compile_regex(parser);
+	if (r)
+		goto ERROR;
 
-	char* variable = NULL;
-	char* pattern = NULL;
-	const size_t max_groups = 2;
-	regmatch_t groups[max_groups];
+	// Allocate memory for results
+	pcre2_match_data* match = pcre2_match_data_create_from_pattern(parser->regex_variable, NULL);
 
 	// Search for any variables
 	while (1) {
 		// Perform matching
-		r = regexec(&preg, buffer, max_groups, groups, 0);
+		int r = pcre2_match(parser->regex_variable, (PCRE2_UCHAR*)buffer, strlen(buffer), 0, 0, match, NULL);
 
 		// End loop when we have expanded all variables
-		if (r == REG_NOMATCH) {
+		if (r == PCRE2_ERROR_NOMATCH) {
 			DEBUG(parser->pakfire, "No (more) matches found in: %s\n", buffer);
 			break;
 		}
 
 		// Find the variable name
-		variable = extract_string(buffer, &groups[1]);
-		if (!variable)
+		r = pcre2_substring_get_bynumber(match, 1, &variable, &variable_length);
+		if (r)
 			goto ERROR;
 
 		DEBUG(parser->pakfire, "Expanding variable: %s\n", variable);
 
 		// Search for a declaration of this variable
 		struct pakfire_parser_declaration* d =
-			pakfire_parser_find_declaration(parser, namespace, variable);
+			pakfire_parser_find_declaration(parser, namespace, (const char*)variable);
 
 		// What is its value?
 		const char* repl = NULL;
@@ -402,12 +412,12 @@ PAKFIRE_EXPORT char* pakfire_parser_expand(PakfireParser parser,
 		}
 
 		// Find the entire matched pattern
-		pattern = extract_string(buffer, &groups[0]);
-		if (!pattern)
+		r = pcre2_substring_get_bynumber(match, 0, &pattern, &pattern_length);
+		if (r)
 			goto ERROR;
 
 		// Replace all occurrences
-		char* tmp = pakfire_string_replace(buffer, pattern, repl);
+		char* tmp = pakfire_string_replace(buffer, (const char*)pattern, repl);
 		if (!tmp)
 			goto ERROR;
 
@@ -416,8 +426,8 @@ PAKFIRE_EXPORT char* pakfire_parser_expand(PakfireParser parser,
 		buffer = tmp;
 
 		// Free resources
-		free(variable);
-		free(pattern);
+		pcre2_substring_free(variable);
+		pcre2_substring_free(pattern);
 		variable = pattern = NULL;
 
 		DEBUG(parser->pakfire, "New buffer: %s\n", buffer);
@@ -432,12 +442,12 @@ ERROR:
 	}
 
 OUT:
-	regfree(&preg);
+	pcre2_match_data_free(match);
 
 	if (variable)
-		free(variable);
+		pcre2_substring_free(variable);
 	if (pattern)
-		free(pattern);
+		pcre2_substring_free(pattern);
 
 	return buffer;
 }