git.ipfire.org Git - pakfire.git/commitdiff
parser: Use PCRE2 for regex matching
author Michael Tremer <michael.tremer@ipfire.org>
Wed, 3 Mar 2021 14:33:41 +0000 (14:33 +0000)
committer Michael Tremer <michael.tremer@ipfire.org>
Wed, 3 Mar 2021 14:33:41 +0000 (14:33 +0000)
The regular expression is now compiled only once per parser, the first time
it is needed, instead of every time we search for variables.

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/libpakfire/parser.c

index a8a238e5df9e711eb517dcbca21c452e27dbd9a8..addc44ad69506721c6964bf30721cf7c708e604e 100644 (file)
@@ -23,6 +23,9 @@
 #include <stdlib.h>
 #include <string.h>
 
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+
 #include <pakfire/errno.h>
 #include <pakfire/logging.h>
 #include <pakfire/parser.h>
@@ -30,8 +33,6 @@
 #include <pakfire/private.h>
 #include <pakfire/util.h>
 
-#define VARIABLE_PATTERN "%\\{([A-Za-z0-9_\\-]+)\\}"
-
 struct _PakfireParser {
        Pakfire pakfire;
        struct _PakfireParser* parent;
@@ -42,8 +43,33 @@ struct _PakfireParser {
 
        struct pakfire_parser_declaration** declarations;
        size_t num_declarations;
+
+       // Regular expressions
+       pcre2_code* regex_variable;
 };
 
+static int pakfire_parser_compile_regex(PakfireParser parser) { // Compiles the variable regex on first use; returns 0 on success, 1 on failure
+       int pcre2_errno; // Receives the error code from pcre2_compile() on failure
+       size_t pcre2_offset; // Receives the offset in the pattern where compilation failed
+       PCRE2_UCHAR errmsg[256]; // Buffer for the textual PCRE2 error message
+
+       if (!parser->regex_variable) { // Nothing to do if a compiled pattern is already stored on this parser
+               parser->regex_variable = pcre2_compile((PCRE2_SPTR)"%\\{([A-Za-z0-9_\\-]+)\\}", // Matches %{name}; group 1 captures the name
+                       PCRE2_ZERO_TERMINATED, 0, &pcre2_errno, &pcre2_offset, NULL); // NUL-terminated pattern, no options, default compile context
+
+               if (!parser->regex_variable) { // pcre2_compile() returned NULL: compilation failed
+                       pcre2_get_error_message(pcre2_errno, errmsg, sizeof(errmsg)); // Translate the error code into text for the log
+                       ERROR(parser->pakfire, "PCRE2 compilation failed at offset %zu: %s\n",
+                               pcre2_offset, errmsg);
+
+                       return 1; // Non-zero tells the caller that no regex is available
+               }
+       }
+
+       return 0; // The compiled regex is available in parser->regex_variable
+}
+
+
 static char* pakfire_parser_make_canonical_name(const char* namespace, const char* name) {
        char* buffer = NULL;
 
@@ -119,6 +145,10 @@ static void pakfire_parser_free_declarations(PakfireParser parser) {
 static void pakfire_parser_free(PakfireParser parser) {
        DEBUG(parser->pakfire, "Releasing parser at %p\n", parser);
 
+       // Release regular expressions
+       if (parser->regex_variable)
+               pcre2_code_free(parser->regex_variable);
+
        pakfire_parser_free_declarations(parser);
 
        if (parser->namespace)
@@ -319,27 +349,17 @@ static struct pakfire_parser_declaration* pakfire_parser_find_declaration(
        return d;
 }
 
-static char* extract_string(const char* buffer, const regmatch_t* match) {
-       // Determine the length of the string
-       size_t l = match->rm_eo - match->rm_so;
-
-       // Allocate sufficient memory
-       char* s = malloc(l + 1);
-       if (!s)
-               return NULL;
-
-       // Copy string
-       snprintf(s, l + 1, "%s", buffer + match->rm_so);
-
-       return s;
-}
-
 PAKFIRE_EXPORT char* pakfire_parser_expand(PakfireParser parser,
                const char* namespace, const char* value) {
        // Return NULL when the value is NULL
        if (!value)
                return NULL;
 
+       PCRE2_UCHAR* variable = NULL;
+       PCRE2_SIZE variable_length;
+       PCRE2_UCHAR* pattern = NULL;
+       PCRE2_SIZE pattern_length;
+
        // Create a working copy of the string we are expanding
        char* buffer = strdup(value);
 
@@ -348,45 +368,35 @@ PAKFIRE_EXPORT char* pakfire_parser_expand(PakfireParser parser,
        if (!pos)
                return buffer;
 
-       // Compile the regular expression
-       regex_t preg;
-       int r = regcomp(&preg, VARIABLE_PATTERN, REG_EXTENDED);
-       if (r) {
-               char error[1024];
-               regerror(r, &preg, error, sizeof(error));
-
-               ERROR(parser->pakfire, "Could not compile regular expression (%s): %s",
-                       VARIABLE_PATTERN, error);
-
-               return NULL;
-       }
+       // Compile all regular expressions
+       int r = pakfire_parser_compile_regex(parser);
+       if (r)
+               goto ERROR;
 
-       char* variable = NULL;
-       char* pattern = NULL;
-       const size_t max_groups = 2;
-       regmatch_t groups[max_groups];
+       // Allocate memory for results
+       pcre2_match_data* match = pcre2_match_data_create_from_pattern(parser->regex_variable, NULL);
 
        // Search for any variables
        while (1) {
                // Perform matching
-               r = regexec(&preg, buffer, max_groups, groups, 0);
+               int r = pcre2_match(parser->regex_variable, (PCRE2_UCHAR*)buffer, strlen(buffer), 0, 0, match, NULL);
 
                // End loop when we have expanded all variables
-               if (r == REG_NOMATCH) {
+               if (r == PCRE2_ERROR_NOMATCH) {
                        DEBUG(parser->pakfire, "No (more) matches found in: %s\n", buffer);
                        break;
                }
 
                // Find the variable name
-               variable = extract_string(buffer, &groups[1]);
-               if (!variable)
+               r = pcre2_substring_get_bynumber(match, 1, &variable, &variable_length);
+               if (r)
                        goto ERROR;
 
                DEBUG(parser->pakfire, "Expanding variable: %s\n", variable);
 
                // Search for a declaration of this variable
                struct pakfire_parser_declaration* d =
-                       pakfire_parser_find_declaration(parser, namespace, variable);
+                       pakfire_parser_find_declaration(parser, namespace, (const char*)variable);
 
                // What is its value?
                const char* repl = NULL;
@@ -402,12 +412,12 @@ PAKFIRE_EXPORT char* pakfire_parser_expand(PakfireParser parser,
                }
 
                // Find the entire matched pattern
-               pattern = extract_string(buffer, &groups[0]);
-               if (!pattern)
+               r = pcre2_substring_get_bynumber(match, 0, &pattern, &pattern_length);
+               if (r)
                        goto ERROR;
 
                // Replace all occurrences
-               char* tmp = pakfire_string_replace(buffer, pattern, repl);
+               char* tmp = pakfire_string_replace(buffer, (const char*)pattern, repl);
                if (!tmp)
                        goto ERROR;
 
@@ -416,8 +426,8 @@ PAKFIRE_EXPORT char* pakfire_parser_expand(PakfireParser parser,
                buffer = tmp;
 
                // Free resources
-               free(variable);
-               free(pattern);
+               pcre2_substring_free(variable);
+               pcre2_substring_free(pattern);
                variable = pattern = NULL;
 
                DEBUG(parser->pakfire, "New buffer: %s\n", buffer);
@@ -432,12 +442,12 @@ ERROR:
        }
 
 OUT:
-       regfree(&preg);
+       pcre2_match_data_free(match);
 
        if (variable)
-               free(variable);
+               pcre2_substring_free(variable);
        if (pattern)
-               free(pattern);
+               pcre2_substring_free(pattern);
 
        return buffer;
 }