git.ipfire.org Git - thirdparty/systemd.git/commitdiff
journal: Move more pattern matching logic into pcre2-util
author: Daan De Meyer <daan.j.demeyer@gmail.com>
Fri, 22 Jul 2022 12:49:42 +0000 (14:49 +0200)
committer: Daan De Meyer <daan.j.demeyer@gmail.com>
Mon, 25 Jul 2022 12:16:17 +0000 (14:16 +0200)
To avoid having "#if HAVE_PCRE2" all throughout the code, let's
confine the pcre2 header specific stuff to pcre2-util.c. Instead of
exposing all the individual symbols from pcre2, let's only expose
three high level functions that do all we need:

- pattern_compile_and_log(): Compile the regex (logging on failure)
- pattern_matches_and_log(): Check if the compiled regex matches a message
- pattern_free(): Free the compiled regex

The compiled pattern keeps its pcre2 type, pcre2_code *. When PCRE2
support is not compiled in, pcre2-util.h defines pcre2_code as an empty
placeholder struct, so code that works with compiled patterns builds
without including pcre2.h itself and without its own "#if HAVE_PCRE2"
guards.

src/journal/journalctl.c
src/shared/pcre2-util.c
src/shared/pcre2-util.h

index 127c3d491b86825411492bff763c8f563e61a006..5cc92b5f36d03505b1e12c5ac24fe55e325ecf44 100644 (file)
 #include <sys/stat.h>
 #include <unistd.h>
 
-#if HAVE_PCRE2
-#  define PCRE2_CODE_UNIT_WIDTH 8
-#  include <pcre2.h>
-#endif
-
 #include "sd-bus.h"
 #include "sd-device.h"
 #include "sd-journal.h"
@@ -133,11 +128,9 @@ static uint64_t arg_vacuum_size = 0;
 static uint64_t arg_vacuum_n_files = 0;
 static usec_t arg_vacuum_time = 0;
 static char **arg_output_fields = NULL;
-#if HAVE_PCRE2
 static const char *arg_pattern = NULL;
 static pcre2_code *arg_compiled_pattern = NULL;
-static int arg_case_sensitive = -1; /* -1 means be smart */
-#endif
+static PatternCompileCase arg_case = PATTERN_COMPILE_CASE_AUTO;
 
 STATIC_DESTRUCTOR_REGISTER(arg_file, strv_freep);
 STATIC_DESTRUCTOR_REGISTER(arg_facilities, set_freep);
@@ -148,9 +141,7 @@ STATIC_DESTRUCTOR_REGISTER(arg_user_units, strv_freep);
 STATIC_DESTRUCTOR_REGISTER(arg_root, freep);
 STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
 STATIC_DESTRUCTOR_REGISTER(arg_output_fields, strv_freep);
-#if HAVE_PCRE2
-STATIC_DESTRUCTOR_REGISTER(arg_compiled_pattern, sym_pcre2_code_freep);
-#endif
+STATIC_DESTRUCTOR_REGISTER(arg_compiled_pattern, pattern_freep);
 
 static enum {
         ACTION_SHOW,
@@ -180,29 +171,6 @@ typedef struct BootId {
         LIST_FIELDS(struct BootId, boot_list);
 } BootId;
 
-#if HAVE_PCRE2
-static int pattern_compile(const char *pattern, unsigned flags, pcre2_code **out) {
-        int errorcode, r;
-        PCRE2_SIZE erroroffset;
-        pcre2_code *p;
-
-        p = sym_pcre2_compile((PCRE2_SPTR8) pattern,
-                              PCRE2_ZERO_TERMINATED, flags, &errorcode, &erroroffset, NULL);
-        if (!p) {
-                unsigned char buf[LINE_MAX];
-
-                r = sym_pcre2_get_error_message(errorcode, buf, sizeof buf);
-
-                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
-                                       "Bad pattern \"%s\": %s", pattern,
-                                       r < 0 ? "unknown error" : (char *)buf);
-        }
-
-        *out = p;
-        return 0;
-}
-#endif
-
 static int add_matches_for_device(sd_journal *j, const char *devpath) {
         _cleanup_(sd_device_unrefp) sd_device *device = NULL;
         sd_device *d = NULL;
@@ -918,7 +886,6 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
                 }
 
-#if HAVE_PCRE2
                 case 'g':
                         arg_pattern = optarg;
                         break;
@@ -928,16 +895,11 @@ static int parse_argv(int argc, char *argv[]) {
                                 r = parse_boolean(optarg);
                                 if (r < 0)
                                         return log_error_errno(r, "Bad --case-sensitive= argument \"%s\": %m", optarg);
-                                arg_case_sensitive = r;
+                                arg_case = r ? PATTERN_COMPILE_CASE_SENSITIVE : PATTERN_COMPILE_CASE_INSENSITIVE;
                         } else
-                                arg_case_sensitive = true;
+                                arg_case = PATTERN_COMPILE_CASE_SENSITIVE;
 
                         break;
-#else
-                case 'g':
-                case ARG_CASE_SENSITIVE:
-                        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Compiled without pattern matching support");
-#endif
 
                 case 'S':
                         r = parse_timestamp(optarg, &arg_since);
@@ -1114,44 +1076,11 @@ static int parse_argv(int argc, char *argv[]) {
                 arg_system_units = strv_free(arg_system_units);
         }
 
-#if HAVE_PCRE2
         if (arg_pattern) {
-                unsigned flags;
-
-                r = dlopen_pcre2();
-                if (r < 0)
-                        return r;
-
-                if (arg_case_sensitive >= 0)
-                        flags = !arg_case_sensitive * PCRE2_CASELESS;
-                else {
-                        _cleanup_(sym_pcre2_match_data_freep) pcre2_match_data *md = NULL;
-                        bool has_case;
-                        _cleanup_(sym_pcre2_code_freep) pcre2_code *cs = NULL;
-
-                        md = sym_pcre2_match_data_create(1, NULL);
-                        if (!md)
-                                return log_oom();
-
-                        r = pattern_compile("[[:upper:]]", 0, &cs);
-                        if (r < 0)
-                                return r;
-
-                        r = sym_pcre2_match(cs, (PCRE2_SPTR8) arg_pattern, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL);
-                        has_case = r >= 0;
-
-                        flags = !has_case * PCRE2_CASELESS;
-                }
-
-                log_debug("Doing case %s matching based on %s",
-                          flags & PCRE2_CASELESS ? "insensitive" : "sensitive",
-                          arg_case_sensitive >= 0 ? "request" : "pattern casing");
-
-                r = pattern_compile(arg_pattern, flags, &arg_compiled_pattern);
+                r = pattern_compile_and_log(arg_pattern, arg_case, &arg_compiled_pattern);
                 if (r < 0)
                         return r;
         }
-#endif
 
         return 1;
 }
@@ -2703,16 +2632,9 @@ int main(int argc, char *argv[]) {
                                 }
                         }
 
-#if HAVE_PCRE2
                         if (arg_compiled_pattern) {
-                                _cleanup_(sym_pcre2_match_data_freep) pcre2_match_data *md = NULL;
                                 const void *message;
                                 size_t len;
-                                PCRE2_SIZE *ovec;
-
-                                md = sym_pcre2_match_data_create(1, NULL);
-                                if (!md)
-                                        return log_oom();
 
                                 r = sd_journal_get_data(j, "MESSAGE", &message, &len);
                                 if (r < 0) {
@@ -2727,33 +2649,15 @@ int main(int argc, char *argv[]) {
 
                                 assert_se(message = startswith(message, "MESSAGE="));
 
-                                r = sym_pcre2_match(arg_compiled_pattern,
-                                                    message,
-                                                    len - strlen("MESSAGE="),
-                                                    0,      /* start at offset 0 in the subject */
-                                                    0,      /* default options */
-                                                    md,
-                                                    NULL);
-                                if (r == PCRE2_ERROR_NOMATCH) {
+                                r = pattern_matches_and_log(arg_compiled_pattern, message,
+                                                            len - strlen("MESSAGE="), highlight);
+                                if (r < 0)
+                                        goto finish;
+                                if (r == 0) {
                                         need_seek = true;
                                         continue;
                                 }
-                                if (r < 0) {
-                                        unsigned char buf[LINE_MAX];
-                                        int r2;
-
-                                        r2 = sym_pcre2_get_error_message(r, buf, sizeof buf);
-                                        log_error("Pattern matching failed: %s",
-                                                  r2 < 0 ? "unknown error" : (char*) buf);
-                                        r = -EINVAL;
-                                        goto finish;
-                                }
-
-                                ovec = sym_pcre2_get_ovector_pointer(md);
-                                highlight[0] = ovec[0];
-                                highlight[1] = ovec[1];
                         }
-#endif
 
                         flags =
                                 arg_all * OUTPUT_SHOW_ALL |
index 80f6cec3423694fdcca3c760cb6b587088f4b70b..998dab04910c847b784e8a6155becb70ebf08c4a 100644 (file)
@@ -14,8 +14,10 @@ pcre2_code* (*sym_pcre2_compile)(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_
 int (*sym_pcre2_get_error_message)(int, PCRE2_UCHAR *, PCRE2_SIZE);
 int (*sym_pcre2_match)(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, pcre2_match_data *, pcre2_match_context *);
 PCRE2_SIZE* (*sym_pcre2_get_ovector_pointer)(pcre2_match_data *);
+#endif
 
 int dlopen_pcre2(void) {
+#if HAVE_PCRE2
         /* So here's something weird: PCRE2 actually renames the symbols exported by the library via C
          * macros, so that the exported symbols carry a suffix "_8" but when used from C the suffix is
          * gone. In the argument list below we ignore this mangling. Surprisingly (at least to me), we
@@ -33,12 +35,123 @@ int dlopen_pcre2(void) {
                         DLSYM_ARG(pcre2_get_error_message),
                         DLSYM_ARG(pcre2_match),
                         DLSYM_ARG(pcre2_get_ovector_pointer));
+#else
+        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "PCRE2 support is not compiled in.");
+#endif
 }
 
+int pattern_compile_and_log(const char *pattern, PatternCompileCase case_, pcre2_code **ret) {
+#if HAVE_PCRE2
+        PCRE2_SIZE erroroffset;
+        pcre2_code *p;
+        unsigned flags = 0;
+        int errorcode, r;
+
+        assert(pattern);
+
+        r = dlopen_pcre2();
+        if (r < 0)
+                return r;
+
+        if (case_ == PATTERN_COMPILE_CASE_INSENSITIVE)
+                flags = PCRE2_CASELESS;
+        else if (case_ == PATTERN_COMPILE_CASE_AUTO) {
+                _cleanup_(sym_pcre2_match_data_freep) pcre2_match_data *md = NULL;
+                bool has_case;
+                _cleanup_(sym_pcre2_code_freep) pcre2_code *cs = NULL;
+
+                md = sym_pcre2_match_data_create(1, NULL);
+                if (!md)
+                        return log_oom();
+
+                r = pattern_compile_and_log("[[:upper:]]", PATTERN_COMPILE_CASE_SENSITIVE, &cs);
+                if (r < 0)
+                        return r;
+
+                r = sym_pcre2_match(cs, (PCRE2_SPTR8) pattern, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL);
+                has_case = r >= 0;
+
+                flags = !has_case * PCRE2_CASELESS;
+        }
+
+        log_debug("Doing case %s matching based on %s",
+                  flags & PCRE2_CASELESS ? "insensitive" : "sensitive",
+                  case_ != PATTERN_COMPILE_CASE_AUTO ? "request" : "pattern casing");
+
+        p = sym_pcre2_compile((PCRE2_SPTR8) pattern,
+                              PCRE2_ZERO_TERMINATED, flags, &errorcode, &erroroffset, NULL);
+        if (!p) {
+                unsigned char buf[LINE_MAX];
+
+                r = sym_pcre2_get_error_message(errorcode, buf, sizeof buf);
+
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "Bad pattern \"%s\": %s", pattern,
+                                       r < 0 ? "unknown error" : (char *)buf);
+        }
+
+        if (ret)
+                *ret = p;
+
+        return 0;
 #else
+        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "PCRE2 support is not compiled in.");
+#endif
+}
 
-int dlopen_pcre2(void) {
-        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
-                               "PCRE2 support is not compiled in.");
+int pattern_matches_and_log(pcre2_code *compiled_pattern, const char *message, size_t size, size_t *ret_ovec) {
+#if HAVE_PCRE2
+        _cleanup_(sym_pcre2_match_data_freep) pcre2_match_data *md = NULL;
+        int r;
+
+        assert(compiled_pattern);
+        assert(message);
+        /* pattern_compile_and_log() must be called before this function is called and that function already
+         * dlopens pcre2 so we can assert on it being available here. */
+        assert(pcre2_dl);
+
+        md = sym_pcre2_match_data_create(1, NULL);
+        if (!md)
+                return log_oom();
+
+        r = sym_pcre2_match(compiled_pattern,
+                            (const unsigned char *)message,
+                            size,
+                            0,      /* start at offset 0 in the subject */
+                            0,      /* default options */
+                            md,
+                            NULL);
+        if (r == PCRE2_ERROR_NOMATCH)
+                return false;
+        if (r < 0) {
+                unsigned char buf[LINE_MAX];
+
+                r = sym_pcre2_get_error_message(r, buf, sizeof(buf));
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Pattern matching failed: %s",
+                                       r < 0 ? "unknown error" : (char*) buf);
+        }
+
+        if (ret_ovec) {
+                ret_ovec[0] = sym_pcre2_get_ovector_pointer(md)[0];
+                ret_ovec[1] = sym_pcre2_get_ovector_pointer(md)[1];
+        }
+
+        return true;
+#else
+        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "PCRE2 support is not compiled in.");
+#endif
 }
+
+void *pattern_free(pcre2_code *p) {
+#if HAVE_PCRE2
+        if (!p)
+                return NULL;
+
+        assert(pcre2_dl);
+        sym_pcre2_code_free(p);
+        return NULL;
+#else
+        assert(p == NULL);
+        return NULL;
 #endif
+}
index f17dcd5573484e9f5d7d473bcd4b7a5879f1c972..11f1d77f4f28d795ac81ff523862870769f1585e 100644 (file)
@@ -18,6 +18,24 @@ extern PCRE2_SIZE* (*sym_pcre2_get_ovector_pointer)(pcre2_match_data *);
 
 DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(pcre2_match_data*, sym_pcre2_match_data_free, NULL);
 DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(pcre2_code*, sym_pcre2_code_free, NULL);
+#else
+
+typedef struct {} pcre2_code;
+
 #endif
 
+typedef enum {
+        PATTERN_COMPILE_CASE_AUTO,
+        PATTERN_COMPILE_CASE_SENSITIVE,
+        PATTERN_COMPILE_CASE_INSENSITIVE,
+        _PATTERN_COMPILE_CASE_MAX,
+        _PATTERN_COMPILE_CASE_INVALID = -EINVAL,
+} PatternCompileCase;
+
+int pattern_compile_and_log(const char *pattern, PatternCompileCase case_, pcre2_code **ret);
+int pattern_matches_and_log(pcre2_code *compiled_pattern, const char *message, size_t size, size_t *ret_ovec);
+void *pattern_free(pcre2_code *p);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(pcre2_code*, pattern_free);
+
 int dlopen_pcre2(void);