git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/basic/extract-word.c
basic/extract-word,man: clarify "correction" of invalid escapes
[thirdparty/systemd.git] / src / basic / extract-word.c
index 6721b85c0ad396fc77b01bb9becdce469a4bed32..f8cac3e911fddef4630ff78eb731077b508089bd 100644 (file)
@@ -1,5 +1,3 @@
-/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
-
 /***
   This file is part of systemd.
 
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
+#include <errno.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+
 #include "alloc-util.h"
 #include "escape.h"
 #include "extract-word.h"
+#include "log.h"
+#include "macro.h"
 #include "string-util.h"
 #include "utf8.h"
-#include "util.h"
 
 int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
         _cleanup_free_ char *s = NULL;
         size_t allocated = 0, sz = 0;
+        char c;
         int r;
 
         char quote = 0;                 /* 0 or ' or " */
         bool backslash = false;         /* whether we've just seen a backslash */
-        bool separator = false;         /* whether we've just seen a separator */
-        bool start = true;              /* false means we're looking at a value */
 
         assert(p);
         assert(ret);
 
-        if (!separators)
-                separators = WHITESPACE;
-
         /* Bail early if called after last value or with no input */
         if (!*p)
-                goto finish_force_terminate;
+                goto finish;
+        c = **p;
+
+        if (!separators)
+                separators = WHITESPACE;
 
         /* Parses the first word of a string, and returns it in
          * *ret. Removes all quotes in the process. When parsing fails
          * (because of an uneven number of quotes or similar), leaves
          * the pointer *p at the first invalid character. */
 
-        for (;;) {
-                char c = **p;
-
-                if (start) {
-                        if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
-                                if (!GREEDY_REALLOC(s, allocated, sz+1))
-                                        return -ENOMEM;
-
-                        if (c == 0)
-                                goto finish_force_terminate;
-                        else if (strchr(separators, c)) {
-                                (*p) ++;
-                                if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
-                                        goto finish_force_next;
-                                continue;
+        if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
+                if (!GREEDY_REALLOC(s, allocated, sz+1))
+                        return -ENOMEM;
+
+        for (;; (*p)++, c = **p) {
+                if (c == 0)
+                        goto finish_force_terminate;
+                else if (strchr(separators, c)) {
+                        if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
+                                (*p)++;
+                                goto finish_force_next;
                         }
-
+                } else {
                         /* We found a non-blank character, so we will always
                          * want to return a string (even if it is empty),
                          * allocate it here. */
                         if (!GREEDY_REALLOC(s, allocated, sz+1))
                                 return -ENOMEM;
-
-                        start = false;
+                        break;
                 }
+        }
 
+        for (;; (*p)++, c = **p) {
                 if (backslash) {
                         if (!GREEDY_REALLOC(s, allocated, sz+7))
                                 return -ENOMEM;
@@ -100,74 +105,81 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra
                         }
 
                         if (flags & EXTRACT_CUNESCAPE) {
-                                uint32_t u;
+                                bool eight_bit = false;
+                                char32_t u;
 
-                                r = cunescape_one(*p, (size_t) -1, &c, &u);
+                                r = cunescape_one(*p, (size_t) -1, &u, &eight_bit);
                                 if (r < 0) {
                                         if (flags & EXTRACT_CUNESCAPE_RELAX) {
                                                 s[sz++] = '\\';
                                                 s[sz++] = c;
-                                                goto end_escape;
-                                        }
-                                        return -EINVAL;
+                                        } else
+                                                return -EINVAL;
+                                } else {
+                                        (*p) += r - 1;
+
+                                        if (eight_bit)
+                                                s[sz++] = u;
+                                        else
+                                                sz += utf8_encode_unichar(s + sz, u);
                                 }
-
-                                (*p) += r - 1;
-
-                                if (c != 0)
-                                        s[sz++] = c; /* normal explicit char */
-                                else
-                                        sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
                         } else
                                 s[sz++] = c;
 
-end_escape:
                         backslash = false;
 
                 } else if (quote) {     /* inside either single or double quotes */
-                        if (c == 0) {
-                                if (flags & EXTRACT_RELAX)
-                                        goto finish_force_terminate;
-                                return -EINVAL;
-                        } else if (c == quote)          /* found the end quote */
-                                quote = 0;
-                        else if (c == '\\')
-                                backslash = true;
-                        else {
-                                if (!GREEDY_REALLOC(s, allocated, sz+2))
-                                        return -ENOMEM;
-
-                                s[sz++] = c;
+                        for (;; (*p)++, c = **p) {
+                                if (c == 0) {
+                                        if (flags & EXTRACT_RELAX)
+                                                goto finish_force_terminate;
+                                        return -EINVAL;
+                                } else if (c == quote) {        /* found the end quote */
+                                        quote = 0;
+                                        break;
+                                } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
+                                        backslash = true;
+                                        break;
+                                } else {
+                                        if (!GREEDY_REALLOC(s, allocated, sz+2))
+                                                return -ENOMEM;
+
+                                        s[sz++] = c;
+                                }
                         }
 
-                } else if (separator) {
-                        if (c == 0)
-                                goto finish_force_terminate;
-                        if (!strchr(separators, c))
-                                goto finish;
-
                 } else {
-                        if (c == 0)
-                                goto finish_force_terminate;
-                        else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES))
-                                quote = c;
-                        else if (c == '\\')
-                                backslash = true;
-                        else if (strchr(separators, c)) {
-                                if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
-                                        (*p) ++;
-                                        goto finish_force_next;
-                                }
-                                separator = true;
-                        } else {
-                                if (!GREEDY_REALLOC(s, allocated, sz+2))
-                                        return -ENOMEM;
+                        for (;; (*p)++, c = **p) {
+                                if (c == 0)
+                                        goto finish_force_terminate;
+                                else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES)) {
+                                        quote = c;
+                                        break;
+                                } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
+                                        backslash = true;
+                                        break;
+                                } else if (strchr(separators, c)) {
+                                        if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
+                                                (*p)++;
+                                                goto finish_force_next;
+                                        }
+                                        /* Skip additional coalesced separators. */
+                                        for (;; (*p)++, c = **p) {
+                                                if (c == 0)
+                                                        goto finish_force_terminate;
+                                                if (!strchr(separators, c))
+                                                        break;
+                                        }
+                                        goto finish;
 
-                                s[sz++] = c;
+                                } else {
+                                        if (!GREEDY_REALLOC(s, allocated, sz+2))
+                                                return -ENOMEM;
+
+                                        s[sz++] = c;
+                                }
                         }
                 }
-
-                (*p) ++;
         }
 
 finish_force_terminate:
@@ -215,8 +227,8 @@ int extract_first_word_and_warn(
                 *p = save;
                 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
                 if (r >= 0) {
-                        /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
-                        log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
+                        /* It worked this time, hence it must have been an invalid escape sequence. */
+                        log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret);
                         return r;
                 }