-/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
-
/***
This file is part of systemd.
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
+#include <errno.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+
#include "alloc-util.h"
#include "escape.h"
-#include "utf8.h"
-#include "util.h"
#include "extract-word.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "utf8.h"
int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
_cleanup_free_ char *s = NULL;
size_t allocated = 0, sz = 0;
+ char c;
int r;
char quote = 0; /* 0 or ' or " */
bool backslash = false; /* whether we've just seen a backslash */
- bool separator = false; /* whether we've just seen a separator */
- bool start = true; /* false means we're looking at a value */
assert(p);
assert(ret);
- if (!separators)
- separators = WHITESPACE;
-
/* Bail early if called after last value or with no input */
if (!*p)
- goto finish_force_terminate;
+ goto finish;
+ c = **p;
+
+ if (!separators)
+ separators = WHITESPACE;
/* Parses the first word of a string, and returns it in
* *ret. Removes all quotes in the process. When parsing fails
* (because of an uneven number of quotes or similar), leaves
* the pointer *p at the first invalid character. */
- for (;;) {
- char c = **p;
-
- if (start) {
- if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
- if (!GREEDY_REALLOC(s, allocated, sz+1))
- return -ENOMEM;
-
- if (c == 0)
- goto finish_force_terminate;
- else if (strchr(separators, c)) {
- (*p) ++;
- if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
- goto finish_force_next;
- continue;
+ if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
+ if (!GREEDY_REALLOC(s, allocated, sz+1))
+ return -ENOMEM;
+
+ for (;; (*p)++, c = **p) {
+ if (c == 0)
+ goto finish_force_terminate;
+ else if (strchr(separators, c)) {
+ if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
+ (*p)++;
+ goto finish_force_next;
}
-
+ } else {
/* We found a non-blank character, so we will always
* want to return a string (even if it is empty),
* allocate it here. */
if (!GREEDY_REALLOC(s, allocated, sz+1))
return -ENOMEM;
-
- start = false;
+ break;
}
+ }
+ for (;; (*p)++, c = **p) {
if (backslash) {
if (!GREEDY_REALLOC(s, allocated, sz+7))
return -ENOMEM;
}
if (flags & EXTRACT_CUNESCAPE) {
+ bool eight_bit = false;
char32_t u;
- r = cunescape_one(*p, (size_t) -1, &c, &u);
+ r = cunescape_one(*p, (size_t) -1, &u, &eight_bit);
if (r < 0) {
if (flags & EXTRACT_CUNESCAPE_RELAX) {
s[sz++] = '\\';
s[sz++] = c;
- goto end_escape;
- }
- return -EINVAL;
+ } else
+ return -EINVAL;
+ } else {
+ (*p) += r - 1;
+
+ if (eight_bit)
+ s[sz++] = u;
+ else
+ sz += utf8_encode_unichar(s + sz, u);
}
-
- (*p) += r - 1;
-
- if (c != 0)
- s[sz++] = c; /* normal explicit char */
- else
- sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
} else
s[sz++] = c;
-end_escape:
backslash = false;
} else if (quote) { /* inside either single or double quotes */
- if (c == 0) {
- if (flags & EXTRACT_RELAX)
- goto finish_force_terminate;
- return -EINVAL;
- } else if (c == quote) /* found the end quote */
- quote = 0;
- else if (c == '\\')
- backslash = true;
- else {
- if (!GREEDY_REALLOC(s, allocated, sz+2))
- return -ENOMEM;
-
- s[sz++] = c;
+ for (;; (*p)++, c = **p) {
+ if (c == 0) {
+ if (flags & EXTRACT_RELAX)
+ goto finish_force_terminate;
+ return -EINVAL;
+ } else if (c == quote) { /* found the end quote */
+ quote = 0;
+ break;
+ } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
+ backslash = true;
+ break;
+ } else {
+ if (!GREEDY_REALLOC(s, allocated, sz+2))
+ return -ENOMEM;
+
+ s[sz++] = c;
+ }
}
- } else if (separator) {
- if (c == 0)
- goto finish_force_terminate;
- if (!strchr(separators, c))
- goto finish;
-
} else {
- if (c == 0)
- goto finish_force_terminate;
- else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES))
- quote = c;
- else if (c == '\\')
- backslash = true;
- else if (strchr(separators, c)) {
- if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
- (*p) ++;
- goto finish_force_next;
- }
- separator = true;
- } else {
- if (!GREEDY_REALLOC(s, allocated, sz+2))
- return -ENOMEM;
+ for (;; (*p)++, c = **p) {
+ if (c == 0)
+ goto finish_force_terminate;
+ else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES)) {
+ quote = c;
+ break;
+ } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
+ backslash = true;
+ break;
+ } else if (strchr(separators, c)) {
+ if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
+ (*p)++;
+ goto finish_force_next;
+ }
+ /* Skip additional coalesced separators. */
+ for (;; (*p)++, c = **p) {
+ if (c == 0)
+ goto finish_force_terminate;
+ if (!strchr(separators, c))
+ break;
+ }
+ goto finish;
- s[sz++] = c;
+ } else {
+ if (!GREEDY_REALLOC(s, allocated, sz+2))
+ return -ENOMEM;
+
+ s[sz++] = c;
+ }
}
}
-
- (*p) ++;
}
finish_force_terminate:
*p = save;
r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
if (r >= 0) {
- /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
- log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
+ /* It worked this time, hence it must have been an invalid escape sequence. */
+ log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret);
return r;
}