X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=src%2Fbasic%2Fstring-util.c;h=c6dad5275fe16b08e4fac292bb12315845479a52;hb=083d27b654ca579c44bcfb92792dc73ec7c80c98;hp=7c75970d7ba1c4156de31ea8ff7bb2f0a293c39e;hpb=b667d50d3443be7fd861d319b5acd525aa15329c;p=thirdparty%2Fsystemd.git

diff --git a/src/basic/string-util.c b/src/basic/string-util.c
index 7c75970d7ba..c6dad5275fe 100644
--- a/src/basic/string-util.c
+++ b/src/basic/string-util.c
@@ -1,9 +1,4 @@
 /* SPDX-License-Identifier: LGPL-2.1+ */
-/***
-  This file is part of systemd.
-
-  Copyright 2010 Lennart Poettering
-***/
 
 #include <errno.h>
 #include <stdarg.h>
@@ -14,12 +9,15 @@
 #include <string.h>
 
 #include "alloc-util.h"
+#include "escape.h"
 #include "gunicode.h"
+#include "locale-util.h"
 #include "macro.h"
 #include "string-util.h"
 #include "terminal-util.h"
 #include "utf8.h"
 #include "util.h"
+#include "fileio.h"
 
 int strcmp_ptr(const char *a, const char *b) {
 
@@ -130,7 +128,7 @@ static size_t strcspn_escaped(const char *s, const char *reject) {
 }
 
 /* Split a string into words. */
-const char* split(const char **state, size_t *l, const char *separator, bool quoted) {
+const char* split(const char **state, size_t *l, const char *separator, SplitFlags flags) {
         const char *current;
 
         current = *state;
@@ -146,20 +144,24 @@ const char* split(const char **state, size_t *l, const char *separator, bool quo
                 return NULL;
         }
 
-        if (quoted && strchr("\'\"", *current)) {
+        if (flags & SPLIT_QUOTES && strchr("\'\"", *current)) {
                 char quotechars[2] = {*current, '\0'};
 
                 *l = strcspn_escaped(current + 1, quotechars);
                 if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] ||
                     (current[*l + 2] && !strchr(separator, current[*l + 2]))) {
                         /* right quote missing or garbage at the end */
+                        if (flags & SPLIT_RELAX) {
+                                *state = current + *l + 1 + (current[*l + 1] != '\0');
+                                return current + 1;
+                        }
                         *state = current;
                         return NULL;
                 }
                 *state = current++ + *l + 2;
-        } else if (quoted) {
+        } else if (flags & SPLIT_QUOTES) {
                 *l = strcspn_escaped(current, separator);
-                if (current[*l] && !strchr(separator, current[*l])) {
+                if (current[*l] && !strchr(separator, current[*l]) && !(flags & SPLIT_RELAX)) {
                         /* unfinished escape */
                         *state = current;
                         return NULL;
@@ -266,23 +268,12 @@ char *strjoin_real(const char *x, ...) {
 }
 
 char *strstrip(char *s) {
-        char *e;
-
         if (!s)
                 return NULL;
 
-        /* Drops trailing whitespace. Modifies the string in
-         * place. Returns pointer to first non-space character */
-
-        s += strspn(s, WHITESPACE);
-
-        for (e = strchr(s, 0); e > s; e --)
-                if (!strchr(WHITESPACE, e[-1]))
-                        break;
-
-        *e = 0;
+        /* Drops trailing whitespace. Modifies the string in place. Returns pointer to first non-space character */
 
-        return s;
+        return delete_trailing_chars(skip_leading_chars(s, WHITESPACE), WHITESPACE);
 }
 
 char *delete_chars(char *s, const char *bad) {
@@ -451,94 +442,154 @@ bool string_has_cc(const char *p, const char *ok) {
         return false;
 }
 
+static int write_ellipsis(char *buf, bool unicode) {
+        if (unicode || is_locale_utf8()) {
+                buf[0] = 0xe2; /* tri-dot ellipsis: â¦ */
+                buf[1] = 0x80;
+                buf[2] = 0xa6;
+        } else {
+                buf[0] = '.';
+                buf[1] = '.';
+                buf[2] = '.';
+        }
+
+        return 3;
+}
+
 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
-        size_t x;
-        char *r;
+        size_t x, need_space, suffix_len;
+        char *t;
 
         assert(s);
         assert(percent <= 100);
-        assert(new_length >= 3);
+        assert(new_length != (size_t) -1);
 
-        if (old_length <= 3 || old_length <= new_length)
+        if (old_length <= new_length)
                 return strndup(s, old_length);
 
-        r = new0(char, new_length+3);
-        if (!r)
+        /* Special case short ellipsations */
+        switch (new_length) {
+
+        case 0:
+                return strdup("");
+
+        case 1:
+                if (is_locale_utf8())
+                        return strdup("â¦");
+                else
+                        return strdup(".");
+
+        case 2:
+                if (!is_locale_utf8())
+                        return strdup("..");
+
+                break;
+
+        default:
+                break;
+        }
+
+        /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
+         * character ("â¦"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
+         * either for the UTF-8 encoded character or for three ASCII characters. */
+        need_space = is_locale_utf8() ? 1 : 3;
+
+        t = new(char, new_length+3);
+        if (!t)
                 return NULL;
 
-        x = (new_length * percent) / 100;
+        assert(new_length >= need_space);
 
-        if (x > new_length - 3)
-                x = new_length - 3;
+        x = ((new_length - need_space) * percent + 50) / 100;
+        assert(x <= new_length - need_space);
 
-        memcpy(r, s, x);
-        r[x] = 0xe2; /* tri-dot ellipsis: â¦ */
-        r[x+1] = 0x80;
-        r[x+2] = 0xa6;
-        memcpy(r + x + 3,
-               s + old_length - (new_length - x - 1),
-               new_length - x - 1);
+        memcpy(t, s, x);
+        write_ellipsis(t + x, false);
+        suffix_len = new_length - x - need_space;
+        memcpy(t + x + 3, s + old_length - suffix_len, suffix_len);
+        *(t + x + 3 + suffix_len) = '\0';
 
-        return r;
+        return t;
 }
 
 char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
-        size_t x;
-        char *e;
+        size_t x, k, len, len2;
         const char *i, *j;
-        unsigned k, len, len2;
+        char *e;
         int r;
 
+        /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
+         * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
+         * strings.
+         *
+         * Ellipsation is done in a locale-dependent way:
+         * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
+         * 2. Otherwise, a unicode ellipsis is used ("â¦")
+         *
+         * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
+         * the current locale is UTF-8.
+         */
+
         assert(s);
         assert(percent <= 100);
 
         if (new_length == (size_t) -1)
                 return strndup(s, old_length);
 
-        assert(new_length >= 3);
+        if (new_length == 0)
+                return strdup("");
 
-        /* if no multibyte characters use ascii_ellipsize_mem for speed */
-        if (ascii_is_valid(s))
+        /* If no multibyte characters use ascii_ellipsize_mem for speed */
+        if (ascii_is_valid_n(s, old_length))
                 return ascii_ellipsize_mem(s, old_length, new_length, percent);
 
-        if (old_length <= 3 || old_length <= new_length)
-                return strndup(s, old_length);
-
-        x = (new_length * percent) / 100;
-
-        if (x > new_length - 3)
-                x = new_length - 3;
+        x = ((new_length - 1) * percent) / 100;
+        assert(x <= new_length - 1);
 
         k = 0;
-        for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
+        for (i = s; i < s + old_length; i = utf8_next_char(i)) {
                 char32_t c;
+                int w;
 
                 r = utf8_encoded_to_unichar(i, &c);
                 if (r < 0)
                         return NULL;
-                k += unichar_iswide(c) ? 2 : 1;
-        }
 
-        if (k > x) /* last character was wide and went over quota */
-                x++;
+                w = unichar_iswide(c) ? 2 : 1;
+                if (k + w <= x)
+                        k += w;
+                else
+                        break;
+        }
 
-        for (j = s + old_length; k < new_length && j > i; ) {
+        for (j = s + old_length; j > i; ) {
                 char32_t c;
+                int w;
+                const char *jj;
 
-                j = utf8_prev_char(j);
-                r = utf8_encoded_to_unichar(j, &c);
+                jj = utf8_prev_char(j);
+                r = utf8_encoded_to_unichar(jj, &c);
                 if (r < 0)
                         return NULL;
-                k += unichar_iswide(c) ? 2 : 1;
+
+                w = unichar_iswide(c) ? 2 : 1;
+                if (k + w <= new_length) {
+                        k += w;
+                        j = jj;
+                } else
+                        break;
         }
         assert(i <= j);
 
         /* we don't actually need to ellipsize */
         if (i == j)
-                return memdup(s, old_length + 1);
+                return memdup_suffix0(s, old_length);
 
-        /* make space for ellipsis */
-        j = utf8_next_char(j);
+        /* make space for ellipsis, if possible */
+        if (j < s + old_length)
+                j = utf8_next_char(j);
+        else if (i > s)
+                i = utf8_prev_char(i);
 
         len = i - s;
         len2 = s + old_length - j;
@@ -552,21 +603,81 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne
         */
 
         memcpy(e, s, len);
-        e[len]   = 0xe2; /* tri-dot ellipsis: â¦ */
-        e[len + 1] = 0x80;
-        e[len + 2] = 0xa6;
-
-        memcpy(e + len + 3, j, len2 + 1);
+        write_ellipsis(e + len, true);
+        memcpy(e + len + 3, j, len2);
+        *(e + len + 3 + len2) = '\0';
 
         return e;
 }
 
-char *ellipsize(const char *s, size_t length, unsigned percent) {
+char *cellescape(char *buf, size_t len, const char *s) {
+        /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII
+         * characters are copied as they are, everything else is escaped. The result
+         * is different then if escaping and ellipsization was performed in two
+         * separate steps, because each sequence is either stored in full or skipped.
+         *
+         * This function should be used for logging about strings which expected to
+         * be plain ASCII in a safe way.
+         *
+         * An ellipsis will be used if s is too long. It was always placed at the
+         * very end.
+         */
 
-        if (length == (size_t) -1)
-                return strdup(s);
+        size_t i = 0, last_char_width[4] = {}, k = 0, j;
+
+        assert(len > 0); /* at least a terminating NUL */
+
+        for (;;) {
+                char four[4];
+                int w;
+
+                if (*s == 0) /* terminating NUL detected? then we are done! */
+                        goto done;
+
+                w = cescape_char(*s, four);
+                if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? In that case let's
+                                      * ellipsize at the previous location */
+                        break;
+
+                /* OK, there was space, let's add this escaped character to the buffer */
+                memcpy(buf + i, four, w);
+                i += w;
+
+                /* And remember its width in the ring buffer */
+                last_char_width[k] = w;
+                k = (k + 1) % 4;
 
-        return ellipsize_mem(s, strlen(s), length, percent);
+                s++;
+        }
+
+        /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4
+         * characters ideally, but the buffer is shorter than that in the first place take what we can get */
+        for (j = 0; j < ELEMENTSOF(last_char_width); j++) {
+
+                if (i + 4 <= len) /* nice, we reached our space goal */
+                        break;
+
+                k = k == 0 ? 3 : k - 1;
+                if (last_char_width[k] == 0) /* bummer, we reached the beginning of the strings */
+                        break;
+
+                assert(i >= last_char_width[k]);
+                i -= last_char_width[k];
+        }
+
+        if (i + 4 <= len) /* yay, enough space */
+                i += write_ellipsis(buf + i, false);
+        else if (i + 3 <= len) { /* only space for ".." */
+                buf[i++] = '.';
+                buf[i++] = '.';
+        } else if (i + 2 <= len) /* only space for a single "." */
+                buf[i++] = '.';
+        else
+                assert(i + 1 <= len);
+
+ done:
+        buf[i] = '\0';
+        return buf;
 }
 
 bool nulstr_contains(const char *nulstr, const char *needle) {
@@ -651,7 +762,8 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
         enum {
                 STATE_OTHER,
                 STATE_ESCAPE,
-                STATE_BRACKET
+                STATE_CSI,
+                STATE_CSO,
         } state = STATE_OTHER;
         char *obuf = NULL;
         size_t osz = 0, isz, shift[2] = {};
@@ -660,7 +772,17 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
         assert(ibuf);
         assert(*ibuf);
 
-        /* Strips ANSI color and replaces TABs by 8 spaces */
+        /* This does three things:
+         *
+         * 1. Replaces TABs by 8 spaces
+         * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' â¦ 'm' sequences
+         * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' â¦ BEL sequences
+         *
+         * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as are any
+         * other special characters. Truncated ANSI sequences are left-as is too. This call is supposed to suppress the
+         * most basic formatting noise, but nothing else.
+         *
+         * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */
 
         isz = _isz ? *_isz : strlen(*ibuf);
 
@@ -695,8 +817,11 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
                                 fputc('\x1B', f);
                                 advance_offsets(i - *ibuf, highlight, shift, 1);
                                 break;
-                        } else if (*i == '[') {
-                                state = STATE_BRACKET;
+                        } else if (*i == '[') { /* ANSI CSI */
+                                state = STATE_CSI;
+                                begin = i + 1;
+                        } else if (*i == ']') { /* ANSI CSO */
+                                state = STATE_CSO;
                                 begin = i + 1;
                         } else {
                                 fputc('\x1B', f);
@@ -707,10 +832,10 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
 
                         break;
 
-                case STATE_BRACKET:
+                case STATE_CSI:
 
-                        if (i >= *ibuf + isz || /* EOT */
-                            (!(*i >= '0' && *i <= '9') && !IN_SET(*i, ';', 'm'))) {
+                        if (i >= *ibuf + isz || /* EOT â¦ */
+                            !strchr("01234567890;m", *i)) { /* â¦ or invalid chars in sequence */
                                 fputc('\x1B', f);
                                 fputc('[', f);
                                 advance_offsets(i - *ibuf, highlight, shift, 2);
@@ -718,11 +843,26 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
                                 i = begin-1;
                         } else if (*i == 'm')
                                 state = STATE_OTHER;
+
+                        break;
+
+                case STATE_CSO:
+
+                        if (i >= *ibuf + isz || /* EOT â¦ */
+                            (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* â¦ or invalid chars in sequence */
+                                fputc('\x1B', f);
+                                fputc(']', f);
+                                advance_offsets(i - *ibuf, highlight, shift, 2);
+                                state = STATE_OTHER;
+                                i = begin-1;
+                        } else if (*i == '\a')
+                                state = STATE_OTHER;
+
                         break;
                 }
         }
 
-        if (ferror(f)) {
+        if (fflush_and_check(f) < 0) {
                 fclose(f);
                 return mfree(obuf);
         }
@@ -868,7 +1008,7 @@ int free_and_strdup(char **p, const char *s) {
 
         assert(p);
 
-        /* Replaces a string pointer with an strdup()ed new string,
+        /* Replaces a string pointer with a strdup()ed new string,
          * possibly freeing the old one. */
 
         if (streq_ptr(*p, s))
@@ -887,6 +1027,32 @@ int free_and_strdup(char **p, const char *s) {
         return 1;
 }
 
+int free_and_strndup(char **p, const char *s, size_t l) {
+        char *t;
+
+        assert(p);
+        assert(s || l == 0);
+
+        /* Replaces a string pointer with a strndup()ed new string,
+         * freeing the old one. */
+
+        if (!*p && !s)
+                return 0;
+
+        if (*p && s && strneq(*p, s, l) && (l > strlen(*p) || (*p)[l] == '\0'))
+                return 0;
+
+        if (s) {
+                t = strndup(s, l);
+                if (!t)
+                        return -ENOMEM;
+        } else
+                t = NULL;
+
+        free_and_replace(*p, t);
+        return 1;
+}
+
 #if !HAVE_EXPLICIT_BZERO
 /*
  * Pointer to memset is volatile so that compiler must de-reference