X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=src%2Fbasic%2Fstring-util.c;h=c6dad5275fe16b08e4fac292bb12315845479a52;hb=083d27b654ca579c44bcfb92792dc73ec7c80c98;hp=7c75970d7ba1c4156de31ea8ff7bb2f0a293c39e;hpb=b667d50d3443be7fd861d319b5acd525aa15329c;p=thirdparty%2Fsystemd.git diff --git a/src/basic/string-util.c b/src/basic/string-util.c index 7c75970d7ba..c6dad5275fe 100644 --- a/src/basic/string-util.c +++ b/src/basic/string-util.c @@ -1,9 +1,4 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ -/*** - This file is part of systemd. - - Copyright 2010 Lennart Poettering -***/ #include #include @@ -14,12 +9,15 @@ #include #include "alloc-util.h" +#include "escape.h" #include "gunicode.h" +#include "locale-util.h" #include "macro.h" #include "string-util.h" #include "terminal-util.h" #include "utf8.h" #include "util.h" +#include "fileio.h" int strcmp_ptr(const char *a, const char *b) { @@ -130,7 +128,7 @@ static size_t strcspn_escaped(const char *s, const char *reject) { } /* Split a string into words. 
*/ -const char* split(const char **state, size_t *l, const char *separator, bool quoted) { +const char* split(const char **state, size_t *l, const char *separator, SplitFlags flags) { const char *current; current = *state; @@ -146,20 +144,24 @@ const char* split(const char **state, size_t *l, const char *separator, bool quo return NULL; } - if (quoted && strchr("\'\"", *current)) { + if (flags & SPLIT_QUOTES && strchr("\'\"", *current)) { char quotechars[2] = {*current, '\0'}; *l = strcspn_escaped(current + 1, quotechars); if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] || (current[*l + 2] && !strchr(separator, current[*l + 2]))) { /* right quote missing or garbage at the end */ + if (flags & SPLIT_RELAX) { + *state = current + *l + 1 + (current[*l + 1] != '\0'); + return current + 1; + } *state = current; return NULL; } *state = current++ + *l + 2; - } else if (quoted) { + } else if (flags & SPLIT_QUOTES) { *l = strcspn_escaped(current, separator); - if (current[*l] && !strchr(separator, current[*l])) { + if (current[*l] && !strchr(separator, current[*l]) && !(flags & SPLIT_RELAX)) { /* unfinished escape */ *state = current; return NULL; @@ -266,23 +268,12 @@ char *strjoin_real(const char *x, ...) { } char *strstrip(char *s) { - char *e; - if (!s) return NULL; - /* Drops trailing whitespace. Modifies the string in - * place. Returns pointer to first non-space character */ - - s += strspn(s, WHITESPACE); - - for (e = strchr(s, 0); e > s; e --) - if (!strchr(WHITESPACE, e[-1])) - break; - - *e = 0; + /* Drops trailing whitespace. Modifies the string in place. 
Returns pointer to first non-space character */ - return s; + return delete_trailing_chars(skip_leading_chars(s, WHITESPACE), WHITESPACE); } char *delete_chars(char *s, const char *bad) { @@ -451,94 +442,154 @@ bool string_has_cc(const char *p, const char *ok) { return false; } +static int write_ellipsis(char *buf, bool unicode) { + if (unicode || is_locale_utf8()) { + buf[0] = 0xe2; /* tri-dot ellipsis: … */ + buf[1] = 0x80; + buf[2] = 0xa6; + } else { + buf[0] = '.'; + buf[1] = '.'; + buf[2] = '.'; + } + + return 3; +} + static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { - size_t x; - char *r; + size_t x, need_space, suffix_len; + char *t; assert(s); assert(percent <= 100); - assert(new_length >= 3); + assert(new_length != (size_t) -1); - if (old_length <= 3 || old_length <= new_length) + if (old_length <= new_length) return strndup(s, old_length); - r = new0(char, new_length+3); - if (!r) + /* Special case short ellipsations */ + switch (new_length) { + + case 0: + return strdup(""); + + case 1: + if (is_locale_utf8()) + return strdup("…"); + else + return strdup("."); + + case 2: + if (!is_locale_utf8()) + return strdup(".."); + + break; + + default: + break; + } + + /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one + * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage, + * either for the UTF-8 encoded character or for three ASCII characters. */ + need_space = is_locale_utf8() ? 
1 : 3; + + t = new(char, new_length+3); + if (!t) return NULL; - x = (new_length * percent) / 100; + assert(new_length >= need_space); - if (x > new_length - 3) - x = new_length - 3; + x = ((new_length - need_space) * percent + 50) / 100; + assert(x <= new_length - need_space); - memcpy(r, s, x); - r[x] = 0xe2; /* tri-dot ellipsis: … */ - r[x+1] = 0x80; - r[x+2] = 0xa6; - memcpy(r + x + 3, - s + old_length - (new_length - x - 1), - new_length - x - 1); + memcpy(t, s, x); + write_ellipsis(t + x, false); + suffix_len = new_length - x - need_space; + memcpy(t + x + 3, s + old_length - suffix_len, suffix_len); + *(t + x + 3 + suffix_len) = '\0'; - return r; + return t; } char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { - size_t x; - char *e; + size_t x, k, len, len2; const char *i, *j; - unsigned k, len, len2; + char *e; int r; + /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up + * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8 + * strings. + * + * Ellipsation is done in a locale-dependent way: + * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...") + * 2. Otherwise, a unicode ellipsis is used ("…") + * + * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or + * the current locale is UTF-8. 
+ */ + assert(s); assert(percent <= 100); if (new_length == (size_t) -1) return strndup(s, old_length); - assert(new_length >= 3); + if (new_length == 0) + return strdup(""); - /* if no multibyte characters use ascii_ellipsize_mem for speed */ - if (ascii_is_valid(s)) + /* If no multibyte characters use ascii_ellipsize_mem for speed */ + if (ascii_is_valid_n(s, old_length)) return ascii_ellipsize_mem(s, old_length, new_length, percent); - if (old_length <= 3 || old_length <= new_length) - return strndup(s, old_length); - - x = (new_length * percent) / 100; - - if (x > new_length - 3) - x = new_length - 3; + x = ((new_length - 1) * percent) / 100; + assert(x <= new_length - 1); k = 0; - for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) { + for (i = s; i < s + old_length; i = utf8_next_char(i)) { char32_t c; + int w; r = utf8_encoded_to_unichar(i, &c); if (r < 0) return NULL; - k += unichar_iswide(c) ? 2 : 1; - } - if (k > x) /* last character was wide and went over quota */ - x++; + w = unichar_iswide(c) ? 2 : 1; + if (k + w <= x) + k += w; + else + break; + } - for (j = s + old_length; k < new_length && j > i; ) { + for (j = s + old_length; j > i; ) { char32_t c; + int w; + const char *jj; - j = utf8_prev_char(j); - r = utf8_encoded_to_unichar(j, &c); + jj = utf8_prev_char(j); + r = utf8_encoded_to_unichar(jj, &c); if (r < 0) return NULL; - k += unichar_iswide(c) ? 2 : 1; + + w = unichar_iswide(c) ? 
2 : 1; + if (k + w <= new_length) { + k += w; + j = jj; + } else + break; } assert(i <= j); /* we don't actually need to ellipsize */ if (i == j) - return memdup(s, old_length + 1); + return memdup_suffix0(s, old_length); - /* make space for ellipsis */ - j = utf8_next_char(j); + /* make space for ellipsis, if possible */ + if (j < s + old_length) + j = utf8_next_char(j); + else if (i > s) + i = utf8_prev_char(i); len = i - s; len2 = s + old_length - j; @@ -552,21 +603,81 @@ char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigne */ memcpy(e, s, len); - e[len] = 0xe2; /* tri-dot ellipsis: … */ - e[len + 1] = 0x80; - e[len + 2] = 0xa6; - - memcpy(e + len + 3, j, len2 + 1); + write_ellipsis(e + len, true); + memcpy(e + len + 3, j, len2); + *(e + len + 3 + len2) = '\0'; return e; } -char *ellipsize(const char *s, size_t length, unsigned percent) { +char *cellescape(char *buf, size_t len, const char *s) { + /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII + * characters are copied as they are, everything else is escaped. The result + * is different than if escaping and ellipsization were performed in two + * separate steps, because each sequence is either stored in full or skipped. + * + * This function should be used for logging about strings which are expected to + * be plain ASCII in a safe way. + * + * An ellipsis will be used if s is too long. It is always placed at the + * very end. + */ - if (length == (size_t) -1) - return strdup(s); + size_t i = 0, last_char_width[4] = {}, k = 0, j; + + assert(len > 0); /* at least a terminating NUL */ + + for (;;) { + char four[4]; + int w; + + if (*s == 0) /* terminating NUL detected? then we are done! */ + goto done; + + w = cescape_char(*s, four); + if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? 
In that case let's + * ellipsize at the previous location */ + break; + + /* OK, there was space, let's add this escaped character to the buffer */ + memcpy(buf + i, four, w); + i += w; + + /* And remember its width in the ring buffer */ + last_char_width[k] = w; + k = (k + 1) % 4; - return ellipsize_mem(s, strlen(s), length, percent); + s++; + } + + /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4 + * characters ideally, but if the buffer is shorter than that in the first place, take what we can get */ + for (j = 0; j < ELEMENTSOF(last_char_width); j++) { + + if (i + 4 <= len) /* nice, we reached our space goal */ + break; + + k = k == 0 ? 3 : k - 1; + if (last_char_width[k] == 0) /* bummer, we reached the beginning of the string */ + break; + + assert(i >= last_char_width[k]); + i -= last_char_width[k]; + } + + if (i + 4 <= len) /* yay, enough space */ + i += write_ellipsis(buf + i, false); + else if (i + 3 <= len) { /* only space for ".." */ + buf[i++] = '.'; + buf[i++] = '.'; + } else if (i + 2 <= len) /* only space for a single "." */ + buf[i++] = '.'; + else + assert(i + 1 <= len); + + done: + buf[i] = '\0'; + return buf; } bool nulstr_contains(const char *nulstr, const char *needle) { @@ -651,7 +762,8 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { enum { STATE_OTHER, STATE_ESCAPE, - STATE_BRACKET + STATE_CSI, + STATE_CSO, } state = STATE_OTHER; char *obuf = NULL; size_t osz = 0, isz, shift[2] = {}; @@ -660,7 +772,17 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { assert(ibuf); assert(*ibuf); - /* Strips ANSI color and replaces TABs by 8 spaces */ + /* This does three things: + * + * 1. Replaces TABs by 8 spaces + * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences + * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences + * + * Everything else will be left as it is. 
In particular other ANSI sequences are left as they are, as are any + * other special characters. Truncated ANSI sequences are left as-is too. This call is supposed to suppress the + * most basic formatting noise, but nothing else. + * + * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */ isz = _isz ? *_isz : strlen(*ibuf); @@ -695,8 +817,11 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { fputc('\x1B', f); advance_offsets(i - *ibuf, highlight, shift, 1); break; - } else if (*i == '[') { - state = STATE_BRACKET; + } else if (*i == '[') { /* ANSI CSI */ + state = STATE_CSI; + begin = i + 1; + } else if (*i == ']') { /* ANSI CSO */ + state = STATE_CSO; begin = i + 1; } else { fputc('\x1B', f); @@ -707,10 +832,10 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { break; - case STATE_BRACKET: + case STATE_CSI: - if (i >= *ibuf + isz || /* EOT */ - (!(*i >= '0' && *i <= '9') && !IN_SET(*i, ';', 'm'))) { + if (i >= *ibuf + isz || /* EOT … */ + !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */ fputc('\x1B', f); fputc('[', f); advance_offsets(i - *ibuf, highlight, shift, 2); @@ -718,11 +843,26 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { i = begin-1; } else if (*i == 'm') state = STATE_OTHER; + + break; + + case STATE_CSO: + + if (i >= *ibuf + isz || /* EOT … */ + (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */ + fputc('\x1B', f); + fputc(']', f); + advance_offsets(i - *ibuf, highlight, shift, 2); + state = STATE_OTHER; + i = begin-1; + } else if (*i == '\a') + state = STATE_OTHER; + break; } } - if (ferror(f)) { + if (fflush_and_check(f) < 0) { fclose(f); return mfree(obuf); } @@ -868,7 +1008,7 @@ int free_and_strdup(char **p, const char *s) { assert(p); - /* Replaces a string pointer with an strdup()ed new string, + /* Replaces a string pointer with a strdup()ed new string, * possibly 
freeing the old one. */ if (streq_ptr(*p, s)) @@ -887,6 +1027,32 @@ int free_and_strdup(char **p, const char *s) { return 1; } +int free_and_strndup(char **p, const char *s, size_t l) { + char *t; + + assert(p); + assert(s || l == 0); + + /* Replaces a string pointer with a strndup()ed new string, + * freeing the old one. */ + + if (!*p && !s) + return 0; + + if (*p && s && strneq(*p, s, l) && (l > strlen(*p) || (*p)[l] == '\0')) + return 0; + + if (s) { + t = strndup(s, l); + if (!t) + return -ENOMEM; + } else + t = NULL; + + free_and_replace(*p, t); + return 1; +} + #if !HAVE_EXPLICIT_BZERO /* * Pointer to memset is volatile so that compiler must de-reference