-/***
- This file is part of systemd.
-
- Copyright 2010 Lennart Poettering
-
- systemd is free software; you can redistribute it and/or modify it
- under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation; either version 2.1 of the License, or
- (at your option) any later version.
-
- systemd is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public License
- along with systemd; If not, see <http://www.gnu.org/licenses/>.
-***/
+/* SPDX-License-Identifier: LGPL-2.1+ */
#include <errno.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
+#include <stdio_ext.h>
#include <stdlib.h>
#include <string.h>
#include "alloc-util.h"
+#include "escape.h"
#include "gunicode.h"
+#include "locale-util.h"
#include "macro.h"
#include "string-util.h"
+#include "terminal-util.h"
#include "utf8.h"
#include "util.h"
+#include "fileio.h"
int strcmp_ptr(const char *a, const char *b) {
}
char *strstrip(char *s) {
- char *e;
-
- /* Drops trailing whitespace. Modifies the string in
- * place. Returns pointer to first non-space character */
-
- s += strspn(s, WHITESPACE);
-
- for (e = strchr(s, 0); e > s; e --)
- if (!strchr(WHITESPACE, e[-1]))
- break;
+ if (!s)
+ return NULL;
- *e = 0;
+ /* Drops trailing whitespace. Modifies the string in place. Returns pointer to first non-space character */
- return s;
+ return delete_trailing_chars(skip_leading_chars(s, WHITESPACE), WHITESPACE);
}
char *delete_chars(char *s, const char *bad) {
char *f, *t;
- /* Drops all whitespace, regardless where in the string */
+ /* Drops all specified bad characters, regardless where in the string */
+
+ if (!s)
+ return NULL;
+
+ if (!bad)
+ bad = WHITESPACE;
for (f = s, t = s; *f; f++) {
if (strchr(bad, *f))
return s;
}
+char *delete_trailing_chars(char *s, const char *bad) {
+ char *p, *c = s;
+
+ /* Drops all specified bad characters, at the end of the string */
+
+ if (!s)
+ return NULL;
+
+ if (!bad)
+ bad = WHITESPACE;
+
+ for (p = s; *p; p++)
+ if (!strchr(bad, *p))
+ c = p + 1;
+
+ *c = 0;
+
+ return s;
+}
+
char *truncate_nl(char *s) {
assert(s);
return false;
}
+static int write_ellipsis(char *buf, bool unicode) {
+ if (unicode || is_locale_utf8()) {
+ buf[0] = 0xe2; /* tri-dot ellipsis: … */
+ buf[1] = 0x80;
+ buf[2] = 0xa6;
+ } else {
+ buf[0] = '.';
+ buf[1] = '.';
+ buf[2] = '.';
+ }
+
+ return 3;
+}
+
static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
- size_t x;
- char *r;
+ size_t x, need_space, suffix_len;
+ char *t;
assert(s);
assert(percent <= 100);
- assert(new_length >= 3);
+ assert(new_length != (size_t) -1);
- if (old_length <= 3 || old_length <= new_length)
+ if (old_length <= new_length)
return strndup(s, old_length);
- r = new0(char, new_length+3);
- if (!r)
+ /* Special case short ellipsations */
+ switch (new_length) {
+
+ case 0:
+ return strdup("");
+
+ case 1:
+ if (is_locale_utf8())
+ return strdup("…");
+ else
+ return strdup(".");
+
+ case 2:
+ if (!is_locale_utf8())
+ return strdup("..");
+
+ break;
+
+ default:
+ break;
+ }
+
+ /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
+ * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
+ * either for the UTF-8 encoded character or for three ASCII characters. */
+ need_space = is_locale_utf8() ? 1 : 3;
+
+ t = new(char, new_length+3);
+ if (!t)
return NULL;
- x = (new_length * percent) / 100;
+ assert(new_length >= need_space);
- if (x > new_length - 3)
- x = new_length - 3;
+ x = ((new_length - need_space) * percent + 50) / 100;
+ assert(x <= new_length - need_space);
- memcpy(r, s, x);
- r[x] = 0xe2; /* tri-dot ellipsis: … */
- r[x+1] = 0x80;
- r[x+2] = 0xa6;
- memcpy(r + x + 3,
- s + old_length - (new_length - x - 1),
- new_length - x - 1);
+ memcpy(t, s, x);
+ write_ellipsis(t + x, false);
+ suffix_len = new_length - x - need_space;
+ memcpy(t + x + 3, s + old_length - suffix_len, suffix_len);
+ *(t + x + 3 + suffix_len) = '\0';
- return r;
+ return t;
}
char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
- size_t x;
- char *e;
+ size_t x, k, len, len2;
const char *i, *j;
- unsigned k, len, len2;
+ char *e;
int r;
+ /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
+ * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
+ * strings.
+ *
+ * Ellipsation is done in a locale-dependent way:
+ * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
+ * 2. Otherwise, a unicode ellipsis is used ("…")
+ *
+ * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
+ * the current locale is UTF-8.
+ */
+
assert(s);
assert(percent <= 100);
- assert(new_length >= 3);
-
- /* if no multibyte characters use ascii_ellipsize_mem for speed */
- if (ascii_is_valid(s))
- return ascii_ellipsize_mem(s, old_length, new_length, percent);
- if (old_length <= 3 || old_length <= new_length)
+ if (new_length == (size_t) -1)
return strndup(s, old_length);
- x = (new_length * percent) / 100;
+ if (new_length == 0)
+ return strdup("");
+
+ /* If no multibyte characters use ascii_ellipsize_mem for speed */
+ if (ascii_is_valid_n(s, old_length))
+ return ascii_ellipsize_mem(s, old_length, new_length, percent);
- if (x > new_length - 3)
- x = new_length - 3;
+ x = ((new_length - 1) * percent) / 100;
+ assert(x <= new_length - 1);
k = 0;
- for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
+ for (i = s; i < s + old_length; i = utf8_next_char(i)) {
char32_t c;
+ int w;
r = utf8_encoded_to_unichar(i, &c);
if (r < 0)
return NULL;
- k += unichar_iswide(c) ? 2 : 1;
- }
- if (k > x) /* last character was wide and went over quota */
- x++;
+ w = unichar_iswide(c) ? 2 : 1;
+ if (k + w <= x)
+ k += w;
+ else
+ break;
+ }
- for (j = s + old_length; k < new_length && j > i; ) {
+ for (j = s + old_length; j > i; ) {
char32_t c;
+ int w;
+ const char *jj;
- j = utf8_prev_char(j);
- r = utf8_encoded_to_unichar(j, &c);
+ jj = utf8_prev_char(j);
+ r = utf8_encoded_to_unichar(jj, &c);
if (r < 0)
return NULL;
- k += unichar_iswide(c) ? 2 : 1;
+
+ w = unichar_iswide(c) ? 2 : 1;
+ if (k + w <= new_length) {
+ k += w;
+ j = jj;
+ } else
+ break;
}
assert(i <= j);
/* we don't actually need to ellipsize */
if (i == j)
- return memdup(s, old_length + 1);
+ return memdup_suffix0(s, old_length);
- /* make space for ellipsis */
- j = utf8_next_char(j);
+ /* make space for ellipsis, if possible */
+ if (j < s + old_length)
+ j = utf8_next_char(j);
+ else if (i > s)
+ i = utf8_prev_char(i);
len = i - s;
len2 = s + old_length - j;
*/
memcpy(e, s, len);
- e[len] = 0xe2; /* tri-dot ellipsis: … */
- e[len + 1] = 0x80;
- e[len + 2] = 0xa6;
-
- memcpy(e + len + 3, j, len2 + 1);
+ write_ellipsis(e + len, true);
+ memcpy(e + len + 3, j, len2);
+ *(e + len + 3 + len2) = '\0';
return e;
}
-char *ellipsize(const char *s, size_t length, unsigned percent) {
- return ellipsize_mem(s, strlen(s), length, percent);
+char *cellescape(char *buf, size_t len, const char *s) {
+ /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII
+ * characters are copied as they are, everything else is escaped. The result
+ * is different then if escaping and ellipsization was performed in two
+ * separate steps, because each sequence is either stored in full or skipped.
+ *
+ * This function should be used for logging about strings which expected to
+ * be plain ASCII in a safe way.
+ *
+ * An ellipsis will be used if s is too long. It was always placed at the
+ * very end.
+ */
+
+ size_t i = 0, last_char_width[4] = {}, k = 0, j;
+
+ assert(len > 0); /* at least a terminating NUL */
+
+ for (;;) {
+ char four[4];
+ int w;
+
+ if (*s == 0) /* terminating NUL detected? then we are done! */
+ goto done;
+
+ w = cescape_char(*s, four);
+ if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? In that case let's
+ * ellipsize at the previous location */
+ break;
+
+ /* OK, there was space, let's add this escaped character to the buffer */
+ memcpy(buf + i, four, w);
+ i += w;
+
+ /* And remember its width in the ring buffer */
+ last_char_width[k] = w;
+ k = (k + 1) % 4;
+
+ s++;
+ }
+
+ /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4
+ * characters ideally, but the buffer is shorter than that in the first place take what we can get */
+ for (j = 0; j < ELEMENTSOF(last_char_width); j++) {
+
+ if (i + 4 <= len) /* nice, we reached our space goal */
+ break;
+
+ k = k == 0 ? 3 : k - 1;
+ if (last_char_width[k] == 0) /* bummer, we reached the beginning of the strings */
+ break;
+
+ assert(i >= last_char_width[k]);
+ i -= last_char_width[k];
+ }
+
+ if (i + 4 <= len) /* yay, enough space */
+ i += write_ellipsis(buf + i, false);
+ else if (i + 3 <= len) { /* only space for ".." */
+ buf[i++] = '.';
+ buf[i++] = '.';
+ } else if (i + 2 <= len) /* only space for a single "." */
+ buf[i++] = '.';
+ else
+ assert(i + 1 <= len);
+
+ done:
+ buf[i] = '\0';
+ return buf;
}
bool nulstr_contains(const char *nulstr, const char *needle) {
}
char *strreplace(const char *text, const char *old_string, const char *new_string) {
+ size_t l, old_len, new_len, allocated = 0;
+ char *t, *ret = NULL;
const char *f;
- char *t, *r;
- size_t l, old_len, new_len;
- assert(text);
assert(old_string);
assert(new_string);
+ if (!text)
+ return NULL;
+
old_len = strlen(old_string);
new_len = strlen(new_string);
l = strlen(text);
- r = new(char, l+1);
- if (!r)
+ if (!GREEDY_REALLOC(ret, allocated, l+1))
return NULL;
f = text;
- t = r;
+ t = ret;
while (*f) {
- char *a;
size_t d, nl;
if (!startswith(f, old_string)) {
continue;
}
- d = t - r;
+ d = t - ret;
nl = l - old_len + new_len;
- a = realloc(r, nl + 1);
- if (!a)
- goto oom;
+
+ if (!GREEDY_REALLOC(ret, allocated, nl + 1))
+ return mfree(ret);
l = nl;
- r = a;
- t = r + d;
+ t = ret + d;
t = stpcpy(t, new_string);
f += old_len;
}
*t = 0;
- return r;
+ return ret;
+}
+
+static void advance_offsets(ssize_t diff, size_t offsets[2], size_t shift[2], size_t size) {
+ if (!offsets)
+ return;
-oom:
- return mfree(r);
+ if ((size_t) diff < offsets[0])
+ shift[0] += size;
+ if ((size_t) diff < offsets[1])
+ shift[1] += size;
}
-char *strip_tab_ansi(char **ibuf, size_t *_isz) {
+char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
const char *i, *begin = NULL;
enum {
STATE_OTHER,
STATE_ESCAPE,
- STATE_BRACKET
+ STATE_CSI,
+ STATE_CSO,
} state = STATE_OTHER;
char *obuf = NULL;
- size_t osz = 0, isz;
+ size_t osz = 0, isz, shift[2] = {};
FILE *f;
assert(ibuf);
assert(*ibuf);
- /* Strips ANSI color and replaces TABs by 8 spaces */
+ /* This does three things:
+ *
+ * 1. Replaces TABs by 8 spaces
+ * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences
+ * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences
+ *
+ * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as are any
+ * other special characters. Truncated ANSI sequences are left-as is too. This call is supposed to suppress the
+ * most basic formatting noise, but nothing else.
+ *
+ * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */
isz = _isz ? *_isz : strlen(*ibuf);
if (!f)
return NULL;
- /* Note we use the _unlocked() stdio variants on f for performance
- * reasons. It's safe to do so since we created f here and it
- * doesn't leave our scope.
- */
+ /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we created f here
+ * and it doesn't leave our scope. */
+
+ (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
for (i = *ibuf; i < *ibuf + isz + 1; i++) {
break;
else if (*i == '\x1B')
state = STATE_ESCAPE;
- else if (*i == '\t')
- fputs_unlocked(" ", f);
- else
- fputc_unlocked(*i, f);
+ else if (*i == '\t') {
+ fputs(" ", f);
+ advance_offsets(i - *ibuf, highlight, shift, 7);
+ } else
+ fputc(*i, f);
+
break;
case STATE_ESCAPE:
if (i >= *ibuf + isz) { /* EOT */
- fputc_unlocked('\x1B', f);
+ fputc('\x1B', f);
+ advance_offsets(i - *ibuf, highlight, shift, 1);
break;
- } else if (*i == '[') {
- state = STATE_BRACKET;
+ } else if (*i == '[') { /* ANSI CSI */
+ state = STATE_CSI;
+ begin = i + 1;
+ } else if (*i == ']') { /* ANSI CSO */
+ state = STATE_CSO;
begin = i + 1;
} else {
- fputc_unlocked('\x1B', f);
- fputc_unlocked(*i, f);
+ fputc('\x1B', f);
+ fputc(*i, f);
+ advance_offsets(i - *ibuf, highlight, shift, 1);
state = STATE_OTHER;
}
break;
- case STATE_BRACKET:
+ case STATE_CSI:
- if (i >= *ibuf + isz || /* EOT */
- (!(*i >= '0' && *i <= '9') && !IN_SET(*i, ';', 'm'))) {
- fputc_unlocked('\x1B', f);
- fputc_unlocked('[', f);
+ if (i >= *ibuf + isz || /* EOT … */
+ !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */
+ fputc('\x1B', f);
+ fputc('[', f);
+ advance_offsets(i - *ibuf, highlight, shift, 2);
state = STATE_OTHER;
i = begin-1;
} else if (*i == 'm')
state = STATE_OTHER;
+
+ break;
+
+ case STATE_CSO:
+
+ if (i >= *ibuf + isz || /* EOT … */
+ (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */
+ fputc('\x1B', f);
+ fputc(']', f);
+ advance_offsets(i - *ibuf, highlight, shift, 2);
+ state = STATE_OTHER;
+ i = begin-1;
+ } else if (*i == '\a')
+ state = STATE_OTHER;
+
break;
}
}
- if (ferror(f)) {
+ if (fflush_and_check(f) < 0) {
fclose(f);
return mfree(obuf);
}
if (_isz)
*_isz = osz;
+ if (highlight) {
+ highlight[0] += shift[0];
+ highlight[1] += shift[1];
+ }
+
return obuf;
}
-char *strextend(char **x, ...) {
- va_list ap;
- size_t f, l;
+char *strextend_with_separator(char **x, const char *separator, ...) {
+ bool need_separator;
+ size_t f, l, l_separator;
char *r, *p;
+ va_list ap;
assert(x);
l = f = strlen_ptr(*x);
- va_start(ap, x);
+ need_separator = !isempty(*x);
+ l_separator = strlen_ptr(separator);
+
+ va_start(ap, separator);
for (;;) {
const char *t;
size_t n;
break;
n = strlen(t);
+
+ if (need_separator)
+ n += l_separator;
+
if (n > ((size_t) -1) - l) {
va_end(ap);
return NULL;
}
l += n;
+ need_separator = true;
}
va_end(ap);
+ need_separator = !isempty(*x);
+
r = realloc(*x, l+1);
if (!r)
return NULL;
p = r + f;
- va_start(ap, x);
+ va_start(ap, separator);
for (;;) {
const char *t;
if (!t)
break;
+ if (need_separator && separator)
+ p = stpcpy(p, separator);
+
p = stpcpy(p, t);
+
+ need_separator = true;
}
va_end(ap);
+ assert(p == r + l);
+
*p = 0;
*x = r;