1 /* SPDX-License-Identifier: LGPL-2.1+ */
9 #include "alloc-util.h"
11 #include "extract-word.h"
14 #include "locale-util.h"
16 #include "memory-util.h"
17 #include "string-util.h"
18 #include "terminal-util.h"
/* NULL-tolerant string ordering. The final return orders the two pointer values themselves with
 * CMP(); per the comment on that line this is the path used when one of the pointers is NULL. */
22 int strcmp_ptr(const char *a
, const char *b
) {
23 /* Like strcmp(), but tries to make sense of NULL pointers */
27 return CMP(a
, b
); /* Direct comparison of pointers, one of which is NULL */
/* Case-insensitive, NULL-tolerant string ordering: one path returns strcasecmp(a, b), the other
 * orders the raw pointer values with CMP().
 * NOTE(review): presumably the strcasecmp() path is taken only when both pointers are non-NULL — confirm. */
30 int strcasecmp_ptr(const char *a
, const char *b
) {
31 /* Like strcasecmp(), but tries to make sense of NULL pointers */
34 return strcasecmp(a
, b
);
35 return CMP(a
, b
); /* Direct comparison of pointers, one of which is NULL */
/* Suffix test based on memcmp() of the last pl bytes of s against postfix. On a match the result
 * points at the start of the suffix inside s (s + sl - pl); an earlier path returns s + sl, i.e. the
 * end of s. The const qualifier of s is cast away in the returned pointer.
 * NOTE(review): sl and pl are presumably strlen(s) and strlen(postfix) — confirm. */
38 char* endswith(const char *s
, const char *postfix
) {
48 return (char*) s
+ sl
;
53 if (memcmp(s
+ sl
- pl
, postfix
, pl
) != 0)
56 return (char*) s
+ sl
- pl
;
/* Case-insensitive suffix test: compares the tail of s starting at s + sl - pl with postfix using
 * strcasecmp(). On a match the result points at the start of the suffix inside s; an earlier path
 * returns s + sl, i.e. the end of s.
 * NOTE(review): sl and pl are presumably strlen(s) and strlen(postfix) — confirm. */
59 char* endswith_no_case(const char *s
, const char *postfix
) {
69 return (char*) s
+ sl
;
74 if (strcasecmp(s
+ sl
- pl
, postfix
) != 0)
77 return (char*) s
+ sl
- pl
;
/* Word-prefix test: the first wl bytes of s are compared with word via memcmp(), the character at p
 * must be one of WHITESPACE, and the whitespace run at p is then skipped with strspn().
 * NOTE(review): wl is presumably strlen(word) and p presumably s + wl — confirm. */
80 char* first_word(const char *s
, const char *word
) {
87 /* Checks if the string starts with the specified word, either
88 * followed by NUL or by whitespace. Returns a pointer to the
89 * NUL or the first character after the whitespace. */
100 if (memcmp(s
, word
, wl
) != 0)
107 if (!strchr(WHITESPACE
, *p
))
110 p
+= strspn(p
, WHITESPACE
);
/* Escape-aware scan over s: index n advances until the NUL terminator, a backslash is recognised
 * specially (tracked through 'escaped'), and other characters are tested against the reject set
 * with strchr().
 * NOTE(review): presumably returns the length of the leading span without unescaped reject
 * characters, like strcspn() — the return statement is not visible here, confirm. */
114 static size_t strcspn_escaped(const char *s
, const char *reject
) {
115 bool escaped
= false;
118 for (n
= 0; s
[n
] != '\0'; n
++) {
121 else if (s
[n
] == '\\')
123 else if (strchr(reject
, s
[n
]))
130 /* Split a string into words. */
134 const char *separator
,
143 separator
= WHITESPACE
;
147 if (*current
== '\0') /* already at the end? */
150 current
+= strspn(current
, separator
); /* skip leading separators */
151 if (*current
== '\0') { /* at the end now? */
156 if (FLAGS_SET(flags
, SPLIT_QUOTES
)) {
158 if (strchr(QUOTES
, *current
)) {
159 /* We are looking at a quote */
160 *l
= strcspn_escaped(current
+ 1, CHAR_TO_STR(*current
));
161 if (current
[*l
+ 1] != *current
||
162 (current
[*l
+ 2] != 0 && !strchr(separator
, current
[*l
+ 2]))) {
163 /* right quote missing or garbage at the end */
164 if (FLAGS_SET(flags
, SPLIT_RELAX
)) {
165 *state
= current
+ *l
+ 1 + (current
[*l
+ 1] != '\0');
171 *state
= current
++ + *l
+ 2;
174 /* We are looking at a something that is not a quote */
175 *l
= strcspn_escaped(current
, separator
);
176 if (current
[*l
] && !strchr(separator
, current
[*l
]) && !FLAGS_SET(flags
, SPLIT_RELAX
)) {
177 /* unfinished escape */
181 *state
= current
+ *l
;
184 *l
= strcspn(current
, separator
);
185 *state
= current
+ *l
;
/* Builds a newly allocated string with b bytes of suffix placed at offset a: the buffer holds
 * a + b + 1 chars, and b > SIZE_MAX - a is checked first so that the sum cannot wrap around.
 * One path simply returns strndup(suffix, b).
 * NOTE(review): a is presumably strlen(s), and the strndup() path presumably handles s == NULL — confirm. */
191 char *strnappend(const char *s
, const char *suffix
, size_t b
) {
199 return strndup(suffix
, b
);
208 if (b
> ((size_t) -1) - a
)
211 r
= new(char, a
+b
+1);
216 memcpy(r
+a
, suffix
, b
);
/* Variadic string join. The const char * arguments are fetched with va_arg() at two places, and
 * the running length l is protected against size_t wrap-around before n is added to it.
 * NOTE(review): presumably one va_arg() pass sizes the result and the other copies — confirm. */
222 char *strjoin_real(const char *x
, ...) {
236 t
= va_arg(ap
, const char *);
241 if (n
> ((size_t) -1) - l
) {
265 t
= va_arg(ap
, const char *);
279 char *strstrip(char *s
) {
283 /* Drops leading and trailing whitespace. Modifies the string in place. Returns pointer to first non-space character */
285 return delete_trailing_chars(skip_leading_chars(s
, WHITESPACE
), WHITESPACE
);
/* Filters s using two cursors that both start at s: f reads every character up to the NUL
 * terminator, t is the second position inside the same buffer.
 * NOTE(review): t is presumably the write position for characters that are kept — confirm. */
288 char *delete_chars(char *s
, const char *bad
) {
291 /* Drops all specified bad characters, regardless where in the string */
299 for (f
= s
, t
= s
; *f
; f
++) {
/* Trailing-character trimmer: strchr(bad, *p) decides for the character at p whether it belongs
 * to the set of characters to drop. */
311 char *delete_trailing_chars(char *s
, const char *bad
) {
314 /* Drops all specified bad characters, at the end of the string */
323 if (!strchr(bad
, *p
))
/* Cuts s in place at the first character contained in NEWLINE by storing a NUL there. If no such
 * character exists, strcspn() yields the string length and the existing terminator is rewritten. */
331 char *truncate_nl(char *s
) {
334 s
[strcspn(s
, NEWLINE
)] = 0;
/* Maps 'A'..'Z' to the matching lowercase letter with plain character arithmetic; no locale
 * function is involved in the conversion. */
338 char ascii_tolower(char x
) {
340 if (x
>= 'A' && x
<= 'Z')
341 return x
- 'A' + 'a';
/* Maps 'a'..'z' to the matching uppercase letter with plain character arithmetic; no locale
 * function is involved in the conversion. */
346 char ascii_toupper(char x
) {
348 if (x
>= 'a' && x
<= 'z')
349 return x
- 'a' + 'A';
/* In-place lowercasing: the character at cursor p is replaced by ascii_tolower() of itself.
 * NOTE(review): p presumably walks t up to its NUL terminator — confirm. */
354 char *ascii_strlower(char *t
) {
360 *p
= ascii_tolower(*p
);
/* In-place uppercasing: the character at cursor p is replaced by ascii_toupper() of itself.
 * NOTE(review): p presumably walks t up to its NUL terminator — confirm. */
365 char *ascii_strupper(char *t
) {
371 *p
= ascii_toupper(*p
);
/* Lowercases exactly the first n bytes of t in place with ascii_tolower(). The loop is bounded by
 * n only, so the caller must make sure t has at least n bytes. */
376 char *ascii_strlower_n(char *t
, size_t n
) {
382 for (i
= 0; i
< n
; i
++)
383 t
[i
] = ascii_tolower(t
[i
]);
/* ASCII case-insensitive comparison of up to n bytes of a and b. Each byte is folded with
 * ascii_tolower() and converted through uint8_t to int, so x and y are compared as unsigned
 * byte values. */
388 int ascii_strcasecmp_n(const char *a
, const char *b
, size_t n
) {
390 for (; n
> 0; a
++, b
++, n
--) {
393 x
= (int) (uint8_t) ascii_tolower(*a
);
394 y
= (int) (uint8_t) ascii_tolower(*b
);
/* Compares two length-delimited buffers (a with n bytes, b with m bytes): the common prefix of
 * MIN(n, m) bytes is compared with ascii_strcasecmp_n().
 * NOTE(review): presumably the lengths decide the order when the prefixes are equal — confirm. */
403 int ascii_strcasecmp_nn(const char *a
, size_t n
, const char *b
, size_t m
) {
406 r
= ascii_strcasecmp_n(a
, b
, MIN(n
, m
));
413 bool chars_intersect(const char *a
, const char *b
) {
416 /* Returns true if any of the chars in a are in b. */
/* Scans p character by character with cursor t. When ok is non-NULL, characters found in it via
 * strchr() are treated separately; the test *t > 0 && *t < ' ' matches control characters that
 * sort below the space character. */
424 bool string_has_cc(const char *p
, const char *ok
) {
430 * Check if a string contains control characters. If 'ok' is
431 * non-NULL it may be a string containing additional CCs to be
435 for (t
= p
; *t
; t
++) {
436 if (ok
&& strchr(ok
, *t
))
439 if (*t
> 0 && *t
< ' ')
/* Stores an ellipsis at buf. If unicode is true or is_locale_utf8() reports a UTF-8 locale, the
 * first byte written is 0xe2, which starts the encoded "…" character. */
449 static int write_ellipsis(char *buf
, bool unicode
) {
450 if (unicode
|| is_locale_utf8()) {
451 buf
[0] = 0xe2; /* tri-dot ellipsis: … */
/* Shortens a string of old_length bytes to new_length characters by keeping x leading bytes, an
 * ellipsis, and the last suffix_len bytes of the input. percent (asserted to be at most 100) picks
 * the ellipsis position: x = ((new_length - need_space) * percent + 50) / 100. Inputs that already
 * fit (old_length <= new_length) are returned as a strndup() copy. The result buffer always gets
 * new_length + 3 bytes, and the suffix is always written 3 bytes after x. */
463 static char *ascii_ellipsize_mem(const char *s
, size_t old_length
, size_t new_length
, unsigned percent
) {
464 size_t x
, need_space
, suffix_len
;
468 assert(percent
<= 100);
469 assert(new_length
!= (size_t) -1);
471 if (old_length
<= new_length
)
472 return strndup(s
, old_length
);
474 /* Special case short ellipsations */
475 switch (new_length
) {
481 if (is_locale_utf8())
482 return strdup("…");
487 if (!is_locale_utf8())
496 /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
497 * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
498 * either for the UTF-8 encoded character or for three ASCII characters. */
499 need_space
= is_locale_utf8() ? 1 : 3;
501 t
= new(char, new_length
+3);
505 assert(new_length
>= need_space
);
507 x
= ((new_length
- need_space
) * percent
+ 50) / 100;
508 assert(x
<= new_length
- need_space
);
511 write_ellipsis(t
+ x
, false);
512 suffix_len
= new_length
- x
- need_space
;
513 memcpy(t
+ x
+ 3, s
+ old_length
- suffix_len
, suffix_len
);
514 *(t
+ x
+ 3 + suffix_len
) = '\0';
/* Shortens the string s (old_length bytes) to at most new_length character cells, placing a
 * unicode ellipsis between a kept head of len bytes and a kept tail of len2 bytes. percent
 * (asserted to be at most 100) positions the ellipsis. A new_length of (size_t) -1 returns a plain
 * strndup() copy, and input accepted by ascii_is_valid_n() is handed to ascii_ellipsize_mem().
 * Characters reported by unichar_iswide() count as 2 cells, all others as 1.
 * NOTE(review): the printf() call in this function formats len, len2 and k with %u although they
 * are declared size_t here. As it appears in this view it would be a live debug print with
 * mismatched conversions — confirm that it is disabled (e.g. commented out) in the full file. */
519 char *ellipsize_mem(const char *s
, size_t old_length
, size_t new_length
, unsigned percent
) {
520 size_t x
, k
, len
, len2
;
525 /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
526 * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
529 * Ellipsation is done in a locale-dependent way:
530 * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
531 * 2. Otherwise, a unicode ellipsis is used ("…")
533 * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
534 * the current locale is UTF-8.
538 assert(percent
<= 100);
540 if (new_length
== (size_t) -1)
541 return strndup(s
, old_length
);
546 /* If no multibyte characters use ascii_ellipsize_mem for speed */
547 if (ascii_is_valid_n(s
, old_length
))
548 return ascii_ellipsize_mem(s
, old_length
, new_length
, percent
);
550 x
= ((new_length
- 1) * percent
) / 100;
551 assert(x
<= new_length
- 1);
554 for (i
= s
; i
< s
+ old_length
; i
= utf8_next_char(i
)) {
558 r
= utf8_encoded_to_unichar(i
, &c
);
562 w
= unichar_iswide(c
) ? 2 : 1;
569 for (j
= s
+ old_length
; j
> i
; ) {
574 jj
= utf8_prev_char(j
);
575 r
= utf8_encoded_to_unichar(jj
, &c
);
579 w
= unichar_iswide(c
) ? 2 : 1;
580 if (k
+ w
<= new_length
) {
588 /* we don't actually need to ellipsize */
590 return memdup_suffix0(s
, old_length
);
592 /* make space for ellipsis, if possible */
593 if (j
< s
+ old_length
)
594 j
= utf8_next_char(j
);
596 i
= utf8_prev_char(i
);
599 len2
= s
+ old_length
- j
;
600 e
= new(char, len
+ 3 + len2
+ 1);
605 printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
606 old_length, new_length, x, len, len2, k);
610 write_ellipsis(e
+ len
, true);
611 memcpy(e
+ len
+ 3, j
, len2
);
612 *(e
+ len
+ 3 + len2
) = '\0';
617 char *cellescape(char *buf
, size_t len
, const char *s
) {
618 /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII
619 * characters are copied as they are, everything else is escaped. The result
620 * is different than if escaping and ellipsization were performed in two
621 * separate steps, because each sequence is either stored in full or skipped.
623 * This function should be used for logging about strings which are expected to
624 * be plain ASCII in a safe way.
626 * An ellipsis will be used if s is too long. It was always placed at the
630 size_t i
= 0, last_char_width
[4] = {}, k
= 0, j
;
632 assert(len
> 0); /* at least a terminating NUL */
638 if (*s
== 0) /* terminating NUL detected? then we are done! */
641 w
= cescape_char(*s
, four
);
642 if (i
+ w
+ 1 > len
) /* This character doesn't fit into the buffer anymore? In that case let's
643 * ellipsize at the previous location */
646 /* OK, there was space, let's add this escaped character to the buffer */
647 memcpy(buf
+ i
, four
, w
);
650 /* And remember its width in the ring buffer */
651 last_char_width
[k
] = w
;
657 /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4
658 * characters ideally, but if the buffer is shorter than that in the first place, take what we can get */
659 for (j
= 0; j
< ELEMENTSOF(last_char_width
); j
++) {
661 if (i
+ 4 <= len
) /* nice, we reached our space goal */
664 k
= k
== 0 ? 3 : k
- 1;
665 if (last_char_width
[k
] == 0) /* bummer, we reached the beginning of the string */
668 assert(i
>= last_char_width
[k
]);
669 i
-= last_char_width
[k
];
672 if (i
+ 4 <= len
) /* yay, enough space */
673 i
+= write_ellipsis(buf
+ i
, false);
674 else if (i
+ 3 <= len
) { /* only space for ".." */
677 } else if (i
+ 2 <= len
) /* only space for a single "." */
680 assert(i
+ 1 <= len
);
/* Length check that never reads more than l + 1 bytes of s: strnlen(s, l+1) > l is true exactly
 * when s is longer than l bytes.
 * NOTE(review): presumably s is then cut to l bytes in place — the statement is not visible, confirm. */
687 char* strshorten(char *s
, size_t l
) {
690 if (strnlen(s
, l
+1) > l
)
/* Builds the result in the growable buffer ret (capacity tracked in 'allocated', grown with
 * GREEDY_REALLOC()). Wherever startswith(f, old_string) matches, the needed size is recomputed as
 * nl = l - old_len + new_len, the buffer is enlarged to nl + 1 bytes and new_string is written at
 * t with stpcpy().
 * NOTE(review): f is presumably the read cursor in text and l presumably starts as strlen(text) — confirm. */
696 char *strreplace(const char *text
, const char *old_string
, const char *new_string
) {
697 size_t l
, old_len
, new_len
, allocated
= 0;
698 char *t
, *ret
= NULL
;
707 old_len
= strlen(old_string
);
708 new_len
= strlen(new_string
);
711 if (!GREEDY_REALLOC(ret
, allocated
, l
+1))
719 if (!startswith(f
, old_string
)) {
725 nl
= l
- old_len
+ new_len
;
727 if (!GREEDY_REALLOC(ret
, allocated
, nl
+ 1))
733 t
= stpcpy(t
, new_string
);
/* Helper that compares diff (cast to size_t) against offsets[0] and offsets[1] independently.
 * offsets may be NULL, shift must provide two elements.
 * NOTE(review): presumably the matching shift[] entry is increased when the comparison holds — confirm. */
741 static void advance_offsets(
743 size_t offsets
[2], /* note: we can't use [static 2] here, since this may be NULL */
744 size_t shift
[static 2],
752 if ((size_t) diff
< offsets
[0])
754 if ((size_t) diff
< offsets
[1])
758 char *strip_tab_ansi(char **ibuf
, size_t *_isz
, size_t highlight
[2]) {
759 const char *begin
= NULL
;
765 } state
= STATE_OTHER
;
767 size_t osz
= 0, isz
, shift
[2] = {}, n_carriage_returns
= 0;
773 /* This does four things:
775 * 1. Replaces TABs by 8 spaces
776 * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences
777 * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences
778 * 4. Strips trailing \r characters (since they would "move the cursor", but have no
781 * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as
782 * are any other special characters. Truncated ANSI sequences are left as-is too. This call is
783 * supposed to suppress the most basic formatting noise, but nothing else.
785 * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */
787 isz
= _isz
? *_isz
: strlen(*ibuf
);
789 /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we
790 * created f here and it doesn't leave our scope. */
791 f
= open_memstream_unlocked(&obuf
, &osz
);
795 for (const char *i
= *ibuf
; i
< *ibuf
+ isz
+ 1; i
++) {
800 if (i
>= *ibuf
+ isz
) /* EOT */
804 n_carriage_returns
++;
806 } else if (*i
== '\n')
807 /* Ignore carriage returns before new line */
808 n_carriage_returns
= 0;
809 for (; n_carriage_returns
> 0; n_carriage_returns
--)
813 state
= STATE_ESCAPE
;
814 else if (*i
== '\t') {
816 advance_offsets(i
- *ibuf
, highlight
, shift
, 7);
823 assert(n_carriage_returns
== 0);
825 if (i
>= *ibuf
+ isz
) { /* EOT */
827 advance_offsets(i
- *ibuf
, highlight
, shift
, 1);
829 } else if (*i
== '[') { /* ANSI CSI */
832 } else if (*i
== ']') { /* ANSI CSO */
838 advance_offsets(i
- *ibuf
, highlight
, shift
, 1);
845 assert(n_carriage_returns
== 0);
847 if (i
>= *ibuf
+ isz
|| /* EOT … */
848 !strchr("01234567890;m", *i
)) { /* … or invalid chars in sequence */
851 advance_offsets(i
- *ibuf
, highlight
, shift
, 2);
854 } else if (*i
== 'm')
860 assert(n_carriage_returns
== 0);
862 if (i
>= *ibuf
+ isz
|| /* EOT … */
863 (*i
!= '\a' && (uint8_t) *i
< 32U) || (uint8_t) *i
> 126U) { /* … or invalid chars in sequence */
866 advance_offsets(i
- *ibuf
, highlight
, shift
, 2);
869 } else if (*i
== '\a')
876 if (fflush_and_check(f
) < 0) {
882 free_and_replace(*ibuf
, obuf
);
888 highlight
[0] += shift
[0];
889 highlight
[1] += shift
[1];
/* Extends the string in *x with the variadic const char * arguments. The argument list is started
 * twice with va_start(): the first walk accumulates the length in l (guarded against size_t
 * wrap-around), then *x is resized with realloc(*x, l+1), and the second walk writes the data.
 * need_separator starts out as "*x is non-empty" for each walk and is set to true afterwards;
 * when it is set and separator is non-NULL, separator is copied in with stpcpy().
 * NOTE(review): presumably need_separator becomes true after each appended item, so that the
 * separator goes between items — confirm. */
895 char *strextend_with_separator(char **x
, const char *separator
, ...) {
897 size_t f
, l
, l_separator
;
903 l
= f
= strlen_ptr(*x
);
905 need_separator
= !isempty(*x
);
906 l_separator
= strlen_ptr(separator
);
908 va_start(ap
, separator
);
913 t
= va_arg(ap
, const char *);
922 if (n
> ((size_t) -1) - l
) {
928 need_separator
= true;
932 need_separator
= !isempty(*x
);
934 r
= realloc(*x
, l
+1);
940 va_start(ap
, separator
);
944 t
= va_arg(ap
, const char *);
948 if (need_separator
&& separator
)
949 p
= stpcpy(p
, separator
);
953 need_separator
= true;
/* Allocates l * n + 1 bytes with malloc() and fills them in a loop that runs n times.
 * NOTE(review): l is presumably strlen(s). The size l * n + 1 is computed without any visible
 * overflow check — confirm that callers keep n small enough. */
965 char *strrep(const char *s
, unsigned n
) {
973 p
= r
= malloc(l
* n
+ 1);
977 for (i
= 0; i
< n
; i
++)
/* Splits s at position x: the left part is duplicated with strndup(s, x - s), the right part with
 * strdup() starting strlen(sep) bytes after x.
 * NOTE(review): x is presumably the first occurrence of sep in s, and a/b are presumably handed
 * out through l and r — confirm. */
984 int split_pair(const char *s
, const char *sep
, char **l
, char **r
) {
999 a
= strndup(s
, x
- s
);
1003 b
= strdup(x
+ strlen(sep
));
/* String-slot setter. streq_ptr(*p, s) is evaluated first to detect that the stored string and s
 * are already the same.
 * NOTE(review): presumably nothing is reallocated in that case — confirm. */
1015 int free_and_strdup(char **p
, const char *s
) {
1020 /* Replaces a string pointer with a strdup()ed new string,
1021 * possibly freeing the old one. */
1023 if (streq_ptr(*p
, s
))
/* Length-limited string-slot setter. s may only be NULL when l is 0 (asserted). The long condition
 * recognises "nothing to change": both *p and s are set, strneq() accepts their first l bytes, and
 * the stored string is not longer than l bytes (l > strlen(*p) or (*p)[l] is the terminator).
 * Otherwise the new string t is moved into *p with free_and_replace(). */
1039 int free_and_strndup(char **p
, const char *s
, size_t l
) {
1043 assert(s
|| l
== 0);
1045 /* Replaces a string pointer with a strndup()ed new string,
1046 * freeing the old one. */
1051 if (*p
&& s
&& strneq(*p
, s
, l
) && (l
> strlen(*p
) || (*p
)[l
] == '\0'))
1061 free_and_replace(*p
, t
);
/* Scans p with cursor t and tests every character against two classes: control characters below
 * the space character, and the set made of QUOTES, the backslash and DEL (0x7f). */
1065 bool string_is_safe(const char *p
) {
1071 /* Checks if the specified string contains no quotes or control characters */
1073 for (t
= p
; *t
; t
++) {
1074 if (*t
> 0 && *t
< ' ') /* no control characters */
1077 if (strchr(QUOTES
"\\\x7f", *t
))
/* Wipes the contents of x: strlen(x) bytes are cleared through explicit_bzero_safe(). */
1084 char* string_erase(char *x
) {
1088 /* A delicious drop of snake-oil! To be called on memory where we stored passphrases or so, after we
1090 explicit_bzero_safe(x
, strlen(x
));
1094 int string_truncate_lines(const char *s
, size_t n_lines
, char **ret
) {
1095 const char *p
= s
, *e
= s
;
1096 bool truncation_applied
= false;
1102 /* Truncate after the specified number of lines. Returns > 0 if a truncation was applied or == 0 if
1103 * there were fewer lines in the string anyway. Trailing newlines on input are ignored, and not
1104 * generated either. The kept part, from s up to e, is duplicated with strndup(). */
1109 k
= strcspn(p
, "\n");
1112 if (k
== 0) /* final empty line */
1115 if (n
>= n_lines
) /* above threshold */
1118 e
= p
+ k
; /* last line to include */
1122 assert(p
[k
] == '\n');
1134 /* e points after the last character we want to keep */
1138 if (!in_charset(e
, "\n")) /* We only consider things truncated if we remove something that
1139 * isn't a new-line or a series of them */
1140 truncation_applied
= true;
1142 copy
= strndup(s
, e
- s
);
1148 return truncation_applied
;
1151 int string_extract_line(const char *s
, size_t i
, char **ret
) {
1155 /* Extract the i-th line from the specified string. Returns > 0 if there are more lines after that,
1156 * and == 0 if we are looking at the last line or already beyond the last line. As a special
1157 * optimization, if the first line is requested and the string only consists of one line we return
1158 * NULL, indicating the input string should be used as is, and avoid a memory allocation for a very
1164 q
= strchr(p
, '\n');
1166 /* The line we are looking for! */
1171 m
= strndup(p
, q
- p
);
1176 return !isempty(q
+ 1); /* more coming? */
1179 *ret
= NULL
; /* Just use the input string */
1190 return 0; /* The end */
1197 /* No more lines, return empty line */
1204 return 0; /* The end */
1212 int string_contains_word(const char *string
, const char *separators
, const char *word
) {
1213 /* In the default mode with no separators specified, we split on whitespace and
1214 * don't coalesce separators. */
1215 const ExtractFlags flags
= separators
? EXTRACT_DONT_COALESCE_SEPARATORS
: 0;
1217 for (const char *p
= string
;;) {
1218 _cleanup_free_
char *w
= NULL
;
1221 r
= extract_first_word(&p
, &w
, separators
, flags
);