]>
git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/string-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
12 #include <stdio_ext.h>
16 #include "alloc-util.h"
18 #include "locale-util.h"
20 #include "string-util.h"
21 #include "terminal-util.h"
26 int strcmp_ptr(const char *a
, const char *b
) {
28 /* Like strcmp(), but tries to make sense of NULL pointers */
41 char* endswith(const char *s
, const char *postfix
) {
51 return (char*) s
+ sl
;
56 if (memcmp(s
+ sl
- pl
, postfix
, pl
) != 0)
59 return (char*) s
+ sl
- pl
;
62 char* endswith_no_case(const char *s
, const char *postfix
) {
72 return (char*) s
+ sl
;
77 if (strcasecmp(s
+ sl
- pl
, postfix
) != 0)
80 return (char*) s
+ sl
- pl
;
83 char* first_word(const char *s
, const char *word
) {
90 /* Checks if the string starts with the specified word, either
91 * followed by NUL or by whitespace. Returns a pointer to the
92 * NUL or the first character after the whitespace. */
103 if (memcmp(s
, word
, wl
) != 0)
110 if (!strchr(WHITESPACE
, *p
))
113 p
+= strspn(p
, WHITESPACE
);
117 static size_t strcspn_escaped(const char *s
, const char *reject
) {
118 bool escaped
= false;
121 for (n
=0; s
[n
]; n
++) {
124 else if (s
[n
] == '\\')
126 else if (strchr(reject
, s
[n
]))
130 /* if s ends in \, return index of previous char */
134 /* Split a string into words. */
135 const char* split(const char **state
, size_t *l
, const char *separator
, bool quoted
) {
141 assert(**state
== '\0');
145 current
+= strspn(current
, separator
);
151 if (quoted
&& strchr("\'\"", *current
)) {
152 char quotechars
[2] = {*current
, '\0'};
154 *l
= strcspn_escaped(current
+ 1, quotechars
);
155 if (current
[*l
+ 1] == '\0' || current
[*l
+ 1] != quotechars
[0] ||
156 (current
[*l
+ 2] && !strchr(separator
, current
[*l
+ 2]))) {
157 /* right quote missing or garbage at the end */
161 *state
= current
++ + *l
+ 2;
163 *l
= strcspn_escaped(current
, separator
);
164 if (current
[*l
] && !strchr(separator
, current
[*l
])) {
165 /* unfinished escape */
169 *state
= current
+ *l
;
171 *l
= strcspn(current
, separator
);
172 *state
= current
+ *l
;
178 char *strnappend(const char *s
, const char *suffix
, size_t b
) {
186 return strndup(suffix
, b
);
195 if (b
> ((size_t) -1) - a
)
198 r
= new(char, a
+b
+1);
203 memcpy(r
+a
, suffix
, b
);
209 char *strappend(const char *s
, const char *suffix
) {
210 return strnappend(s
, suffix
, strlen_ptr(suffix
));
213 char *strjoin_real(const char *x
, ...) {
227 t
= va_arg(ap
, const char *);
232 if (n
> ((size_t) -1) - l
) {
256 t
= va_arg(ap
, const char *);
270 char *strstrip(char *s
) {
276 /* Drops trailing whitespace. Modifies the string in
277 * place. Returns pointer to first non-space character */
279 s
+= strspn(s
, WHITESPACE
);
281 for (e
= strchr(s
, 0); e
> s
; e
--)
282 if (!strchr(WHITESPACE
, e
[-1]))
290 char *delete_chars(char *s
, const char *bad
) {
293 /* Drops all specified bad characters, regardless where in the string */
301 for (f
= s
, t
= s
; *f
; f
++) {
313 char *delete_trailing_chars(char *s
, const char *bad
) {
316 /* Drops all specified bad characters, at the end of the string */
325 if (!strchr(bad
, *p
))
333 char *truncate_nl(char *s
) {
336 s
[strcspn(s
, NEWLINE
)] = 0;
340 char ascii_tolower(char x
) {
342 if (x
>= 'A' && x
<= 'Z')
343 return x
- 'A' + 'a';
348 char ascii_toupper(char x
) {
350 if (x
>= 'a' && x
<= 'z')
351 return x
- 'a' + 'A';
356 char *ascii_strlower(char *t
) {
362 *p
= ascii_tolower(*p
);
367 char *ascii_strupper(char *t
) {
373 *p
= ascii_toupper(*p
);
378 char *ascii_strlower_n(char *t
, size_t n
) {
384 for (i
= 0; i
< n
; i
++)
385 t
[i
] = ascii_tolower(t
[i
]);
390 int ascii_strcasecmp_n(const char *a
, const char *b
, size_t n
) {
392 for (; n
> 0; a
++, b
++, n
--) {
395 x
= (int) (uint8_t) ascii_tolower(*a
);
396 y
= (int) (uint8_t) ascii_tolower(*b
);
405 int ascii_strcasecmp_nn(const char *a
, size_t n
, const char *b
, size_t m
) {
408 r
= ascii_strcasecmp_n(a
, b
, MIN(n
, m
));
420 bool chars_intersect(const char *a
, const char *b
) {
423 /* Returns true if any of the chars in a are in b. */
431 bool string_has_cc(const char *p
, const char *ok
) {
437 * Check if a string contains control characters. If 'ok' is
438 * non-NULL it may be a string containing additional CCs to be
442 for (t
= p
; *t
; t
++) {
443 if (ok
&& strchr(ok
, *t
))
446 if (*t
> 0 && *t
< ' ')
456 static char *ascii_ellipsize_mem(const char *s
, size_t old_length
, size_t new_length
, unsigned percent
) {
457 size_t x
, need_space
;
461 assert(percent
<= 100);
462 assert(new_length
!= (size_t) -1);
464 if (old_length
<= new_length
)
465 return strndup(s
, old_length
);
467 /* Special case short ellipsations */
468 switch (new_length
) {
474 if (is_locale_utf8())
480 if (!is_locale_utf8())
489 /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
490 * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
491 * either for the UTF-8 encoded character or for three ASCII characters. */
492 need_space
= is_locale_utf8() ? 1 : 3;
494 r
= new(char, new_length
+3);
498 assert(new_length
>= need_space
);
500 x
= ((new_length
- need_space
) * percent
+ 50) / 100;
501 assert(x
<= new_length
- need_space
);
505 if (is_locale_utf8()) {
506 r
[x
+0] = 0xe2; /* tri-dot ellipsis: … */
516 s
+ old_length
- (new_length
- x
- need_space
),
517 new_length
- x
- need_space
+ 1);
522 char *ellipsize_mem(const char *s
, size_t old_length
, size_t new_length
, unsigned percent
) {
523 size_t x
, k
, len
, len2
;
528 /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
529 * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
532 * Ellipsation is done in a locale-dependent way:
533 * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
534 * 2. Otherwise, a unicode ellipsis is used ("…")
536 * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
537 * the current locale is UTF-8.
541 assert(percent
<= 100);
543 if (new_length
== (size_t) -1)
544 return strndup(s
, old_length
);
549 /* If no multibyte characters use ascii_ellipsize_mem for speed */
550 if (ascii_is_valid(s
))
551 return ascii_ellipsize_mem(s
, old_length
, new_length
, percent
);
553 x
= ((new_length
- 1) * percent
) / 100;
554 assert(x
<= new_length
- 1);
557 for (i
= s
; k
< x
&& i
< s
+ old_length
; i
= utf8_next_char(i
)) {
560 r
= utf8_encoded_to_unichar(i
, &c
);
563 k
+= unichar_iswide(c
) ? 2 : 1;
566 if (k
> x
) /* last character was wide and went over quota */
569 for (j
= s
+ old_length
; k
< new_length
&& j
> i
; ) {
572 j
= utf8_prev_char(j
);
573 r
= utf8_encoded_to_unichar(j
, &c
);
576 k
+= unichar_iswide(c
) ? 2 : 1;
580 /* we don't actually need to ellipsize */
582 return memdup(s
, old_length
+ 1);
584 /* make space for ellipsis */
585 j
= utf8_next_char(j
);
588 len2
= s
+ old_length
- j
;
589 e
= new(char, len
+ 3 + len2
+ 1);
594 printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
595 old_length, new_length, x, len, len2, k);
599 e
[len
+ 0] = 0xe2; /* tri-dot ellipsis: … */
603 memcpy(e
+ len
+ 3, j
, len2
+ 1);
608 char *ellipsize(const char *s
, size_t length
, unsigned percent
) {
610 if (length
== (size_t) -1)
613 return ellipsize_mem(s
, strlen(s
), length
, percent
);
616 bool nulstr_contains(const char *nulstr
, const char *needle
) {
622 NULSTR_FOREACH(i
, nulstr
)
623 if (streq(i
, needle
))
629 char* strshorten(char *s
, size_t l
) {
632 if (strnlen(s
, l
+1) > l
)
638 char *strreplace(const char *text
, const char *old_string
, const char *new_string
) {
639 size_t l
, old_len
, new_len
, allocated
= 0;
640 char *t
, *ret
= NULL
;
649 old_len
= strlen(old_string
);
650 new_len
= strlen(new_string
);
653 if (!GREEDY_REALLOC(ret
, allocated
, l
+1))
661 if (!startswith(f
, old_string
)) {
667 nl
= l
- old_len
+ new_len
;
669 if (!GREEDY_REALLOC(ret
, allocated
, nl
+ 1))
675 t
= stpcpy(t
, new_string
);
683 static void advance_offsets(ssize_t diff
, size_t offsets
[2], size_t shift
[2], size_t size
) {
687 if ((size_t) diff
< offsets
[0])
689 if ((size_t) diff
< offsets
[1])
693 char *strip_tab_ansi(char **ibuf
, size_t *_isz
, size_t highlight
[2]) {
694 const char *i
, *begin
= NULL
;
700 } state
= STATE_OTHER
;
702 size_t osz
= 0, isz
, shift
[2] = {};
708 /* This does three things:
710 * 1. Replaces TABs by 8 spaces
711 * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences
712 * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences
714 * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as are any
715 * other special characters. Truncated ANSI sequences are left-as is too. This call is supposed to suppress the
716 * most basic formatting noise, but nothing else.
718 * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */
720 isz
= _isz
? *_isz
: strlen(*ibuf
);
722 f
= open_memstream(&obuf
, &osz
);
726 /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we created f here
727 * and it doesn't leave our scope. */
729 (void) __fsetlocking(f
, FSETLOCKING_BYCALLER
);
731 for (i
= *ibuf
; i
< *ibuf
+ isz
+ 1; i
++) {
736 if (i
>= *ibuf
+ isz
) /* EOT */
738 else if (*i
== '\x1B')
739 state
= STATE_ESCAPE
;
740 else if (*i
== '\t') {
742 advance_offsets(i
- *ibuf
, highlight
, shift
, 7);
749 if (i
>= *ibuf
+ isz
) { /* EOT */
751 advance_offsets(i
- *ibuf
, highlight
, shift
, 1);
753 } else if (*i
== '[') { /* ANSI CSI */
756 } else if (*i
== ']') { /* ANSI CSO */
762 advance_offsets(i
- *ibuf
, highlight
, shift
, 1);
770 if (i
>= *ibuf
+ isz
|| /* EOT … */
771 !strchr("01234567890;m", *i
)) { /* … or invalid chars in sequence */
774 advance_offsets(i
- *ibuf
, highlight
, shift
, 2);
777 } else if (*i
== 'm')
784 if (i
>= *ibuf
+ isz
|| /* EOT … */
785 (*i
!= '\a' && (uint8_t) *i
< 32U) || (uint8_t) *i
> 126U) { /* … or invalid chars in sequence */
788 advance_offsets(i
- *ibuf
, highlight
, shift
, 2);
791 } else if (*i
== '\a')
798 if (fflush_and_check(f
) < 0) {
812 highlight
[0] += shift
[0];
813 highlight
[1] += shift
[1];
819 char *strextend_with_separator(char **x
, const char *separator
, ...) {
821 size_t f
, l
, l_separator
;
827 l
= f
= strlen_ptr(*x
);
829 need_separator
= !isempty(*x
);
830 l_separator
= strlen_ptr(separator
);
832 va_start(ap
, separator
);
837 t
= va_arg(ap
, const char *);
846 if (n
> ((size_t) -1) - l
) {
852 need_separator
= true;
856 need_separator
= !isempty(*x
);
858 r
= realloc(*x
, l
+1);
864 va_start(ap
, separator
);
868 t
= va_arg(ap
, const char *);
872 if (need_separator
&& separator
)
873 p
= stpcpy(p
, separator
);
877 need_separator
= true;
889 char *strrep(const char *s
, unsigned n
) {
897 p
= r
= malloc(l
* n
+ 1);
901 for (i
= 0; i
< n
; i
++)
908 int split_pair(const char *s
, const char *sep
, char **l
, char **r
) {
923 a
= strndup(s
, x
- s
);
927 b
= strdup(x
+ strlen(sep
));
939 int free_and_strdup(char **p
, const char *s
) {
944 /* Replaces a string pointer with an strdup()ed new string,
945 * possibly freeing the old one. */
947 if (streq_ptr(*p
, s
))
963 #if !HAVE_EXPLICIT_BZERO
965 * Pointer to memset is volatile so that compiler must de-reference
966 * the pointer and can't assume that it points to any function in
967 * particular (such as memset, which it then might further "optimize")
968 * This approach is inspired by openssl's crypto/mem_clr.c.
970 typedef void *(*memset_t
)(void *,int,size_t);
972 static volatile memset_t memset_func
= memset
;
974 void explicit_bzero(void *p
, size_t l
) {
975 memset_func(p
, '\0', l
);
979 char* string_erase(char *x
) {
983 /* A delicious drop of snake-oil! To be called on memory where
984 * we stored passphrases or so, after we used them. */
985 explicit_bzero(x
, strlen(x
));
989 char *string_free_erase(char *s
) {
990 return mfree(string_erase(s
));
993 bool string_is_safe(const char *p
) {
999 for (t
= p
; *t
; t
++) {
1000 if (*t
> 0 && *t
< ' ') /* no control characters */
1003 if (strchr(QUOTES
"\\\x7f", *t
))