]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/string-util.c
Merge pull request #7508 from poettering/journal-n-drop
[thirdparty/systemd.git] / src / basic / string-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <stdarg.h>
23 #include <stdint.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "alloc-util.h"
29 #include "gunicode.h"
30 #include "macro.h"
31 #include "string-util.h"
32 #include "utf8.h"
33 #include "util.h"
34
/* NULL-tolerant strcmp(): a NULL pointer sorts before any non-NULL string, and two NULL
 * pointers compare equal. For two non-NULL strings the result is that of strcmp(). */
int strcmp_ptr(const char *a, const char *b) {

        if (!a)
                return b ? -1 : 0;

        if (!b)
                return 1;

        return strcmp(a, b);
}
49
50 char* endswith(const char *s, const char *postfix) {
51 size_t sl, pl;
52
53 assert(s);
54 assert(postfix);
55
56 sl = strlen(s);
57 pl = strlen(postfix);
58
59 if (pl == 0)
60 return (char*) s + sl;
61
62 if (sl < pl)
63 return NULL;
64
65 if (memcmp(s + sl - pl, postfix, pl) != 0)
66 return NULL;
67
68 return (char*) s + sl - pl;
69 }
70
71 char* endswith_no_case(const char *s, const char *postfix) {
72 size_t sl, pl;
73
74 assert(s);
75 assert(postfix);
76
77 sl = strlen(s);
78 pl = strlen(postfix);
79
80 if (pl == 0)
81 return (char*) s + sl;
82
83 if (sl < pl)
84 return NULL;
85
86 if (strcasecmp(s + sl - pl, postfix) != 0)
87 return NULL;
88
89 return (char*) s + sl - pl;
90 }
91
92 char* first_word(const char *s, const char *word) {
93 size_t sl, wl;
94 const char *p;
95
96 assert(s);
97 assert(word);
98
99 /* Checks if the string starts with the specified word, either
100 * followed by NUL or by whitespace. Returns a pointer to the
101 * NUL or the first character after the whitespace. */
102
103 sl = strlen(s);
104 wl = strlen(word);
105
106 if (sl < wl)
107 return NULL;
108
109 if (wl == 0)
110 return (char*) s;
111
112 if (memcmp(s, word, wl) != 0)
113 return NULL;
114
115 p = s + wl;
116 if (*p == 0)
117 return (char*) p;
118
119 if (!strchr(WHITESPACE, *p))
120 return NULL;
121
122 p += strspn(p, WHITESPACE);
123 return (char*) p;
124 }
125
/* Like strcspn(), but a character preceded by a backslash is never treated as a reject
 * character. Returns the length of the initial segment of 's' free of unescaped characters
 * from 'reject'. If 's' ends in a dangling backslash, the index of that backslash is returned.
 *
 * The index is a size_t (not an int) so that it matches the return type and cannot overflow
 * on very long strings. */
static size_t strcspn_escaped(const char *s, const char *reject) {
        bool escaped = false;
        size_t n;

        for (n = 0; s[n]; n++) {
                if (escaped)
                        escaped = false;
                else if (s[n] == '\\')
                        escaped = true;
                else if (strchr(reject, s[n]))
                        break;
        }

        /* if s ends in \, return index of previous char */
        return n - escaped;
}
142
/* Split a string into words.
 *
 * Iterator-style: '*state' is the read position and is advanced past the returned word. The
 * return value points to the start of the word inside the input and '*l' receives its length;
 * nothing is copied or NUL-terminated. Returns NULL when the input is exhausted, or (with
 * 'quoted') when a quoted word is unterminated, is followed by garbage, or an escape is
 * unfinished; in the error cases '*state' is left pointing at the offending word. */
const char* split(const char **state, size_t *l, const char *separator, bool quoted) {
        const char *word = *state;

        if (*word == '\0') {
                assert(**state == '\0');
                return NULL;
        }

        /* Skip the separators in front of the next word */
        word += strspn(word, separator);
        if (*word == '\0') {
                *state = word;
                return NULL;
        }

        if (!quoted) {
                *l = strcspn(word, separator);
                *state = word + *l;
                return word;
        }

        if (strchr("\'\"", *word)) {
                const char quote[2] = { *word, '\0' };
                const char *closing;

                /* The word proper starts after the opening quote */
                *l = strcspn_escaped(word + 1, quote);
                closing = word + 1 + *l;

                if (*closing != quote[0] ||
                    (closing[1] != '\0' && !strchr(separator, closing[1]))) {
                        /* right quote missing or garbage at the end */
                        *state = word;
                        return NULL;
                }

                *state = closing + 1;
                return word + 1;
        }

        *l = strcspn_escaped(word, separator);
        if (word[*l] != '\0' && !strchr(separator, word[*l])) {
                /* unfinished escape */
                *state = word;
                return NULL;
        }

        *state = word + *l;
        return word;
}
186
/* Returns a newly allocated string consisting of 's' followed by at most 'b' bytes of
 * 'suffix'. Either argument may be NULL and is then treated as the empty string. Returns
 * NULL on allocation failure or if the combined length cannot be represented.
 *
 * The suffix is never read beyond its terminating NUL, even if 'b' is larger than its length.
 * This matches the behaviour of the strndup() semantics used when 's' is NULL. */
char *strnappend(const char *s, const char *suffix, size_t b) {
        size_t a;
        char *r;

        if (!s)
                s = "";

        if (!suffix) {
                suffix = "";
                b = 0;
        } else {
                /* Clamp b to the actual suffix length; memchr() stops at the first NUL */
                const char *nul = memchr(suffix, 0, b);

                if (nul)
                        b = (size_t) (nul - suffix);
        }

        a = strlen(s);
        if (b >= SIZE_MAX - a) /* a + b + 1 would wrap around */
                return NULL;

        r = malloc(a + b + 1);
        if (!r)
                return NULL;

        memcpy(r, s, a);
        memcpy(r + a, suffix, b);
        r[a + b] = 0;

        return r;
}
217
/* Concatenates 's' and the whole of 'suffix' into a newly allocated string by handing the
 * suffix's full length to strnappend(). */
char *strappend(const char *s, const char *suffix) {
        size_t suffix_len = strlen_ptr(suffix);

        return strnappend(s, suffix, suffix_len);
}
221
222 char *strjoin_real(const char *x, ...) {
223 va_list ap;
224 size_t l;
225 char *r, *p;
226
227 va_start(ap, x);
228
229 if (x) {
230 l = strlen(x);
231
232 for (;;) {
233 const char *t;
234 size_t n;
235
236 t = va_arg(ap, const char *);
237 if (!t)
238 break;
239
240 n = strlen(t);
241 if (n > ((size_t) -1) - l) {
242 va_end(ap);
243 return NULL;
244 }
245
246 l += n;
247 }
248 } else
249 l = 0;
250
251 va_end(ap);
252
253 r = new(char, l+1);
254 if (!r)
255 return NULL;
256
257 if (x) {
258 p = stpcpy(r, x);
259
260 va_start(ap, x);
261
262 for (;;) {
263 const char *t;
264
265 t = va_arg(ap, const char *);
266 if (!t)
267 break;
268
269 p = stpcpy(p, t);
270 }
271
272 va_end(ap);
273 } else
274 r[0] = 0;
275
276 return r;
277 }
278
279 char *strstrip(char *s) {
280 char *e;
281
282 if (!s)
283 return NULL;
284
285 /* Drops trailing whitespace. Modifies the string in
286 * place. Returns pointer to first non-space character */
287
288 s += strspn(s, WHITESPACE);
289
290 for (e = strchr(s, 0); e > s; e --)
291 if (!strchr(WHITESPACE, e[-1]))
292 break;
293
294 *e = 0;
295
296 return s;
297 }
298
299 char *delete_chars(char *s, const char *bad) {
300 char *f, *t;
301
302 /* Drops all specified bad characters, regardless where in the string */
303
304 if (!s)
305 return NULL;
306
307 if (!bad)
308 bad = WHITESPACE;
309
310 for (f = s, t = s; *f; f++) {
311 if (strchr(bad, *f))
312 continue;
313
314 *(t++) = *f;
315 }
316
317 *t = 0;
318
319 return s;
320 }
321
322 char *delete_trailing_chars(char *s, const char *bad) {
323 char *p, *c = s;
324
325 /* Drops all specified bad characters, at the end of the string */
326
327 if (!s)
328 return NULL;
329
330 if (!bad)
331 bad = WHITESPACE;
332
333 for (p = s; *p; p++)
334 if (!strchr(bad, *p))
335 c = p + 1;
336
337 *c = 0;
338
339 return s;
340 }
341
342 char *truncate_nl(char *s) {
343 assert(s);
344
345 s[strcspn(s, NEWLINE)] = 0;
346 return s;
347 }
348
/* Maps 'A'..'Z' to 'a'..'z' and returns every other character unchanged. */
char ascii_tolower(char x) {
        return (x >= 'A' && x <= 'Z') ? x - 'A' + 'a' : x;
}
356
/* Maps 'a'..'z' to 'A'..'Z' and returns every other character unchanged. */
char ascii_toupper(char x) {
        return (x >= 'a' && x <= 'z') ? x - 'a' + 'A' : x;
}
364
/* Converts the NUL-terminated string 't' to lower case in place, one character at a time via
 * ascii_tolower(). Returns 't'. */
char *ascii_strlower(char *t) {
        char *p;

        assert(t);

        p = t;
        while (*p) {
                *p = ascii_tolower(*p);
                p++;
        }

        return t;
}
375
/* Converts the NUL-terminated string 't' to upper case in place, one character at a time via
 * ascii_toupper(). Returns 't'. */
char *ascii_strupper(char *t) {
        char *p;

        assert(t);

        p = t;
        while (*p) {
                *p = ascii_toupper(*p);
                p++;
        }

        return t;
}
386
/* Converts the first 'n' bytes of 't' to lower case in place via ascii_tolower(), without
 * looking for a terminating NUL. With n == 0 the buffer is not touched at all. Returns 't'. */
char *ascii_strlower_n(char *t, size_t n) {
        char *p, *end;

        if (n == 0)
                return t;

        end = t + n;
        for (p = t; p < end; p++)
                *p = ascii_tolower(*p);

        return t;
}
398
/* Compares the first 'n' bytes of 'a' and 'b', folding case via ascii_tolower(). Bytes are
 * compared as unsigned values. Returns the difference of the first pair of bytes that differ,
 * or 0 if all 'n' bytes match. A NUL byte does not end the comparison. */
int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
        size_t i;

        for (i = 0; i < n; i++) {
                int x = (int) (uint8_t) ascii_tolower(a[i]);
                int y = (int) (uint8_t) ascii_tolower(b[i]);

                if (x != y)
                        return x - y;
        }

        return 0;
}
413
/* Compares buffer 'a' of length 'n' with buffer 'b' of length 'm', folding case. The common
 * prefix is compared with ascii_strcasecmp_n() first; if it is equal, the shorter buffer
 * sorts first (-1 / 1), and buffers of equal length compare equal (0). */
int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
        int r = ascii_strcasecmp_n(a, b, MIN(n, m));

        if (r != 0)
                return r;

        /* Yields -1, 0 or 1 depending on how the lengths relate */
        return (n > m) - (n < m);
}
428
/* Returns true if at least one character of 'a' also occurs in 'b'. */
bool chars_intersect(const char *a, const char *b) {
        /* strpbrk() finds the first character of a that is part of the set b */
        return strpbrk(a, b) != NULL;
}
439
440 bool string_has_cc(const char *p, const char *ok) {
441 const char *t;
442
443 assert(p);
444
445 /*
446 * Check if a string contains control characters. If 'ok' is
447 * non-NULL it may be a string containing additional CCs to be
448 * considered OK.
449 */
450
451 for (t = p; *t; t++) {
452 if (ok && strchr(ok, *t))
453 continue;
454
455 if (*t > 0 && *t < ' ')
456 return true;
457
458 if (*t == 127)
459 return true;
460 }
461
462 return false;
463 }
464
465 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
466 size_t x;
467 char *r;
468
469 assert(s);
470 assert(percent <= 100);
471 assert(new_length >= 3);
472
473 if (old_length <= 3 || old_length <= new_length)
474 return strndup(s, old_length);
475
476 r = new0(char, new_length+3);
477 if (!r)
478 return NULL;
479
480 x = (new_length * percent) / 100;
481
482 if (x > new_length - 3)
483 x = new_length - 3;
484
485 memcpy(r, s, x);
486 r[x] = 0xe2; /* tri-dot ellipsis: … */
487 r[x+1] = 0x80;
488 r[x+2] = 0xa6;
489 memcpy(r + x + 3,
490 s + old_length - (new_length - x - 1),
491 new_length - x - 1);
492
493 return r;
494 }
495
496 char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
497 size_t x;
498 char *e;
499 const char *i, *j;
500 unsigned k, len, len2;
501 int r;
502
503 assert(s);
504 assert(percent <= 100);
505
506 if (new_length == (size_t) -1)
507 return strndup(s, old_length);
508
509 assert(new_length >= 3);
510
511 /* if no multibyte characters use ascii_ellipsize_mem for speed */
512 if (ascii_is_valid(s))
513 return ascii_ellipsize_mem(s, old_length, new_length, percent);
514
515 if (old_length <= 3 || old_length <= new_length)
516 return strndup(s, old_length);
517
518 x = (new_length * percent) / 100;
519
520 if (x > new_length - 3)
521 x = new_length - 3;
522
523 k = 0;
524 for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
525 char32_t c;
526
527 r = utf8_encoded_to_unichar(i, &c);
528 if (r < 0)
529 return NULL;
530 k += unichar_iswide(c) ? 2 : 1;
531 }
532
533 if (k > x) /* last character was wide and went over quota */
534 x++;
535
536 for (j = s + old_length; k < new_length && j > i; ) {
537 char32_t c;
538
539 j = utf8_prev_char(j);
540 r = utf8_encoded_to_unichar(j, &c);
541 if (r < 0)
542 return NULL;
543 k += unichar_iswide(c) ? 2 : 1;
544 }
545 assert(i <= j);
546
547 /* we don't actually need to ellipsize */
548 if (i == j)
549 return memdup(s, old_length + 1);
550
551 /* make space for ellipsis */
552 j = utf8_next_char(j);
553
554 len = i - s;
555 len2 = s + old_length - j;
556 e = new(char, len + 3 + len2 + 1);
557 if (!e)
558 return NULL;
559
560 /*
561 printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
562 old_length, new_length, x, len, len2, k);
563 */
564
565 memcpy(e, s, len);
566 e[len] = 0xe2; /* tri-dot ellipsis: … */
567 e[len + 1] = 0x80;
568 e[len + 2] = 0xa6;
569
570 memcpy(e + len + 3, j, len2 + 1);
571
572 return e;
573 }
574
/* Convenience wrapper around ellipsize_mem() for NUL-terminated strings. A 'length' of
 * (size_t) -1 means "do not shorten" and yields a plain copy of 's'. */
char *ellipsize(const char *s, size_t length, unsigned percent) {
        return length == (size_t) -1
                ? strdup(s)
                : ellipsize_mem(s, strlen(s), length, percent);
}
582
583 bool nulstr_contains(const char *nulstr, const char *needle) {
584 const char *i;
585
586 if (!nulstr)
587 return false;
588
589 NULSTR_FOREACH(i, nulstr)
590 if (streq(i, needle))
591 return true;
592
593 return false;
594 }
595
596 char* strshorten(char *s, size_t l) {
597 assert(s);
598
599 if (strnlen(s, l+1) > l)
600 s[l] = 0;
601
602 return s;
603 }
604
605 char *strreplace(const char *text, const char *old_string, const char *new_string) {
606 const char *f;
607 char *t, *r;
608 size_t l, old_len, new_len;
609
610 assert(text);
611 assert(old_string);
612 assert(new_string);
613
614 old_len = strlen(old_string);
615 new_len = strlen(new_string);
616
617 l = strlen(text);
618 r = new(char, l+1);
619 if (!r)
620 return NULL;
621
622 f = text;
623 t = r;
624 while (*f) {
625 char *a;
626 size_t d, nl;
627
628 if (!startswith(f, old_string)) {
629 *(t++) = *(f++);
630 continue;
631 }
632
633 d = t - r;
634 nl = l - old_len + new_len;
635 a = realloc(r, nl + 1);
636 if (!a)
637 goto oom;
638
639 l = nl;
640 r = a;
641 t = r + d;
642
643 t = stpcpy(t, new_string);
644 f += old_len;
645 }
646
647 *t = 0;
648 return r;
649
650 oom:
651 return mfree(r);
652 }
653
654 char *strip_tab_ansi(char **ibuf, size_t *_isz) {
655 const char *i, *begin = NULL;
656 enum {
657 STATE_OTHER,
658 STATE_ESCAPE,
659 STATE_BRACKET
660 } state = STATE_OTHER;
661 char *obuf = NULL;
662 size_t osz = 0, isz;
663 FILE *f;
664
665 assert(ibuf);
666 assert(*ibuf);
667
668 /* Strips ANSI color and replaces TABs by 8 spaces */
669
670 isz = _isz ? *_isz : strlen(*ibuf);
671
672 f = open_memstream(&obuf, &osz);
673 if (!f)
674 return NULL;
675
676 /* Note we use the _unlocked() stdio variants on f for performance
677 * reasons. It's safe to do so since we created f here and it
678 * doesn't leave our scope.
679 */
680
681 for (i = *ibuf; i < *ibuf + isz + 1; i++) {
682
683 switch (state) {
684
685 case STATE_OTHER:
686 if (i >= *ibuf + isz) /* EOT */
687 break;
688 else if (*i == '\x1B')
689 state = STATE_ESCAPE;
690 else if (*i == '\t')
691 fputs_unlocked(" ", f);
692 else
693 fputc_unlocked(*i, f);
694 break;
695
696 case STATE_ESCAPE:
697 if (i >= *ibuf + isz) { /* EOT */
698 fputc_unlocked('\x1B', f);
699 break;
700 } else if (*i == '[') {
701 state = STATE_BRACKET;
702 begin = i + 1;
703 } else {
704 fputc_unlocked('\x1B', f);
705 fputc_unlocked(*i, f);
706 state = STATE_OTHER;
707 }
708
709 break;
710
711 case STATE_BRACKET:
712
713 if (i >= *ibuf + isz || /* EOT */
714 (!(*i >= '0' && *i <= '9') && !IN_SET(*i, ';', 'm'))) {
715 fputc_unlocked('\x1B', f);
716 fputc_unlocked('[', f);
717 state = STATE_OTHER;
718 i = begin-1;
719 } else if (*i == 'm')
720 state = STATE_OTHER;
721 break;
722 }
723 }
724
725 if (ferror(f)) {
726 fclose(f);
727 return mfree(obuf);
728 }
729
730 fclose(f);
731
732 free(*ibuf);
733 *ibuf = obuf;
734
735 if (_isz)
736 *_isz = osz;
737
738 return obuf;
739 }
740
/* Appends all string arguments following 'x', up to a terminating NULL argument, to the
 * heap-allocated string '*x', reallocating it as needed. '*x' may be NULL, in which case a
 * new string is created. On success '*x' is updated and a pointer to the terminating NUL of
 * the extended string is returned. On failure (allocation error or length overflow) NULL is
 * returned and '*x' is left unmodified. */
char *strextend(char **x, ...) {
        size_t old_len, new_len;
        const char *arg;
        char *buf, *end;
        va_list ap;

        assert(x);

        old_len = strlen_ptr(*x);
        new_len = old_len;

        /* First pass: determine the final length */
        va_start(ap, x);
        while ((arg = va_arg(ap, const char *))) {
                size_t n = strlen(arg);

                if (n > ((size_t) -1) - new_len) {
                        va_end(ap);
                        return NULL;
                }

                new_len += n;
        }
        va_end(ap);

        buf = realloc(*x, new_len + 1);
        if (!buf)
                return NULL;

        /* Second pass: copy the pieces behind the existing contents */
        end = buf + old_len;

        va_start(ap, x);
        while ((arg = va_arg(ap, const char *)))
                end = stpcpy(end, arg);
        va_end(ap);

        *end = 0;
        *x = buf;

        return buf + new_len;
}
792
793 char *strrep(const char *s, unsigned n) {
794 size_t l;
795 char *r, *p;
796 unsigned i;
797
798 assert(s);
799
800 l = strlen(s);
801 p = r = malloc(l * n + 1);
802 if (!r)
803 return NULL;
804
805 for (i = 0; i < n; i++)
806 p = stpcpy(p, s);
807
808 *p = 0;
809 return r;
810 }
811
/* Splits 's' at the first occurrence of 'sep'. On success returns 0 and stores newly
 * allocated copies of the part before the separator in '*l' and the part after it in '*r'.
 * Returns -EINVAL if 'sep' is empty or does not occur in 's', and -ENOMEM on allocation
 * failure; the output parameters are not touched on failure. */
int split_pair(const char *s, const char *sep, char **l, char **r) {
        const char *hit;
        char *left, *right;

        assert(s);
        assert(sep);
        assert(l);
        assert(r);

        if (isempty(sep))
                return -EINVAL;

        hit = strstr(s, sep);
        if (!hit)
                return -EINVAL;

        left = strndup(s, hit - s);
        if (!left)
                return -ENOMEM;

        right = strdup(hit + strlen(sep));
        if (!right) {
                free(left);
                return -ENOMEM;
        }

        *l = left;
        *r = right;

        return 0;
}
842
/* Replaces the string '*p' by a copy of 's', freeing the old value. 's' may be NULL, in
 * which case '*p' is set to NULL. Returns 0 if streq_ptr() reports that '*p' already equals
 * 's' (nothing is changed then), 1 if the string was replaced, and -ENOMEM if the copy could
 * not be allocated ('*p' is left untouched in that case). */
int free_and_strdup(char **p, const char *s) {
        char *copy = NULL;

        assert(p);

        if (streq_ptr(*p, s))
                return 0;

        if (s) {
                copy = strdup(s);
                if (!copy)
                        return -ENOMEM;
        }

        free(*p);
        *p = copy;

        return 1;
}
866
#if !HAVE_EXPLICIT_BZERO
/*
 * Fallback used when HAVE_EXPLICIT_BZERO is not set. memset() is called through
 * a volatile function pointer: the compiler has to load the pointer at run time
 * and cannot assume which function it refers to, hence it cannot recognize the
 * call as a memset() that it might "optimize" further. The approach is inspired
 * by openssl's crypto/mem_clr.c.
 */
typedef void *(*memset_t)(void *,int,size_t);

static volatile memset_t memset_func = memset;

/* Overwrites the 'l' bytes starting at 'p' with zeroes. */
void explicit_bzero(void *p, size_t l) {
        memset_t zero_fn = memset_func;

        (void) zero_fn(p, 0, l);
}
#endif
882
/* Overwrites the contents of the string 'x' (up to, not including, its terminating NUL) with
 * zeroes via explicit_bzero(). Intended for memory that held passphrases or similar, after
 * use. Returns 'x'; a NULL 'x' is passed through. */
char* string_erase(char *x) {
        if (x)
                explicit_bzero(x, strlen(x));

        return x;
}
892
/* Erases the string 's' with string_erase() and then releases it with mfree(), whose result
 * is returned. */
char *string_free_erase(char *s) {
        char *erased = string_erase(s);

        return mfree(erased);
}
896
897 bool string_is_safe(const char *p) {
898 const char *t;
899
900 if (!p)
901 return false;
902
903 for (t = p; *t; t++) {
904 if (*t > 0 && *t < ' ') /* no control characters */
905 return false;
906
907 if (strchr(QUOTES "\\\x7f", *t))
908 return false;
909 }
910
911 return true;
912 }