]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
07630cea | 2 | |
11c3a366 TA |
3 | #include <errno.h> |
4 | #include <stdarg.h> | |
5 | #include <stdint.h> | |
6 | #include <stdio.h> | |
7 | #include <stdlib.h> | |
8 | ||
b5efdb8a | 9 | #include "alloc-util.h" |
8409f688 | 10 | #include "escape.h" |
53cd7f33 | 11 | #include "extract-word.h" |
090a9c1e | 12 | #include "fileio.h" |
07630cea | 13 | #include "gunicode.h" |
c30a49b2 | 14 | #include "locale-util.h" |
11c3a366 | 15 | #include "macro.h" |
090a9c1e | 16 | #include "memory-util.h" |
b11d6a7b | 17 | #include "string-util.h" |
46bf625a | 18 | #include "strv.h" |
b4766d5f | 19 | #include "terminal-util.h" |
07630cea LP |
20 | #include "utf8.h" |
21 | #include "util.h" | |
07630cea | 22 | |
07630cea LP |
23 | char* first_word(const char *s, const char *word) { |
24 | size_t sl, wl; | |
25 | const char *p; | |
26 | ||
27 | assert(s); | |
28 | assert(word); | |
29 | ||
30 | /* Checks if the string starts with the specified word, either | |
31 | * followed by NUL or by whitespace. Returns a pointer to the | |
32 | * NUL or the first character after the whitespace. */ | |
33 | ||
34 | sl = strlen(s); | |
35 | wl = strlen(word); | |
36 | ||
37 | if (sl < wl) | |
38 | return NULL; | |
39 | ||
40 | if (wl == 0) | |
41 | return (char*) s; | |
42 | ||
43 | if (memcmp(s, word, wl) != 0) | |
44 | return NULL; | |
45 | ||
46 | p = s + wl; | |
47 | if (*p == 0) | |
48 | return (char*) p; | |
49 | ||
50 | if (!strchr(WHITESPACE, *p)) | |
51 | return NULL; | |
52 | ||
53 | p += strspn(p, WHITESPACE); | |
54 | return (char*) p; | |
55 | } | |
56 | ||
07630cea LP |
57 | char *strnappend(const char *s, const char *suffix, size_t b) { |
58 | size_t a; | |
59 | char *r; | |
60 | ||
61 | if (!s && !suffix) | |
62 | return strdup(""); | |
63 | ||
64 | if (!s) | |
65 | return strndup(suffix, b); | |
66 | ||
67 | if (!suffix) | |
68 | return strdup(s); | |
69 | ||
70 | assert(s); | |
71 | assert(suffix); | |
72 | ||
73 | a = strlen(s); | |
fd4e991d | 74 | if (b > SIZE_MAX - a) |
07630cea LP |
75 | return NULL; |
76 | ||
77 | r = new(char, a+b+1); | |
78 | if (!r) | |
79 | return NULL; | |
80 | ||
81 | memcpy(r, s, a); | |
82 | memcpy(r+a, suffix, b); | |
83 | r[a+b] = 0; | |
84 | ||
85 | return r; | |
86 | } | |
87 | ||
605405c6 | 88 | char *strjoin_real(const char *x, ...) { |
07630cea | 89 | va_list ap; |
6ced0770 | 90 | size_t l = 1; |
07630cea LP |
91 | char *r, *p; |
92 | ||
93 | va_start(ap, x); | |
020003f2 RV |
94 | for (const char *t = x; t; t = va_arg(ap, const char *)) { |
95 | size_t n; | |
07630cea | 96 | |
020003f2 RV |
97 | n = strlen(t); |
98 | if (n > SIZE_MAX - l) { | |
99 | va_end(ap); | |
100 | return NULL; | |
07630cea | 101 | } |
020003f2 RV |
102 | l += n; |
103 | } | |
07630cea LP |
104 | va_end(ap); |
105 | ||
6ced0770 | 106 | p = r = new(char, l); |
07630cea LP |
107 | if (!r) |
108 | return NULL; | |
109 | ||
020003f2 RV |
110 | va_start(ap, x); |
111 | for (const char *t = x; t; t = va_arg(ap, const char *)) | |
112 | p = stpcpy(p, t); | |
113 | va_end(ap); | |
07630cea | 114 | |
020003f2 | 115 | *p = 0; |
07630cea LP |
116 | |
117 | return r; | |
118 | } | |
119 | ||
120 | char *strstrip(char *s) { | |
7546145e LP |
121 | if (!s) |
122 | return NULL; | |
123 | ||
0a6ffc5c | 124 | /* Drops trailing whitespace. Modifies the string in place. Returns pointer to first non-space character */ |
07630cea | 125 | |
0a6ffc5c | 126 | return delete_trailing_chars(skip_leading_chars(s, WHITESPACE), WHITESPACE); |
07630cea LP |
127 | } |
128 | ||
129 | char *delete_chars(char *s, const char *bad) { | |
130 | char *f, *t; | |
131 | ||
7546145e LP |
132 | /* Drops all specified bad characters, regardless where in the string */ |
133 | ||
134 | if (!s) | |
135 | return NULL; | |
136 | ||
137 | if (!bad) | |
138 | bad = WHITESPACE; | |
07630cea LP |
139 | |
140 | for (f = s, t = s; *f; f++) { | |
141 | if (strchr(bad, *f)) | |
142 | continue; | |
143 | ||
144 | *(t++) = *f; | |
145 | } | |
146 | ||
147 | *t = 0; | |
148 | ||
149 | return s; | |
150 | } | |
151 | ||
7546145e | 152 | char *delete_trailing_chars(char *s, const char *bad) { |
a01080ce | 153 | char *c = s; |
7546145e LP |
154 | |
155 | /* Drops all specified bad characters, at the end of the string */ | |
156 | ||
157 | if (!s) | |
158 | return NULL; | |
159 | ||
160 | if (!bad) | |
161 | bad = WHITESPACE; | |
162 | ||
a01080ce | 163 | for (char *p = s; *p; p++) |
7546145e LP |
164 | if (!strchr(bad, *p)) |
165 | c = p + 1; | |
166 | ||
167 | *c = 0; | |
168 | ||
169 | return s; | |
170 | } | |
171 | ||
07630cea LP |
172 | char *truncate_nl(char *s) { |
173 | assert(s); | |
174 | ||
175 | s[strcspn(s, NEWLINE)] = 0; | |
176 | return s; | |
177 | } | |
178 | ||
b577e3d5 LP |
179 | char ascii_tolower(char x) { |
180 | ||
181 | if (x >= 'A' && x <= 'Z') | |
182 | return x - 'A' + 'a'; | |
183 | ||
184 | return x; | |
185 | } | |
186 | ||
846b8fc3 LP |
187 | char ascii_toupper(char x) { |
188 | ||
189 | if (x >= 'a' && x <= 'z') | |
190 | return x - 'a' + 'A'; | |
191 | ||
192 | return x; | |
193 | } | |
194 | ||
07630cea | 195 | char *ascii_strlower(char *t) { |
07630cea LP |
196 | assert(t); |
197 | ||
a01080ce | 198 | for (char *p = t; *p; p++) |
b577e3d5 LP |
199 | *p = ascii_tolower(*p); |
200 | ||
201 | return t; | |
202 | } | |
203 | ||
846b8fc3 | 204 | char *ascii_strupper(char *t) { |
846b8fc3 LP |
205 | assert(t); |
206 | ||
a01080ce | 207 | for (char *p = t; *p; p++) |
846b8fc3 LP |
208 | *p = ascii_toupper(*p); |
209 | ||
210 | return t; | |
211 | } | |
212 | ||
b577e3d5 | 213 | char *ascii_strlower_n(char *t, size_t n) { |
b577e3d5 LP |
214 | if (n <= 0) |
215 | return t; | |
216 | ||
a01080ce | 217 | for (size_t i = 0; i < n; i++) |
b577e3d5 | 218 | t[i] = ascii_tolower(t[i]); |
07630cea LP |
219 | |
220 | return t; | |
221 | } | |
522d85ae LP |
222 | |
223 | int ascii_strcasecmp_n(const char *a, const char *b, size_t n) { | |
224 | ||
225 | for (; n > 0; a++, b++, n--) { | |
226 | int x, y; | |
227 | ||
228 | x = (int) (uint8_t) ascii_tolower(*a); | |
229 | y = (int) (uint8_t) ascii_tolower(*b); | |
230 | ||
231 | if (x != y) | |
232 | return x - y; | |
233 | } | |
234 | ||
235 | return 0; | |
236 | } | |
c1749834 LP |
237 | |
238 | int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) { | |
239 | int r; | |
240 | ||
241 | r = ascii_strcasecmp_n(a, b, MIN(n, m)); | |
242 | if (r != 0) | |
243 | return r; | |
244 | ||
6dd91b36 | 245 | return CMP(n, m); |
c1749834 | 246 | } |
07630cea LP |
247 | |
248 | bool chars_intersect(const char *a, const char *b) { | |
07630cea | 249 | /* Returns true if any of the chars in a are in b. */ |
a01080ce | 250 | for (const char *p = a; *p; p++) |
07630cea LP |
251 | if (strchr(b, *p)) |
252 | return true; | |
253 | ||
254 | return false; | |
255 | } | |
256 | ||
257 | bool string_has_cc(const char *p, const char *ok) { | |
07630cea LP |
258 | assert(p); |
259 | ||
260 | /* | |
261 | * Check if a string contains control characters. If 'ok' is | |
262 | * non-NULL it may be a string containing additional CCs to be | |
263 | * considered OK. | |
264 | */ | |
265 | ||
a01080ce | 266 | for (const char *t = p; *t; t++) { |
07630cea LP |
267 | if (ok && strchr(ok, *t)) |
268 | continue; | |
269 | ||
6302d386 | 270 | if (char_is_cc(*t)) |
07630cea LP |
271 | return true; |
272 | } | |
273 | ||
274 | return false; | |
275 | } | |
276 | ||
8409f688 ZJS |
277 | static int write_ellipsis(char *buf, bool unicode) { |
278 | if (unicode || is_locale_utf8()) { | |
279 | buf[0] = 0xe2; /* tri-dot ellipsis: … */ | |
280 | buf[1] = 0x80; | |
281 | buf[2] = 0xa6; | |
282 | } else { | |
283 | buf[0] = '.'; | |
284 | buf[1] = '.'; | |
285 | buf[2] = '.'; | |
286 | } | |
287 | ||
288 | return 3; | |
289 | } | |
290 | ||
07630cea | 291 | static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { |
9924aef6 ZJS |
292 | size_t x, need_space, suffix_len; |
293 | char *t; | |
07630cea LP |
294 | |
295 | assert(s); | |
296 | assert(percent <= 100); | |
f5fbe71d | 297 | assert(new_length != SIZE_MAX); |
07630cea | 298 | |
c30a49b2 | 299 | if (old_length <= new_length) |
07630cea LP |
300 | return strndup(s, old_length); |
301 | ||
c30a49b2 LP |
302 | /* Special case short ellipsations */ |
303 | switch (new_length) { | |
304 | ||
305 | case 0: | |
306 | return strdup(""); | |
307 | ||
308 | case 1: | |
309 | if (is_locale_utf8()) | |
310 | return strdup("…"); | |
311 | else | |
312 | return strdup("."); | |
313 | ||
314 | case 2: | |
315 | if (!is_locale_utf8()) | |
316 | return strdup(".."); | |
317 | ||
318 | break; | |
319 | ||
320 | default: | |
321 | break; | |
322 | } | |
323 | ||
324 | /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one | |
325 | * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage, | |
326 | * either for the UTF-8 encoded character or for three ASCII characters. */ | |
327 | need_space = is_locale_utf8() ? 1 : 3; | |
328 | ||
9924aef6 ZJS |
329 | t = new(char, new_length+3); |
330 | if (!t) | |
07630cea LP |
331 | return NULL; |
332 | ||
c30a49b2 | 333 | assert(new_length >= need_space); |
07630cea | 334 | |
c30a49b2 LP |
335 | x = ((new_length - need_space) * percent + 50) / 100; |
336 | assert(x <= new_length - need_space); | |
07630cea | 337 | |
9924aef6 ZJS |
338 | memcpy(t, s, x); |
339 | write_ellipsis(t + x, false); | |
340 | suffix_len = new_length - x - need_space; | |
341 | memcpy(t + x + 3, s + old_length - suffix_len, suffix_len); | |
342 | *(t + x + 3 + suffix_len) = '\0'; | |
07630cea | 343 | |
9924aef6 | 344 | return t; |
07630cea LP |
345 | } |
346 | ||
347 | char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) { | |
c30a49b2 | 348 | size_t x, k, len, len2; |
07630cea | 349 | const char *i, *j; |
c30a49b2 | 350 | char *e; |
c932fb71 | 351 | int r; |
07630cea | 352 | |
c30a49b2 LP |
353 | /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up |
354 | * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8 | |
355 | * strings. | |
356 | * | |
357 | * Ellipsation is done in a locale-dependent way: | |
358 | * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...") | |
359 | * 2. Otherwise, a unicode ellipsis is used ("…") | |
360 | * | |
361 | * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or | |
362 | * the current locale is UTF-8. | |
363 | */ | |
364 | ||
07630cea LP |
365 | assert(s); |
366 | assert(percent <= 100); | |
ddbc9319 | 367 | |
f5fbe71d | 368 | if (new_length == SIZE_MAX) |
ddbc9319 LP |
369 | return strndup(s, old_length); |
370 | ||
c30a49b2 LP |
371 | if (new_length == 0) |
372 | return strdup(""); | |
07630cea | 373 | |
c30a49b2 | 374 | /* If no multibyte characters use ascii_ellipsize_mem for speed */ |
21e4e3e0 | 375 | if (ascii_is_valid_n(s, old_length)) |
07630cea LP |
376 | return ascii_ellipsize_mem(s, old_length, new_length, percent); |
377 | ||
c30a49b2 LP |
378 | x = ((new_length - 1) * percent) / 100; |
379 | assert(x <= new_length - 1); | |
07630cea LP |
380 | |
381 | k = 0; | |
9924aef6 | 382 | for (i = s; i < s + old_length; i = utf8_next_char(i)) { |
c932fb71 | 383 | char32_t c; |
9924aef6 | 384 | int w; |
07630cea | 385 | |
c932fb71 SL |
386 | r = utf8_encoded_to_unichar(i, &c); |
387 | if (r < 0) | |
07630cea | 388 | return NULL; |
07630cea | 389 | |
9924aef6 ZJS |
390 | w = unichar_iswide(c) ? 2 : 1; |
391 | if (k + w <= x) | |
392 | k += w; | |
393 | else | |
394 | break; | |
395 | } | |
07630cea | 396 | |
9924aef6 | 397 | for (j = s + old_length; j > i; ) { |
c932fb71 | 398 | char32_t c; |
9924aef6 ZJS |
399 | int w; |
400 | const char *jj; | |
07630cea | 401 | |
9924aef6 ZJS |
402 | jj = utf8_prev_char(j); |
403 | r = utf8_encoded_to_unichar(jj, &c); | |
c932fb71 | 404 | if (r < 0) |
07630cea | 405 | return NULL; |
9924aef6 ZJS |
406 | |
407 | w = unichar_iswide(c) ? 2 : 1; | |
408 | if (k + w <= new_length) { | |
409 | k += w; | |
410 | j = jj; | |
411 | } else | |
412 | break; | |
07630cea LP |
413 | } |
414 | assert(i <= j); | |
415 | ||
416 | /* we don't actually need to ellipsize */ | |
417 | if (i == j) | |
9924aef6 | 418 | return memdup_suffix0(s, old_length); |
07630cea | 419 | |
9924aef6 ZJS |
420 | /* make space for ellipsis, if possible */ |
421 | if (j < s + old_length) | |
422 | j = utf8_next_char(j); | |
423 | else if (i > s) | |
424 | i = utf8_prev_char(i); | |
07630cea LP |
425 | |
426 | len = i - s; | |
427 | len2 = s + old_length - j; | |
428 | e = new(char, len + 3 + len2 + 1); | |
429 | if (!e) | |
430 | return NULL; | |
431 | ||
432 | /* | |
433 | printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n", | |
434 | old_length, new_length, x, len, len2, k); | |
435 | */ | |
436 | ||
437 | memcpy(e, s, len); | |
8409f688 | 438 | write_ellipsis(e + len, true); |
9924aef6 ZJS |
439 | memcpy(e + len + 3, j, len2); |
440 | *(e + len + 3 + len2) = '\0'; | |
07630cea LP |
441 | |
442 | return e; | |
443 | } | |
444 | ||
8409f688 ZJS |
445 | char *cellescape(char *buf, size_t len, const char *s) { |
446 | /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII | |
447 | * characters are copied as they are, everything else is escaped. The result | |
448 | * is different then if escaping and ellipsization was performed in two | |
449 | * separate steps, because each sequence is either stored in full or skipped. | |
450 | * | |
451 | * This function should be used for logging about strings which expected to | |
452 | * be plain ASCII in a safe way. | |
453 | * | |
454 | * An ellipsis will be used if s is too long. It was always placed at the | |
455 | * very end. | |
456 | */ | |
457 | ||
a01080ce | 458 | size_t i = 0, last_char_width[4] = {}, k = 0; |
61f6e276 LP |
459 | |
460 | assert(len > 0); /* at least a terminating NUL */ | |
8409f688 | 461 | |
61f6e276 LP |
462 | for (;;) { |
463 | char four[4]; | |
464 | int w; | |
8409f688 | 465 | |
61f6e276 | 466 | if (*s == 0) /* terminating NUL detected? then we are done! */ |
8409f688 | 467 | goto done; |
61f6e276 LP |
468 | |
469 | w = cescape_char(*s, four); | |
470 | if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? In that case let's | |
471 | * ellipsize at the previous location */ | |
472 | break; | |
473 | ||
474 | /* OK, there was space, let's add this escaped character to the buffer */ | |
475 | memcpy(buf + i, four, w); | |
476 | i += w; | |
477 | ||
478 | /* And remember its width in the ring buffer */ | |
479 | last_char_width[k] = w; | |
480 | k = (k + 1) % 4; | |
481 | ||
482 | s++; | |
8409f688 ZJS |
483 | } |
484 | ||
61f6e276 LP |
485 | /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4 |
486 | * characters ideally, but the buffer is shorter than that in the first place take what we can get */ | |
a01080ce | 487 | for (size_t j = 0; j < ELEMENTSOF(last_char_width); j++) { |
61f6e276 LP |
488 | |
489 | if (i + 4 <= len) /* nice, we reached our space goal */ | |
490 | break; | |
491 | ||
492 | k = k == 0 ? 3 : k - 1; | |
493 | if (last_char_width[k] == 0) /* bummer, we reached the beginning of the strings */ | |
494 | break; | |
495 | ||
496 | assert(i >= last_char_width[k]); | |
497 | i -= last_char_width[k]; | |
8409f688 ZJS |
498 | } |
499 | ||
61f6e276 LP |
500 | if (i + 4 <= len) /* yay, enough space */ |
501 | i += write_ellipsis(buf + i, false); | |
502 | else if (i + 3 <= len) { /* only space for ".." */ | |
503 | buf[i++] = '.'; | |
504 | buf[i++] = '.'; | |
505 | } else if (i + 2 <= len) /* only space for a single "." */ | |
506 | buf[i++] = '.'; | |
507 | else | |
508 | assert(i + 1 <= len); | |
509 | ||
8409f688 ZJS |
510 | done: |
511 | buf[i] = '\0'; | |
512 | return buf; | |
513 | } | |
514 | ||
07630cea LP |
515 | char* strshorten(char *s, size_t l) { |
516 | assert(s); | |
517 | ||
47b33c7d | 518 | if (strnlen(s, l+1) > l) |
07630cea LP |
519 | s[l] = 0; |
520 | ||
521 | return s; | |
522 | } | |
523 | ||
524 | char *strreplace(const char *text, const char *old_string, const char *new_string) { | |
9d73565a LP |
525 | size_t l, old_len, new_len, allocated = 0; |
526 | char *t, *ret = NULL; | |
07630cea | 527 | const char *f; |
07630cea | 528 | |
07630cea LP |
529 | assert(old_string); |
530 | assert(new_string); | |
531 | ||
9d73565a LP |
532 | if (!text) |
533 | return NULL; | |
534 | ||
07630cea LP |
535 | old_len = strlen(old_string); |
536 | new_len = strlen(new_string); | |
537 | ||
538 | l = strlen(text); | |
9d73565a | 539 | if (!GREEDY_REALLOC(ret, allocated, l+1)) |
07630cea LP |
540 | return NULL; |
541 | ||
542 | f = text; | |
9d73565a | 543 | t = ret; |
07630cea | 544 | while (*f) { |
07630cea LP |
545 | size_t d, nl; |
546 | ||
547 | if (!startswith(f, old_string)) { | |
548 | *(t++) = *(f++); | |
549 | continue; | |
550 | } | |
551 | ||
9d73565a | 552 | d = t - ret; |
07630cea | 553 | nl = l - old_len + new_len; |
9d73565a LP |
554 | |
555 | if (!GREEDY_REALLOC(ret, allocated, nl + 1)) | |
556 | return mfree(ret); | |
07630cea LP |
557 | |
558 | l = nl; | |
9d73565a | 559 | t = ret + d; |
07630cea LP |
560 | |
561 | t = stpcpy(t, new_string); | |
562 | f += old_len; | |
563 | } | |
564 | ||
565 | *t = 0; | |
9d73565a | 566 | return ret; |
07630cea LP |
567 | } |
568 | ||
6fb05690 LP |
569 | static void advance_offsets( |
570 | ssize_t diff, | |
571 | size_t offsets[2], /* note: we can't use [static 2] here, since this may be NULL */ | |
572 | size_t shift[static 2], | |
573 | size_t size) { | |
574 | ||
b4766d5f ZJS |
575 | if (!offsets) |
576 | return; | |
577 | ||
6fb05690 LP |
578 | assert(shift); |
579 | ||
b4766d5f ZJS |
580 | if ((size_t) diff < offsets[0]) |
581 | shift[0] += size; | |
582 | if ((size_t) diff < offsets[1]) | |
583 | shift[1] += size; | |
584 | } | |
585 | ||
586 | char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { | |
62a3fc6d | 587 | const char *begin = NULL; |
07630cea LP |
588 | enum { |
589 | STATE_OTHER, | |
590 | STATE_ESCAPE, | |
695a944c LP |
591 | STATE_CSI, |
592 | STATE_CSO, | |
07630cea LP |
593 | } state = STATE_OTHER; |
594 | char *obuf = NULL; | |
62a3fc6d | 595 | size_t osz = 0, isz, shift[2] = {}, n_carriage_returns = 0; |
07630cea LP |
596 | FILE *f; |
597 | ||
598 | assert(ibuf); | |
599 | assert(*ibuf); | |
600 | ||
695a944c LP |
601 | /* This does three things: |
602 | * | |
603 | * 1. Replaces TABs by 8 spaces | |
604 | * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences | |
605 | * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences | |
62a3fc6d ZJS |
606 | * 4. Strip trailing \r characters (since they would "move the cursor", but have no |
607 | * other effect). | |
695a944c | 608 | * |
2fe21124 ZJS |
609 | * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as |
610 | * are any other special characters. Truncated ANSI sequences are left-as is too. This call is | |
611 | * supposed to suppress the most basic formatting noise, but nothing else. | |
695a944c LP |
612 | * |
613 | * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */ | |
07630cea LP |
614 | |
615 | isz = _isz ? *_isz : strlen(*ibuf); | |
616 | ||
2fe21124 ZJS |
617 | /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we |
618 | * created f here and it doesn't leave our scope. */ | |
619 | f = open_memstream_unlocked(&obuf, &osz); | |
07630cea LP |
620 | if (!f) |
621 | return NULL; | |
622 | ||
62a3fc6d | 623 | for (const char *i = *ibuf; i < *ibuf + isz + 1; i++) { |
07630cea LP |
624 | |
625 | switch (state) { | |
626 | ||
627 | case STATE_OTHER: | |
628 | if (i >= *ibuf + isz) /* EOT */ | |
629 | break; | |
62a3fc6d ZJS |
630 | |
631 | if (*i == '\r') { | |
632 | n_carriage_returns++; | |
633 | break; | |
634 | } else if (*i == '\n') | |
635 | /* Ignore carriage returns before new line */ | |
636 | n_carriage_returns = 0; | |
637 | for (; n_carriage_returns > 0; n_carriage_returns--) | |
638 | fputc('\r', f); | |
639 | ||
640 | if (*i == '\x1B') | |
07630cea | 641 | state = STATE_ESCAPE; |
b4766d5f | 642 | else if (*i == '\t') { |
0d536673 | 643 | fputs(" ", f); |
b4766d5f ZJS |
644 | advance_offsets(i - *ibuf, highlight, shift, 7); |
645 | } else | |
0d536673 | 646 | fputc(*i, f); |
b4766d5f | 647 | |
07630cea LP |
648 | break; |
649 | ||
650 | case STATE_ESCAPE: | |
62a3fc6d ZJS |
651 | assert(n_carriage_returns == 0); |
652 | ||
07630cea | 653 | if (i >= *ibuf + isz) { /* EOT */ |
0d536673 | 654 | fputc('\x1B', f); |
b4766d5f | 655 | advance_offsets(i - *ibuf, highlight, shift, 1); |
07630cea | 656 | break; |
695a944c LP |
657 | } else if (*i == '[') { /* ANSI CSI */ |
658 | state = STATE_CSI; | |
659 | begin = i + 1; | |
660 | } else if (*i == ']') { /* ANSI CSO */ | |
661 | state = STATE_CSO; | |
07630cea LP |
662 | begin = i + 1; |
663 | } else { | |
0d536673 LP |
664 | fputc('\x1B', f); |
665 | fputc(*i, f); | |
b4766d5f | 666 | advance_offsets(i - *ibuf, highlight, shift, 1); |
07630cea LP |
667 | state = STATE_OTHER; |
668 | } | |
669 | ||
670 | break; | |
671 | ||
695a944c | 672 | case STATE_CSI: |
62a3fc6d | 673 | assert(n_carriage_returns == 0); |
07630cea | 674 | |
695a944c LP |
675 | if (i >= *ibuf + isz || /* EOT … */ |
676 | !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */ | |
0d536673 LP |
677 | fputc('\x1B', f); |
678 | fputc('[', f); | |
b4766d5f | 679 | advance_offsets(i - *ibuf, highlight, shift, 2); |
07630cea LP |
680 | state = STATE_OTHER; |
681 | i = begin-1; | |
682 | } else if (*i == 'm') | |
683 | state = STATE_OTHER; | |
695a944c LP |
684 | |
685 | break; | |
686 | ||
687 | case STATE_CSO: | |
62a3fc6d | 688 | assert(n_carriage_returns == 0); |
695a944c LP |
689 | |
690 | if (i >= *ibuf + isz || /* EOT … */ | |
691 | (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */ | |
692 | fputc('\x1B', f); | |
693 | fputc(']', f); | |
694 | advance_offsets(i - *ibuf, highlight, shift, 2); | |
695 | state = STATE_OTHER; | |
696 | i = begin-1; | |
697 | } else if (*i == '\a') | |
698 | state = STATE_OTHER; | |
699 | ||
07630cea LP |
700 | break; |
701 | } | |
702 | } | |
703 | ||
c7e03d2e | 704 | if (fflush_and_check(f) < 0) { |
07630cea | 705 | fclose(f); |
6b430fdb | 706 | return mfree(obuf); |
07630cea | 707 | } |
07630cea LP |
708 | fclose(f); |
709 | ||
6fb05690 | 710 | free_and_replace(*ibuf, obuf); |
07630cea LP |
711 | |
712 | if (_isz) | |
713 | *_isz = osz; | |
714 | ||
b4766d5f ZJS |
715 | if (highlight) { |
716 | highlight[0] += shift[0]; | |
717 | highlight[1] += shift[1]; | |
718 | } | |
719 | ||
6fb05690 | 720 | return *ibuf; |
07630cea LP |
721 | } |
722 | ||
c2bc710b | 723 | char *strextend_with_separator_internal(char **x, const char *separator, ...) { |
bb8ad9ea | 724 | size_t f, l, l_separator; |
c2bc710b LP |
725 | bool need_separator; |
726 | char *nr, *p; | |
bb8ad9ea | 727 | va_list ap; |
07630cea LP |
728 | |
729 | assert(x); | |
730 | ||
7bf7ce28 | 731 | l = f = strlen_ptr(*x); |
07630cea | 732 | |
bb8ad9ea LP |
733 | need_separator = !isempty(*x); |
734 | l_separator = strlen_ptr(separator); | |
735 | ||
736 | va_start(ap, separator); | |
07630cea LP |
737 | for (;;) { |
738 | const char *t; | |
739 | size_t n; | |
740 | ||
741 | t = va_arg(ap, const char *); | |
742 | if (!t) | |
743 | break; | |
744 | ||
745 | n = strlen(t); | |
bb8ad9ea LP |
746 | |
747 | if (need_separator) | |
748 | n += l_separator; | |
749 | ||
c2bc710b | 750 | if (n >= SIZE_MAX - l) { |
07630cea LP |
751 | va_end(ap); |
752 | return NULL; | |
753 | } | |
754 | ||
755 | l += n; | |
bb8ad9ea | 756 | need_separator = true; |
07630cea LP |
757 | } |
758 | va_end(ap); | |
759 | ||
bb8ad9ea LP |
760 | need_separator = !isempty(*x); |
761 | ||
2a4e1fd0 | 762 | nr = realloc(*x, GREEDY_ALLOC_ROUND_UP(l+1)); |
c2bc710b | 763 | if (!nr) |
07630cea LP |
764 | return NULL; |
765 | ||
c2bc710b LP |
766 | *x = nr; |
767 | p = nr + f; | |
07630cea | 768 | |
bb8ad9ea | 769 | va_start(ap, separator); |
07630cea LP |
770 | for (;;) { |
771 | const char *t; | |
772 | ||
773 | t = va_arg(ap, const char *); | |
774 | if (!t) | |
775 | break; | |
776 | ||
bb8ad9ea LP |
777 | if (need_separator && separator) |
778 | p = stpcpy(p, separator); | |
779 | ||
07630cea | 780 | p = stpcpy(p, t); |
bb8ad9ea LP |
781 | |
782 | need_separator = true; | |
07630cea LP |
783 | } |
784 | va_end(ap); | |
785 | ||
c2bc710b | 786 | assert(p == nr + l); |
bb8ad9ea | 787 | |
07630cea | 788 | *p = 0; |
07630cea | 789 | |
c2bc710b | 790 | return p; |
07630cea LP |
791 | } |
792 | ||
e9b88a6d LP |
793 | int strextendf(char **x, const char *format, ...) { |
794 | size_t m, a; | |
795 | va_list ap; | |
796 | int l; | |
797 | ||
798 | /* Appends a formatted string to the specified string. Don't use this in inner loops, since then | |
799 | * we'll spend a tonload of time in determining the length of the string passed in, over and over | |
800 | * again. */ | |
801 | ||
802 | assert(x); | |
803 | assert(format); | |
804 | ||
805 | /* Let's try to use the allocated buffer, if there's room at the end still. Otherwise let's extend by 64 chars. */ | |
806 | if (*x) { | |
807 | m = strlen(*x); | |
6df28e1f | 808 | a = MALLOC_SIZEOF_SAFE(*x); |
e9b88a6d LP |
809 | assert(a >= m + 1); |
810 | } else | |
811 | m = a = 0; | |
812 | ||
813 | if (a - m < 17) { /* if there's less than 16 chars space, then enlarge the buffer first */ | |
814 | char *n; | |
815 | ||
816 | if (_unlikely_(m > SIZE_MAX - 64)) /* overflow check */ | |
817 | return -ENOMEM; | |
818 | ||
819 | n = realloc(*x, m + 64); | |
820 | if (!n) | |
821 | return -ENOMEM; | |
822 | ||
823 | *x = n; | |
6df28e1f | 824 | a = MALLOC_SIZEOF_SAFE(*x); |
e9b88a6d LP |
825 | } |
826 | ||
827 | /* Now, let's try to format the string into it */ | |
828 | va_start(ap, format); | |
829 | l = vsnprintf(*x + m, a - m, format, ap); | |
830 | va_end(ap); | |
831 | ||
832 | assert(l >= 0); | |
833 | ||
834 | if ((size_t) l < a - m) { | |
835 | char *n; | |
836 | ||
837 | /* Nice! This worked. We are done. But first, let's return the extra space we don't | |
838 | * need. This should be a cheap operation, since we only lower the allocation size here, | |
839 | * never increase. */ | |
840 | n = realloc(*x, m + (size_t) l + 1); | |
841 | if (n) | |
842 | *x = n; | |
843 | } else { | |
844 | char *n; | |
845 | ||
846 | /* Wasn't enough. Then let's allocate exactly what we need. */ | |
847 | ||
848 | if (_unlikely_((size_t) l > SIZE_MAX - 1)) /* overflow check #1 */ | |
849 | goto oom; | |
850 | if (_unlikely_(m > SIZE_MAX - ((size_t) l + 1))) /* overflow check #2 */ | |
851 | goto oom; | |
852 | ||
853 | a = m + (size_t) l + 1; | |
854 | n = realloc(*x, a); | |
855 | if (!n) | |
856 | goto oom; | |
857 | *x = n; | |
858 | ||
859 | va_start(ap, format); | |
860 | l = vsnprintf(*x + m, a - m, format, ap); | |
861 | va_end(ap); | |
862 | ||
863 | assert((size_t) l < a - m); | |
864 | } | |
865 | ||
866 | return 0; | |
867 | ||
868 | oom: | |
869 | /* truncate the bytes added after the first vsnprintf() attempt again */ | |
870 | (*x)[m] = 0; | |
871 | return -ENOMEM; | |
872 | } | |
873 | ||
07630cea | 874 | char *strrep(const char *s, unsigned n) { |
07630cea | 875 | char *r, *p; |
fe96c0f8 | 876 | size_t l; |
07630cea LP |
877 | |
878 | assert(s); | |
879 | ||
880 | l = strlen(s); | |
881 | p = r = malloc(l * n + 1); | |
882 | if (!r) | |
883 | return NULL; | |
884 | ||
fe96c0f8 | 885 | for (unsigned i = 0; i < n; i++) |
07630cea LP |
886 | p = stpcpy(p, s); |
887 | ||
888 | *p = 0; | |
889 | return r; | |
890 | } | |
891 | ||
892 | int split_pair(const char *s, const char *sep, char **l, char **r) { | |
893 | char *x, *a, *b; | |
894 | ||
895 | assert(s); | |
896 | assert(sep); | |
897 | assert(l); | |
898 | assert(r); | |
899 | ||
900 | if (isempty(sep)) | |
901 | return -EINVAL; | |
902 | ||
903 | x = strstr(s, sep); | |
904 | if (!x) | |
905 | return -EINVAL; | |
906 | ||
907 | a = strndup(s, x - s); | |
908 | if (!a) | |
909 | return -ENOMEM; | |
910 | ||
911 | b = strdup(x + strlen(sep)); | |
912 | if (!b) { | |
913 | free(a); | |
914 | return -ENOMEM; | |
915 | } | |
916 | ||
917 | *l = a; | |
918 | *r = b; | |
919 | ||
920 | return 0; | |
921 | } | |
922 | ||
923 | int free_and_strdup(char **p, const char *s) { | |
924 | char *t; | |
925 | ||
926 | assert(p); | |
927 | ||
7f546026 | 928 | /* Replaces a string pointer with a strdup()ed new string, |
07630cea LP |
929 | * possibly freeing the old one. */ |
930 | ||
931 | if (streq_ptr(*p, s)) | |
932 | return 0; | |
933 | ||
934 | if (s) { | |
935 | t = strdup(s); | |
936 | if (!t) | |
937 | return -ENOMEM; | |
938 | } else | |
939 | t = NULL; | |
940 | ||
941 | free(*p); | |
942 | *p = t; | |
943 | ||
944 | return 1; | |
945 | } | |
946 | ||
7f546026 ZJS |
947 | int free_and_strndup(char **p, const char *s, size_t l) { |
948 | char *t; | |
949 | ||
950 | assert(p); | |
951 | assert(s || l == 0); | |
952 | ||
953 | /* Replaces a string pointer with a strndup()ed new string, | |
954 | * freeing the old one. */ | |
955 | ||
956 | if (!*p && !s) | |
957 | return 0; | |
958 | ||
959 | if (*p && s && strneq(*p, s, l) && (l > strlen(*p) || (*p)[l] == '\0')) | |
960 | return 0; | |
961 | ||
962 | if (s) { | |
963 | t = strndup(s, l); | |
964 | if (!t) | |
965 | return -ENOMEM; | |
966 | } else | |
967 | t = NULL; | |
968 | ||
969 | free_and_replace(*p, t); | |
970 | return 1; | |
971 | } | |
972 | ||
f3e2e81d | 973 | bool string_is_safe(const char *p) { |
f3e2e81d LP |
974 | if (!p) |
975 | return false; | |
976 | ||
839d1b20 LP |
977 | /* Checks if the specified string contains no quotes or control characters */ |
978 | ||
a01080ce | 979 | for (const char *t = p; *t; t++) { |
f3e2e81d LP |
980 | if (*t > 0 && *t < ' ') /* no control characters */ |
981 | return false; | |
982 | ||
983 | if (strchr(QUOTES "\\\x7f", *t)) | |
984 | return false; | |
985 | } | |
986 | ||
987 | return true; | |
988 | } | |
53caaffd LP |
989 | |
990 | char* string_erase(char *x) { | |
991 | if (!x) | |
992 | return NULL; | |
993 | ||
994 | /* A delicious drop of snake-oil! To be called on memory where we stored passphrases or so, after we | |
995 | * used them. */ | |
996 | explicit_bzero_safe(x, strlen(x)); | |
997 | return x; | |
998 | } | |
8dd6491e LP |
999 | |
1000 | int string_truncate_lines(const char *s, size_t n_lines, char **ret) { | |
1001 | const char *p = s, *e = s; | |
1002 | bool truncation_applied = false; | |
1003 | char *copy; | |
1004 | size_t n = 0; | |
1005 | ||
1006 | assert(s); | |
1007 | ||
1008 | /* Truncate after the specified number of lines. Returns > 0 if a truncation was applied or == 0 if | |
1009 | * there were fewer lines in the string anyway. Trailing newlines on input are ignored, and not | |
1010 | * generated either. */ | |
1011 | ||
1012 | for (;;) { | |
1013 | size_t k; | |
1014 | ||
1015 | k = strcspn(p, "\n"); | |
1016 | ||
1017 | if (p[k] == 0) { | |
1018 | if (k == 0) /* final empty line */ | |
1019 | break; | |
1020 | ||
1021 | if (n >= n_lines) /* above threshold */ | |
1022 | break; | |
1023 | ||
1024 | e = p + k; /* last line to include */ | |
1025 | break; | |
1026 | } | |
1027 | ||
1028 | assert(p[k] == '\n'); | |
1029 | ||
1030 | if (n >= n_lines) | |
1031 | break; | |
1032 | ||
1033 | if (k > 0) | |
1034 | e = p + k; | |
1035 | ||
1036 | p += k + 1; | |
1037 | n++; | |
1038 | } | |
1039 | ||
1040 | /* e points after the last character we want to keep */ | |
1041 | if (isempty(e)) | |
1042 | copy = strdup(s); | |
1043 | else { | |
1044 | if (!in_charset(e, "\n")) /* We only consider things truncated if we remove something that | |
1045 | * isn't a new-line or a series of them */ | |
1046 | truncation_applied = true; | |
1047 | ||
1048 | copy = strndup(s, e - s); | |
1049 | } | |
1050 | if (!copy) | |
1051 | return -ENOMEM; | |
1052 | ||
1053 | *ret = copy; | |
1054 | return truncation_applied; | |
1055 | } | |
f6857fa6 LP |
1056 | |
1057 | int string_extract_line(const char *s, size_t i, char **ret) { | |
1058 | const char *p = s; | |
1059 | size_t c = 0; | |
1060 | ||
1061 | /* Extract the i'nth line from the specified string. Returns > 0 if there are more lines after that, | |
1062 | * and == 0 if we are looking at the last line or already beyond the last line. As special | |
1063 | * optimization, if the first line is requested and the string only consists of one line we return | |
1064 | * NULL, indicating the input string should be used as is, and avoid a memory allocation for a very | |
1065 | * common case. */ | |
1066 | ||
1067 | for (;;) { | |
1068 | const char *q; | |
1069 | ||
1070 | q = strchr(p, '\n'); | |
1071 | if (i == c) { | |
1072 | /* The line we are looking for! */ | |
1073 | ||
1074 | if (q) { | |
1075 | char *m; | |
1076 | ||
1077 | m = strndup(p, q - p); | |
1078 | if (!m) | |
1079 | return -ENOMEM; | |
1080 | ||
1081 | *ret = m; | |
1082 | return !isempty(q + 1); /* more coming? */ | |
1083 | } else { | |
1084 | if (p == s) | |
1085 | *ret = NULL; /* Just use the input string */ | |
1086 | else { | |
1087 | char *m; | |
1088 | ||
1089 | m = strdup(p); | |
1090 | if (!m) | |
1091 | return -ENOMEM; | |
1092 | ||
1093 | *ret = m; | |
1094 | } | |
1095 | ||
1096 | return 0; /* The end */ | |
1097 | } | |
1098 | } | |
1099 | ||
1100 | if (!q) { | |
1101 | char *m; | |
1102 | ||
1103 | /* No more lines, return empty line */ | |
1104 | ||
1105 | m = strdup(""); | |
1106 | if (!m) | |
1107 | return -ENOMEM; | |
1108 | ||
1109 | *ret = m; | |
1110 | return 0; /* The end */ | |
1111 | } | |
1112 | ||
1113 | p = q + 1; | |
1114 | c++; | |
1115 | } | |
1116 | } | |
53cd7f33 | 1117 | |
46bf625a | 1118 | int string_contains_word_strv(const char *string, const char *separators, char **words, const char **ret_word) { |
53cd7f33 ZJS |
1119 | /* In the default mode with no separators specified, we split on whitespace and |
1120 | * don't coalesce separators. */ | |
1121 | const ExtractFlags flags = separators ? EXTRACT_DONT_COALESCE_SEPARATORS : 0; | |
1122 | ||
46bf625a ZJS |
1123 | const char *found = NULL; |
1124 | ||
53cd7f33 ZJS |
1125 | for (const char *p = string;;) { |
1126 | _cleanup_free_ char *w = NULL; | |
1127 | int r; | |
1128 | ||
1129 | r = extract_first_word(&p, &w, separators, flags); | |
1130 | if (r < 0) | |
1131 | return r; | |
1132 | if (r == 0) | |
46bf625a ZJS |
1133 | break; |
1134 | ||
1135 | found = strv_find(words, w); | |
1136 | if (found) | |
1137 | break; | |
53cd7f33 | 1138 | } |
46bf625a ZJS |
1139 | |
1140 | if (ret_word) | |
1141 | *ret_word = found; | |
1142 | return !!found; | |
53cd7f33 | 1143 | } |