src/basic/string-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #include <errno.h>
   4 #include <stdarg.h>
   5 #include <stdint.h>
   6 #include <stdio.h>
   7 #include <stdlib.h>
   8
   9 #include "alloc-util.h"
  10 #include "escape.h"
  11 #include "extract-word.h"
  12 #include "fileio.h"
  13 #include "gunicode.h"
  14 #include "locale-util.h"
  15 #include "macro.h"
  16 #include "memory-util.h"
  17 #include "string-util.h"
  18 #include "terminal-util.h"
  19 #include "utf8.h"
  20 #include "util.h"
  21
  22 int strcmp_ptr(const char *a, const char *b) {
  23         /* Like strcmp(), but tries to make sense of NULL pointers */
  24
  25         if (a && b)
  26                 return strcmp(a, b);
  27         return CMP(a, b); /* Direct comparison of pointers, one of which is NULL */
  28 }
  29
  30 int strcasecmp_ptr(const char *a, const char *b) {
  31         /* Like strcasecmp(), but tries to make sense of NULL pointers */
  32
  33         if (a && b)
  34                 return strcasecmp(a, b);
  35         return CMP(a, b); /* Direct comparison of pointers, one of which is NULL */
  36 }
  37
  38 char* endswith(const char *s, const char *postfix) {
  39         size_t sl, pl;
  40
  41         assert(s);
  42         assert(postfix);
  43
  44         sl = strlen(s);
  45         pl = strlen(postfix);
  46
  47         if (pl == 0)
  48                 return (char*) s + sl;
  49
  50         if (sl < pl)
  51                 return NULL;
  52
  53         if (memcmp(s + sl - pl, postfix, pl) != 0)
  54                 return NULL;
  55
  56         return (char*) s + sl - pl;
  57 }
  58
  59 char* endswith_no_case(const char *s, const char *postfix) {
  60         size_t sl, pl;
  61
  62         assert(s);
  63         assert(postfix);
  64
  65         sl = strlen(s);
  66         pl = strlen(postfix);
  67
  68         if (pl == 0)
  69                 return (char*) s + sl;
  70
  71         if (sl < pl)
  72                 return NULL;
  73
  74         if (strcasecmp(s + sl - pl, postfix) != 0)
  75                 return NULL;
  76
  77         return (char*) s + sl - pl;
  78 }
  79
  80 char* first_word(const char *s, const char *word) {
  81         size_t sl, wl;
  82         const char *p;
  83
  84         assert(s);
  85         assert(word);
  86
  87         /* Checks if the string starts with the specified word, either
  88          * followed by NUL or by whitespace. Returns a pointer to the
  89          * NUL or the first character after the whitespace. */
  90
  91         sl = strlen(s);
  92         wl = strlen(word);
  93
  94         if (sl < wl)
  95                 return NULL;
  96
  97         if (wl == 0)
  98                 return (char*) s;
  99
 100         if (memcmp(s, word, wl) != 0)
 101                 return NULL;
 102
 103         p = s + wl;
 104         if (*p == 0)
 105                 return (char*) p;
 106
 107         if (!strchr(WHITESPACE, *p))
 108                 return NULL;
 109
 110         p += strspn(p, WHITESPACE);
 111         return (char*) p;
 112 }
 113
 114 static size_t strcspn_escaped(const char *s, const char *reject) {
 115         bool escaped = false;
 116         int n;
 117
 118         for (n = 0; s[n] != '\0'; n++) {
 119                 if (escaped)
 120                         escaped = false;
 121                 else if (s[n] == '\\')
 122                         escaped = true;
 123                 else if (strchr(reject, s[n]))
 124                         break;
 125         }
 126
 127         return n;
 128 }
 129
  130 /* Split a string into words.
       *
       * Each call returns a pointer to the start of the next word found at or after *state and stores that
       * word's length in *l; the word is not NUL-terminated. *state is advanced so that the next call continues
       * behind the word just returned. NULL is returned when there is no further word, or — with SPLIT_QUOTES —
       * when the next word is malformed, in which case *state is left pointing at the offending word.
       *
       * separator: set of separator characters; NULL selects WHITESPACE.
       * flags:     SPLIT_QUOTES enables handling of quoted words and backslash escapes. SPLIT_RELAX additionally
       *            accepts a word whose closing quote is missing or is followed by garbage. */
  131 const char* split(
  132                 const char **state,
  133                 size_t *l,
  134                 const char *separator,
  135                 SplitFlags flags) {
  136
  137         const char *current;
  138
  139         assert(state);
  140         assert(l);
  141
  142         if (!separator)
  143                 separator = WHITESPACE;
  144
  145         current = *state;
  146
  147         if (*current == '\0') /* already at the end? */
  148                 return NULL;
  149
  150         current += strspn(current, separator); /* skip leading separators */
  151         if (*current == '\0') { /* at the end now? */
  152                 *state = current;
  153                 return NULL;
  154         }
  155
  156         if (FLAGS_SET(flags, SPLIT_QUOTES)) {
  157
  158                 if (strchr(QUOTES, *current)) {
  159                         /* We are looking at a quote */
                              /* *l becomes the length of the text between the opening quote and the next unescaped
                               * occurrence of the same quote character (or the end of the string) */
  160                         *l = strcspn_escaped(current + 1, CHAR_TO_STR(*current));
  161                         if (current[*l + 1] != *current ||
  162                             (current[*l + 2] != 0 && !strchr(separator, current[*l + 2]))) {
  163                                 /* right quote missing or garbage at the end */
  164                                 if (FLAGS_SET(flags, SPLIT_RELAX)) {
                                              /* Continue after the quoted text; step over one more character only
                                               * if the scan stopped on a real character and not on the final NUL */
  165                                         *state = current + *l + 1 + (current[*l + 1] != '\0');
  166                                         return current + 1;
  167                                 }
  168                                 *state = current;
  169                                 return NULL;
  170                         }
                              /* The next call resumes after the closing quote; current is bumped past the opening
                               * quote so that the pointer returned below addresses the quoted text itself */
  171                         *state = current++ + *l + 2;
  172
  173                 } else {
  174                         /* We are looking at something that is not a quote */
  175                         *l = strcspn_escaped(current, separator);
  176                         if (current[*l] && !strchr(separator, current[*l]) && !FLAGS_SET(flags, SPLIT_RELAX)) {
  177                                 /* unfinished escape */
  178                                 *state = current;
  179                                 return NULL;
  180                         }
  181                         *state = current + *l;
  182                 }
  183         } else {
                      /* Plain mode: the word simply extends up to the next separator character */
  184                 *l = strcspn(current, separator);
  185                 *state = current + *l;
  186         }
  187
  188         return current;
  189 }
 190
 191 char *strnappend(const char *s, const char *suffix, size_t b) {
 192         size_t a;
 193         char *r;
 194
 195         if (!s && !suffix)
 196                 return strdup("");
 197
 198         if (!s)
 199                 return strndup(suffix, b);
 200
 201         if (!suffix)
 202                 return strdup(s);
 203
 204         assert(s);
 205         assert(suffix);
 206
 207         a = strlen(s);
 208         if (b > ((size_t) -1) - a)
 209                 return NULL;
 210
 211         r = new(char, a+b+1);
 212         if (!r)
 213                 return NULL;
 214
 215         memcpy(r, s, a);
 216         memcpy(r+a, suffix, b);
 217         r[a+b] = 0;
 218
 219         return r;
 220 }
 221
 222 char *strjoin_real(const char *x, ...) {
 223         va_list ap;
 224         size_t l;
 225         char *r, *p;
 226
 227         va_start(ap, x);
 228
 229         if (x) {
 230                 l = strlen(x);
 231
 232                 for (;;) {
 233                         const char *t;
 234                         size_t n;
 235
 236                         t = va_arg(ap, const char *);
 237                         if (!t)
 238                                 break;
 239
 240                         n = strlen(t);
 241                         if (n > ((size_t) -1) - l) {
 242                                 va_end(ap);
 243                                 return NULL;
 244                         }
 245
 246                         l += n;
 247                 }
 248         } else
 249                 l = 0;
 250
 251         va_end(ap);
 252
 253         r = new(char, l+1);
 254         if (!r)
 255                 return NULL;
 256
 257         if (x) {
 258                 p = stpcpy(r, x);
 259
 260                 va_start(ap, x);
 261
 262                 for (;;) {
 263                         const char *t;
 264
 265                         t = va_arg(ap, const char *);
 266                         if (!t)
 267                                 break;
 268
 269                         p = stpcpy(p, t);
 270                 }
 271
 272                 va_end(ap);
 273         } else
 274                 r[0] = 0;
 275
 276         return r;
 277 }
 278
 279 char *strstrip(char *s) {
 280         if (!s)
 281                 return NULL;
 282
 283         /* Drops trailing whitespace. Modifies the string in place. Returns pointer to first non-space character */
 284
 285         return delete_trailing_chars(skip_leading_chars(s, WHITESPACE), WHITESPACE);
 286 }
 287
 288 char *delete_chars(char *s, const char *bad) {
 289         char *f, *t;
 290
 291         /* Drops all specified bad characters, regardless where in the string */
 292
 293         if (!s)
 294                 return NULL;
 295
 296         if (!bad)
 297                 bad = WHITESPACE;
 298
 299         for (f = s, t = s; *f; f++) {
 300                 if (strchr(bad, *f))
 301                         continue;
 302
 303                 *(t++) = *f;
 304         }
 305
 306         *t = 0;
 307
 308         return s;
 309 }
 310
 311 char *delete_trailing_chars(char *s, const char *bad) {
 312         char *p, *c = s;
 313
 314         /* Drops all specified bad characters, at the end of the string */
 315
 316         if (!s)
 317                 return NULL;
 318
 319         if (!bad)
 320                 bad = WHITESPACE;
 321
 322         for (p = s; *p; p++)
 323                 if (!strchr(bad, *p))
 324                         c = p + 1;
 325
 326         *c = 0;
 327
 328         return s;
 329 }
 330
 331 char *truncate_nl(char *s) {
 332         assert(s);
 333
 334         s[strcspn(s, NEWLINE)] = 0;
 335         return s;
 336 }
 337
 338 char ascii_tolower(char x) {
 339
 340         if (x >= 'A' && x <= 'Z')
 341                 return x - 'A' + 'a';
 342
 343         return x;
 344 }
 345
 346 char ascii_toupper(char x) {
 347
 348         if (x >= 'a' && x <= 'z')
 349                 return x - 'a' + 'A';
 350
 351         return x;
 352 }
 353
 354 char *ascii_strlower(char *t) {
 355         char *p;
 356
 357         assert(t);
 358
 359         for (p = t; *p; p++)
 360                 *p = ascii_tolower(*p);
 361
 362         return t;
 363 }
 364
 365 char *ascii_strupper(char *t) {
 366         char *p;
 367
 368         assert(t);
 369
 370         for (p = t; *p; p++)
 371                 *p = ascii_toupper(*p);
 372
 373         return t;
 374 }
 375
 376 char *ascii_strlower_n(char *t, size_t n) {
 377         size_t i;
 378
 379         if (n <= 0)
 380                 return t;
 381
 382         for (i = 0; i < n; i++)
 383                 t[i] = ascii_tolower(t[i]);
 384
 385         return t;
 386 }
 387
 388 int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
 389
 390         for (; n > 0; a++, b++, n--) {
 391                 int x, y;
 392
 393                 x = (int) (uint8_t) ascii_tolower(*a);
 394                 y = (int) (uint8_t) ascii_tolower(*b);
 395
 396                 if (x != y)
 397                         return x - y;
 398         }
 399
 400         return 0;
 401 }
 402
 403 int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
 404         int r;
 405
 406         r = ascii_strcasecmp_n(a, b, MIN(n, m));
 407         if (r != 0)
 408                 return r;
 409
 410         return CMP(n, m);
 411 }
 412
 413 bool chars_intersect(const char *a, const char *b) {
 414         const char *p;
 415
 416         /* Returns true if any of the chars in a are in b. */
 417         for (p = a; *p; p++)
 418                 if (strchr(b, *p))
 419                         return true;
 420
 421         return false;
 422 }
 423
 424 bool string_has_cc(const char *p, const char *ok) {
 425         const char *t;
 426
 427         assert(p);
 428
 429         /*
 430          * Check if a string contains control characters. If 'ok' is
 431          * non-NULL it may be a string containing additional CCs to be
 432          * considered OK.
 433          */
 434
 435         for (t = p; *t; t++) {
 436                 if (ok && strchr(ok, *t))
 437                         continue;
 438
 439                 if (*t > 0 && *t < ' ')
 440                         return true;
 441
 442                 if (*t == 127)
 443                         return true;
 444         }
 445
 446         return false;
 447 }
 448
 449 static int write_ellipsis(char *buf, bool unicode) {
 450         if (unicode || is_locale_utf8()) {
 451                 buf[0] = 0xe2; /* tri-dot ellipsis: … */
 452                 buf[1] = 0x80;
 453                 buf[2] = 0xa6;
 454         } else {
 455                 buf[0] = '.';
 456                 buf[1] = '.';
 457                 buf[2] = '.';
 458         }
 459
 460         return 3;
 461 }
 462
 463 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 464         size_t x, need_space, suffix_len;
 465         char *t;
 466
 467         assert(s);
 468         assert(percent <= 100);
 469         assert(new_length != (size_t) -1);
 470
 471         if (old_length <= new_length)
 472                 return strndup(s, old_length);
 473
 474         /* Special case short ellipsations */
 475         switch (new_length) {
 476
 477         case 0:
 478                 return strdup("");
 479
 480         case 1:
 481                 if (is_locale_utf8())
 482                         return strdup("…");
 483                 else
 484                         return strdup(".");
 485
 486         case 2:
 487                 if (!is_locale_utf8())
 488                         return strdup("..");
 489
 490                 break;
 491
 492         default:
 493                 break;
 494         }
 495
 496         /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
 497          * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
 498          * either for the UTF-8 encoded character or for three ASCII characters. */
 499         need_space = is_locale_utf8() ? 1 : 3;
 500
 501         t = new(char, new_length+3);
 502         if (!t)
 503                 return NULL;
 504
 505         assert(new_length >= need_space);
 506
 507         x = ((new_length - need_space) * percent + 50) / 100;
 508         assert(x <= new_length - need_space);
 509
 510         memcpy(t, s, x);
 511         write_ellipsis(t + x, false);
 512         suffix_len = new_length - x - need_space;
 513         memcpy(t + x + 3, s + old_length - suffix_len, suffix_len);
 514         *(t + x + 3 + suffix_len) = '\0';
 515
 516         return t;
 517 }
 518
  519 char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
  520         size_t x, k, len, len2;
  521         const char *i, *j;
  522         char *e;
  523         int r;
  524
  525         /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
  526          * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
  527          * strings.
  528          *
  529          * Ellipsation is done in a locale-dependent way:
  530          * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
  531          * 2. Otherwise, a unicode ellipsis is used ("…")
  532          *
  533          * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
  534          * the current locale is UTF-8.
               *
               * 'percent' (at most 100) is the share of the available cells that is given to the start of the string;
               * the remaining cells show the end of the string.
               *
               * Returns a newly allocated string, or NULL if memory allocation fails or if utf8_encoded_to_unichar()
               * reports an error while decoding 's'. Passing (size_t) -1 as 'new_length' disables shortening and
               * yields a plain copy.
  535          */
  536
  537         assert(s);
  538         assert(percent <= 100);
  539
  540         if (new_length == (size_t) -1)
  541                 return strndup(s, old_length);
  542
  543         if (new_length == 0)
  544                 return strdup("");
  545
  546         /* If no multibyte characters use ascii_ellipsize_mem for speed */
  547         if (ascii_is_valid_n(s, old_length))
  548                 return ascii_ellipsize_mem(s, old_length, new_length, percent);
  549
              /* x is the cell budget for the part kept from the start of the string (new_length - 1: presumably
               * one cell is set aside for the ellipsis) */
  550         x = ((new_length - 1) * percent) / 100;
  551         assert(x <= new_length - 1);
  552
              /* Walk forward from the start: i ends up behind the last character that still fits into x cells,
               * k counts the cells used so far */
  553         k = 0;
  554         for (i = s; i < s + old_length; i = utf8_next_char(i)) {
  555                 char32_t c;
  556                 int w;
  557
  558                 r = utf8_encoded_to_unichar(i, &c);
  559                 if (r < 0)
  560                         return NULL;
  561
  562                 w = unichar_iswide(c) ? 2 : 1;
  563                 if (k + w <= x)
  564                         k += w;
  565                 else
  566                         break;
  567         }
  568
              /* Walk backward from the end: j is moved over every character that still fits into the overall
               * budget of new_length cells, but never past i */
  569         for (j = s + old_length; j > i; ) {
  570                 char32_t c;
  571                 int w;
  572                 const char *jj;
  573
  574                 jj = utf8_prev_char(j);
  575                 r = utf8_encoded_to_unichar(jj, &c);
  576                 if (r < 0)
  577                         return NULL;
  578
  579                 w = unichar_iswide(c) ? 2 : 1;
  580                 if (k + w <= new_length) {
  581                         k += w;
  582                         j = jj;
  583                 } else
  584                         break;
  585         }
  586         assert(i <= j);
  587
  588         /* we don't actually need to ellipsize */
  589         if (i == j)
  590                 return memdup_suffix0(s, old_length);
  591
  592         /* make space for ellipsis, if possible */
              /* i.e. give up one character of the kept tail, or — if no tail was kept — one of the kept head */
  593         if (j < s + old_length)
  594                 j = utf8_next_char(j);
  595         else if (i > s)
  596                 i = utf8_prev_char(i);
  597
              /* len = bytes kept from the start, len2 = bytes kept from the end; the buffer additionally holds the
               * 3 bytes written by write_ellipsis() and the terminating NUL */
  598         len = i - s;
  599         len2 = s + old_length - j;
  600         e = new(char, len + 3 + len2 + 1);
  601         if (!e)
  602                 return NULL;
  603
  604         /*
  605         printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
  606                old_length, new_length, x, len, len2, k);
  607         */
  608
  609         memcpy(e, s, len);
  610         write_ellipsis(e + len, true);
  611         memcpy(e + len + 3, j, len2);
  612         *(e + len + 3 + len2) = '\0';
  613
  614         return e;
  615 }
 616
  617 char *cellescape(char *buf, size_t len, const char *s) {
  618         /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII
  619          * characters are copied as they are, everything else is escaped. The result
  620          * is different than if escaping and ellipsization were performed in two
  621          * separate steps, because each sequence is either stored in full or skipped.
  622          *
  623          * This function should be used for logging about strings which are expected to
  624          * be plain ASCII in a safe way.
  625          *
  626          * An ellipsis will be used if s is too long. It is always placed at the
  627          * very end.
               *
               * len must be at least 1. Returns buf, which is always NUL-terminated.
  628          */
  629
              /* i: number of bytes written to buf so far. last_char_width/k: ring buffer remembering the escaped
               * width of the most recently written characters, so that they can be taken back again below. */
  630         size_t i = 0, last_char_width[4] = {}, k = 0, j;
  631
  632         assert(len > 0); /* at least a terminating NUL */
  633
  634         for (;;) {
  635                 char four[4];
  636                 int w;
  637
  638                 if (*s == 0) /* terminating NUL detected? then we are done! */
  639                         goto done;
  640
  641                 w = cescape_char(*s, four);
  642                 if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? In that case let's
  643                                       * ellipsize at the previous location */
  644                         break;
  645
  646                 /* OK, there was space, let's add this escaped character to the buffer */
  647                 memcpy(buf + i, four, w);
  648                 i += w;
  649
  650                 /* And remember its width in the ring buffer */
  651                 last_char_width[k] = w;
  652                 k = (k + 1) % 4;
  653
  654                 s++;
  655         }
  656
  657         /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4
  658          * characters ideally, but if the buffer is shorter than that in the first place, take what we can get */
  659         for (j = 0; j < ELEMENTSOF(last_char_width); j++) {
  660
  661                 if (i + 4 <= len) /* nice, we reached our space goal */
  662                         break;
  663
                      /* Step back to the previous ring buffer slot and un-write that character */
  664                 k = k == 0 ? 3 : k - 1;
  665                 if (last_char_width[k] == 0) /* bummer, we reached the beginning of the string */
  666                         break;
  667
  668                 assert(i >= last_char_width[k]);
  669                 i -= last_char_width[k];
  670         }
  671
  672         if (i + 4 <= len) /* yay, enough space */
  673                 i += write_ellipsis(buf + i, false);
  674         else if (i + 3 <= len) { /* only space for ".." */
  675                 buf[i++] = '.';
  676                 buf[i++] = '.';
  677         } else if (i + 2 <= len) /* only space for a single "." */
  678                 buf[i++] = '.';
  679         else
  680                 assert(i + 1 <= len);
  681
  682  done:
  683         buf[i] = '\0';
  684         return buf;
  685 }
 686
 687 char* strshorten(char *s, size_t l) {
 688         assert(s);
 689
 690         if (strnlen(s, l+1) > l)
 691                 s[l] = 0;
 692
 693         return s;
 694 }
 695
 696 char *strreplace(const char *text, const char *old_string, const char *new_string) {
 697         size_t l, old_len, new_len, allocated = 0;
 698         char *t, *ret = NULL;
 699         const char *f;
 700
 701         assert(old_string);
 702         assert(new_string);
 703
 704         if (!text)
 705                 return NULL;
 706
 707         old_len = strlen(old_string);
 708         new_len = strlen(new_string);
 709
 710         l = strlen(text);
 711         if (!GREEDY_REALLOC(ret, allocated, l+1))
 712                 return NULL;
 713
 714         f = text;
 715         t = ret;
 716         while (*f) {
 717                 size_t d, nl;
 718
 719                 if (!startswith(f, old_string)) {
 720                         *(t++) = *(f++);
 721                         continue;
 722                 }
 723
 724                 d = t - ret;
 725                 nl = l - old_len + new_len;
 726
 727                 if (!GREEDY_REALLOC(ret, allocated, nl + 1))
 728                         return mfree(ret);
 729
 730                 l = nl;
 731                 t = ret + d;
 732
 733                 t = stpcpy(t, new_string);
 734                 f += old_len;
 735         }
 736
 737         *t = 0;
 738         return ret;
 739 }
 740
 741 static void advance_offsets(
 742                 ssize_t diff,
 743                 size_t offsets[2], /* note: we can't use [static 2] here, since this may be NULL */
 744                 size_t shift[static 2],
 745                 size_t size) {
 746
 747         if (!offsets)
 748                 return;
 749
 750         assert(shift);
 751
 752         if ((size_t) diff < offsets[0])
 753                 shift[0] += size;
 754         if ((size_t) diff < offsets[1])
 755                 shift[1] += size;
 756 }
 757
  758 char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
  759         const char *begin = NULL;
  760         enum {
  761                 STATE_OTHER,
  762                 STATE_ESCAPE,
  763                 STATE_CSI,
  764                 STATE_CSO,
  765         } state = STATE_OTHER;
  766         char *obuf = NULL;
  767         size_t osz = 0, isz, shift[2] = {}, n_carriage_returns = 0;
  768         FILE *f;
  769
  770         assert(ibuf);
  771         assert(*ibuf);
  772
  773         /* This does four things:
  774          *
  775          * 1. Replaces TABs by 8 spaces
  776          * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences
  777          * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences
  778          * 4. Strip trailing \r characters (since they would "move the cursor", but have no
  779          *    other effect).
  780          *
  781          * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as
  782          * are any other special characters. Truncated ANSI sequences are left-as is too. This call is
  783          * supposed to suppress the most basic formatting noise, but nothing else.
  784          *
  785          * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */
  786
              /* Interface: *ibuf is the input buffer; on success it is freed and replaced by the filtered buffer,
               * which is also the return value. If _isz is non-NULL it supplies the input size and receives the
               * output size, otherwise strlen(*ibuf) is used. If highlight is non-NULL its two offsets are adjusted
               * by the amounts accumulated in shift[]. On failure NULL is returned and *ibuf is left untouched. */
  787         isz = _isz ? *_isz : strlen(*ibuf);
  788
  789         /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we
  790          * created f here and it doesn't leave our scope. */
  791         f = open_memstream_unlocked(&obuf, &osz);
  792         if (!f)
  793                 return NULL;
  794
              /* The loop deliberately runs one position past the end of the input ("EOT"), so that a sequence
               * that is still open when the input ends can be flushed by the state it is in */
  795         for (const char *i = *ibuf; i < *ibuf + isz + 1; i++) {
  796
  797                 switch (state) {
  798
  799                 case STATE_OTHER:
  800                         if (i >= *ibuf + isz) /* EOT */
  801                                 break;
  802
                              /* Carriage returns are only counted here; they are written out later, and only if
                               * something other than a newline or the end of the input follows them */
  803                         if (*i == '\r') {
  804                                 n_carriage_returns++;
  805                                 break;
  806                         } else if (*i == '\n')
  807                                 /* Ignore carriage returns before new line */
  808                                 n_carriage_returns = 0;
  809                         for (; n_carriage_returns > 0; n_carriage_returns--)
  810                                 fputc('\r', f);
  811
  812                         if (*i == '\x1B')
  813                                 state = STATE_ESCAPE;
  814                         else if (*i == '\t') {
                                      /* One input byte becomes eight output bytes, hence a shift of 7 */
  815                                 fputs("        ", f);
  816                                 advance_offsets(i - *ibuf, highlight, shift, 7);
  817                         } else
  818                                 fputc(*i, f);
  819
  820                         break;
  821
  822                 case STATE_ESCAPE:
  823                         assert(n_carriage_returns == 0);
  824
  825                         if (i >= *ibuf + isz) { /* EOT */
  826                                 fputc('\x1B', f);
  827                                 advance_offsets(i - *ibuf, highlight, shift, 1);
  828                                 break;
  829                         } else if (*i == '[') { /* ANSI CSI */
  830                                 state = STATE_CSI;
  831                                 begin = i + 1;
  832                         } else if (*i == ']') { /* ANSI CSO */
  833                                 state = STATE_CSO;
  834                                 begin = i + 1;
  835                         } else {
  836                                 fputc('\x1B', f);
  837                                 fputc(*i, f);
  838                                 advance_offsets(i - *ibuf, highlight, shift, 1);
  839                                 state = STATE_OTHER;
  840                         }
  841
  842                         break;
  843
  844                 case STATE_CSI:
  845                         assert(n_carriage_returns == 0);
  846
  847                         if (i >= *ibuf + isz || /* EOT … */
  848                             !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */
                                      /* Not a sequence we strip: emit the introducer that was held back and rewind,
                                       * so that the loop's i++ re-reads the bytes following it as ordinary text */
  849                                 fputc('\x1B', f);
  850                                 fputc('[', f);
  851                                 advance_offsets(i - *ibuf, highlight, shift, 2);
  852                                 state = STATE_OTHER;
  853                                 i = begin-1;
  854                         } else if (*i == 'm')
  855                                 state = STATE_OTHER;
  856
  857                         break;
  858
  859                 case STATE_CSO:
  860                         assert(n_carriage_returns == 0);
  861
  862                         if (i >= *ibuf + isz || /* EOT … */
  863                             (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */
                                      /* Same recovery as in STATE_CSI: emit the introducer and rewind */
  864                                 fputc('\x1B', f);
  865                                 fputc(']', f);
  866                                 advance_offsets(i - *ibuf, highlight, shift, 2);
  867                                 state = STATE_OTHER;
  868                                 i = begin-1;
  869                         } else if (*i == '\a')
  870                                 state = STATE_OTHER;
  871
  872                         break;
  873                 }
  874         }
  875
  876         if (fflush_and_check(f) < 0) {
  877                 fclose(f);
  878                 return mfree(obuf);
  879         }
  880         fclose(f);
  881
              /* Success: hand the filtered buffer to the caller in place of the original one */
  882         free_and_replace(*ibuf, obuf);
  883
  884         if (_isz)
  885                 *_isz = osz;
  886
  887         if (highlight) {
  888                 highlight[0] += shift[0];
  889                 highlight[1] += shift[1];
  890         }
  891
  892         return *ibuf;
  893 }
 894
 895 char *strextend_with_separator(char **x, const char *separator, ...) {
 896         bool need_separator;
 897         size_t f, l, l_separator;
 898         char *r, *p;
 899         va_list ap;
 900
 901         assert(x);
 902
 903         l = f = strlen_ptr(*x);
 904
 905         need_separator = !isempty(*x);
 906         l_separator = strlen_ptr(separator);
 907
 908         va_start(ap, separator);
 909         for (;;) {
 910                 const char *t;
 911                 size_t n;
 912
 913                 t = va_arg(ap, const char *);
 914                 if (!t)
 915                         break;
 916
 917                 n = strlen(t);
 918
 919                 if (need_separator)
 920                         n += l_separator;
 921
 922                 if (n > ((size_t) -1) - l) {
 923                         va_end(ap);
 924                         return NULL;
 925                 }
 926
 927                 l += n;
 928                 need_separator = true;
 929         }
 930         va_end(ap);
 931
 932         need_separator = !isempty(*x);
 933
 934         r = realloc(*x, l+1);
 935         if (!r)
 936                 return NULL;
 937
 938         p = r + f;
 939
 940         va_start(ap, separator);
 941         for (;;) {
 942                 const char *t;
 943
 944                 t = va_arg(ap, const char *);
 945                 if (!t)
 946                         break;
 947
 948                 if (need_separator && separator)
 949                         p = stpcpy(p, separator);
 950
 951                 p = stpcpy(p, t);
 952
 953                 need_separator = true;
 954         }
 955         va_end(ap);
 956
 957         assert(p == r + l);
 958
 959         *p = 0;
 960         *x = r;
 961
 962         return r + l;
 963 }
 964
 965 char *strrep(const char *s, unsigned n) {
 966         size_t l;
 967         char *r, *p;
 968         unsigned i;
 969
 970         assert(s);
 971
 972         l = strlen(s);
 973         p = r = malloc(l * n + 1);
 974         if (!r)
 975                 return NULL;
 976
 977         for (i = 0; i < n; i++)
 978                 p = stpcpy(p, s);
 979
 980         *p = 0;
 981         return r;
 982 }
 983
 984 int split_pair(const char *s, const char *sep, char **l, char **r) {
 985         char *x, *a, *b;
 986
 987         assert(s);
 988         assert(sep);
 989         assert(l);
 990         assert(r);
 991
 992         if (isempty(sep))
 993                 return -EINVAL;
 994
 995         x = strstr(s, sep);
 996         if (!x)
 997                 return -EINVAL;
 998
 999         a = strndup(s, x - s);
1000         if (!a)
1001                 return -ENOMEM;
1002
1003         b = strdup(x + strlen(sep));
1004         if (!b) {
1005                 free(a);
1006                 return -ENOMEM;
1007         }
1008
1009         *l = a;
1010         *r = b;
1011
1012         return 0;
1013 }
1014
int free_and_strdup(char **p, const char *s) {
        char *copy = NULL;

        assert(p);

        /* Makes *p a freshly strdup()ed copy of s (or NULL if s is NULL), releasing whatever *p pointed
         * to before. Returns 0 if streq_ptr() considers old and new value equal already, in which case
         * nothing is changed, 1 if the string was replaced, and -ENOMEM if the copy could not be
         * allocated, in which case *p is left as it was. */

        if (streq_ptr(*p, s))
                return 0;

        if (s) {
                copy = strdup(s);
                if (!copy)
                        return -ENOMEM;
        }

        /* Only drop the old string once the new one is safely allocated */
        free(*p);
        *p = copy;

        return 1;
}
1038
int free_and_strndup(char **p, const char *s, size_t l) {
        char *dup = NULL;

        assert(p);
        assert(s || l == 0);

        /* Makes *p a freshly strndup()ed copy of at most l bytes of s (or NULL if s is NULL), releasing
         * the previous string. Returns 0 if nothing had to be changed, 1 if the string was replaced, and
         * -ENOMEM if the copy could not be allocated, in which case *p is left as it was. */

        if (!s) {
                /* NULL replacing NULL: nothing to do */
                if (!*p)
                        return 0;
        } else {
                /* Nothing to do if the first l bytes compare equal and the old string does not continue
                 * beyond them: either it is shorter than l, or it terminates exactly at offset l. */
                if (*p && strneq(*p, s, l) && (l > strlen(*p) || (*p)[l] == '\0'))
                        return 0;

                dup = strndup(s, l);
                if (!dup)
                        return -ENOMEM;
        }

        free_and_replace(*p, dup);
        return 1;
}
1064
1065 bool string_is_safe(const char *p) {
1066         const char *t;
1067
1068         if (!p)
1069                 return false;
1070
1071         /* Checks if the specified string contains no quotes or control characters */
1072
1073         for (t = p; *t; t++) {
1074                 if (*t > 0 && *t < ' ') /* no control characters */
1075                         return false;
1076
1077                 if (strchr(QUOTES "\\\x7f", *t))
1078                         return false;
1079         }
1080
1081         return true;
1082 }
1083
char* string_erase(char *x) {
        /* Wipes the contents of the string (strlen(x) bytes) via explicit_bzero_safe() and hands the same
         * pointer back; NULL is passed through untouched. Meant for buffers that held passphrases and
         * the like, once we are done with them. */

        if (x)
                explicit_bzero_safe(x, strlen(x));

        return x;
}
1093
int string_truncate_lines(const char *s, size_t n_lines, char **ret) {
        const char *line = s, *keep_end = s;
        size_t seen = 0;
        bool truncated = false;
        char *copy;

        assert(s);

        /* Copies s into a newly allocated string, cut off after at most n_lines lines. Returns > 0 if
         * something other than newlines was cut away, 0 if the string had no more lines than requested
         * anyway, and -ENOMEM on allocation failure. Trailing newlines of the input are dropped and
         * never generated. */

        for (;;) {
                size_t len = strcspn(line, "\n");
                bool is_last = line[len] == '\0';

                if (seen >= n_lines) /* reached the threshold, keep nothing from here on */
                        break;

                if (len > 0) /* only non-empty lines move the cut-off point forward */
                        keep_end = line + len;

                if (is_last)
                        break;

                assert(line[len] == '\n');

                line += len + 1;
                seen++;
        }

        /* keep_end now points right behind the last character we want to keep */
        if (isempty(keep_end))
                copy = strdup(s);
        else {
                /* Dropping nothing but a run of newlines does not count as truncation */
                truncated = !in_charset(keep_end, "\n");

                copy = strndup(s, keep_end - s);
        }
        if (!copy)
                return -ENOMEM;

        *ret = copy;
        return truncated;
}
1150
int string_extract_line(const char *s, size_t i, char **ret) {
        const char *line = s, *nl;
        char *out;

        /* Extracts the i-th line (counting from zero) of the specified string. Returns > 0 if more text
         * follows the extracted line, 0 if it is the last line or i lies beyond the last line (an empty
         * string is returned then), and -ENOMEM on allocation failure. As an optimization for a very
         * common case, *ret is set to NULL without allocating anything if the first line is requested
         * and the string consists of a single line only: the caller should use the input string as is. */

        /* First, skip over the i lines preceding the one we are interested in */
        for (size_t skipped = 0; skipped < i; skipped++) {
                nl = strchr(line, '\n');
                if (!nl) {
                        /* Ran out of lines, return an empty line */
                        out = strdup("");
                        if (!out)
                                return -ENOMEM;

                        *ret = out;
                        return 0; /* The end */
                }

                line = nl + 1;
        }

        /* Now we are looking at the requested line */
        nl = strchr(line, '\n');
        if (nl) {
                out = strndup(line, nl - line);
                if (!out)
                        return -ENOMEM;

                *ret = out;
                return !isempty(nl + 1); /* more coming? */
        }

        if (line == s) {
                *ret = NULL; /* Just use the input string */
                return 0;
        }

        out = strdup(line);
        if (!out)
                return -ENOMEM;

        *ret = out;
        return 0; /* The end */
}
1211
1212 int string_contains_word(const char *string, const char *separators, const char *word) {
1213         /* In the default mode with no separators specified, we split on whitespace and
1214          * don't coalesce separators. */
1215         const ExtractFlags flags = separators ? EXTRACT_DONT_COALESCE_SEPARATORS : 0;
1216
1217         for (const char *p = string;;) {
1218                 _cleanup_free_ char *w = NULL;
1219                 int r;
1220
1221                 r = extract_first_word(&p, &w, separators, flags);
1222                 if (r < 0)
1223                         return r;
1224                 if (r == 0)
1225                         return false;
1226                 if (streq(w, word))
1227                         return true;
1228         }
1229 }