src/basic/string-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #include <errno.h>
   4 #include <stdarg.h>
   5 #include <stdint.h>
   6 #include <stdio.h>
   7 #include <stdlib.h>
   8
   9 #include "alloc-util.h"
  10 #include "escape.h"
  11 #include "fileio.h"
  12 #include "gunicode.h"
  13 #include "locale-util.h"
  14 #include "macro.h"
  15 #include "memory-util.h"
  16 #include "string-util.h"
  17 #include "terminal-util.h"
  18 #include "utf8.h"
  19 #include "util.h"
  20
  21 int strcmp_ptr(const char *a, const char *b) {
  22         /* Like strcmp(), but tries to make sense of NULL pointers */
  23
  24         if (a && b)
  25                 return strcmp(a, b);
  26         return CMP(a, b); /* Direct comparison of pointers, one of which is NULL */
  27 }
  28
  29 int strcasecmp_ptr(const char *a, const char *b) {
  30         /* Like strcasecmp(), but tries to make sense of NULL pointers */
  31
  32         if (a && b)
  33                 return strcasecmp(a, b);
  34         return CMP(a, b); /* Direct comparison of pointers, one of which is NULL */
  35 }
  36
  37 char* endswith(const char *s, const char *postfix) {
  38         size_t sl, pl;
  39
  40         assert(s);
  41         assert(postfix);
  42
  43         sl = strlen(s);
  44         pl = strlen(postfix);
  45
  46         if (pl == 0)
  47                 return (char*) s + sl;
  48
  49         if (sl < pl)
  50                 return NULL;
  51
  52         if (memcmp(s + sl - pl, postfix, pl) != 0)
  53                 return NULL;
  54
  55         return (char*) s + sl - pl;
  56 }
  57
  58 char* endswith_no_case(const char *s, const char *postfix) {
  59         size_t sl, pl;
  60
  61         assert(s);
  62         assert(postfix);
  63
  64         sl = strlen(s);
  65         pl = strlen(postfix);
  66
  67         if (pl == 0)
  68                 return (char*) s + sl;
  69
  70         if (sl < pl)
  71                 return NULL;
  72
  73         if (strcasecmp(s + sl - pl, postfix) != 0)
  74                 return NULL;
  75
  76         return (char*) s + sl - pl;
  77 }
  78
  79 char* first_word(const char *s, const char *word) {
  80         size_t sl, wl;
  81         const char *p;
  82
  83         assert(s);
  84         assert(word);
  85
  86         /* Checks if the string starts with the specified word, either
  87          * followed by NUL or by whitespace. Returns a pointer to the
  88          * NUL or the first character after the whitespace. */
  89
  90         sl = strlen(s);
  91         wl = strlen(word);
  92
  93         if (sl < wl)
  94                 return NULL;
  95
  96         if (wl == 0)
  97                 return (char*) s;
  98
  99         if (memcmp(s, word, wl) != 0)
 100                 return NULL;
 101
 102         p = s + wl;
 103         if (*p == 0)
 104                 return (char*) p;
 105
 106         if (!strchr(WHITESPACE, *p))
 107                 return NULL;
 108
 109         p += strspn(p, WHITESPACE);
 110         return (char*) p;
 111 }
 112
 113 static size_t strcspn_escaped(const char *s, const char *reject) {
 114         bool escaped = false;
 115         int n;
 116
 117         for (n = 0; s[n] != '\0'; n++) {
 118                 if (escaped)
 119                         escaped = false;
 120                 else if (s[n] == '\\')
 121                         escaped = true;
 122                 else if (strchr(reject, s[n]))
 123                         break;
 124         }
 125
 126         return n;
 127 }
 128
  129 /* Split a string into words.
       *
       * Iterator-style tokenizer: each call returns the next word found at *state and advances *state
       * past it. The input string is not modified, hence the returned word is NOT NUL-terminated; its
       * length is stored in *l.
       *
       * state:     in/out cursor into the string being split.
       * l:         out, length of the returned word. Only meaningful if a non-NULL pointer is returned.
       * separator: set of separator characters; if NULL, WHITESPACE is used.
       * flags:     with SPLIT_QUOTES a word that begins with one of the QUOTES characters extends to the
       *            matching unescaped closing quote, and backslash escapes are honoured when looking for
       *            the end of a word. SPLIT_RELAX additionally tolerates a missing closing quote (or
       *            garbage right after it) instead of failing.
       *
       * Returns a pointer to the first character of the word (for quoted words: the character after the
       * opening quote), or NULL if the end of the string was reached or a word could not be parsed. In
       * the latter case *state is left pointing at the offending word. */
  130 const char* split(
  131                 const char **state,
  132                 size_t *l,
  133                 const char *separator,
  134                 SplitFlags flags) {
  135
  136         const char *current;
  137
  138         assert(state);
  139         assert(l);
  140
  141         if (!separator)
  142                 separator = WHITESPACE;
  143
  144         current = *state;
  145
  146         if (*current == '\0') /* already at the end? */
  147                 return NULL;
  148
  149         current += strspn(current, separator); /* skip leading separators */
  150         if (*current == '\0') { /* at the end now? */
  151                 *state = current;
  152                 return NULL;
  153         }
  154
  155         if (FLAGS_SET(flags, SPLIT_QUOTES)) {
  156
  157                 if (strchr(QUOTES, *current)) {
  158                         /* We are looking at a quote */
                              /* *l is the length of the quoted text, i.e. current[*l + 1] is where the
                               * closing quote is expected and current[*l + 2] is what follows it. */
  159                         *l = strcspn_escaped(current + 1, CHAR_TO_STR(*current));
  160                         if (current[*l + 1] != *current ||
  161                             (current[*l + 2] != 0 && !strchr(separator, current[*l + 2]))) {
  162                                 /* right quote missing or garbage at the end */
  163                                 if (FLAGS_SET(flags, SPLIT_RELAX)) {
                                              /* Continue after the quoted text, and also step over the
                                               * closing quote if there is one (i.e. unless we hit NUL) */
  164                                         *state = current + *l + 1 + (current[*l + 1] != '\0');
  165                                         return current + 1;
  166                                 }
  167                                 *state = current;
  168                                 return NULL;
  169                         }
                              /* The new state is just past the closing quote; the post-increment makes the
                               * returned pointer skip the opening quote. */
  170                         *state = current++ + *l + 2;
  171
  172                 } else {
  173                         /* We are looking at a something that is not a quote */
  174                         *l = strcspn_escaped(current, separator);
                              /* NOTE(review): strcspn_escaped() as visible in this file only stops at NUL or
                               * at a character from separator, so this condition looks like it can never be
                               * true — confirm before relying on this error path. */
  175                         if (current[*l] && !strchr(separator, current[*l]) && !FLAGS_SET(flags, SPLIT_RELAX)) {
  176                                 /* unfinished escape */
  177                                 *state = current;
  178                                 return NULL;
  179                         }
  180                         *state = current + *l;
  181                 }
  182         } else {
                      /* Plain mode: the word simply extends up to the next separator (or NUL) */
  183                 *l = strcspn(current, separator);
  184                 *state = current + *l;
  185         }
  186
  187         return current;
  188 }
 189
 190 char *strnappend(const char *s, const char *suffix, size_t b) {
 191         size_t a;
 192         char *r;
 193
 194         if (!s && !suffix)
 195                 return strdup("");
 196
 197         if (!s)
 198                 return strndup(suffix, b);
 199
 200         if (!suffix)
 201                 return strdup(s);
 202
 203         assert(s);
 204         assert(suffix);
 205
 206         a = strlen(s);
 207         if (b > ((size_t) -1) - a)
 208                 return NULL;
 209
 210         r = new(char, a+b+1);
 211         if (!r)
 212                 return NULL;
 213
 214         memcpy(r, s, a);
 215         memcpy(r+a, suffix, b);
 216         r[a+b] = 0;
 217
 218         return r;
 219 }
 220
 221 char *strjoin_real(const char *x, ...) {
 222         va_list ap;
 223         size_t l;
 224         char *r, *p;
 225
 226         va_start(ap, x);
 227
 228         if (x) {
 229                 l = strlen(x);
 230
 231                 for (;;) {
 232                         const char *t;
 233                         size_t n;
 234
 235                         t = va_arg(ap, const char *);
 236                         if (!t)
 237                                 break;
 238
 239                         n = strlen(t);
 240                         if (n > ((size_t) -1) - l) {
 241                                 va_end(ap);
 242                                 return NULL;
 243                         }
 244
 245                         l += n;
 246                 }
 247         } else
 248                 l = 0;
 249
 250         va_end(ap);
 251
 252         r = new(char, l+1);
 253         if (!r)
 254                 return NULL;
 255
 256         if (x) {
 257                 p = stpcpy(r, x);
 258
 259                 va_start(ap, x);
 260
 261                 for (;;) {
 262                         const char *t;
 263
 264                         t = va_arg(ap, const char *);
 265                         if (!t)
 266                                 break;
 267
 268                         p = stpcpy(p, t);
 269                 }
 270
 271                 va_end(ap);
 272         } else
 273                 r[0] = 0;
 274
 275         return r;
 276 }
 277
 278 char *strstrip(char *s) {
 279         if (!s)
 280                 return NULL;
 281
 282         /* Drops trailing whitespace. Modifies the string in place. Returns pointer to first non-space character */
 283
 284         return delete_trailing_chars(skip_leading_chars(s, WHITESPACE), WHITESPACE);
 285 }
 286
 287 char *delete_chars(char *s, const char *bad) {
 288         char *f, *t;
 289
 290         /* Drops all specified bad characters, regardless where in the string */
 291
 292         if (!s)
 293                 return NULL;
 294
 295         if (!bad)
 296                 bad = WHITESPACE;
 297
 298         for (f = s, t = s; *f; f++) {
 299                 if (strchr(bad, *f))
 300                         continue;
 301
 302                 *(t++) = *f;
 303         }
 304
 305         *t = 0;
 306
 307         return s;
 308 }
 309
 310 char *delete_trailing_chars(char *s, const char *bad) {
 311         char *p, *c = s;
 312
 313         /* Drops all specified bad characters, at the end of the string */
 314
 315         if (!s)
 316                 return NULL;
 317
 318         if (!bad)
 319                 bad = WHITESPACE;
 320
 321         for (p = s; *p; p++)
 322                 if (!strchr(bad, *p))
 323                         c = p + 1;
 324
 325         *c = 0;
 326
 327         return s;
 328 }
 329
 330 char *truncate_nl(char *s) {
 331         assert(s);
 332
 333         s[strcspn(s, NEWLINE)] = 0;
 334         return s;
 335 }
 336
 337 char ascii_tolower(char x) {
 338
 339         if (x >= 'A' && x <= 'Z')
 340                 return x - 'A' + 'a';
 341
 342         return x;
 343 }
 344
 345 char ascii_toupper(char x) {
 346
 347         if (x >= 'a' && x <= 'z')
 348                 return x - 'a' + 'A';
 349
 350         return x;
 351 }
 352
 353 char *ascii_strlower(char *t) {
 354         char *p;
 355
 356         assert(t);
 357
 358         for (p = t; *p; p++)
 359                 *p = ascii_tolower(*p);
 360
 361         return t;
 362 }
 363
 364 char *ascii_strupper(char *t) {
 365         char *p;
 366
 367         assert(t);
 368
 369         for (p = t; *p; p++)
 370                 *p = ascii_toupper(*p);
 371
 372         return t;
 373 }
 374
 375 char *ascii_strlower_n(char *t, size_t n) {
 376         size_t i;
 377
 378         if (n <= 0)
 379                 return t;
 380
 381         for (i = 0; i < n; i++)
 382                 t[i] = ascii_tolower(t[i]);
 383
 384         return t;
 385 }
 386
 387 int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
 388
 389         for (; n > 0; a++, b++, n--) {
 390                 int x, y;
 391
 392                 x = (int) (uint8_t) ascii_tolower(*a);
 393                 y = (int) (uint8_t) ascii_tolower(*b);
 394
 395                 if (x != y)
 396                         return x - y;
 397         }
 398
 399         return 0;
 400 }
 401
 402 int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
 403         int r;
 404
 405         r = ascii_strcasecmp_n(a, b, MIN(n, m));
 406         if (r != 0)
 407                 return r;
 408
 409         return CMP(n, m);
 410 }
 411
 412 bool chars_intersect(const char *a, const char *b) {
 413         const char *p;
 414
 415         /* Returns true if any of the chars in a are in b. */
 416         for (p = a; *p; p++)
 417                 if (strchr(b, *p))
 418                         return true;
 419
 420         return false;
 421 }
 422
 423 bool string_has_cc(const char *p, const char *ok) {
 424         const char *t;
 425
 426         assert(p);
 427
 428         /*
 429          * Check if a string contains control characters. If 'ok' is
 430          * non-NULL it may be a string containing additional CCs to be
 431          * considered OK.
 432          */
 433
 434         for (t = p; *t; t++) {
 435                 if (ok && strchr(ok, *t))
 436                         continue;
 437
 438                 if (*t > 0 && *t < ' ')
 439                         return true;
 440
 441                 if (*t == 127)
 442                         return true;
 443         }
 444
 445         return false;
 446 }
 447
 448 static int write_ellipsis(char *buf, bool unicode) {
 449         if (unicode || is_locale_utf8()) {
 450                 buf[0] = 0xe2; /* tri-dot ellipsis: … */
 451                 buf[1] = 0x80;
 452                 buf[2] = 0xa6;
 453         } else {
 454                 buf[0] = '.';
 455                 buf[1] = '.';
 456                 buf[2] = '.';
 457         }
 458
 459         return 3;
 460 }
 461
 462 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 463         size_t x, need_space, suffix_len;
 464         char *t;
 465
 466         assert(s);
 467         assert(percent <= 100);
 468         assert(new_length != (size_t) -1);
 469
 470         if (old_length <= new_length)
 471                 return strndup(s, old_length);
 472
 473         /* Special case short ellipsations */
 474         switch (new_length) {
 475
 476         case 0:
 477                 return strdup("");
 478
 479         case 1:
 480                 if (is_locale_utf8())
 481                         return strdup("…");
 482                 else
 483                         return strdup(".");
 484
 485         case 2:
 486                 if (!is_locale_utf8())
 487                         return strdup("..");
 488
 489                 break;
 490
 491         default:
 492                 break;
 493         }
 494
 495         /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
 496          * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
 497          * either for the UTF-8 encoded character or for three ASCII characters. */
 498         need_space = is_locale_utf8() ? 1 : 3;
 499
 500         t = new(char, new_length+3);
 501         if (!t)
 502                 return NULL;
 503
 504         assert(new_length >= need_space);
 505
 506         x = ((new_length - need_space) * percent + 50) / 100;
 507         assert(x <= new_length - need_space);
 508
 509         memcpy(t, s, x);
 510         write_ellipsis(t + x, false);
 511         suffix_len = new_length - x - need_space;
 512         memcpy(t + x + 3, s + old_length - suffix_len, suffix_len);
 513         *(t + x + 3 + suffix_len) = '\0';
 514
 515         return t;
 516 }
 517
  518 char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
  519         size_t x, k, len, len2;
  520         const char *i, *j;
  521         char *e;
  522         int r;
  523
  524         /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
  525          * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
  526          * strings.
  527          *
  528          * Ellipsation is done in a locale-dependent way:
  529          * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
  530          * 2. Otherwise, a unicode ellipsis is used ("…")
  531          *
  532          * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
  533          * the current locale is UTF-8.
               *
               * 'percent' (0…100) selects where the ellipsis is placed: it is the share of the available cells that
               * is handed to the beginning of the string, the remainder is filled from its end.
               *
               * A 'new_length' of (size_t) -1 turns ellipsation off, a plain copy of the input is returned then.
               *
               * Returns a newly allocated string, or NULL if memory allocation fails or if
               * utf8_encoded_to_unichar() reports an error for one of the characters.
  534          */
  535
  536         assert(s);
  537         assert(percent <= 100);
  538
  539         if (new_length == (size_t) -1)
  540                 return strndup(s, old_length);
  541
  542         if (new_length == 0)
  543                 return strdup("");
  544
  545         /* If no multibyte characters use ascii_ellipsize_mem for speed */
  546         if (ascii_is_valid_n(s, old_length))
  547                 return ascii_ellipsize_mem(s, old_length, new_length, percent);
  548
              /* x is the number of cells for the leading part; one cell is set aside for the ellipsis */
  549         x = ((new_length - 1) * percent) / 100;
  550         assert(x <= new_length - 1);
  551
              /* Move i forward from the start for as long as the characters passed fit into x cells. k counts
               * the cells used so far, wide characters count as two cells. */
  552         k = 0;
  553         for (i = s; i < s + old_length; i = utf8_next_char(i)) {
  554                 char32_t c;
  555                 int w;
  556
  557                 r = utf8_encoded_to_unichar(i, &c);
  558                 if (r < 0)
  559                         return NULL;
  560
  561                 w = unichar_iswide(c) ? 2 : 1;
  562                 if (k + w <= x)
  563                         k += w;
  564                 else
  565                         break;
  566         }
  567
              /* Now move j backwards from the end for as long as head and tail together fit into new_length
               * cells, but never further back than i. */
  568         for (j = s + old_length; j > i; ) {
  569                 char32_t c;
  570                 int w;
  571                 const char *jj;
  572
  573                 jj = utf8_prev_char(j);
  574                 r = utf8_encoded_to_unichar(jj, &c);
  575                 if (r < 0)
  576                         return NULL;
  577
  578                 w = unichar_iswide(c) ? 2 : 1;
  579                 if (k + w <= new_length) {
  580                         k += w;
  581                         j = jj;
  582                 } else
  583                         break;
  584         }
  585         assert(i <= j);
  586
  587         /* we don't actually need to ellipsize */
  588         if (i == j)
  589                 return memdup_suffix0(s, old_length);
  590
  591         /* make space for ellipsis, if possible */
              /* i.e. give up one character of the tail, or, if there is no tail, one character of the head */
  592         if (j < s + old_length)
  593                 j = utf8_next_char(j);
  594         else if (i > s)
  595                 i = utf8_prev_char(i);
  596
              /* len and len2 are the byte lengths of head and tail; the result additionally needs 3 bytes
               * for the ellipsis and 1 for the trailing NUL */
  597         len = i - s;
  598         len2 = s + old_length - j;
  599         e = new(char, len + 3 + len2 + 1);
  600         if (!e)
  601                 return NULL;
  602
  603         /*
  604         printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
  605                old_length, new_length, x, len, len2, k);
  606         */
  607
  608         memcpy(e, s, len);
  609         write_ellipsis(e + len, true);
  610         memcpy(e + len + 3, j, len2);
  611         *(e + len + 3 + len2) = '\0';
  612
  613         return e;
  614 }
 615
  616 char *cellescape(char *buf, size_t len, const char *s) {
  617         /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII
  618          * characters are copied as they are, everything else is escaped. The result
  619          * is different than if escaping and ellipsization were performed in two
  620          * separate steps, because each sequence is either stored in full or skipped.
  621          *
  622          * This function should be used for logging, in a safe way, about strings which
  623          * are expected to be plain ASCII.
  624          *
  625          * An ellipsis will be used if s is too long. It will always be placed at the
  626          * very end.
               *
               * len has to be at least 1, so that there is room for the terminating NUL.
               * The result is always NUL-terminated. Returns buf.
  627          */
  628
              /* i is the write position in buf. last_char_width is a ring buffer holding the widths of the most
               * recently written sequences, k is the slot of it that is written next. */
  629         size_t i = 0, last_char_width[4] = {}, k = 0, j;
  630
  631         assert(len > 0); /* at least a terminating NUL */
  632
  633         for (;;) {
  634                 char four[4];
  635                 int w;
  636
  637                 if (*s == 0) /* terminating NUL detected? then we are done! */
  638                         goto done;
  639
                      /* w is used below as the number of bytes cescape_char() stored in four */
  640                 w = cescape_char(*s, four);
  641                 if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? In that case let's
  642                                       * ellipsize at the previous location */
  643                         break;
  644
  645                 /* OK, there was space, let's add this escaped character to the buffer */
  646                 memcpy(buf + i, four, w);
  647                 i += w;
  648
  649                 /* And remember its width in the ring buffer */
  650                 last_char_width[k] = w;
  651                 k = (k + 1) % 4;
  652
  653                 s++;
  654         }
  655
  656         /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4
  657          * characters ideally, but the buffer is shorter than that in the first place take what we can get */
  658         for (j = 0; j < ELEMENTSOF(last_char_width); j++) {
  659
  660                 if (i + 4 <= len) /* nice, we reached our space goal */
  661                         break;
  662
                      /* Step back one slot in the ring buffer, i.e. to the sequence written last */
  663                 k = k == 0 ? 3 : k - 1;
  664                 if (last_char_width[k] == 0) /* bummer, we reached the beginning of the strings */
  665                         break;
  666
  667                 assert(i >= last_char_width[k]);
  668                 i -= last_char_width[k];
  669         }
  670
              /* Write as much of the ellipsis as fits, always leaving one byte for the terminating NUL */
  671         if (i + 4 <= len) /* yay, enough space */
  672                 i += write_ellipsis(buf + i, false);
  673         else if (i + 3 <= len) { /* only space for ".." */
  674                 buf[i++] = '.';
  675                 buf[i++] = '.';
  676         } else if (i + 2 <= len) /* only space for a single "." */
  677                 buf[i++] = '.';
  678         else
  679                 assert(i + 1 <= len);
  680
  681  done:
  682         buf[i] = '\0';
  683         return buf;
  684 }
 685
 686 char* strshorten(char *s, size_t l) {
 687         assert(s);
 688
 689         if (strnlen(s, l+1) > l)
 690                 s[l] = 0;
 691
 692         return s;
 693 }
 694
 695 char *strreplace(const char *text, const char *old_string, const char *new_string) {
 696         size_t l, old_len, new_len, allocated = 0;
 697         char *t, *ret = NULL;
 698         const char *f;
 699
 700         assert(old_string);
 701         assert(new_string);
 702
 703         if (!text)
 704                 return NULL;
 705
 706         old_len = strlen(old_string);
 707         new_len = strlen(new_string);
 708
 709         l = strlen(text);
 710         if (!GREEDY_REALLOC(ret, allocated, l+1))
 711                 return NULL;
 712
 713         f = text;
 714         t = ret;
 715         while (*f) {
 716                 size_t d, nl;
 717
 718                 if (!startswith(f, old_string)) {
 719                         *(t++) = *(f++);
 720                         continue;
 721                 }
 722
 723                 d = t - ret;
 724                 nl = l - old_len + new_len;
 725
 726                 if (!GREEDY_REALLOC(ret, allocated, nl + 1))
 727                         return mfree(ret);
 728
 729                 l = nl;
 730                 t = ret + d;
 731
 732                 t = stpcpy(t, new_string);
 733                 f += old_len;
 734         }
 735
 736         *t = 0;
 737         return ret;
 738 }
 739
 740 static void advance_offsets(
 741                 ssize_t diff,
 742                 size_t offsets[2], /* note: we can't use [static 2] here, since this may be NULL */
 743                 size_t shift[static 2],
 744                 size_t size) {
 745
 746         if (!offsets)
 747                 return;
 748
 749         assert(shift);
 750
 751         if ((size_t) diff < offsets[0])
 752                 shift[0] += size;
 753         if ((size_t) diff < offsets[1])
 754                 shift[1] += size;
 755 }
 756
  757 char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
  758         const char *begin = NULL;
  759         enum {
  760                 STATE_OTHER,
  761                 STATE_ESCAPE,
  762                 STATE_CSI,
  763                 STATE_CSO,
  764         } state = STATE_OTHER;
  765         char *obuf = NULL;
  766         size_t osz = 0, isz, shift[2] = {}, n_carriage_returns = 0;
  767         FILE *f;
  768
  769         assert(ibuf);
  770         assert(*ibuf);
  771
  772         /* This does four things:
  773          *
  774          * 1. Replaces TABs by 8 spaces
  775          * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences
  776          * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences
  777          * 4. Strip trailing \r characters (since they would "move the cursor", but have no
  778          *    other effect).
  779          *
  780          * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as
  781          * are any other special characters. Truncated ANSI sequences are left-as is too. This call is
  782          * supposed to suppress the most basic formatting noise, but nothing else.
  783          *
  784          * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate.
               *
               * ibuf:      in/out, the buffer to process. On success the old buffer is released and *ibuf is
               *            set to the newly allocated result.
               * _isz:      optional in/out. If non-NULL it holds the number of bytes to process, and is updated
               *            to the size of the result. If NULL, strlen(*ibuf) is used.
               * highlight: optional in/out, a pair of offsets into the buffer that are adjusted by the shifts
               *            recorded via advance_offsets() while processing.
               *
               * Returns the new *ibuf on success. If the memory stream cannot be opened NULL is returned, and
               * if flushing it fails the result of mfree(obuf) is returned; in both cases *ibuf, *_isz and
               * highlight are left untouched. */
  785
  786         isz = _isz ? *_isz : strlen(*ibuf);
  787
  788         /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we
  789          * created f here and it doesn't leave our scope. */
  790         f = open_memstream_unlocked(&obuf, &osz);
  791         if (!f)
  792                 return NULL;
  793
              /* Note that this loop deliberately runs one step beyond the end of the input ("EOT"), so that
               * each state gets a chance to write out a sequence that was cut short. In that final step *i
               * must not be accessed, which is why every state checks for EOT first. */
  794         for (const char *i = *ibuf; i < *ibuf + isz + 1; i++) {
  795
  796                 switch (state) {
  797
  798                 case STATE_OTHER:
  799                         if (i >= *ibuf + isz) /* EOT */
  800                                 break;
  801
                              /* Carriage returns are only counted at first. They are written out later, as
                               * soon as some other character than a newline follows. Those in front of a
                               * newline or at the very end of the input are hence dropped. */
  802                         if (*i == '\r') {
  803                                 n_carriage_returns++;
  804                                 break;
  805                         } else if (*i == '\n')
  806                                 /* Ignore carriage returns before new line */
  807                                 n_carriage_returns = 0;
  808                         for (; n_carriage_returns > 0; n_carriage_returns--)
  809                                 fputc('\r', f);
  810
  811                         if (*i == '\x1B')
  812                                 state = STATE_ESCAPE;
  813                         else if (*i == '\t') {
                                      /* One character becomes eight, i.e. everything behind it moves by 7 */
  814                                 fputs("        ", f);
  815                                 advance_offsets(i - *ibuf, highlight, shift, 7);
  816                         } else
  817                                 fputc(*i, f);
  818
  819                         break;
  820
  821                 case STATE_ESCAPE:
  822                         assert(n_carriage_returns == 0);
  823
  824                         if (i >= *ibuf + isz) { /* EOT */
  825                                 fputc('\x1B', f);
  826                                 advance_offsets(i - *ibuf, highlight, shift, 1);
  827                                 break;
  828                         } else if (*i == '[') { /* ANSI CSI */
  829                                 state = STATE_CSI;
  830                                 begin = i + 1;
  831                         } else if (*i == ']') { /* ANSI CSO */
  832                                 state = STATE_CSO;
  833                                 begin = i + 1;
  834                         } else {
                                      /* Not a sequence we care about: write out the ESC and this character */
  835                                 fputc('\x1B', f);
  836                                 fputc(*i, f);
  837                                 advance_offsets(i - *ibuf, highlight, shift, 1);
  838                                 state = STATE_OTHER;
  839                         }
  840
  841                         break;
  842
  843                 case STATE_CSI:
  844                         assert(n_carriage_returns == 0);
  845
                              /* NOTE(review): strchr() also matches the NUL that terminates the string literal,
                               * hence a NUL byte inside the input is accepted as part of the sequence here —
                               * confirm that this is intended. */
  846                         if (i >= *ibuf + isz || /* EOT … */
  847                             !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */
                                      /* Not a color sequence after all: write out the introducer and rewind, so
                                       * that the loop's i++ resumes at 'begin' and what follows the introducer
                                       * is processed again as regular text */
  848                                 fputc('\x1B', f);
  849                                 fputc('[', f);
  850                                 advance_offsets(i - *ibuf, highlight, shift, 2);
  851                                 state = STATE_OTHER;
  852                                 i = begin-1;
  853                         } else if (*i == 'm')
  854                                 state = STATE_OTHER;
  855
  856                         break;
  857
  858                 case STATE_CSO:
  859                         assert(n_carriage_returns == 0);
  860
  861                         if (i >= *ibuf + isz || /* EOT … */
  862                             (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */
                                      /* Same rewind logic as in the CSI case above */
  863                                 fputc('\x1B', f);
  864                                 fputc(']', f);
  865                                 advance_offsets(i - *ibuf, highlight, shift, 2);
  866                                 state = STATE_OTHER;
  867                                 i = begin-1;
  868                         } else if (*i == '\a')
  869                                 state = STATE_OTHER;
  870
  871                         break;
  872                 }
  873         }
  874
  875         if (fflush_and_check(f) < 0) {
  876                 fclose(f);
  877                 return mfree(obuf);
  878         }
  879         fclose(f);
  880
              /* Success: release the old buffer and hand the new one to the caller */
  881         free_and_replace(*ibuf, obuf);
  882
  883         if (_isz)
  884                 *_isz = osz;
  885
  886         if (highlight) {
  887                 highlight[0] += shift[0];
  888                 highlight[1] += shift[1];
  889         }
  890
  891         return *ibuf;
  892 }
 893
 894 char *strextend_with_separator(char **x, const char *separator, ...) {
 895         bool need_separator;
 896         size_t f, l, l_separator;
 897         char *r, *p;
 898         va_list ap;
 899
 900         assert(x);
 901
 902         l = f = strlen_ptr(*x);
 903
 904         need_separator = !isempty(*x);
 905         l_separator = strlen_ptr(separator);
 906
 907         va_start(ap, separator);
 908         for (;;) {
 909                 const char *t;
 910                 size_t n;
 911
 912                 t = va_arg(ap, const char *);
 913                 if (!t)
 914                         break;
 915
 916                 n = strlen(t);
 917
 918                 if (need_separator)
 919                         n += l_separator;
 920
 921                 if (n > ((size_t) -1) - l) {
 922                         va_end(ap);
 923                         return NULL;
 924                 }
 925
 926                 l += n;
 927                 need_separator = true;
 928         }
 929         va_end(ap);
 930
 931         need_separator = !isempty(*x);
 932
 933         r = realloc(*x, l+1);
 934         if (!r)
 935                 return NULL;
 936
 937         p = r + f;
 938
 939         va_start(ap, separator);
 940         for (;;) {
 941                 const char *t;
 942
 943                 t = va_arg(ap, const char *);
 944                 if (!t)
 945                         break;
 946
 947                 if (need_separator && separator)
 948                         p = stpcpy(p, separator);
 949
 950                 p = stpcpy(p, t);
 951
 952                 need_separator = true;
 953         }
 954         va_end(ap);
 955
 956         assert(p == r + l);
 957
 958         *p = 0;
 959         *x = r;
 960
 961         return r + l;
 962 }
 963
 964 char *strrep(const char *s, unsigned n) {
 965         size_t l;
 966         char *r, *p;
 967         unsigned i;
 968
 969         assert(s);
 970
 971         l = strlen(s);
 972         p = r = malloc(l * n + 1);
 973         if (!r)
 974                 return NULL;
 975
 976         for (i = 0; i < n; i++)
 977                 p = stpcpy(p, s);
 978
 979         *p = 0;
 980         return r;
 981 }
 982
 983 int split_pair(const char *s, const char *sep, char **l, char **r) {
 984         char *x, *a, *b;
 985
 986         assert(s);
 987         assert(sep);
 988         assert(l);
 989         assert(r);
 990
 991         if (isempty(sep))
 992                 return -EINVAL;
 993
 994         x = strstr(s, sep);
 995         if (!x)
 996                 return -EINVAL;
 997
 998         a = strndup(s, x - s);
 999         if (!a)
1000                 return -ENOMEM;
1001
1002         b = strdup(x + strlen(sep));
1003         if (!b) {
1004                 free(a);
1005                 return -ENOMEM;
1006         }
1007
1008         *l = a;
1009         *r = b;
1010
1011         return 0;
1012 }
1013
int free_and_strdup(char **p, const char *s) {
        char *copy = NULL;

        /* Makes *p a freshly strdup()ed copy of 's' (or NULL if 's' is NULL), releasing whatever *p
         * pointed to before. Returns 0 if *p already matched 's' and nothing was changed, 1 if *p was
         * replaced, and -ENOMEM if the copy could not be allocated (in which case *p is left as is). */

        assert(p);

        if (streq_ptr(*p, s))
                return 0;

        if (s) {
                copy = strdup(s);
                if (!copy)
                        return -ENOMEM;
        }

        free(*p);
        *p = copy;

        return 1;
}
1037
int free_and_strndup(char **p, const char *s, size_t l) {
        char *replacement = NULL;

        /* Makes *p a freshly strndup()ed copy of at most 'l' characters of 's' (or NULL if 's' is
         * NULL), releasing the old string. Returns 0 if *p already holds the requested contents, 1 if it
         * was replaced, and -ENOMEM if the copy could not be allocated. */

        assert(p);
        assert(s || l == 0);

        if (!s) {
                /* Nothing to do if there is no string stored and none requested */
                if (!*p)
                        return 0;
        } else {
                /* The stored string matches if it agrees with 's' on the first 'l' characters and is
                 * not longer than that. */
                if (*p && strneq(*p, s, l) && strlen(*p) <= l)
                        return 0;

                replacement = strndup(s, l);
                if (!replacement)
                        return -ENOMEM;
        }

        free_and_replace(*p, replacement);
        return 1;
}
1063
1064 bool string_is_safe(const char *p) {
1065         const char *t;
1066
1067         if (!p)
1068                 return false;
1069
1070         /* Checks if the specified string contains no quotes or control characters */
1071
1072         for (t = p; *t; t++) {
1073                 if (*t > 0 && *t < ' ') /* no control characters */
1074                         return false;
1075
1076                 if (strchr(QUOTES "\\\x7f", *t))
1077                         return false;
1078         }
1079
1080         return true;
1081 }
1082
char* string_erase(char *x) {

        /* Overwrites the characters of the string (up to, but not including, the trailing NUL) via
         * explicit_bzero_safe(). Intended for memory that held passphrases and similar, once we are done
         * with them. Returns the pointer passed in; NULL is accepted and passed through. */

        if (x)
                explicit_bzero_safe(x, strlen(x));

        return x;
}
1092
int string_truncate_lines(const char *s, size_t n_lines, char **ret) {
        const char *cursor = s, *keep_end = s;
        size_t seen = 0;
        bool truncated = false;
        char *copy;

        assert(s);

        /* Cuts the string off after the specified number of lines and stores a newly allocated copy in
         * *ret. Returns > 0 if something other than new-lines was cut off, == 0 if the string had no more
         * lines than requested anyway, and -ENOMEM on allocation failure. Trailing new-lines of the input
         * are dropped and never generated. */

        /* Walk over at most n_lines lines, remembering the end of the last non-empty one */
        while (seen < n_lines) {
                size_t len;

                len = strcspn(cursor, "\n");

                if (len > 0)
                        keep_end = cursor + len; /* last line to include so far */

                if (cursor[len] == 0) /* reached the end of the string */
                        break;

                cursor += len + 1;
                seen++;
        }

        /* keep_end points after the last character we want to keep */
        if (isempty(keep_end))
                copy = strdup(s);
        else {
                /* We only consider things truncated if we remove something that isn't a new-line or a
                 * series of them */
                truncated = !in_charset(keep_end, "\n");

                copy = strndup(s, keep_end - s);
        }
        if (!copy)
                return -ENOMEM;

        *ret = copy;
        return truncated;
}
1149
int string_extract_line(const char *s, size_t i, char **ret) {
        const char *line = s, *nl;
        char *m;
        size_t c;

        /* Extracts line number 'i' (counting from zero) from the specified string. Returns > 0 if more
         * lines follow the extracted one, == 0 if it is the last line or lies beyond the last line (in
         * the latter case an empty string is returned), and -ENOMEM on allocation failure. As a special
         * optimization for a very common case, *ret is set to NULL if the first line is requested and
         * the string consists of a single line only, meaning the input string shall be used as is. */

        /* First, skip over the lines preceding the one we are looking for */
        for (c = 0; c < i; c++) {
                nl = strchr(line, '\n');
                if (!nl) {
                        /* No more lines, return empty line */
                        m = strdup("");
                        if (!m)
                                return -ENOMEM;

                        *ret = m;
                        return 0; /* The end */
                }

                line = nl + 1;
        }

        /* Now 'line' points to the line we are looking for */
        nl = strchr(line, '\n');
        if (nl) {
                m = strndup(line, nl - line);
                if (!m)
                        return -ENOMEM;

                *ret = m;
                return !isempty(nl + 1); /* more coming? */
        }

        if (line == s) {
                *ret = NULL; /* Just use the input string */
                return 0;
        }

        m = strdup(line);
        if (!m)
                return -ENOMEM;

        *ret = m;
        return 0; /* The end */
}