src/basic/string-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #include <errno.h>
   4 #include <stdarg.h>
   5 #include <stdint.h>
   6 #include <stdio.h>
   7 #include <stdlib.h>
   8
   9 #include "alloc-util.h"
  10 #include "escape.h"
  11 #include "fileio.h"
  12 #include "gunicode.h"
  13 #include "locale-util.h"
  14 #include "macro.h"
  15 #include "memory-util.h"
  16 #include "string-util.h"
  17 #include "terminal-util.h"
  18 #include "utf8.h"
  19 #include "util.h"
  20
  21 int strcmp_ptr(const char *a, const char *b) {
  22
  23         /* Like strcmp(), but tries to make sense of NULL pointers */
  24         if (a && b)
  25                 return strcmp(a, b);
  26
  27         if (!a && b)
  28                 return -1;
  29
  30         if (a && !b)
  31                 return 1;
  32
  33         return 0;
  34 }
  35
  36 char* endswith(const char *s, const char *postfix) {
  37         size_t sl, pl;
  38
  39         assert(s);
  40         assert(postfix);
  41
  42         sl = strlen(s);
  43         pl = strlen(postfix);
  44
  45         if (pl == 0)
  46                 return (char*) s + sl;
  47
  48         if (sl < pl)
  49                 return NULL;
  50
  51         if (memcmp(s + sl - pl, postfix, pl) != 0)
  52                 return NULL;
  53
  54         return (char*) s + sl - pl;
  55 }
  56
  57 char* endswith_no_case(const char *s, const char *postfix) {
  58         size_t sl, pl;
  59
  60         assert(s);
  61         assert(postfix);
  62
  63         sl = strlen(s);
  64         pl = strlen(postfix);
  65
  66         if (pl == 0)
  67                 return (char*) s + sl;
  68
  69         if (sl < pl)
  70                 return NULL;
  71
  72         if (strcasecmp(s + sl - pl, postfix) != 0)
  73                 return NULL;
  74
  75         return (char*) s + sl - pl;
  76 }
  77
  78 char* first_word(const char *s, const char *word) {
  79         size_t sl, wl;
  80         const char *p;
  81
  82         assert(s);
  83         assert(word);
  84
  85         /* Checks if the string starts with the specified word, either
  86          * followed by NUL or by whitespace. Returns a pointer to the
  87          * NUL or the first character after the whitespace. */
  88
  89         sl = strlen(s);
  90         wl = strlen(word);
  91
  92         if (sl < wl)
  93                 return NULL;
  94
  95         if (wl == 0)
  96                 return (char*) s;
  97
  98         if (memcmp(s, word, wl) != 0)
  99                 return NULL;
 100
 101         p = s + wl;
 102         if (*p == 0)
 103                 return (char*) p;
 104
 105         if (!strchr(WHITESPACE, *p))
 106                 return NULL;
 107
 108         p += strspn(p, WHITESPACE);
 109         return (char*) p;
 110 }
 111
 112 static size_t strcspn_escaped(const char *s, const char *reject) {
 113         bool escaped = false;
 114         int n;
 115
 116         for (n=0; s[n]; n++) {
 117                 if (escaped)
 118                         escaped = false;
 119                 else if (s[n] == '\\')
 120                         escaped = true;
 121                 else if (strchr(reject, s[n]))
 122                         break;
 123         }
 124
 125         /* if s ends in \, return index of previous char */
 126         return n - escaped;
 127 }
 128
 129 /* Split a string into words. */
 130 const char* split(const char **state, size_t *l, const char *separator, SplitFlags flags) {
 131         const char *current;
 132
 133         current = *state;
 134
 135         if (!*current) {
 136                 assert(**state == '\0');
 137                 return NULL;
 138         }
 139
 140         current += strspn(current, separator);
 141         if (!*current) {
 142                 *state = current;
 143                 return NULL;
 144         }
 145
 146         if (flags & SPLIT_QUOTES && strchr("\'\"", *current)) {
 147                 char quotechars[2] = {*current, '\0'};
 148
 149                 *l = strcspn_escaped(current + 1, quotechars);
 150                 if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] ||
 151                     (current[*l + 2] && !strchr(separator, current[*l + 2]))) {
 152                         /* right quote missing or garbage at the end */
 153                         if (flags & SPLIT_RELAX) {
 154                                 *state = current + *l + 1 + (current[*l + 1] != '\0');
 155                                 return current + 1;
 156                         }
 157                         *state = current;
 158                         return NULL;
 159                 }
 160                 *state = current++ + *l + 2;
 161         } else if (flags & SPLIT_QUOTES) {
 162                 *l = strcspn_escaped(current, separator);
 163                 if (current[*l] && !strchr(separator, current[*l]) && !(flags & SPLIT_RELAX)) {
 164                         /* unfinished escape */
 165                         *state = current;
 166                         return NULL;
 167                 }
 168                 *state = current + *l;
 169         } else {
 170                 *l = strcspn(current, separator);
 171                 *state = current + *l;
 172         }
 173
 174         return current;
 175 }
 176
 177 char *strnappend(const char *s, const char *suffix, size_t b) {
 178         size_t a;
 179         char *r;
 180
 181         if (!s && !suffix)
 182                 return strdup("");
 183
 184         if (!s)
 185                 return strndup(suffix, b);
 186
 187         if (!suffix)
 188                 return strdup(s);
 189
 190         assert(s);
 191         assert(suffix);
 192
 193         a = strlen(s);
 194         if (b > ((size_t) -1) - a)
 195                 return NULL;
 196
 197         r = new(char, a+b+1);
 198         if (!r)
 199                 return NULL;
 200
 201         memcpy(r, s, a);
 202         memcpy(r+a, suffix, b);
 203         r[a+b] = 0;
 204
 205         return r;
 206 }
 207
 208 char *strjoin_real(const char *x, ...) {
 209         va_list ap;
 210         size_t l;
 211         char *r, *p;
 212
 213         va_start(ap, x);
 214
 215         if (x) {
 216                 l = strlen(x);
 217
 218                 for (;;) {
 219                         const char *t;
 220                         size_t n;
 221
 222                         t = va_arg(ap, const char *);
 223                         if (!t)
 224                                 break;
 225
 226                         n = strlen(t);
 227                         if (n > ((size_t) -1) - l) {
 228                                 va_end(ap);
 229                                 return NULL;
 230                         }
 231
 232                         l += n;
 233                 }
 234         } else
 235                 l = 0;
 236
 237         va_end(ap);
 238
 239         r = new(char, l+1);
 240         if (!r)
 241                 return NULL;
 242
 243         if (x) {
 244                 p = stpcpy(r, x);
 245
 246                 va_start(ap, x);
 247
 248                 for (;;) {
 249                         const char *t;
 250
 251                         t = va_arg(ap, const char *);
 252                         if (!t)
 253                                 break;
 254
 255                         p = stpcpy(p, t);
 256                 }
 257
 258                 va_end(ap);
 259         } else
 260                 r[0] = 0;
 261
 262         return r;
 263 }
 264
 265 char *strstrip(char *s) {
 266         if (!s)
 267                 return NULL;
 268
 269         /* Drops trailing whitespace. Modifies the string in place. Returns pointer to first non-space character */
 270
 271         return delete_trailing_chars(skip_leading_chars(s, WHITESPACE), WHITESPACE);
 272 }
 273
 274 char *delete_chars(char *s, const char *bad) {
 275         char *f, *t;
 276
 277         /* Drops all specified bad characters, regardless where in the string */
 278
 279         if (!s)
 280                 return NULL;
 281
 282         if (!bad)
 283                 bad = WHITESPACE;
 284
 285         for (f = s, t = s; *f; f++) {
 286                 if (strchr(bad, *f))
 287                         continue;
 288
 289                 *(t++) = *f;
 290         }
 291
 292         *t = 0;
 293
 294         return s;
 295 }
 296
 297 char *delete_trailing_chars(char *s, const char *bad) {
 298         char *p, *c = s;
 299
 300         /* Drops all specified bad characters, at the end of the string */
 301
 302         if (!s)
 303                 return NULL;
 304
 305         if (!bad)
 306                 bad = WHITESPACE;
 307
 308         for (p = s; *p; p++)
 309                 if (!strchr(bad, *p))
 310                         c = p + 1;
 311
 312         *c = 0;
 313
 314         return s;
 315 }
 316
 317 char *truncate_nl(char *s) {
 318         assert(s);
 319
 320         s[strcspn(s, NEWLINE)] = 0;
 321         return s;
 322 }
 323
 324 char ascii_tolower(char x) {
 325
 326         if (x >= 'A' && x <= 'Z')
 327                 return x - 'A' + 'a';
 328
 329         return x;
 330 }
 331
 332 char ascii_toupper(char x) {
 333
 334         if (x >= 'a' && x <= 'z')
 335                 return x - 'a' + 'A';
 336
 337         return x;
 338 }
 339
 340 char *ascii_strlower(char *t) {
 341         char *p;
 342
 343         assert(t);
 344
 345         for (p = t; *p; p++)
 346                 *p = ascii_tolower(*p);
 347
 348         return t;
 349 }
 350
 351 char *ascii_strupper(char *t) {
 352         char *p;
 353
 354         assert(t);
 355
 356         for (p = t; *p; p++)
 357                 *p = ascii_toupper(*p);
 358
 359         return t;
 360 }
 361
 362 char *ascii_strlower_n(char *t, size_t n) {
 363         size_t i;
 364
 365         if (n <= 0)
 366                 return t;
 367
 368         for (i = 0; i < n; i++)
 369                 t[i] = ascii_tolower(t[i]);
 370
 371         return t;
 372 }
 373
 374 int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
 375
 376         for (; n > 0; a++, b++, n--) {
 377                 int x, y;
 378
 379                 x = (int) (uint8_t) ascii_tolower(*a);
 380                 y = (int) (uint8_t) ascii_tolower(*b);
 381
 382                 if (x != y)
 383                         return x - y;
 384         }
 385
 386         return 0;
 387 }
 388
 389 int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
 390         int r;
 391
 392         r = ascii_strcasecmp_n(a, b, MIN(n, m));
 393         if (r != 0)
 394                 return r;
 395
 396         return CMP(n, m);
 397 }
 398
 399 bool chars_intersect(const char *a, const char *b) {
 400         const char *p;
 401
 402         /* Returns true if any of the chars in a are in b. */
 403         for (p = a; *p; p++)
 404                 if (strchr(b, *p))
 405                         return true;
 406
 407         return false;
 408 }
 409
 410 bool string_has_cc(const char *p, const char *ok) {
 411         const char *t;
 412
 413         assert(p);
 414
 415         /*
 416          * Check if a string contains control characters. If 'ok' is
 417          * non-NULL it may be a string containing additional CCs to be
 418          * considered OK.
 419          */
 420
 421         for (t = p; *t; t++) {
 422                 if (ok && strchr(ok, *t))
 423                         continue;
 424
 425                 if (*t > 0 && *t < ' ')
 426                         return true;
 427
 428                 if (*t == 127)
 429                         return true;
 430         }
 431
 432         return false;
 433 }
 434
 435 static int write_ellipsis(char *buf, bool unicode) {
 436         if (unicode || is_locale_utf8()) {
 437                 buf[0] = 0xe2; /* tri-dot ellipsis: … */
 438                 buf[1] = 0x80;
 439                 buf[2] = 0xa6;
 440         } else {
 441                 buf[0] = '.';
 442                 buf[1] = '.';
 443                 buf[2] = '.';
 444         }
 445
 446         return 3;
 447 }
 448
 449 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 450         size_t x, need_space, suffix_len;
 451         char *t;
 452
 453         assert(s);
 454         assert(percent <= 100);
 455         assert(new_length != (size_t) -1);
 456
 457         if (old_length <= new_length)
 458                 return strndup(s, old_length);
 459
 460         /* Special case short ellipsations */
 461         switch (new_length) {
 462
 463         case 0:
 464                 return strdup("");
 465
 466         case 1:
 467                 if (is_locale_utf8())
 468                         return strdup("…");
 469                 else
 470                         return strdup(".");
 471
 472         case 2:
 473                 if (!is_locale_utf8())
 474                         return strdup("..");
 475
 476                 break;
 477
 478         default:
 479                 break;
 480         }
 481
 482         /* Calculate how much space the ellipsis will take up. If we are in UTF-8 mode we only need space for one
 483          * character ("…"), otherwise for three characters ("..."). Note that in both cases we need 3 bytes of storage,
 484          * either for the UTF-8 encoded character or for three ASCII characters. */
 485         need_space = is_locale_utf8() ? 1 : 3;
 486
 487         t = new(char, new_length+3);
 488         if (!t)
 489                 return NULL;
 490
 491         assert(new_length >= need_space);
 492
 493         x = ((new_length - need_space) * percent + 50) / 100;
 494         assert(x <= new_length - need_space);
 495
 496         memcpy(t, s, x);
 497         write_ellipsis(t + x, false);
 498         suffix_len = new_length - x - need_space;
 499         memcpy(t + x + 3, s + old_length - suffix_len, suffix_len);
 500         *(t + x + 3 + suffix_len) = '\0';
 501
 502         return t;
 503 }
 504
 505 char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 506         size_t x, k, len, len2;
 507         const char *i, *j;
 508         char *e;
 509         int r;
 510
 511         /* Note that 'old_length' refers to bytes in the string, while 'new_length' refers to character cells taken up
 512          * on screen. This distinction doesn't matter for ASCII strings, but it does matter for non-ASCII UTF-8
 513          * strings.
 514          *
 515          * Ellipsation is done in a locale-dependent way:
 516          * 1. If the string passed in is fully ASCII and the current locale is not UTF-8, three dots are used ("...")
 517          * 2. Otherwise, a unicode ellipsis is used ("…")
 518          *
 519          * In other words: you'll get a unicode ellipsis as soon as either the string contains non-ASCII characters or
 520          * the current locale is UTF-8.
 521          */
 522
 523         assert(s);
 524         assert(percent <= 100);
 525
 526         if (new_length == (size_t) -1)
 527                 return strndup(s, old_length);
 528
 529         if (new_length == 0)
 530                 return strdup("");
 531
 532         /* If no multibyte characters use ascii_ellipsize_mem for speed */
 533         if (ascii_is_valid_n(s, old_length))
 534                 return ascii_ellipsize_mem(s, old_length, new_length, percent);
 535
 536         x = ((new_length - 1) * percent) / 100;
 537         assert(x <= new_length - 1);
 538
 539         k = 0;
 540         for (i = s; i < s + old_length; i = utf8_next_char(i)) {
 541                 char32_t c;
 542                 int w;
 543
 544                 r = utf8_encoded_to_unichar(i, &c);
 545                 if (r < 0)
 546                         return NULL;
 547
 548                 w = unichar_iswide(c) ? 2 : 1;
 549                 if (k + w <= x)
 550                         k += w;
 551                 else
 552                         break;
 553         }
 554
 555         for (j = s + old_length; j > i; ) {
 556                 char32_t c;
 557                 int w;
 558                 const char *jj;
 559
 560                 jj = utf8_prev_char(j);
 561                 r = utf8_encoded_to_unichar(jj, &c);
 562                 if (r < 0)
 563                         return NULL;
 564
 565                 w = unichar_iswide(c) ? 2 : 1;
 566                 if (k + w <= new_length) {
 567                         k += w;
 568                         j = jj;
 569                 } else
 570                         break;
 571         }
 572         assert(i <= j);
 573
 574         /* we don't actually need to ellipsize */
 575         if (i == j)
 576                 return memdup_suffix0(s, old_length);
 577
 578         /* make space for ellipsis, if possible */
 579         if (j < s + old_length)
 580                 j = utf8_next_char(j);
 581         else if (i > s)
 582                 i = utf8_prev_char(i);
 583
 584         len = i - s;
 585         len2 = s + old_length - j;
 586         e = new(char, len + 3 + len2 + 1);
 587         if (!e)
 588                 return NULL;
 589
 590         /*
 591         printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
 592                old_length, new_length, x, len, len2, k);
 593         */
 594
 595         memcpy(e, s, len);
 596         write_ellipsis(e + len, true);
 597         memcpy(e + len + 3, j, len2);
 598         *(e + len + 3 + len2) = '\0';
 599
 600         return e;
 601 }
 602
 603 char *cellescape(char *buf, size_t len, const char *s) {
 604         /* Escape and ellipsize s into buffer buf of size len. Only non-control ASCII
 605          * characters are copied as they are, everything else is escaped. The result
 606          * is different then if escaping and ellipsization was performed in two
 607          * separate steps, because each sequence is either stored in full or skipped.
 608          *
 609          * This function should be used for logging about strings which expected to
 610          * be plain ASCII in a safe way.
 611          *
 612          * An ellipsis will be used if s is too long. It was always placed at the
 613          * very end.
 614          */
 615
 616         size_t i = 0, last_char_width[4] = {}, k = 0, j;
 617
 618         assert(len > 0); /* at least a terminating NUL */
 619
 620         for (;;) {
 621                 char four[4];
 622                 int w;
 623
 624                 if (*s == 0) /* terminating NUL detected? then we are done! */
 625                         goto done;
 626
 627                 w = cescape_char(*s, four);
 628                 if (i + w + 1 > len) /* This character doesn't fit into the buffer anymore? In that case let's
 629                                       * ellipsize at the previous location */
 630                         break;
 631
 632                 /* OK, there was space, let's add this escaped character to the buffer */
 633                 memcpy(buf + i, four, w);
 634                 i += w;
 635
 636                 /* And remember its width in the ring buffer */
 637                 last_char_width[k] = w;
 638                 k = (k + 1) % 4;
 639
 640                 s++;
 641         }
 642
 643         /* Ellipsation is necessary. This means we might need to truncate the string again to make space for 4
 644          * characters ideally, but the buffer is shorter than that in the first place take what we can get */
 645         for (j = 0; j < ELEMENTSOF(last_char_width); j++) {
 646
 647                 if (i + 4 <= len) /* nice, we reached our space goal */
 648                         break;
 649
 650                 k = k == 0 ? 3 : k - 1;
 651                 if (last_char_width[k] == 0) /* bummer, we reached the beginning of the strings */
 652                         break;
 653
 654                 assert(i >= last_char_width[k]);
 655                 i -= last_char_width[k];
 656         }
 657
 658         if (i + 4 <= len) /* yay, enough space */
 659                 i += write_ellipsis(buf + i, false);
 660         else if (i + 3 <= len) { /* only space for ".." */
 661                 buf[i++] = '.';
 662                 buf[i++] = '.';
 663         } else if (i + 2 <= len) /* only space for a single "." */
 664                 buf[i++] = '.';
 665         else
 666                 assert(i + 1 <= len);
 667
 668  done:
 669         buf[i] = '\0';
 670         return buf;
 671 }
 672
 673 char* strshorten(char *s, size_t l) {
 674         assert(s);
 675
 676         if (strnlen(s, l+1) > l)
 677                 s[l] = 0;
 678
 679         return s;
 680 }
 681
 682 char *strreplace(const char *text, const char *old_string, const char *new_string) {
 683         size_t l, old_len, new_len, allocated = 0;
 684         char *t, *ret = NULL;
 685         const char *f;
 686
 687         assert(old_string);
 688         assert(new_string);
 689
 690         if (!text)
 691                 return NULL;
 692
 693         old_len = strlen(old_string);
 694         new_len = strlen(new_string);
 695
 696         l = strlen(text);
 697         if (!GREEDY_REALLOC(ret, allocated, l+1))
 698                 return NULL;
 699
 700         f = text;
 701         t = ret;
 702         while (*f) {
 703                 size_t d, nl;
 704
 705                 if (!startswith(f, old_string)) {
 706                         *(t++) = *(f++);
 707                         continue;
 708                 }
 709
 710                 d = t - ret;
 711                 nl = l - old_len + new_len;
 712
 713                 if (!GREEDY_REALLOC(ret, allocated, nl + 1))
 714                         return mfree(ret);
 715
 716                 l = nl;
 717                 t = ret + d;
 718
 719                 t = stpcpy(t, new_string);
 720                 f += old_len;
 721         }
 722
 723         *t = 0;
 724         return ret;
 725 }
 726
 727 static void advance_offsets(
 728                 ssize_t diff,
 729                 size_t offsets[2], /* note: we can't use [static 2] here, since this may be NULL */
 730                 size_t shift[static 2],
 731                 size_t size) {
 732
 733         if (!offsets)
 734                 return;
 735
 736         assert(shift);
 737
 738         if ((size_t) diff < offsets[0])
 739                 shift[0] += size;
 740         if ((size_t) diff < offsets[1])
 741                 shift[1] += size;
 742 }
 743
 744 char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
 745         const char *begin = NULL;
 746         enum {
 747                 STATE_OTHER,
 748                 STATE_ESCAPE,
 749                 STATE_CSI,
 750                 STATE_CSO,
 751         } state = STATE_OTHER;
 752         char *obuf = NULL;
 753         size_t osz = 0, isz, shift[2] = {}, n_carriage_returns = 0;
 754         FILE *f;
 755
 756         assert(ibuf);
 757         assert(*ibuf);
 758
 759         /* This does three things:
 760          *
 761          * 1. Replaces TABs by 8 spaces
 762          * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences
 763          * 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' … BEL sequences
 764          * 4. Strip trailing \r characters (since they would "move the cursor", but have no
 765          *    other effect).
 766          *
 767          * Everything else will be left as it is. In particular other ANSI sequences are left as they are, as
 768          * are any other special characters. Truncated ANSI sequences are left-as is too. This call is
 769          * supposed to suppress the most basic formatting noise, but nothing else.
 770          *
 771          * Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */
 772
 773         isz = _isz ? *_isz : strlen(*ibuf);
 774
 775         /* Note we turn off internal locking on f for performance reasons. It's safe to do so since we
 776          * created f here and it doesn't leave our scope. */
 777         f = open_memstream_unlocked(&obuf, &osz);
 778         if (!f)
 779                 return NULL;
 780
 781         for (const char *i = *ibuf; i < *ibuf + isz + 1; i++) {
 782
 783                 switch (state) {
 784
 785                 case STATE_OTHER:
 786                         if (i >= *ibuf + isz) /* EOT */
 787                                 break;
 788
 789                         if (*i == '\r') {
 790                                 n_carriage_returns++;
 791                                 break;
 792                         } else if (*i == '\n')
 793                                 /* Ignore carriage returns before new line */
 794                                 n_carriage_returns = 0;
 795                         for (; n_carriage_returns > 0; n_carriage_returns--)
 796                                 fputc('\r', f);
 797
 798                         if (*i == '\x1B')
 799                                 state = STATE_ESCAPE;
 800                         else if (*i == '\t') {
 801                                 fputs("        ", f);
 802                                 advance_offsets(i - *ibuf, highlight, shift, 7);
 803                         } else
 804                                 fputc(*i, f);
 805
 806                         break;
 807
 808                 case STATE_ESCAPE:
 809                         assert(n_carriage_returns == 0);
 810
 811                         if (i >= *ibuf + isz) { /* EOT */
 812                                 fputc('\x1B', f);
 813                                 advance_offsets(i - *ibuf, highlight, shift, 1);
 814                                 break;
 815                         } else if (*i == '[') { /* ANSI CSI */
 816                                 state = STATE_CSI;
 817                                 begin = i + 1;
 818                         } else if (*i == ']') { /* ANSI CSO */
 819                                 state = STATE_CSO;
 820                                 begin = i + 1;
 821                         } else {
 822                                 fputc('\x1B', f);
 823                                 fputc(*i, f);
 824                                 advance_offsets(i - *ibuf, highlight, shift, 1);
 825                                 state = STATE_OTHER;
 826                         }
 827
 828                         break;
 829
 830                 case STATE_CSI:
 831                         assert(n_carriage_returns == 0);
 832
 833                         if (i >= *ibuf + isz || /* EOT … */
 834                             !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */
 835                                 fputc('\x1B', f);
 836                                 fputc('[', f);
 837                                 advance_offsets(i - *ibuf, highlight, shift, 2);
 838                                 state = STATE_OTHER;
 839                                 i = begin-1;
 840                         } else if (*i == 'm')
 841                                 state = STATE_OTHER;
 842
 843                         break;
 844
 845                 case STATE_CSO:
 846                         assert(n_carriage_returns == 0);
 847
 848                         if (i >= *ibuf + isz || /* EOT … */
 849                             (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */
 850                                 fputc('\x1B', f);
 851                                 fputc(']', f);
 852                                 advance_offsets(i - *ibuf, highlight, shift, 2);
 853                                 state = STATE_OTHER;
 854                                 i = begin-1;
 855                         } else if (*i == '\a')
 856                                 state = STATE_OTHER;
 857
 858                         break;
 859                 }
 860         }
 861
 862         if (fflush_and_check(f) < 0) {
 863                 fclose(f);
 864                 return mfree(obuf);
 865         }
 866         fclose(f);
 867
 868         free_and_replace(*ibuf, obuf);
 869
 870         if (_isz)
 871                 *_isz = osz;
 872
 873         if (highlight) {
 874                 highlight[0] += shift[0];
 875                 highlight[1] += shift[1];
 876         }
 877
 878         return *ibuf;
 879 }
 880
 881 char *strextend_with_separator(char **x, const char *separator, ...) {
 882         bool need_separator;
 883         size_t f, l, l_separator;
 884         char *r, *p;
 885         va_list ap;
 886
 887         assert(x);
 888
 889         l = f = strlen_ptr(*x);
 890
 891         need_separator = !isempty(*x);
 892         l_separator = strlen_ptr(separator);
 893
 894         va_start(ap, separator);
 895         for (;;) {
 896                 const char *t;
 897                 size_t n;
 898
 899                 t = va_arg(ap, const char *);
 900                 if (!t)
 901                         break;
 902
 903                 n = strlen(t);
 904
 905                 if (need_separator)
 906                         n += l_separator;
 907
 908                 if (n > ((size_t) -1) - l) {
 909                         va_end(ap);
 910                         return NULL;
 911                 }
 912
 913                 l += n;
 914                 need_separator = true;
 915         }
 916         va_end(ap);
 917
 918         need_separator = !isempty(*x);
 919
 920         r = realloc(*x, l+1);
 921         if (!r)
 922                 return NULL;
 923
 924         p = r + f;
 925
 926         va_start(ap, separator);
 927         for (;;) {
 928                 const char *t;
 929
 930                 t = va_arg(ap, const char *);
 931                 if (!t)
 932                         break;
 933
 934                 if (need_separator && separator)
 935                         p = stpcpy(p, separator);
 936
 937                 p = stpcpy(p, t);
 938
 939                 need_separator = true;
 940         }
 941         va_end(ap);
 942
 943         assert(p == r + l);
 944
 945         *p = 0;
 946         *x = r;
 947
 948         return r + l;
 949 }
 950
 951 char *strrep(const char *s, unsigned n) {
 952         size_t l;
 953         char *r, *p;
 954         unsigned i;
 955
 956         assert(s);
 957
 958         l = strlen(s);
 959         p = r = malloc(l * n + 1);
 960         if (!r)
 961                 return NULL;
 962
 963         for (i = 0; i < n; i++)
 964                 p = stpcpy(p, s);
 965
 966         *p = 0;
 967         return r;
 968 }
 969
 970 int split_pair(const char *s, const char *sep, char **l, char **r) {
 971         char *x, *a, *b;
 972
 973         assert(s);
 974         assert(sep);
 975         assert(l);
 976         assert(r);
 977
 978         if (isempty(sep))
 979                 return -EINVAL;
 980
 981         x = strstr(s, sep);
 982         if (!x)
 983                 return -EINVAL;
 984
 985         a = strndup(s, x - s);
 986         if (!a)
 987                 return -ENOMEM;
 988
 989         b = strdup(x + strlen(sep));
 990         if (!b) {
 991                 free(a);
 992                 return -ENOMEM;
 993         }
 994
 995         *l = a;
 996         *r = b;
 997
 998         return 0;
 999 }
1000
int free_and_strdup(char **p, const char *s) {
        char *t = NULL;

        assert(p);

        /* Makes *p a strdup()ed copy of s (or NULL if s is NULL) and frees the
         * previous value. Returns 0 if streq_ptr() reports *p and s as equal
         * (nothing is changed then), 1 if *p was replaced, and -ENOMEM if the
         * copy could not be allocated (*p is left alone then). */

        if (streq_ptr(*p, s))
                return 0;

        if (s) {
                t = strdup(s);
                if (!t)
                        return -ENOMEM;
        }

        free(*p);
        *p = t;

        return 1;
}
1024
/* Makes *p point to a freshly strndup()ed copy of at most l bytes of s (or to NULL if s is NULL),
 * releasing the previous string. Returns 0 if nothing had to be changed, 1 if *p was replaced and
 * -ENOMEM if the copy could not be allocated. */
int free_and_strndup(char **p, const char *s, size_t l) {
        char *copy = NULL;

        assert(p);
        assert(s || l == 0);

        if (!s && !*p)
                return 0; /* NULL stays NULL */

        /* Nothing to do if the first l bytes agree and the old string ends right there. The length
         * comparison comes first so that (*p)[l] is only read when it lies within the old string. */
        if (s && *p &&
            strneq(*p, s, l) &&
            (l > strlen(*p) || (*p)[l] == '\0'))
                return 0;

        if (s) {
                copy = strndup(s, l);
                if (!copy)
                        return -ENOMEM;
        }

        free_and_replace(*p, copy);
        return 1;
}
1050
1051 bool string_is_safe(const char *p) {
1052         const char *t;
1053
1054         if (!p)
1055                 return false;
1056
1057         for (t = p; *t; t++) {
1058                 if (*t > 0 && *t < ' ') /* no control characters */
1059                         return false;
1060
1061                 if (strchr(QUOTES "\\\x7f", *t))
1062                         return false;
1063         }
1064
1065         return true;
1066 }
1067
/* Overwrites the contents of the string x (up to, but not including, its terminating NUL byte) via
 * explicit_bzero_safe() and returns x. Intended for memory that held passphrases and similar once we
 * are done with them. A NULL x is passed through unchanged. */
char* string_erase(char *x) {
        if (x)
                explicit_bzero_safe(x, strlen(x));

        return x;
}
1077
/* Stores in *ret a newly allocated copy of s that is cut off after the specified number of lines.
 * Returns > 0 if a truncation was applied, == 0 if the string had fewer lines anyway, and -ENOMEM if
 * the copy could not be allocated. Trailing newlines of the input are ignored and never generated. */
int string_truncate_lines(const char *s, size_t n_lines, char **ret) {
        const char *cursor = s, *end = s;
        size_t seen = 0;
        int truncated = 0;
        char *result;

        assert(s);

        for (;;) {
                size_t len = strcspn(cursor, "\n");
                bool last = cursor[len] == 0; /* no newline follows this line */

                if (last && len == 0) /* final empty line */
                        break;

                if (seen >= n_lines) /* above threshold */
                        break;

                if (len > 0) /* non-empty line: keep everything up to its end */
                        end = cursor + len;

                if (last)
                        break;

                cursor += len + 1;
                seen++;
        }

        /* "end" now points right behind the last character we want to keep */
        if (isempty(end))
                result = strdup(s);
        else {
                /* Dropping nothing but a new-line or a series of them does not count as truncation */
                truncated = !in_charset(end, "\n");

                result = strndup(s, end - s);
        }
        if (!result)
                return -ENOMEM;

        *ret = result;
        return truncated;
}
1134
/* Extracts line number i (counting from 0) of s into *ret. Returns > 0 if more text follows that line,
 * == 0 if it is the last line or lies beyond the end of the string (an empty string is returned in
 * the latter case), and -ENOMEM on allocation failure. As an optimization, *ret is set to NULL instead
 * of a copy if the first line is requested and s consists of a single line only, telling the caller to
 * use s as is. */
int string_extract_line(const char *s, size_t i, char **ret) {
        const char *line = s, *nl;
        size_t skipped;
        char *copy;

        /* Step over the lines preceding the requested one */
        for (skipped = 0; skipped < i; skipped++) {
                nl = strchr(line, '\n');
                if (!nl) {
                        /* Ran out of lines, return an empty line */
                        copy = strdup("");
                        if (!copy)
                                return -ENOMEM;

                        *ret = copy;
                        return 0; /* The end */
                }

                line = nl + 1;
        }

        /* "line" now points to the start of the line we are looking for */
        nl = strchr(line, '\n');
        if (nl) {
                copy = strndup(line, nl - line);
                if (!copy)
                        return -ENOMEM;

                *ret = copy;
                return !isempty(nl + 1); /* more coming? */
        }

        if (line == s) {
                *ret = NULL; /* Just use the input string */
                return 0;
        }

        copy = strdup(line);
        if (!copy)
                return -ENOMEM;

        *ret = copy;
        return 0; /* The end */
}