src/basic/string-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #include <errno.h>
  22 #include <stdarg.h>
  23 #include <stdint.h>
  24 #include <stdio.h>
  25 #include <stdlib.h>
  26 #include <string.h>
  27
  28 #include "alloc-util.h"
  29 #include "gunicode.h"
  30 #include "macro.h"
  31 #include "string-util.h"
  32 #include "utf8.h"
  33 #include "util.h"
  34
  35 int strcmp_ptr(const char *a, const char *b) {
  36
  37         /* Like strcmp(), but tries to make sense of NULL pointers */
  38         if (a && b)
  39                 return strcmp(a, b);
  40
  41         if (!a && b)
  42                 return -1;
  43
  44         if (a && !b)
  45                 return 1;
  46
  47         return 0;
  48 }
  49
  50 char* endswith(const char *s, const char *postfix) {
  51         size_t sl, pl;
  52
  53         assert(s);
  54         assert(postfix);
  55
  56         sl = strlen(s);
  57         pl = strlen(postfix);
  58
  59         if (pl == 0)
  60                 return (char*) s + sl;
  61
  62         if (sl < pl)
  63                 return NULL;
  64
  65         if (memcmp(s + sl - pl, postfix, pl) != 0)
  66                 return NULL;
  67
  68         return (char*) s + sl - pl;
  69 }
  70
  71 char* endswith_no_case(const char *s, const char *postfix) {
  72         size_t sl, pl;
  73
  74         assert(s);
  75         assert(postfix);
  76
  77         sl = strlen(s);
  78         pl = strlen(postfix);
  79
  80         if (pl == 0)
  81                 return (char*) s + sl;
  82
  83         if (sl < pl)
  84                 return NULL;
  85
  86         if (strcasecmp(s + sl - pl, postfix) != 0)
  87                 return NULL;
  88
  89         return (char*) s + sl - pl;
  90 }
  91
  92 char* first_word(const char *s, const char *word) {
  93         size_t sl, wl;
  94         const char *p;
  95
  96         assert(s);
  97         assert(word);
  98
  99         /* Checks if the string starts with the specified word, either
 100          * followed by NUL or by whitespace. Returns a pointer to the
 101          * NUL or the first character after the whitespace. */
 102
 103         sl = strlen(s);
 104         wl = strlen(word);
 105
 106         if (sl < wl)
 107                 return NULL;
 108
 109         if (wl == 0)
 110                 return (char*) s;
 111
 112         if (memcmp(s, word, wl) != 0)
 113                 return NULL;
 114
 115         p = s + wl;
 116         if (*p == 0)
 117                 return (char*) p;
 118
 119         if (!strchr(WHITESPACE, *p))
 120                 return NULL;
 121
 122         p += strspn(p, WHITESPACE);
 123         return (char*) p;
 124 }
 125
 126 static size_t strcspn_escaped(const char *s, const char *reject) {
 127         bool escaped = false;
 128         int n;
 129
 130         for (n=0; s[n]; n++) {
 131                 if (escaped)
 132                         escaped = false;
 133                 else if (s[n] == '\\')
 134                         escaped = true;
 135                 else if (strchr(reject, s[n]))
 136                         break;
 137         }
 138
 139         /* if s ends in \, return index of previous char */
 140         return n - escaped;
 141 }
 142
/* Split a string into words.
 *
 * Iterator-style tokenizer: *state is the cursor into the string and is
 * advanced on every call. On success a pointer to the beginning of the next
 * word is returned and its length is stored in *l; the word is NOT
 * NUL-terminated. NULL is returned when no further word is available. NULL is
 * also returned when, in quoted mode, the input is malformed; in that case
 * *state is left pointing at the offending, non-NUL position.
 *
 * 'separator' is the set of separator characters. If 'quoted' is true, words
 * may be enclosed in single or double quotes, and backslash escapes are
 * honoured when looking for the end of a word. */
const char* split(const char **state, size_t *l, const char *separator, bool quoted) {
        const char *current;

        current = *state;

        /* Cursor already sits on the terminating NUL: nothing left. */
        if (!*current) {
                assert(**state == '\0');
                return NULL;
        }

        /* Skip leading separators; if only separators remained we are done. */
        current += strspn(current, separator);
        if (!*current) {
                *state = current;
                return NULL;
        }

        if (quoted && strchr("\'\"", *current)) {
                /* Quoted word: look for the matching (unescaped) closing quote. */
                char quotechars[2] = {*current, '\0'};

                *l = strcspn_escaped(current + 1, quotechars);
                if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] ||
                    (current[*l + 2] && !strchr(separator, current[*l + 2]))) {
                        /* right quote missing or garbage at the end */
                        *state = current;
                        return NULL;
                }
                /* The cursor moves past the closing quote, while the
                 * post-increment makes the returned pointer skip the
                 * opening quote. */
                *state = current++ + *l + 2;
        } else if (quoted) {
                /* Unquoted word in quoted mode: escaped separators do not end the word. */
                *l = strcspn_escaped(current, separator);
                if (current[*l] && !strchr(separator, current[*l])) {
                        /* unfinished escape */
                        *state = current;
                        return NULL;
                }
                *state = current + *l;
        } else {
                /* Plain mode: the word runs up to the next separator or the end. */
                *l = strcspn(current, separator);
                *state = current + *l;
        }

        return current;
}
 186
 187 char *strnappend(const char *s, const char *suffix, size_t b) {
 188         size_t a;
 189         char *r;
 190
 191         if (!s && !suffix)
 192                 return strdup("");
 193
 194         if (!s)
 195                 return strndup(suffix, b);
 196
 197         if (!suffix)
 198                 return strdup(s);
 199
 200         assert(s);
 201         assert(suffix);
 202
 203         a = strlen(s);
 204         if (b > ((size_t) -1) - a)
 205                 return NULL;
 206
 207         r = new(char, a+b+1);
 208         if (!r)
 209                 return NULL;
 210
 211         memcpy(r, s, a);
 212         memcpy(r+a, suffix, b);
 213         r[a+b] = 0;
 214
 215         return r;
 216 }
 217
 218 char *strappend(const char *s, const char *suffix) {
 219         return strnappend(s, suffix, strlen_ptr(suffix));
 220 }
 221
 222 char *strjoin_real(const char *x, ...) {
 223         va_list ap;
 224         size_t l;
 225         char *r, *p;
 226
 227         va_start(ap, x);
 228
 229         if (x) {
 230                 l = strlen(x);
 231
 232                 for (;;) {
 233                         const char *t;
 234                         size_t n;
 235
 236                         t = va_arg(ap, const char *);
 237                         if (!t)
 238                                 break;
 239
 240                         n = strlen(t);
 241                         if (n > ((size_t) -1) - l) {
 242                                 va_end(ap);
 243                                 return NULL;
 244                         }
 245
 246                         l += n;
 247                 }
 248         } else
 249                 l = 0;
 250
 251         va_end(ap);
 252
 253         r = new(char, l+1);
 254         if (!r)
 255                 return NULL;
 256
 257         if (x) {
 258                 p = stpcpy(r, x);
 259
 260                 va_start(ap, x);
 261
 262                 for (;;) {
 263                         const char *t;
 264
 265                         t = va_arg(ap, const char *);
 266                         if (!t)
 267                                 break;
 268
 269                         p = stpcpy(p, t);
 270                 }
 271
 272                 va_end(ap);
 273         } else
 274                 r[0] = 0;
 275
 276         return r;
 277 }
 278
 279 char *strstrip(char *s) {
 280         char *e;
 281
 282         if (!s)
 283                 return NULL;
 284
 285         /* Drops trailing whitespace. Modifies the string in
 286          * place. Returns pointer to first non-space character */
 287
 288         s += strspn(s, WHITESPACE);
 289
 290         for (e = strchr(s, 0); e > s; e --)
 291                 if (!strchr(WHITESPACE, e[-1]))
 292                         break;
 293
 294         *e = 0;
 295
 296         return s;
 297 }
 298
 299 char *delete_chars(char *s, const char *bad) {
 300         char *f, *t;
 301
 302         /* Drops all specified bad characters, regardless where in the string */
 303
 304         if (!s)
 305                 return NULL;
 306
 307         if (!bad)
 308                 bad = WHITESPACE;
 309
 310         for (f = s, t = s; *f; f++) {
 311                 if (strchr(bad, *f))
 312                         continue;
 313
 314                 *(t++) = *f;
 315         }
 316
 317         *t = 0;
 318
 319         return s;
 320 }
 321
 322 char *delete_trailing_chars(char *s, const char *bad) {
 323         char *p, *c = s;
 324
 325         /* Drops all specified bad characters, at the end of the string */
 326
 327         if (!s)
 328                 return NULL;
 329
 330         if (!bad)
 331                 bad = WHITESPACE;
 332
 333         for (p = s; *p; p++)
 334                 if (!strchr(bad, *p))
 335                         c = p + 1;
 336
 337         *c = 0;
 338
 339         return s;
 340 }
 341
 342 char *truncate_nl(char *s) {
 343         assert(s);
 344
 345         s[strcspn(s, NEWLINE)] = 0;
 346         return s;
 347 }
 348
 349 char ascii_tolower(char x) {
 350
 351         if (x >= 'A' && x <= 'Z')
 352                 return x - 'A' + 'a';
 353
 354         return x;
 355 }
 356
 357 char ascii_toupper(char x) {
 358
 359         if (x >= 'a' && x <= 'z')
 360                 return x - 'a' + 'A';
 361
 362         return x;
 363 }
 364
 365 char *ascii_strlower(char *t) {
 366         char *p;
 367
 368         assert(t);
 369
 370         for (p = t; *p; p++)
 371                 *p = ascii_tolower(*p);
 372
 373         return t;
 374 }
 375
 376 char *ascii_strupper(char *t) {
 377         char *p;
 378
 379         assert(t);
 380
 381         for (p = t; *p; p++)
 382                 *p = ascii_toupper(*p);
 383
 384         return t;
 385 }
 386
 387 char *ascii_strlower_n(char *t, size_t n) {
 388         size_t i;
 389
 390         if (n <= 0)
 391                 return t;
 392
 393         for (i = 0; i < n; i++)
 394                 t[i] = ascii_tolower(t[i]);
 395
 396         return t;
 397 }
 398
 399 int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
 400
 401         for (; n > 0; a++, b++, n--) {
 402                 int x, y;
 403
 404                 x = (int) (uint8_t) ascii_tolower(*a);
 405                 y = (int) (uint8_t) ascii_tolower(*b);
 406
 407                 if (x != y)
 408                         return x - y;
 409         }
 410
 411         return 0;
 412 }
 413
 414 int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
 415         int r;
 416
 417         r = ascii_strcasecmp_n(a, b, MIN(n, m));
 418         if (r != 0)
 419                 return r;
 420
 421         if (n < m)
 422                 return -1;
 423         else if (n > m)
 424                 return 1;
 425         else
 426                 return 0;
 427 }
 428
 429 bool chars_intersect(const char *a, const char *b) {
 430         const char *p;
 431
 432         /* Returns true if any of the chars in a are in b. */
 433         for (p = a; *p; p++)
 434                 if (strchr(b, *p))
 435                         return true;
 436
 437         return false;
 438 }
 439
 440 bool string_has_cc(const char *p, const char *ok) {
 441         const char *t;
 442
 443         assert(p);
 444
 445         /*
 446          * Check if a string contains control characters. If 'ok' is
 447          * non-NULL it may be a string containing additional CCs to be
 448          * considered OK.
 449          */
 450
 451         for (t = p; *t; t++) {
 452                 if (ok && strchr(ok, *t))
 453                         continue;
 454
 455                 if (*t > 0 && *t < ' ')
 456                         return true;
 457
 458                 if (*t == 127)
 459                         return true;
 460         }
 461
 462         return false;
 463 }
 464
 465 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 466         size_t x;
 467         char *r;
 468
 469         assert(s);
 470         assert(percent <= 100);
 471         assert(new_length >= 3);
 472
 473         if (old_length <= 3 || old_length <= new_length)
 474                 return strndup(s, old_length);
 475
 476         r = new0(char, new_length+3);
 477         if (!r)
 478                 return NULL;
 479
 480         x = (new_length * percent) / 100;
 481
 482         if (x > new_length - 3)
 483                 x = new_length - 3;
 484
 485         memcpy(r, s, x);
 486         r[x] = 0xe2; /* tri-dot ellipsis: … */
 487         r[x+1] = 0x80;
 488         r[x+2] = 0xa6;
 489         memcpy(r + x + 3,
 490                s + old_length - (new_length - x - 1),
 491                new_length - x - 1);
 492
 493         return r;
 494 }
 495
char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
        size_t x;
        char *e;
        const char *i, *j;
        unsigned k, len, len2;
        int r;

        /* Shortens the first old_length bytes of s to roughly new_length
         * by replacing the middle with a UTF-8 ellipsis; 'percent'
         * selects how much of the budget goes to the part in front of the
         * ellipsis. Characters for which unichar_iswide() is true are
         * counted as 2, all others as 1. Returns a newly allocated string,
         * or NULL if an allocation fails or utf8_encoded_to_unichar()
         * reports an error. A new_length of (size_t) -1 disables
         * shortening and returns a plain copy. */

        assert(s);
        assert(percent <= 100);

        if (new_length == (size_t) -1)
                return strndup(s, old_length);

        assert(new_length >= 3);

        /* if no multibyte characters use ascii_ellipsize_mem for speed */
        /* NOTE(review): ascii_is_valid() is only handed s, not old_length —
         * presumably it scans up to the NUL terminator; confirm this is the
         * intended range when old_length < strlen(s). */
        if (ascii_is_valid(s))
                return ascii_ellipsize_mem(s, old_length, new_length, percent);

        if (old_length <= 3 || old_length <= new_length)
                return strndup(s, old_length);

        /* Budget for the part in front of the ellipsis. */
        x = (new_length * percent) / 100;

        if (x > new_length - 3)
                x = new_length - 3;

        /* Advance i over the head until the budget x is used up; k
         * accumulates the width consumed so far. */
        k = 0;
        for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
                char32_t c;

                r = utf8_encoded_to_unichar(i, &c);
                if (r < 0)
                        return NULL;
                k += unichar_iswide(c) ? 2 : 1;
        }

        /* NOTE(review): x is not read again after this point in this
         * function, so this increment has no visible effect here. */
        if (k > x) /* last character was wide and went over quota */
                x++;

        /* Walk j backwards from the end, adding tail characters until the
         * total width reaches new_length or j meets i. */
        for (j = s + old_length; k < new_length && j > i; ) {
                char32_t c;

                j = utf8_prev_char(j);
                r = utf8_encoded_to_unichar(j, &c);
                if (r < 0)
                        return NULL;
                k += unichar_iswide(c) ? 2 : 1;
        }
        assert(i <= j);

        /* we don't actually need to ellipsize */
        /* NOTE(review): copies old_length + 1 bytes, i.e. assumes that
         * s[old_length] is readable (presumably the NUL terminator). */
        if (i == j)
                return memdup(s, old_length + 1);

        /* make space for ellipsis */
        j = utf8_next_char(j);

        /* NOTE(review): the pointer differences are stored in 'unsigned';
         * confirm the lengths handled here always fit. */
        len = i - s;
        len2 = s + old_length - j;
        e = new(char, len + 3 + len2 + 1);
        if (!e)
                return NULL;

        /*
        printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
               old_length, new_length, x, len, len2, k);
        */

        memcpy(e, s, len);
        e[len]   = 0xe2; /* tri-dot ellipsis: … */
        e[len + 1] = 0x80;
        e[len + 2] = 0xa6;

        /* Copies the tail plus one more byte, the one at s[old_length];
         * this is what terminates the result (assumed to be NUL, see the
         * note above). */
        memcpy(e + len + 3, j, len2 + 1);

        return e;
}
 574
 575 char *ellipsize(const char *s, size_t length, unsigned percent) {
 576
 577         if (length == (size_t) -1)
 578                 return strdup(s);
 579
 580         return ellipsize_mem(s, strlen(s), length, percent);
 581 }
 582
 583 bool nulstr_contains(const char *nulstr, const char *needle) {
 584         const char *i;
 585
 586         if (!nulstr)
 587                 return false;
 588
 589         NULSTR_FOREACH(i, nulstr)
 590                 if (streq(i, needle))
 591                         return true;
 592
 593         return false;
 594 }
 595
 596 char* strshorten(char *s, size_t l) {
 597         assert(s);
 598
 599         if (strnlen(s, l+1) > l)
 600                 s[l] = 0;
 601
 602         return s;
 603 }
 604
 605 char *strreplace(const char *text, const char *old_string, const char *new_string) {
 606         const char *f;
 607         char *t, *r;
 608         size_t l, old_len, new_len;
 609
 610         assert(text);
 611         assert(old_string);
 612         assert(new_string);
 613
 614         old_len = strlen(old_string);
 615         new_len = strlen(new_string);
 616
 617         l = strlen(text);
 618         r = new(char, l+1);
 619         if (!r)
 620                 return NULL;
 621
 622         f = text;
 623         t = r;
 624         while (*f) {
 625                 char *a;
 626                 size_t d, nl;
 627
 628                 if (!startswith(f, old_string)) {
 629                         *(t++) = *(f++);
 630                         continue;
 631                 }
 632
 633                 d = t - r;
 634                 nl = l - old_len + new_len;
 635                 a = realloc(r, nl + 1);
 636                 if (!a)
 637                         goto oom;
 638
 639                 l = nl;
 640                 r = a;
 641                 t = r + d;
 642
 643                 t = stpcpy(t, new_string);
 644                 f += old_len;
 645         }
 646
 647         *t = 0;
 648         return r;
 649
 650 oom:
 651         return mfree(r);
 652 }
 653
char *strip_tab_ansi(char **ibuf, size_t *_isz) {
        const char *i, *begin = NULL;
        /* Parser state: plain text, just seen ESC, or inside "ESC [ ..." */
        enum {
                STATE_OTHER,
                STATE_ESCAPE,
                STATE_BRACKET
        } state = STATE_OTHER;
        char *obuf = NULL;
        size_t osz = 0, isz;
        FILE *f;

        assert(ibuf);
        assert(*ibuf);

        /* Strips ANSI color and replaces TABs by 8 spaces */

        /* The input is *ibuf; its length is taken from *_isz if _isz is
         * given, otherwise from strlen(). The filtered text is collected in
         * a memory stream. On success the old buffer is freed, *ibuf is
         * replaced by the new buffer, *_isz (if given) is set to the new
         * size and the new buffer is returned. If the stream cannot be
         * opened NULL is returned and *ibuf is left untouched; the stream
         * error path likewise leaves *ibuf untouched and returns the result
         * of mfree() (presumably NULL). */

        isz = _isz ? *_isz : strlen(*ibuf);

        f = open_memstream(&obuf, &osz);
        if (!f)
                return NULL;

        /* Note we use the _unlocked() stdio variants on f for performance
         * reasons.  It's safe to do so since we created f here and it
         * doesn't leave our scope.
         */

        /* The loop deliberately runs one step past the last input byte
         * (the "EOT" step), so that a sequence left unfinished at the end
         * of the input can still be written out. */
        for (i = *ibuf; i < *ibuf + isz + 1; i++) {

                switch (state) {

                case STATE_OTHER:
                        if (i >= *ibuf + isz) /* EOT */
                                break;
                        else if (*i == '\x1B')
                                state = STATE_ESCAPE;
                        else if (*i == '\t')
                                fputs_unlocked("        ", f);
                        else
                                fputc_unlocked(*i, f);
                        break;

                case STATE_ESCAPE:
                        if (i >= *ibuf + isz) { /* EOT */
                                fputc_unlocked('\x1B', f);
                                break;
                        } else if (*i == '[') {
                                /* Remember where the parameters start, in
                                 * case we have to rewind later on. */
                                state = STATE_BRACKET;
                                begin = i + 1;
                        } else {
                                /* Not a "ESC [" sequence: pass both bytes through */
                                fputc_unlocked('\x1B', f);
                                fputc_unlocked(*i, f);
                                state = STATE_OTHER;
                        }

                        break;

                case STATE_BRACKET:

                        if (i >= *ibuf + isz || /* EOT */
                            (!(*i >= '0' && *i <= '9') && !IN_SET(*i, ';', 'm'))) {
                                /* Not a color sequence after all: emit the
                                 * swallowed "ESC [" and rewind, so that the
                                 * loop's i++ resumes at 'begin' and the
                                 * remainder is handled as ordinary text. */
                                fputc_unlocked('\x1B', f);
                                fputc_unlocked('[', f);
                                state = STATE_OTHER;
                                i = begin-1;
                        } else if (*i == 'm')
                                /* End of a color sequence: it is dropped entirely */
                                state = STATE_OTHER;
                        break;
                }
        }

        /* NOTE(review): the error flag is checked before fclose(); an error
         * that only shows up while fclose() flushes would go unnoticed —
         * confirm whether that matters for memory streams. */
        if (ferror(f)) {
                fclose(f);
                return mfree(obuf);
        }

        fclose(f);

        free(*ibuf);
        *ibuf = obuf;

        if (_isz)
                *_isz = osz;

        return obuf;
}
 740
 741 char *strextend(char **x, ...) {
 742         va_list ap;
 743         size_t f, l;
 744         char *r, *p;
 745
 746         assert(x);
 747
 748         l = f = strlen_ptr(*x);
 749
 750         va_start(ap, x);
 751         for (;;) {
 752                 const char *t;
 753                 size_t n;
 754
 755                 t = va_arg(ap, const char *);
 756                 if (!t)
 757                         break;
 758
 759                 n = strlen(t);
 760                 if (n > ((size_t) -1) - l) {
 761                         va_end(ap);
 762                         return NULL;
 763                 }
 764
 765                 l += n;
 766         }
 767         va_end(ap);
 768
 769         r = realloc(*x, l+1);
 770         if (!r)
 771                 return NULL;
 772
 773         p = r + f;
 774
 775         va_start(ap, x);
 776         for (;;) {
 777                 const char *t;
 778
 779                 t = va_arg(ap, const char *);
 780                 if (!t)
 781                         break;
 782
 783                 p = stpcpy(p, t);
 784         }
 785         va_end(ap);
 786
 787         *p = 0;
 788         *x = r;
 789
 790         return r + l;
 791 }
 792
 793 char *strrep(const char *s, unsigned n) {
 794         size_t l;
 795         char *r, *p;
 796         unsigned i;
 797
 798         assert(s);
 799
 800         l = strlen(s);
 801         p = r = malloc(l * n + 1);
 802         if (!r)
 803                 return NULL;
 804
 805         for (i = 0; i < n; i++)
 806                 p = stpcpy(p, s);
 807
 808         *p = 0;
 809         return r;
 810 }
 811
 812 int split_pair(const char *s, const char *sep, char **l, char **r) {
 813         char *x, *a, *b;
 814
 815         assert(s);
 816         assert(sep);
 817         assert(l);
 818         assert(r);
 819
 820         if (isempty(sep))
 821                 return -EINVAL;
 822
 823         x = strstr(s, sep);
 824         if (!x)
 825                 return -EINVAL;
 826
 827         a = strndup(s, x - s);
 828         if (!a)
 829                 return -ENOMEM;
 830
 831         b = strdup(x + strlen(sep));
 832         if (!b) {
 833                 free(a);
 834                 return -ENOMEM;
 835         }
 836
 837         *l = a;
 838         *r = b;
 839
 840         return 0;
 841 }
 842
 843 int free_and_strdup(char **p, const char *s) {
 844         char *t;
 845
 846         assert(p);
 847
 848         /* Replaces a string pointer with an strdup()ed new string,
 849          * possibly freeing the old one. */
 850
 851         if (streq_ptr(*p, s))
 852                 return 0;
 853
 854         if (s) {
 855                 t = strdup(s);
 856                 if (!t)
 857                         return -ENOMEM;
 858         } else
 859                 t = NULL;
 860
 861         free(*p);
 862         *p = t;
 863
 864         return 1;
 865 }
 866
/* Fallback implementation, only compiled when HAVE_EXPLICIT_BZERO is unset
 * or zero (presumably defined by the build system when the C library
 * already provides explicit_bzero()). */
#if !HAVE_EXPLICIT_BZERO
/*
 * Pointer to memset is volatile so that compiler must de-reference
 * the pointer and can't assume that it points to any function in
 * particular (such as memset, which it then might further "optimize")
 * This approach is inspired by openssl's crypto/mem_clr.c.
 */
typedef void *(*memset_t)(void *,int,size_t);

static volatile memset_t memset_func = memset;

/* Overwrites the l bytes starting at p with zeroes, calling memset()
 * through the volatile pointer above. */
void explicit_bzero(void *p, size_t l) {
        memset_func(p, '\0', l);
}
#endif
 882
 883 char* string_erase(char *x) {
 884         if (!x)
 885                 return NULL;
 886
 887         /* A delicious drop of snake-oil! To be called on memory where
 888          * we stored passphrases or so, after we used them. */
 889         explicit_bzero(x, strlen(x));
 890         return x;
 891 }
 892
 893 char *string_free_erase(char *s) {
 894         return mfree(string_erase(s));
 895 }
 896
 897 bool string_is_safe(const char *p) {
 898         const char *t;
 899
 900         if (!p)
 901                 return false;
 902
 903         for (t = p; *t; t++) {
 904                 if (*t > 0 && *t < ' ') /* no control characters */
 905                         return false;
 906
 907                 if (strchr(QUOTES "\\\x7f", *t))
 908                         return false;
 909         }
 910
 911         return true;
 912 }