src/basic/string-util.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2010 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <errno.h>
  21 #include <stdarg.h>
  22 #include <stdint.h>
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26
  27 #include "alloc-util.h"
  28 #include "gunicode.h"
  29 #include "macro.h"
  30 #include "string-util.h"
  31 #include "utf8.h"
  32 #include "util.h"
  33
  34 int strcmp_ptr(const char *a, const char *b) {
  35
  36         /* Like strcmp(), but tries to make sense of NULL pointers */
  37         if (a && b)
  38                 return strcmp(a, b);
  39
  40         if (!a && b)
  41                 return -1;
  42
  43         if (a && !b)
  44                 return 1;
  45
  46         return 0;
  47 }
  48
  49 char* endswith(const char *s, const char *postfix) {
  50         size_t sl, pl;
  51
  52         assert(s);
  53         assert(postfix);
  54
  55         sl = strlen(s);
  56         pl = strlen(postfix);
  57
  58         if (pl == 0)
  59                 return (char*) s + sl;
  60
  61         if (sl < pl)
  62                 return NULL;
  63
  64         if (memcmp(s + sl - pl, postfix, pl) != 0)
  65                 return NULL;
  66
  67         return (char*) s + sl - pl;
  68 }
  69
  70 char* endswith_no_case(const char *s, const char *postfix) {
  71         size_t sl, pl;
  72
  73         assert(s);
  74         assert(postfix);
  75
  76         sl = strlen(s);
  77         pl = strlen(postfix);
  78
  79         if (pl == 0)
  80                 return (char*) s + sl;
  81
  82         if (sl < pl)
  83                 return NULL;
  84
  85         if (strcasecmp(s + sl - pl, postfix) != 0)
  86                 return NULL;
  87
  88         return (char*) s + sl - pl;
  89 }
  90
  91 char* first_word(const char *s, const char *word) {
  92         size_t sl, wl;
  93         const char *p;
  94
  95         assert(s);
  96         assert(word);
  97
  98         /* Checks if the string starts with the specified word, either
  99          * followed by NUL or by whitespace. Returns a pointer to the
 100          * NUL or the first character after the whitespace. */
 101
 102         sl = strlen(s);
 103         wl = strlen(word);
 104
 105         if (sl < wl)
 106                 return NULL;
 107
 108         if (wl == 0)
 109                 return (char*) s;
 110
 111         if (memcmp(s, word, wl) != 0)
 112                 return NULL;
 113
 114         p = s + wl;
 115         if (*p == 0)
 116                 return (char*) p;
 117
 118         if (!strchr(WHITESPACE, *p))
 119                 return NULL;
 120
 121         p += strspn(p, WHITESPACE);
 122         return (char*) p;
 123 }
 124
 125 static size_t strcspn_escaped(const char *s, const char *reject) {
 126         bool escaped = false;
 127         int n;
 128
 129         for (n=0; s[n]; n++) {
 130                 if (escaped)
 131                         escaped = false;
 132                 else if (s[n] == '\\')
 133                         escaped = true;
 134                 else if (strchr(reject, s[n]))
 135                         break;
 136         }
 137
 138         /* if s ends in \, return index of previous char */
 139         return n - escaped;
 140 }
 141
 142 /* Split a string into words. */
 143 const char* split(const char **state, size_t *l, const char *separator, bool quoted) {
 144         const char *current;
 145
 146         current = *state;
 147
 148         if (!*current) {
 149                 assert(**state == '\0');
 150                 return NULL;
 151         }
 152
 153         current += strspn(current, separator);
 154         if (!*current) {
 155                 *state = current;
 156                 return NULL;
 157         }
 158
 159         if (quoted && strchr("\'\"", *current)) {
 160                 char quotechars[2] = {*current, '\0'};
 161
 162                 *l = strcspn_escaped(current + 1, quotechars);
 163                 if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] ||
 164                     (current[*l + 2] && !strchr(separator, current[*l + 2]))) {
 165                         /* right quote missing or garbage at the end */
 166                         *state = current;
 167                         return NULL;
 168                 }
 169                 *state = current++ + *l + 2;
 170         } else if (quoted) {
 171                 *l = strcspn_escaped(current, separator);
 172                 if (current[*l] && !strchr(separator, current[*l])) {
 173                         /* unfinished escape */
 174                         *state = current;
 175                         return NULL;
 176                 }
 177                 *state = current + *l;
 178         } else {
 179                 *l = strcspn(current, separator);
 180                 *state = current + *l;
 181         }
 182
 183         return current;
 184 }
 185
 186 char *strnappend(const char *s, const char *suffix, size_t b) {
 187         size_t a;
 188         char *r;
 189
 190         if (!s && !suffix)
 191                 return strdup("");
 192
 193         if (!s)
 194                 return strndup(suffix, b);
 195
 196         if (!suffix)
 197                 return strdup(s);
 198
 199         assert(s);
 200         assert(suffix);
 201
 202         a = strlen(s);
 203         if (b > ((size_t) -1) - a)
 204                 return NULL;
 205
 206         r = new(char, a+b+1);
 207         if (!r)
 208                 return NULL;
 209
 210         memcpy(r, s, a);
 211         memcpy(r+a, suffix, b);
 212         r[a+b] = 0;
 213
 214         return r;
 215 }
 216
 217 char *strappend(const char *s, const char *suffix) {
 218         return strnappend(s, suffix, strlen_ptr(suffix));
 219 }
 220
 221 char *strjoin_real(const char *x, ...) {
 222         va_list ap;
 223         size_t l;
 224         char *r, *p;
 225
 226         va_start(ap, x);
 227
 228         if (x) {
 229                 l = strlen(x);
 230
 231                 for (;;) {
 232                         const char *t;
 233                         size_t n;
 234
 235                         t = va_arg(ap, const char *);
 236                         if (!t)
 237                                 break;
 238
 239                         n = strlen(t);
 240                         if (n > ((size_t) -1) - l) {
 241                                 va_end(ap);
 242                                 return NULL;
 243                         }
 244
 245                         l += n;
 246                 }
 247         } else
 248                 l = 0;
 249
 250         va_end(ap);
 251
 252         r = new(char, l+1);
 253         if (!r)
 254                 return NULL;
 255
 256         if (x) {
 257                 p = stpcpy(r, x);
 258
 259                 va_start(ap, x);
 260
 261                 for (;;) {
 262                         const char *t;
 263
 264                         t = va_arg(ap, const char *);
 265                         if (!t)
 266                                 break;
 267
 268                         p = stpcpy(p, t);
 269                 }
 270
 271                 va_end(ap);
 272         } else
 273                 r[0] = 0;
 274
 275         return r;
 276 }
 277
 278 char *strstrip(char *s) {
 279         char *e;
 280
 281         if (!s)
 282                 return NULL;
 283
 284         /* Drops trailing whitespace. Modifies the string in
 285          * place. Returns pointer to first non-space character */
 286
 287         s += strspn(s, WHITESPACE);
 288
 289         for (e = strchr(s, 0); e > s; e --)
 290                 if (!strchr(WHITESPACE, e[-1]))
 291                         break;
 292
 293         *e = 0;
 294
 295         return s;
 296 }
 297
 298 char *delete_chars(char *s, const char *bad) {
 299         char *f, *t;
 300
 301         /* Drops all specified bad characters, regardless where in the string */
 302
 303         if (!s)
 304                 return NULL;
 305
 306         if (!bad)
 307                 bad = WHITESPACE;
 308
 309         for (f = s, t = s; *f; f++) {
 310                 if (strchr(bad, *f))
 311                         continue;
 312
 313                 *(t++) = *f;
 314         }
 315
 316         *t = 0;
 317
 318         return s;
 319 }
 320
 321 char *delete_trailing_chars(char *s, const char *bad) {
 322         char *p, *c = s;
 323
 324         /* Drops all specified bad characters, at the end of the string */
 325
 326         if (!s)
 327                 return NULL;
 328
 329         if (!bad)
 330                 bad = WHITESPACE;
 331
 332         for (p = s; *p; p++)
 333                 if (!strchr(bad, *p))
 334                         c = p + 1;
 335
 336         *c = 0;
 337
 338         return s;
 339 }
 340
 341 char *truncate_nl(char *s) {
 342         assert(s);
 343
 344         s[strcspn(s, NEWLINE)] = 0;
 345         return s;
 346 }
 347
 348 char ascii_tolower(char x) {
 349
 350         if (x >= 'A' && x <= 'Z')
 351                 return x - 'A' + 'a';
 352
 353         return x;
 354 }
 355
 356 char ascii_toupper(char x) {
 357
 358         if (x >= 'a' && x <= 'z')
 359                 return x - 'a' + 'A';
 360
 361         return x;
 362 }
 363
 364 char *ascii_strlower(char *t) {
 365         char *p;
 366
 367         assert(t);
 368
 369         for (p = t; *p; p++)
 370                 *p = ascii_tolower(*p);
 371
 372         return t;
 373 }
 374
 375 char *ascii_strupper(char *t) {
 376         char *p;
 377
 378         assert(t);
 379
 380         for (p = t; *p; p++)
 381                 *p = ascii_toupper(*p);
 382
 383         return t;
 384 }
 385
 386 char *ascii_strlower_n(char *t, size_t n) {
 387         size_t i;
 388
 389         if (n <= 0)
 390                 return t;
 391
 392         for (i = 0; i < n; i++)
 393                 t[i] = ascii_tolower(t[i]);
 394
 395         return t;
 396 }
 397
 398 int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
 399
 400         for (; n > 0; a++, b++, n--) {
 401                 int x, y;
 402
 403                 x = (int) (uint8_t) ascii_tolower(*a);
 404                 y = (int) (uint8_t) ascii_tolower(*b);
 405
 406                 if (x != y)
 407                         return x - y;
 408         }
 409
 410         return 0;
 411 }
 412
 413 int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
 414         int r;
 415
 416         r = ascii_strcasecmp_n(a, b, MIN(n, m));
 417         if (r != 0)
 418                 return r;
 419
 420         if (n < m)
 421                 return -1;
 422         else if (n > m)
 423                 return 1;
 424         else
 425                 return 0;
 426 }
 427
 428 bool chars_intersect(const char *a, const char *b) {
 429         const char *p;
 430
 431         /* Returns true if any of the chars in a are in b. */
 432         for (p = a; *p; p++)
 433                 if (strchr(b, *p))
 434                         return true;
 435
 436         return false;
 437 }
 438
 439 bool string_has_cc(const char *p, const char *ok) {
 440         const char *t;
 441
 442         assert(p);
 443
 444         /*
 445          * Check if a string contains control characters. If 'ok' is
 446          * non-NULL it may be a string containing additional CCs to be
 447          * considered OK.
 448          */
 449
 450         for (t = p; *t; t++) {
 451                 if (ok && strchr(ok, *t))
 452                         continue;
 453
 454                 if (*t > 0 && *t < ' ')
 455                         return true;
 456
 457                 if (*t == 127)
 458                         return true;
 459         }
 460
 461         return false;
 462 }
 463
 464 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 465         size_t x;
 466         char *r;
 467
 468         assert(s);
 469         assert(percent <= 100);
 470         assert(new_length >= 3);
 471
 472         if (old_length <= 3 || old_length <= new_length)
 473                 return strndup(s, old_length);
 474
 475         r = new0(char, new_length+3);
 476         if (!r)
 477                 return NULL;
 478
 479         x = (new_length * percent) / 100;
 480
 481         if (x > new_length - 3)
 482                 x = new_length - 3;
 483
 484         memcpy(r, s, x);
 485         r[x] = 0xe2; /* tri-dot ellipsis: … */
 486         r[x+1] = 0x80;
 487         r[x+2] = 0xa6;
 488         memcpy(r + x + 3,
 489                s + old_length - (new_length - x - 1),
 490                new_length - x - 1);
 491
 492         return r;
 493 }
 494
 495 char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 496         size_t x;
 497         char *e;
 498         const char *i, *j;
 499         unsigned k, len, len2;
 500         int r;
 501
 502         assert(s);
 503         assert(percent <= 100);
 504
 505         if (new_length == (size_t) -1)
 506                 return strndup(s, old_length);
 507
 508         assert(new_length >= 3);
 509
 510         /* if no multibyte characters use ascii_ellipsize_mem for speed */
 511         if (ascii_is_valid(s))
 512                 return ascii_ellipsize_mem(s, old_length, new_length, percent);
 513
 514         if (old_length <= 3 || old_length <= new_length)
 515                 return strndup(s, old_length);
 516
 517         x = (new_length * percent) / 100;
 518
 519         if (x > new_length - 3)
 520                 x = new_length - 3;
 521
 522         k = 0;
 523         for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
 524                 char32_t c;
 525
 526                 r = utf8_encoded_to_unichar(i, &c);
 527                 if (r < 0)
 528                         return NULL;
 529                 k += unichar_iswide(c) ? 2 : 1;
 530         }
 531
 532         if (k > x) /* last character was wide and went over quota */
 533                 x++;
 534
 535         for (j = s + old_length; k < new_length && j > i; ) {
 536                 char32_t c;
 537
 538                 j = utf8_prev_char(j);
 539                 r = utf8_encoded_to_unichar(j, &c);
 540                 if (r < 0)
 541                         return NULL;
 542                 k += unichar_iswide(c) ? 2 : 1;
 543         }
 544         assert(i <= j);
 545
 546         /* we don't actually need to ellipsize */
 547         if (i == j)
 548                 return memdup(s, old_length + 1);
 549
 550         /* make space for ellipsis */
 551         j = utf8_next_char(j);
 552
 553         len = i - s;
 554         len2 = s + old_length - j;
 555         e = new(char, len + 3 + len2 + 1);
 556         if (!e)
 557                 return NULL;
 558
 559         /*
 560         printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
 561                old_length, new_length, x, len, len2, k);
 562         */
 563
 564         memcpy(e, s, len);
 565         e[len]   = 0xe2; /* tri-dot ellipsis: … */
 566         e[len + 1] = 0x80;
 567         e[len + 2] = 0xa6;
 568
 569         memcpy(e + len + 3, j, len2 + 1);
 570
 571         return e;
 572 }
 573
 574 char *ellipsize(const char *s, size_t length, unsigned percent) {
 575
 576         if (length == (size_t) -1)
 577                 return strdup(s);
 578
 579         return ellipsize_mem(s, strlen(s), length, percent);
 580 }
 581
 582 bool nulstr_contains(const char *nulstr, const char *needle) {
 583         const char *i;
 584
 585         if (!nulstr)
 586                 return false;
 587
 588         NULSTR_FOREACH(i, nulstr)
 589                 if (streq(i, needle))
 590                         return true;
 591
 592         return false;
 593 }
 594
 595 char* strshorten(char *s, size_t l) {
 596         assert(s);
 597
 598         if (strnlen(s, l+1) > l)
 599                 s[l] = 0;
 600
 601         return s;
 602 }
 603
 604 char *strreplace(const char *text, const char *old_string, const char *new_string) {
 605         const char *f;
 606         char *t, *r;
 607         size_t l, old_len, new_len;
 608
 609         assert(text);
 610         assert(old_string);
 611         assert(new_string);
 612
 613         old_len = strlen(old_string);
 614         new_len = strlen(new_string);
 615
 616         l = strlen(text);
 617         r = new(char, l+1);
 618         if (!r)
 619                 return NULL;
 620
 621         f = text;
 622         t = r;
 623         while (*f) {
 624                 char *a;
 625                 size_t d, nl;
 626
 627                 if (!startswith(f, old_string)) {
 628                         *(t++) = *(f++);
 629                         continue;
 630                 }
 631
 632                 d = t - r;
 633                 nl = l - old_len + new_len;
 634                 a = realloc(r, nl + 1);
 635                 if (!a)
 636                         goto oom;
 637
 638                 l = nl;
 639                 r = a;
 640                 t = r + d;
 641
 642                 t = stpcpy(t, new_string);
 643                 f += old_len;
 644         }
 645
 646         *t = 0;
 647         return r;
 648
 649 oom:
 650         return mfree(r);
 651 }
 652
 653 char *strip_tab_ansi(char **ibuf, size_t *_isz) {
 654         const char *i, *begin = NULL;
 655         enum {
 656                 STATE_OTHER,
 657                 STATE_ESCAPE,
 658                 STATE_BRACKET
 659         } state = STATE_OTHER;
 660         char *obuf = NULL;
 661         size_t osz = 0, isz;
 662         FILE *f;
 663
 664         assert(ibuf);
 665         assert(*ibuf);
 666
 667         /* Strips ANSI color and replaces TABs by 8 spaces */
 668
 669         isz = _isz ? *_isz : strlen(*ibuf);
 670
 671         f = open_memstream(&obuf, &osz);
 672         if (!f)
 673                 return NULL;
 674
 675         /* Note we use the _unlocked() stdio variants on f for performance
 676          * reasons.  It's safe to do so since we created f here and it
 677          * doesn't leave our scope.
 678          */
 679
 680         for (i = *ibuf; i < *ibuf + isz + 1; i++) {
 681
 682                 switch (state) {
 683
 684                 case STATE_OTHER:
 685                         if (i >= *ibuf + isz) /* EOT */
 686                                 break;
 687                         else if (*i == '\x1B')
 688                                 state = STATE_ESCAPE;
 689                         else if (*i == '\t')
 690                                 fputs_unlocked("        ", f);
 691                         else
 692                                 fputc_unlocked(*i, f);
 693                         break;
 694
 695                 case STATE_ESCAPE:
 696                         if (i >= *ibuf + isz) { /* EOT */
 697                                 fputc_unlocked('\x1B', f);
 698                                 break;
 699                         } else if (*i == '[') {
 700                                 state = STATE_BRACKET;
 701                                 begin = i + 1;
 702                         } else {
 703                                 fputc_unlocked('\x1B', f);
 704                                 fputc_unlocked(*i, f);
 705                                 state = STATE_OTHER;
 706                         }
 707
 708                         break;
 709
 710                 case STATE_BRACKET:
 711
 712                         if (i >= *ibuf + isz || /* EOT */
 713                             (!(*i >= '0' && *i <= '9') && !IN_SET(*i, ';', 'm'))) {
 714                                 fputc_unlocked('\x1B', f);
 715                                 fputc_unlocked('[', f);
 716                                 state = STATE_OTHER;
 717                                 i = begin-1;
 718                         } else if (*i == 'm')
 719                                 state = STATE_OTHER;
 720                         break;
 721                 }
 722         }
 723
 724         if (ferror(f)) {
 725                 fclose(f);
 726                 return mfree(obuf);
 727         }
 728
 729         fclose(f);
 730
 731         free(*ibuf);
 732         *ibuf = obuf;
 733
 734         if (_isz)
 735                 *_isz = osz;
 736
 737         return obuf;
 738 }
 739
 740 char *strextend(char **x, ...) {
 741         va_list ap;
 742         size_t f, l;
 743         char *r, *p;
 744
 745         assert(x);
 746
 747         l = f = strlen_ptr(*x);
 748
 749         va_start(ap, x);
 750         for (;;) {
 751                 const char *t;
 752                 size_t n;
 753
 754                 t = va_arg(ap, const char *);
 755                 if (!t)
 756                         break;
 757
 758                 n = strlen(t);
 759                 if (n > ((size_t) -1) - l) {
 760                         va_end(ap);
 761                         return NULL;
 762                 }
 763
 764                 l += n;
 765         }
 766         va_end(ap);
 767
 768         r = realloc(*x, l+1);
 769         if (!r)
 770                 return NULL;
 771
 772         p = r + f;
 773
 774         va_start(ap, x);
 775         for (;;) {
 776                 const char *t;
 777
 778                 t = va_arg(ap, const char *);
 779                 if (!t)
 780                         break;
 781
 782                 p = stpcpy(p, t);
 783         }
 784         va_end(ap);
 785
 786         *p = 0;
 787         *x = r;
 788
 789         return r + l;
 790 }
 791
 792 char *strrep(const char *s, unsigned n) {
 793         size_t l;
 794         char *r, *p;
 795         unsigned i;
 796
 797         assert(s);
 798
 799         l = strlen(s);
 800         p = r = malloc(l * n + 1);
 801         if (!r)
 802                 return NULL;
 803
 804         for (i = 0; i < n; i++)
 805                 p = stpcpy(p, s);
 806
 807         *p = 0;
 808         return r;
 809 }
 810
 811 int split_pair(const char *s, const char *sep, char **l, char **r) {
 812         char *x, *a, *b;
 813
 814         assert(s);
 815         assert(sep);
 816         assert(l);
 817         assert(r);
 818
 819         if (isempty(sep))
 820                 return -EINVAL;
 821
 822         x = strstr(s, sep);
 823         if (!x)
 824                 return -EINVAL;
 825
 826         a = strndup(s, x - s);
 827         if (!a)
 828                 return -ENOMEM;
 829
 830         b = strdup(x + strlen(sep));
 831         if (!b) {
 832                 free(a);
 833                 return -ENOMEM;
 834         }
 835
 836         *l = a;
 837         *r = b;
 838
 839         return 0;
 840 }
 841
 842 int free_and_strdup(char **p, const char *s) {
 843         char *t;
 844
 845         assert(p);
 846
 847         /* Replaces a string pointer with an strdup()ed new string,
 848          * possibly freeing the old one. */
 849
 850         if (streq_ptr(*p, s))
 851                 return 0;
 852
 853         if (s) {
 854                 t = strdup(s);
 855                 if (!t)
 856                         return -ENOMEM;
 857         } else
 858                 t = NULL;
 859
 860         free(*p);
 861         *p = t;
 862
 863         return 1;
 864 }
 865
 866 #if !HAVE_EXPLICIT_BZERO
 867 /*
 868  * Pointer to memset is volatile so that compiler must de-reference
 869  * the pointer and can't assume that it points to any function in
 870  * particular (such as memset, which it then might further "optimize")
 871  * This approach is inspired by openssl's crypto/mem_clr.c.
 872  */
 873 typedef void *(*memset_t)(void *,int,size_t);
 874
 875 static volatile memset_t memset_func = memset;
 876
 877 void explicit_bzero(void *p, size_t l) {
 878         memset_func(p, '\0', l);
 879 }
 880 #endif
 881
 882 char* string_erase(char *x) {
 883         if (!x)
 884                 return NULL;
 885
 886         /* A delicious drop of snake-oil! To be called on memory where
 887          * we stored passphrases or so, after we used them. */
 888         explicit_bzero(x, strlen(x));
 889         return x;
 890 }
 891
 892 char *string_free_erase(char *s) {
 893         return mfree(string_erase(s));
 894 }
 895
 896 bool string_is_safe(const char *p) {
 897         const char *t;
 898
 899         if (!p)
 900                 return false;
 901
 902         for (t = p; *t; t++) {
 903                 if (*t > 0 && *t < ' ') /* no control characters */
 904                         return false;
 905
 906                 if (strchr(QUOTES "\\\x7f", *t))
 907                         return false;
 908         }
 909
 910         return true;
 911 }