src/basic/string-util.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2010 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <errno.h>
  21 #include <stdarg.h>
  22 #include <stdint.h>
  23 #include <stdio.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26
  27 #include "alloc-util.h"
  28 #include "gunicode.h"
  29 #include "macro.h"
  30 #include "string-util.h"
  31 #include "utf8.h"
  32 #include "util.h"
  33
  34 int strcmp_ptr(const char *a, const char *b) {
  35
  36         /* Like strcmp(), but tries to make sense of NULL pointers */
  37         if (a && b)
  38                 return strcmp(a, b);
  39
  40         if (!a && b)
  41                 return -1;
  42
  43         if (a && !b)
  44                 return 1;
  45
  46         return 0;
  47 }
  48
  49 char* endswith(const char *s, const char *postfix) {
  50         size_t sl, pl;
  51
  52         assert(s);
  53         assert(postfix);
  54
  55         sl = strlen(s);
  56         pl = strlen(postfix);
  57
  58         if (pl == 0)
  59                 return (char*) s + sl;
  60
  61         if (sl < pl)
  62                 return NULL;
  63
  64         if (memcmp(s + sl - pl, postfix, pl) != 0)
  65                 return NULL;
  66
  67         return (char*) s + sl - pl;
  68 }
  69
  70 char* endswith_no_case(const char *s, const char *postfix) {
  71         size_t sl, pl;
  72
  73         assert(s);
  74         assert(postfix);
  75
  76         sl = strlen(s);
  77         pl = strlen(postfix);
  78
  79         if (pl == 0)
  80                 return (char*) s + sl;
  81
  82         if (sl < pl)
  83                 return NULL;
  84
  85         if (strcasecmp(s + sl - pl, postfix) != 0)
  86                 return NULL;
  87
  88         return (char*) s + sl - pl;
  89 }
  90
  91 char* first_word(const char *s, const char *word) {
  92         size_t sl, wl;
  93         const char *p;
  94
  95         assert(s);
  96         assert(word);
  97
  98         /* Checks if the string starts with the specified word, either
  99          * followed by NUL or by whitespace. Returns a pointer to the
 100          * NUL or the first character after the whitespace. */
 101
 102         sl = strlen(s);
 103         wl = strlen(word);
 104
 105         if (sl < wl)
 106                 return NULL;
 107
 108         if (wl == 0)
 109                 return (char*) s;
 110
 111         if (memcmp(s, word, wl) != 0)
 112                 return NULL;
 113
 114         p = s + wl;
 115         if (*p == 0)
 116                 return (char*) p;
 117
 118         if (!strchr(WHITESPACE, *p))
 119                 return NULL;
 120
 121         p += strspn(p, WHITESPACE);
 122         return (char*) p;
 123 }
 124
 125 static size_t strcspn_escaped(const char *s, const char *reject) {
 126         bool escaped = false;
 127         int n;
 128
 129         for (n=0; s[n]; n++) {
 130                 if (escaped)
 131                         escaped = false;
 132                 else if (s[n] == '\\')
 133                         escaped = true;
 134                 else if (strchr(reject, s[n]))
 135                         break;
 136         }
 137
 138         /* if s ends in \, return index of previous char */
 139         return n - escaped;
 140 }
 141
 142 /* Split a string into words. */
 143 const char* split(const char **state, size_t *l, const char *separator, bool quoted) {
 144         const char *current;
 145
 146         current = *state;
 147
 148         if (!*current) {
 149                 assert(**state == '\0');
 150                 return NULL;
 151         }
 152
 153         current += strspn(current, separator);
 154         if (!*current) {
 155                 *state = current;
 156                 return NULL;
 157         }
 158
 159         if (quoted && strchr("\'\"", *current)) {
 160                 char quotechars[2] = {*current, '\0'};
 161
 162                 *l = strcspn_escaped(current + 1, quotechars);
 163                 if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] ||
 164                     (current[*l + 2] && !strchr(separator, current[*l + 2]))) {
 165                         /* right quote missing or garbage at the end */
 166                         *state = current;
 167                         return NULL;
 168                 }
 169                 *state = current++ + *l + 2;
 170         } else if (quoted) {
 171                 *l = strcspn_escaped(current, separator);
 172                 if (current[*l] && !strchr(separator, current[*l])) {
 173                         /* unfinished escape */
 174                         *state = current;
 175                         return NULL;
 176                 }
 177                 *state = current + *l;
 178         } else {
 179                 *l = strcspn(current, separator);
 180                 *state = current + *l;
 181         }
 182
 183         return current;
 184 }
 185
 186 char *strnappend(const char *s, const char *suffix, size_t b) {
 187         size_t a;
 188         char *r;
 189
 190         if (!s && !suffix)
 191                 return strdup("");
 192
 193         if (!s)
 194                 return strndup(suffix, b);
 195
 196         if (!suffix)
 197                 return strdup(s);
 198
 199         assert(s);
 200         assert(suffix);
 201
 202         a = strlen(s);
 203         if (b > ((size_t) -1) - a)
 204                 return NULL;
 205
 206         r = new(char, a+b+1);
 207         if (!r)
 208                 return NULL;
 209
 210         memcpy(r, s, a);
 211         memcpy(r+a, suffix, b);
 212         r[a+b] = 0;
 213
 214         return r;
 215 }
 216
 217 char *strappend(const char *s, const char *suffix) {
 218         return strnappend(s, suffix, suffix ? strlen(suffix) : 0);
 219 }
 220
 221 char *strjoin_real(const char *x, ...) {
 222         va_list ap;
 223         size_t l;
 224         char *r, *p;
 225
 226         va_start(ap, x);
 227
 228         if (x) {
 229                 l = strlen(x);
 230
 231                 for (;;) {
 232                         const char *t;
 233                         size_t n;
 234
 235                         t = va_arg(ap, const char *);
 236                         if (!t)
 237                                 break;
 238
 239                         n = strlen(t);
 240                         if (n > ((size_t) -1) - l) {
 241                                 va_end(ap);
 242                                 return NULL;
 243                         }
 244
 245                         l += n;
 246                 }
 247         } else
 248                 l = 0;
 249
 250         va_end(ap);
 251
 252         r = new(char, l+1);
 253         if (!r)
 254                 return NULL;
 255
 256         if (x) {
 257                 p = stpcpy(r, x);
 258
 259                 va_start(ap, x);
 260
 261                 for (;;) {
 262                         const char *t;
 263
 264                         t = va_arg(ap, const char *);
 265                         if (!t)
 266                                 break;
 267
 268                         p = stpcpy(p, t);
 269                 }
 270
 271                 va_end(ap);
 272         } else
 273                 r[0] = 0;
 274
 275         return r;
 276 }
 277
 278 char *strstrip(char *s) {
 279         char *e;
 280
 281         /* Drops trailing whitespace. Modifies the string in
 282          * place. Returns pointer to first non-space character */
 283
 284         s += strspn(s, WHITESPACE);
 285
 286         for (e = strchr(s, 0); e > s; e --)
 287                 if (!strchr(WHITESPACE, e[-1]))
 288                         break;
 289
 290         *e = 0;
 291
 292         return s;
 293 }
 294
 295 char *delete_chars(char *s, const char *bad) {
 296         char *f, *t;
 297
 298         /* Drops all whitespace, regardless where in the string */
 299
 300         for (f = s, t = s; *f; f++) {
 301                 if (strchr(bad, *f))
 302                         continue;
 303
 304                 *(t++) = *f;
 305         }
 306
 307         *t = 0;
 308
 309         return s;
 310 }
 311
 312 char *truncate_nl(char *s) {
 313         assert(s);
 314
 315         s[strcspn(s, NEWLINE)] = 0;
 316         return s;
 317 }
 318
 319 char ascii_tolower(char x) {
 320
 321         if (x >= 'A' && x <= 'Z')
 322                 return x - 'A' + 'a';
 323
 324         return x;
 325 }
 326
 327 char ascii_toupper(char x) {
 328
 329         if (x >= 'a' && x <= 'z')
 330                 return x - 'a' + 'A';
 331
 332         return x;
 333 }
 334
 335 char *ascii_strlower(char *t) {
 336         char *p;
 337
 338         assert(t);
 339
 340         for (p = t; *p; p++)
 341                 *p = ascii_tolower(*p);
 342
 343         return t;
 344 }
 345
 346 char *ascii_strupper(char *t) {
 347         char *p;
 348
 349         assert(t);
 350
 351         for (p = t; *p; p++)
 352                 *p = ascii_toupper(*p);
 353
 354         return t;
 355 }
 356
 357 char *ascii_strlower_n(char *t, size_t n) {
 358         size_t i;
 359
 360         if (n <= 0)
 361                 return t;
 362
 363         for (i = 0; i < n; i++)
 364                 t[i] = ascii_tolower(t[i]);
 365
 366         return t;
 367 }
 368
 369 int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
 370
 371         for (; n > 0; a++, b++, n--) {
 372                 int x, y;
 373
 374                 x = (int) (uint8_t) ascii_tolower(*a);
 375                 y = (int) (uint8_t) ascii_tolower(*b);
 376
 377                 if (x != y)
 378                         return x - y;
 379         }
 380
 381         return 0;
 382 }
 383
 384 int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
 385         int r;
 386
 387         r = ascii_strcasecmp_n(a, b, MIN(n, m));
 388         if (r != 0)
 389                 return r;
 390
 391         if (n < m)
 392                 return -1;
 393         else if (n > m)
 394                 return 1;
 395         else
 396                 return 0;
 397 }
 398
 399 bool chars_intersect(const char *a, const char *b) {
 400         const char *p;
 401
 402         /* Returns true if any of the chars in a are in b. */
 403         for (p = a; *p; p++)
 404                 if (strchr(b, *p))
 405                         return true;
 406
 407         return false;
 408 }
 409
 410 bool string_has_cc(const char *p, const char *ok) {
 411         const char *t;
 412
 413         assert(p);
 414
 415         /*
 416          * Check if a string contains control characters. If 'ok' is
 417          * non-NULL it may be a string containing additional CCs to be
 418          * considered OK.
 419          */
 420
 421         for (t = p; *t; t++) {
 422                 if (ok && strchr(ok, *t))
 423                         continue;
 424
 425                 if (*t > 0 && *t < ' ')
 426                         return true;
 427
 428                 if (*t == 127)
 429                         return true;
 430         }
 431
 432         return false;
 433 }
 434
 435 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 436         size_t x;
 437         char *r;
 438
 439         assert(s);
 440         assert(percent <= 100);
 441         assert(new_length >= 3);
 442
 443         if (old_length <= 3 || old_length <= new_length)
 444                 return strndup(s, old_length);
 445
 446         r = new0(char, new_length+3);
 447         if (!r)
 448                 return NULL;
 449
 450         x = (new_length * percent) / 100;
 451
 452         if (x > new_length - 3)
 453                 x = new_length - 3;
 454
 455         memcpy(r, s, x);
 456         r[x] = 0xe2; /* tri-dot ellipsis: … */
 457         r[x+1] = 0x80;
 458         r[x+2] = 0xa6;
 459         memcpy(r + x + 3,
 460                s + old_length - (new_length - x - 1),
 461                new_length - x - 1);
 462
 463         return r;
 464 }
 465
 466 char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 467         size_t x;
 468         char *e;
 469         const char *i, *j;
 470         unsigned k, len, len2;
 471         int r;
 472
 473         assert(s);
 474         assert(percent <= 100);
 475         assert(new_length >= 3);
 476
 477         /* if no multibyte characters use ascii_ellipsize_mem for speed */
 478         if (ascii_is_valid(s))
 479                 return ascii_ellipsize_mem(s, old_length, new_length, percent);
 480
 481         if (old_length <= 3 || old_length <= new_length)
 482                 return strndup(s, old_length);
 483
 484         x = (new_length * percent) / 100;
 485
 486         if (x > new_length - 3)
 487                 x = new_length - 3;
 488
 489         k = 0;
 490         for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
 491                 char32_t c;
 492
 493                 r = utf8_encoded_to_unichar(i, &c);
 494                 if (r < 0)
 495                         return NULL;
 496                 k += unichar_iswide(c) ? 2 : 1;
 497         }
 498
 499         if (k > x) /* last character was wide and went over quota */
 500                 x++;
 501
 502         for (j = s + old_length; k < new_length && j > i; ) {
 503                 char32_t c;
 504
 505                 j = utf8_prev_char(j);
 506                 r = utf8_encoded_to_unichar(j, &c);
 507                 if (r < 0)
 508                         return NULL;
 509                 k += unichar_iswide(c) ? 2 : 1;
 510         }
 511         assert(i <= j);
 512
 513         /* we don't actually need to ellipsize */
 514         if (i == j)
 515                 return memdup(s, old_length + 1);
 516
 517         /* make space for ellipsis */
 518         j = utf8_next_char(j);
 519
 520         len = i - s;
 521         len2 = s + old_length - j;
 522         e = new(char, len + 3 + len2 + 1);
 523         if (!e)
 524                 return NULL;
 525
 526         /*
 527         printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
 528                old_length, new_length, x, len, len2, k);
 529         */
 530
 531         memcpy(e, s, len);
 532         e[len]   = 0xe2; /* tri-dot ellipsis: … */
 533         e[len + 1] = 0x80;
 534         e[len + 2] = 0xa6;
 535
 536         memcpy(e + len + 3, j, len2 + 1);
 537
 538         return e;
 539 }
 540
 541 char *ellipsize(const char *s, size_t length, unsigned percent) {
 542         return ellipsize_mem(s, strlen(s), length, percent);
 543 }
 544
 545 bool nulstr_contains(const char *nulstr, const char *needle) {
 546         const char *i;
 547
 548         if (!nulstr)
 549                 return false;
 550
 551         NULSTR_FOREACH(i, nulstr)
 552                 if (streq(i, needle))
 553                         return true;
 554
 555         return false;
 556 }
 557
 558 char* strshorten(char *s, size_t l) {
 559         assert(s);
 560
 561         if (l < strlen(s))
 562                 s[l] = 0;
 563
 564         return s;
 565 }
 566
 567 char *strreplace(const char *text, const char *old_string, const char *new_string) {
 568         const char *f;
 569         char *t, *r;
 570         size_t l, old_len, new_len;
 571
 572         assert(text);
 573         assert(old_string);
 574         assert(new_string);
 575
 576         old_len = strlen(old_string);
 577         new_len = strlen(new_string);
 578
 579         l = strlen(text);
 580         r = new(char, l+1);
 581         if (!r)
 582                 return NULL;
 583
 584         f = text;
 585         t = r;
 586         while (*f) {
 587                 char *a;
 588                 size_t d, nl;
 589
 590                 if (!startswith(f, old_string)) {
 591                         *(t++) = *(f++);
 592                         continue;
 593                 }
 594
 595                 d = t - r;
 596                 nl = l - old_len + new_len;
 597                 a = realloc(r, nl + 1);
 598                 if (!a)
 599                         goto oom;
 600
 601                 l = nl;
 602                 r = a;
 603                 t = r + d;
 604
 605                 t = stpcpy(t, new_string);
 606                 f += old_len;
 607         }
 608
 609         *t = 0;
 610         return r;
 611
 612 oom:
 613         return mfree(r);
 614 }
 615
 616 char *strip_tab_ansi(char **ibuf, size_t *_isz) {
 617         const char *i, *begin = NULL;
 618         enum {
 619                 STATE_OTHER,
 620                 STATE_ESCAPE,
 621                 STATE_BRACKET
 622         } state = STATE_OTHER;
 623         char *obuf = NULL;
 624         size_t osz = 0, isz;
 625         FILE *f;
 626
 627         assert(ibuf);
 628         assert(*ibuf);
 629
 630         /* Strips ANSI color and replaces TABs by 8 spaces */
 631
 632         isz = _isz ? *_isz : strlen(*ibuf);
 633
 634         f = open_memstream(&obuf, &osz);
 635         if (!f)
 636                 return NULL;
 637
 638         /* Note we use the _unlocked() stdio variants on f for performance
 639          * reasons.  It's safe to do so since we created f here and it
 640          * doesn't leave our scope.
 641          */
 642
 643         for (i = *ibuf; i < *ibuf + isz + 1; i++) {
 644
 645                 switch (state) {
 646
 647                 case STATE_OTHER:
 648                         if (i >= *ibuf + isz) /* EOT */
 649                                 break;
 650                         else if (*i == '\x1B')
 651                                 state = STATE_ESCAPE;
 652                         else if (*i == '\t')
 653                                 fputs_unlocked("        ", f);
 654                         else
 655                                 fputc_unlocked(*i, f);
 656                         break;
 657
 658                 case STATE_ESCAPE:
 659                         if (i >= *ibuf + isz) { /* EOT */
 660                                 fputc_unlocked('\x1B', f);
 661                                 break;
 662                         } else if (*i == '[') {
 663                                 state = STATE_BRACKET;
 664                                 begin = i + 1;
 665                         } else {
 666                                 fputc_unlocked('\x1B', f);
 667                                 fputc_unlocked(*i, f);
 668                                 state = STATE_OTHER;
 669                         }
 670
 671                         break;
 672
 673                 case STATE_BRACKET:
 674
 675                         if (i >= *ibuf + isz || /* EOT */
 676                             (!(*i >= '0' && *i <= '9') && *i != ';' && *i != 'm')) {
 677                                 fputc_unlocked('\x1B', f);
 678                                 fputc_unlocked('[', f);
 679                                 state = STATE_OTHER;
 680                                 i = begin-1;
 681                         } else if (*i == 'm')
 682                                 state = STATE_OTHER;
 683                         break;
 684                 }
 685         }
 686
 687         if (ferror(f)) {
 688                 fclose(f);
 689                 return mfree(obuf);
 690         }
 691
 692         fclose(f);
 693
 694         free(*ibuf);
 695         *ibuf = obuf;
 696
 697         if (_isz)
 698                 *_isz = osz;
 699
 700         return obuf;
 701 }
 702
 703 char *strextend(char **x, ...) {
 704         va_list ap;
 705         size_t f, l;
 706         char *r, *p;
 707
 708         assert(x);
 709
 710         l = f = *x ? strlen(*x) : 0;
 711
 712         va_start(ap, x);
 713         for (;;) {
 714                 const char *t;
 715                 size_t n;
 716
 717                 t = va_arg(ap, const char *);
 718                 if (!t)
 719                         break;
 720
 721                 n = strlen(t);
 722                 if (n > ((size_t) -1) - l) {
 723                         va_end(ap);
 724                         return NULL;
 725                 }
 726
 727                 l += n;
 728         }
 729         va_end(ap);
 730
 731         r = realloc(*x, l+1);
 732         if (!r)
 733                 return NULL;
 734
 735         p = r + f;
 736
 737         va_start(ap, x);
 738         for (;;) {
 739                 const char *t;
 740
 741                 t = va_arg(ap, const char *);
 742                 if (!t)
 743                         break;
 744
 745                 p = stpcpy(p, t);
 746         }
 747         va_end(ap);
 748
 749         *p = 0;
 750         *x = r;
 751
 752         return r + l;
 753 }
 754
 755 char *strrep(const char *s, unsigned n) {
 756         size_t l;
 757         char *r, *p;
 758         unsigned i;
 759
 760         assert(s);
 761
 762         l = strlen(s);
 763         p = r = malloc(l * n + 1);
 764         if (!r)
 765                 return NULL;
 766
 767         for (i = 0; i < n; i++)
 768                 p = stpcpy(p, s);
 769
 770         *p = 0;
 771         return r;
 772 }
 773
 774 int split_pair(const char *s, const char *sep, char **l, char **r) {
 775         char *x, *a, *b;
 776
 777         assert(s);
 778         assert(sep);
 779         assert(l);
 780         assert(r);
 781
 782         if (isempty(sep))
 783                 return -EINVAL;
 784
 785         x = strstr(s, sep);
 786         if (!x)
 787                 return -EINVAL;
 788
 789         a = strndup(s, x - s);
 790         if (!a)
 791                 return -ENOMEM;
 792
 793         b = strdup(x + strlen(sep));
 794         if (!b) {
 795                 free(a);
 796                 return -ENOMEM;
 797         }
 798
 799         *l = a;
 800         *r = b;
 801
 802         return 0;
 803 }
 804
 805 int free_and_strdup(char **p, const char *s) {
 806         char *t;
 807
 808         assert(p);
 809
 810         /* Replaces a string pointer with an strdup()ed new string,
 811          * possibly freeing the old one. */
 812
 813         if (streq_ptr(*p, s))
 814                 return 0;
 815
 816         if (s) {
 817                 t = strdup(s);
 818                 if (!t)
 819                         return -ENOMEM;
 820         } else
 821                 t = NULL;
 822
 823         free(*p);
 824         *p = t;
 825
 826         return 1;
 827 }
 828
 829 #if !HAVE_DECL_EXPLICIT_BZERO
 830 /*
 831  * Pointer to memset is volatile so that compiler must de-reference
 832  * the pointer and can't assume that it points to any function in
 833  * particular (such as memset, which it then might further "optimize")
 834  * This approach is inspired by openssl's crypto/mem_clr.c.
 835  */
 836 typedef void *(*memset_t)(void *,int,size_t);
 837
 838 static volatile memset_t memset_func = memset;
 839
 840 void explicit_bzero(void *p, size_t l) {
 841         memset_func(p, '\0', l);
 842 }
 843 #endif
 844
 845 char* string_erase(char *x) {
 846         if (!x)
 847                 return NULL;
 848
 849         /* A delicious drop of snake-oil! To be called on memory where
 850          * we stored passphrases or so, after we used them. */
 851         explicit_bzero(x, strlen(x));
 852         return x;
 853 }
 854
 855 char *string_free_erase(char *s) {
 856         return mfree(string_erase(s));
 857 }
 858
 859 bool string_is_safe(const char *p) {
 860         const char *t;
 861
 862         if (!p)
 863                 return false;
 864
 865         for (t = p; *t; t++) {
 866                 if (*t > 0 && *t < ' ') /* no control characters */
 867                         return false;
 868
 869                 if (strchr(QUOTES "\\\x7f", *t))
 870                         return false;
 871         }
 872
 873         return true;
 874 }