src/basic/string-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <errno.h>
  23 #include <stdarg.h>
  24 #include <stdint.h>
  25 #include <stdio.h>
  26 #include <stdlib.h>
  27
  28 #include "alloc-util.h"
  29 #include "gunicode.h"
  30 #include "macro.h"
  31 #include "string-util.h"
  32 #include "utf8.h"
  33 #include "util.h"
  34
  35 int strcmp_ptr(const char *a, const char *b) {
  36
  37         /* Like strcmp(), but tries to make sense of NULL pointers */
  38         if (a && b)
  39                 return strcmp(a, b);
  40
  41         if (!a && b)
  42                 return -1;
  43
  44         if (a && !b)
  45                 return 1;
  46
  47         return 0;
  48 }
  49
  50 char* endswith(const char *s, const char *postfix) {
  51         size_t sl, pl;
  52
  53         assert(s);
  54         assert(postfix);
  55
  56         sl = strlen(s);
  57         pl = strlen(postfix);
  58
  59         if (pl == 0)
  60                 return (char*) s + sl;
  61
  62         if (sl < pl)
  63                 return NULL;
  64
  65         if (memcmp(s + sl - pl, postfix, pl) != 0)
  66                 return NULL;
  67
  68         return (char*) s + sl - pl;
  69 }
  70
  71 char* endswith_no_case(const char *s, const char *postfix) {
  72         size_t sl, pl;
  73
  74         assert(s);
  75         assert(postfix);
  76
  77         sl = strlen(s);
  78         pl = strlen(postfix);
  79
  80         if (pl == 0)
  81                 return (char*) s + sl;
  82
  83         if (sl < pl)
  84                 return NULL;
  85
  86         if (strcasecmp(s + sl - pl, postfix) != 0)
  87                 return NULL;
  88
  89         return (char*) s + sl - pl;
  90 }
  91
  92 char* first_word(const char *s, const char *word) {
  93         size_t sl, wl;
  94         const char *p;
  95
  96         assert(s);
  97         assert(word);
  98
  99         /* Checks if the string starts with the specified word, either
 100          * followed by NUL or by whitespace. Returns a pointer to the
 101          * NUL or the first character after the whitespace. */
 102
 103         sl = strlen(s);
 104         wl = strlen(word);
 105
 106         if (sl < wl)
 107                 return NULL;
 108
 109         if (wl == 0)
 110                 return (char*) s;
 111
 112         if (memcmp(s, word, wl) != 0)
 113                 return NULL;
 114
 115         p = s + wl;
 116         if (*p == 0)
 117                 return (char*) p;
 118
 119         if (!strchr(WHITESPACE, *p))
 120                 return NULL;
 121
 122         p += strspn(p, WHITESPACE);
 123         return (char*) p;
 124 }
 125
 126 static size_t strcspn_escaped(const char *s, const char *reject) {
 127         bool escaped = false;
 128         int n;
 129
 130         for (n=0; s[n]; n++) {
 131                 if (escaped)
 132                         escaped = false;
 133                 else if (s[n] == '\\')
 134                         escaped = true;
 135                 else if (strchr(reject, s[n]))
 136                         break;
 137         }
 138
 139         /* if s ends in \, return index of previous char */
 140         return n - escaped;
 141 }
 142
 143 /* Split a string into words. */
 144 const char* split(const char **state, size_t *l, const char *separator, bool quoted) {
 145         const char *current;
 146
 147         current = *state;
 148
 149         if (!*current) {
 150                 assert(**state == '\0');
 151                 return NULL;
 152         }
 153
 154         current += strspn(current, separator);
 155         if (!*current) {
 156                 *state = current;
 157                 return NULL;
 158         }
 159
 160         if (quoted && strchr("\'\"", *current)) {
 161                 char quotechars[2] = {*current, '\0'};
 162
 163                 *l = strcspn_escaped(current + 1, quotechars);
 164                 if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] ||
 165                     (current[*l + 2] && !strchr(separator, current[*l + 2]))) {
 166                         /* right quote missing or garbage at the end */
 167                         *state = current;
 168                         return NULL;
 169                 }
 170                 *state = current++ + *l + 2;
 171         } else if (quoted) {
 172                 *l = strcspn_escaped(current, separator);
 173                 if (current[*l] && !strchr(separator, current[*l])) {
 174                         /* unfinished escape */
 175                         *state = current;
 176                         return NULL;
 177                 }
 178                 *state = current + *l;
 179         } else {
 180                 *l = strcspn(current, separator);
 181                 *state = current + *l;
 182         }
 183
 184         return current;
 185 }
 186
 187 char *strnappend(const char *s, const char *suffix, size_t b) {
 188         size_t a;
 189         char *r;
 190
 191         if (!s && !suffix)
 192                 return strdup("");
 193
 194         if (!s)
 195                 return strndup(suffix, b);
 196
 197         if (!suffix)
 198                 return strdup(s);
 199
 200         assert(s);
 201         assert(suffix);
 202
 203         a = strlen(s);
 204         if (b > ((size_t) -1) - a)
 205                 return NULL;
 206
 207         r = new(char, a+b+1);
 208         if (!r)
 209                 return NULL;
 210
 211         memcpy(r, s, a);
 212         memcpy(r+a, suffix, b);
 213         r[a+b] = 0;
 214
 215         return r;
 216 }
 217
 218 char *strappend(const char *s, const char *suffix) {
 219         return strnappend(s, suffix, suffix ? strlen(suffix) : 0);
 220 }
 221
 222 char *strjoin(const char *x, ...) {
 223         va_list ap;
 224         size_t l;
 225         char *r, *p;
 226
 227         va_start(ap, x);
 228
 229         if (x) {
 230                 l = strlen(x);
 231
 232                 for (;;) {
 233                         const char *t;
 234                         size_t n;
 235
 236                         t = va_arg(ap, const char *);
 237                         if (!t)
 238                                 break;
 239
 240                         n = strlen(t);
 241                         if (n > ((size_t) -1) - l) {
 242                                 va_end(ap);
 243                                 return NULL;
 244                         }
 245
 246                         l += n;
 247                 }
 248         } else
 249                 l = 0;
 250
 251         va_end(ap);
 252
 253         r = new(char, l+1);
 254         if (!r)
 255                 return NULL;
 256
 257         if (x) {
 258                 p = stpcpy(r, x);
 259
 260                 va_start(ap, x);
 261
 262                 for (;;) {
 263                         const char *t;
 264
 265                         t = va_arg(ap, const char *);
 266                         if (!t)
 267                                 break;
 268
 269                         p = stpcpy(p, t);
 270                 }
 271
 272                 va_end(ap);
 273         } else
 274                 r[0] = 0;
 275
 276         return r;
 277 }
 278
 279 char *strstrip(char *s) {
 280         char *e;
 281
 282         /* Drops trailing whitespace. Modifies the string in
 283          * place. Returns pointer to first non-space character */
 284
 285         s += strspn(s, WHITESPACE);
 286
 287         for (e = strchr(s, 0); e > s; e --)
 288                 if (!strchr(WHITESPACE, e[-1]))
 289                         break;
 290
 291         *e = 0;
 292
 293         return s;
 294 }
 295
 296 char *delete_chars(char *s, const char *bad) {
 297         char *f, *t;
 298
 299         /* Drops all whitespace, regardless where in the string */
 300
 301         for (f = s, t = s; *f; f++) {
 302                 if (strchr(bad, *f))
 303                         continue;
 304
 305                 *(t++) = *f;
 306         }
 307
 308         *t = 0;
 309
 310         return s;
 311 }
 312
 313 char *truncate_nl(char *s) {
 314         assert(s);
 315
 316         s[strcspn(s, NEWLINE)] = 0;
 317         return s;
 318 }
 319
 320 char ascii_tolower(char x) {
 321
 322         if (x >= 'A' && x <= 'Z')
 323                 return x - 'A' + 'a';
 324
 325         return x;
 326 }
 327
 328 char *ascii_strlower(char *t) {
 329         char *p;
 330
 331         assert(t);
 332
 333         for (p = t; *p; p++)
 334                 *p = ascii_tolower(*p);
 335
 336         return t;
 337 }
 338
 339 char *ascii_strlower_n(char *t, size_t n) {
 340         size_t i;
 341
 342         if (n <= 0)
 343                 return t;
 344
 345         for (i = 0; i < n; i++)
 346                 t[i] = ascii_tolower(t[i]);
 347
 348         return t;
 349 }
 350
 351 bool chars_intersect(const char *a, const char *b) {
 352         const char *p;
 353
 354         /* Returns true if any of the chars in a are in b. */
 355         for (p = a; *p; p++)
 356                 if (strchr(b, *p))
 357                         return true;
 358
 359         return false;
 360 }
 361
 362 bool string_has_cc(const char *p, const char *ok) {
 363         const char *t;
 364
 365         assert(p);
 366
 367         /*
 368          * Check if a string contains control characters. If 'ok' is
 369          * non-NULL it may be a string containing additional CCs to be
 370          * considered OK.
 371          */
 372
 373         for (t = p; *t; t++) {
 374                 if (ok && strchr(ok, *t))
 375                         continue;
 376
 377                 if (*t > 0 && *t < ' ')
 378                         return true;
 379
 380                 if (*t == 127)
 381                         return true;
 382         }
 383
 384         return false;
 385 }
 386
 387 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 388         size_t x;
 389         char *r;
 390
 391         assert(s);
 392         assert(percent <= 100);
 393         assert(new_length >= 3);
 394
 395         if (old_length <= 3 || old_length <= new_length)
 396                 return strndup(s, old_length);
 397
 398         r = new0(char, new_length+1);
 399         if (!r)
 400                 return NULL;
 401
 402         x = (new_length * percent) / 100;
 403
 404         if (x > new_length - 3)
 405                 x = new_length - 3;
 406
 407         memcpy(r, s, x);
 408         r[x] = '.';
 409         r[x+1] = '.';
 410         r[x+2] = '.';
 411         memcpy(r + x + 3,
 412                s + old_length - (new_length - x - 3),
 413                new_length - x - 3);
 414
 415         return r;
 416 }
 417
 418 char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 419         size_t x;
 420         char *e;
 421         const char *i, *j;
 422         unsigned k, len, len2;
 423
 424         assert(s);
 425         assert(percent <= 100);
 426         assert(new_length >= 3);
 427
 428         /* if no multibyte characters use ascii_ellipsize_mem for speed */
 429         if (ascii_is_valid(s))
 430                 return ascii_ellipsize_mem(s, old_length, new_length, percent);
 431
 432         if (old_length <= 3 || old_length <= new_length)
 433                 return strndup(s, old_length);
 434
 435         x = (new_length * percent) / 100;
 436
 437         if (x > new_length - 3)
 438                 x = new_length - 3;
 439
 440         k = 0;
 441         for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
 442                 int c;
 443
 444                 c = utf8_encoded_to_unichar(i);
 445                 if (c < 0)
 446                         return NULL;
 447                 k += unichar_iswide(c) ? 2 : 1;
 448         }
 449
 450         if (k > x) /* last character was wide and went over quota */
 451                 x ++;
 452
 453         for (j = s + old_length; k < new_length && j > i; ) {
 454                 int c;
 455
 456                 j = utf8_prev_char(j);
 457                 c = utf8_encoded_to_unichar(j);
 458                 if (c < 0)
 459                         return NULL;
 460                 k += unichar_iswide(c) ? 2 : 1;
 461         }
 462         assert(i <= j);
 463
 464         /* we don't actually need to ellipsize */
 465         if (i == j)
 466                 return memdup(s, old_length + 1);
 467
 468         /* make space for ellipsis */
 469         j = utf8_next_char(j);
 470
 471         len = i - s;
 472         len2 = s + old_length - j;
 473         e = new(char, len + 3 + len2 + 1);
 474         if (!e)
 475                 return NULL;
 476
 477         /*
 478         printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
 479                old_length, new_length, x, len, len2, k);
 480         */
 481
 482         memcpy(e, s, len);
 483         e[len]   = 0xe2; /* tri-dot ellipsis: … */
 484         e[len + 1] = 0x80;
 485         e[len + 2] = 0xa6;
 486
 487         memcpy(e + len + 3, j, len2 + 1);
 488
 489         return e;
 490 }
 491
 492 char *ellipsize(const char *s, size_t length, unsigned percent) {
 493         return ellipsize_mem(s, strlen(s), length, percent);
 494 }
 495
 496 bool nulstr_contains(const char*nulstr, const char *needle) {
 497         const char *i;
 498
 499         if (!nulstr)
 500                 return false;
 501
 502         NULSTR_FOREACH(i, nulstr)
 503                 if (streq(i, needle))
 504                         return true;
 505
 506         return false;
 507 }
 508
 509 char* strshorten(char *s, size_t l) {
 510         assert(s);
 511
 512         if (l < strlen(s))
 513                 s[l] = 0;
 514
 515         return s;
 516 }
 517
 518 char *strreplace(const char *text, const char *old_string, const char *new_string) {
 519         const char *f;
 520         char *t, *r;
 521         size_t l, old_len, new_len;
 522
 523         assert(text);
 524         assert(old_string);
 525         assert(new_string);
 526
 527         old_len = strlen(old_string);
 528         new_len = strlen(new_string);
 529
 530         l = strlen(text);
 531         r = new(char, l+1);
 532         if (!r)
 533                 return NULL;
 534
 535         f = text;
 536         t = r;
 537         while (*f) {
 538                 char *a;
 539                 size_t d, nl;
 540
 541                 if (!startswith(f, old_string)) {
 542                         *(t++) = *(f++);
 543                         continue;
 544                 }
 545
 546                 d = t - r;
 547                 nl = l - old_len + new_len;
 548                 a = realloc(r, nl + 1);
 549                 if (!a)
 550                         goto oom;
 551
 552                 l = nl;
 553                 r = a;
 554                 t = r + d;
 555
 556                 t = stpcpy(t, new_string);
 557                 f += old_len;
 558         }
 559
 560         *t = 0;
 561         return r;
 562
 563 oom:
 564         free(r);
 565         return NULL;
 566 }
 567
 568 char *strip_tab_ansi(char **ibuf, size_t *_isz) {
 569         const char *i, *begin = NULL;
 570         enum {
 571                 STATE_OTHER,
 572                 STATE_ESCAPE,
 573                 STATE_BRACKET
 574         } state = STATE_OTHER;
 575         char *obuf = NULL;
 576         size_t osz = 0, isz;
 577         FILE *f;
 578
 579         assert(ibuf);
 580         assert(*ibuf);
 581
 582         /* Strips ANSI color and replaces TABs by 8 spaces */
 583
 584         isz = _isz ? *_isz : strlen(*ibuf);
 585
 586         f = open_memstream(&obuf, &osz);
 587         if (!f)
 588                 return NULL;
 589
 590         for (i = *ibuf; i < *ibuf + isz + 1; i++) {
 591
 592                 switch (state) {
 593
 594                 case STATE_OTHER:
 595                         if (i >= *ibuf + isz) /* EOT */
 596                                 break;
 597                         else if (*i == '\x1B')
 598                                 state = STATE_ESCAPE;
 599                         else if (*i == '\t')
 600                                 fputs("        ", f);
 601                         else
 602                                 fputc(*i, f);
 603                         break;
 604
 605                 case STATE_ESCAPE:
 606                         if (i >= *ibuf + isz) { /* EOT */
 607                                 fputc('\x1B', f);
 608                                 break;
 609                         } else if (*i == '[') {
 610                                 state = STATE_BRACKET;
 611                                 begin = i + 1;
 612                         } else {
 613                                 fputc('\x1B', f);
 614                                 fputc(*i, f);
 615                                 state = STATE_OTHER;
 616                         }
 617
 618                         break;
 619
 620                 case STATE_BRACKET:
 621
 622                         if (i >= *ibuf + isz || /* EOT */
 623                             (!(*i >= '0' && *i <= '9') && *i != ';' && *i != 'm')) {
 624                                 fputc('\x1B', f);
 625                                 fputc('[', f);
 626                                 state = STATE_OTHER;
 627                                 i = begin-1;
 628                         } else if (*i == 'm')
 629                                 state = STATE_OTHER;
 630                         break;
 631                 }
 632         }
 633
 634         if (ferror(f)) {
 635                 fclose(f);
 636                 free(obuf);
 637                 return NULL;
 638         }
 639
 640         fclose(f);
 641
 642         free(*ibuf);
 643         *ibuf = obuf;
 644
 645         if (_isz)
 646                 *_isz = osz;
 647
 648         return obuf;
 649 }
 650
 651 char *strextend(char **x, ...) {
 652         va_list ap;
 653         size_t f, l;
 654         char *r, *p;
 655
 656         assert(x);
 657
 658         l = f = *x ? strlen(*x) : 0;
 659
 660         va_start(ap, x);
 661         for (;;) {
 662                 const char *t;
 663                 size_t n;
 664
 665                 t = va_arg(ap, const char *);
 666                 if (!t)
 667                         break;
 668
 669                 n = strlen(t);
 670                 if (n > ((size_t) -1) - l) {
 671                         va_end(ap);
 672                         return NULL;
 673                 }
 674
 675                 l += n;
 676         }
 677         va_end(ap);
 678
 679         r = realloc(*x, l+1);
 680         if (!r)
 681                 return NULL;
 682
 683         p = r + f;
 684
 685         va_start(ap, x);
 686         for (;;) {
 687                 const char *t;
 688
 689                 t = va_arg(ap, const char *);
 690                 if (!t)
 691                         break;
 692
 693                 p = stpcpy(p, t);
 694         }
 695         va_end(ap);
 696
 697         *p = 0;
 698         *x = r;
 699
 700         return r + l;
 701 }
 702
 703 char *strrep(const char *s, unsigned n) {
 704         size_t l;
 705         char *r, *p;
 706         unsigned i;
 707
 708         assert(s);
 709
 710         l = strlen(s);
 711         p = r = malloc(l * n + 1);
 712         if (!r)
 713                 return NULL;
 714
 715         for (i = 0; i < n; i++)
 716                 p = stpcpy(p, s);
 717
 718         *p = 0;
 719         return r;
 720 }
 721
 722 int split_pair(const char *s, const char *sep, char **l, char **r) {
 723         char *x, *a, *b;
 724
 725         assert(s);
 726         assert(sep);
 727         assert(l);
 728         assert(r);
 729
 730         if (isempty(sep))
 731                 return -EINVAL;
 732
 733         x = strstr(s, sep);
 734         if (!x)
 735                 return -EINVAL;
 736
 737         a = strndup(s, x - s);
 738         if (!a)
 739                 return -ENOMEM;
 740
 741         b = strdup(x + strlen(sep));
 742         if (!b) {
 743                 free(a);
 744                 return -ENOMEM;
 745         }
 746
 747         *l = a;
 748         *r = b;
 749
 750         return 0;
 751 }
 752
 753 int free_and_strdup(char **p, const char *s) {
 754         char *t;
 755
 756         assert(p);
 757
 758         /* Replaces a string pointer with an strdup()ed new string,
 759          * possibly freeing the old one. */
 760
 761         if (streq_ptr(*p, s))
 762                 return 0;
 763
 764         if (s) {
 765                 t = strdup(s);
 766                 if (!t)
 767                         return -ENOMEM;
 768         } else
 769                 t = NULL;
 770
 771         free(*p);
 772         *p = t;
 773
 774         return 1;
 775 }
 776
 777 #pragma GCC push_options
 778 #pragma GCC optimize("O0")
 779
 780 void* memory_erase(void *p, size_t l) {
 781         volatile uint8_t* x = (volatile uint8_t*) p;
 782
 783         /* This basically does what memset() does, but hopefully isn't
 784          * optimized away by the compiler. One of those days, when
 785          * glibc learns memset_s() we should replace this call by
 786          * memset_s(), but until then this has to do. */
 787
 788         for (; l > 0; l--)
 789                 *(x++) = 'x';
 790
 791         return p;
 792 }
 793
 794 #pragma GCC pop_options
 795
 796 char* string_erase(char *x) {
 797
 798         if (!x)
 799                 return NULL;
 800
 801         /* A delicious drop of snake-oil! To be called on memory where
 802          * we stored passphrases or so, after we used them. */
 803
 804         return memory_erase(x, strlen(x));
 805 }
 806
 807 char *string_free_erase(char *s) {
 808         return mfree(string_erase(s));
 809 }
 810
 811 bool string_is_safe(const char *p) {
 812         const char *t;
 813
 814         if (!p)
 815                 return false;
 816
 817         for (t = p; *t; t++) {
 818                 if (*t > 0 && *t < ' ') /* no control characters */
 819                         return false;
 820
 821                 if (strchr(QUOTES "\\\x7f", *t))
 822                         return false;
 823         }
 824
 825         return true;
 826 }