src/basic/string-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <errno.h>
  23 #include <stdarg.h>
  24 #include <stdint.h>
  25 #include <stdio.h>
  26 #include <stdlib.h>
  27
  28 #include "alloc-util.h"
  29 #include "gunicode.h"
  30 #include "macro.h"
  31 #include "string-util.h"
  32 #include "utf8.h"
  33 #include "util.h"
  34
  35 int strcmp_ptr(const char *a, const char *b) {
  36
  37         /* Like strcmp(), but tries to make sense of NULL pointers */
  38         if (a && b)
  39                 return strcmp(a, b);
  40
  41         if (!a && b)
  42                 return -1;
  43
  44         if (a && !b)
  45                 return 1;
  46
  47         return 0;
  48 }
  49
  50 char* endswith(const char *s, const char *postfix) {
  51         size_t sl, pl;
  52
  53         assert(s);
  54         assert(postfix);
  55
  56         sl = strlen(s);
  57         pl = strlen(postfix);
  58
  59         if (pl == 0)
  60                 return (char*) s + sl;
  61
  62         if (sl < pl)
  63                 return NULL;
  64
  65         if (memcmp(s + sl - pl, postfix, pl) != 0)
  66                 return NULL;
  67
  68         return (char*) s + sl - pl;
  69 }
  70
  71 char* endswith_no_case(const char *s, const char *postfix) {
  72         size_t sl, pl;
  73
  74         assert(s);
  75         assert(postfix);
  76
  77         sl = strlen(s);
  78         pl = strlen(postfix);
  79
  80         if (pl == 0)
  81                 return (char*) s + sl;
  82
  83         if (sl < pl)
  84                 return NULL;
  85
  86         if (strcasecmp(s + sl - pl, postfix) != 0)
  87                 return NULL;
  88
  89         return (char*) s + sl - pl;
  90 }
  91
  92 char* first_word(const char *s, const char *word) {
  93         size_t sl, wl;
  94         const char *p;
  95
  96         assert(s);
  97         assert(word);
  98
  99         /* Checks if the string starts with the specified word, either
 100          * followed by NUL or by whitespace. Returns a pointer to the
 101          * NUL or the first character after the whitespace. */
 102
 103         sl = strlen(s);
 104         wl = strlen(word);
 105
 106         if (sl < wl)
 107                 return NULL;
 108
 109         if (wl == 0)
 110                 return (char*) s;
 111
 112         if (memcmp(s, word, wl) != 0)
 113                 return NULL;
 114
 115         p = s + wl;
 116         if (*p == 0)
 117                 return (char*) p;
 118
 119         if (!strchr(WHITESPACE, *p))
 120                 return NULL;
 121
 122         p += strspn(p, WHITESPACE);
 123         return (char*) p;
 124 }
 125
 126 static size_t strcspn_escaped(const char *s, const char *reject) {
 127         bool escaped = false;
 128         int n;
 129
 130         for (n=0; s[n]; n++) {
 131                 if (escaped)
 132                         escaped = false;
 133                 else if (s[n] == '\\')
 134                         escaped = true;
 135                 else if (strchr(reject, s[n]))
 136                         break;
 137         }
 138
 139         /* if s ends in \, return index of previous char */
 140         return n - escaped;
 141 }
 142
 143 /* Split a string into words. */
 144 const char* split(const char **state, size_t *l, const char *separator, bool quoted) {
 145         const char *current;
 146
 147         current = *state;
 148
 149         if (!*current) {
 150                 assert(**state == '\0');
 151                 return NULL;
 152         }
 153
 154         current += strspn(current, separator);
 155         if (!*current) {
 156                 *state = current;
 157                 return NULL;
 158         }
 159
 160         if (quoted && strchr("\'\"", *current)) {
 161                 char quotechars[2] = {*current, '\0'};
 162
 163                 *l = strcspn_escaped(current + 1, quotechars);
 164                 if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] ||
 165                     (current[*l + 2] && !strchr(separator, current[*l + 2]))) {
 166                         /* right quote missing or garbage at the end */
 167                         *state = current;
 168                         return NULL;
 169                 }
 170                 *state = current++ + *l + 2;
 171         } else if (quoted) {
 172                 *l = strcspn_escaped(current, separator);
 173                 if (current[*l] && !strchr(separator, current[*l])) {
 174                         /* unfinished escape */
 175                         *state = current;
 176                         return NULL;
 177                 }
 178                 *state = current + *l;
 179         } else {
 180                 *l = strcspn(current, separator);
 181                 *state = current + *l;
 182         }
 183
 184         return current;
 185 }
 186
 187 char *strnappend(const char *s, const char *suffix, size_t b) {
 188         size_t a;
 189         char *r;
 190
 191         if (!s && !suffix)
 192                 return strdup("");
 193
 194         if (!s)
 195                 return strndup(suffix, b);
 196
 197         if (!suffix)
 198                 return strdup(s);
 199
 200         assert(s);
 201         assert(suffix);
 202
 203         a = strlen(s);
 204         if (b > ((size_t) -1) - a)
 205                 return NULL;
 206
 207         r = new(char, a+b+1);
 208         if (!r)
 209                 return NULL;
 210
 211         memcpy(r, s, a);
 212         memcpy(r+a, suffix, b);
 213         r[a+b] = 0;
 214
 215         return r;
 216 }
 217
 218 char *strappend(const char *s, const char *suffix) {
 219         return strnappend(s, suffix, suffix ? strlen(suffix) : 0);
 220 }
 221
 222 char *strjoin(const char *x, ...) {
 223         va_list ap;
 224         size_t l;
 225         char *r, *p;
 226
 227         va_start(ap, x);
 228
 229         if (x) {
 230                 l = strlen(x);
 231
 232                 for (;;) {
 233                         const char *t;
 234                         size_t n;
 235
 236                         t = va_arg(ap, const char *);
 237                         if (!t)
 238                                 break;
 239
 240                         n = strlen(t);
 241                         if (n > ((size_t) -1) - l) {
 242                                 va_end(ap);
 243                                 return NULL;
 244                         }
 245
 246                         l += n;
 247                 }
 248         } else
 249                 l = 0;
 250
 251         va_end(ap);
 252
 253         r = new(char, l+1);
 254         if (!r)
 255                 return NULL;
 256
 257         if (x) {
 258                 p = stpcpy(r, x);
 259
 260                 va_start(ap, x);
 261
 262                 for (;;) {
 263                         const char *t;
 264
 265                         t = va_arg(ap, const char *);
 266                         if (!t)
 267                                 break;
 268
 269                         p = stpcpy(p, t);
 270                 }
 271
 272                 va_end(ap);
 273         } else
 274                 r[0] = 0;
 275
 276         return r;
 277 }
 278
 279 char *strstrip(char *s) {
 280         char *e;
 281
 282         /* Drops trailing whitespace. Modifies the string in
 283          * place. Returns pointer to first non-space character */
 284
 285         s += strspn(s, WHITESPACE);
 286
 287         for (e = strchr(s, 0); e > s; e --)
 288                 if (!strchr(WHITESPACE, e[-1]))
 289                         break;
 290
 291         *e = 0;
 292
 293         return s;
 294 }
 295
 296 char *delete_chars(char *s, const char *bad) {
 297         char *f, *t;
 298
 299         /* Drops all whitespace, regardless where in the string */
 300
 301         for (f = s, t = s; *f; f++) {
 302                 if (strchr(bad, *f))
 303                         continue;
 304
 305                 *(t++) = *f;
 306         }
 307
 308         *t = 0;
 309
 310         return s;
 311 }
 312
 313 char *truncate_nl(char *s) {
 314         assert(s);
 315
 316         s[strcspn(s, NEWLINE)] = 0;
 317         return s;
 318 }
 319
 320 char ascii_tolower(char x) {
 321
 322         if (x >= 'A' && x <= 'Z')
 323                 return x - 'A' + 'a';
 324
 325         return x;
 326 }
 327
 328 char *ascii_strlower(char *t) {
 329         char *p;
 330
 331         assert(t);
 332
 333         for (p = t; *p; p++)
 334                 *p = ascii_tolower(*p);
 335
 336         return t;
 337 }
 338
 339 char *ascii_strlower_n(char *t, size_t n) {
 340         size_t i;
 341
 342         if (n <= 0)
 343                 return t;
 344
 345         for (i = 0; i < n; i++)
 346                 t[i] = ascii_tolower(t[i]);
 347
 348         return t;
 349 }
 350
 351 int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
 352
 353         for (; n > 0; a++, b++, n--) {
 354                 int x, y;
 355
 356                 x = (int) (uint8_t) ascii_tolower(*a);
 357                 y = (int) (uint8_t) ascii_tolower(*b);
 358
 359                 if (x != y)
 360                         return x - y;
 361         }
 362
 363         return 0;
 364 }
 365
 366 int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
 367         int r;
 368
 369         r = ascii_strcasecmp_n(a, b, MIN(n, m));
 370         if (r != 0)
 371                 return r;
 372
 373         if (n < m)
 374                 return -1;
 375         else if (n > m)
 376                 return 1;
 377         else
 378                 return 0;
 379 }
 380
 381 bool chars_intersect(const char *a, const char *b) {
 382         const char *p;
 383
 384         /* Returns true if any of the chars in a are in b. */
 385         for (p = a; *p; p++)
 386                 if (strchr(b, *p))
 387                         return true;
 388
 389         return false;
 390 }
 391
 392 bool string_has_cc(const char *p, const char *ok) {
 393         const char *t;
 394
 395         assert(p);
 396
 397         /*
 398          * Check if a string contains control characters. If 'ok' is
 399          * non-NULL it may be a string containing additional CCs to be
 400          * considered OK.
 401          */
 402
 403         for (t = p; *t; t++) {
 404                 if (ok && strchr(ok, *t))
 405                         continue;
 406
 407                 if (*t > 0 && *t < ' ')
 408                         return true;
 409
 410                 if (*t == 127)
 411                         return true;
 412         }
 413
 414         return false;
 415 }
 416
 417 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 418         size_t x;
 419         char *r;
 420
 421         assert(s);
 422         assert(percent <= 100);
 423         assert(new_length >= 3);
 424
 425         if (old_length <= 3 || old_length <= new_length)
 426                 return strndup(s, old_length);
 427
 428         r = new0(char, new_length+1);
 429         if (!r)
 430                 return NULL;
 431
 432         x = (new_length * percent) / 100;
 433
 434         if (x > new_length - 3)
 435                 x = new_length - 3;
 436
 437         memcpy(r, s, x);
 438         r[x] = '.';
 439         r[x+1] = '.';
 440         r[x+2] = '.';
 441         memcpy(r + x + 3,
 442                s + old_length - (new_length - x - 3),
 443                new_length - x - 3);
 444
 445         return r;
 446 }
 447
 448 char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 449         size_t x;
 450         char *e;
 451         const char *i, *j;
 452         unsigned k, len, len2;
 453         int r;
 454
 455         assert(s);
 456         assert(percent <= 100);
 457         assert(new_length >= 3);
 458
 459         /* if no multibyte characters use ascii_ellipsize_mem for speed */
 460         if (ascii_is_valid(s))
 461                 return ascii_ellipsize_mem(s, old_length, new_length, percent);
 462
 463         if (old_length <= 3 || old_length <= new_length)
 464                 return strndup(s, old_length);
 465
 466         x = (new_length * percent) / 100;
 467
 468         if (x > new_length - 3)
 469                 x = new_length - 3;
 470
 471         k = 0;
 472         for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
 473                 char32_t c;
 474
 475                 r = utf8_encoded_to_unichar(i, &c);
 476                 if (r < 0)
 477                         return NULL;
 478                 k += unichar_iswide(c) ? 2 : 1;
 479         }
 480
 481         if (k > x) /* last character was wide and went over quota */
 482                 x ++;
 483
 484         for (j = s + old_length; k < new_length && j > i; ) {
 485                 char32_t c;
 486
 487                 j = utf8_prev_char(j);
 488                 r = utf8_encoded_to_unichar(j, &c);
 489                 if (r < 0)
 490                         return NULL;
 491                 k += unichar_iswide(c) ? 2 : 1;
 492         }
 493         assert(i <= j);
 494
 495         /* we don't actually need to ellipsize */
 496         if (i == j)
 497                 return memdup(s, old_length + 1);
 498
 499         /* make space for ellipsis */
 500         j = utf8_next_char(j);
 501
 502         len = i - s;
 503         len2 = s + old_length - j;
 504         e = new(char, len + 3 + len2 + 1);
 505         if (!e)
 506                 return NULL;
 507
 508         /*
 509         printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
 510                old_length, new_length, x, len, len2, k);
 511         */
 512
 513         memcpy(e, s, len);
 514         e[len]   = 0xe2; /* tri-dot ellipsis: … */
 515         e[len + 1] = 0x80;
 516         e[len + 2] = 0xa6;
 517
 518         memcpy(e + len + 3, j, len2 + 1);
 519
 520         return e;
 521 }
 522
 523 char *ellipsize(const char *s, size_t length, unsigned percent) {
 524         return ellipsize_mem(s, strlen(s), length, percent);
 525 }
 526
 527 bool nulstr_contains(const char*nulstr, const char *needle) {
 528         const char *i;
 529
 530         if (!nulstr)
 531                 return false;
 532
 533         NULSTR_FOREACH(i, nulstr)
 534                 if (streq(i, needle))
 535                         return true;
 536
 537         return false;
 538 }
 539
 540 char* strshorten(char *s, size_t l) {
 541         assert(s);
 542
 543         if (l < strlen(s))
 544                 s[l] = 0;
 545
 546         return s;
 547 }
 548
 549 char *strreplace(const char *text, const char *old_string, const char *new_string) {
 550         const char *f;
 551         char *t, *r;
 552         size_t l, old_len, new_len;
 553
 554         assert(text);
 555         assert(old_string);
 556         assert(new_string);
 557
 558         old_len = strlen(old_string);
 559         new_len = strlen(new_string);
 560
 561         l = strlen(text);
 562         r = new(char, l+1);
 563         if (!r)
 564                 return NULL;
 565
 566         f = text;
 567         t = r;
 568         while (*f) {
 569                 char *a;
 570                 size_t d, nl;
 571
 572                 if (!startswith(f, old_string)) {
 573                         *(t++) = *(f++);
 574                         continue;
 575                 }
 576
 577                 d = t - r;
 578                 nl = l - old_len + new_len;
 579                 a = realloc(r, nl + 1);
 580                 if (!a)
 581                         goto oom;
 582
 583                 l = nl;
 584                 r = a;
 585                 t = r + d;
 586
 587                 t = stpcpy(t, new_string);
 588                 f += old_len;
 589         }
 590
 591         *t = 0;
 592         return r;
 593
 594 oom:
 595         free(r);
 596         return NULL;
 597 }
 598
 599 char *strip_tab_ansi(char **ibuf, size_t *_isz) {
 600         const char *i, *begin = NULL;
 601         enum {
 602                 STATE_OTHER,
 603                 STATE_ESCAPE,
 604                 STATE_BRACKET
 605         } state = STATE_OTHER;
 606         char *obuf = NULL;
 607         size_t osz = 0, isz;
 608         FILE *f;
 609
 610         assert(ibuf);
 611         assert(*ibuf);
 612
 613         /* Strips ANSI color and replaces TABs by 8 spaces */
 614
 615         isz = _isz ? *_isz : strlen(*ibuf);
 616
 617         f = open_memstream(&obuf, &osz);
 618         if (!f)
 619                 return NULL;
 620
 621         for (i = *ibuf; i < *ibuf + isz + 1; i++) {
 622
 623                 switch (state) {
 624
 625                 case STATE_OTHER:
 626                         if (i >= *ibuf + isz) /* EOT */
 627                                 break;
 628                         else if (*i == '\x1B')
 629                                 state = STATE_ESCAPE;
 630                         else if (*i == '\t')
 631                                 fputs("        ", f);
 632                         else
 633                                 fputc(*i, f);
 634                         break;
 635
 636                 case STATE_ESCAPE:
 637                         if (i >= *ibuf + isz) { /* EOT */
 638                                 fputc('\x1B', f);
 639                                 break;
 640                         } else if (*i == '[') {
 641                                 state = STATE_BRACKET;
 642                                 begin = i + 1;
 643                         } else {
 644                                 fputc('\x1B', f);
 645                                 fputc(*i, f);
 646                                 state = STATE_OTHER;
 647                         }
 648
 649                         break;
 650
 651                 case STATE_BRACKET:
 652
 653                         if (i >= *ibuf + isz || /* EOT */
 654                             (!(*i >= '0' && *i <= '9') && *i != ';' && *i != 'm')) {
 655                                 fputc('\x1B', f);
 656                                 fputc('[', f);
 657                                 state = STATE_OTHER;
 658                                 i = begin-1;
 659                         } else if (*i == 'm')
 660                                 state = STATE_OTHER;
 661                         break;
 662                 }
 663         }
 664
 665         if (ferror(f)) {
 666                 fclose(f);
 667                 free(obuf);
 668                 return NULL;
 669         }
 670
 671         fclose(f);
 672
 673         free(*ibuf);
 674         *ibuf = obuf;
 675
 676         if (_isz)
 677                 *_isz = osz;
 678
 679         return obuf;
 680 }
 681
 682 char *strextend(char **x, ...) {
 683         va_list ap;
 684         size_t f, l;
 685         char *r, *p;
 686
 687         assert(x);
 688
 689         l = f = *x ? strlen(*x) : 0;
 690
 691         va_start(ap, x);
 692         for (;;) {
 693                 const char *t;
 694                 size_t n;
 695
 696                 t = va_arg(ap, const char *);
 697                 if (!t)
 698                         break;
 699
 700                 n = strlen(t);
 701                 if (n > ((size_t) -1) - l) {
 702                         va_end(ap);
 703                         return NULL;
 704                 }
 705
 706                 l += n;
 707         }
 708         va_end(ap);
 709
 710         r = realloc(*x, l+1);
 711         if (!r)
 712                 return NULL;
 713
 714         p = r + f;
 715
 716         va_start(ap, x);
 717         for (;;) {
 718                 const char *t;
 719
 720                 t = va_arg(ap, const char *);
 721                 if (!t)
 722                         break;
 723
 724                 p = stpcpy(p, t);
 725         }
 726         va_end(ap);
 727
 728         *p = 0;
 729         *x = r;
 730
 731         return r + l;
 732 }
 733
 734 char *strrep(const char *s, unsigned n) {
 735         size_t l;
 736         char *r, *p;
 737         unsigned i;
 738
 739         assert(s);
 740
 741         l = strlen(s);
 742         p = r = malloc(l * n + 1);
 743         if (!r)
 744                 return NULL;
 745
 746         for (i = 0; i < n; i++)
 747                 p = stpcpy(p, s);
 748
 749         *p = 0;
 750         return r;
 751 }
 752
 753 int split_pair(const char *s, const char *sep, char **l, char **r) {
 754         char *x, *a, *b;
 755
 756         assert(s);
 757         assert(sep);
 758         assert(l);
 759         assert(r);
 760
 761         if (isempty(sep))
 762                 return -EINVAL;
 763
 764         x = strstr(s, sep);
 765         if (!x)
 766                 return -EINVAL;
 767
 768         a = strndup(s, x - s);
 769         if (!a)
 770                 return -ENOMEM;
 771
 772         b = strdup(x + strlen(sep));
 773         if (!b) {
 774                 free(a);
 775                 return -ENOMEM;
 776         }
 777
 778         *l = a;
 779         *r = b;
 780
 781         return 0;
 782 }
 783
 784 int free_and_strdup(char **p, const char *s) {
 785         char *t;
 786
 787         assert(p);
 788
 789         /* Replaces a string pointer with an strdup()ed new string,
 790          * possibly freeing the old one. */
 791
 792         if (streq_ptr(*p, s))
 793                 return 0;
 794
 795         if (s) {
 796                 t = strdup(s);
 797                 if (!t)
 798                         return -ENOMEM;
 799         } else
 800                 t = NULL;
 801
 802         free(*p);
 803         *p = t;
 804
 805         return 1;
 806 }
 807
 808 #pragma GCC push_options
 809 #pragma GCC optimize("O0")
 810
 811 void* memory_erase(void *p, size_t l) {
 812         volatile uint8_t* x = (volatile uint8_t*) p;
 813
 814         /* This basically does what memset() does, but hopefully isn't
 815          * optimized away by the compiler. One of those days, when
 816          * glibc learns memset_s() we should replace this call by
 817          * memset_s(), but until then this has to do. */
 818
 819         for (; l > 0; l--)
 820                 *(x++) = 'x';
 821
 822         return p;
 823 }
 824
 825 #pragma GCC pop_options
 826
 827 char* string_erase(char *x) {
 828
 829         if (!x)
 830                 return NULL;
 831
 832         /* A delicious drop of snake-oil! To be called on memory where
 833          * we stored passphrases or so, after we used them. */
 834
 835         return memory_erase(x, strlen(x));
 836 }
 837
 838 char *string_free_erase(char *s) {
 839         return mfree(string_erase(s));
 840 }
 841
 842 bool string_is_safe(const char *p) {
 843         const char *t;
 844
 845         if (!p)
 846                 return false;
 847
 848         for (t = p; *t; t++) {
 849                 if (*t > 0 && *t < ' ') /* no control characters */
 850                         return false;
 851
 852                 if (strchr(QUOTES "\\\x7f", *t))
 853                         return false;
 854         }
 855
 856         return true;
 857 }