src/basic/string-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <errno.h>
  23 #include <stdarg.h>
  24 #include <stdint.h>
  25 #include <stdio.h>
  26 #include <stdlib.h>
  27
  28 #include "alloc-util.h"
  29 #include "gunicode.h"
  30 #include "macro.h"
  31 #include "string-util.h"
  32 #include "utf8.h"
  33 #include "util.h"
  34
  35 int strcmp_ptr(const char *a, const char *b) {
  36
  37         /* Like strcmp(), but tries to make sense of NULL pointers */
  38         if (a && b)
  39                 return strcmp(a, b);
  40
  41         if (!a && b)
  42                 return -1;
  43
  44         if (a && !b)
  45                 return 1;
  46
  47         return 0;
  48 }
  49
  50 char* endswith(const char *s, const char *postfix) {
  51         size_t sl, pl;
  52
  53         assert(s);
  54         assert(postfix);
  55
  56         sl = strlen(s);
  57         pl = strlen(postfix);
  58
  59         if (pl == 0)
  60                 return (char*) s + sl;
  61
  62         if (sl < pl)
  63                 return NULL;
  64
  65         if (memcmp(s + sl - pl, postfix, pl) != 0)
  66                 return NULL;
  67
  68         return (char*) s + sl - pl;
  69 }
  70
  71 char* endswith_no_case(const char *s, const char *postfix) {
  72         size_t sl, pl;
  73
  74         assert(s);
  75         assert(postfix);
  76
  77         sl = strlen(s);
  78         pl = strlen(postfix);
  79
  80         if (pl == 0)
  81                 return (char*) s + sl;
  82
  83         if (sl < pl)
  84                 return NULL;
  85
  86         if (strcasecmp(s + sl - pl, postfix) != 0)
  87                 return NULL;
  88
  89         return (char*) s + sl - pl;
  90 }
  91
  92 char* first_word(const char *s, const char *word) {
  93         size_t sl, wl;
  94         const char *p;
  95
  96         assert(s);
  97         assert(word);
  98
  99         /* Checks if the string starts with the specified word, either
 100          * followed by NUL or by whitespace. Returns a pointer to the
 101          * NUL or the first character after the whitespace. */
 102
 103         sl = strlen(s);
 104         wl = strlen(word);
 105
 106         if (sl < wl)
 107                 return NULL;
 108
 109         if (wl == 0)
 110                 return (char*) s;
 111
 112         if (memcmp(s, word, wl) != 0)
 113                 return NULL;
 114
 115         p = s + wl;
 116         if (*p == 0)
 117                 return (char*) p;
 118
 119         if (!strchr(WHITESPACE, *p))
 120                 return NULL;
 121
 122         p += strspn(p, WHITESPACE);
 123         return (char*) p;
 124 }
 125
 126 static size_t strcspn_escaped(const char *s, const char *reject) {
 127         bool escaped = false;
 128         int n;
 129
 130         for (n=0; s[n]; n++) {
 131                 if (escaped)
 132                         escaped = false;
 133                 else if (s[n] == '\\')
 134                         escaped = true;
 135                 else if (strchr(reject, s[n]))
 136                         break;
 137         }
 138
 139         /* if s ends in \, return index of previous char */
 140         return n - escaped;
 141 }
 142
 143 /* Split a string into words. */
 144 const char* split(const char **state, size_t *l, const char *separator, bool quoted) {
 145         const char *current;
 146
 147         current = *state;
 148
 149         if (!*current) {
 150                 assert(**state == '\0');
 151                 return NULL;
 152         }
 153
 154         current += strspn(current, separator);
 155         if (!*current) {
 156                 *state = current;
 157                 return NULL;
 158         }
 159
 160         if (quoted && strchr("\'\"", *current)) {
 161                 char quotechars[2] = {*current, '\0'};
 162
 163                 *l = strcspn_escaped(current + 1, quotechars);
 164                 if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] ||
 165                     (current[*l + 2] && !strchr(separator, current[*l + 2]))) {
 166                         /* right quote missing or garbage at the end */
 167                         *state = current;
 168                         return NULL;
 169                 }
 170                 *state = current++ + *l + 2;
 171         } else if (quoted) {
 172                 *l = strcspn_escaped(current, separator);
 173                 if (current[*l] && !strchr(separator, current[*l])) {
 174                         /* unfinished escape */
 175                         *state = current;
 176                         return NULL;
 177                 }
 178                 *state = current + *l;
 179         } else {
 180                 *l = strcspn(current, separator);
 181                 *state = current + *l;
 182         }
 183
 184         return current;
 185 }
 186
 187 char *strnappend(const char *s, const char *suffix, size_t b) {
 188         size_t a;
 189         char *r;
 190
 191         if (!s && !suffix)
 192                 return strdup("");
 193
 194         if (!s)
 195                 return strndup(suffix, b);
 196
 197         if (!suffix)
 198                 return strdup(s);
 199
 200         assert(s);
 201         assert(suffix);
 202
 203         a = strlen(s);
 204         if (b > ((size_t) -1) - a)
 205                 return NULL;
 206
 207         r = new(char, a+b+1);
 208         if (!r)
 209                 return NULL;
 210
 211         memcpy(r, s, a);
 212         memcpy(r+a, suffix, b);
 213         r[a+b] = 0;
 214
 215         return r;
 216 }
 217
 218 char *strappend(const char *s, const char *suffix) {
 219         return strnappend(s, suffix, suffix ? strlen(suffix) : 0);
 220 }
 221
 222 char *strjoin(const char *x, ...) {
 223         va_list ap;
 224         size_t l;
 225         char *r, *p;
 226
 227         va_start(ap, x);
 228
 229         if (x) {
 230                 l = strlen(x);
 231
 232                 for (;;) {
 233                         const char *t;
 234                         size_t n;
 235
 236                         t = va_arg(ap, const char *);
 237                         if (!t)
 238                                 break;
 239
 240                         n = strlen(t);
 241                         if (n > ((size_t) -1) - l) {
 242                                 va_end(ap);
 243                                 return NULL;
 244                         }
 245
 246                         l += n;
 247                 }
 248         } else
 249                 l = 0;
 250
 251         va_end(ap);
 252
 253         r = new(char, l+1);
 254         if (!r)
 255                 return NULL;
 256
 257         if (x) {
 258                 p = stpcpy(r, x);
 259
 260                 va_start(ap, x);
 261
 262                 for (;;) {
 263                         const char *t;
 264
 265                         t = va_arg(ap, const char *);
 266                         if (!t)
 267                                 break;
 268
 269                         p = stpcpy(p, t);
 270                 }
 271
 272                 va_end(ap);
 273         } else
 274                 r[0] = 0;
 275
 276         return r;
 277 }
 278
 279 char *strstrip(char *s) {
 280         char *e;
 281
 282         /* Drops trailing whitespace. Modifies the string in
 283          * place. Returns pointer to first non-space character */
 284
 285         s += strspn(s, WHITESPACE);
 286
 287         for (e = strchr(s, 0); e > s; e --)
 288                 if (!strchr(WHITESPACE, e[-1]))
 289                         break;
 290
 291         *e = 0;
 292
 293         return s;
 294 }
 295
 296 char *delete_chars(char *s, const char *bad) {
 297         char *f, *t;
 298
 299         /* Drops all whitespace, regardless where in the string */
 300
 301         for (f = s, t = s; *f; f++) {
 302                 if (strchr(bad, *f))
 303                         continue;
 304
 305                 *(t++) = *f;
 306         }
 307
 308         *t = 0;
 309
 310         return s;
 311 }
 312
 313 char *truncate_nl(char *s) {
 314         assert(s);
 315
 316         s[strcspn(s, NEWLINE)] = 0;
 317         return s;
 318 }
 319
 320 char ascii_tolower(char x) {
 321
 322         if (x >= 'A' && x <= 'Z')
 323                 return x - 'A' + 'a';
 324
 325         return x;
 326 }
 327
 328 char *ascii_strlower(char *t) {
 329         char *p;
 330
 331         assert(t);
 332
 333         for (p = t; *p; p++)
 334                 *p = ascii_tolower(*p);
 335
 336         return t;
 337 }
 338
 339 char *ascii_strlower_n(char *t, size_t n) {
 340         size_t i;
 341
 342         if (n <= 0)
 343                 return t;
 344
 345         for (i = 0; i < n; i++)
 346                 t[i] = ascii_tolower(t[i]);
 347
 348         return t;
 349 }
 350
 351 int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
 352
 353         for (; n > 0; a++, b++, n--) {
 354                 int x, y;
 355
 356                 x = (int) (uint8_t) ascii_tolower(*a);
 357                 y = (int) (uint8_t) ascii_tolower(*b);
 358
 359                 if (x != y)
 360                         return x - y;
 361         }
 362
 363         return 0;
 364 }
 365
 366 int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
 367         int r;
 368
 369         r = ascii_strcasecmp_n(a, b, MIN(n, m));
 370         if (r != 0)
 371                 return r;
 372
 373         if (n < m)
 374                 return -1;
 375         else if (n > m)
 376                 return 1;
 377         else
 378                 return 0;
 379 }
 380
 381 bool chars_intersect(const char *a, const char *b) {
 382         const char *p;
 383
 384         /* Returns true if any of the chars in a are in b. */
 385         for (p = a; *p; p++)
 386                 if (strchr(b, *p))
 387                         return true;
 388
 389         return false;
 390 }
 391
 392 bool string_has_cc(const char *p, const char *ok) {
 393         const char *t;
 394
 395         assert(p);
 396
 397         /*
 398          * Check if a string contains control characters. If 'ok' is
 399          * non-NULL it may be a string containing additional CCs to be
 400          * considered OK.
 401          */
 402
 403         for (t = p; *t; t++) {
 404                 if (ok && strchr(ok, *t))
 405                         continue;
 406
 407                 if (*t > 0 && *t < ' ')
 408                         return true;
 409
 410                 if (*t == 127)
 411                         return true;
 412         }
 413
 414         return false;
 415 }
 416
 417 static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 418         size_t x;
 419         char *r;
 420
 421         assert(s);
 422         assert(percent <= 100);
 423         assert(new_length >= 3);
 424
 425         if (old_length <= 3 || old_length <= new_length)
 426                 return strndup(s, old_length);
 427
 428         r = new0(char, new_length+1);
 429         if (!r)
 430                 return NULL;
 431
 432         x = (new_length * percent) / 100;
 433
 434         if (x > new_length - 3)
 435                 x = new_length - 3;
 436
 437         memcpy(r, s, x);
 438         r[x] = '.';
 439         r[x+1] = '.';
 440         r[x+2] = '.';
 441         memcpy(r + x + 3,
 442                s + old_length - (new_length - x - 3),
 443                new_length - x - 3);
 444
 445         return r;
 446 }
 447
 448 char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
 449         size_t x;
 450         char *e;
 451         const char *i, *j;
 452         unsigned k, len, len2;
 453
 454         assert(s);
 455         assert(percent <= 100);
 456         assert(new_length >= 3);
 457
 458         /* if no multibyte characters use ascii_ellipsize_mem for speed */
 459         if (ascii_is_valid(s))
 460                 return ascii_ellipsize_mem(s, old_length, new_length, percent);
 461
 462         if (old_length <= 3 || old_length <= new_length)
 463                 return strndup(s, old_length);
 464
 465         x = (new_length * percent) / 100;
 466
 467         if (x > new_length - 3)
 468                 x = new_length - 3;
 469
 470         k = 0;
 471         for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
 472                 int c;
 473
 474                 c = utf8_encoded_to_unichar(i);
 475                 if (c < 0)
 476                         return NULL;
 477                 k += unichar_iswide(c) ? 2 : 1;
 478         }
 479
 480         if (k > x) /* last character was wide and went over quota */
 481                 x ++;
 482
 483         for (j = s + old_length; k < new_length && j > i; ) {
 484                 int c;
 485
 486                 j = utf8_prev_char(j);
 487                 c = utf8_encoded_to_unichar(j);
 488                 if (c < 0)
 489                         return NULL;
 490                 k += unichar_iswide(c) ? 2 : 1;
 491         }
 492         assert(i <= j);
 493
 494         /* we don't actually need to ellipsize */
 495         if (i == j)
 496                 return memdup(s, old_length + 1);
 497
 498         /* make space for ellipsis */
 499         j = utf8_next_char(j);
 500
 501         len = i - s;
 502         len2 = s + old_length - j;
 503         e = new(char, len + 3 + len2 + 1);
 504         if (!e)
 505                 return NULL;
 506
 507         /*
 508         printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
 509                old_length, new_length, x, len, len2, k);
 510         */
 511
 512         memcpy(e, s, len);
 513         e[len]   = 0xe2; /* tri-dot ellipsis: … */
 514         e[len + 1] = 0x80;
 515         e[len + 2] = 0xa6;
 516
 517         memcpy(e + len + 3, j, len2 + 1);
 518
 519         return e;
 520 }
 521
 522 char *ellipsize(const char *s, size_t length, unsigned percent) {
 523         return ellipsize_mem(s, strlen(s), length, percent);
 524 }
 525
 526 bool nulstr_contains(const char*nulstr, const char *needle) {
 527         const char *i;
 528
 529         if (!nulstr)
 530                 return false;
 531
 532         NULSTR_FOREACH(i, nulstr)
 533                 if (streq(i, needle))
 534                         return true;
 535
 536         return false;
 537 }
 538
 539 char* strshorten(char *s, size_t l) {
 540         assert(s);
 541
 542         if (l < strlen(s))
 543                 s[l] = 0;
 544
 545         return s;
 546 }
 547
 548 char *strreplace(const char *text, const char *old_string, const char *new_string) {
 549         const char *f;
 550         char *t, *r;
 551         size_t l, old_len, new_len;
 552
 553         assert(text);
 554         assert(old_string);
 555         assert(new_string);
 556
 557         old_len = strlen(old_string);
 558         new_len = strlen(new_string);
 559
 560         l = strlen(text);
 561         r = new(char, l+1);
 562         if (!r)
 563                 return NULL;
 564
 565         f = text;
 566         t = r;
 567         while (*f) {
 568                 char *a;
 569                 size_t d, nl;
 570
 571                 if (!startswith(f, old_string)) {
 572                         *(t++) = *(f++);
 573                         continue;
 574                 }
 575
 576                 d = t - r;
 577                 nl = l - old_len + new_len;
 578                 a = realloc(r, nl + 1);
 579                 if (!a)
 580                         goto oom;
 581
 582                 l = nl;
 583                 r = a;
 584                 t = r + d;
 585
 586                 t = stpcpy(t, new_string);
 587                 f += old_len;
 588         }
 589
 590         *t = 0;
 591         return r;
 592
 593 oom:
 594         free(r);
 595         return NULL;
 596 }
 597
 598 char *strip_tab_ansi(char **ibuf, size_t *_isz) {
 599         const char *i, *begin = NULL;
 600         enum {
 601                 STATE_OTHER,
 602                 STATE_ESCAPE,
 603                 STATE_BRACKET
 604         } state = STATE_OTHER;
 605         char *obuf = NULL;
 606         size_t osz = 0, isz;
 607         FILE *f;
 608
 609         assert(ibuf);
 610         assert(*ibuf);
 611
 612         /* Strips ANSI color and replaces TABs by 8 spaces */
 613
 614         isz = _isz ? *_isz : strlen(*ibuf);
 615
 616         f = open_memstream(&obuf, &osz);
 617         if (!f)
 618                 return NULL;
 619
 620         for (i = *ibuf; i < *ibuf + isz + 1; i++) {
 621
 622                 switch (state) {
 623
 624                 case STATE_OTHER:
 625                         if (i >= *ibuf + isz) /* EOT */
 626                                 break;
 627                         else if (*i == '\x1B')
 628                                 state = STATE_ESCAPE;
 629                         else if (*i == '\t')
 630                                 fputs("        ", f);
 631                         else
 632                                 fputc(*i, f);
 633                         break;
 634
 635                 case STATE_ESCAPE:
 636                         if (i >= *ibuf + isz) { /* EOT */
 637                                 fputc('\x1B', f);
 638                                 break;
 639                         } else if (*i == '[') {
 640                                 state = STATE_BRACKET;
 641                                 begin = i + 1;
 642                         } else {
 643                                 fputc('\x1B', f);
 644                                 fputc(*i, f);
 645                                 state = STATE_OTHER;
 646                         }
 647
 648                         break;
 649
 650                 case STATE_BRACKET:
 651
 652                         if (i >= *ibuf + isz || /* EOT */
 653                             (!(*i >= '0' && *i <= '9') && *i != ';' && *i != 'm')) {
 654                                 fputc('\x1B', f);
 655                                 fputc('[', f);
 656                                 state = STATE_OTHER;
 657                                 i = begin-1;
 658                         } else if (*i == 'm')
 659                                 state = STATE_OTHER;
 660                         break;
 661                 }
 662         }
 663
 664         if (ferror(f)) {
 665                 fclose(f);
 666                 free(obuf);
 667                 return NULL;
 668         }
 669
 670         fclose(f);
 671
 672         free(*ibuf);
 673         *ibuf = obuf;
 674
 675         if (_isz)
 676                 *_isz = osz;
 677
 678         return obuf;
 679 }
 680
 681 char *strextend(char **x, ...) {
 682         va_list ap;
 683         size_t f, l;
 684         char *r, *p;
 685
 686         assert(x);
 687
 688         l = f = *x ? strlen(*x) : 0;
 689
 690         va_start(ap, x);
 691         for (;;) {
 692                 const char *t;
 693                 size_t n;
 694
 695                 t = va_arg(ap, const char *);
 696                 if (!t)
 697                         break;
 698
 699                 n = strlen(t);
 700                 if (n > ((size_t) -1) - l) {
 701                         va_end(ap);
 702                         return NULL;
 703                 }
 704
 705                 l += n;
 706         }
 707         va_end(ap);
 708
 709         r = realloc(*x, l+1);
 710         if (!r)
 711                 return NULL;
 712
 713         p = r + f;
 714
 715         va_start(ap, x);
 716         for (;;) {
 717                 const char *t;
 718
 719                 t = va_arg(ap, const char *);
 720                 if (!t)
 721                         break;
 722
 723                 p = stpcpy(p, t);
 724         }
 725         va_end(ap);
 726
 727         *p = 0;
 728         *x = r;
 729
 730         return r + l;
 731 }
 732
 733 char *strrep(const char *s, unsigned n) {
 734         size_t l;
 735         char *r, *p;
 736         unsigned i;
 737
 738         assert(s);
 739
 740         l = strlen(s);
 741         p = r = malloc(l * n + 1);
 742         if (!r)
 743                 return NULL;
 744
 745         for (i = 0; i < n; i++)
 746                 p = stpcpy(p, s);
 747
 748         *p = 0;
 749         return r;
 750 }
 751
 752 int split_pair(const char *s, const char *sep, char **l, char **r) {
 753         char *x, *a, *b;
 754
 755         assert(s);
 756         assert(sep);
 757         assert(l);
 758         assert(r);
 759
 760         if (isempty(sep))
 761                 return -EINVAL;
 762
 763         x = strstr(s, sep);
 764         if (!x)
 765                 return -EINVAL;
 766
 767         a = strndup(s, x - s);
 768         if (!a)
 769                 return -ENOMEM;
 770
 771         b = strdup(x + strlen(sep));
 772         if (!b) {
 773                 free(a);
 774                 return -ENOMEM;
 775         }
 776
 777         *l = a;
 778         *r = b;
 779
 780         return 0;
 781 }
 782
 783 int free_and_strdup(char **p, const char *s) {
 784         char *t;
 785
 786         assert(p);
 787
 788         /* Replaces a string pointer with an strdup()ed new string,
 789          * possibly freeing the old one. */
 790
 791         if (streq_ptr(*p, s))
 792                 return 0;
 793
 794         if (s) {
 795                 t = strdup(s);
 796                 if (!t)
 797                         return -ENOMEM;
 798         } else
 799                 t = NULL;
 800
 801         free(*p);
 802         *p = t;
 803
 804         return 1;
 805 }
 806
 807 #pragma GCC push_options
 808 #pragma GCC optimize("O0")
 809
 810 void* memory_erase(void *p, size_t l) {
 811         volatile uint8_t* x = (volatile uint8_t*) p;
 812
 813         /* This basically does what memset() does, but hopefully isn't
 814          * optimized away by the compiler. One of those days, when
 815          * glibc learns memset_s() we should replace this call by
 816          * memset_s(), but until then this has to do. */
 817
 818         for (; l > 0; l--)
 819                 *(x++) = 'x';
 820
 821         return p;
 822 }
 823
 824 #pragma GCC pop_options
 825
 826 char* string_erase(char *x) {
 827
 828         if (!x)
 829                 return NULL;
 830
 831         /* A delicious drop of snake-oil! To be called on memory where
 832          * we stored passphrases or so, after we used them. */
 833
 834         return memory_erase(x, strlen(x));
 835 }
 836
 837 char *string_free_erase(char *s) {
 838         return mfree(string_erase(s));
 839 }
 840
 841 bool string_is_safe(const char *p) {
 842         const char *t;
 843
 844         if (!p)
 845                 return false;
 846
 847         for (t = p; *t; t++) {
 848                 if (*t > 0 && *t < ' ') /* no control characters */
 849                         return false;
 850
 851                 if (strchr(QUOTES "\\\x7f", *t))
 852                         return false;
 853         }
 854
 855         return true;
 856 }