src/basic/escape.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #include <errno.h>
   4 #include <stdlib.h>
   5 #include <string.h>
   6
   7 #include "alloc-util.h"
   8 #include "escape.h"
   9 #include "hexdecoct.h"
  10 #include "macro.h"
  11 #include "utf8.h"
  12
  13 int cescape_char(char c, char *buf) {
  14         char *buf_old = buf;
  15
  16         /* Needs space for 4 characters in the buffer */
  17
  18         switch (c) {
  19
  20                 case '\a':
  21                         *(buf++) = '\\';
  22                         *(buf++) = 'a';
  23                         break;
  24                 case '\b':
  25                         *(buf++) = '\\';
  26                         *(buf++) = 'b';
  27                         break;
  28                 case '\f':
  29                         *(buf++) = '\\';
  30                         *(buf++) = 'f';
  31                         break;
  32                 case '\n':
  33                         *(buf++) = '\\';
  34                         *(buf++) = 'n';
  35                         break;
  36                 case '\r':
  37                         *(buf++) = '\\';
  38                         *(buf++) = 'r';
  39                         break;
  40                 case '\t':
  41                         *(buf++) = '\\';
  42                         *(buf++) = 't';
  43                         break;
  44                 case '\v':
  45                         *(buf++) = '\\';
  46                         *(buf++) = 'v';
  47                         break;
  48                 case '\\':
  49                         *(buf++) = '\\';
  50                         *(buf++) = '\\';
  51                         break;
  52                 case '"':
  53                         *(buf++) = '\\';
  54                         *(buf++) = '"';
  55                         break;
  56                 case '\'':
  57                         *(buf++) = '\\';
  58                         *(buf++) = '\'';
  59                         break;
  60
  61                 default:
  62                         /* For special chars we prefer octal over
  63                          * hexadecimal encoding, simply because glib's
  64                          * g_strescape() does the same */
  65                         if ((c < ' ') || (c >= 127)) {
  66                                 *(buf++) = '\\';
  67                                 *(buf++) = octchar((unsigned char) c >> 6);
  68                                 *(buf++) = octchar((unsigned char) c >> 3);
  69                                 *(buf++) = octchar((unsigned char) c);
  70                         } else
  71                                 *(buf++) = c;
  72                         break;
  73         }
  74
  75         return buf - buf_old;
  76 }
  77
  78 char* cescape_length(const char *s, size_t n) {
  79         const char *f;
  80         char *r, *t;
  81
  82         assert(s || n == 0);
  83
  84         /* Does C style string escaping. May be reversed with
  85          * cunescape(). */
  86
  87         r = new(char, n*4 + 1);
  88         if (!r)
  89                 return NULL;
  90
  91         for (f = s, t = r; f < s + n; f++)
  92                 t += cescape_char(*f, t);
  93
  94         *t = 0;
  95
  96         return r;
  97 }
  98
  99 char* cescape(const char *s) {
 100         assert(s);
 101
 102         return cescape_length(s, strlen(s));
 103 }
 104
 105 int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit, bool accept_nul) {
 106         int r = 1;
 107
 108         assert(p);
 109         assert(ret);
 110
 111         /* Unescapes C style. Returns the unescaped character in ret.
 112          * Sets *eight_bit to true if the escaped sequence either fits in
 113          * one byte in UTF-8 or is a non-unicode literal byte and should
 114          * instead be copied directly.
 115          */
 116
 117         if (length != (size_t) -1 && length < 1)
 118                 return -EINVAL;
 119
 120         switch (p[0]) {
 121
 122         case 'a':
 123                 *ret = '\a';
 124                 break;
 125         case 'b':
 126                 *ret = '\b';
 127                 break;
 128         case 'f':
 129                 *ret = '\f';
 130                 break;
 131         case 'n':
 132                 *ret = '\n';
 133                 break;
 134         case 'r':
 135                 *ret = '\r';
 136                 break;
 137         case 't':
 138                 *ret = '\t';
 139                 break;
 140         case 'v':
 141                 *ret = '\v';
 142                 break;
 143         case '\\':
 144                 *ret = '\\';
 145                 break;
 146         case '"':
 147                 *ret = '"';
 148                 break;
 149         case '\'':
 150                 *ret = '\'';
 151                 break;
 152
 153         case 's':
 154                 /* This is an extension of the XDG syntax files */
 155                 *ret = ' ';
 156                 break;
 157
 158         case 'x': {
 159                 /* hexadecimal encoding */
 160                 int a, b;
 161
 162                 if (length != (size_t) -1 && length < 3)
 163                         return -EINVAL;
 164
 165                 a = unhexchar(p[1]);
 166                 if (a < 0)
 167                         return -EINVAL;
 168
 169                 b = unhexchar(p[2]);
 170                 if (b < 0)
 171                         return -EINVAL;
 172
 173                 /* Don't allow NUL bytes */
 174                 if (a == 0 && b == 0 && !accept_nul)
 175                         return -EINVAL;
 176
 177                 *ret = (a << 4U) | b;
 178                 *eight_bit = true;
 179                 r = 3;
 180                 break;
 181         }
 182
 183         case 'u': {
 184                 /* C++11 style 16bit unicode */
 185
 186                 int a[4];
 187                 size_t i;
 188                 uint32_t c;
 189
 190                 if (length != (size_t) -1 && length < 5)
 191                         return -EINVAL;
 192
 193                 for (i = 0; i < 4; i++) {
 194                         a[i] = unhexchar(p[1 + i]);
 195                         if (a[i] < 0)
 196                                 return a[i];
 197                 }
 198
 199                 c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];
 200
 201                 /* Don't allow 0 chars */
 202                 if (c == 0 && !accept_nul)
 203                         return -EINVAL;
 204
 205                 *ret = c;
 206                 r = 5;
 207                 break;
 208         }
 209
 210         case 'U': {
 211                 /* C++11 style 32bit unicode */
 212
 213                 int a[8];
 214                 size_t i;
 215                 char32_t c;
 216
 217                 if (length != (size_t) -1 && length < 9)
 218                         return -EINVAL;
 219
 220                 for (i = 0; i < 8; i++) {
 221                         a[i] = unhexchar(p[1 + i]);
 222                         if (a[i] < 0)
 223                                 return a[i];
 224                 }
 225
 226                 c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
 227                     ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] <<  8U) | ((uint32_t) a[6] <<  4U) |  (uint32_t) a[7];
 228
 229                 /* Don't allow 0 chars */
 230                 if (c == 0 && !accept_nul)
 231                         return -EINVAL;
 232
 233                 /* Don't allow invalid code points */
 234                 if (!unichar_is_valid(c))
 235                         return -EINVAL;
 236
 237                 *ret = c;
 238                 r = 9;
 239                 break;
 240         }
 241
 242         case '0':
 243         case '1':
 244         case '2':
 245         case '3':
 246         case '4':
 247         case '5':
 248         case '6':
 249         case '7': {
 250                 /* octal encoding */
 251                 int a, b, c;
 252                 char32_t m;
 253
 254                 if (length != (size_t) -1 && length < 3)
 255                         return -EINVAL;
 256
 257                 a = unoctchar(p[0]);
 258                 if (a < 0)
 259                         return -EINVAL;
 260
 261                 b = unoctchar(p[1]);
 262                 if (b < 0)
 263                         return -EINVAL;
 264
 265                 c = unoctchar(p[2]);
 266                 if (c < 0)
 267                         return -EINVAL;
 268
 269                 /* don't allow NUL bytes */
 270                 if (a == 0 && b == 0 && c == 0 && !accept_nul)
 271                         return -EINVAL;
 272
 273                 /* Don't allow bytes above 255 */
 274                 m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c;
 275                 if (m > 255)
 276                         return -EINVAL;
 277
 278                 *ret = m;
 279                 *eight_bit = true;
 280                 r = 3;
 281                 break;
 282         }
 283
 284         default:
 285                 return -EINVAL;
 286         }
 287
 288         return r;
 289 }
 290
 291 int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
 292         char *r, *t;
 293         const char *f;
 294         size_t pl;
 295
 296         assert(s);
 297         assert(ret);
 298
 299         /* Undoes C style string escaping, and optionally prefixes it. */
 300
 301         pl = strlen_ptr(prefix);
 302
 303         r = new(char, pl+length+1);
 304         if (!r)
 305                 return -ENOMEM;
 306
 307         if (prefix)
 308                 memcpy(r, prefix, pl);
 309
 310         for (f = s, t = r + pl; f < s + length; f++) {
 311                 size_t remaining;
 312                 bool eight_bit = false;
 313                 char32_t u;
 314                 int k;
 315
 316                 remaining = s + length - f;
 317                 assert(remaining > 0);
 318
 319                 if (*f != '\\') {
 320                         /* A literal, copy verbatim */
 321                         *(t++) = *f;
 322                         continue;
 323                 }
 324
 325                 if (remaining == 1) {
 326                         if (flags & UNESCAPE_RELAX) {
 327                                 /* A trailing backslash, copy verbatim */
 328                                 *(t++) = *f;
 329                                 continue;
 330                         }
 331
 332                         free(r);
 333                         return -EINVAL;
 334                 }
 335
 336                 k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit, flags & UNESCAPE_ACCEPT_NUL);
 337                 if (k < 0) {
 338                         if (flags & UNESCAPE_RELAX) {
 339                                 /* Invalid escape code, let's take it literal then */
 340                                 *(t++) = '\\';
 341                                 continue;
 342                         }
 343
 344                         free(r);
 345                         return k;
 346                 }
 347
 348                 f += k;
 349                 if (eight_bit)
 350                         /* One byte? Set directly as specified */
 351                         *(t++) = u;
 352                 else
 353                         /* Otherwise encode as multi-byte UTF-8 */
 354                         t += utf8_encode_unichar(t, u);
 355         }
 356
 357         *t = 0;
 358
 359         *ret = r;
 360         return t - r;
 361 }
 362
 363 char* xescape_full(const char *s, const char *bad, size_t console_width, bool eight_bits) {
 364         char *ans, *t, *prev, *prev2;
 365         const char *f;
 366
 367         /* Escapes all chars in bad, in addition to \ and all special chars, in \xFF style escaping. May be
 368          * reversed with cunescape(). If eight_bits is true, characters >= 127 are let through unchanged.
 369          * This corresponds to non-ASCII printable characters in pre-unicode encodings.
 370          *
 371          * If console_width is reached, output is truncated and "..." is appended. */
 372
 373         if (console_width == 0)
 374                 return strdup("");
 375
 376         ans = new(char, MIN(strlen(s), console_width) * 4 + 1);
 377         if (!ans)
 378                 return NULL;
 379
 380         memset(ans, '_', MIN(strlen(s), console_width) * 4);
 381         ans[MIN(strlen(s), console_width) * 4] = 0;
 382
 383         for (f = s, t = prev = prev2 = ans; ; f++) {
 384                 char *tmp_t = t;
 385
 386                 if (!*f) {
 387                         *t = 0;
 388                         return ans;
 389                 }
 390
 391                 if ((unsigned char) *f < ' ' || (!eight_bits && (unsigned char) *f >= 127) ||
 392                     *f == '\\' || strchr(bad, *f)) {
 393                         if ((size_t) (t - ans) + 4 > console_width)
 394                                 break;
 395
 396                         *(t++) = '\\';
 397                         *(t++) = 'x';
 398                         *(t++) = hexchar(*f >> 4);
 399                         *(t++) = hexchar(*f);
 400                 } else {
 401                         if ((size_t) (t - ans) + 1 > console_width)
 402                                 break;
 403
 404                         *(t++) = *f;
 405                 }
 406
 407                 /* We might need to go back two cycles to fit three dots, so remember two positions */
 408                 prev2 = prev;
 409                 prev = tmp_t;
 410         }
 411
 412         /* We can just write where we want, since chars are one-byte */
 413         size_t c = MIN(console_width, 3u); /* If the console is too narrow, write fewer dots */
 414         size_t off;
 415         if (console_width - c >= (size_t) (t - ans))
 416                 off = (size_t) (t - ans);
 417         else if (console_width - c >= (size_t) (prev - ans))
 418                 off = (size_t) (prev - ans);
 419         else if (console_width - c >= (size_t) (prev2 - ans))
 420                 off = (size_t) (prev2 - ans);
 421         else
 422                 off = console_width - c;
 423         assert(off <= (size_t) (t - ans));
 424
 425         memcpy(ans + off, "...", c);
 426         ans[off + c] = '\0';
 427         return ans;
 428 }
 429
 430 char* escape_non_printable_full(const char *str, size_t console_width, bool eight_bit) {
 431         if (eight_bit)
 432                 return xescape_full(str, "", console_width, true);
 433         else
 434                 return utf8_escape_non_printable_full(str, console_width);
 435 }
 436
 437 char* octescape(const char *s, size_t len) {
 438         char *r, *t;
 439         const char *f;
 440
 441         /* Escapes all chars in bad, in addition to \ and " chars,
 442          * in \nnn style escaping. */
 443
 444         r = new(char, len * 4 + 1);
 445         if (!r)
 446                 return NULL;
 447
 448         for (f = s, t = r; f < s + len; f++) {
 449
 450                 if (*f < ' ' || *f >= 127 || IN_SET(*f, '\\', '"')) {
 451                         *(t++) = '\\';
 452                         *(t++) = '0' + (*f >> 6);
 453                         *(t++) = '0' + ((*f >> 3) & 8);
 454                         *(t++) = '0' + (*f & 8);
 455                 } else
 456                         *(t++) = *f;
 457         }
 458
 459         *t = 0;
 460
 461         return r;
 462
 463 }
 464
 465 static char* strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) {
 466         assert(bad);
 467
 468         for (; *s; s++) {
 469                 if (escape_tab_nl && IN_SET(*s, '\n', '\t')) {
 470                         *(t++) = '\\';
 471                         *(t++) = *s == '\n' ? 'n' : 't';
 472                         continue;
 473                 }
 474
 475                 if (*s == '\\' || strchr(bad, *s))
 476                         *(t++) = '\\';
 477
 478                 *(t++) = *s;
 479         }
 480
 481         return t;
 482 }
 483
 484 char* shell_escape(const char *s, const char *bad) {
 485         char *r, *t;
 486
 487         r = new(char, strlen(s)*2+1);
 488         if (!r)
 489                 return NULL;
 490
 491         t = strcpy_backslash_escaped(r, s, bad, false);
 492         *t = 0;
 493
 494         return r;
 495 }
 496
 497 char* shell_maybe_quote(const char *s, EscapeStyle style) {
 498         const char *p;
 499         char *r, *t;
 500
 501         assert(s);
 502
 503         /* Encloses a string in quotes if necessary to make it OK as a shell
 504          * string. Note that we treat benign UTF-8 characters as needing
 505          * escaping too, but that should be OK. */
 506
 507         for (p = s; *p; p++)
 508                 if (*p <= ' ' ||
 509                     *p >= 127 ||
 510                     strchr(SHELL_NEED_QUOTES, *p))
 511                         break;
 512
 513         if (!*p)
 514                 return strdup(s);
 515
 516         r = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1);
 517         if (!r)
 518                 return NULL;
 519
 520         t = r;
 521         switch (style) {
 522         case ESCAPE_BACKSLASH:
 523         case ESCAPE_BACKSLASH_ONELINE:
 524                 *(t++) = '"';
 525                 break;
 526         case ESCAPE_POSIX:
 527                 *(t++) = '$';
 528                 *(t++) = '\'';
 529                 break;
 530         default:
 531                 assert_not_reached("Bad EscapeStyle");
 532         }
 533
 534         t = mempcpy(t, s, p - s);
 535
 536         if (IN_SET(style, ESCAPE_BACKSLASH, ESCAPE_BACKSLASH_ONELINE))
 537                 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE,
 538                                              style == ESCAPE_BACKSLASH_ONELINE);
 539         else
 540                 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE_POSIX, true);
 541
 542         if (IN_SET(style, ESCAPE_BACKSLASH, ESCAPE_BACKSLASH_ONELINE))
 543                 *(t++) = '"';
 544         else
 545                 *(t++) = '\'';
 546         *t = 0;
 547
 548         return r;
 549 }