src/basic/escape.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6 ***/
   7
   8 #include <errno.h>
   9 #include <stdlib.h>
  10 #include <string.h>
  11
  12 #include "alloc-util.h"
  13 #include "escape.h"
  14 #include "hexdecoct.h"
  15 #include "macro.h"
  16 #include "utf8.h"
  17
  18 int cescape_char(char c, char *buf) {
  19         char *buf_old = buf;
  20
  21         /* Needs space for 4 characters in the buffer */
  22
  23         switch (c) {
  24
  25                 case '\a':
  26                         *(buf++) = '\\';
  27                         *(buf++) = 'a';
  28                         break;
  29                 case '\b':
  30                         *(buf++) = '\\';
  31                         *(buf++) = 'b';
  32                         break;
  33                 case '\f':
  34                         *(buf++) = '\\';
  35                         *(buf++) = 'f';
  36                         break;
  37                 case '\n':
  38                         *(buf++) = '\\';
  39                         *(buf++) = 'n';
  40                         break;
  41                 case '\r':
  42                         *(buf++) = '\\';
  43                         *(buf++) = 'r';
  44                         break;
  45                 case '\t':
  46                         *(buf++) = '\\';
  47                         *(buf++) = 't';
  48                         break;
  49                 case '\v':
  50                         *(buf++) = '\\';
  51                         *(buf++) = 'v';
  52                         break;
  53                 case '\\':
  54                         *(buf++) = '\\';
  55                         *(buf++) = '\\';
  56                         break;
  57                 case '"':
  58                         *(buf++) = '\\';
  59                         *(buf++) = '"';
  60                         break;
  61                 case '\'':
  62                         *(buf++) = '\\';
  63                         *(buf++) = '\'';
  64                         break;
  65
  66                 default:
  67                         /* For special chars we prefer octal over
  68                          * hexadecimal encoding, simply because glib's
  69                          * g_strescape() does the same */
  70                         if ((c < ' ') || (c >= 127)) {
  71                                 *(buf++) = '\\';
  72                                 *(buf++) = octchar((unsigned char) c >> 6);
  73                                 *(buf++) = octchar((unsigned char) c >> 3);
  74                                 *(buf++) = octchar((unsigned char) c);
  75                         } else
  76                                 *(buf++) = c;
  77                         break;
  78         }
  79
  80         return buf - buf_old;
  81 }
  82
  83 char *cescape_length(const char *s, size_t n) {
  84         const char *f;
  85         char *r, *t;
  86
  87         assert(s || n == 0);
  88
  89         /* Does C style string escaping. May be reversed with
  90          * cunescape(). */
  91
  92         r = new(char, n*4 + 1);
  93         if (!r)
  94                 return NULL;
  95
  96         for (f = s, t = r; f < s + n; f++)
  97                 t += cescape_char(*f, t);
  98
  99         *t = 0;
 100
 101         return r;
 102 }
 103
 104 char *cescape(const char *s) {
 105         assert(s);
 106
 107         return cescape_length(s, strlen(s));
 108 }
 109
 110 int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) {
 111         int r = 1;
 112
 113         assert(p);
 114         assert(*p);
 115         assert(ret);
 116
 117         /* Unescapes C style. Returns the unescaped character in ret.
 118          * Sets *eight_bit to true if the escaped sequence either fits in
 119          * one byte in UTF-8 or is a non-unicode literal byte and should
 120          * instead be copied directly.
 121          */
 122
 123         if (length != (size_t) -1 && length < 1)
 124                 return -EINVAL;
 125
 126         switch (p[0]) {
 127
 128         case 'a':
 129                 *ret = '\a';
 130                 break;
 131         case 'b':
 132                 *ret = '\b';
 133                 break;
 134         case 'f':
 135                 *ret = '\f';
 136                 break;
 137         case 'n':
 138                 *ret = '\n';
 139                 break;
 140         case 'r':
 141                 *ret = '\r';
 142                 break;
 143         case 't':
 144                 *ret = '\t';
 145                 break;
 146         case 'v':
 147                 *ret = '\v';
 148                 break;
 149         case '\\':
 150                 *ret = '\\';
 151                 break;
 152         case '"':
 153                 *ret = '"';
 154                 break;
 155         case '\'':
 156                 *ret = '\'';
 157                 break;
 158
 159         case 's':
 160                 /* This is an extension of the XDG syntax files */
 161                 *ret = ' ';
 162                 break;
 163
 164         case 'x': {
 165                 /* hexadecimal encoding */
 166                 int a, b;
 167
 168                 if (length != (size_t) -1 && length < 3)
 169                         return -EINVAL;
 170
 171                 a = unhexchar(p[1]);
 172                 if (a < 0)
 173                         return -EINVAL;
 174
 175                 b = unhexchar(p[2]);
 176                 if (b < 0)
 177                         return -EINVAL;
 178
 179                 /* Don't allow NUL bytes */
 180                 if (a == 0 && b == 0)
 181                         return -EINVAL;
 182
 183                 *ret = (a << 4U) | b;
 184                 *eight_bit = true;
 185                 r = 3;
 186                 break;
 187         }
 188
 189         case 'u': {
 190                 /* C++11 style 16bit unicode */
 191
 192                 int a[4];
 193                 size_t i;
 194                 uint32_t c;
 195
 196                 if (length != (size_t) -1 && length < 5)
 197                         return -EINVAL;
 198
 199                 for (i = 0; i < 4; i++) {
 200                         a[i] = unhexchar(p[1 + i]);
 201                         if (a[i] < 0)
 202                                 return a[i];
 203                 }
 204
 205                 c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];
 206
 207                 /* Don't allow 0 chars */
 208                 if (c == 0)
 209                         return -EINVAL;
 210
 211                 *ret = c;
 212                 r = 5;
 213                 break;
 214         }
 215
 216         case 'U': {
 217                 /* C++11 style 32bit unicode */
 218
 219                 int a[8];
 220                 size_t i;
 221                 char32_t c;
 222
 223                 if (length != (size_t) -1 && length < 9)
 224                         return -EINVAL;
 225
 226                 for (i = 0; i < 8; i++) {
 227                         a[i] = unhexchar(p[1 + i]);
 228                         if (a[i] < 0)
 229                                 return a[i];
 230                 }
 231
 232                 c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
 233                     ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] <<  8U) | ((uint32_t) a[6] <<  4U) |  (uint32_t) a[7];
 234
 235                 /* Don't allow 0 chars */
 236                 if (c == 0)
 237                         return -EINVAL;
 238
 239                 /* Don't allow invalid code points */
 240                 if (!unichar_is_valid(c))
 241                         return -EINVAL;
 242
 243                 *ret = c;
 244                 r = 9;
 245                 break;
 246         }
 247
 248         case '0':
 249         case '1':
 250         case '2':
 251         case '3':
 252         case '4':
 253         case '5':
 254         case '6':
 255         case '7': {
 256                 /* octal encoding */
 257                 int a, b, c;
 258                 char32_t m;
 259
 260                 if (length != (size_t) -1 && length < 3)
 261                         return -EINVAL;
 262
 263                 a = unoctchar(p[0]);
 264                 if (a < 0)
 265                         return -EINVAL;
 266
 267                 b = unoctchar(p[1]);
 268                 if (b < 0)
 269                         return -EINVAL;
 270
 271                 c = unoctchar(p[2]);
 272                 if (c < 0)
 273                         return -EINVAL;
 274
 275                 /* don't allow NUL bytes */
 276                 if (a == 0 && b == 0 && c == 0)
 277                         return -EINVAL;
 278
 279                 /* Don't allow bytes above 255 */
 280                 m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c;
 281                 if (m > 255)
 282                         return -EINVAL;
 283
 284                 *ret = m;
 285                 *eight_bit = true;
 286                 r = 3;
 287                 break;
 288         }
 289
 290         default:
 291                 return -EINVAL;
 292         }
 293
 294         return r;
 295 }
 296
 297 int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
 298         char *r, *t;
 299         const char *f;
 300         size_t pl;
 301
 302         assert(s);
 303         assert(ret);
 304
 305         /* Undoes C style string escaping, and optionally prefixes it. */
 306
 307         pl = strlen_ptr(prefix);
 308
 309         r = new(char, pl+length+1);
 310         if (!r)
 311                 return -ENOMEM;
 312
 313         if (prefix)
 314                 memcpy(r, prefix, pl);
 315
 316         for (f = s, t = r + pl; f < s + length; f++) {
 317                 size_t remaining;
 318                 bool eight_bit = false;
 319                 char32_t u;
 320                 int k;
 321
 322                 remaining = s + length - f;
 323                 assert(remaining > 0);
 324
 325                 if (*f != '\\') {
 326                         /* A literal, copy verbatim */
 327                         *(t++) = *f;
 328                         continue;
 329                 }
 330
 331                 if (remaining == 1) {
 332                         if (flags & UNESCAPE_RELAX) {
 333                                 /* A trailing backslash, copy verbatim */
 334                                 *(t++) = *f;
 335                                 continue;
 336                         }
 337
 338                         free(r);
 339                         return -EINVAL;
 340                 }
 341
 342                 k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit);
 343                 if (k < 0) {
 344                         if (flags & UNESCAPE_RELAX) {
 345                                 /* Invalid escape code, let's take it literal then */
 346                                 *(t++) = '\\';
 347                                 continue;
 348                         }
 349
 350                         free(r);
 351                         return k;
 352                 }
 353
 354                 f += k;
 355                 if (eight_bit)
 356                         /* One byte? Set directly as specified */
 357                         *(t++) = u;
 358                 else
 359                         /* Otherwise encode as multi-byte UTF-8 */
 360                         t += utf8_encode_unichar(t, u);
 361         }
 362
 363         *t = 0;
 364
 365         *ret = r;
 366         return t - r;
 367 }
 368
 369 int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
 370         return cunescape_length_with_prefix(s, length, NULL, flags, ret);
 371 }
 372
 373 int cunescape(const char *s, UnescapeFlags flags, char **ret) {
 374         return cunescape_length(s, strlen(s), flags, ret);
 375 }
 376
 377 char *xescape(const char *s, const char *bad) {
 378         char *r, *t;
 379         const char *f;
 380
 381         /* Escapes all chars in bad, in addition to \ and all special
 382          * chars, in \xFF style escaping. May be reversed with
 383          * cunescape(). */
 384
 385         r = new(char, strlen(s) * 4 + 1);
 386         if (!r)
 387                 return NULL;
 388
 389         for (f = s, t = r; *f; f++) {
 390
 391                 if ((*f < ' ') || (*f >= 127) ||
 392                     (*f == '\\') || strchr(bad, *f)) {
 393                         *(t++) = '\\';
 394                         *(t++) = 'x';
 395                         *(t++) = hexchar(*f >> 4);
 396                         *(t++) = hexchar(*f);
 397                 } else
 398                         *(t++) = *f;
 399         }
 400
 401         *t = 0;
 402
 403         return r;
 404 }
 405
 406 char *octescape(const char *s, size_t len) {
 407         char *r, *t;
 408         const char *f;
 409
 410         /* Escapes all chars in bad, in addition to \ and " chars,
 411          * in \nnn style escaping. */
 412
 413         r = new(char, len * 4 + 1);
 414         if (!r)
 415                 return NULL;
 416
 417         for (f = s, t = r; f < s + len; f++) {
 418
 419                 if (*f < ' ' || *f >= 127 || IN_SET(*f, '\\', '"')) {
 420                         *(t++) = '\\';
 421                         *(t++) = '0' + (*f >> 6);
 422                         *(t++) = '0' + ((*f >> 3) & 8);
 423                         *(t++) = '0' + (*f & 8);
 424                 } else
 425                         *(t++) = *f;
 426         }
 427
 428         *t = 0;
 429
 430         return r;
 431
 432 }
 433
 434 static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) {
 435         assert(bad);
 436
 437         for (; *s; s++) {
 438                 if (escape_tab_nl && IN_SET(*s, '\n', '\t')) {
 439                         *(t++) = '\\';
 440                         *(t++) = *s == '\n' ? 'n' : 't';
 441                         continue;
 442                 }
 443
 444                 if (*s == '\\' || strchr(bad, *s))
 445                         *(t++) = '\\';
 446
 447                 *(t++) = *s;
 448         }
 449
 450         return t;
 451 }
 452
 453 char *shell_escape(const char *s, const char *bad) {
 454         char *r, *t;
 455
 456         r = new(char, strlen(s)*2+1);
 457         if (!r)
 458                 return NULL;
 459
 460         t = strcpy_backslash_escaped(r, s, bad, false);
 461         *t = 0;
 462
 463         return r;
 464 }
 465
 466 char* shell_maybe_quote(const char *s, EscapeStyle style) {
 467         const char *p;
 468         char *r, *t;
 469
 470         assert(s);
 471
 472         /* Encloses a string in quotes if necessary to make it OK as a shell
 473          * string. Note that we treat benign UTF-8 characters as needing
 474          * escaping too, but that should be OK. */
 475
 476         for (p = s; *p; p++)
 477                 if (*p <= ' ' ||
 478                     *p >= 127 ||
 479                     strchr(SHELL_NEED_QUOTES, *p))
 480                         break;
 481
 482         if (!*p)
 483                 return strdup(s);
 484
 485         r = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1);
 486         if (!r)
 487                 return NULL;
 488
 489         t = r;
 490         if (style == ESCAPE_BACKSLASH)
 491                 *(t++) = '"';
 492         else if (style == ESCAPE_POSIX) {
 493                 *(t++) = '$';
 494                 *(t++) = '\'';
 495         } else
 496                 assert_not_reached("Bad EscapeStyle");
 497
 498         t = mempcpy(t, s, p - s);
 499
 500         if (style == ESCAPE_BACKSLASH)
 501                 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE, false);
 502         else
 503                 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE_POSIX, true);
 504
 505         if (style == ESCAPE_BACKSLASH)
 506                 *(t++) = '"';
 507         else
 508                 *(t++) = '\'';
 509         *t = 0;
 510
 511         return r;
 512 }