src/basic/escape.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include "alloc-util.h"
  23 #include "escape.h"
  24 #include "hexdecoct.h"
  25 #include "string-util.h"
  26 #include "utf8.h"
  27 #include "util.h"
  28
  29 size_t cescape_char(char c, char *buf) {
  30         char * buf_old = buf;
  31
  32         switch (c) {
  33
  34                 case '\a':
  35                         *(buf++) = '\\';
  36                         *(buf++) = 'a';
  37                         break;
  38                 case '\b':
  39                         *(buf++) = '\\';
  40                         *(buf++) = 'b';
  41                         break;
  42                 case '\f':
  43                         *(buf++) = '\\';
  44                         *(buf++) = 'f';
  45                         break;
  46                 case '\n':
  47                         *(buf++) = '\\';
  48                         *(buf++) = 'n';
  49                         break;
  50                 case '\r':
  51                         *(buf++) = '\\';
  52                         *(buf++) = 'r';
  53                         break;
  54                 case '\t':
  55                         *(buf++) = '\\';
  56                         *(buf++) = 't';
  57                         break;
  58                 case '\v':
  59                         *(buf++) = '\\';
  60                         *(buf++) = 'v';
  61                         break;
  62                 case '\\':
  63                         *(buf++) = '\\';
  64                         *(buf++) = '\\';
  65                         break;
  66                 case '"':
  67                         *(buf++) = '\\';
  68                         *(buf++) = '"';
  69                         break;
  70                 case '\'':
  71                         *(buf++) = '\\';
  72                         *(buf++) = '\'';
  73                         break;
  74
  75                 default:
  76                         /* For special chars we prefer octal over
  77                          * hexadecimal encoding, simply because glib's
  78                          * g_strescape() does the same */
  79                         if ((c < ' ') || (c >= 127)) {
  80                                 *(buf++) = '\\';
  81                                 *(buf++) = octchar((unsigned char) c >> 6);
  82                                 *(buf++) = octchar((unsigned char) c >> 3);
  83                                 *(buf++) = octchar((unsigned char) c);
  84                         } else
  85                                 *(buf++) = c;
  86                         break;
  87         }
  88
  89         return buf - buf_old;
  90 }
  91
  92 char *cescape_length(const char *s, size_t n) {
  93         const char *f;
  94         char *r, *t;
  95
  96         assert(s || n == 0);
  97
  98         /* Does C style string escaping. May be reversed with
  99          * cunescape(). */
 100
 101         r = new(char, n*4 + 1);
 102         if (!r)
 103                 return NULL;
 104
 105         for (f = s, t = r; f < s + n; f++)
 106                 t += cescape_char(*f, t);
 107
 108         *t = 0;
 109
 110         return r;
 111 }
 112
 113 char *cescape(const char *s) {
 114         assert(s);
 115
 116         return cescape_length(s, strlen(s));
 117 }
 118
 119 int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode) {
 120         int r = 1;
 121
 122         assert(p);
 123         assert(*p);
 124         assert(ret);
 125
 126         /* Unescapes C style. Returns the unescaped character in ret,
 127          * unless we encountered a \u sequence in which case the full
 128          * unicode character is returned in ret_unicode, instead. */
 129
 130         if (length != (size_t) -1 && length < 1)
 131                 return -EINVAL;
 132
 133         switch (p[0]) {
 134
 135         case 'a':
 136                 *ret = '\a';
 137                 break;
 138         case 'b':
 139                 *ret = '\b';
 140                 break;
 141         case 'f':
 142                 *ret = '\f';
 143                 break;
 144         case 'n':
 145                 *ret = '\n';
 146                 break;
 147         case 'r':
 148                 *ret = '\r';
 149                 break;
 150         case 't':
 151                 *ret = '\t';
 152                 break;
 153         case 'v':
 154                 *ret = '\v';
 155                 break;
 156         case '\\':
 157                 *ret = '\\';
 158                 break;
 159         case '"':
 160                 *ret = '"';
 161                 break;
 162         case '\'':
 163                 *ret = '\'';
 164                 break;
 165
 166         case 's':
 167                 /* This is an extension of the XDG syntax files */
 168                 *ret = ' ';
 169                 break;
 170
 171         case 'x': {
 172                 /* hexadecimal encoding */
 173                 int a, b;
 174
 175                 if (length != (size_t) -1 && length < 3)
 176                         return -EINVAL;
 177
 178                 a = unhexchar(p[1]);
 179                 if (a < 0)
 180                         return -EINVAL;
 181
 182                 b = unhexchar(p[2]);
 183                 if (b < 0)
 184                         return -EINVAL;
 185
 186                 /* Don't allow NUL bytes */
 187                 if (a == 0 && b == 0)
 188                         return -EINVAL;
 189
 190                 *ret = (char) ((a << 4U) | b);
 191                 r = 3;
 192                 break;
 193         }
 194
 195         case 'u': {
 196                 /* C++11 style 16bit unicode */
 197
 198                 int a[4];
 199                 unsigned i;
 200                 uint32_t c;
 201
 202                 if (length != (size_t) -1 && length < 5)
 203                         return -EINVAL;
 204
 205                 for (i = 0; i < 4; i++) {
 206                         a[i] = unhexchar(p[1 + i]);
 207                         if (a[i] < 0)
 208                                 return a[i];
 209                 }
 210
 211                 c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];
 212
 213                 /* Don't allow 0 chars */
 214                 if (c == 0)
 215                         return -EINVAL;
 216
 217                 if (c < 128)
 218                         *ret = c;
 219                 else {
 220                         if (!ret_unicode)
 221                                 return -EINVAL;
 222
 223                         *ret = 0;
 224                         *ret_unicode = c;
 225                 }
 226
 227                 r = 5;
 228                 break;
 229         }
 230
 231         case 'U': {
 232                 /* C++11 style 32bit unicode */
 233
 234                 int a[8];
 235                 unsigned i;
 236                 uint32_t c;
 237
 238                 if (length != (size_t) -1 && length < 9)
 239                         return -EINVAL;
 240
 241                 for (i = 0; i < 8; i++) {
 242                         a[i] = unhexchar(p[1 + i]);
 243                         if (a[i] < 0)
 244                                 return a[i];
 245                 }
 246
 247                 c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
 248                     ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] <<  8U) | ((uint32_t) a[6] <<  4U) |  (uint32_t) a[7];
 249
 250                 /* Don't allow 0 chars */
 251                 if (c == 0)
 252                         return -EINVAL;
 253
 254                 /* Don't allow invalid code points */
 255                 if (!unichar_is_valid(c))
 256                         return -EINVAL;
 257
 258                 if (c < 128)
 259                         *ret = c;
 260                 else {
 261                         if (!ret_unicode)
 262                                 return -EINVAL;
 263
 264                         *ret = 0;
 265                         *ret_unicode = c;
 266                 }
 267
 268                 r = 9;
 269                 break;
 270         }
 271
 272         case '0':
 273         case '1':
 274         case '2':
 275         case '3':
 276         case '4':
 277         case '5':
 278         case '6':
 279         case '7': {
 280                 /* octal encoding */
 281                 int a, b, c;
 282                 uint32_t m;
 283
 284                 if (length != (size_t) -1 && length < 3)
 285                         return -EINVAL;
 286
 287                 a = unoctchar(p[0]);
 288                 if (a < 0)
 289                         return -EINVAL;
 290
 291                 b = unoctchar(p[1]);
 292                 if (b < 0)
 293                         return -EINVAL;
 294
 295                 c = unoctchar(p[2]);
 296                 if (c < 0)
 297                         return -EINVAL;
 298
 299                 /* don't allow NUL bytes */
 300                 if (a == 0 && b == 0 && c == 0)
 301                         return -EINVAL;
 302
 303                 /* Don't allow bytes above 255 */
 304                 m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c;
 305                 if (m > 255)
 306                         return -EINVAL;
 307
 308                 *ret = m;
 309                 r = 3;
 310                 break;
 311         }
 312
 313         default:
 314                 return -EINVAL;
 315         }
 316
 317         return r;
 318 }
 319
 320 int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
 321         char *r, *t;
 322         const char *f;
 323         size_t pl;
 324
 325         assert(s);
 326         assert(ret);
 327
 328         /* Undoes C style string escaping, and optionally prefixes it. */
 329
 330         pl = prefix ? strlen(prefix) : 0;
 331
 332         r = new(char, pl+length+1);
 333         if (!r)
 334                 return -ENOMEM;
 335
 336         if (prefix)
 337                 memcpy(r, prefix, pl);
 338
 339         for (f = s, t = r + pl; f < s + length; f++) {
 340                 size_t remaining;
 341                 uint32_t u;
 342                 char c;
 343                 int k;
 344
 345                 remaining = s + length - f;
 346                 assert(remaining > 0);
 347
 348                 if (*f != '\\') {
 349                         /* A literal literal, copy verbatim */
 350                         *(t++) = *f;
 351                         continue;
 352                 }
 353
 354                 if (remaining == 1) {
 355                         if (flags & UNESCAPE_RELAX) {
 356                                 /* A trailing backslash, copy verbatim */
 357                                 *(t++) = *f;
 358                                 continue;
 359                         }
 360
 361                         free(r);
 362                         return -EINVAL;
 363                 }
 364
 365                 k = cunescape_one(f + 1, remaining - 1, &c, &u);
 366                 if (k < 0) {
 367                         if (flags & UNESCAPE_RELAX) {
 368                                 /* Invalid escape code, let's take it literal then */
 369                                 *(t++) = '\\';
 370                                 continue;
 371                         }
 372
 373                         free(r);
 374                         return k;
 375                 }
 376
 377                 if (c != 0)
 378                         /* Non-Unicode? Let's encode this directly */
 379                         *(t++) = c;
 380                 else
 381                         /* Unicode? Then let's encode this in UTF-8 */
 382                         t += utf8_encode_unichar(t, u);
 383
 384                 f += k;
 385         }
 386
 387         *t = 0;
 388
 389         *ret = r;
 390         return t - r;
 391 }
 392
 393 int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
 394         return cunescape_length_with_prefix(s, length, NULL, flags, ret);
 395 }
 396
 397 int cunescape(const char *s, UnescapeFlags flags, char **ret) {
 398         return cunescape_length(s, strlen(s), flags, ret);
 399 }
 400
 401 char *xescape(const char *s, const char *bad) {
 402         char *r, *t;
 403         const char *f;
 404
 405         /* Escapes all chars in bad, in addition to \ and all special
 406          * chars, in \xFF style escaping. May be reversed with
 407          * cunescape(). */
 408
 409         r = new(char, strlen(s) * 4 + 1);
 410         if (!r)
 411                 return NULL;
 412
 413         for (f = s, t = r; *f; f++) {
 414
 415                 if ((*f < ' ') || (*f >= 127) ||
 416                     (*f == '\\') || strchr(bad, *f)) {
 417                         *(t++) = '\\';
 418                         *(t++) = 'x';
 419                         *(t++) = hexchar(*f >> 4);
 420                         *(t++) = hexchar(*f);
 421                 } else
 422                         *(t++) = *f;
 423         }
 424
 425         *t = 0;
 426
 427         return r;
 428 }
 429
 430 static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
 431         assert(bad);
 432
 433         for (; *s; s++) {
 434                 if (*s == '\\' || strchr(bad, *s))
 435                         *(t++) = '\\';
 436
 437                 *(t++) = *s;
 438         }
 439
 440         return t;
 441 }
 442
 443 char *shell_escape(const char *s, const char *bad) {
 444         char *r, *t;
 445
 446         r = new(char, strlen(s)*2+1);
 447         if (!r)
 448                 return NULL;
 449
 450         t = strcpy_backslash_escaped(r, s, bad);
 451         *t = 0;
 452
 453         return r;
 454 }
 455
 456 char *shell_maybe_quote(const char *s) {
 457         const char *p;
 458         char *r, *t;
 459
 460         assert(s);
 461
 462         /* Encloses a string in double quotes if necessary to make it
 463          * OK as shell string. */
 464
 465         for (p = s; *p; p++)
 466                 if (*p <= ' ' ||
 467                     *p >= 127 ||
 468                     strchr(SHELL_NEED_QUOTES, *p))
 469                         break;
 470
 471         if (!*p)
 472                 return strdup(s);
 473
 474         r = new(char, 1+strlen(s)*2+1+1);
 475         if (!r)
 476                 return NULL;
 477
 478         t = r;
 479         *(t++) = '"';
 480         t = mempcpy(t, s, p - s);
 481
 482         t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE);
 483
 484         *(t++)= '"';
 485         *t = 0;
 486
 487         return r;
 488 }