src/basic/escape.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include "alloc-util.h"
  23 #include "escape.h"
  24 #include "hexdecoct.h"
  25 #include "string-util.h"
  26 #include "utf8.h"
  27 #include "util.h"
  28
  29 size_t cescape_char(char c, char *buf) {
  30         char * buf_old = buf;
  31
  32         switch (c) {
  33
  34                 case '\a':
  35                         *(buf++) = '\\';
  36                         *(buf++) = 'a';
  37                         break;
  38                 case '\b':
  39                         *(buf++) = '\\';
  40                         *(buf++) = 'b';
  41                         break;
  42                 case '\f':
  43                         *(buf++) = '\\';
  44                         *(buf++) = 'f';
  45                         break;
  46                 case '\n':
  47                         *(buf++) = '\\';
  48                         *(buf++) = 'n';
  49                         break;
  50                 case '\r':
  51                         *(buf++) = '\\';
  52                         *(buf++) = 'r';
  53                         break;
  54                 case '\t':
  55                         *(buf++) = '\\';
  56                         *(buf++) = 't';
  57                         break;
  58                 case '\v':
  59                         *(buf++) = '\\';
  60                         *(buf++) = 'v';
  61                         break;
  62                 case '\\':
  63                         *(buf++) = '\\';
  64                         *(buf++) = '\\';
  65                         break;
  66                 case '"':
  67                         *(buf++) = '\\';
  68                         *(buf++) = '"';
  69                         break;
  70                 case '\'':
  71                         *(buf++) = '\\';
  72                         *(buf++) = '\'';
  73                         break;
  74
  75                 default:
  76                         /* For special chars we prefer octal over
  77                          * hexadecimal encoding, simply because glib's
  78                          * g_strescape() does the same */
  79                         if ((c < ' ') || (c >= 127)) {
  80                                 *(buf++) = '\\';
  81                                 *(buf++) = octchar((unsigned char) c >> 6);
  82                                 *(buf++) = octchar((unsigned char) c >> 3);
  83                                 *(buf++) = octchar((unsigned char) c);
  84                         } else
  85                                 *(buf++) = c;
  86                         break;
  87         }
  88
  89         return buf - buf_old;
  90 }
  91
  92 char *cescape(const char *s) {
  93         char *r, *t;
  94         const char *f;
  95
  96         assert(s);
  97
  98         /* Does C style string escaping. May be reversed with
  99          * cunescape(). */
 100
 101         r = new(char, strlen(s)*4 + 1);
 102         if (!r)
 103                 return NULL;
 104
 105         for (f = s, t = r; *f; f++)
 106                 t += cescape_char(*f, t);
 107
 108         *t = 0;
 109
 110         return r;
 111 }
 112
 113 int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode) {
 114         int r = 1;
 115
 116         assert(p);
 117         assert(*p);
 118         assert(ret);
 119
 120         /* Unescapes C style. Returns the unescaped character in ret,
 121          * unless we encountered a \u sequence in which case the full
 122          * unicode character is returned in ret_unicode, instead. */
 123
 124         if (length != (size_t) -1 && length < 1)
 125                 return -EINVAL;
 126
 127         switch (p[0]) {
 128
 129         case 'a':
 130                 *ret = '\a';
 131                 break;
 132         case 'b':
 133                 *ret = '\b';
 134                 break;
 135         case 'f':
 136                 *ret = '\f';
 137                 break;
 138         case 'n':
 139                 *ret = '\n';
 140                 break;
 141         case 'r':
 142                 *ret = '\r';
 143                 break;
 144         case 't':
 145                 *ret = '\t';
 146                 break;
 147         case 'v':
 148                 *ret = '\v';
 149                 break;
 150         case '\\':
 151                 *ret = '\\';
 152                 break;
 153         case '"':
 154                 *ret = '"';
 155                 break;
 156         case '\'':
 157                 *ret = '\'';
 158                 break;
 159
 160         case 's':
 161                 /* This is an extension of the XDG syntax files */
 162                 *ret = ' ';
 163                 break;
 164
 165         case 'x': {
 166                 /* hexadecimal encoding */
 167                 int a, b;
 168
 169                 if (length != (size_t) -1 && length < 3)
 170                         return -EINVAL;
 171
 172                 a = unhexchar(p[1]);
 173                 if (a < 0)
 174                         return -EINVAL;
 175
 176                 b = unhexchar(p[2]);
 177                 if (b < 0)
 178                         return -EINVAL;
 179
 180                 /* Don't allow NUL bytes */
 181                 if (a == 0 && b == 0)
 182                         return -EINVAL;
 183
 184                 *ret = (char) ((a << 4U) | b);
 185                 r = 3;
 186                 break;
 187         }
 188
 189         case 'u': {
 190                 /* C++11 style 16bit unicode */
 191
 192                 int a[4];
 193                 unsigned i;
 194                 uint32_t c;
 195
 196                 if (length != (size_t) -1 && length < 5)
 197                         return -EINVAL;
 198
 199                 for (i = 0; i < 4; i++) {
 200                         a[i] = unhexchar(p[1 + i]);
 201                         if (a[i] < 0)
 202                                 return a[i];
 203                 }
 204
 205                 c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];
 206
 207                 /* Don't allow 0 chars */
 208                 if (c == 0)
 209                         return -EINVAL;
 210
 211                 if (c < 128)
 212                         *ret = c;
 213                 else {
 214                         if (!ret_unicode)
 215                                 return -EINVAL;
 216
 217                         *ret = 0;
 218                         *ret_unicode = c;
 219                 }
 220
 221                 r = 5;
 222                 break;
 223         }
 224
 225         case 'U': {
 226                 /* C++11 style 32bit unicode */
 227
 228                 int a[8];
 229                 unsigned i;
 230                 uint32_t c;
 231
 232                 if (length != (size_t) -1 && length < 9)
 233                         return -EINVAL;
 234
 235                 for (i = 0; i < 8; i++) {
 236                         a[i] = unhexchar(p[1 + i]);
 237                         if (a[i] < 0)
 238                                 return a[i];
 239                 }
 240
 241                 c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
 242                     ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] <<  8U) | ((uint32_t) a[6] <<  4U) |  (uint32_t) a[7];
 243
 244                 /* Don't allow 0 chars */
 245                 if (c == 0)
 246                         return -EINVAL;
 247
 248                 /* Don't allow invalid code points */
 249                 if (!unichar_is_valid(c))
 250                         return -EINVAL;
 251
 252                 if (c < 128)
 253                         *ret = c;
 254                 else {
 255                         if (!ret_unicode)
 256                                 return -EINVAL;
 257
 258                         *ret = 0;
 259                         *ret_unicode = c;
 260                 }
 261
 262                 r = 9;
 263                 break;
 264         }
 265
 266         case '0':
 267         case '1':
 268         case '2':
 269         case '3':
 270         case '4':
 271         case '5':
 272         case '6':
 273         case '7': {
 274                 /* octal encoding */
 275                 int a, b, c;
 276                 uint32_t m;
 277
 278                 if (length != (size_t) -1 && length < 3)
 279                         return -EINVAL;
 280
 281                 a = unoctchar(p[0]);
 282                 if (a < 0)
 283                         return -EINVAL;
 284
 285                 b = unoctchar(p[1]);
 286                 if (b < 0)
 287                         return -EINVAL;
 288
 289                 c = unoctchar(p[2]);
 290                 if (c < 0)
 291                         return -EINVAL;
 292
 293                 /* don't allow NUL bytes */
 294                 if (a == 0 && b == 0 && c == 0)
 295                         return -EINVAL;
 296
 297                 /* Don't allow bytes above 255 */
 298                 m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c;
 299                 if (m > 255)
 300                         return -EINVAL;
 301
 302                 *ret = m;
 303                 r = 3;
 304                 break;
 305         }
 306
 307         default:
 308                 return -EINVAL;
 309         }
 310
 311         return r;
 312 }
 313
 314 int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
 315         char *r, *t;
 316         const char *f;
 317         size_t pl;
 318
 319         assert(s);
 320         assert(ret);
 321
 322         /* Undoes C style string escaping, and optionally prefixes it. */
 323
 324         pl = prefix ? strlen(prefix) : 0;
 325
 326         r = new(char, pl+length+1);
 327         if (!r)
 328                 return -ENOMEM;
 329
 330         if (prefix)
 331                 memcpy(r, prefix, pl);
 332
 333         for (f = s, t = r + pl; f < s + length; f++) {
 334                 size_t remaining;
 335                 uint32_t u;
 336                 char c;
 337                 int k;
 338
 339                 remaining = s + length - f;
 340                 assert(remaining > 0);
 341
 342                 if (*f != '\\') {
 343                         /* A literal literal, copy verbatim */
 344                         *(t++) = *f;
 345                         continue;
 346                 }
 347
 348                 if (remaining == 1) {
 349                         if (flags & UNESCAPE_RELAX) {
 350                                 /* A trailing backslash, copy verbatim */
 351                                 *(t++) = *f;
 352                                 continue;
 353                         }
 354
 355                         free(r);
 356                         return -EINVAL;
 357                 }
 358
 359                 k = cunescape_one(f + 1, remaining - 1, &c, &u);
 360                 if (k < 0) {
 361                         if (flags & UNESCAPE_RELAX) {
 362                                 /* Invalid escape code, let's take it literal then */
 363                                 *(t++) = '\\';
 364                                 continue;
 365                         }
 366
 367                         free(r);
 368                         return k;
 369                 }
 370
 371                 if (c != 0)
 372                         /* Non-Unicode? Let's encode this directly */
 373                         *(t++) = c;
 374                 else
 375                         /* Unicode? Then let's encode this in UTF-8 */
 376                         t += utf8_encode_unichar(t, u);
 377
 378                 f += k;
 379         }
 380
 381         *t = 0;
 382
 383         *ret = r;
 384         return t - r;
 385 }
 386
 387 int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
 388         return cunescape_length_with_prefix(s, length, NULL, flags, ret);
 389 }
 390
 391 int cunescape(const char *s, UnescapeFlags flags, char **ret) {
 392         return cunescape_length(s, strlen(s), flags, ret);
 393 }
 394
 395 char *xescape(const char *s, const char *bad) {
 396         char *r, *t;
 397         const char *f;
 398
 399         /* Escapes all chars in bad, in addition to \ and all special
 400          * chars, in \xFF style escaping. May be reversed with
 401          * cunescape(). */
 402
 403         r = new(char, strlen(s) * 4 + 1);
 404         if (!r)
 405                 return NULL;
 406
 407         for (f = s, t = r; *f; f++) {
 408
 409                 if ((*f < ' ') || (*f >= 127) ||
 410                     (*f == '\\') || strchr(bad, *f)) {
 411                         *(t++) = '\\';
 412                         *(t++) = 'x';
 413                         *(t++) = hexchar(*f >> 4);
 414                         *(t++) = hexchar(*f);
 415                 } else
 416                         *(t++) = *f;
 417         }
 418
 419         *t = 0;
 420
 421         return r;
 422 }
 423
 424 static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
 425         assert(bad);
 426
 427         for (; *s; s++) {
 428                 if (*s == '\\' || strchr(bad, *s))
 429                         *(t++) = '\\';
 430
 431                 *(t++) = *s;
 432         }
 433
 434         return t;
 435 }
 436
 437 char *shell_escape(const char *s, const char *bad) {
 438         char *r, *t;
 439
 440         r = new(char, strlen(s)*2+1);
 441         if (!r)
 442                 return NULL;
 443
 444         t = strcpy_backslash_escaped(r, s, bad);
 445         *t = 0;
 446
 447         return r;
 448 }
 449
 450 char *shell_maybe_quote(const char *s) {
 451         const char *p;
 452         char *r, *t;
 453
 454         assert(s);
 455
 456         /* Encloses a string in double quotes if necessary to make it
 457          * OK as shell string. */
 458
 459         for (p = s; *p; p++)
 460                 if (*p <= ' ' ||
 461                     *p >= 127 ||
 462                     strchr(SHELL_NEED_QUOTES, *p))
 463                         break;
 464
 465         if (!*p)
 466                 return strdup(s);
 467
 468         r = new(char, 1+strlen(s)*2+1+1);
 469         if (!r)
 470                 return NULL;
 471
 472         t = r;
 473         *(t++) = '"';
 474         t = mempcpy(t, s, p - s);
 475
 476         t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE);
 477
 478         *(t++)= '"';
 479         *t = 0;
 480
 481         return r;
 482 }