src/basic/escape.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include "alloc-util.h"
  23 #include "escape.h"
  24 #include "hexdecoct.h"
  25 #include "utf8.h"
  26 #include "util.h"
  27
  28 size_t cescape_char(char c, char *buf) {
  29         char * buf_old = buf;
  30
  31         switch (c) {
  32
  33                 case '\a':
  34                         *(buf++) = '\\';
  35                         *(buf++) = 'a';
  36                         break;
  37                 case '\b':
  38                         *(buf++) = '\\';
  39                         *(buf++) = 'b';
  40                         break;
  41                 case '\f':
  42                         *(buf++) = '\\';
  43                         *(buf++) = 'f';
  44                         break;
  45                 case '\n':
  46                         *(buf++) = '\\';
  47                         *(buf++) = 'n';
  48                         break;
  49                 case '\r':
  50                         *(buf++) = '\\';
  51                         *(buf++) = 'r';
  52                         break;
  53                 case '\t':
  54                         *(buf++) = '\\';
  55                         *(buf++) = 't';
  56                         break;
  57                 case '\v':
  58                         *(buf++) = '\\';
  59                         *(buf++) = 'v';
  60                         break;
  61                 case '\\':
  62                         *(buf++) = '\\';
  63                         *(buf++) = '\\';
  64                         break;
  65                 case '"':
  66                         *(buf++) = '\\';
  67                         *(buf++) = '"';
  68                         break;
  69                 case '\'':
  70                         *(buf++) = '\\';
  71                         *(buf++) = '\'';
  72                         break;
  73
  74                 default:
  75                         /* For special chars we prefer octal over
  76                          * hexadecimal encoding, simply because glib's
  77                          * g_strescape() does the same */
  78                         if ((c < ' ') || (c >= 127)) {
  79                                 *(buf++) = '\\';
  80                                 *(buf++) = octchar((unsigned char) c >> 6);
  81                                 *(buf++) = octchar((unsigned char) c >> 3);
  82                                 *(buf++) = octchar((unsigned char) c);
  83                         } else
  84                                 *(buf++) = c;
  85                         break;
  86         }
  87
  88         return buf - buf_old;
  89 }
  90
  91 char *cescape(const char *s) {
  92         char *r, *t;
  93         const char *f;
  94
  95         assert(s);
  96
  97         /* Does C style string escaping. May be reversed with
  98          * cunescape(). */
  99
 100         r = new(char, strlen(s)*4 + 1);
 101         if (!r)
 102                 return NULL;
 103
 104         for (f = s, t = r; *f; f++)
 105                 t += cescape_char(*f, t);
 106
 107         *t = 0;
 108
 109         return r;
 110 }
 111
 112 int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode) {
 113         int r = 1;
 114
 115         assert(p);
 116         assert(*p);
 117         assert(ret);
 118
 119         /* Unescapes C style. Returns the unescaped character in ret,
 120          * unless we encountered a \u sequence in which case the full
 121          * unicode character is returned in ret_unicode, instead. */
 122
 123         if (length != (size_t) -1 && length < 1)
 124                 return -EINVAL;
 125
 126         switch (p[0]) {
 127
 128         case 'a':
 129                 *ret = '\a';
 130                 break;
 131         case 'b':
 132                 *ret = '\b';
 133                 break;
 134         case 'f':
 135                 *ret = '\f';
 136                 break;
 137         case 'n':
 138                 *ret = '\n';
 139                 break;
 140         case 'r':
 141                 *ret = '\r';
 142                 break;
 143         case 't':
 144                 *ret = '\t';
 145                 break;
 146         case 'v':
 147                 *ret = '\v';
 148                 break;
 149         case '\\':
 150                 *ret = '\\';
 151                 break;
 152         case '"':
 153                 *ret = '"';
 154                 break;
 155         case '\'':
 156                 *ret = '\'';
 157                 break;
 158
 159         case 's':
 160                 /* This is an extension of the XDG syntax files */
 161                 *ret = ' ';
 162                 break;
 163
 164         case 'x': {
 165                 /* hexadecimal encoding */
 166                 int a, b;
 167
 168                 if (length != (size_t) -1 && length < 3)
 169                         return -EINVAL;
 170
 171                 a = unhexchar(p[1]);
 172                 if (a < 0)
 173                         return -EINVAL;
 174
 175                 b = unhexchar(p[2]);
 176                 if (b < 0)
 177                         return -EINVAL;
 178
 179                 /* Don't allow NUL bytes */
 180                 if (a == 0 && b == 0)
 181                         return -EINVAL;
 182
 183                 *ret = (char) ((a << 4U) | b);
 184                 r = 3;
 185                 break;
 186         }
 187
 188         case 'u': {
 189                 /* C++11 style 16bit unicode */
 190
 191                 int a[4];
 192                 unsigned i;
 193                 uint32_t c;
 194
 195                 if (length != (size_t) -1 && length < 5)
 196                         return -EINVAL;
 197
 198                 for (i = 0; i < 4; i++) {
 199                         a[i] = unhexchar(p[1 + i]);
 200                         if (a[i] < 0)
 201                                 return a[i];
 202                 }
 203
 204                 c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];
 205
 206                 /* Don't allow 0 chars */
 207                 if (c == 0)
 208                         return -EINVAL;
 209
 210                 if (c < 128)
 211                         *ret = c;
 212                 else {
 213                         if (!ret_unicode)
 214                                 return -EINVAL;
 215
 216                         *ret = 0;
 217                         *ret_unicode = c;
 218                 }
 219
 220                 r = 5;
 221                 break;
 222         }
 223
 224         case 'U': {
 225                 /* C++11 style 32bit unicode */
 226
 227                 int a[8];
 228                 unsigned i;
 229                 uint32_t c;
 230
 231                 if (length != (size_t) -1 && length < 9)
 232                         return -EINVAL;
 233
 234                 for (i = 0; i < 8; i++) {
 235                         a[i] = unhexchar(p[1 + i]);
 236                         if (a[i] < 0)
 237                                 return a[i];
 238                 }
 239
 240                 c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
 241                     ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] <<  8U) | ((uint32_t) a[6] <<  4U) |  (uint32_t) a[7];
 242
 243                 /* Don't allow 0 chars */
 244                 if (c == 0)
 245                         return -EINVAL;
 246
 247                 /* Don't allow invalid code points */
 248                 if (!unichar_is_valid(c))
 249                         return -EINVAL;
 250
 251                 if (c < 128)
 252                         *ret = c;
 253                 else {
 254                         if (!ret_unicode)
 255                                 return -EINVAL;
 256
 257                         *ret = 0;
 258                         *ret_unicode = c;
 259                 }
 260
 261                 r = 9;
 262                 break;
 263         }
 264
 265         case '0':
 266         case '1':
 267         case '2':
 268         case '3':
 269         case '4':
 270         case '5':
 271         case '6':
 272         case '7': {
 273                 /* octal encoding */
 274                 int a, b, c;
 275                 uint32_t m;
 276
 277                 if (length != (size_t) -1 && length < 3)
 278                         return -EINVAL;
 279
 280                 a = unoctchar(p[0]);
 281                 if (a < 0)
 282                         return -EINVAL;
 283
 284                 b = unoctchar(p[1]);
 285                 if (b < 0)
 286                         return -EINVAL;
 287
 288                 c = unoctchar(p[2]);
 289                 if (c < 0)
 290                         return -EINVAL;
 291
 292                 /* don't allow NUL bytes */
 293                 if (a == 0 && b == 0 && c == 0)
 294                         return -EINVAL;
 295
 296                 /* Don't allow bytes above 255 */
 297                 m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c;
 298                 if (m > 255)
 299                         return -EINVAL;
 300
 301                 *ret = m;
 302                 r = 3;
 303                 break;
 304         }
 305
 306         default:
 307                 return -EINVAL;
 308         }
 309
 310         return r;
 311 }
 312
 313 int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
 314         char *r, *t;
 315         const char *f;
 316         size_t pl;
 317
 318         assert(s);
 319         assert(ret);
 320
 321         /* Undoes C style string escaping, and optionally prefixes it. */
 322
 323         pl = prefix ? strlen(prefix) : 0;
 324
 325         r = new(char, pl+length+1);
 326         if (!r)
 327                 return -ENOMEM;
 328
 329         if (prefix)
 330                 memcpy(r, prefix, pl);
 331
 332         for (f = s, t = r + pl; f < s + length; f++) {
 333                 size_t remaining;
 334                 uint32_t u;
 335                 char c;
 336                 int k;
 337
 338                 remaining = s + length - f;
 339                 assert(remaining > 0);
 340
 341                 if (*f != '\\') {
 342                         /* A literal literal, copy verbatim */
 343                         *(t++) = *f;
 344                         continue;
 345                 }
 346
 347                 if (remaining == 1) {
 348                         if (flags & UNESCAPE_RELAX) {
 349                                 /* A trailing backslash, copy verbatim */
 350                                 *(t++) = *f;
 351                                 continue;
 352                         }
 353
 354                         free(r);
 355                         return -EINVAL;
 356                 }
 357
 358                 k = cunescape_one(f + 1, remaining - 1, &c, &u);
 359                 if (k < 0) {
 360                         if (flags & UNESCAPE_RELAX) {
 361                                 /* Invalid escape code, let's take it literal then */
 362                                 *(t++) = '\\';
 363                                 continue;
 364                         }
 365
 366                         free(r);
 367                         return k;
 368                 }
 369
 370                 if (c != 0)
 371                         /* Non-Unicode? Let's encode this directly */
 372                         *(t++) = c;
 373                 else
 374                         /* Unicode? Then let's encode this in UTF-8 */
 375                         t += utf8_encode_unichar(t, u);
 376
 377                 f += k;
 378         }
 379
 380         *t = 0;
 381
 382         *ret = r;
 383         return t - r;
 384 }
 385
 386 int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
 387         return cunescape_length_with_prefix(s, length, NULL, flags, ret);
 388 }
 389
 390 int cunescape(const char *s, UnescapeFlags flags, char **ret) {
 391         return cunescape_length(s, strlen(s), flags, ret);
 392 }
 393
 394 char *xescape(const char *s, const char *bad) {
 395         char *r, *t;
 396         const char *f;
 397
 398         /* Escapes all chars in bad, in addition to \ and all special
 399          * chars, in \xFF style escaping. May be reversed with
 400          * cunescape(). */
 401
 402         r = new(char, strlen(s) * 4 + 1);
 403         if (!r)
 404                 return NULL;
 405
 406         for (f = s, t = r; *f; f++) {
 407
 408                 if ((*f < ' ') || (*f >= 127) ||
 409                     (*f == '\\') || strchr(bad, *f)) {
 410                         *(t++) = '\\';
 411                         *(t++) = 'x';
 412                         *(t++) = hexchar(*f >> 4);
 413                         *(t++) = hexchar(*f);
 414                 } else
 415                         *(t++) = *f;
 416         }
 417
 418         *t = 0;
 419
 420         return r;
 421 }
 422
 423 static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
 424         assert(bad);
 425
 426         for (; *s; s++) {
 427                 if (*s == '\\' || strchr(bad, *s))
 428                         *(t++) = '\\';
 429
 430                 *(t++) = *s;
 431         }
 432
 433         return t;
 434 }
 435
 436 char *shell_escape(const char *s, const char *bad) {
 437         char *r, *t;
 438
 439         r = new(char, strlen(s)*2+1);
 440         if (!r)
 441                 return NULL;
 442
 443         t = strcpy_backslash_escaped(r, s, bad);
 444         *t = 0;
 445
 446         return r;
 447 }
 448
 449 char *shell_maybe_quote(const char *s) {
 450         const char *p;
 451         char *r, *t;
 452
 453         assert(s);
 454
 455         /* Encloses a string in double quotes if necessary to make it
 456          * OK as shell string. */
 457
 458         for (p = s; *p; p++)
 459                 if (*p <= ' ' ||
 460                     *p >= 127 ||
 461                     strchr(SHELL_NEED_QUOTES, *p))
 462                         break;
 463
 464         if (!*p)
 465                 return strdup(s);
 466
 467         r = new(char, 1+strlen(s)*2+1+1);
 468         if (!r)
 469                 return NULL;
 470
 471         t = r;
 472         *(t++) = '"';
 473         t = mempcpy(t, s, p - s);
 474
 475         t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE);
 476
 477         *(t++)= '"';
 478         *t = 0;
 479
 480         return r;
 481 }