src/basic/escape.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <errno.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25
  26 #include "alloc-util.h"
  27 #include "escape.h"
  28 #include "hexdecoct.h"
  29 #include "macro.h"
  30 #include "utf8.h"
  31
  32 size_t cescape_char(char c, char *buf) {
  33         char * buf_old = buf;
  34
  35         switch (c) {
  36
  37                 case '\a':
  38                         *(buf++) = '\\';
  39                         *(buf++) = 'a';
  40                         break;
  41                 case '\b':
  42                         *(buf++) = '\\';
  43                         *(buf++) = 'b';
  44                         break;
  45                 case '\f':
  46                         *(buf++) = '\\';
  47                         *(buf++) = 'f';
  48                         break;
  49                 case '\n':
  50                         *(buf++) = '\\';
  51                         *(buf++) = 'n';
  52                         break;
  53                 case '\r':
  54                         *(buf++) = '\\';
  55                         *(buf++) = 'r';
  56                         break;
  57                 case '\t':
  58                         *(buf++) = '\\';
  59                         *(buf++) = 't';
  60                         break;
  61                 case '\v':
  62                         *(buf++) = '\\';
  63                         *(buf++) = 'v';
  64                         break;
  65                 case '\\':
  66                         *(buf++) = '\\';
  67                         *(buf++) = '\\';
  68                         break;
  69                 case '"':
  70                         *(buf++) = '\\';
  71                         *(buf++) = '"';
  72                         break;
  73                 case '\'':
  74                         *(buf++) = '\\';
  75                         *(buf++) = '\'';
  76                         break;
  77
  78                 default:
  79                         /* For special chars we prefer octal over
  80                          * hexadecimal encoding, simply because glib's
  81                          * g_strescape() does the same */
  82                         if ((c < ' ') || (c >= 127)) {
  83                                 *(buf++) = '\\';
  84                                 *(buf++) = octchar((unsigned char) c >> 6);
  85                                 *(buf++) = octchar((unsigned char) c >> 3);
  86                                 *(buf++) = octchar((unsigned char) c);
  87                         } else
  88                                 *(buf++) = c;
  89                         break;
  90         }
  91
  92         return buf - buf_old;
  93 }
  94
  95 char *cescape_length(const char *s, size_t n) {
  96         const char *f;
  97         char *r, *t;
  98
  99         assert(s || n == 0);
 100
 101         /* Does C style string escaping. May be reversed with
 102          * cunescape(). */
 103
 104         r = new(char, n*4 + 1);
 105         if (!r)
 106                 return NULL;
 107
 108         for (f = s, t = r; f < s + n; f++)
 109                 t += cescape_char(*f, t);
 110
 111         *t = 0;
 112
 113         return r;
 114 }
 115
 116 char *cescape(const char *s) {
 117         assert(s);
 118
 119         return cescape_length(s, strlen(s));
 120 }
 121
 122 int cunescape_one(const char *p, size_t length, uint32_t *ret, bool *eight_bit) {
 123         int r = 1;
 124
 125         assert(p);
 126         assert(*p);
 127         assert(ret);
 128
 129         /* Unescapes C style. Returns the unescaped character in ret.
 130          * Sets *eight_bit to true if the escaped sequence either fits in
 131          * one byte in UTF-8 or is a non-unicode literal byte and should
 132          * instead be copied directly.
 133          */
 134
 135         if (length != (size_t) -1 && length < 1)
 136                 return -EINVAL;
 137
 138         switch (p[0]) {
 139
 140         case 'a':
 141                 *ret = '\a';
 142                 break;
 143         case 'b':
 144                 *ret = '\b';
 145                 break;
 146         case 'f':
 147                 *ret = '\f';
 148                 break;
 149         case 'n':
 150                 *ret = '\n';
 151                 break;
 152         case 'r':
 153                 *ret = '\r';
 154                 break;
 155         case 't':
 156                 *ret = '\t';
 157                 break;
 158         case 'v':
 159                 *ret = '\v';
 160                 break;
 161         case '\\':
 162                 *ret = '\\';
 163                 break;
 164         case '"':
 165                 *ret = '"';
 166                 break;
 167         case '\'':
 168                 *ret = '\'';
 169                 break;
 170
 171         case 's':
 172                 /* This is an extension of the XDG syntax files */
 173                 *ret = ' ';
 174                 break;
 175
 176         case 'x': {
 177                 /* hexadecimal encoding */
 178                 int a, b;
 179
 180                 if (length != (size_t) -1 && length < 3)
 181                         return -EINVAL;
 182
 183                 a = unhexchar(p[1]);
 184                 if (a < 0)
 185                         return -EINVAL;
 186
 187                 b = unhexchar(p[2]);
 188                 if (b < 0)
 189                         return -EINVAL;
 190
 191                 /* Don't allow NUL bytes */
 192                 if (a == 0 && b == 0)
 193                         return -EINVAL;
 194
 195                 *ret = (a << 4U) | b;
 196                 *eight_bit = true;
 197                 r = 3;
 198                 break;
 199         }
 200
 201         case 'u': {
 202                 /* C++11 style 16bit unicode */
 203
 204                 int a[4];
 205                 unsigned i;
 206                 uint32_t c;
 207
 208                 if (length != (size_t) -1 && length < 5)
 209                         return -EINVAL;
 210
 211                 for (i = 0; i < 4; i++) {
 212                         a[i] = unhexchar(p[1 + i]);
 213                         if (a[i] < 0)
 214                                 return a[i];
 215                 }
 216
 217                 c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];
 218
 219                 /* Don't allow 0 chars */
 220                 if (c == 0)
 221                         return -EINVAL;
 222
 223                 *ret = c;
 224                 r = 5;
 225                 break;
 226         }
 227
 228         case 'U': {
 229                 /* C++11 style 32bit unicode */
 230
 231                 int a[8];
 232                 unsigned i;
 233                 uint32_t c;
 234
 235                 if (length != (size_t) -1 && length < 9)
 236                         return -EINVAL;
 237
 238                 for (i = 0; i < 8; i++) {
 239                         a[i] = unhexchar(p[1 + i]);
 240                         if (a[i] < 0)
 241                                 return a[i];
 242                 }
 243
 244                 c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
 245                     ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] <<  8U) | ((uint32_t) a[6] <<  4U) |  (uint32_t) a[7];
 246
 247                 /* Don't allow 0 chars */
 248                 if (c == 0)
 249                         return -EINVAL;
 250
 251                 /* Don't allow invalid code points */
 252                 if (!unichar_is_valid(c))
 253                         return -EINVAL;
 254
 255                 *ret = c;
 256                 r = 9;
 257                 break;
 258         }
 259
 260         case '0':
 261         case '1':
 262         case '2':
 263         case '3':
 264         case '4':
 265         case '5':
 266         case '6':
 267         case '7': {
 268                 /* octal encoding */
 269                 int a, b, c;
 270                 uint32_t m;
 271
 272                 if (length != (size_t) -1 && length < 3)
 273                         return -EINVAL;
 274
 275                 a = unoctchar(p[0]);
 276                 if (a < 0)
 277                         return -EINVAL;
 278
 279                 b = unoctchar(p[1]);
 280                 if (b < 0)
 281                         return -EINVAL;
 282
 283                 c = unoctchar(p[2]);
 284                 if (c < 0)
 285                         return -EINVAL;
 286
 287                 /* don't allow NUL bytes */
 288                 if (a == 0 && b == 0 && c == 0)
 289                         return -EINVAL;
 290
 291                 /* Don't allow bytes above 255 */
 292                 m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c;
 293                 if (m > 255)
 294                         return -EINVAL;
 295
 296                 *ret = m;
 297                 *eight_bit = true;
 298                 r = 3;
 299                 break;
 300         }
 301
 302         default:
 303                 return -EINVAL;
 304         }
 305
 306         return r;
 307 }
 308
 309 int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
 310         char *r, *t;
 311         const char *f;
 312         size_t pl;
 313
 314         assert(s);
 315         assert(ret);
 316
 317         /* Undoes C style string escaping, and optionally prefixes it. */
 318
 319         pl = prefix ? strlen(prefix) : 0;
 320
 321         r = new(char, pl+length+1);
 322         if (!r)
 323                 return -ENOMEM;
 324
 325         if (prefix)
 326                 memcpy(r, prefix, pl);
 327
 328         for (f = s, t = r + pl; f < s + length; f++) {
 329                 size_t remaining;
 330                 uint32_t u;
 331                 bool eight_bit = false;
 332                 int k;
 333
 334                 remaining = s + length - f;
 335                 assert(remaining > 0);
 336
 337                 if (*f != '\\') {
 338                         /* A literal literal, copy verbatim */
 339                         *(t++) = *f;
 340                         continue;
 341                 }
 342
 343                 if (remaining == 1) {
 344                         if (flags & UNESCAPE_RELAX) {
 345                                 /* A trailing backslash, copy verbatim */
 346                                 *(t++) = *f;
 347                                 continue;
 348                         }
 349
 350                         free(r);
 351                         return -EINVAL;
 352                 }
 353
 354                 k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit);
 355                 if (k < 0) {
 356                         if (flags & UNESCAPE_RELAX) {
 357                                 /* Invalid escape code, let's take it literal then */
 358                                 *(t++) = '\\';
 359                                 continue;
 360                         }
 361
 362                         free(r);
 363                         return k;
 364                 }
 365
 366                 f += k;
 367                 if (eight_bit)
 368                         /* One byte? Set directly as specified */
 369                         *(t++) = u;
 370                 else
 371                         /* Otherwise encode as multi-byte UTF-8 */
 372                         t += utf8_encode_unichar(t, u);
 373         }
 374
 375         *t = 0;
 376
 377         *ret = r;
 378         return t - r;
 379 }
 380
 381 int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
 382         return cunescape_length_with_prefix(s, length, NULL, flags, ret);
 383 }
 384
 385 int cunescape(const char *s, UnescapeFlags flags, char **ret) {
 386         return cunescape_length(s, strlen(s), flags, ret);
 387 }
 388
 389 char *xescape(const char *s, const char *bad) {
 390         char *r, *t;
 391         const char *f;
 392
 393         /* Escapes all chars in bad, in addition to \ and all special
 394          * chars, in \xFF style escaping. May be reversed with
 395          * cunescape(). */
 396
 397         r = new(char, strlen(s) * 4 + 1);
 398         if (!r)
 399                 return NULL;
 400
 401         for (f = s, t = r; *f; f++) {
 402
 403                 if ((*f < ' ') || (*f >= 127) ||
 404                     (*f == '\\') || strchr(bad, *f)) {
 405                         *(t++) = '\\';
 406                         *(t++) = 'x';
 407                         *(t++) = hexchar(*f >> 4);
 408                         *(t++) = hexchar(*f);
 409                 } else
 410                         *(t++) = *f;
 411         }
 412
 413         *t = 0;
 414
 415         return r;
 416 }
 417
 418 static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
 419         assert(bad);
 420
 421         for (; *s; s++) {
 422                 if (*s == '\\' || strchr(bad, *s))
 423                         *(t++) = '\\';
 424
 425                 *(t++) = *s;
 426         }
 427
 428         return t;
 429 }
 430
 431 char *shell_escape(const char *s, const char *bad) {
 432         char *r, *t;
 433
 434         r = new(char, strlen(s)*2+1);
 435         if (!r)
 436                 return NULL;
 437
 438         t = strcpy_backslash_escaped(r, s, bad);
 439         *t = 0;
 440
 441         return r;
 442 }
 443
 444 char *shell_maybe_quote(const char *s) {
 445         const char *p;
 446         char *r, *t;
 447
 448         assert(s);
 449
 450         /* Encloses a string in double quotes if necessary to make it
 451          * OK as shell string. */
 452
 453         for (p = s; *p; p++)
 454                 if (*p <= ' ' ||
 455                     *p >= 127 ||
 456                     strchr(SHELL_NEED_QUOTES, *p))
 457                         break;
 458
 459         if (!*p)
 460                 return strdup(s);
 461
 462         r = new(char, 1+strlen(s)*2+1+1);
 463         if (!r)
 464                 return NULL;
 465
 466         t = r;
 467         *(t++) = '"';
 468         t = mempcpy(t, s, p - s);
 469
 470         t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE);
 471
 472         *(t++)= '"';
 473         *t = 0;
 474
 475         return r;
 476 }