src/basic/locale-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #include <errno.h>
   4 #include <fcntl.h>
   5 #include <ftw.h>
   6 #include <langinfo.h>
   7 #include <libintl.h>
   8 #include <stddef.h>
   9 #include <stdint.h>
  10 #include <stdlib.h>
  11 #include <sys/mman.h>
  12 #include <sys/stat.h>
  13
  14 #include "def.h"
  15 #include "dirent-util.h"
  16 #include "env-util.h"
  17 #include "fd-util.h"
  18 #include "hashmap.h"
  19 #include "locale-util.h"
  20 #include "path-util.h"
  21 #include "set.h"
  22 #include "string-table.h"
  23 #include "string-util.h"
  24 #include "strv.h"
  25 #include "utf8.h"
  26
  27 static char *normalize_locale(const char *name) {
  28         const char *e;
  29
  30         /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it
  31          * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix
  32          * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse
  33          * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do
  34          * that for UTF-8 however, since it's kinda the only charset that matters. */
  35
  36         e = endswith(name, ".utf8");
  37         if (e) {
  38                 _cleanup_free_ char *prefix = NULL;
  39
  40                 prefix = strndup(name, e - name);
  41                 if (!prefix)
  42                         return NULL;
  43
  44                 return strjoin(prefix, ".UTF-8");
  45         }
  46
  47         e = strstr(name, ".utf8@");
  48         if (e) {
  49                 _cleanup_free_ char *prefix = NULL;
  50
  51                 prefix = strndup(name, e - name);
  52                 if (!prefix)
  53                         return NULL;
  54
  55                 return strjoin(prefix, ".UTF-8@", e + 6);
  56         }
  57
  58         return strdup(name);
  59 }
  60
  61 static int add_locales_from_archive(Set *locales) {
  62         /* Stolen from glibc... */
  63
  64         struct locarhead {
  65                 uint32_t magic;
  66                 /* Serial number.  */
  67                 uint32_t serial;
  68                 /* Name hash table.  */
  69                 uint32_t namehash_offset;
  70                 uint32_t namehash_used;
  71                 uint32_t namehash_size;
  72                 /* String table.  */
  73                 uint32_t string_offset;
  74                 uint32_t string_used;
  75                 uint32_t string_size;
  76                 /* Table with locale records.  */
  77                 uint32_t locrectab_offset;
  78                 uint32_t locrectab_used;
  79                 uint32_t locrectab_size;
  80                 /* MD5 sum hash table.  */
  81                 uint32_t sumhash_offset;
  82                 uint32_t sumhash_used;
  83                 uint32_t sumhash_size;
  84         };
  85
  86         struct namehashent {
  87                 /* Hash value of the name.  */
  88                 uint32_t hashval;
  89                 /* Offset of the name in the string table.  */
  90                 uint32_t name_offset;
  91                 /* Offset of the locale record.  */
  92                 uint32_t locrec_offset;
  93         };
  94
  95         const struct locarhead *h;
  96         const struct namehashent *e;
  97         const void *p = MAP_FAILED;
  98         _cleanup_close_ int fd = -1;
  99         size_t sz = 0;
 100         struct stat st;
 101         int r;
 102
 103         fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
 104         if (fd < 0)
 105                 return errno == ENOENT ? 0 : -errno;
 106
 107         if (fstat(fd, &st) < 0)
 108                 return -errno;
 109
 110         if (!S_ISREG(st.st_mode))
 111                 return -EBADMSG;
 112
 113         if (st.st_size < (off_t) sizeof(struct locarhead))
 114                 return -EBADMSG;
 115
 116         p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
 117         if (p == MAP_FAILED)
 118                 return -errno;
 119
 120         h = (const struct locarhead *) p;
 121         if (h->magic != 0xde020109 ||
 122             h->namehash_offset + h->namehash_size > st.st_size ||
 123             h->string_offset + h->string_size > st.st_size ||
 124             h->locrectab_offset + h->locrectab_size > st.st_size ||
 125             h->sumhash_offset + h->sumhash_size > st.st_size) {
 126                 r = -EBADMSG;
 127                 goto finish;
 128         }
 129
 130         e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
 131         for (size_t i = 0; i < h->namehash_size; i++) {
 132                 char *z;
 133
 134                 if (e[i].locrec_offset == 0)
 135                         continue;
 136
 137                 if (!utf8_is_valid((char*) p + e[i].name_offset))
 138                         continue;
 139
 140                 z = normalize_locale((char*) p + e[i].name_offset);
 141                 if (!z) {
 142                         r = -ENOMEM;
 143                         goto finish;
 144                 }
 145
 146                 r = set_consume(locales, z);
 147                 if (r < 0)
 148                         goto finish;
 149         }
 150
 151         r = 0;
 152
 153  finish:
 154         if (p != MAP_FAILED)
 155                 munmap((void*) p, sz);
 156
 157         return r;
 158 }
 159
 160 static int add_locales_from_libdir (Set *locales) {
 161         _cleanup_closedir_ DIR *dir = NULL;
 162         struct dirent *entry;
 163         int r;
 164
 165         dir = opendir("/usr/lib/locale");
 166         if (!dir)
 167                 return errno == ENOENT ? 0 : -errno;
 168
 169         FOREACH_DIRENT(entry, dir, return -errno) {
 170                 char *z;
 171
 172                 if (entry->d_type != DT_DIR)
 173                         continue;
 174
 175                 z = normalize_locale(entry->d_name);
 176                 if (!z)
 177                         return -ENOMEM;
 178
 179                 r = set_consume(locales, z);
 180                 if (r < 0 && r != -EEXIST)
 181                         return r;
 182         }
 183
 184         return 0;
 185 }
 186
 187 int get_locales(char ***ret) {
 188         _cleanup_set_free_ Set *locales = NULL;
 189         _cleanup_strv_free_ char **l = NULL;
 190         int r;
 191
 192         locales = set_new(&string_hash_ops);
 193         if (!locales)
 194                 return -ENOMEM;
 195
 196         r = add_locales_from_archive(locales);
 197         if (r < 0 && r != -ENOENT)
 198                 return r;
 199
 200         r = add_locales_from_libdir(locales);
 201         if (r < 0)
 202                 return r;
 203
 204         l = set_get_strv(locales);
 205         if (!l)
 206                 return -ENOMEM;
 207
 208         r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
 209         if (r == -ENXIO || r == 0) {
 210                 char **a, **b;
 211
 212                 /* Filter out non-UTF-8 locales, because it's 2019, by default */
 213                 for (a = b = l; *a; a++) {
 214
 215                         if (endswith(*a, "UTF-8") ||
 216                             strstr(*a, ".UTF-8@"))
 217                                 *(b++) = *a;
 218                         else
 219                                 free(*a);
 220                 }
 221
 222                 *b = NULL;
 223
 224         } else if (r < 0)
 225                 log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean");
 226
 227         strv_sort(l);
 228
 229         *ret = TAKE_PTR(l);
 230
 231         return 0;
 232 }
 233
 234 bool locale_is_valid(const char *name) {
 235
 236         if (isempty(name))
 237                 return false;
 238
 239         if (strlen(name) >= 128)
 240                 return false;
 241
 242         if (!utf8_is_valid(name))
 243                 return false;
 244
 245         if (!filename_is_valid(name))
 246                 return false;
 247
 248         if (!string_is_safe(name))
 249                 return false;
 250
 251         return true;
 252 }
 253
 254 int locale_is_installed(const char *name) {
 255         if (!locale_is_valid(name))
 256                 return false;
 257
 258         if (STR_IN_SET(name, "C", "POSIX")) /* These ones are always OK */
 259                 return true;
 260
 261         _cleanup_(freelocalep) locale_t loc =
 262                 newlocale(LC_ALL_MASK, name, 0);
 263         if (loc == (locale_t) 0)
 264                 return errno == ENOMEM ? -ENOMEM : false;
 265
 266         return true;
 267 }
 268
 269 void init_gettext(void) {
 270         setlocale(LC_ALL, "");
 271         textdomain(GETTEXT_PACKAGE);
 272 }
 273
 274 bool is_locale_utf8(void) {
 275         const char *set;
 276         static int cached_answer = -1;
 277
 278         /* Note that we default to 'true' here, since today UTF8 is
 279          * pretty much supported everywhere. */
 280
 281         if (cached_answer >= 0)
 282                 goto out;
 283
 284         if (!setlocale(LC_ALL, "")) {
 285                 cached_answer = true;
 286                 goto out;
 287         }
 288
 289         set = nl_langinfo(CODESET);
 290         if (!set) {
 291                 cached_answer = true;
 292                 goto out;
 293         }
 294
 295         if (streq(set, "UTF-8")) {
 296                 cached_answer = true;
 297                 goto out;
 298         }
 299
 300         /* For LC_CTYPE=="C" return true, because CTYPE is effectively
 301          * unset and everything can do to UTF-8 nowadays. */
 302         set = setlocale(LC_CTYPE, NULL);
 303         if (!set) {
 304                 cached_answer = true;
 305                 goto out;
 306         }
 307
 308         /* Check result, but ignore the result if C was set
 309          * explicitly. */
 310         cached_answer =
 311                 STR_IN_SET(set, "C", "POSIX") &&
 312                 !getenv("LC_ALL") &&
 313                 !getenv("LC_CTYPE") &&
 314                 !getenv("LANG");
 315
 316 out:
 317         return (bool) cached_answer;
 318 }
 319
 320 bool emoji_enabled(void) {
 321         static int cached_emoji_enabled = -1;
 322
 323         if (cached_emoji_enabled < 0) {
 324                 int val;
 325
 326                 val = getenv_bool("SYSTEMD_EMOJI");
 327                 if (val < 0)
 328                         cached_emoji_enabled =
 329                                 is_locale_utf8() &&
 330                                 !STRPTR_IN_SET(getenv("TERM"), "dumb", "linux");
 331                 else
 332                         cached_emoji_enabled = val;
 333         }
 334
 335         return cached_emoji_enabled;
 336 }
 337
 338 const char *special_glyph(SpecialGlyph code) {
 339
 340         /* A list of a number of interesting unicode glyphs we can use to decorate our output. It's probably wise to be
 341          * conservative here, and primarily stick to the glyphs defined in the eurlatgr font, so that display still
 342          * works reasonably well on the Linux console. For details see:
 343          *
 344          * http://git.altlinux.org/people/legion/packages/kbd.git?p=kbd.git;a=blob;f=data/consolefonts/README.eurlatgr
 345          */
 346
 347         static const char* const draw_table[2][_SPECIAL_GLYPH_MAX] = {
 348                 /* ASCII fallback */
 349                 [false] = {
 350                         [SPECIAL_GLYPH_TREE_VERTICAL]           = "| ",
 351                         [SPECIAL_GLYPH_TREE_BRANCH]             = "|-",
 352                         [SPECIAL_GLYPH_TREE_RIGHT]              = "`-",
 353                         [SPECIAL_GLYPH_TREE_SPACE]              = "  ",
 354                         [SPECIAL_GLYPH_TRIANGULAR_BULLET]       = ">",
 355                         [SPECIAL_GLYPH_BLACK_CIRCLE]            = "*",
 356                         [SPECIAL_GLYPH_WHITE_CIRCLE]            = "*",
 357                         [SPECIAL_GLYPH_MULTIPLICATION_SIGN]     = "x",
 358                         [SPECIAL_GLYPH_CIRCLE_ARROW]            = "*",
 359                         [SPECIAL_GLYPH_BULLET]                  = "*",
 360                         [SPECIAL_GLYPH_MU]                      = "u",
 361                         [SPECIAL_GLYPH_CHECK_MARK]              = "+",
 362                         [SPECIAL_GLYPH_CROSS_MARK]              = "-",
 363                         [SPECIAL_GLYPH_LIGHT_SHADE]             = "-",
 364                         [SPECIAL_GLYPH_DARK_SHADE]              = "X",
 365                         [SPECIAL_GLYPH_SIGMA]                   = "S",
 366                         [SPECIAL_GLYPH_ARROW]                   = "->",
 367                         [SPECIAL_GLYPH_ELLIPSIS]                = "...",
 368                         [SPECIAL_GLYPH_EXTERNAL_LINK]           = "[LNK]",
 369                         [SPECIAL_GLYPH_ECSTATIC_SMILEY]         = ":-]",
 370                         [SPECIAL_GLYPH_HAPPY_SMILEY]            = ":-}",
 371                         [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY]   = ":-)",
 372                         [SPECIAL_GLYPH_NEUTRAL_SMILEY]          = ":-|",
 373                         [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = ":-(",
 374                         [SPECIAL_GLYPH_UNHAPPY_SMILEY]          = ":-{",
 375                         [SPECIAL_GLYPH_DEPRESSED_SMILEY]        = ":-[",
 376                         [SPECIAL_GLYPH_LOCK_AND_KEY]            = "o-,",
 377                         [SPECIAL_GLYPH_TOUCH]                   = "O=",    /* Yeah, not very convincing, can you do it better? */
 378                         [SPECIAL_GLYPH_RECYCLING]               = "~",
 379                         [SPECIAL_GLYPH_DOWNLOAD]                = "\\",
 380                         [SPECIAL_GLYPH_SPARKLES]                = "*",
 381                 },
 382
 383                 /* UTF-8 */
 384                 [true] = {
 385                         /* The following are multiple glyphs in both ASCII and in UNICODE */
 386                         [SPECIAL_GLYPH_TREE_VERTICAL]           = "\342\224\202 ",            /* │  */
 387                         [SPECIAL_GLYPH_TREE_BRANCH]             = "\342\224\234\342\224\200", /* ├─ */
 388                         [SPECIAL_GLYPH_TREE_RIGHT]              = "\342\224\224\342\224\200", /* └─ */
 389                         [SPECIAL_GLYPH_TREE_SPACE]              = "  ",                       /*    */
 390
 391                         /* Single glyphs in both cases */
 392                         [SPECIAL_GLYPH_TRIANGULAR_BULLET]       = "\342\200\243",             /* ‣ */
 393                         [SPECIAL_GLYPH_BLACK_CIRCLE]            = "\342\227\217",             /* ● */
 394                         [SPECIAL_GLYPH_WHITE_CIRCLE]            = "\u25CB",                   /* ○ */
 395                         [SPECIAL_GLYPH_MULTIPLICATION_SIGN]     = "\u00D7",                   /* × */
 396                         [SPECIAL_GLYPH_CIRCLE_ARROW]            = "\u21BB",                   /* ↻ */
 397                         [SPECIAL_GLYPH_BULLET]                  = "\342\200\242",             /* • */
 398                         [SPECIAL_GLYPH_MU]                      = "\316\274",                 /* μ (actually called: GREEK SMALL LETTER MU) */
 399                         [SPECIAL_GLYPH_CHECK_MARK]              = "\342\234\223",             /* ✓ */
 400                         [SPECIAL_GLYPH_CROSS_MARK]              = "\342\234\227",             /* ✗ (actually called: BALLOT X) */
 401                         [SPECIAL_GLYPH_LIGHT_SHADE]             = "\342\226\221",             /* ░ */
 402                         [SPECIAL_GLYPH_DARK_SHADE]              = "\342\226\223",             /* ▒ */
 403                         [SPECIAL_GLYPH_SIGMA]                   = "\316\243",                 /* Σ */
 404
 405                         /* Single glyph in Unicode, two in ASCII */
 406                         [SPECIAL_GLYPH_ARROW]                   = "\342\206\222",             /* → (actually called: RIGHTWARDS ARROW) */
 407
 408                         /* Single glyph in Unicode, three in ASCII */
 409                         [SPECIAL_GLYPH_ELLIPSIS]                = "\342\200\246",             /* … (actually called: HORIZONTAL ELLIPSIS) */
 410
 411                         /* Three glyphs in Unicode, five in ASCII */
 412                         [SPECIAL_GLYPH_EXTERNAL_LINK]           = "[\360\237\241\225]",       /* 🡕 (actually called: NORTH EAST SANS-SERIF ARROW, enclosed in []) */
 413
 414                         /* These smileys are a single glyph in Unicode, and three in ASCII */
 415                         [SPECIAL_GLYPH_ECSTATIC_SMILEY]         = "\360\237\230\207",         /* 😇 (actually called: SMILING FACE WITH HALO) */
 416                         [SPECIAL_GLYPH_HAPPY_SMILEY]            = "\360\237\230\200",         /* 😀 (actually called: GRINNING FACE) */
 417                         [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY]   = "\360\237\231\202",         /* 🙂 (actually called: SLIGHTLY SMILING FACE) */
 418                         [SPECIAL_GLYPH_NEUTRAL_SMILEY]          = "\360\237\230\220",         /* 😐 (actually called: NEUTRAL FACE) */
 419                         [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = "\360\237\231\201",         /* 🙁 (actually called: SLIGHTLY FROWNING FACE) */
 420                         [SPECIAL_GLYPH_UNHAPPY_SMILEY]          = "\360\237\230\250",         /* 😨 (actually called: FEARFUL FACE) */
 421                         [SPECIAL_GLYPH_DEPRESSED_SMILEY]        = "\360\237\244\242",         /* 🤢 (actually called: NAUSEATED FACE) */
 422
 423                         /* This emoji is a single character cell glyph in Unicode, and three in ASCII */
 424                         [SPECIAL_GLYPH_LOCK_AND_KEY]            = "\360\237\224\220",         /* 🔐 (actually called: CLOSED LOCK WITH KEY) */
 425
 426                         /* This emoji is a single character cell glyph in Unicode, and two in ASCII */
 427                         [SPECIAL_GLYPH_TOUCH]                   = "\360\237\221\206",         /* 👆 (actually called: BACKHAND INDEX POINTING UP) */
 428
 429                         /* These three emojis are single character cell glyphs in Unicode and also in ASCII. */
 430                         [SPECIAL_GLYPH_RECYCLING]               = "\u267B\uFE0F ",            /* ♻️  (actually called: UNIVERSAL RECYCLNG SYMBOL) */
 431                         [SPECIAL_GLYPH_DOWNLOAD]                = "\u2935\uFE0F ",            /* ⤵️  (actually called: RIGHT ARROW CURVING DOWN) */
 432                         [SPECIAL_GLYPH_SPARKLES]                = "\u2728",                   /* ✨ */
 433                 },
 434         };
 435
 436         if (code < 0)
 437                 return NULL;
 438
 439         assert(code < _SPECIAL_GLYPH_MAX);
 440         return draw_table[code >= _SPECIAL_GLYPH_FIRST_EMOJI ? emoji_enabled() : is_locale_utf8()][code];
 441 }
 442
 443 void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
 444         if (!l)
 445                 return;
 446
 447         for (LocaleVariable i = 0; i < _VARIABLE_LC_MAX; i++)
 448                 l[i] = mfree(l[i]);
 449 }
 450
 451 static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
 452         [VARIABLE_LANG] = "LANG",
 453         [VARIABLE_LANGUAGE] = "LANGUAGE",
 454         [VARIABLE_LC_CTYPE] = "LC_CTYPE",
 455         [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
 456         [VARIABLE_LC_TIME] = "LC_TIME",
 457         [VARIABLE_LC_COLLATE] = "LC_COLLATE",
 458         [VARIABLE_LC_MONETARY] = "LC_MONETARY",
 459         [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
 460         [VARIABLE_LC_PAPER] = "LC_PAPER",
 461         [VARIABLE_LC_NAME] = "LC_NAME",
 462         [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
 463         [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
 464         [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
 465         [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
 466 };
 467
 468 DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);