src/basic/locale-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #include <errno.h>
   4 #include <fcntl.h>
   5 #include <ftw.h>
   6 #include <langinfo.h>
   7 #include <libintl.h>
   8 #include <stddef.h>
   9 #include <stdint.h>
  10 #include <stdlib.h>
  11 #include <sys/mman.h>
  12 #include <sys/stat.h>
  13
  14 #include "def.h"
  15 #include "dirent-util.h"
  16 #include "env-util.h"
  17 #include "fd-util.h"
  18 #include "hashmap.h"
  19 #include "locale-util.h"
  20 #include "path-util.h"
  21 #include "set.h"
  22 #include "string-table.h"
  23 #include "string-util.h"
  24 #include "strv.h"
  25 #include "utf8.h"
  26
  27 static char *normalize_locale(const char *name) {
  28         const char *e;
  29
  30         /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it
  31          * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix
  32          * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse
  33          * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do
  34          * that for UTF-8 however, since it's kinda the only charset that matters. */
  35
  36         e = endswith(name, ".utf8");
  37         if (e) {
  38                 _cleanup_free_ char *prefix = NULL;
  39
  40                 prefix = strndup(name, e - name);
  41                 if (!prefix)
  42                         return NULL;
  43
  44                 return strjoin(prefix, ".UTF-8");
  45         }
  46
  47         e = strstr(name, ".utf8@");
  48         if (e) {
  49                 _cleanup_free_ char *prefix = NULL;
  50
  51                 prefix = strndup(name, e - name);
  52                 if (!prefix)
  53                         return NULL;
  54
  55                 return strjoin(prefix, ".UTF-8@", e + 6);
  56         }
  57
  58         return strdup(name);
  59 }
  60
  61 static int add_locales_from_archive(Set *locales) {
  62         /* Stolen from glibc... */
  63
  64         struct locarhead {
  65                 uint32_t magic;
  66                 /* Serial number.  */
  67                 uint32_t serial;
  68                 /* Name hash table.  */
  69                 uint32_t namehash_offset;
  70                 uint32_t namehash_used;
  71                 uint32_t namehash_size;
  72                 /* String table.  */
  73                 uint32_t string_offset;
  74                 uint32_t string_used;
  75                 uint32_t string_size;
  76                 /* Table with locale records.  */
  77                 uint32_t locrectab_offset;
  78                 uint32_t locrectab_used;
  79                 uint32_t locrectab_size;
  80                 /* MD5 sum hash table.  */
  81                 uint32_t sumhash_offset;
  82                 uint32_t sumhash_used;
  83                 uint32_t sumhash_size;
  84         };
  85
  86         struct namehashent {
  87                 /* Hash value of the name.  */
  88                 uint32_t hashval;
  89                 /* Offset of the name in the string table.  */
  90                 uint32_t name_offset;
  91                 /* Offset of the locale record.  */
  92                 uint32_t locrec_offset;
  93         };
  94
  95         const struct locarhead *h;
  96         const struct namehashent *e;
  97         const void *p = MAP_FAILED;
  98         _cleanup_close_ int fd = -1;
  99         size_t sz = 0;
 100         struct stat st;
 101         int r;
 102
 103         fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
 104         if (fd < 0)
 105                 return errno == ENOENT ? 0 : -errno;
 106
 107         if (fstat(fd, &st) < 0)
 108                 return -errno;
 109
 110         if (!S_ISREG(st.st_mode))
 111                 return -EBADMSG;
 112
 113         if (st.st_size < (off_t) sizeof(struct locarhead))
 114                 return -EBADMSG;
 115
 116         p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
 117         if (p == MAP_FAILED)
 118                 return -errno;
 119
 120         h = (const struct locarhead *) p;
 121         if (h->magic != 0xde020109 ||
 122             h->namehash_offset + h->namehash_size > st.st_size ||
 123             h->string_offset + h->string_size > st.st_size ||
 124             h->locrectab_offset + h->locrectab_size > st.st_size ||
 125             h->sumhash_offset + h->sumhash_size > st.st_size) {
 126                 r = -EBADMSG;
 127                 goto finish;
 128         }
 129
 130         e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
 131         for (size_t i = 0; i < h->namehash_size; i++) {
 132                 char *z;
 133
 134                 if (e[i].locrec_offset == 0)
 135                         continue;
 136
 137                 if (!utf8_is_valid((char*) p + e[i].name_offset))
 138                         continue;
 139
 140                 z = normalize_locale((char*) p + e[i].name_offset);
 141                 if (!z) {
 142                         r = -ENOMEM;
 143                         goto finish;
 144                 }
 145
 146                 r = set_consume(locales, z);
 147                 if (r < 0)
 148                         goto finish;
 149         }
 150
 151         r = 0;
 152
 153  finish:
 154         if (p != MAP_FAILED)
 155                 munmap((void*) p, sz);
 156
 157         return r;
 158 }
 159
 160 static int add_locales_from_libdir (Set *locales) {
 161         _cleanup_closedir_ DIR *dir = NULL;
 162         struct dirent *entry;
 163         int r;
 164
 165         dir = opendir("/usr/lib/locale");
 166         if (!dir)
 167                 return errno == ENOENT ? 0 : -errno;
 168
 169         FOREACH_DIRENT(entry, dir, return -errno) {
 170                 char *z;
 171
 172                 dirent_ensure_type(dir, entry);
 173
 174                 if (entry->d_type != DT_DIR)
 175                         continue;
 176
 177                 z = normalize_locale(entry->d_name);
 178                 if (!z)
 179                         return -ENOMEM;
 180
 181                 r = set_consume(locales, z);
 182                 if (r < 0 && r != -EEXIST)
 183                         return r;
 184         }
 185
 186         return 0;
 187 }
 188
 189 int get_locales(char ***ret) {
 190         _cleanup_set_free_ Set *locales = NULL;
 191         _cleanup_strv_free_ char **l = NULL;
 192         int r;
 193
 194         locales = set_new(&string_hash_ops);
 195         if (!locales)
 196                 return -ENOMEM;
 197
 198         r = add_locales_from_archive(locales);
 199         if (r < 0 && r != -ENOENT)
 200                 return r;
 201
 202         r = add_locales_from_libdir(locales);
 203         if (r < 0)
 204                 return r;
 205
 206         l = set_get_strv(locales);
 207         if (!l)
 208                 return -ENOMEM;
 209
 210         r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
 211         if (r == -ENXIO || r == 0) {
 212                 char **a, **b;
 213
 214                 /* Filter out non-UTF-8 locales, because it's 2019, by default */
 215                 for (a = b = l; *a; a++) {
 216
 217                         if (endswith(*a, "UTF-8") ||
 218                             strstr(*a, ".UTF-8@"))
 219                                 *(b++) = *a;
 220                         else
 221                                 free(*a);
 222                 }
 223
 224                 *b = NULL;
 225
 226         } else if (r < 0)
 227                 log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean");
 228
 229         strv_sort(l);
 230
 231         *ret = TAKE_PTR(l);
 232
 233         return 0;
 234 }
 235
 236 bool locale_is_valid(const char *name) {
 237
 238         if (isempty(name))
 239                 return false;
 240
 241         if (strlen(name) >= 128)
 242                 return false;
 243
 244         if (!utf8_is_valid(name))
 245                 return false;
 246
 247         if (!filename_is_valid(name))
 248                 return false;
 249
 250         if (!string_is_safe(name))
 251                 return false;
 252
 253         return true;
 254 }
 255
 256 int locale_is_installed(const char *name) {
 257         if (!locale_is_valid(name))
 258                 return false;
 259
 260         if (STR_IN_SET(name, "C", "POSIX")) /* These ones are always OK */
 261                 return true;
 262
 263         _cleanup_(freelocalep) locale_t loc =
 264                 newlocale(LC_ALL_MASK, name, 0);
 265         if (loc == (locale_t) 0)
 266                 return errno == ENOMEM ? -ENOMEM : false;
 267
 268         return true;
 269 }
 270
 271 void init_gettext(void) {
 272         setlocale(LC_ALL, "");
 273         textdomain(GETTEXT_PACKAGE);
 274 }
 275
 276 bool is_locale_utf8(void) {
 277         const char *set;
 278         static int cached_answer = -1;
 279
 280         /* Note that we default to 'true' here, since today UTF8 is
 281          * pretty much supported everywhere. */
 282
 283         if (cached_answer >= 0)
 284                 goto out;
 285
 286         if (!setlocale(LC_ALL, "")) {
 287                 cached_answer = true;
 288                 goto out;
 289         }
 290
 291         set = nl_langinfo(CODESET);
 292         if (!set) {
 293                 cached_answer = true;
 294                 goto out;
 295         }
 296
 297         if (streq(set, "UTF-8")) {
 298                 cached_answer = true;
 299                 goto out;
 300         }
 301
 302         /* For LC_CTYPE=="C" return true, because CTYPE is effectively
 303          * unset and everything can do to UTF-8 nowadays. */
 304         set = setlocale(LC_CTYPE, NULL);
 305         if (!set) {
 306                 cached_answer = true;
 307                 goto out;
 308         }
 309
 310         /* Check result, but ignore the result if C was set
 311          * explicitly. */
 312         cached_answer =
 313                 STR_IN_SET(set, "C", "POSIX") &&
 314                 !getenv("LC_ALL") &&
 315                 !getenv("LC_CTYPE") &&
 316                 !getenv("LANG");
 317
 318 out:
 319         return (bool) cached_answer;
 320 }
 321
 322 bool emoji_enabled(void) {
 323         static int cached_emoji_enabled = -1;
 324
 325         if (cached_emoji_enabled < 0) {
 326                 int val;
 327
 328                 val = getenv_bool("SYSTEMD_EMOJI");
 329                 if (val < 0)
 330                         cached_emoji_enabled =
 331                                 is_locale_utf8() &&
 332                                 !STRPTR_IN_SET(getenv("TERM"), "dumb", "linux");
 333                 else
 334                         cached_emoji_enabled = val;
 335         }
 336
 337         return cached_emoji_enabled;
 338 }
 339
 340 const char *special_glyph(SpecialGlyph code) {
 341
 342         /* A list of a number of interesting unicode glyphs we can use to decorate our output. It's probably wise to be
 343          * conservative here, and primarily stick to the glyphs defined in the eurlatgr font, so that display still
 344          * works reasonably well on the Linux console. For details see:
 345          *
 346          * http://git.altlinux.org/people/legion/packages/kbd.git?p=kbd.git;a=blob;f=data/consolefonts/README.eurlatgr
 347          */
 348
 349         static const char* const draw_table[2][_SPECIAL_GLYPH_MAX] = {
 350                 /* ASCII fallback */
 351                 [false] = {
 352                         [SPECIAL_GLYPH_TREE_VERTICAL]           = "| ",
 353                         [SPECIAL_GLYPH_TREE_BRANCH]             = "|-",
 354                         [SPECIAL_GLYPH_TREE_RIGHT]              = "`-",
 355                         [SPECIAL_GLYPH_TREE_SPACE]              = "  ",
 356                         [SPECIAL_GLYPH_TRIANGULAR_BULLET]       = ">",
 357                         [SPECIAL_GLYPH_BLACK_CIRCLE]            = "*",
 358                         [SPECIAL_GLYPH_WHITE_CIRCLE]            = "*",
 359                         [SPECIAL_GLYPH_MULTIPLICATION_SIGN]     = "x",
 360                         [SPECIAL_GLYPH_CIRCLE_ARROW]            = "*",
 361                         [SPECIAL_GLYPH_BULLET]                  = "*",
 362                         [SPECIAL_GLYPH_MU]                      = "u",
 363                         [SPECIAL_GLYPH_CHECK_MARK]              = "+",
 364                         [SPECIAL_GLYPH_CROSS_MARK]              = "-",
 365                         [SPECIAL_GLYPH_LIGHT_SHADE]             = "-",
 366                         [SPECIAL_GLYPH_DARK_SHADE]              = "X",
 367                         [SPECIAL_GLYPH_SIGMA]                   = "S",
 368                         [SPECIAL_GLYPH_ARROW]                   = "->",
 369                         [SPECIAL_GLYPH_ELLIPSIS]                = "...",
 370                         [SPECIAL_GLYPH_EXTERNAL_LINK]           = "[LNK]",
 371                         [SPECIAL_GLYPH_ECSTATIC_SMILEY]         = ":-]",
 372                         [SPECIAL_GLYPH_HAPPY_SMILEY]            = ":-}",
 373                         [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY]   = ":-)",
 374                         [SPECIAL_GLYPH_NEUTRAL_SMILEY]          = ":-|",
 375                         [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = ":-(",
 376                         [SPECIAL_GLYPH_UNHAPPY_SMILEY]          = ":-{",
 377                         [SPECIAL_GLYPH_DEPRESSED_SMILEY]        = ":-[",
 378                         [SPECIAL_GLYPH_LOCK_AND_KEY]            = "o-,",
 379                         [SPECIAL_GLYPH_TOUCH]                   = "O=",    /* Yeah, not very convincing, can you do it better? */
 380                 },
 381
 382                 /* UTF-8 */
 383                 [true] = {
 384                         /* The following are multiple glyphs in both ASCII and in UNICODE */
 385                         [SPECIAL_GLYPH_TREE_VERTICAL]           = "\342\224\202 ",            /* │  */
 386                         [SPECIAL_GLYPH_TREE_BRANCH]             = "\342\224\234\342\224\200", /* ├─ */
 387                         [SPECIAL_GLYPH_TREE_RIGHT]              = "\342\224\224\342\224\200", /* └─ */
 388                         [SPECIAL_GLYPH_TREE_SPACE]              = "  ",                       /*    */
 389
 390                         /* Single glyphs in both cases */
 391                         [SPECIAL_GLYPH_TRIANGULAR_BULLET]       = "\342\200\243",             /* ‣ */
 392                         [SPECIAL_GLYPH_BLACK_CIRCLE]            = "\342\227\217",             /* ● */
 393                         [SPECIAL_GLYPH_WHITE_CIRCLE]            = "\u25CB",                   /* ○ */
 394                         [SPECIAL_GLYPH_MULTIPLICATION_SIGN]     = "\u00D7",                   /* × */
 395                         [SPECIAL_GLYPH_CIRCLE_ARROW]            = "\u21BB",                   /* ↻ */
 396                         [SPECIAL_GLYPH_BULLET]                  = "\342\200\242",             /* • */
 397                         [SPECIAL_GLYPH_MU]                      = "\316\274",                 /* μ (actually called: GREEK SMALL LETTER MU) */
 398                         [SPECIAL_GLYPH_CHECK_MARK]              = "\342\234\223",             /* ✓ */
 399                         [SPECIAL_GLYPH_CROSS_MARK]              = "\342\234\227",             /* ✗ (actually called: BALLOT X) */
 400                         [SPECIAL_GLYPH_LIGHT_SHADE]             = "\342\226\221",             /* ░ */
 401                         [SPECIAL_GLYPH_DARK_SHADE]              = "\342\226\223",             /* ▒ */
 402                         [SPECIAL_GLYPH_SIGMA]                   = "\316\243",                 /* Σ */
 403
 404                         /* Single glyph in Unicode, two in ASCII */
 405                         [SPECIAL_GLYPH_ARROW]                   = "\342\206\222",             /* → (actually called: RIGHTWARDS ARROW) */
 406
 407                         /* Single glyph in Unicode, three in ASCII */
 408                         [SPECIAL_GLYPH_ELLIPSIS]                = "\342\200\246",             /* … (actually called: HORIZONTAL ELLIPSIS) */
 409
 410                         /* Three glyphs in Unicode, five in ASCII */
 411                         [SPECIAL_GLYPH_EXTERNAL_LINK]           = "[\360\237\241\225]",       /* 🡕 (actually called: NORTH EAST SANS-SERIF ARROW, enclosed in []) */
 412
 413                         /* These smileys are a single glyph in Unicode, and three in ASCII */
 414                         [SPECIAL_GLYPH_ECSTATIC_SMILEY]         = "\360\237\230\207",         /* 😇 (actually called: SMILING FACE WITH HALO) */
 415                         [SPECIAL_GLYPH_HAPPY_SMILEY]            = "\360\237\230\200",         /* 😀 (actually called: GRINNING FACE) */
 416                         [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY]   = "\360\237\231\202",         /* 🙂 (actually called: SLIGHTLY SMILING FACE) */
 417                         [SPECIAL_GLYPH_NEUTRAL_SMILEY]          = "\360\237\230\220",         /* 😐 (actually called: NEUTRAL FACE) */
 418                         [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = "\360\237\231\201",         /* 🙁 (actually called: SLIGHTLY FROWNING FACE) */
 419                         [SPECIAL_GLYPH_UNHAPPY_SMILEY]          = "\360\237\230\250",         /* 😨 (actually called: FEARFUL FACE) */
 420                         [SPECIAL_GLYPH_DEPRESSED_SMILEY]        = "\360\237\244\242",         /* 🤢 (actually called: NAUSEATED FACE) */
 421
 422                         /* This emoji is a single character cell glyph in Unicode, and three in ASCII */
 423                         [SPECIAL_GLYPH_LOCK_AND_KEY]            = "\360\237\224\220",         /* 🔐 (actually called: CLOSED LOCK WITH KEY) */
 424
 425                         /* This emoji is a single character cell glyph in Unicode, and two in ASCII */
 426                         [SPECIAL_GLYPH_TOUCH]                   = "\360\237\221\206",         /* 👆 (actually called: BACKHAND INDEX POINTING UP */
 427                 },
 428         };
 429
 430         if (code < 0)
 431                 return NULL;
 432
 433         assert(code < _SPECIAL_GLYPH_MAX);
 434         return draw_table[code >= _SPECIAL_GLYPH_FIRST_EMOJI ? emoji_enabled() : is_locale_utf8()][code];
 435 }
 436
 437 void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
 438         if (!l)
 439                 return;
 440
 441         for (LocaleVariable i = 0; i < _VARIABLE_LC_MAX; i++)
 442                 l[i] = mfree(l[i]);
 443 }
 444
 445 static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
 446         [VARIABLE_LANG] = "LANG",
 447         [VARIABLE_LANGUAGE] = "LANGUAGE",
 448         [VARIABLE_LC_CTYPE] = "LC_CTYPE",
 449         [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
 450         [VARIABLE_LC_TIME] = "LC_TIME",
 451         [VARIABLE_LC_COLLATE] = "LC_COLLATE",
 452         [VARIABLE_LC_MONETARY] = "LC_MONETARY",
 453         [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
 454         [VARIABLE_LC_PAPER] = "LC_PAPER",
 455         [VARIABLE_LC_NAME] = "LC_NAME",
 456         [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
 457         [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
 458         [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
 459         [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
 460 };
 461
 462 DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);