Merge pull request #31524 from poettering/secure-getenv-naming-fix

[thirdparty/systemd.git] / src / basic / locale-util.c
diff --git a/src/basic/locale-util.c b/src/basic/locale-util.c

index fd6b01cfaade6f0441b61222964322e667ede998..23565273dd2fceb61dbed0c3e852de22edc21644 100644 (file)
--- a/src/basic/locale-util.c
+++ b/src/basic/locale-util.c
@@ -2,7 +2,6 @@
  
  #include <errno.h>
  #include <fcntl.h>
-#include <ftw.h>
  #include <langinfo.h>
  #include <libintl.h>
  #include <stddef.h>
@@ -11,12 +10,14 @@
  #include <sys/mman.h>
  #include <sys/stat.h>
  
-#include "def.h"
+#include "constants.h"
  #include "dirent-util.h"
  #include "env-util.h"
  #include "fd-util.h"
+#include "fileio.h"
  #include "hashmap.h"
  #include "locale-util.h"
+#include "missing_syscall.h"
  #include "path-util.h"
  #include "set.h"
  #include "string-table.h"
@@ -95,7 +96,7 @@ static int add_locales_from_archive(Set *locales) {
          const struct locarhead *h;
          const struct namehashent *e;
          const void *p = MAP_FAILED;
-        _cleanup_close_ int fd = -1;
+        _cleanup_close_ int fd = -EBADF;
          size_t sz = 0;
          struct stat st;
          int r;
@@ -113,6 +114,9 @@ static int add_locales_from_archive(Set *locales) {
          if (st.st_size < (off_t) sizeof(struct locarhead))
                  return -EBADMSG;
  
+        if (file_offset_beyond_memory_size(st.st_size))
+                return -EFBIG;
+
          p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
          if (p == MAP_FAILED)
                  return -errno;
@@ -157,22 +161,21 @@ static int add_locales_from_archive(Set *locales) {
          return r;
  }
  
-static int add_locales_from_libdir (Set *locales) {
+static int add_locales_from_libdir(Set *locales) {
          _cleanup_closedir_ DIR *dir = NULL;
-        struct dirent *entry;
          int r;
  
          dir = opendir("/usr/lib/locale");
          if (!dir)
                  return errno == ENOENT ? 0 : -errno;
  
-        FOREACH_DIRENT(entry, dir, return -errno) {
+        FOREACH_DIRENT(de, dir, return -errno) {
                  char *z;
  
-                if (entry->d_type != DT_DIR)
+                if (de->d_type != DT_DIR)
                          continue;
  
-                z = normalize_locale(entry->d_name);
+                z = normalize_locale(de->d_name);
                  if (!z)
                          return -ENOMEM;
  
@@ -185,7 +188,7 @@ static int add_locales_from_libdir (Set *locales) {
  }
  
  int get_locales(char ***ret) {
-        _cleanup_set_free_ Set *locales = NULL;
+        _cleanup_set_free_free_ Set *locales = NULL;
          _cleanup_strv_free_ char **l = NULL;
          int r;
  
@@ -201,12 +204,24 @@ int get_locales(char ***ret) {
          if (r < 0)
                  return r;
  
+        char *locale;
+        SET_FOREACH(locale, locales) {
+                r = locale_is_installed(locale);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        free(set_remove(locales, locale));
+        }
+
          l = set_get_strv(locales);
          if (!l)
                  return -ENOMEM;
  
+        /* Now, all elements are owned by strv 'l'. Hence, do not call set_free_free(). */
+        locales = set_free(locales);
+
          r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
-        if (r == -ENXIO || r == 0) {
+        if (IN_SET(r, -ENXIO, 0)) {
                  char **a, **b;
  
                  /* Filter out non-UTF-8 locales, because it's 2019, by default */
@@ -245,7 +260,10 @@ bool locale_is_valid(const char *name) {
          if (!filename_is_valid(name))
                  return false;
  
-        if (!string_is_safe(name))
+        /* Locales look like: ll_CC.ENC@variant, where ll and CC are alphabetic, ENC is alphanumeric with
+         * dashes, and variant seems to be alphabetic.
+         * See: https://www.gnu.org/software/gettext/manual/html_node/Locale-Names.html */
+        if (!in_charset(name, ALPHANUMERICAL "_.-@"))
                  return false;
  
          return true;
@@ -266,14 +284,10 @@ int locale_is_installed(const char *name) {
          return true;
  }
  
-void init_gettext(void) {
-        setlocale(LC_ALL, "");
-        textdomain(GETTEXT_PACKAGE);
-}
-
  bool is_locale_utf8(void) {
-        const char *set;
          static int cached_answer = -1;
+        const char *set;
+        int r;
  
          /* Note that we default to 'true' here, since today UTF8 is
           * pretty much supported everywhere. */
@@ -281,6 +295,19 @@ bool is_locale_utf8(void) {
          if (cached_answer >= 0)
                  goto out;
  
+        r = secure_getenv_bool("SYSTEMD_UTF8");
+        if (r >= 0) {
+                cached_answer = r;
+                goto out;
+        } else if (r != -ENXIO)
+                log_debug_errno(r, "Failed to parse $SYSTEMD_UTF8, ignoring: %m");
+
+        /* This function may be called from libsystemd, and setlocale() is not thread safe. Hence, when not running in the main thread, skip setlocale() and assume UTF-8. */
+        if (gettid() != raw_getpid()) {
+                cached_answer = true;
+                goto out;
+        }
+
          if (!setlocale(LC_ALL, "")) {
                  cached_answer = true;
                  goto out;
@@ -317,143 +344,35 @@ out:
          return (bool) cached_answer;
  }
  
-bool emoji_enabled(void) {
-        static int cached_emoji_enabled = -1;
-
-        if (cached_emoji_enabled < 0) {
-                int val;
-
-                val = getenv_bool("SYSTEMD_EMOJI");
-                if (val < 0)
-                        cached_emoji_enabled =
-                                is_locale_utf8() &&
-                                !STRPTR_IN_SET(getenv("TERM"), "dumb", "linux");
-                else
-                        cached_emoji_enabled = val;
-        }
-
-        return cached_emoji_enabled;
-}
-
-const char *special_glyph(SpecialGlyph code) {
-
-        /* A list of a number of interesting unicode glyphs we can use to decorate our output. It's probably wise to be
-         * conservative here, and primarily stick to the glyphs defined in the eurlatgr font, so that display still
-         * works reasonably well on the Linux console. For details see:
-         *
-         * http://git.altlinux.org/people/legion/packages/kbd.git?p=kbd.git;a=blob;f=data/consolefonts/README.eurlatgr
-         */
-
-        static const char* const draw_table[2][_SPECIAL_GLYPH_MAX] = {
-                /* ASCII fallback */
-                [false] = {
-                        [SPECIAL_GLYPH_TREE_VERTICAL]           = "| ",
-                        [SPECIAL_GLYPH_TREE_BRANCH]             = "|-",
-                        [SPECIAL_GLYPH_TREE_RIGHT]              = "`-",
-                        [SPECIAL_GLYPH_TREE_SPACE]              = "  ",
-                        [SPECIAL_GLYPH_TRIANGULAR_BULLET]       = ">",
-                        [SPECIAL_GLYPH_BLACK_CIRCLE]            = "*",
-                        [SPECIAL_GLYPH_WHITE_CIRCLE]            = "*",
-                        [SPECIAL_GLYPH_MULTIPLICATION_SIGN]     = "x",
-                        [SPECIAL_GLYPH_CIRCLE_ARROW]            = "*",
-                        [SPECIAL_GLYPH_BULLET]                  = "*",
-                        [SPECIAL_GLYPH_MU]                      = "u",
-                        [SPECIAL_GLYPH_CHECK_MARK]              = "+",
-                        [SPECIAL_GLYPH_CROSS_MARK]              = "-",
-                        [SPECIAL_GLYPH_LIGHT_SHADE]             = "-",
-                        [SPECIAL_GLYPH_DARK_SHADE]              = "X",
-                        [SPECIAL_GLYPH_SIGMA]                   = "S",
-                        [SPECIAL_GLYPH_ARROW]                   = "->",
-                        [SPECIAL_GLYPH_ELLIPSIS]                = "...",
-                        [SPECIAL_GLYPH_EXTERNAL_LINK]           = "[LNK]",
-                        [SPECIAL_GLYPH_ECSTATIC_SMILEY]         = ":-]",
-                        [SPECIAL_GLYPH_HAPPY_SMILEY]            = ":-}",
-                        [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY]   = ":-)",
-                        [SPECIAL_GLYPH_NEUTRAL_SMILEY]          = ":-|",
-                        [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = ":-(",
-                        [SPECIAL_GLYPH_UNHAPPY_SMILEY]          = ":-{",
-                        [SPECIAL_GLYPH_DEPRESSED_SMILEY]        = ":-[",
-                        [SPECIAL_GLYPH_LOCK_AND_KEY]            = "o-,",
-                        [SPECIAL_GLYPH_TOUCH]                   = "O=",    /* Yeah, not very convincing, can you do it better? */
-                },
-
-                /* UTF-8 */
-                [true] = {
-                        /* The following are multiple glyphs in both ASCII and in UNICODE */
-                        [SPECIAL_GLYPH_TREE_VERTICAL]           = "\342\224\202 ",            /* │  */
-                        [SPECIAL_GLYPH_TREE_BRANCH]             = "\342\224\234\342\224\200", /* ├─ */
-                        [SPECIAL_GLYPH_TREE_RIGHT]              = "\342\224\224\342\224\200", /* └─ */
-                        [SPECIAL_GLYPH_TREE_SPACE]              = "  ",                       /*    */
-
-                        /* Single glyphs in both cases */
-                        [SPECIAL_GLYPH_TRIANGULAR_BULLET]       = "\342\200\243",             /* ‣ */
-                        [SPECIAL_GLYPH_BLACK_CIRCLE]            = "\342\227\217",             /* ● */
-                        [SPECIAL_GLYPH_WHITE_CIRCLE]            = "\u25CB",                   /* ○ */
-                        [SPECIAL_GLYPH_MULTIPLICATION_SIGN]     = "\u00D7",                   /* × */
-                        [SPECIAL_GLYPH_CIRCLE_ARROW]            = "\u21BB",                   /* ↻ */
-                        [SPECIAL_GLYPH_BULLET]                  = "\342\200\242",             /* • */
-                        [SPECIAL_GLYPH_MU]                      = "\316\274",                 /* μ (actually called: GREEK SMALL LETTER MU) */
-                        [SPECIAL_GLYPH_CHECK_MARK]              = "\342\234\223",             /* ✓ */
-                        [SPECIAL_GLYPH_CROSS_MARK]              = "\342\234\227",             /* ✗ (actually called: BALLOT X) */
-                        [SPECIAL_GLYPH_LIGHT_SHADE]             = "\342\226\221",             /* ░ */
-                        [SPECIAL_GLYPH_DARK_SHADE]              = "\342\226\223",             /* ▒ */
-                        [SPECIAL_GLYPH_SIGMA]                   = "\316\243",                 /* Σ */
-
-                        /* Single glyph in Unicode, two in ASCII */
-                        [SPECIAL_GLYPH_ARROW]                   = "\342\206\222",             /* → (actually called: RIGHTWARDS ARROW) */
-
-                        /* Single glyph in Unicode, three in ASCII */
-                        [SPECIAL_GLYPH_ELLIPSIS]                = "\342\200\246",             /* … (actually called: HORIZONTAL ELLIPSIS) */
-
-                        /* Three glyphs in Unicode, five in ASCII */
-                        [SPECIAL_GLYPH_EXTERNAL_LINK]           = "[\360\237\241\225]",       /* 🡕 (actually called: NORTH EAST SANS-SERIF ARROW, enclosed in []) */
-
-                        /* These smileys are a single glyph in Unicode, and three in ASCII */
-                        [SPECIAL_GLYPH_ECSTATIC_SMILEY]         = "\360\237\230\207",         /* 😇 (actually called: SMILING FACE WITH HALO) */
-                        [SPECIAL_GLYPH_HAPPY_SMILEY]            = "\360\237\230\200",         /* 😀 (actually called: GRINNING FACE) */
-                        [SPECIAL_GLYPH_SLIGHTLY_HAPPY_SMILEY]   = "\360\237\231\202",         /* 🙂 (actually called: SLIGHTLY SMILING FACE) */
-                        [SPECIAL_GLYPH_NEUTRAL_SMILEY]          = "\360\237\230\220",         /* 😐 (actually called: NEUTRAL FACE) */
-                        [SPECIAL_GLYPH_SLIGHTLY_UNHAPPY_SMILEY] = "\360\237\231\201",         /* 🙁 (actually called: SLIGHTLY FROWNING FACE) */
-                        [SPECIAL_GLYPH_UNHAPPY_SMILEY]          = "\360\237\230\250",         /* 😨 (actually called: FEARFUL FACE) */
-                        [SPECIAL_GLYPH_DEPRESSED_SMILEY]        = "\360\237\244\242",         /* 🤢 (actually called: NAUSEATED FACE) */
-
-                        /* This emoji is a single character cell glyph in Unicode, and three in ASCII */
-                        [SPECIAL_GLYPH_LOCK_AND_KEY]            = "\360\237\224\220",         /* 🔐 (actually called: CLOSED LOCK WITH KEY) */
-
-                        /* This emoji is a single character cell glyph in Unicode, and two in ASCII */
-                        [SPECIAL_GLYPH_TOUCH]                   = "\360\237\221\206",         /* 👆 (actually called: BACKHAND INDEX POINTING UP */
-                },
-        };
-
-        if (code < 0)
-                return NULL;
-
-        assert(code < _SPECIAL_GLYPH_MAX);
-        return draw_table[code >= _SPECIAL_GLYPH_FIRST_EMOJI ? emoji_enabled() : is_locale_utf8()][code];
+void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
+        free_many_charp(l, _VARIABLE_LC_MAX);
  }
  
-void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
-        if (!l)
-                return;
+void locale_variables_simplify(char *l[_VARIABLE_LC_MAX]) {
+        assert(l);
  
-        for (LocaleVariable i = 0; i < _VARIABLE_LC_MAX; i++)
-                l[i] = mfree(l[i]);
+        for (LocaleVariable p = 0; p < _VARIABLE_LC_MAX; p++) {
+                if (p == VARIABLE_LANG)
+                        continue;
+                if (isempty(l[p]) || streq_ptr(l[VARIABLE_LANG], l[p]))
+                        l[p] = mfree(l[p]);
+        }
  }
  
  static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
-        [VARIABLE_LANG] = "LANG",
-        [VARIABLE_LANGUAGE] = "LANGUAGE",
-        [VARIABLE_LC_CTYPE] = "LC_CTYPE",
-        [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
-        [VARIABLE_LC_TIME] = "LC_TIME",
-        [VARIABLE_LC_COLLATE] = "LC_COLLATE",
-        [VARIABLE_LC_MONETARY] = "LC_MONETARY",
-        [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
-        [VARIABLE_LC_PAPER] = "LC_PAPER",
-        [VARIABLE_LC_NAME] = "LC_NAME",
-        [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
-        [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
-        [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
+        [VARIABLE_LANG]              = "LANG",
+        [VARIABLE_LANGUAGE]          = "LANGUAGE",
+        [VARIABLE_LC_CTYPE]          = "LC_CTYPE",
+        [VARIABLE_LC_NUMERIC]        = "LC_NUMERIC",
+        [VARIABLE_LC_TIME]           = "LC_TIME",
+        [VARIABLE_LC_COLLATE]        = "LC_COLLATE",
+        [VARIABLE_LC_MONETARY]       = "LC_MONETARY",
+        [VARIABLE_LC_MESSAGES]       = "LC_MESSAGES",
+        [VARIABLE_LC_PAPER]          = "LC_PAPER",
+        [VARIABLE_LC_NAME]           = "LC_NAME",
+        [VARIABLE_LC_ADDRESS]        = "LC_ADDRESS",
+        [VARIABLE_LC_TELEPHONE]      = "LC_TELEPHONE",
+        [VARIABLE_LC_MEASUREMENT]    = "LC_MEASUREMENT",
          [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
  };