From: Bruno Haible Date: Sat, 6 Jun 2020 14:53:12 +0000 (+0200) Subject: msgunfmt: Detect more cases of invalid .mo files. X-Git-Tag: v0.21~52 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=28cc0e349273d8a7c41cc2a2aaf7b94f43644766;p=thirdparty%2Fgettext.git msgunfmt: Detect more cases of invalid .mo files. * gettext-tools/src/read-mo.c: Include hash-string.h. (read_mo_file): Verify the array of messages and the hash table. * gettext-tools/src/write-mo.c (write_table): Fix typo in comment. * gettext-tools/src/Makefile.am (msgunfmt_SOURCES): Add hash-string.c. * gettext-tools/tests/overflow-7.mo: Renamed from gettext-tools/tests/sysdep-without-nul.mo. * gettext-tools/tests/sysdep-without-nul.mo: New file. * gettext-tools/tests/msgunfmt-3: Test also overflow-7.mo. * gettext-tools/tests/Makefile.am (EXTRA_DIST): Add overflow-7.mo. --- diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index 85854beb7..764ffadbd 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -199,7 +199,8 @@ endif msgmerge_SOURCES += msgl-fsearch.c lang-table.c plural-count.c msgunfmt_SOURCES = msgunfmt.c msgunfmt_SOURCES += \ - read-mo.c read-java.c read-csharp.c read-resources.c read-tcl.c + read-mo.c read-java.c read-csharp.c read-resources.c read-tcl.c \ + ../../gettext-runtime/intl/hash-string.c if !WOE32DLL xgettext_SOURCES = xgettext.c else diff --git a/gettext-tools/src/read-mo.c b/gettext-tools/src/read-mo.c index 8075c8d77..affc6abb0 100644 --- a/gettext-tools/src/read-mo.c +++ b/gettext-tools/src/read-mo.c @@ -1,5 +1,5 @@ /* Reading binary .mo files. - Copyright (C) 1995-1998, 2000-2007, 2014-2015, 2017 Free Software Foundation, Inc. + Copyright (C) 1995-1998, 2000-2007, 2014-2015, 2017, 2020 Free Software Foundation, Inc. Written by Ulrich Drepper , April 1995. This program is free software: you can redistribute it and/or modify @@ -29,8 +29,9 @@ #include #include -/* This include file describes the main part of binary .mo format. */ +/* These two include files describe the binary .mo format. */ #include "gmo.h" +#include "hash-string.h" #include "error.h" #include "xalloc.h" @@ -307,6 +308,98 @@ read_mo_file (message_list_ty *mlp, const char *filename) header.hash_tab_size = GET_HEADER_FIELD (hash_tab_size); header.hash_tab_offset = GET_HEADER_FIELD (hash_tab_offset); + /* The following verifications attempt to ensure that 'msgunfmt' complains + about a .mo file that may make libintl crash at run time. */ + + /* Verify that the array of messages is sorted. */ + { + char *prev_msgid = NULL; + + for (i = 0; i < header.nstrings; i++) + { + char *msgid; + size_t msgid_len; + + msgid = get_string (&bf, header.orig_tab_offset + i * 8, + &msgid_len); + if (i == 0) + prev_msgid = msgid; + else + { + if (!(strcmp (prev_msgid, msgid) < 0)) + error (EXIT_FAILURE, 0, + _("file \"%s\" is not in GNU .mo format: The array of messages is not sorted."), + filename); + } + } + } + + /* Verify the hash table. */ + if (header.hash_tab_size > 0) + { + char *seen; + unsigned int j; + + /* Verify the hash table's size. */ + if (!(header.hash_tab_size > 2)) + error (EXIT_FAILURE, 0, + _("file \"%s\" is not in GNU .mo format: The hash table size is invalid."), + filename); + + /* Verify that the non-empty hash table entries contain the values + 1, ..., nstrings, each exactly once. */ + seen = (char *) xcalloc (header.nstrings, 1); + for (j = 0; j < header.hash_tab_size; j++) + { + nls_uint32 entry = + get_uint32 (&bf, header.hash_tab_offset + j * 4); + + if (entry != 0) + { + i = entry - 1; + if (!(i < header.nstrings && seen[i] == 0)) + error (EXIT_FAILURE, 0, + _("file \"%s\" is not in GNU .mo format: The hash table contains invalid entries."), + filename); + seen[i] = 1; + } + } + for (i = 0; i < header.nstrings; i++) + if (seen[i] == 0) + error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format: Some messages are not present in the hash table."), + filename); + free (seen); + + /* Verify that the hash table lookup algorithm finds the entry for + each message. */ + for (i = 0; i < header.nstrings; i++) + { + size_t msgid_len; + char *msgid = get_string (&bf, header.orig_tab_offset + i * 8, + &msgid_len); + nls_uint32 hash_val = hash_string (msgid); + nls_uint32 idx = hash_val % header.hash_tab_size; + nls_uint32 incr = 1 + (hash_val % (header.hash_tab_size - 2)); + for (;;) + { + nls_uint32 entry = + get_uint32 (&bf, header.hash_tab_offset + idx * 4); + + if (entry == 0) + error (EXIT_FAILURE, 0, + _("file \"%s\" is not in GNU .mo format: Some messages are at a wrong index in the hash table."), + filename); + if (entry == i + 1) + break; + + if (idx >= header.hash_tab_size - incr) + idx -= header.hash_tab_size - incr; + else + idx += incr; + } + } + } + for (i = 0; i < header.nstrings; i++) { message_ty *mp; diff --git a/gettext-tools/src/write-mo.c b/gettext-tools/src/write-mo.c index dbb2d3bc2..41b32c7aa 100644 --- a/gettext-tools/src/write-mo.c +++ b/gettext-tools/src/write-mo.c @@ -1,5 +1,5 @@ /* Writing binary .mo files. - Copyright (C) 1995-1998, 2000-2007, 2016 Free Software Foundation, Inc. + Copyright (C) 1995-1998, 2000-2007, 2016, 2020 Free Software Foundation, Inc. Written by Ulrich Drepper , April 1995. This program is free software: you can redistribute it and/or modify @@ -554,7 +554,7 @@ write_table (FILE *output_file, message_list_ty *mlp) hash_tab = XNMALLOC (hash_tab_size, nls_uint32); memset (hash_tab, '\0', hash_tab_size * sizeof (nls_uint32)); - /* Insert all value in the hash table, following the algorithm described + /* Insert all values in the hash table, following the algorithm described above. */ for (j = 0; j < nstrings; j++) { diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am index 0768ce1d3..07d41323b 100644 --- a/gettext-tools/tests/Makefile.am +++ b/gettext-tools/tests/Makefile.am @@ -178,7 +178,7 @@ TESTS = gettext-1 gettext-2 \ EXTRA_DIST += init.sh init.cfg $(TESTS) \ test.mo overflow-1.mo overflow-2.mo overflow-3.mo overflow-4.mo \ - overflow-5.mo overflow-6.mo sysdep-without-nul.mo \ + overflow-5.mo overflow-6.mo overflow-7.mo sysdep-without-nul.mo \ xg-c-1.ok.po mex-test2.ok \ mm-ko.ascii.pot mm-ko.euc-kr.po mm-ko-comp.euc-kr.po \ mm-viet.comp.po mm-viet.pot mm-viet.out \ diff --git a/gettext-tools/tests/msgunfmt-3 b/gettext-tools/tests/msgunfmt-3 index be72272b4..0a3e62669 100755 --- a/gettext-tools/tests/msgunfmt-3 +++ b/gettext-tools/tests/msgunfmt-3 @@ -5,7 +5,7 @@ : ${MSGUNFMT=msgunfmt} -for f in overflow-1.mo overflow-2.mo overflow-3.mo overflow-4.mo overflow-5.mo overflow-6.mo; do +for f in overflow-1.mo overflow-2.mo overflow-3.mo overflow-4.mo overflow-5.mo overflow-6.mo overflow-7.mo; do LANGUAGE= LC_ALL=C ${MSGUNFMT} "$wabs_srcdir"/$f 2>mu-3.err >/dev/null test $? != 0 || Exit 1 grep ' is truncated' mu-3.err >/dev/null || Exit 1 diff --git a/gettext-tools/tests/overflow-7.mo b/gettext-tools/tests/overflow-7.mo new file mode 100644 index 000000000..6bcaa5105 Binary files /dev/null and b/gettext-tools/tests/overflow-7.mo differ diff --git a/gettext-tools/tests/sysdep-without-nul.mo b/gettext-tools/tests/sysdep-without-nul.mo index 6bcaa5105..915c80db8 100644 Binary files a/gettext-tools/tests/sysdep-without-nul.mo and b/gettext-tools/tests/sysdep-without-nul.mo differ