git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
Separate test_archive_string_normalization into NFC version and NFD version.
author Michihiro NAKAJIMA <ggcueroad@gmail.com>
Mon, 19 Mar 2012 09:35:07 +0000 (18:35 +0900)
committer Michihiro NAKAJIMA <ggcueroad@gmail.com>
Mon, 19 Mar 2012 09:35:07 +0000 (18:35 +0900)
libarchive/test/test_archive_string_conversion.c

index 1fc116818fa1e6167c47f4504adf4ff73adcd19b..fea141d4ab0d182b6f60b6a04b8d465cde1e7655 100644 (file)
@@ -249,61 +249,247 @@ is_wc_unicode(void)
  * On other platforms, the characters to be Form C.
  */
 static void
-test_archive_string_normalization(int mac_nfd)
+test_archive_string_normalization_nfc(const char *testdata)
 {
        struct archive *a, *a2;
-       struct archive_entry *ae;
        struct archive_string utf8;
        struct archive_mstring mstr;
        struct archive_string_conv *f_sconv8, *t_sconv8;
        struct archive_string_conv *f_sconv16be, *f_sconv16le;
        FILE *fp;
        char buff[512];
-       static const char reffile[] = "test_archive_string_conversion.txt.Z";
-       ssize_t size;
        int line = 0;
        int locale_is_utf8, wc_is_unicode;
-       int sconv_opt = 0;
+       int sconv_opt = SCONV_SET_OPT_NORMALIZATION_C;
 
        locale_is_utf8 = (NULL != setlocale(LC_ALL, "en_US.UTF-8"));
        wc_is_unicode = is_wc_unicode();
        /* If it doesn't exist, just warn and return. */
        if (!locale_is_utf8 && !wc_is_unicode) {
-               skipping("invalid encoding tests require a suitable locale;"
-                   " en_US.UTF-8 not available on this system");
+               skipping("A test of string normalization for NFC requires "
+                   "a suitable locale; en_US.UTF-8 not available on this "
+                   "system");
                return;
        }
 
        archive_string_init(&utf8);
        memset(&mstr, 0, sizeof(mstr));
 
-       if (mac_nfd)
-               sconv_opt = SCONV_SET_OPT_NORMALIZATION_D;
-       else
-               sconv_opt = SCONV_SET_OPT_NORMALIZATION_C;
-
        /*
-        * Extract a test pattern file.
+        * Create string conversion objects.
         */
-       extract_reference_file(reffile);
        assert((a = archive_read_new()) != NULL);
-       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
-       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_raw(a));
-        assertEqualIntA(a, ARCHIVE_OK,
-            archive_read_open_filename(a, reffile, 512));
+       assertA(NULL != (f_sconv8 =
+           archive_string_conversion_from_charset(a, "UTF-8", 0)));
+       assertA(NULL != (f_sconv16be =
+           archive_string_conversion_from_charset(a, "UTF-16BE", 0)));
+       assertA(NULL != (f_sconv16le =
+           archive_string_conversion_from_charset(a, "UTF-16LE", 0)));
+       assert((a2 = archive_write_new()) != NULL);
+       assertA(NULL != (t_sconv8 =
+           archive_string_conversion_to_charset(a2, "UTF-8", 0)));
+       if (f_sconv8 == NULL || f_sconv16be == NULL || f_sconv16le == NULL ||
+           t_sconv8 == NULL) {
+               /* We cannot continue this test. */
+               assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+               return;
+       }
+       archive_string_conversion_set_opt(f_sconv8, sconv_opt);
+       archive_string_conversion_set_opt(f_sconv16be, sconv_opt);
+       archive_string_conversion_set_opt(f_sconv16le, sconv_opt);
+       archive_string_conversion_set_opt(t_sconv8, sconv_opt);
 
-       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
-       assert((fp = fopen("testdata.txt", "w")) != NULL);
-       while ((size = archive_read_data(a, buff, 512)) > 0)
-               fwrite(buff, 1, size, fp);
+       /* Open a test pattern file. */
+       assert((fp = fopen(testdata, "r")) != NULL);
+
+       /*
+        * Read test data.
+        *  Test data format:
+        *     <NFC Unicode pattern> ';' <NFD Unicode pattern> '\n'
+        *  Unicode pattern format:
+        *     [0-9A-F]{4,5}([ ][0-9A-F]{4,5}){0,}
+        */
+       while (fgets(buff, sizeof(buff), fp) != NULL) {
+               char nfc[80], nfd[80];
+               char utf8_nfc[80], utf8_nfd[80];
+               char utf16be_nfc[80], utf16be_nfd[80];
+               char utf16le_nfc[80], utf16le_nfd[80];
+               wchar_t wc_nfc[40], wc_nfd[40];
+               char *e, *p;
+               const wchar_t *wp;
+               const char *mp;
+               size_t mplen;
+
+               line++;
+               if (buff[0] == '#')
+                       continue;
+               p = strchr(buff, ';');
+               if (p == NULL)
+                       continue;
+               *p++ = '\0';
+               /* Copy an NFC pattern */
+               strncpy(nfc, buff, sizeof(nfc)-1);
+               nfc[sizeof(nfc)-1] = '\0';
+               e = p;
+               p = strchr(p, '\n');
+               if (p == NULL)
+                       continue;
+               *p = '\0';
+               /* Copy an NFD pattern */
+               strncpy(nfd, e, sizeof(nfd)-1);
+               nfd[sizeof(nfd)-1] = '\0';
+
+               /*
+                * Get an NFC patterns.
+                */
+               scan_unicode_pattern(utf8_nfc, wc_nfc, utf16be_nfc, utf16le_nfc,
+                   nfc, 0);
+
+               /*
+                * Get an NFD patterns.
+                */
+               scan_unicode_pattern(utf8_nfd, wc_nfd, utf16be_nfd, utf16le_nfd,
+                   nfd, 0);
+
+               if (locale_is_utf8) {
+                       /*
+                        * Normalize an NFD string for import.
+                        */
+                       assertEqualInt(0, archive_strcpy_l(
+                           &utf8, utf8_nfd, f_sconv8));
+                       failure("NFD(%s) should be converted to NFC(%s):%d",
+                           nfd, nfc, line);
+                       assertEqualUTF8String(utf8_nfc, utf8.s);
+
+                       /*
+                        * Normalize an NFC string for import.
+                        */
+                       assertEqualInt(0, archive_strcpy_l(
+                           &utf8, utf8_nfc, f_sconv8));
+                       failure("NFC(%s) should not be any changed:%d",
+                           nfc, line);
+                       assertEqualUTF8String(utf8_nfc, utf8.s);
+
+                       /*
+                        * Copy an NFC string for export.
+                        */
+                       assertEqualInt(0, archive_strcpy_l(
+                           &utf8, utf8_nfc, t_sconv8));
+                       failure("NFC(%s) should not be any changed:%d",
+                           nfc, line);
+                       assertEqualUTF8String(utf8_nfc, utf8.s);
+
+                       /*
+                        * Normalize an NFD string in UTF-16BE for import.
+                        */
+                       assertEqualInt(0, archive_strncpy_l(
+                           &utf8, utf16be_nfd, 100000, f_sconv16be));
+                       failure("NFD(%s) should be converted to NFC(%s):%d",
+                           nfd, nfc, line);
+                       assertEqualUTF8String(utf8_nfc, utf8.s);
+
+                       /*
+                        * Normalize an NFD string in UTF-16LE for import.
+                        */
+                       assertEqualInt(0, archive_strncpy_l(
+                           &utf8, utf16le_nfd, 100000, f_sconv16le));
+                       failure("NFD(%s) should be converted to NFC(%s):%d",
+                           nfd, nfc, line);
+                       assertEqualUTF8String(utf8_nfc, utf8.s);
+               }
+
+               /*
+                * Test for archive_mstring interface.
+                * In specific, Windows platform UTF-16BE is directly
+                * converted to/from wide-character to avoid the effect of
+                * current locale since windows platform cannot make
+                * locale UTF-8.
+                */
+               if (locale_is_utf8 || wc_is_unicode) {
+                       /*
+                        * Normalize an NFD string in UTF-8 for import.
+                        */
+                       assertEqualInt(0, archive_mstring_copy_mbs_len_l(
+                           &mstr, utf8_nfd, 100000, f_sconv8));
+                       assertEqualInt(0,
+                           archive_mstring_get_wcs(a, &mstr, &wp));
+                       failure("UTF-8 NFD(%s) should be converted "
+                           "to WCS NFC(%s):%d", nfd, nfc, line);
+                       assertEqualWString(wc_nfc, wp);
+
+                       /*
+                        * Normalize an NFD string in UTF-16BE for import.
+                        */
+                       assertEqualInt(0, archive_mstring_copy_mbs_len_l(
+                           &mstr, utf16be_nfd, 100000, f_sconv16be));
+                       assertEqualInt(0,
+                           archive_mstring_get_wcs(a, &mstr, &wp));
+                       failure("UTF-8 NFD(%s) should be converted "
+                           "to WCS NFC(%s):%d", nfd, nfc, line);
+                       assertEqualWString(wc_nfc, wp);
+
+                       /*
+                        * Normalize an NFD string in UTF-16LE for import.
+                        */
+                       assertEqualInt(0, archive_mstring_copy_mbs_len_l(
+                           &mstr, utf16le_nfd, 100000, f_sconv16le));
+                       assertEqualInt(0,
+                           archive_mstring_get_wcs(a, &mstr, &wp));
+                       failure("UTF-8 NFD(%s) should be converted "
+                           "to WCS NFC(%s):%d", nfd, nfc, line);
+                       assertEqualWString(wc_nfc, wp);
+
+                       /*
+                        * Copy an NFC wide-string for export.
+                        */
+                       assertEqualInt(0,
+                           archive_mstring_copy_wcs(&mstr, wc_nfc));
+                       assertEqualInt(0, archive_mstring_get_mbs_l(
+                           &mstr, &mp, &mplen, t_sconv8));
+                       failure("WCS NFC(%s) should be UTF-8 NFC:%d"
+                           ,nfc, line);
+                       assertEqualUTF8String(utf8_nfc, mp);
+               }
+       }
+
+       archive_string_free(&utf8);
+       archive_mstring_clean(&mstr);
        fclose(fp);
+       assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a2));
+}
 
-       /* Open a test pattern file. */
-       assert((fp = fopen("testdata.txt", "r")) != NULL);
+static void
+test_archive_string_normalization_mac_nfd(const char *testdata)
+{
+       struct archive *a, *a2;
+       struct archive_string utf8;
+       struct archive_mstring mstr;
+       struct archive_string_conv *f_sconv8, *t_sconv8;
+       struct archive_string_conv *f_sconv16be, *f_sconv16le;
+       FILE *fp;
+       char buff[512];
+       int line = 0;
+       int locale_is_utf8, wc_is_unicode;
+       int sconv_opt = SCONV_SET_OPT_NORMALIZATION_D;
+
+       locale_is_utf8 = (NULL != setlocale(LC_ALL, "en_US.UTF-8"));
+       wc_is_unicode = is_wc_unicode();
+       /* If it doesn't exist, just warn and return. */
+       if (!locale_is_utf8 && !wc_is_unicode) {
+               skipping("A test of string normalization for NFD requires "
+                   "a suitable locale; en_US.UTF-8 not available on this "
+                   "system");
+               return;
+       }
+
+       archive_string_init(&utf8);
+       memset(&mstr, 0, sizeof(mstr));
 
        /*
         * Create string conversion objects.
         */
+       assert((a = archive_read_new()) != NULL);
        assertA(NULL != (f_sconv8 =
            archive_string_conversion_from_charset(a, "UTF-8", 0)));
        assertA(NULL != (f_sconv16be =
@@ -314,10 +500,8 @@ test_archive_string_normalization(int mac_nfd)
        assertA(NULL != (t_sconv8 =
            archive_string_conversion_to_charset(a2, "UTF-8", 0)));
        if (f_sconv8 == NULL || f_sconv16be == NULL || f_sconv16le == NULL ||
-           t_sconv8 == NULL || fp == NULL) {
+           t_sconv8 == NULL) {
                /* We cannot continue this test. */
-               if (fp != NULL)
-                       fclose(fp);
                assertEqualInt(ARCHIVE_OK, archive_read_free(a));
                return;
        }
@@ -326,6 +510,9 @@ test_archive_string_normalization(int mac_nfd)
        archive_string_conversion_set_opt(f_sconv16le, sconv_opt);
        archive_string_conversion_set_opt(t_sconv8, sconv_opt);
 
+       /* Open a test pattern file. */
+       assert((fp = fopen(testdata, "r")) != NULL);
+
        /*
         * Read test data.
         *  Test data format:
@@ -368,7 +555,7 @@ test_archive_string_normalization(int mac_nfd)
                 * Get an NFC patterns.
                 */
                should_be_nfc = scan_unicode_pattern(utf8_nfc, wc_nfc,
-                       utf16be_nfc, utf16le_nfc, nfc, mac_nfd);
+                       utf16be_nfc, utf16le_nfc, nfc, 1);
 
                /*
                 * Get an NFD patterns.
@@ -376,7 +563,7 @@ test_archive_string_normalization(int mac_nfd)
                scan_unicode_pattern(utf8_nfd, wc_nfd, utf16be_nfd, utf16le_nfd,
                    nfd, 0);
 
-               if (locale_is_utf8 && mac_nfd) {
+               if (locale_is_utf8) {
                        /*
                         * Normalize an NFC string for import.
                         */
@@ -384,8 +571,7 @@ test_archive_string_normalization(int mac_nfd)
                            &utf8, utf8_nfc, f_sconv8));
                        if (should_be_nfc) {
                                failure("NFC(%s) should not be converted to"
-                                   " NFD(%s):%d",
-                                   nfc, nfd, line);
+                                   " NFD(%s):%d", nfc, nfd, line);
                                assertEqualUTF8String(utf8_nfc, utf8.s);
                        } else {
                                failure("NFC(%s) should be converted to"
@@ -441,52 +627,6 @@ test_archive_string_normalization(int mac_nfd)
                                assertEqualUTF8String(utf8_nfd, utf8.s);
                        }
                }
-               if (locale_is_utf8 && !mac_nfd) {
-                       /*
-                        * Normalize an NFD string for import.
-                        */
-                       assertEqualInt(0, archive_strcpy_l(
-                           &utf8, utf8_nfd, f_sconv8));
-                       failure("NFD(%s) should be converted to NFC(%s):%d",
-                           nfd, nfc, line);
-                       assertEqualUTF8String(utf8_nfc, utf8.s);
-
-                       /*
-                        * Normalize an NFC string for import.
-                        */
-                       assertEqualInt(0, archive_strcpy_l(
-                           &utf8, utf8_nfc, f_sconv8));
-                       failure("NFC(%s) should not be any changed:%d",
-                           nfc, line);
-                       assertEqualUTF8String(utf8_nfc, utf8.s);
-
-                       /*
-                        * Copy an NFC string for export.
-                        */
-                       assertEqualInt(0, archive_strcpy_l(
-                           &utf8, utf8_nfc, t_sconv8));
-                       failure("NFC(%s) should not be any changed:%d",
-                           nfc, line);
-                       assertEqualUTF8String(utf8_nfc, utf8.s);
-
-                       /*
-                        * Normalize an NFD string in UTF-16BE for import.
-                        */
-                       assertEqualInt(0, archive_strncpy_l(
-                           &utf8, utf16be_nfd, 100000, f_sconv16be));
-                       failure("NFD(%s) should be converted to NFC(%s):%d",
-                           nfd, nfc, line);
-                       assertEqualUTF8String(utf8_nfc, utf8.s);
-
-                       /*
-                        * Normalize an NFD string in UTF-16LE for import.
-                        */
-                       assertEqualInt(0, archive_strncpy_l(
-                           &utf8, utf16le_nfd, 100000, f_sconv16le));
-                       failure("NFD(%s) should be converted to NFC(%s):%d",
-                           nfd, nfc, line);
-                       assertEqualUTF8String(utf8_nfc, utf8.s);
-               }
 
                /*
                 * Test for archive_mstring interface.
@@ -495,7 +635,7 @@ test_archive_string_normalization(int mac_nfd)
                 * current locale since windows platform cannot make
                 * locale UTF-8.
                 */
-               if ((locale_is_utf8 || wc_is_unicode) && mac_nfd) {
+               if (locale_is_utf8 || wc_is_unicode) {
                        /*
                         * Normalize an NFD string in UTF-8 for import.
                         */
@@ -560,51 +700,6 @@ test_archive_string_normalization(int mac_nfd)
                            ,nfd, line);
                        assertEqualUTF8String(utf8_nfd, mp);
                }
-               if ((locale_is_utf8 || wc_is_unicode) && !mac_nfd) {
-                       /*
-                        * Normalize an NFD string in UTF-8 for import.
-                        */
-                       assertEqualInt(0, archive_mstring_copy_mbs_len_l(
-                           &mstr, utf8_nfd, 100000, f_sconv8));
-                       assertEqualInt(0,
-                           archive_mstring_get_wcs(a, &mstr, &wp));
-                       failure("UTF-8 NFD(%s) should be converted "
-                           "to WCS NFC(%s):%d", nfd, nfc, line);
-                       assertEqualWString(wc_nfc, wp);
-
-                       /*
-                        * Normalize an NFD string in UTF-16BE for import.
-                        */
-                       assertEqualInt(0, archive_mstring_copy_mbs_len_l(
-                           &mstr, utf16be_nfd, 100000, f_sconv16be));
-                       assertEqualInt(0,
-                           archive_mstring_get_wcs(a, &mstr, &wp));
-                       failure("UTF-8 NFD(%s) should be converted "
-                           "to WCS NFC(%s):%d", nfd, nfc, line);
-                       assertEqualWString(wc_nfc, wp);
-
-                       /*
-                        * Normalize an NFD string in UTF-16LE for import.
-                        */
-                       assertEqualInt(0, archive_mstring_copy_mbs_len_l(
-                           &mstr, utf16le_nfd, 100000, f_sconv16le));
-                       assertEqualInt(0,
-                           archive_mstring_get_wcs(a, &mstr, &wp));
-                       failure("UTF-8 NFD(%s) should be converted "
-                           "to WCS NFC(%s):%d", nfd, nfc, line);
-                       assertEqualWString(wc_nfc, wp);
-
-                       /*
-                        * Copy an NFC wide-string for export.
-                        */
-                       assertEqualInt(0, archive_mstring_copy_wcs(
-                           &mstr, wc_nfc));
-                       assertEqualInt(0, archive_mstring_get_mbs_l(
-                           &mstr, &mp, &mplen, t_sconv8));
-                       failure("WCS NFC(%s) should be UTF-8 NFC:%d"
-                           ,nfc, line);
-                       assertEqualUTF8String(utf8_nfc, mp);
-               }
        }
 
        archive_string_free(&utf8);
@@ -684,7 +779,32 @@ test_archive_string_canonicalization(void)
 
 DEFINE_TEST(test_archive_string_conversion)
 {
-       test_archive_string_normalization(0);
-       test_archive_string_normalization(1);
+       static const char reffile[] = "test_archive_string_conversion.txt.Z";
+       static const char testdata[] = "testdata.txt";
+       struct archive *a;
+       struct archive_entry *ae;
+       char buff[512];
+       ssize_t size;
+       FILE *fp;
+
+       /*
+        * Extract a test pattern file.
+        */
+       extract_reference_file(reffile);
+       assert((a = archive_read_new()) != NULL);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_raw(a));
+        assertEqualIntA(a, ARCHIVE_OK,
+            archive_read_open_filename(a, reffile, 512));
+
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+       assert((fp = fopen(testdata, "w")) != NULL);
+       while ((size = archive_read_data(a, buff, 512)) > 0)
+               fwrite(buff, 1, size, fp);
+       fclose(fp);
+       assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+
+       test_archive_string_normalization_nfc(testdata);
+       test_archive_string_normalization_mac_nfd(testdata);
        test_archive_string_canonicalization();
 }