From: Michihiro NAKAJIMA Date: Mon, 19 Mar 2012 09:35:07 +0000 (+0900) Subject: Separate test_archive_string_normalization into NFC version and NFD version. X-Git-Tag: v3.0.4~2^2~11 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3417a9b2aeab2cb47df2e429207fabc32921a445;p=thirdparty%2Flibarchive.git Separate test_archive_string_normalization into NFC version and NFD version. --- diff --git a/libarchive/test/test_archive_string_conversion.c b/libarchive/test/test_archive_string_conversion.c index 1fc116818..fea141d4a 100644 --- a/libarchive/test/test_archive_string_conversion.c +++ b/libarchive/test/test_archive_string_conversion.c @@ -249,61 +249,247 @@ is_wc_unicode(void) * On other platforms, the characters to be Form C. */ static void -test_archive_string_normalization(int mac_nfd) +test_archive_string_normalization_nfc(const char *testdata) { struct archive *a, *a2; - struct archive_entry *ae; struct archive_string utf8; struct archive_mstring mstr; struct archive_string_conv *f_sconv8, *t_sconv8; struct archive_string_conv *f_sconv16be, *f_sconv16le; FILE *fp; char buff[512]; - static const char reffile[] = "test_archive_string_conversion.txt.Z"; - ssize_t size; int line = 0; int locale_is_utf8, wc_is_unicode; - int sconv_opt = 0; + int sconv_opt = SCONV_SET_OPT_NORMALIZATION_C; locale_is_utf8 = (NULL != setlocale(LC_ALL, "en_US.UTF-8")); wc_is_unicode = is_wc_unicode(); /* If it doesn't exist, just warn and return. */ if (!locale_is_utf8 && !wc_is_unicode) { - skipping("invalid encoding tests require a suitable locale;" - " en_US.UTF-8 not available on this system"); + skipping("A test of string normalization for NFC requires " + "a suitable locale; en_US.UTF-8 not available on this " + "system"); return; } archive_string_init(&utf8); memset(&mstr, 0, sizeof(mstr)); - if (mac_nfd) - sconv_opt = SCONV_SET_OPT_NORMALIZATION_D; - else - sconv_opt = SCONV_SET_OPT_NORMALIZATION_C; - /* - * Extract a test pattern file. + * Create string conversion objects. */ - extract_reference_file(reffile); assert((a = archive_read_new()) != NULL); - assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); - assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_raw(a)); - assertEqualIntA(a, ARCHIVE_OK, - archive_read_open_filename(a, reffile, 512)); + assertA(NULL != (f_sconv8 = + archive_string_conversion_from_charset(a, "UTF-8", 0))); + assertA(NULL != (f_sconv16be = + archive_string_conversion_from_charset(a, "UTF-16BE", 0))); + assertA(NULL != (f_sconv16le = + archive_string_conversion_from_charset(a, "UTF-16LE", 0))); + assert((a2 = archive_write_new()) != NULL); + assertA(NULL != (t_sconv8 = + archive_string_conversion_to_charset(a2, "UTF-8", 0))); + if (f_sconv8 == NULL || f_sconv16be == NULL || f_sconv16le == NULL || + t_sconv8 == NULL) { + /* We cannot continue this test. */ + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); + return; + } + archive_string_conversion_set_opt(f_sconv8, sconv_opt); + archive_string_conversion_set_opt(f_sconv16be, sconv_opt); + archive_string_conversion_set_opt(f_sconv16le, sconv_opt); + archive_string_conversion_set_opt(t_sconv8, sconv_opt); - assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); - assert((fp = fopen("testdata.txt", "w")) != NULL); - while ((size = archive_read_data(a, buff, 512)) > 0) - fwrite(buff, 1, size, fp); + /* Open a test pattern file. */ + assert((fp = fopen(testdata, "r")) != NULL); + + /* + * Read test data. + * Test data format: + * ';' '\n' + * Unicode pattern format: + * [0-9A-F]{4,5}([ ][0-9A-F]{4,5}){0,} + */ + while (fgets(buff, sizeof(buff), fp) != NULL) { + char nfc[80], nfd[80]; + char utf8_nfc[80], utf8_nfd[80]; + char utf16be_nfc[80], utf16be_nfd[80]; + char utf16le_nfc[80], utf16le_nfd[80]; + wchar_t wc_nfc[40], wc_nfd[40]; + char *e, *p; + const wchar_t *wp; + const char *mp; + size_t mplen; + + line++; + if (buff[0] == '#') + continue; + p = strchr(buff, ';'); + if (p == NULL) + continue; + *p++ = '\0'; + /* Copy an NFC pattern */ + strncpy(nfc, buff, sizeof(nfc)-1); + nfc[sizeof(nfc)-1] = '\0'; + e = p; + p = strchr(p, '\n'); + if (p == NULL) + continue; + *p = '\0'; + /* Copy an NFD pattern */ + strncpy(nfd, e, sizeof(nfd)-1); + nfd[sizeof(nfd)-1] = '\0'; + + /* + * Get an NFC patterns. + */ + scan_unicode_pattern(utf8_nfc, wc_nfc, utf16be_nfc, utf16le_nfc, + nfc, 0); + + /* + * Get an NFD patterns. + */ + scan_unicode_pattern(utf8_nfd, wc_nfd, utf16be_nfd, utf16le_nfd, + nfd, 0); + + if (locale_is_utf8) { + /* + * Normalize an NFD string for import. + */ + assertEqualInt(0, archive_strcpy_l( + &utf8, utf8_nfd, f_sconv8)); + failure("NFD(%s) should be converted to NFC(%s):%d", + nfd, nfc, line); + assertEqualUTF8String(utf8_nfc, utf8.s); + + /* + * Normalize an NFC string for import. + */ + assertEqualInt(0, archive_strcpy_l( + &utf8, utf8_nfc, f_sconv8)); + failure("NFC(%s) should not be any changed:%d", + nfc, line); + assertEqualUTF8String(utf8_nfc, utf8.s); + + /* + * Copy an NFC string for export. + */ + assertEqualInt(0, archive_strcpy_l( + &utf8, utf8_nfc, t_sconv8)); + failure("NFC(%s) should not be any changed:%d", + nfc, line); + assertEqualUTF8String(utf8_nfc, utf8.s); + + /* + * Normalize an NFD string in UTF-16BE for import. + */ + assertEqualInt(0, archive_strncpy_l( + &utf8, utf16be_nfd, 100000, f_sconv16be)); + failure("NFD(%s) should be converted to NFC(%s):%d", + nfd, nfc, line); + assertEqualUTF8String(utf8_nfc, utf8.s); + + /* + * Normalize an NFD string in UTF-16LE for import. + */ + assertEqualInt(0, archive_strncpy_l( + &utf8, utf16le_nfd, 100000, f_sconv16le)); + failure("NFD(%s) should be converted to NFC(%s):%d", + nfd, nfc, line); + assertEqualUTF8String(utf8_nfc, utf8.s); + } + + /* + * Test for archive_mstring interface. + * In specific, Windows platform UTF-16BE is directly + * converted to/from wide-character to avoid the effect of + * current locale since windows platform cannot make + * locale UTF-8. + */ + if (locale_is_utf8 || wc_is_unicode) { + /* + * Normalize an NFD string in UTF-8 for import. + */ + assertEqualInt(0, archive_mstring_copy_mbs_len_l( + &mstr, utf8_nfd, 100000, f_sconv8)); + assertEqualInt(0, + archive_mstring_get_wcs(a, &mstr, &wp)); + failure("UTF-8 NFD(%s) should be converted " + "to WCS NFC(%s):%d", nfd, nfc, line); + assertEqualWString(wc_nfc, wp); + + /* + * Normalize an NFD string in UTF-16BE for import. + */ + assertEqualInt(0, archive_mstring_copy_mbs_len_l( + &mstr, utf16be_nfd, 100000, f_sconv16be)); + assertEqualInt(0, + archive_mstring_get_wcs(a, &mstr, &wp)); + failure("UTF-8 NFD(%s) should be converted " + "to WCS NFC(%s):%d", nfd, nfc, line); + assertEqualWString(wc_nfc, wp); + + /* + * Normalize an NFD string in UTF-16LE for import. + */ + assertEqualInt(0, archive_mstring_copy_mbs_len_l( + &mstr, utf16le_nfd, 100000, f_sconv16le)); + assertEqualInt(0, + archive_mstring_get_wcs(a, &mstr, &wp)); + failure("UTF-8 NFD(%s) should be converted " + "to WCS NFC(%s):%d", nfd, nfc, line); + assertEqualWString(wc_nfc, wp); + + /* + * Copy an NFC wide-string for export. + */ + assertEqualInt(0, + archive_mstring_copy_wcs(&mstr, wc_nfc)); + assertEqualInt(0, archive_mstring_get_mbs_l( + &mstr, &mp, &mplen, t_sconv8)); + failure("WCS NFC(%s) should be UTF-8 NFC:%d" + ,nfc, line); + assertEqualUTF8String(utf8_nfc, mp); + } + } + + archive_string_free(&utf8); + archive_mstring_clean(&mstr); fclose(fp); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); + assertEqualInt(ARCHIVE_OK, archive_write_free(a2)); +} - /* Open a test pattern file. */ - assert((fp = fopen("testdata.txt", "r")) != NULL); +static void +test_archive_string_normalization_mac_nfd(const char *testdata) +{ + struct archive *a, *a2; + struct archive_string utf8; + struct archive_mstring mstr; + struct archive_string_conv *f_sconv8, *t_sconv8; + struct archive_string_conv *f_sconv16be, *f_sconv16le; + FILE *fp; + char buff[512]; + int line = 0; + int locale_is_utf8, wc_is_unicode; + int sconv_opt = SCONV_SET_OPT_NORMALIZATION_D; + + locale_is_utf8 = (NULL != setlocale(LC_ALL, "en_US.UTF-8")); + wc_is_unicode = is_wc_unicode(); + /* If it doesn't exist, just warn and return. */ + if (!locale_is_utf8 && !wc_is_unicode) { + skipping("A test of string normalization for NFD requires " + "a suitable locale; en_US.UTF-8 not available on this " + "system"); + return; + } + + archive_string_init(&utf8); + memset(&mstr, 0, sizeof(mstr)); /* * Create string conversion objects. */ + assert((a = archive_read_new()) != NULL); assertA(NULL != (f_sconv8 = archive_string_conversion_from_charset(a, "UTF-8", 0))); assertA(NULL != (f_sconv16be = @@ -314,10 +500,8 @@ test_archive_string_normalization(int mac_nfd) assertA(NULL != (t_sconv8 = archive_string_conversion_to_charset(a2, "UTF-8", 0))); if (f_sconv8 == NULL || f_sconv16be == NULL || f_sconv16le == NULL || - t_sconv8 == NULL || fp == NULL) { + t_sconv8 == NULL) { /* We cannot continue this test. */ - if (fp != NULL) - fclose(fp); assertEqualInt(ARCHIVE_OK, archive_read_free(a)); return; } @@ -326,6 +510,9 @@ test_archive_string_normalization(int mac_nfd) archive_string_conversion_set_opt(f_sconv16le, sconv_opt); archive_string_conversion_set_opt(t_sconv8, sconv_opt); + /* Open a test pattern file. */ + assert((fp = fopen(testdata, "r")) != NULL); + /* * Read test data. * Test data format: @@ -368,7 +555,7 @@ test_archive_string_normalization(int mac_nfd) * Get an NFC patterns. */ should_be_nfc = scan_unicode_pattern(utf8_nfc, wc_nfc, - utf16be_nfc, utf16le_nfc, nfc, mac_nfd); + utf16be_nfc, utf16le_nfc, nfc, 1); /* * Get an NFD patterns. @@ -376,7 +563,7 @@ test_archive_string_normalization(int mac_nfd) scan_unicode_pattern(utf8_nfd, wc_nfd, utf16be_nfd, utf16le_nfd, nfd, 0); - if (locale_is_utf8 && mac_nfd) { + if (locale_is_utf8) { /* * Normalize an NFC string for import. */ @@ -384,8 +571,7 @@ test_archive_string_normalization(int mac_nfd) &utf8, utf8_nfc, f_sconv8)); if (should_be_nfc) { failure("NFC(%s) should not be converted to" - " NFD(%s):%d", - nfc, nfd, line); + " NFD(%s):%d", nfc, nfd, line); assertEqualUTF8String(utf8_nfc, utf8.s); } else { failure("NFC(%s) should be converted to" @@ -441,52 +627,6 @@ test_archive_string_normalization(int mac_nfd) assertEqualUTF8String(utf8_nfd, utf8.s); } } - if (locale_is_utf8 && !mac_nfd) { - /* - * Normalize an NFD string for import. - */ - assertEqualInt(0, archive_strcpy_l( - &utf8, utf8_nfd, f_sconv8)); - failure("NFD(%s) should be converted to NFC(%s):%d", - nfd, nfc, line); - assertEqualUTF8String(utf8_nfc, utf8.s); - - /* - * Normalize an NFC string for import. - */ - assertEqualInt(0, archive_strcpy_l( - &utf8, utf8_nfc, f_sconv8)); - failure("NFC(%s) should not be any changed:%d", - nfc, line); - assertEqualUTF8String(utf8_nfc, utf8.s); - - /* - * Copy an NFC string for export. - */ - assertEqualInt(0, archive_strcpy_l( - &utf8, utf8_nfc, t_sconv8)); - failure("NFC(%s) should not be any changed:%d", - nfc, line); - assertEqualUTF8String(utf8_nfc, utf8.s); - - /* - * Normalize an NFD string in UTF-16BE for import. - */ - assertEqualInt(0, archive_strncpy_l( - &utf8, utf16be_nfd, 100000, f_sconv16be)); - failure("NFD(%s) should be converted to NFC(%s):%d", - nfd, nfc, line); - assertEqualUTF8String(utf8_nfc, utf8.s); - - /* - * Normalize an NFD string in UTF-16LE for import. - */ - assertEqualInt(0, archive_strncpy_l( - &utf8, utf16le_nfd, 100000, f_sconv16le)); - failure("NFD(%s) should be converted to NFC(%s):%d", - nfd, nfc, line); - assertEqualUTF8String(utf8_nfc, utf8.s); - } /* * Test for archive_mstring interface. @@ -495,7 +635,7 @@ test_archive_string_normalization(int mac_nfd) * current locale since windows platform cannot make * locale UTF-8. */ - if ((locale_is_utf8 || wc_is_unicode) && mac_nfd) { + if (locale_is_utf8 || wc_is_unicode) { /* * Normalize an NFD string in UTF-8 for import. */ @@ -560,51 +700,6 @@ test_archive_string_normalization(int mac_nfd) ,nfd, line); assertEqualUTF8String(utf8_nfd, mp); } - if ((locale_is_utf8 || wc_is_unicode) && !mac_nfd) { - /* - * Normalize an NFD string in UTF-8 for import. - */ - assertEqualInt(0, archive_mstring_copy_mbs_len_l( - &mstr, utf8_nfd, 100000, f_sconv8)); - assertEqualInt(0, - archive_mstring_get_wcs(a, &mstr, &wp)); - failure("UTF-8 NFD(%s) should be converted " - "to WCS NFC(%s):%d", nfd, nfc, line); - assertEqualWString(wc_nfc, wp); - - /* - * Normalize an NFD string in UTF-16BE for import. - */ - assertEqualInt(0, archive_mstring_copy_mbs_len_l( - &mstr, utf16be_nfd, 100000, f_sconv16be)); - assertEqualInt(0, - archive_mstring_get_wcs(a, &mstr, &wp)); - failure("UTF-8 NFD(%s) should be converted " - "to WCS NFC(%s):%d", nfd, nfc, line); - assertEqualWString(wc_nfc, wp); - - /* - * Normalize an NFD string in UTF-16LE for import. - */ - assertEqualInt(0, archive_mstring_copy_mbs_len_l( - &mstr, utf16le_nfd, 100000, f_sconv16le)); - assertEqualInt(0, - archive_mstring_get_wcs(a, &mstr, &wp)); - failure("UTF-8 NFD(%s) should be converted " - "to WCS NFC(%s):%d", nfd, nfc, line); - assertEqualWString(wc_nfc, wp); - - /* - * Copy an NFC wide-string for export. - */ - assertEqualInt(0, archive_mstring_copy_wcs( - &mstr, wc_nfc)); - assertEqualInt(0, archive_mstring_get_mbs_l( - &mstr, &mp, &mplen, t_sconv8)); - failure("WCS NFC(%s) should be UTF-8 NFC:%d" - ,nfc, line); - assertEqualUTF8String(utf8_nfc, mp); - } } archive_string_free(&utf8); @@ -684,7 +779,32 @@ test_archive_string_canonicalization(void) DEFINE_TEST(test_archive_string_conversion) { - test_archive_string_normalization(0); - test_archive_string_normalization(1); + static const char reffile[] = "test_archive_string_conversion.txt.Z"; + static const char testdata[] = "testdata.txt"; + struct archive *a; + struct archive_entry *ae; + char buff[512]; + ssize_t size; + FILE *fp; + + /* + * Extract a test pattern file. + */ + extract_reference_file(reffile); + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_raw(a)); + assertEqualIntA(a, ARCHIVE_OK, + archive_read_open_filename(a, reffile, 512)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assert((fp = fopen(testdata, "w")) != NULL); + while ((size = archive_read_data(a, buff, 512)) > 0) + fwrite(buff, 1, size, fp); + fclose(fp); + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); + + test_archive_string_normalization_nfc(testdata); + test_archive_string_normalization_mac_nfd(testdata); test_archive_string_canonicalization(); }