* On other platforms, the characters to be Form C.
*/
static void
-test_archive_string_normalization(int mac_nfd)
+test_archive_string_normalization_nfc(const char *testdata)
{
struct archive *a, *a2;
- struct archive_entry *ae;
struct archive_string utf8;
struct archive_mstring mstr;
struct archive_string_conv *f_sconv8, *t_sconv8;
struct archive_string_conv *f_sconv16be, *f_sconv16le;
FILE *fp;
char buff[512];
- static const char reffile[] = "test_archive_string_conversion.txt.Z";
- ssize_t size;
int line = 0;
int locale_is_utf8, wc_is_unicode;
- int sconv_opt = 0;
+ int sconv_opt = SCONV_SET_OPT_NORMALIZATION_C;
locale_is_utf8 = (NULL != setlocale(LC_ALL, "en_US.UTF-8"));
wc_is_unicode = is_wc_unicode();
/* If it doesn't exist, just warn and return. */
if (!locale_is_utf8 && !wc_is_unicode) {
- skipping("invalid encoding tests require a suitable locale;"
- " en_US.UTF-8 not available on this system");
+ skipping("A test of string normalization for NFC requires "
+ "a suitable locale; en_US.UTF-8 not available on this "
+ "system");
return;
}
archive_string_init(&utf8);
memset(&mstr, 0, sizeof(mstr));
- if (mac_nfd)
- sconv_opt = SCONV_SET_OPT_NORMALIZATION_D;
- else
- sconv_opt = SCONV_SET_OPT_NORMALIZATION_C;
-
/*
- * Extract a test pattern file.
+ * Create string conversion objects.
*/
- extract_reference_file(reffile);
assert((a = archive_read_new()) != NULL);
- assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
- assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_raw(a));
- assertEqualIntA(a, ARCHIVE_OK,
- archive_read_open_filename(a, reffile, 512));
+ assertA(NULL != (f_sconv8 =
+ archive_string_conversion_from_charset(a, "UTF-8", 0)));
+ assertA(NULL != (f_sconv16be =
+ archive_string_conversion_from_charset(a, "UTF-16BE", 0)));
+ assertA(NULL != (f_sconv16le =
+ archive_string_conversion_from_charset(a, "UTF-16LE", 0)));
+ assert((a2 = archive_write_new()) != NULL);
+ assertA(NULL != (t_sconv8 =
+ archive_string_conversion_to_charset(a2, "UTF-8", 0)));
+ if (f_sconv8 == NULL || f_sconv16be == NULL || f_sconv16le == NULL ||
+ t_sconv8 == NULL) {
+ /* We cannot continue this test. */
+ assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+ return;
+ }
+ archive_string_conversion_set_opt(f_sconv8, sconv_opt);
+ archive_string_conversion_set_opt(f_sconv16be, sconv_opt);
+ archive_string_conversion_set_opt(f_sconv16le, sconv_opt);
+ archive_string_conversion_set_opt(t_sconv8, sconv_opt);
- assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
- assert((fp = fopen("testdata.txt", "w")) != NULL);
- while ((size = archive_read_data(a, buff, 512)) > 0)
- fwrite(buff, 1, size, fp);
+ /* Open a test pattern file. */
+ assert((fp = fopen(testdata, "r")) != NULL);
+
+ /*
+ * Read test data.
+ * Test data format:
+ * <NFC Unicode pattern> ';' <NFD Unicode pattern> '\n'
+ * Unicode pattern format:
+ * [0-9A-F]{4,5}([ ][0-9A-F]{4,5}){0,}
+ */
+ while (fgets(buff, sizeof(buff), fp) != NULL) {
+ char nfc[80], nfd[80];
+ char utf8_nfc[80], utf8_nfd[80];
+ char utf16be_nfc[80], utf16be_nfd[80];
+ char utf16le_nfc[80], utf16le_nfd[80];
+ wchar_t wc_nfc[40], wc_nfd[40];
+ char *e, *p;
+ const wchar_t *wp;
+ const char *mp;
+ size_t mplen;
+
+ line++;
+ if (buff[0] == '#')
+ continue;
+ p = strchr(buff, ';');
+ if (p == NULL)
+ continue;
+ *p++ = '\0';
+ /* Copy an NFC pattern */
+ strncpy(nfc, buff, sizeof(nfc)-1);
+ nfc[sizeof(nfc)-1] = '\0';
+ e = p;
+ p = strchr(p, '\n');
+ if (p == NULL)
+ continue;
+ *p = '\0';
+ /* Copy an NFD pattern */
+ strncpy(nfd, e, sizeof(nfd)-1);
+ nfd[sizeof(nfd)-1] = '\0';
+
+ /*
+ * Get the NFC patterns.
+ */
+ scan_unicode_pattern(utf8_nfc, wc_nfc, utf16be_nfc, utf16le_nfc,
+ nfc, 0);
+
+ /*
+ * Get the NFD patterns.
+ */
+ scan_unicode_pattern(utf8_nfd, wc_nfd, utf16be_nfd, utf16le_nfd,
+ nfd, 0);
+
+ if (locale_is_utf8) {
+ /*
+ * Normalize an NFD string for import.
+ */
+ assertEqualInt(0, archive_strcpy_l(
+ &utf8, utf8_nfd, f_sconv8));
+ failure("NFD(%s) should be converted to NFC(%s):%d",
+ nfd, nfc, line);
+ assertEqualUTF8String(utf8_nfc, utf8.s);
+
+ /*
+ * Normalize an NFC string for import.
+ */
+ assertEqualInt(0, archive_strcpy_l(
+ &utf8, utf8_nfc, f_sconv8));
+ failure("NFC(%s) should not be any changed:%d",
+ nfc, line);
+ assertEqualUTF8String(utf8_nfc, utf8.s);
+
+ /*
+ * Copy an NFC string for export.
+ */
+ assertEqualInt(0, archive_strcpy_l(
+ &utf8, utf8_nfc, t_sconv8));
+ failure("NFC(%s) should not be any changed:%d",
+ nfc, line);
+ assertEqualUTF8String(utf8_nfc, utf8.s);
+
+ /*
+ * Normalize an NFD string in UTF-16BE for import.
+ */
+ assertEqualInt(0, archive_strncpy_l(
+ &utf8, utf16be_nfd, 100000, f_sconv16be));
+ failure("NFD(%s) should be converted to NFC(%s):%d",
+ nfd, nfc, line);
+ assertEqualUTF8String(utf8_nfc, utf8.s);
+
+ /*
+ * Normalize an NFD string in UTF-16LE for import.
+ */
+ assertEqualInt(0, archive_strncpy_l(
+ &utf8, utf16le_nfd, 100000, f_sconv16le));
+ failure("NFD(%s) should be converted to NFC(%s):%d",
+ nfd, nfc, line);
+ assertEqualUTF8String(utf8_nfc, utf8.s);
+ }
+
+ /*
+ * Test the archive_mstring interface.
+ * Specifically, on Windows, UTF-16BE is converted directly
+ * to/from wide characters to avoid the effect of the
+ * current locale, since Windows cannot set a UTF-8
+ * locale.
+ */
+ if (locale_is_utf8 || wc_is_unicode) {
+ /*
+ * Normalize an NFD string in UTF-8 for import.
+ */
+ assertEqualInt(0, archive_mstring_copy_mbs_len_l(
+ &mstr, utf8_nfd, 100000, f_sconv8));
+ assertEqualInt(0,
+ archive_mstring_get_wcs(a, &mstr, &wp));
+ failure("UTF-8 NFD(%s) should be converted "
+ "to WCS NFC(%s):%d", nfd, nfc, line);
+ assertEqualWString(wc_nfc, wp);
+
+ /*
+ * Normalize an NFD string in UTF-16BE for import.
+ */
+ assertEqualInt(0, archive_mstring_copy_mbs_len_l(
+ &mstr, utf16be_nfd, 100000, f_sconv16be));
+ assertEqualInt(0,
+ archive_mstring_get_wcs(a, &mstr, &wp));
+ failure("UTF-8 NFD(%s) should be converted "
+ "to WCS NFC(%s):%d", nfd, nfc, line);
+ assertEqualWString(wc_nfc, wp);
+
+ /*
+ * Normalize an NFD string in UTF-16LE for import.
+ */
+ assertEqualInt(0, archive_mstring_copy_mbs_len_l(
+ &mstr, utf16le_nfd, 100000, f_sconv16le));
+ assertEqualInt(0,
+ archive_mstring_get_wcs(a, &mstr, &wp));
+ failure("UTF-8 NFD(%s) should be converted "
+ "to WCS NFC(%s):%d", nfd, nfc, line);
+ assertEqualWString(wc_nfc, wp);
+
+ /*
+ * Copy an NFC wide-string for export.
+ */
+ assertEqualInt(0,
+ archive_mstring_copy_wcs(&mstr, wc_nfc));
+ assertEqualInt(0, archive_mstring_get_mbs_l(
+ &mstr, &mp, &mplen, t_sconv8));
+ failure("WCS NFC(%s) should be UTF-8 NFC:%d"
+ ,nfc, line);
+ assertEqualUTF8String(utf8_nfc, mp);
+ }
+ }
+
+ archive_string_free(&utf8);
+ archive_mstring_clean(&mstr);
fclose(fp);
+ assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+ assertEqualInt(ARCHIVE_OK, archive_write_free(a2));
+}
- /* Open a test pattern file. */
- assert((fp = fopen("testdata.txt", "r")) != NULL);
+static void
+test_archive_string_normalization_mac_nfd(const char *testdata)
+{
+ struct archive *a, *a2;
+ struct archive_string utf8;
+ struct archive_mstring mstr;
+ struct archive_string_conv *f_sconv8, *t_sconv8;
+ struct archive_string_conv *f_sconv16be, *f_sconv16le;
+ FILE *fp;
+ char buff[512];
+ int line = 0;
+ int locale_is_utf8, wc_is_unicode;
+ int sconv_opt = SCONV_SET_OPT_NORMALIZATION_D;
+
+ locale_is_utf8 = (NULL != setlocale(LC_ALL, "en_US.UTF-8"));
+ wc_is_unicode = is_wc_unicode();
+ /* If neither a UTF-8 locale nor Unicode wchar_t is available, just warn and return. */
+ if (!locale_is_utf8 && !wc_is_unicode) {
+ skipping("A test of string normalization for NFD requires "
+ "a suitable locale; en_US.UTF-8 not available on this "
+ "system");
+ return;
+ }
+
+ archive_string_init(&utf8);
+ memset(&mstr, 0, sizeof(mstr));
/*
* Create string conversion objects.
*/
+ assert((a = archive_read_new()) != NULL);
assertA(NULL != (f_sconv8 =
archive_string_conversion_from_charset(a, "UTF-8", 0)));
assertA(NULL != (f_sconv16be =
assertA(NULL != (t_sconv8 =
archive_string_conversion_to_charset(a2, "UTF-8", 0)));
if (f_sconv8 == NULL || f_sconv16be == NULL || f_sconv16le == NULL ||
- t_sconv8 == NULL || fp == NULL) {
+ t_sconv8 == NULL) {
/* We cannot continue this test. */
- if (fp != NULL)
- fclose(fp);
assertEqualInt(ARCHIVE_OK, archive_read_free(a));
return;
}
archive_string_conversion_set_opt(f_sconv16le, sconv_opt);
archive_string_conversion_set_opt(t_sconv8, sconv_opt);
+ /* Open a test pattern file. */
+ assert((fp = fopen(testdata, "r")) != NULL);
+
/*
* Read test data.
* Test data format:
* Get an NFC patterns.
*/
should_be_nfc = scan_unicode_pattern(utf8_nfc, wc_nfc,
- utf16be_nfc, utf16le_nfc, nfc, mac_nfd);
+ utf16be_nfc, utf16le_nfc, nfc, 1);
/*
* Get an NFD patterns.
scan_unicode_pattern(utf8_nfd, wc_nfd, utf16be_nfd, utf16le_nfd,
nfd, 0);
- if (locale_is_utf8 && mac_nfd) {
+ if (locale_is_utf8) {
/*
* Normalize an NFC string for import.
*/
&utf8, utf8_nfc, f_sconv8));
if (should_be_nfc) {
failure("NFC(%s) should not be converted to"
- " NFD(%s):%d",
- nfc, nfd, line);
+ " NFD(%s):%d", nfc, nfd, line);
assertEqualUTF8String(utf8_nfc, utf8.s);
} else {
failure("NFC(%s) should be converted to"
assertEqualUTF8String(utf8_nfd, utf8.s);
}
}
- if (locale_is_utf8 && !mac_nfd) {
- /*
- * Normalize an NFD string for import.
- */
- assertEqualInt(0, archive_strcpy_l(
- &utf8, utf8_nfd, f_sconv8));
- failure("NFD(%s) should be converted to NFC(%s):%d",
- nfd, nfc, line);
- assertEqualUTF8String(utf8_nfc, utf8.s);
-
- /*
- * Normalize an NFC string for import.
- */
- assertEqualInt(0, archive_strcpy_l(
- &utf8, utf8_nfc, f_sconv8));
- failure("NFC(%s) should not be any changed:%d",
- nfc, line);
- assertEqualUTF8String(utf8_nfc, utf8.s);
-
- /*
- * Copy an NFC string for export.
- */
- assertEqualInt(0, archive_strcpy_l(
- &utf8, utf8_nfc, t_sconv8));
- failure("NFC(%s) should not be any changed:%d",
- nfc, line);
- assertEqualUTF8String(utf8_nfc, utf8.s);
-
- /*
- * Normalize an NFD string in UTF-16BE for import.
- */
- assertEqualInt(0, archive_strncpy_l(
- &utf8, utf16be_nfd, 100000, f_sconv16be));
- failure("NFD(%s) should be converted to NFC(%s):%d",
- nfd, nfc, line);
- assertEqualUTF8String(utf8_nfc, utf8.s);
-
- /*
- * Normalize an NFD string in UTF-16LE for import.
- */
- assertEqualInt(0, archive_strncpy_l(
- &utf8, utf16le_nfd, 100000, f_sconv16le));
- failure("NFD(%s) should be converted to NFC(%s):%d",
- nfd, nfc, line);
- assertEqualUTF8String(utf8_nfc, utf8.s);
- }
/*
* Test for archive_mstring interface.
* current locale since windows platform cannot make
* locale UTF-8.
*/
- if ((locale_is_utf8 || wc_is_unicode) && mac_nfd) {
+ if (locale_is_utf8 || wc_is_unicode) {
/*
* Normalize an NFD string in UTF-8 for import.
*/
,nfd, line);
assertEqualUTF8String(utf8_nfd, mp);
}
- if ((locale_is_utf8 || wc_is_unicode) && !mac_nfd) {
- /*
- * Normalize an NFD string in UTF-8 for import.
- */
- assertEqualInt(0, archive_mstring_copy_mbs_len_l(
- &mstr, utf8_nfd, 100000, f_sconv8));
- assertEqualInt(0,
- archive_mstring_get_wcs(a, &mstr, &wp));
- failure("UTF-8 NFD(%s) should be converted "
- "to WCS NFC(%s):%d", nfd, nfc, line);
- assertEqualWString(wc_nfc, wp);
-
- /*
- * Normalize an NFD string in UTF-16BE for import.
- */
- assertEqualInt(0, archive_mstring_copy_mbs_len_l(
- &mstr, utf16be_nfd, 100000, f_sconv16be));
- assertEqualInt(0,
- archive_mstring_get_wcs(a, &mstr, &wp));
- failure("UTF-8 NFD(%s) should be converted "
- "to WCS NFC(%s):%d", nfd, nfc, line);
- assertEqualWString(wc_nfc, wp);
-
- /*
- * Normalize an NFD string in UTF-16LE for import.
- */
- assertEqualInt(0, archive_mstring_copy_mbs_len_l(
- &mstr, utf16le_nfd, 100000, f_sconv16le));
- assertEqualInt(0,
- archive_mstring_get_wcs(a, &mstr, &wp));
- failure("UTF-8 NFD(%s) should be converted "
- "to WCS NFC(%s):%d", nfd, nfc, line);
- assertEqualWString(wc_nfc, wp);
-
- /*
- * Copy an NFC wide-string for export.
- */
- assertEqualInt(0, archive_mstring_copy_wcs(
- &mstr, wc_nfc));
- assertEqualInt(0, archive_mstring_get_mbs_l(
- &mstr, &mp, &mplen, t_sconv8));
- failure("WCS NFC(%s) should be UTF-8 NFC:%d"
- ,nfc, line);
- assertEqualUTF8String(utf8_nfc, mp);
- }
}
archive_string_free(&utf8);
DEFINE_TEST(test_archive_string_conversion)
{
- test_archive_string_normalization(0);
- test_archive_string_normalization(1);
+ static const char reffile[] = "test_archive_string_conversion.txt.Z";
+ static const char testdata[] = "testdata.txt";
+ struct archive *a;
+ struct archive_entry *ae;
+ char buff[512];
+ ssize_t size;
+ FILE *fp;
+
+ /*
+ * Extract a test pattern file.
+ */
+ extract_reference_file(reffile);
+ assert((a = archive_read_new()) != NULL);
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_raw(a));
+ assertEqualIntA(a, ARCHIVE_OK,
+ archive_read_open_filename(a, reffile, 512));
+
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+ assert((fp = fopen(testdata, "w")) != NULL);
+ while ((size = archive_read_data(a, buff, 512)) > 0)
+ fwrite(buff, 1, size, fp);
+ fclose(fp);
+ assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+
+ test_archive_string_normalization_nfc(testdata);
+ test_archive_string_normalization_mac_nfd(testdata);
test_archive_string_canonicalization();
}