From: Schantl Stefan Date: Fri, 19 Mar 2010 17:43:49 +0000 (+0100) Subject: toolchain: Add missing i18n patch for diffutils. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f4868223bb01f4dfd0ee399c15654fbddfa6a963;p=ipfire-3.x.git toolchain: Add missing i18n patch for diffutils. --- diff --git a/pkgs/toolchain/diffutils/patches/diffutils-2.8.1-i18n-1.patch b/pkgs/toolchain/diffutils/patches/diffutils-2.8.1-i18n-1.patch deleted file mode 100644 index 488885b82..000000000 --- a/pkgs/toolchain/diffutils/patches/diffutils-2.8.1-i18n-1.patch +++ /dev/null @@ -1,802 +0,0 @@ -Submitted by: Alexander E. Patrakov -Date: 2005-08-13 -Initial Package Version: 2.8.1 -Upstream Status: Unknown, but required for LSB >= 2.0 certification -Origin: RedHat -Description: Fixes treatment of whitespace in multibyte locales. - ---- diffutils-2.8.4/src/diff.c.i18n 2002-06-17 01:55:42.000000000 -0400 -+++ diffutils-2.8.4/src/diff.c 2002-11-16 18:41:37.000000000 -0500 -@@ -275,6 +275,13 @@ - re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING); - excluded = new_exclude (); - -+#ifdef HANDLE_MULTIBYTE -+ if (MB_CUR_MAX > 1) -+ lines_differ = lines_differ_multibyte; -+ else -+#endif -+ lines_differ = lines_differ_singlebyte; -+ - /* Decode the options. */ - - while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1) ---- diffutils-2.8.4/src/diff.h.i18n 2002-11-16 18:31:32.000000000 -0500 -+++ diffutils-2.8.4/src/diff.h 2002-11-16 18:48:58.000000000 -0500 -@@ -23,6 +23,19 @@ - #include "system.h" - #include - -+/* For platform which support the ISO C amendement 1 functionality we -+ support user defined character classes. */ -+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H -+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ -+# include <wchar.h> -+# include <wctype.h> -+# if defined (HAVE_MBRTOWC) -+# define HANDLE_MULTIBYTE 1 -+# endif -+#endif -+ -+#define TAB_WIDTH 8 -+ - /* What kind of changes a hunk contains. 
*/ - enum changes - { -@@ -350,7 +363,13 @@ - extern char const pr_program[]; - char *concat (char const *, char const *, char const *); - char *dir_file_pathname (char const *, char const *); --bool lines_differ (char const *, char const *); -+ -+bool (*lines_differ) (char const *, char const *); -+bool lines_differ_singlebyte (char const *, char const *); -+#ifdef HANDLE_MULTIBYTE -+bool lines_differ_multibyte (char const *, char const *); -+#endif -+ - lin translate_line_number (struct file_data const *, lin); - struct change *find_change (struct change *); - struct change *find_reverse_change (struct change *); ---- diffutils-2.8.4/src/io.c.i18n 2002-06-11 02:06:32.000000000 -0400 -+++ diffutils-2.8.4/src/io.c 2002-11-16 18:57:30.000000000 -0500 -@@ -26,6 +26,7 @@ - #include - #include - #include -+#include <assert.h> - - /* Rotate an unsigned value to the left. */ - #define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n))) -@@ -213,6 +214,28 @@ - - /* Split the file into lines, simultaneously computing the equivalence - class for each line. */ -+#ifdef HANDLE_MULTIBYTE -+# define MBC2WC(P, END, MBLENGTH, WC, STATE, CONVFAIL) \ -+do \ -+{ \ -+ mbstate_t state_bak = STATE; \ -+ \ -+ CONVFAIL = 0; \ -+ MBLENGTH = mbrtowc (&WC, P, END - (char const *)P, &STATE); \ -+ \ -+ switch (MBLENGTH) \ -+ { \ -+ case (size_t)-2: \ -+ case (size_t)-1: \ -+ STATE = state_bak; \ -+ ++CONVFAIL; \ -+ /* Fall through. 
*/ \ -+ case 0: \ -+ MBLENGTH = 1; \ -+ } \ -+} \ -+while (0) -+#endif - - static void - find_and_hash_each_line (struct file_data *current) -@@ -239,12 +262,280 @@ - bool same_length_diff_contents_compare_anyway = - diff_length_compare_anyway | ignore_case; - -+#ifdef HANDLE_MULTIBYTE -+ wchar_t wc; -+ size_t mblength; -+ mbstate_t state; -+ int convfail; -+ -+ memset (&state, '\0', sizeof (mbstate_t)); -+#endif -+ - while ((char const *) p < suffix_begin) - { - char const *ip = (char const *) p; - - h = 0; -+#ifdef HANDLE_MULTIBYTE -+ if (MB_CUR_MAX > 1) -+ { -+ wchar_t lo_wc; -+ char mbc[MB_LEN_MAX]; -+ mbstate_t state_wc; -+ -+ /* Hash this line until we find a newline. */ -+ switch (ignore_white_space) -+ { -+ case IGNORE_ALL_SPACE: -+ while (1) -+ { -+ if (*p == '\n') -+ { -+ ++p; -+ break; -+ } -+ -+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail); -+ -+ if (convfail) -+ mbc[0] = *p++; -+ else if (!iswspace (wc)) -+ { -+ bool flag = 0; -+ -+ if (ignore_case) -+ { -+ lo_wc = towlower (wc); -+ if (lo_wc != wc) -+ { -+ flag = 1; -+ -+ p += mblength; -+ memset (&state_wc, '\0', sizeof(mbstate_t)); -+ mblength = wcrtomb (mbc, lo_wc, &state_wc); -+ -+ assert (mblength != (size_t)-1 && -+ mblength != (size_t)-2); -+ -+ mblength = (mblength < 1) ? 
1 : mblength; -+ } -+ } -+ -+ if (!flag) -+ { -+ for (i = 0; i < mblength; i++) -+ mbc[i] = *p++; -+ } -+ } -+ else -+ { -+ p += mblength; -+ continue; -+ } -+ -+ for (i = 0; i < mblength; i++) -+ h = HASH (h, mbc[i]); -+ } -+ break; -+ -+ case IGNORE_SPACE_CHANGE: -+ while (1) -+ { -+ if (*p == '\n') -+ { -+ ++p; -+ break; -+ } - -+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail); -+ -+ if (!convfail && iswspace (wc)) -+ { -+ while (1) -+ { -+ if (*p == '\n') -+ { -+ ++p; -+ goto hashing_done; -+ } -+ -+ p += mblength; -+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail); -+ if (convfail || (!convfail && !iswspace (wc))) -+ break; -+ } -+ h = HASH (h, ' '); -+ } -+ -+ /* WC is now the first non-space. */ -+ if (convfail) -+ mbc[0] = *p++; -+ else -+ { -+ bool flag = 0; -+ -+ if (ignore_case) -+ { -+ lo_wc = towlower (wc); -+ if (lo_wc != wc) -+ { -+ flag = 1; -+ -+ p += mblength; -+ memset (&state_wc, '\0', sizeof(mbstate_t)); -+ mblength = wcrtomb (mbc, lo_wc, &state_wc); -+ -+ assert (mblength != (size_t)-1 && -+ mblength != (size_t)-2); -+ -+ mblength = (mblength < 1) ? 
1 : mblength; -+ } -+ } -+ -+ if (!flag) -+ { -+ for (i = 0; i < mblength; i++) -+ mbc[i] = *p++; -+ } -+ } -+ -+ for (i = 0; i < mblength; i++) -+ h = HASH (h, mbc[i]); -+ } -+ break; -+ -+ case IGNORE_TAB_EXPANSION: -+ { -+ size_t column = 0; -+ -+ while (1) -+ { -+ if (*p == '\n') -+ { -+ ++p; -+ break; -+ } -+ -+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail); -+ -+ if (convfail) -+ { -+ h = HASH (h, *p++); -+ ++column; -+ } -+ else -+ { -+ bool flag; -+ -+ switch (wc) -+ { -+ case L'\b': -+ column -= 0 < column; -+ h = HASH (h, '\b'); -+ ++p; -+ break; -+ -+ case L'\t': -+ { -+ int repetitions; -+ -+ repetitions = TAB_WIDTH - column % TAB_WIDTH; -+ column += repetitions; -+ do -+ h = HASH (h, ' '); -+ while (--repetitions != 0); -+ ++p; -+ } -+ break; -+ -+ case L'\r': -+ column = 0; -+ h = HASH (h, '\r'); -+ ++p; -+ break; -+ -+ default: -+ flag = 0; -+ column += wcwidth (wc); -+ if (ignore_case) -+ { -+ lo_wc = towlower (wc); -+ if (lo_wc != wc) -+ { -+ flag = 1; -+ p += mblength; -+ memset (&state_wc, '\0', sizeof(mbstate_t)); -+ mblength = wcrtomb (mbc, lo_wc, &state_wc); -+ -+ assert (mblength != (size_t)-1 && -+ mblength != (size_t)-2); -+ -+ mblength = (mblength < 1) ? 1 : mblength; -+ } -+ } -+ -+ if (!flag) -+ { -+ for (i = 0; i < mblength; i++) -+ mbc[i] = *p++; -+ } -+ -+ for (i = 0; i < mblength; i++) -+ h = HASH (h, mbc[i]); -+ } -+ } -+ } -+ } -+ break; -+ -+ default: -+ while (1) -+ { -+ if (*p == '\n') -+ { -+ ++p; -+ break; -+ } -+ -+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail); -+ -+ if (convfail) -+ mbc[0] = *p++; -+ else -+ { -+ int flag = 0; -+ -+ if (ignore_case) -+ { -+ lo_wc = towlower (wc); -+ if (lo_wc != wc) -+ { -+ flag = 1; -+ p += mblength; -+ memset (&state_wc, '\0', sizeof(mbstate_t)); -+ mblength = wcrtomb (mbc, lo_wc, &state_wc); -+ -+ assert (mblength != (size_t)-1 && -+ mblength != (size_t)-2); -+ -+ mblength = (mblength < 1) ? 
1 : mblength; -+ } -+ } -+ -+ if (!flag) -+ { -+ for (i = 0; i < mblength; i++) -+ mbc[i] = *p++; -+ } -+ } -+ -+ for (i = 0; i < mblength; i++) -+ h = HASH (h, mbc[i]); -+ } -+ } -+ } -+ else -+#endif - /* Hash this line until we find a newline. */ - if (ignore_case) - switch (ignore_white_space) ---- diffutils-2.8.4/src/side.c.i18n 2002-06-11 02:06:32.000000000 -0400 -+++ diffutils-2.8.4/src/side.c 2002-11-16 18:41:37.000000000 -0500 -@@ -73,11 +73,72 @@ - register size_t out_position = 0; - register char const *text_pointer = line[0]; - register char const *text_limit = line[1]; -+#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H -+ unsigned char mbc[MB_LEN_MAX]; -+ wchar_t wc; -+ mbstate_t state, state_bak; -+ size_t mbc_pos, mblength; -+ int mbc_loading_flag = 0; -+ int wc_width; -+ -+ memset (&state, '\0', sizeof (mbstate_t)); -+#endif - - while (text_pointer < text_limit) - { - register unsigned char c = *text_pointer++; - -+#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H -+ if (MB_CUR_MAX > 1 && mbc_loading_flag) -+ { -+ mbc_loading_flag = 0; -+ state_bak = state; -+ mbc[mbc_pos++] = c; -+ -+process_mbc: -+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state); -+ -+ switch (mblength) -+ { -+ case (size_t)-2: /* Incomplete multibyte character. */ -+ mbc_loading_flag = 1; -+ state = state_bak; -+ break; -+ -+ case (size_t)-1: /* Invalid as a multibyte character. */ -+ if (in_position++ < out_bound) -+ { -+ out_position = in_position; -+ putc (mbc[0], out); -+ } -+ memmove (mbc, mbc + 1, --mbc_pos); -+ if (mbc_pos > 0) -+ { -+ mbc[mbc_pos] = '\0'; -+ goto process_mbc; -+ } -+ break; -+ -+ default: -+ wc_width = wcwidth (wc); -+ if (wc_width < 1) /* Unprintable multibyte character. */ -+ { -+ if (in_position <= out_bound) -+ fprintf (out, "%lc", (wint_t)wc); -+ } -+ else /* Printable multibyte character. 
*/ -+ { -+ in_position += wc_width; -+ if (in_position <= out_bound) -+ { -+ out_position = in_position; -+ fprintf (out, "%lc", (wint_t)wc); -+ } -+ } -+ } -+ continue; -+ } -+#endif - switch (c) - { - case '\t': -@@ -135,8 +196,39 @@ - break; - - default: -- if (! ISPRINT (c)) -- goto control_char; -+#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H -+ if (MB_CUR_MAX > 1) -+ { -+ memset (mbc, '\0', MB_LEN_MAX); -+ mbc_pos = 0; -+ mbc[mbc_pos++] = c; -+ state_bak = state; -+ -+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state); -+ -+ /* The value of mblength is always less than 2 here. */ -+ switch (mblength) -+ { -+ case (size_t)-2: /* Incomplete multibyte character. */ -+ state = state_bak; -+ mbc_loading_flag = 1; -+ continue; -+ -+ case (size_t)-1: /* Invalid as a multibyte character. */ -+ state = state_bak; -+ break; -+ -+ default: -+ if (! iswprint (wc)) -+ goto control_char; -+ } -+ } -+ else -+#endif -+ { -+ if (! ISPRINT (c)) -+ goto control_char; -+ } - /* falls through */ - case ' ': - if (in_position++ < out_bound) ---- diffutils-2.8.4/src/util.c.i18n 2002-06-11 02:06:32.000000000 -0400 -+++ diffutils-2.8.4/src/util.c 2002-11-16 18:41:37.000000000 -0500 -@@ -321,7 +321,7 @@ - Return nonzero if the lines differ. */ - - bool --lines_differ (char const *s1, char const *s2) -+lines_differ_singlebyte (char const *s1, char const *s2) - { - register unsigned char const *t1 = (unsigned char const *) s1; - register unsigned char const *t2 = (unsigned char const *) s2; -@@ -450,6 +450,293 @@ - - return 1; - } -+ -+#ifdef HANDLE_MULTIBYTE -+# define MBC2WC(T, END, MBLENGTH, WC, STATE, CONVFAIL) \ -+do \ -+{ \ -+ mbstate_t bak = STATE; \ -+ \ -+ CONVFAIL = 0; \ -+ MBLENGTH = mbrtowc (&WC, T, END - T, &STATE); \ -+ \ -+ switch (MBLENGTH) \ -+ { \ -+ case (size_t)-2: \ -+ case (size_t)-1: \ -+ STATE = bak; \ -+ ++CONVFAIL; \ -+ /* Fall through. 
*/ \ -+ case 0: \ -+ MBLENGTH = 1; \ -+ } \ -+} \ -+while (0) -+ -+bool -+lines_differ_multibyte (char const *s1, char const *s2) -+{ -+ unsigned char const *end1, *end2; -+ unsigned char c1, c2; -+ wchar_t wc1, wc2, wc1_bak, wc2_bak; -+ size_t mblen1, mblen2; -+ mbstate_t state1, state2, state1_bak, state2_bak; -+ int convfail1, convfail2, convfail1_bak, convfail2_bak; -+ -+ unsigned char const *t1 = (unsigned char const *) s1; -+ unsigned char const *t2 = (unsigned char const *) s2; -+ unsigned char const *t1_bak, *t2_bak; -+ size_t column = 0; -+ -+ if (ignore_white_space == IGNORE_NO_WHITE_SPACE && !ignore_case) -+ { -+ while (*t1 != '\n') -+ if (*t1++ != * t2++) -+ return 1; -+ return 0; -+ } -+ -+ memset (&state1, '\0', sizeof (mbstate_t)); -+ memset (&state2, '\0', sizeof (mbstate_t)); -+ -+ end1 = s1 + strlen (s1); -+ end2 = s2 + strlen (s2); -+ -+ while (1) -+ { -+ c1 = *t1; -+ c2 = *t2; -+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); -+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); -+ -+ /* Test for exact char equality first, since it's a common case. */ -+ if (convfail1 ^ convfail2) -+ break; -+ else if (convfail1 && convfail2 && c1 != c2) -+ break; -+ else if (!convfail1 && !convfail2 && wc1 != wc2) -+ { -+ switch (ignore_white_space) -+ { -+ case IGNORE_ALL_SPACE: -+ /* For -w, just skip past any white space. */ -+ while (1) -+ { -+ if (convfail1) -+ break; -+ else if (wc1 == L'\n' || !iswspace (wc1)) -+ break; -+ -+ t1 += mblen1; -+ c1 = *t1; -+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); -+ } -+ -+ while (1) -+ { -+ if (convfail2) -+ break; -+ else if (wc2 == L'\n' || !iswspace (wc2)) -+ break; -+ -+ t2 += mblen2; -+ c2 = *t2; -+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); -+ } -+ t1 += mblen1; -+ t2 += mblen2; -+ break; -+ -+ case IGNORE_SPACE_CHANGE: -+ /* For -b, advance past any sequence of white space in -+ line 1 and consider it just one space, or nothing at -+ all if it is at the end of the line. 
*/ -+ if (wc1 != L'\n' && iswspace (wc1)) -+ { -+ size_t mblen_bak; -+ mbstate_t state_bak; -+ -+ do -+ { -+ t1 += mblen1; -+ mblen_bak = mblen1; -+ state_bak = state1; -+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); -+ } -+ while (!convfail1 && (wc1 != L'\n' && iswspace (wc1))); -+ -+ state1 = state_bak; -+ mblen1 = mblen_bak; -+ t1 -= mblen1; -+ convfail1 = 0; -+ wc1 = L' '; -+ } -+ -+ /* Likewise for line 2. */ -+ if (wc2 != L'\n' && iswspace (wc2)) -+ { -+ size_t mblen_bak; -+ mbstate_t state_bak; -+ -+ do -+ { -+ t2 += mblen2; -+ mblen_bak = mblen2; -+ state_bak = state2; -+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); -+ } -+ while (!convfail2 && (wc2 != L'\n' && iswspace (wc2))); -+ -+ state2 = state_bak; -+ mblen2 = mblen_bak; -+ t2 -= mblen2; -+ convfail2 = 0; -+ wc2 = L' '; -+ } -+ -+ if (wc1 != wc2) -+ { -+ if (wc2 == L' ' && wc1 != L'\n' && -+ t1 > (unsigned char const *)s1 && -+ !convfail1_bak && iswspace (wc1_bak)) -+ { -+ t1 = t1_bak; -+ wc1 = wc1_bak; -+ state1 = state1_bak; -+ convfail1 = convfail1_bak; -+ continue; -+ } -+ if (wc1 == L' ' && wc2 != L'\n' -+ && t2 > (unsigned char const *)s2 -+ && !convfail2_bak && iswspace (wc2_bak)) -+ { -+ t2 = t2_bak; -+ wc2 = wc2_bak; -+ state2 = state2_bak; -+ convfail2 = convfail2_bak; -+ continue; -+ } -+ } -+ -+ t1_bak = t1; t2_bak = t2; -+ wc1_bak = wc1; wc2_bak = wc2; -+ state1_bak = state1; state2_bak = state2; -+ convfail1_bak = convfail1; convfail2_bak = convfail2; -+ -+ if (wc1 == L'\n') -+ wc1 = L' '; -+ else -+ t1 += mblen1; -+ -+ if (wc2 == L'\n') -+ wc2 = L' '; -+ else -+ t2 += mblen2; -+ -+ break; -+ -+ case IGNORE_TAB_EXPANSION: -+ if ((wc1 == L' ' && wc2 == L'\t') -+ || (wc1 == L'\t' && wc2 == L' ')) -+ { -+ size_t column2 = column; -+ -+ while (1) -+ { -+ if (convfail1) -+ { -+ ++t1; -+ break; -+ } -+ else if (wc1 == L' ') -+ column++; -+ else if (wc1 == L'\t') -+ column += TAB_WIDTH - column % TAB_WIDTH; -+ else -+ { -+ t1 += mblen1; -+ break; -+ } -+ -+ t1 += mblen1; -+ c1 = 
*t1; -+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); -+ } -+ -+ while (1) -+ { -+ if (convfail2) -+ { -+ ++t2; -+ break; -+ } -+ else if (wc2 == L' ') -+ column2++; -+ else if (wc2 == L'\t') -+ column2 += TAB_WIDTH - column2 % TAB_WIDTH; -+ else -+ { -+ t2 += mblen2; -+ break; -+ } -+ -+ t2 += mblen2; -+ c2 = *t2; -+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); -+ } -+ -+ if (column != column2) -+ return 1; -+ } -+ else -+ { -+ t1 += mblen1; -+ t2 += mblen2; -+ } -+ break; -+ -+ case IGNORE_NO_WHITE_SPACE: -+ t1 += mblen1; -+ t2 += mblen2; -+ break; -+ } -+ -+ /* Lowercase all letters if -i is specified. */ -+ if (ignore_case) -+ { -+ if (!convfail1) -+ wc1 = towlower (wc1); -+ if (!convfail2) -+ wc2 = towlower (wc2); -+ } -+ -+ if (convfail1 ^ convfail2) -+ break; -+ else if (convfail1 && convfail2 && c1 != c2) -+ break; -+ else if (!convfail1 && !convfail2 && wc1 != wc2) -+ break; -+ } -+ else -+ { -+ t1_bak = t1; t2_bak = t2; -+ wc1_bak = wc1; wc2_bak = wc2; -+ state1_bak = state1; state2_bak = state2; -+ convfail1_bak = convfail1; convfail2_bak = convfail2; -+ -+ t1 += mblen1; t2 += mblen2; -+ } -+ -+ if (!convfail1 && wc1 == L'\n') -+ return 0; -+ -+ column += convfail1 ? 1 : -+ (wc1 == L'\t') ? TAB_WIDTH - column % TAB_WIDTH : wcwidth (wc1); -+ } -+ -+ return 1; -+} -+#endif - - /* Find the consecutive changes at the start of the script START. - Return the last link before the first gap. 
*/ diff --git a/pkgs/toolchain/diffutils/patches/diffutils-2.9-i18n-1.patch b/pkgs/toolchain/diffutils/patches/diffutils-2.9-i18n-1.patch new file mode 100644 index 000000000..b6f12ef1f --- /dev/null +++ b/pkgs/toolchain/diffutils/patches/diffutils-2.9-i18n-1.patch @@ -0,0 +1,768 @@ +--- diffutils-2.9/src/diff.c 2010-02-11 10:39:17.000000000 +0100 ++++ diffutils-2.9.mod/src/diff.c 2010-02-13 15:28:22.268208253 +0100 +@@ -284,6 +284,13 @@ + re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING); + excluded = new_exclude (); + ++#ifdef HANDLE_MULTIBYTE ++ if (MB_CUR_MAX > 1) ++ lines_differ = lines_differ_multibyte; ++ else ++#endif ++ lines_differ = lines_differ_singlebyte; ++ + /* Decode the options. */ + + while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) +--- diffutils-2.9/src/diff.h 2010-02-11 10:05:57.000000000 +0100 ++++ diffutils-2.9.mod/src/diff.h 2010-02-13 15:28:22.269208190 +0100 +@@ -23,6 +23,19 @@ + #include + #include + ++/* For platform which support the ISO C amendement 1 functionality we ++ support user defined character classes. */ ++#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H ++/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ ++# include <wchar.h> ++# include <wctype.h> ++# if defined (HAVE_MBRTOWC) ++# define HANDLE_MULTIBYTE 1 ++# endif ++#endif ++ ++#define TAB_WIDTH 8 ++ + /* What kind of changes a hunk contains. 
*/ + enum changes + { +@@ -350,7 +363,13 @@ + extern char const pr_program[]; + char *concat (char const *, char const *, char const *); + char *dir_file_pathname (char const *, char const *); +-bool lines_differ (char const *, char const *); ++ ++bool (*lines_differ) (char const *, char const *); ++bool lines_differ_singlebyte (char const *, char const *); ++#ifdef HANDLE_MULTIBYTE ++bool lines_differ_multibyte (char const *, char const *); ++#endif ++ + lin translate_line_number (struct file_data const *, lin); + struct change *find_change (struct change *); + struct change *find_reverse_change (struct change *); +--- diffutils-2.9/src/io.c 2010-02-05 09:10:15.000000000 +0100 ++++ diffutils-2.9.mod/src/io.c 2010-02-13 15:39:59.313224273 +0100 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include <assert.h> + + /* Rotate an unsigned value to the left. */ + #define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n))) +@@ -194,6 +195,28 @@ + + /* Split the file into lines, simultaneously computing the equivalence + class for each line. */ ++#ifdef HANDLE_MULTIBYTE ++# define MBC2WC(P, END, MBLENGTH, WC, STATE, CONVFAIL) \ ++do \ ++{ \ ++ mbstate_t state_bak = STATE; \ ++ \ ++ CONVFAIL = 0; \ ++ MBLENGTH = mbrtowc (&WC, P, END - (char const *)P, &STATE); \ ++ \ ++ switch (MBLENGTH) \ ++ { \ ++ case (size_t)-2: \ ++ case (size_t)-1: \ ++ STATE = state_bak; \ ++ ++CONVFAIL; \ ++ /* Fall through. 
*/ \ ++ case 0: \ ++ MBLENGTH = 1; \ ++ } \ ++} \ ++while (0) ++#endif + + static void + find_and_hash_each_line (struct file_data *current) +@@ -220,12 +243,282 @@ + bool same_length_diff_contents_compare_anyway = + diff_length_compare_anyway | ignore_case; + ++#ifdef HANDLE_MULTIBYTE ++ wchar_t wc; ++ size_t mblength; ++ mbstate_t state; ++ int convfail; ++ ++ memset (&state, '\0', sizeof (mbstate_t)); ++#endif ++ + while (p < suffix_begin) + { + char const *ip = p; + + h = 0; + ++#ifdef HANDLE_MULTIBYTE ++ if (MB_CUR_MAX > 1) ++ { ++ wchar_t lo_wc; ++ char mbc[MB_LEN_MAX]; ++ mbstate_t state_wc; ++ ++ /* Hash this line until we find a newline. */ ++ switch (ignore_white_space) ++ { ++ case IGNORE_ALL_SPACE: ++ while (1) ++ { ++ if (*p == '\n') ++ { ++ ++p; ++ break; ++ } ++ ++ MBC2WC (p, suffix_begin, mblength, wc, state, convfail); ++ ++ if (convfail) ++ mbc[0] = *p++; ++ else if (!iswspace (wc)) ++ { ++ bool flag = 0; ++ ++ if (ignore_case) ++ { ++ lo_wc = towlower (wc); ++ if (lo_wc != wc) ++ { ++ flag = 1; ++ ++ p += mblength; ++ memset (&state_wc, '\0', sizeof(mbstate_t)); ++ mblength = wcrtomb (mbc, lo_wc, &state_wc); ++ ++ assert (mblength != (size_t)-1 && ++ mblength != (size_t)-2); ++ ++ mblength = (mblength < 1) ? 1 : mblength; ++ } ++ } ++ ++ if (!flag) ++ { ++ for (i = 0; i < mblength; i++) ++ mbc[i] = *p++; ++ } ++ } ++ else ++ { ++ p += mblength; ++ continue; ++ } ++ ++ for (i = 0; i < mblength; i++) ++ h = HASH (h, mbc[i]); ++ } ++ break; ++ ++ case IGNORE_SPACE_CHANGE: ++ while (1) ++ { ++ if (*p == '\n') ++ { ++ ++p; ++ break; ++ } ++ ++ MBC2WC (p, suffix_begin, mblength, wc, state, convfail); ++ ++ if (!convfail && iswspace (wc)) ++ { ++ while (1) ++ { ++ if (*p == '\n') ++ { ++ ++p; ++ goto hashing_done; ++ } ++ ++ p += mblength; ++ MBC2WC (p, suffix_begin, mblength, wc, state, convfail); ++ if (convfail || (!convfail && !iswspace (wc))) ++ break; ++ } ++ h = HASH (h, ' '); ++ } ++ ++ /* WC is now the first non-space. 
*/ ++ if (convfail) ++ mbc[0] = *p++; ++ else ++ { ++ bool flag = 0; ++ ++ if (ignore_case) ++ { ++ lo_wc = towlower (wc); ++ if (lo_wc != wc) ++ { ++ flag = 1; ++ ++ p += mblength; ++ memset (&state_wc, '\0', sizeof(mbstate_t)); ++ mblength = wcrtomb (mbc, lo_wc, &state_wc); ++ ++ assert (mblength != (size_t)-1 && ++ mblength != (size_t)-2); ++ ++ mblength = (mblength < 1) ? 1 : mblength; ++ } ++ } ++ ++ if (!flag) ++ { ++ for (i = 0; i < mblength; i++) ++ mbc[i] = *p++; ++ } ++ } ++ ++ for (i = 0; i < mblength; i++) ++ h = HASH (h, mbc[i]); ++ } ++ break; ++ ++ case IGNORE_TAB_EXPANSION: ++ { ++ size_t column = 0; ++ ++ while (1) ++ { ++ if (*p == '\n') ++ { ++ ++p; ++ break; ++ } ++ ++ MBC2WC (p, suffix_begin, mblength, wc, state, convfail); ++ ++ if (convfail) ++ { ++ h = HASH (h, *p++); ++ ++column; ++ } ++ else ++ { ++ bool flag; ++ ++ switch (wc) ++ { ++ case L'\b': ++ column -= 0 < column; ++ h = HASH (h, '\b'); ++ ++p; ++ break; ++ ++ case L'\t': ++ { ++ int repetitions; ++ ++ repetitions = TAB_WIDTH - column % TAB_WIDTH; ++ column += repetitions; ++ do ++ h = HASH (h, ' '); ++ while (--repetitions != 0); ++ ++p; ++ } ++ break; ++ ++ case L'\r': ++ column = 0; ++ h = HASH (h, '\r'); ++ ++p; ++ break; ++ ++ default: ++ flag = 0; ++ column += wcwidth (wc); ++ if (ignore_case) ++ { ++ lo_wc = towlower (wc); ++ if (lo_wc != wc) ++ { ++ flag = 1; ++ p += mblength; ++ memset (&state_wc, '\0', sizeof(mbstate_t)); ++ mblength = wcrtomb (mbc, lo_wc, &state_wc); ++ ++ assert (mblength != (size_t)-1 && ++ mblength != (size_t)-2); ++ ++ mblength = (mblength < 1) ? 
1 : mblength; ++ } ++ } ++ ++ if (!flag) ++ { ++ for (i = 0; i < mblength; i++) ++ mbc[i] = *p++; ++ } ++ ++ for (i = 0; i < mblength; i++) ++ h = HASH (h, mbc[i]); ++ } ++ } ++ } ++ } ++ break; ++ ++ default: ++ while (1) ++ { ++ if (*p == '\n') ++ { ++ ++p; ++ break; ++ } ++ ++ MBC2WC (p, suffix_begin, mblength, wc, state, convfail); ++ ++ if (convfail) ++ mbc[0] = *p++; ++ else ++ { ++ int flag = 0; ++ ++ if (ignore_case) ++ { ++ lo_wc = towlower (wc); ++ if (lo_wc != wc) ++ { ++ flag = 1; ++ p += mblength; ++ memset (&state_wc, '\0', sizeof(mbstate_t)); ++ mblength = wcrtomb (mbc, lo_wc, &state_wc); ++ ++ assert (mblength != (size_t)-1 && ++ mblength != (size_t)-2); ++ ++ mblength = (mblength < 1) ? 1 : mblength; ++ } ++ } ++ ++ if (!flag) ++ { ++ for (i = 0; i < mblength; i++) ++ mbc[i] = *p++; ++ } ++ } ++ ++ for (i = 0; i < mblength; i++) ++ h = HASH (h, mbc[i]); ++ } ++ } ++ } ++ else ++#endif ++ + /* Hash this line until we find a newline. */ + if (ignore_case) + switch (ignore_white_space) +--- diffutils-2.9/src/side.c 2010-02-05 09:10:15.000000000 +0100 ++++ diffutils-2.9.mod/src/side.c 2010-02-13 15:51:32.647221551 +0100 +@@ -77,11 +77,74 @@ + register char const *text_limit = line[1]; + mbstate_t mbstate = { 0 }; + ++#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H ++ unsigned char mbc[MB_LEN_MAX]; ++ wchar_t wc; ++ mbstate_t state, state_bak; ++ size_t mbc_pos, mblength; ++ int mbc_loading_flag = 0; ++ int wc_width; ++ ++ memset (&state, '\0', sizeof (mbstate_t)); ++#endif ++ + while (text_pointer < text_limit) + { + char const *tp0 = text_pointer; + register char c = *text_pointer++; + ++#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H ++ if (MB_CUR_MAX > 1 && mbc_loading_flag) ++ { ++ mbc_loading_flag = 0; ++ state_bak = state; ++ mbc[mbc_pos++] = c; ++ ++process_mbc: ++ mblength = mbrtowc (&wc, mbc, mbc_pos, &state); ++ ++ switch (mblength) ++ { ++ case (size_t)-2: /* Incomplete multibyte character. 
*/ ++ mbc_loading_flag = 1; ++ state = state_bak; ++ break; ++ ++ case (size_t)-1: /* Invalid as a multibyte character. */ ++ if (in_position++ < out_bound) ++ { ++ out_position = in_position; ++ putc (mbc[0], out); ++ } ++ memmove (mbc, mbc + 1, --mbc_pos); ++ if (mbc_pos > 0) ++ { ++ mbc[mbc_pos] = '\0'; ++ goto process_mbc; ++ } ++ break; ++ ++ default: ++ wc_width = wcwidth (wc); ++ if (wc_width < 1) /* Unprintable multibyte character. */ ++ { ++ if (in_position <= out_bound) ++ fprintf (out, "%lc", (wint_t)wc); ++ } ++ else /* Printable multibyte character. */ ++ { ++ in_position += wc_width; ++ if (in_position <= out_bound) ++ { ++ out_position = in_position; ++ fprintf (out, "%lc", (wint_t)wc); ++ } ++ } ++ } ++ continue; ++ } ++#endif ++ + switch (c) + { + case '\t': +--- diffutils-2.9/src/util.c 2010-02-11 10:39:17.000000000 +0100 ++++ diffutils-2.9.mod/src/util.c 2010-02-13 16:08:16.065232588 +0100 +@@ -309,7 +309,7 @@ + + outfile = 0; + } +- ++ + /* Compare two lines (typically one from each input file) + according to the command line options. + For efficiency, this is invoked only when the lines do not match exactly +@@ -317,7 +317,7 @@ + Return nonzero if the lines differ. */ + + bool +-lines_differ (char const *s1, char const *s2) ++lines_differ_singlebyte (char const *s1, char const *s2) + { + register char const *t1 = s1; + register char const *t2 = s2; +@@ -446,7 +446,294 @@ + + return true; + } +- ++ ++#ifdef HANDLE_MULTIBYTE ++# define MBC2WC(T, END, MBLENGTH, WC, STATE, CONVFAIL) \ ++do \ ++{ \ ++ mbstate_t bak = STATE; \ ++ \ ++ CONVFAIL = 0; \ ++ MBLENGTH = mbrtowc (&WC, T, END - T, &STATE); \ ++ \ ++ switch (MBLENGTH) \ ++ { \ ++ case (size_t)-2: \ ++ case (size_t)-1: \ ++ STATE = bak; \ ++ ++CONVFAIL; \ ++ /* Fall through. 
*/ \ ++ case 0: \ ++ MBLENGTH = 1; \ ++ } \ ++} \ ++while (0) ++ ++bool ++lines_differ_multibyte (char const *s1, char const *s2) ++{ ++ unsigned char const *end1, *end2; ++ unsigned char c1, c2; ++ wchar_t wc1, wc2, wc1_bak, wc2_bak; ++ size_t mblen1, mblen2; ++ mbstate_t state1, state2, state1_bak, state2_bak; ++ int convfail1, convfail2, convfail1_bak, convfail2_bak; ++ ++ unsigned char const *t1 = (unsigned char const *) s1; ++ unsigned char const *t2 = (unsigned char const *) s2; ++ unsigned char const *t1_bak, *t2_bak; ++ size_t column = 0; ++ ++ if (ignore_white_space == IGNORE_NO_WHITE_SPACE && !ignore_case) ++ { ++ while (*t1 != '\n') ++ if (*t1++ != * t2++) ++ return 1; ++ return 0; ++ } ++ ++ memset (&state1, '\0', sizeof (mbstate_t)); ++ memset (&state2, '\0', sizeof (mbstate_t)); ++ ++ end1 = s1 + strlen (s1); ++ end2 = s2 + strlen (s2); ++ ++ while (1) ++ { ++ c1 = *t1; ++ c2 = *t2; ++ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); ++ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); ++ ++ /* Test for exact char equality first, since it's a common case. */ ++ if (convfail1 ^ convfail2) ++ break; ++ else if (convfail1 && convfail2 && c1 != c2) ++ break; ++ else if (!convfail1 && !convfail2 && wc1 != wc2) ++ { ++ switch (ignore_white_space) ++ { ++ case IGNORE_ALL_SPACE: ++ /* For -w, just skip past any white space. */ ++ while (1) ++ { ++ if (convfail1) ++ break; ++ else if (wc1 == L'\n' || !iswspace (wc1)) ++ break; ++ ++ t1 += mblen1; ++ c1 = *t1; ++ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); ++ } ++ ++ while (1) ++ { ++ if (convfail2) ++ break; ++ else if (wc2 == L'\n' || !iswspace (wc2)) ++ break; ++ ++ t2 += mblen2; ++ c2 = *t2; ++ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); ++ } ++ t1 += mblen1; ++ t2 += mblen2; ++ break; ++ ++ case IGNORE_SPACE_CHANGE: ++ /* For -b, advance past any sequence of white space in ++ line 1 and consider it just one space, or nothing at ++ all if it is at the end of the line. 
*/ ++ if (wc1 != L'\n' && iswspace (wc1)) ++ { ++ size_t mblen_bak; ++ mbstate_t state_bak; ++ ++ do ++ { ++ t1 += mblen1; ++ mblen_bak = mblen1; ++ state_bak = state1; ++ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); ++ } ++ while (!convfail1 && (wc1 != L'\n' && iswspace (wc1))); ++ ++ state1 = state_bak; ++ mblen1 = mblen_bak; ++ t1 -= mblen1; ++ convfail1 = 0; ++ wc1 = L' '; ++ } ++ ++ /* Likewise for line 2. */ ++ if (wc2 != L'\n' && iswspace (wc2)) ++ { ++ size_t mblen_bak; ++ mbstate_t state_bak; ++ ++ do ++ { ++ t2 += mblen2; ++ mblen_bak = mblen2; ++ state_bak = state2; ++ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); ++ } ++ while (!convfail2 && (wc2 != L'\n' && iswspace (wc2))); ++ ++ state2 = state_bak; ++ mblen2 = mblen_bak; ++ t2 -= mblen2; ++ convfail2 = 0; ++ wc2 = L' '; ++ } ++ ++ if (wc1 != wc2) ++ { ++ if (wc2 == L' ' && wc1 != L'\n' && ++ t1 > (unsigned char const *)s1 && ++ !convfail1_bak && iswspace (wc1_bak)) ++ { ++ t1 = t1_bak; ++ wc1 = wc1_bak; ++ state1 = state1_bak; ++ convfail1 = convfail1_bak; ++ continue; ++ } ++ if (wc1 == L' ' && wc2 != L'\n' ++ && t2 > (unsigned char const *)s2 ++ && !convfail2_bak && iswspace (wc2_bak)) ++ { ++ t2 = t2_bak; ++ wc2 = wc2_bak; ++ state2 = state2_bak; ++ convfail2 = convfail2_bak; ++ continue; ++ } ++ } ++ ++ t1_bak = t1; t2_bak = t2; ++ wc1_bak = wc1; wc2_bak = wc2; ++ state1_bak = state1; state2_bak = state2; ++ convfail1_bak = convfail1; convfail2_bak = convfail2; ++ ++ if (wc1 == L'\n') ++ wc1 = L' '; ++ else ++ t1 += mblen1; ++ ++ if (wc2 == L'\n') ++ wc2 = L' '; ++ else ++ t2 += mblen2; ++ ++ break; ++ ++ case IGNORE_TAB_EXPANSION: ++ if ((wc1 == L' ' && wc2 == L'\t') ++ || (wc1 == L'\t' && wc2 == L' ')) ++ { ++ size_t column2 = column; ++ ++ while (1) ++ { ++ if (convfail1) ++ { ++ ++t1; ++ break; ++ } ++ else if (wc1 == L' ') ++ column++; ++ else if (wc1 == L'\t') ++ column += TAB_WIDTH - column % TAB_WIDTH; ++ else ++ { ++ t1 += mblen1; ++ break; ++ } ++ ++ t1 += mblen1; ++ c1 = 
*t1; ++ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); ++ } ++ ++ while (1) ++ { ++ if (convfail2) ++ { ++ ++t2; ++ break; ++ } ++ else if (wc2 == L' ') ++ column2++; ++ else if (wc2 == L'\t') ++ column2 += TAB_WIDTH - column2 % TAB_WIDTH; ++ else ++ { ++ t2 += mblen2; ++ break; ++ } ++ ++ t2 += mblen2; ++ c2 = *t2; ++ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); ++ } ++ ++ if (column != column2) ++ return 1; ++ } ++ else ++ { ++ t1 += mblen1; ++ t2 += mblen2; ++ } ++ break; ++ ++ case IGNORE_NO_WHITE_SPACE: ++ t1 += mblen1; ++ t2 += mblen2; ++ break; ++ } ++ ++ /* Lowercase all letters if -i is specified. */ ++ if (ignore_case) ++ { ++ if (!convfail1) ++ wc1 = towlower (wc1); ++ if (!convfail2) ++ wc2 = towlower (wc2); ++ } ++ ++ if (convfail1 ^ convfail2) ++ break; ++ else if (convfail1 && convfail2 && c1 != c2) ++ break; ++ else if (!convfail1 && !convfail2 && wc1 != wc2) ++ break; ++ } ++ else ++ { ++ t1_bak = t1; t2_bak = t2; ++ wc1_bak = wc1; wc2_bak = wc2; ++ state1_bak = state1; state2_bak = state2; ++ convfail1_bak = convfail1; convfail2_bak = convfail2; ++ ++ t1 += mblen1; t2 += mblen2; ++ } ++ ++ if (!convfail1 && wc1 == L'\n') ++ return 0; ++ ++ column += convfail1 ? 1 : ++ (wc1 == L'\t') ? TAB_WIDTH - column % TAB_WIDTH : wcwidth (wc1); ++ } ++ ++ return 1; ++} ++#endif ++ + /* Find the consecutive changes at the start of the script START. + Return the last link before the first gap. */ +