1 Submitted by: Alexander E. Patrakov
3 Initial Package Version: 2.8.1
4 Upstream Status: Unknown, but required for LSB >= 2.0 certification
6 Description: Fixes treatment of whitespace in multibyte locales.
8 --- diffutils-2.8.4/src/diff.c.i18n 2002-06-17 01:55:42.000000000 -0400
9 +++ diffutils-2.8.4/src/diff.c 2002-11-16 18:41:37.000000000 -0500
11 re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
12 excluded = new_exclude ();
14 +#ifdef HANDLE_MULTIBYTE
16 + lines_differ = lines_differ_multibyte;
19 + lines_differ = lines_differ_singlebyte;
21 /* Decode the options. */
23 while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
24 --- diffutils-2.8.4/src/diff.h.i18n 2002-11-16 18:31:32.000000000 -0500
25 +++ diffutils-2.8.4/src/diff.h 2002-11-16 18:48:58.000000000 -0500
30 +/* For platforms which support the ISO C amendment 1 functionality we
31 + support user-defined character classes. */
32 +#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H
33 +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
36 +# if defined (HAVE_MBRTOWC)
37 +# define HANDLE_MULTIBYTE 1
43 /* What kind of changes a hunk contains. */
47 extern char const pr_program[];
48 char *concat (char const *, char const *, char const *);
49 char *dir_file_pathname (char const *, char const *);
50 -bool lines_differ (char const *, char const *);
52 +bool (*lines_differ) (char const *, char const *);
53 +bool lines_differ_singlebyte (char const *, char const *);
54 +#ifdef HANDLE_MULTIBYTE
55 +bool lines_differ_multibyte (char const *, char const *);
58 lin translate_line_number (struct file_data const *, lin);
59 struct change *find_change (struct change *);
60 struct change *find_reverse_change (struct change *);
61 --- diffutils-2.8.4/src/io.c.i18n 2002-06-11 02:06:32.000000000 -0400
62 +++ diffutils-2.8.4/src/io.c 2002-11-16 18:57:30.000000000 -0500
69 /* Rotate an unsigned value to the left. */
70 #define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
73 /* Split the file into lines, simultaneously computing the equivalence
74 class for each line. */
75 +#ifdef HANDLE_MULTIBYTE
76 +# define MBC2WC(P, END, MBLENGTH, WC, STATE, CONVFAIL) \
79 + mbstate_t state_bak = STATE; \
82 + MBLENGTH = mbrtowc (&WC, P, END - (char const *)P, &STATE); \
88 + STATE = state_bak; \
90 + /* Fall through. */ \
99 find_and_hash_each_line (struct file_data *current)
100 @@ -239,12 +262,280 @@
101 bool same_length_diff_contents_compare_anyway =
102 diff_length_compare_anyway | ignore_case;
104 +#ifdef HANDLE_MULTIBYTE
110 + memset (&state, '\0', sizeof (mbstate_t));
113 while ((char const *) p < suffix_begin)
115 char const *ip = (char const *) p;
118 +#ifdef HANDLE_MULTIBYTE
119 + if (MB_CUR_MAX > 1)
122 + char mbc[MB_LEN_MAX];
123 + mbstate_t state_wc;
125 + /* Hash this line until we find a newline. */
126 + switch (ignore_white_space)
128 + case IGNORE_ALL_SPACE:
137 + MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
141 + else if (!iswspace (wc))
147 + lo_wc = towlower (wc);
153 + memset (&state_wc, '\0', sizeof(mbstate_t));
154 + mblength = wcrtomb (mbc, lo_wc, &state_wc);
156 + assert (mblength != (size_t)-1 &&
157 + mblength != (size_t)-2);
159 + mblength = (mblength < 1) ? 1 : mblength;
165 + for (i = 0; i < mblength; i++)
175 + for (i = 0; i < mblength; i++)
176 + h = HASH (h, mbc[i]);
180 + case IGNORE_SPACE_CHANGE:
189 + MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
191 + if (!convfail && iswspace (wc))
202 + MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
203 + if (convfail || (!convfail && !iswspace (wc)))
209 + /* WC is now the first non-space. */
218 + lo_wc = towlower (wc);
224 + memset (&state_wc, '\0', sizeof(mbstate_t));
225 + mblength = wcrtomb (mbc, lo_wc, &state_wc);
227 + assert (mblength != (size_t)-1 &&
228 + mblength != (size_t)-2);
230 + mblength = (mblength < 1) ? 1 : mblength;
236 + for (i = 0; i < mblength; i++)
241 + for (i = 0; i < mblength; i++)
242 + h = HASH (h, mbc[i]);
246 + case IGNORE_TAB_EXPANSION:
258 + MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
262 + h = HASH (h, *p++);
272 + column -= 0 < column;
273 + h = HASH (h, '\b');
281 + repetitions = TAB_WIDTH - column % TAB_WIDTH;
282 + column += repetitions;
285 + while (--repetitions != 0);
292 + h = HASH (h, '\r');
298 + column += wcwidth (wc);
301 + lo_wc = towlower (wc);
306 + memset (&state_wc, '\0', sizeof(mbstate_t));
307 + mblength = wcrtomb (mbc, lo_wc, &state_wc);
309 + assert (mblength != (size_t)-1 &&
310 + mblength != (size_t)-2);
312 + mblength = (mblength < 1) ? 1 : mblength;
318 + for (i = 0; i < mblength; i++)
322 + for (i = 0; i < mblength; i++)
323 + h = HASH (h, mbc[i]);
339 + MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
349 + lo_wc = towlower (wc);
354 + memset (&state_wc, '\0', sizeof(mbstate_t));
355 + mblength = wcrtomb (mbc, lo_wc, &state_wc);
357 + assert (mblength != (size_t)-1 &&
358 + mblength != (size_t)-2);
360 + mblength = (mblength < 1) ? 1 : mblength;
366 + for (i = 0; i < mblength; i++)
371 + for (i = 0; i < mblength; i++)
372 + h = HASH (h, mbc[i]);
378 /* Hash this line until we find a newline. */
380 switch (ignore_white_space)
381 --- diffutils-2.8.4/src/side.c.i18n 2002-06-11 02:06:32.000000000 -0400
382 +++ diffutils-2.8.4/src/side.c 2002-11-16 18:41:37.000000000 -0500
384 register size_t out_position = 0;
385 register char const *text_pointer = line[0];
386 register char const *text_limit = line[1];
387 +#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H
388 + unsigned char mbc[MB_LEN_MAX];
390 + mbstate_t state, state_bak;
391 + size_t mbc_pos, mblength;
392 + int mbc_loading_flag = 0;
395 + memset (&state, '\0', sizeof (mbstate_t));
398 while (text_pointer < text_limit)
400 register unsigned char c = *text_pointer++;
402 +#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H
403 + if (MB_CUR_MAX > 1 && mbc_loading_flag)
405 + mbc_loading_flag = 0;
407 + mbc[mbc_pos++] = c;
410 + mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
414 + case (size_t)-2: /* Incomplete multibyte character. */
415 + mbc_loading_flag = 1;
419 + case (size_t)-1: /* Invalid as a multibyte character. */
420 + if (in_position++ < out_bound)
422 + out_position = in_position;
423 + putc (mbc[0], out);
425 + memmove (mbc, mbc + 1, --mbc_pos);
428 + mbc[mbc_pos] = '\0';
434 + wc_width = wcwidth (wc);
435 + if (wc_width < 1) /* Unprintable multibyte character. */
437 + if (in_position <= out_bound)
438 + fprintf (out, "%lc", (wint_t)wc);
440 + else /* Printable multibyte character. */
442 + in_position += wc_width;
443 + if (in_position <= out_bound)
445 + out_position = in_position;
446 + fprintf (out, "%lc", (wint_t)wc);
462 +#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H
463 + if (MB_CUR_MAX > 1)
465 + memset (mbc, '\0', MB_LEN_MAX);
467 + mbc[mbc_pos++] = c;
470 + mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
472 + /* The value of mblength is always less than 2 here. */
475 + case (size_t)-2: /* Incomplete multibyte character. */
477 + mbc_loading_flag = 1;
480 + case (size_t)-1: /* Invalid as a multibyte character. */
485 + if (! iswprint (wc))
497 if (in_position++ < out_bound)
498 --- diffutils-2.8.4/src/util.c.i18n 2002-06-11 02:06:32.000000000 -0400
499 +++ diffutils-2.8.4/src/util.c 2002-11-16 18:41:37.000000000 -0500
501 Return nonzero if the lines differ. */
504 -lines_differ (char const *s1, char const *s2)
505 +lines_differ_singlebyte (char const *s1, char const *s2)
507 register unsigned char const *t1 = (unsigned char const *) s1;
508 register unsigned char const *t2 = (unsigned char const *) s2;
509 @@ -450,6 +450,293 @@
514 +#ifdef HANDLE_MULTIBYTE
515 +# define MBC2WC(T, END, MBLENGTH, WC, STATE, CONVFAIL) \
518 + mbstate_t bak = STATE; \
521 + MBLENGTH = mbrtowc (&WC, T, END - T, &STATE); \
523 + switch (MBLENGTH) \
529 + /* Fall through. */ \
537 +lines_differ_multibyte (char const *s1, char const *s2)
539 + unsigned char const *end1, *end2;
540 + unsigned char c1, c2;
541 + wchar_t wc1, wc2, wc1_bak, wc2_bak;
542 + size_t mblen1, mblen2;
543 + mbstate_t state1, state2, state1_bak, state2_bak;
544 + int convfail1, convfail2, convfail1_bak, convfail2_bak;
546 + unsigned char const *t1 = (unsigned char const *) s1;
547 + unsigned char const *t2 = (unsigned char const *) s2;
548 + unsigned char const *t1_bak, *t2_bak;
551 + if (ignore_white_space == IGNORE_NO_WHITE_SPACE && !ignore_case)
553 + while (*t1 != '\n')
554 + if (*t1++ != * t2++)
559 + memset (&state1, '\0', sizeof (mbstate_t));
560 + memset (&state2, '\0', sizeof (mbstate_t));
562 + end1 = s1 + strlen (s1);
563 + end2 = s2 + strlen (s2);
569 + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
570 + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
572 + /* Test for exact char equality first, since it's a common case. */
573 + if (convfail1 ^ convfail2)
575 + else if (convfail1 && convfail2 && c1 != c2)
577 + else if (!convfail1 && !convfail2 && wc1 != wc2)
579 + switch (ignore_white_space)
581 + case IGNORE_ALL_SPACE:
582 + /* For -w, just skip past any white space. */
587 + else if (wc1 == L'\n' || !iswspace (wc1))
592 + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
599 + else if (wc2 == L'\n' || !iswspace (wc2))
604 + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
610 + case IGNORE_SPACE_CHANGE:
611 + /* For -b, advance past any sequence of white space in
612 + line 1 and consider it just one space, or nothing at
613 + all if it is at the end of the line. */
614 + if (wc1 != L'\n' && iswspace (wc1))
617 + mbstate_t state_bak;
622 + mblen_bak = mblen1;
623 + state_bak = state1;
624 + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
626 + while (!convfail1 && (wc1 != L'\n' && iswspace (wc1)));
628 + state1 = state_bak;
629 + mblen1 = mblen_bak;
635 + /* Likewise for line 2. */
636 + if (wc2 != L'\n' && iswspace (wc2))
639 + mbstate_t state_bak;
644 + mblen_bak = mblen2;
645 + state_bak = state2;
646 + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
648 + while (!convfail2 && (wc2 != L'\n' && iswspace (wc2)));
650 + state2 = state_bak;
651 + mblen2 = mblen_bak;
659 + if (wc2 == L' ' && wc1 != L'\n' &&
660 + t1 > (unsigned char const *)s1 &&
661 + !convfail1_bak && iswspace (wc1_bak))
665 + state1 = state1_bak;
666 + convfail1 = convfail1_bak;
669 + if (wc1 == L' ' && wc2 != L'\n'
670 + && t2 > (unsigned char const *)s2
671 + && !convfail2_bak && iswspace (wc2_bak))
675 + state2 = state2_bak;
676 + convfail2 = convfail2_bak;
681 + t1_bak = t1; t2_bak = t2;
682 + wc1_bak = wc1; wc2_bak = wc2;
683 + state1_bak = state1; state2_bak = state2;
684 + convfail1_bak = convfail1; convfail2_bak = convfail2;
698 + case IGNORE_TAB_EXPANSION:
699 + if ((wc1 == L' ' && wc2 == L'\t')
700 + || (wc1 == L'\t' && wc2 == L' '))
702 + size_t column2 = column;
711 + else if (wc1 == L' ')
713 + else if (wc1 == L'\t')
714 + column += TAB_WIDTH - column % TAB_WIDTH;
723 + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
733 + else if (wc2 == L' ')
735 + else if (wc2 == L'\t')
736 + column2 += TAB_WIDTH - column2 % TAB_WIDTH;
745 + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
748 + if (column != column2)
758 + case IGNORE_NO_WHITE_SPACE:
764 + /* Lowercase all letters if -i is specified. */
768 + wc1 = towlower (wc1);
770 + wc2 = towlower (wc2);
773 + if (convfail1 ^ convfail2)
775 + else if (convfail1 && convfail2 && c1 != c2)
777 + else if (!convfail1 && !convfail2 && wc1 != wc2)
782 + t1_bak = t1; t2_bak = t2;
783 + wc1_bak = wc1; wc2_bak = wc2;
784 + state1_bak = state1; state2_bak = state2;
785 + convfail1_bak = convfail1; convfail2_bak = convfail2;
787 + t1 += mblen1; t2 += mblen2;
790 + if (!convfail1 && wc1 == L'\n')
793 + column += convfail1 ? 1 :
794 + (wc1 == L'\t') ? TAB_WIDTH - column % TAB_WIDTH : wcwidth (wc1);
801 /* Find the consecutive changes at the start of the script START.
802 Return the last link before the first gap. */