]>
Commit | Line | Data |
---|---|---|
9b0ff0a0 MT |
1 | Submitted by: Alexander E. Patrakov |
2 | Date: 2005-08-13 | |
3 | Initial Package Version: 2.8.1 | |
4 | Upstream Status: Unknown, but required for LSB >= 2.0 certification | |
5 | Origin: RedHat | |
6 | Description: Fixes treatment of whitespace in multibyte locales. | |
7 | ||
8 | --- diffutils-2.8.4/src/diff.c.i18n 2002-06-17 01:55:42.000000000 -0400 | |
9 | +++ diffutils-2.8.4/src/diff.c 2002-11-16 18:41:37.000000000 -0500 | |
10 | @@ -275,6 +275,13 @@ | |
11 | re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING); | |
12 | excluded = new_exclude (); | |
13 | ||
14 | +#ifdef HANDLE_MULTIBYTE | |
15 | + if (MB_CUR_MAX > 1) | |
16 | + lines_differ = lines_differ_multibyte; | |
17 | + else | |
18 | +#endif | |
19 | + lines_differ = lines_differ_singlebyte; | |
20 | + | |
21 | /* Decode the options. */ | |
22 | ||
23 | while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1) | |
24 | --- diffutils-2.8.4/src/diff.h.i18n 2002-11-16 18:31:32.000000000 -0500 | |
25 | +++ diffutils-2.8.4/src/diff.h 2002-11-16 18:48:58.000000000 -0500 | |
26 | @@ -23,6 +23,19 @@ | |
27 | #include "system.h" | |
28 | #include <stdio.h> | |
29 | ||
30 | +/* For platforms which support the ISO C Amendment 1 functionality we | |
31 | + support user-defined character classes. */ | |
32 | +#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H | |
33 | +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ | |
34 | +# include <wchar.h> | |
35 | +# include <wctype.h> | |
36 | +# if defined (HAVE_MBRTOWC) | |
37 | +# define HANDLE_MULTIBYTE 1 | |
38 | +# endif | |
39 | +#endif | |
40 | + | |
41 | +#define TAB_WIDTH 8 | |
42 | + | |
43 | /* What kind of changes a hunk contains. */ | |
44 | enum changes | |
45 | { | |
46 | @@ -350,7 +363,13 @@ | |
47 | extern char const pr_program[]; | |
48 | char *concat (char const *, char const *, char const *); | |
49 | char *dir_file_pathname (char const *, char const *); | |
50 | -bool lines_differ (char const *, char const *); | |
51 | + | |
52 | +bool (*lines_differ) (char const *, char const *); | |
53 | +bool lines_differ_singlebyte (char const *, char const *); | |
54 | +#ifdef HANDLE_MULTIBYTE | |
55 | +bool lines_differ_multibyte (char const *, char const *); | |
56 | +#endif | |
57 | + | |
58 | lin translate_line_number (struct file_data const *, lin); | |
59 | struct change *find_change (struct change *); | |
60 | struct change *find_reverse_change (struct change *); | |
61 | --- diffutils-2.8.4/src/io.c.i18n 2002-06-11 02:06:32.000000000 -0400 | |
62 | +++ diffutils-2.8.4/src/io.c 2002-11-16 18:57:30.000000000 -0500 | |
63 | @@ -26,6 +26,7 @@ | |
64 | #include <regex.h> | |
65 | #include <setmode.h> | |
66 | #include <xalloc.h> | |
67 | +#include <assert.h> | |
68 | ||
69 | /* Rotate an unsigned value to the left. */ | |
70 | #define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n))) | |
71 | @@ -213,6 +214,28 @@ | |
72 | \f | |
73 | /* Split the file into lines, simultaneously computing the equivalence | |
74 | class for each line. */ | |
75 | +#ifdef HANDLE_MULTIBYTE | |
76 | +# define MBC2WC(P, END, MBLENGTH, WC, STATE, CONVFAIL) \ | |
77 | +do \ | |
78 | +{ \ | |
79 | + mbstate_t state_bak = STATE; \ | |
80 | + \ | |
81 | + CONVFAIL = 0; \ | |
82 | + MBLENGTH = mbrtowc (&WC, P, END - (char const *)P, &STATE); \ | |
83 | + \ | |
84 | + switch (MBLENGTH) \ | |
85 | + { \ | |
86 | + case (size_t)-2: \ | |
87 | + case (size_t)-1: \ | |
88 | + STATE = state_bak; \ | |
89 | + ++CONVFAIL; \ | |
90 | + /* Fall through. */ \ | |
91 | + case 0: \ | |
92 | + MBLENGTH = 1; \ | |
93 | + } \ | |
94 | +} \ | |
95 | +while (0) | |
96 | +#endif | |
97 | ||
98 | static void | |
99 | find_and_hash_each_line (struct file_data *current) | |
100 | @@ -239,12 +262,280 @@ | |
101 | bool same_length_diff_contents_compare_anyway = | |
102 | diff_length_compare_anyway | ignore_case; | |
103 | ||
104 | +#ifdef HANDLE_MULTIBYTE | |
105 | + wchar_t wc; | |
106 | + size_t mblength; | |
107 | + mbstate_t state; | |
108 | + int convfail; | |
109 | + | |
110 | + memset (&state, '\0', sizeof (mbstate_t)); | |
111 | +#endif | |
112 | + | |
113 | while ((char const *) p < suffix_begin) | |
114 | { | |
115 | char const *ip = (char const *) p; | |
116 | ||
117 | h = 0; | |
118 | +#ifdef HANDLE_MULTIBYTE | |
119 | + if (MB_CUR_MAX > 1) | |
120 | + { | |
121 | + wchar_t lo_wc; | |
122 | + char mbc[MB_LEN_MAX]; | |
123 | + mbstate_t state_wc; | |
124 | + | |
125 | + /* Hash this line until we find a newline. */ | |
126 | + switch (ignore_white_space) | |
127 | + { | |
128 | + case IGNORE_ALL_SPACE: | |
129 | + while (1) | |
130 | + { | |
131 | + if (*p == '\n') | |
132 | + { | |
133 | + ++p; | |
134 | + break; | |
135 | + } | |
136 | + | |
137 | + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); | |
138 | + | |
139 | + if (convfail) | |
140 | + mbc[0] = *p++; | |
141 | + else if (!iswspace (wc)) | |
142 | + { | |
143 | + bool flag = 0; | |
144 | + | |
145 | + if (ignore_case) | |
146 | + { | |
147 | + lo_wc = towlower (wc); | |
148 | + if (lo_wc != wc) | |
149 | + { | |
150 | + flag = 1; | |
151 | + | |
152 | + p += mblength; | |
153 | + memset (&state_wc, '\0', sizeof(mbstate_t)); | |
154 | + mblength = wcrtomb (mbc, lo_wc, &state_wc); | |
155 | + | |
156 | + assert (mblength != (size_t)-1 && | |
157 | + mblength != (size_t)-2); | |
158 | + | |
159 | + mblength = (mblength < 1) ? 1 : mblength; | |
160 | + } | |
161 | + } | |
162 | + | |
163 | + if (!flag) | |
164 | + { | |
165 | + for (i = 0; i < mblength; i++) | |
166 | + mbc[i] = *p++; | |
167 | + } | |
168 | + } | |
169 | + else | |
170 | + { | |
171 | + p += mblength; | |
172 | + continue; | |
173 | + } | |
174 | + | |
175 | + for (i = 0; i < mblength; i++) | |
176 | + h = HASH (h, mbc[i]); | |
177 | + } | |
178 | + break; | |
179 | + | |
180 | + case IGNORE_SPACE_CHANGE: | |
181 | + while (1) | |
182 | + { | |
183 | + if (*p == '\n') | |
184 | + { | |
185 | + ++p; | |
186 | + break; | |
187 | + } | |
188 | ||
189 | + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); | |
190 | + | |
191 | + if (!convfail && iswspace (wc)) | |
192 | + { | |
193 | + while (1) | |
194 | + { | |
195 | + if (*p == '\n') | |
196 | + { | |
197 | + ++p; | |
198 | + goto hashing_done; | |
199 | + } | |
200 | + | |
201 | + p += mblength; | |
202 | + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); | |
203 | + if (convfail || (!convfail && !iswspace (wc))) | |
204 | + break; | |
205 | + } | |
206 | + h = HASH (h, ' '); | |
207 | + } | |
208 | + | |
209 | + /* WC is now the first non-space. */ | |
210 | + if (convfail) | |
211 | + mbc[0] = *p++; | |
212 | + else | |
213 | + { | |
214 | + bool flag = 0; | |
215 | + | |
216 | + if (ignore_case) | |
217 | + { | |
218 | + lo_wc = towlower (wc); | |
219 | + if (lo_wc != wc) | |
220 | + { | |
221 | + flag = 1; | |
222 | + | |
223 | + p += mblength; | |
224 | + memset (&state_wc, '\0', sizeof(mbstate_t)); | |
225 | + mblength = wcrtomb (mbc, lo_wc, &state_wc); | |
226 | + | |
227 | + assert (mblength != (size_t)-1 && | |
228 | + mblength != (size_t)-2); | |
229 | + | |
230 | + mblength = (mblength < 1) ? 1 : mblength; | |
231 | + } | |
232 | + } | |
233 | + | |
234 | + if (!flag) | |
235 | + { | |
236 | + for (i = 0; i < mblength; i++) | |
237 | + mbc[i] = *p++; | |
238 | + } | |
239 | + } | |
240 | + | |
241 | + for (i = 0; i < mblength; i++) | |
242 | + h = HASH (h, mbc[i]); | |
243 | + } | |
244 | + break; | |
245 | + | |
246 | + case IGNORE_TAB_EXPANSION: | |
247 | + { | |
248 | + size_t column = 0; | |
249 | + | |
250 | + while (1) | |
251 | + { | |
252 | + if (*p == '\n') | |
253 | + { | |
254 | + ++p; | |
255 | + break; | |
256 | + } | |
257 | + | |
258 | + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); | |
259 | + | |
260 | + if (convfail) | |
261 | + { | |
262 | + h = HASH (h, *p++); | |
263 | + ++column; | |
264 | + } | |
265 | + else | |
266 | + { | |
267 | + bool flag; | |
268 | + | |
269 | + switch (wc) | |
270 | + { | |
271 | + case L'\b': | |
272 | + column -= 0 < column; | |
273 | + h = HASH (h, '\b'); | |
274 | + ++p; | |
275 | + break; | |
276 | + | |
277 | + case L'\t': | |
278 | + { | |
279 | + int repetitions; | |
280 | + | |
281 | + repetitions = TAB_WIDTH - column % TAB_WIDTH; | |
282 | + column += repetitions; | |
283 | + do | |
284 | + h = HASH (h, ' '); | |
285 | + while (--repetitions != 0); | |
286 | + ++p; | |
287 | + } | |
288 | + break; | |
289 | + | |
290 | + case L'\r': | |
291 | + column = 0; | |
292 | + h = HASH (h, '\r'); | |
293 | + ++p; | |
294 | + break; | |
295 | + | |
296 | + default: | |
297 | + flag = 0; | |
298 | + column += wcwidth (wc); | |
299 | + if (ignore_case) | |
300 | + { | |
301 | + lo_wc = towlower (wc); | |
302 | + if (lo_wc != wc) | |
303 | + { | |
304 | + flag = 1; | |
305 | + p += mblength; | |
306 | + memset (&state_wc, '\0', sizeof(mbstate_t)); | |
307 | + mblength = wcrtomb (mbc, lo_wc, &state_wc); | |
308 | + | |
309 | + assert (mblength != (size_t)-1 && | |
310 | + mblength != (size_t)-2); | |
311 | + | |
312 | + mblength = (mblength < 1) ? 1 : mblength; | |
313 | + } | |
314 | + } | |
315 | + | |
316 | + if (!flag) | |
317 | + { | |
318 | + for (i = 0; i < mblength; i++) | |
319 | + mbc[i] = *p++; | |
320 | + } | |
321 | + | |
322 | + for (i = 0; i < mblength; i++) | |
323 | + h = HASH (h, mbc[i]); | |
324 | + } | |
325 | + } | |
326 | + } | |
327 | + } | |
328 | + break; | |
329 | + | |
330 | + default: | |
331 | + while (1) | |
332 | + { | |
333 | + if (*p == '\n') | |
334 | + { | |
335 | + ++p; | |
336 | + break; | |
337 | + } | |
338 | + | |
339 | + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); | |
340 | + | |
341 | + if (convfail) | |
342 | + mbc[0] = *p++; | |
343 | + else | |
344 | + { | |
345 | + int flag = 0; | |
346 | + | |
347 | + if (ignore_case) | |
348 | + { | |
349 | + lo_wc = towlower (wc); | |
350 | + if (lo_wc != wc) | |
351 | + { | |
352 | + flag = 1; | |
353 | + p += mblength; | |
354 | + memset (&state_wc, '\0', sizeof(mbstate_t)); | |
355 | + mblength = wcrtomb (mbc, lo_wc, &state_wc); | |
356 | + | |
357 | + assert (mblength != (size_t)-1 && | |
358 | + mblength != (size_t)-2); | |
359 | + | |
360 | + mblength = (mblength < 1) ? 1 : mblength; | |
361 | + } | |
362 | + } | |
363 | + | |
364 | + if (!flag) | |
365 | + { | |
366 | + for (i = 0; i < mblength; i++) | |
367 | + mbc[i] = *p++; | |
368 | + } | |
369 | + } | |
370 | + | |
371 | + for (i = 0; i < mblength; i++) | |
372 | + h = HASH (h, mbc[i]); | |
373 | + } | |
374 | + } | |
375 | + } | |
376 | + else | |
377 | +#endif | |
378 | /* Hash this line until we find a newline. */ | |
379 | if (ignore_case) | |
380 | switch (ignore_white_space) | |
381 | --- diffutils-2.8.4/src/side.c.i18n 2002-06-11 02:06:32.000000000 -0400 | |
382 | +++ diffutils-2.8.4/src/side.c 2002-11-16 18:41:37.000000000 -0500 | |
383 | @@ -73,11 +73,72 @@ | |
384 | register size_t out_position = 0; | |
385 | register char const *text_pointer = line[0]; | |
386 | register char const *text_limit = line[1]; | |
387 | +#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H | |
388 | + unsigned char mbc[MB_LEN_MAX]; | |
389 | + wchar_t wc; | |
390 | + mbstate_t state, state_bak; | |
391 | + size_t mbc_pos, mblength; | |
392 | + int mbc_loading_flag = 0; | |
393 | + int wc_width; | |
394 | + | |
395 | + memset (&state, '\0', sizeof (mbstate_t)); | |
396 | +#endif | |
397 | ||
398 | while (text_pointer < text_limit) | |
399 | { | |
400 | register unsigned char c = *text_pointer++; | |
401 | ||
402 | +#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H | |
403 | + if (MB_CUR_MAX > 1 && mbc_loading_flag) | |
404 | + { | |
405 | + mbc_loading_flag = 0; | |
406 | + state_bak = state; | |
407 | + mbc[mbc_pos++] = c; | |
408 | + | |
409 | +process_mbc: | |
410 | + mblength = mbrtowc (&wc, mbc, mbc_pos, &state); | |
411 | + | |
412 | + switch (mblength) | |
413 | + { | |
414 | + case (size_t)-2: /* Incomplete multibyte character. */ | |
415 | + mbc_loading_flag = 1; | |
416 | + state = state_bak; | |
417 | + break; | |
418 | + | |
419 | + case (size_t)-1: /* Invalid as a multibyte character. */ | |
420 | + if (in_position++ < out_bound) | |
421 | + { | |
422 | + out_position = in_position; | |
423 | + putc (mbc[0], out); | |
424 | + } | |
425 | + memmove (mbc, mbc + 1, --mbc_pos); | |
426 | + if (mbc_pos > 0) | |
427 | + { | |
428 | + mbc[mbc_pos] = '\0'; | |
429 | + goto process_mbc; | |
430 | + } | |
431 | + break; | |
432 | + | |
433 | + default: | |
434 | + wc_width = wcwidth (wc); | |
435 | + if (wc_width < 1) /* Unprintable multibyte character. */ | |
436 | + { | |
437 | + if (in_position <= out_bound) | |
438 | + fprintf (out, "%lc", (wint_t)wc); | |
439 | + } | |
440 | + else /* Printable multibyte character. */ | |
441 | + { | |
442 | + in_position += wc_width; | |
443 | + if (in_position <= out_bound) | |
444 | + { | |
445 | + out_position = in_position; | |
446 | + fprintf (out, "%lc", (wint_t)wc); | |
447 | + } | |
448 | + } | |
449 | + } | |
450 | + continue; | |
451 | + } | |
452 | +#endif | |
453 | switch (c) | |
454 | { | |
455 | case '\t': | |
456 | @@ -135,8 +196,39 @@ | |
457 | break; | |
458 | ||
459 | default: | |
460 | - if (! ISPRINT (c)) | |
461 | - goto control_char; | |
462 | +#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H | |
463 | + if (MB_CUR_MAX > 1) | |
464 | + { | |
465 | + memset (mbc, '\0', MB_LEN_MAX); | |
466 | + mbc_pos = 0; | |
467 | + mbc[mbc_pos++] = c; | |
468 | + state_bak = state; | |
469 | + | |
470 | + mblength = mbrtowc (&wc, mbc, mbc_pos, &state); | |
471 | + | |
472 | + /* The value of mblength is always less than 2 here. */ | |
473 | + switch (mblength) | |
474 | + { | |
475 | + case (size_t)-2: /* Incomplete multibyte character. */ | |
476 | + state = state_bak; | |
477 | + mbc_loading_flag = 1; | |
478 | + continue; | |
479 | + | |
480 | + case (size_t)-1: /* Invalid as a multibyte character. */ | |
481 | + state = state_bak; | |
482 | + break; | |
483 | + | |
484 | + default: | |
485 | + if (! iswprint (wc)) | |
486 | + goto control_char; | |
487 | + } | |
488 | + } | |
489 | + else | |
490 | +#endif | |
491 | + { | |
492 | + if (! ISPRINT (c)) | |
493 | + goto control_char; | |
494 | + } | |
495 | /* falls through */ | |
496 | case ' ': | |
497 | if (in_position++ < out_bound) | |
498 | --- diffutils-2.8.4/src/util.c.i18n 2002-06-11 02:06:32.000000000 -0400 | |
499 | +++ diffutils-2.8.4/src/util.c 2002-11-16 18:41:37.000000000 -0500 | |
500 | @@ -321,7 +321,7 @@ | |
501 | Return nonzero if the lines differ. */ | |
502 | ||
503 | bool | |
504 | -lines_differ (char const *s1, char const *s2) | |
505 | +lines_differ_singlebyte (char const *s1, char const *s2) | |
506 | { | |
507 | register unsigned char const *t1 = (unsigned char const *) s1; | |
508 | register unsigned char const *t2 = (unsigned char const *) s2; | |
509 | @@ -450,6 +450,293 @@ | |
510 | ||
511 | return 1; | |
512 | } | |
513 | + | |
514 | +#ifdef HANDLE_MULTIBYTE | |
515 | +# define MBC2WC(T, END, MBLENGTH, WC, STATE, CONVFAIL) \ | |
516 | +do \ | |
517 | +{ \ | |
518 | + mbstate_t bak = STATE; \ | |
519 | + \ | |
520 | + CONVFAIL = 0; \ | |
521 | + MBLENGTH = mbrtowc (&WC, T, END - T, &STATE); \ | |
522 | + \ | |
523 | + switch (MBLENGTH) \ | |
524 | + { \ | |
525 | + case (size_t)-2: \ | |
526 | + case (size_t)-1: \ | |
527 | + STATE = bak; \ | |
528 | + ++CONVFAIL; \ | |
529 | + /* Fall through. */ \ | |
530 | + case 0: \ | |
531 | + MBLENGTH = 1; \ | |
532 | + } \ | |
533 | +} \ | |
534 | +while (0) | |
535 | + | |
536 | +bool | |
537 | +lines_differ_multibyte (char const *s1, char const *s2) | |
538 | +{ | |
539 | + unsigned char const *end1, *end2; | |
540 | + unsigned char c1, c2; | |
541 | + wchar_t wc1, wc2, wc1_bak, wc2_bak; | |
542 | + size_t mblen1, mblen2; | |
543 | + mbstate_t state1, state2, state1_bak, state2_bak; | |
544 | + int convfail1, convfail2, convfail1_bak, convfail2_bak; | |
545 | + | |
546 | + unsigned char const *t1 = (unsigned char const *) s1; | |
547 | + unsigned char const *t2 = (unsigned char const *) s2; | |
548 | + unsigned char const *t1_bak, *t2_bak; | |
549 | + size_t column = 0; | |
550 | + | |
551 | + if (ignore_white_space == IGNORE_NO_WHITE_SPACE && !ignore_case) | |
552 | + { | |
553 | + while (*t1 != '\n') | |
554 | + if (*t1++ != * t2++) | |
555 | + return 1; | |
556 | + return 0; | |
557 | + } | |
558 | + | |
559 | + memset (&state1, '\0', sizeof (mbstate_t)); | |
560 | + memset (&state2, '\0', sizeof (mbstate_t)); | |
561 | + | |
562 | + end1 = s1 + strlen (s1); | |
563 | + end2 = s2 + strlen (s2); | |
564 | + | |
565 | + while (1) | |
566 | + { | |
567 | + c1 = *t1; | |
568 | + c2 = *t2; | |
569 | + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); | |
570 | + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); | |
571 | + | |
572 | + /* Test for exact char equality first, since it's a common case. */ | |
573 | + if (convfail1 ^ convfail2) | |
574 | + break; | |
575 | + else if (convfail1 && convfail2 && c1 != c2) | |
576 | + break; | |
577 | + else if (!convfail1 && !convfail2 && wc1 != wc2) | |
578 | + { | |
579 | + switch (ignore_white_space) | |
580 | + { | |
581 | + case IGNORE_ALL_SPACE: | |
582 | + /* For -w, just skip past any white space. */ | |
583 | + while (1) | |
584 | + { | |
585 | + if (convfail1) | |
586 | + break; | |
587 | + else if (wc1 == L'\n' || !iswspace (wc1)) | |
588 | + break; | |
589 | + | |
590 | + t1 += mblen1; | |
591 | + c1 = *t1; | |
592 | + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); | |
593 | + } | |
594 | + | |
595 | + while (1) | |
596 | + { | |
597 | + if (convfail2) | |
598 | + break; | |
599 | + else if (wc2 == L'\n' || !iswspace (wc2)) | |
600 | + break; | |
601 | + | |
602 | + t2 += mblen2; | |
603 | + c2 = *t2; | |
604 | + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); | |
605 | + } | |
606 | + t1 += mblen1; | |
607 | + t2 += mblen2; | |
608 | + break; | |
609 | + | |
610 | + case IGNORE_SPACE_CHANGE: | |
611 | + /* For -b, advance past any sequence of white space in | |
612 | + line 1 and consider it just one space, or nothing at | |
613 | + all if it is at the end of the line. */ | |
614 | + if (wc1 != L'\n' && iswspace (wc1)) | |
615 | + { | |
616 | + size_t mblen_bak; | |
617 | + mbstate_t state_bak; | |
618 | + | |
619 | + do | |
620 | + { | |
621 | + t1 += mblen1; | |
622 | + mblen_bak = mblen1; | |
623 | + state_bak = state1; | |
624 | + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); | |
625 | + } | |
626 | + while (!convfail1 && (wc1 != L'\n' && iswspace (wc1))); | |
627 | + | |
628 | + state1 = state_bak; | |
629 | + mblen1 = mblen_bak; | |
630 | + t1 -= mblen1; | |
631 | + convfail1 = 0; | |
632 | + wc1 = L' '; | |
633 | + } | |
634 | + | |
635 | + /* Likewise for line 2. */ | |
636 | + if (wc2 != L'\n' && iswspace (wc2)) | |
637 | + { | |
638 | + size_t mblen_bak; | |
639 | + mbstate_t state_bak; | |
640 | + | |
641 | + do | |
642 | + { | |
643 | + t2 += mblen2; | |
644 | + mblen_bak = mblen2; | |
645 | + state_bak = state2; | |
646 | + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); | |
647 | + } | |
648 | + while (!convfail2 && (wc2 != L'\n' && iswspace (wc2))); | |
649 | + | |
650 | + state2 = state_bak; | |
651 | + mblen2 = mblen_bak; | |
652 | + t2 -= mblen2; | |
653 | + convfail2 = 0; | |
654 | + wc2 = L' '; | |
655 | + } | |
656 | + | |
657 | + if (wc1 != wc2) | |
658 | + { | |
659 | + if (wc2 == L' ' && wc1 != L'\n' && | |
660 | + t1 > (unsigned char const *)s1 && | |
661 | + !convfail1_bak && iswspace (wc1_bak)) | |
662 | + { | |
663 | + t1 = t1_bak; | |
664 | + wc1 = wc1_bak; | |
665 | + state1 = state1_bak; | |
666 | + convfail1 = convfail1_bak; | |
667 | + continue; | |
668 | + } | |
669 | + if (wc1 == L' ' && wc2 != L'\n' | |
670 | + && t2 > (unsigned char const *)s2 | |
671 | + && !convfail2_bak && iswspace (wc2_bak)) | |
672 | + { | |
673 | + t2 = t2_bak; | |
674 | + wc2 = wc2_bak; | |
675 | + state2 = state2_bak; | |
676 | + convfail2 = convfail2_bak; | |
677 | + continue; | |
678 | + } | |
679 | + } | |
680 | + | |
681 | + t1_bak = t1; t2_bak = t2; | |
682 | + wc1_bak = wc1; wc2_bak = wc2; | |
683 | + state1_bak = state1; state2_bak = state2; | |
684 | + convfail1_bak = convfail1; convfail2_bak = convfail2; | |
685 | + | |
686 | + if (wc1 == L'\n') | |
687 | + wc1 = L' '; | |
688 | + else | |
689 | + t1 += mblen1; | |
690 | + | |
691 | + if (wc2 == L'\n') | |
692 | + wc2 = L' '; | |
693 | + else | |
694 | + t2 += mblen2; | |
695 | + | |
696 | + break; | |
697 | + | |
698 | + case IGNORE_TAB_EXPANSION: | |
699 | + if ((wc1 == L' ' && wc2 == L'\t') | |
700 | + || (wc1 == L'\t' && wc2 == L' ')) | |
701 | + { | |
702 | + size_t column2 = column; | |
703 | + | |
704 | + while (1) | |
705 | + { | |
706 | + if (convfail1) | |
707 | + { | |
708 | + ++t1; | |
709 | + break; | |
710 | + } | |
711 | + else if (wc1 == L' ') | |
712 | + column++; | |
713 | + else if (wc1 == L'\t') | |
714 | + column += TAB_WIDTH - column % TAB_WIDTH; | |
715 | + else | |
716 | + { | |
717 | + t1 += mblen1; | |
718 | + break; | |
719 | + } | |
720 | + | |
721 | + t1 += mblen1; | |
722 | + c1 = *t1; | |
723 | + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); | |
724 | + } | |
725 | + | |
726 | + while (1) | |
727 | + { | |
728 | + if (convfail2) | |
729 | + { | |
730 | + ++t2; | |
731 | + break; | |
732 | + } | |
733 | + else if (wc2 == L' ') | |
734 | + column2++; | |
735 | + else if (wc2 == L'\t') | |
736 | + column2 += TAB_WIDTH - column2 % TAB_WIDTH; | |
737 | + else | |
738 | + { | |
739 | + t2 += mblen2; | |
740 | + break; | |
741 | + } | |
742 | + | |
743 | + t2 += mblen2; | |
744 | + c2 = *t2; | |
745 | + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); | |
746 | + } | |
747 | + | |
748 | + if (column != column2) | |
749 | + return 1; | |
750 | + } | |
751 | + else | |
752 | + { | |
753 | + t1 += mblen1; | |
754 | + t2 += mblen2; | |
755 | + } | |
756 | + break; | |
757 | + | |
758 | + case IGNORE_NO_WHITE_SPACE: | |
759 | + t1 += mblen1; | |
760 | + t2 += mblen2; | |
761 | + break; | |
762 | + } | |
763 | + | |
764 | + /* Lowercase all letters if -i is specified. */ | |
765 | + if (ignore_case) | |
766 | + { | |
767 | + if (!convfail1) | |
768 | + wc1 = towlower (wc1); | |
769 | + if (!convfail2) | |
770 | + wc2 = towlower (wc2); | |
771 | + } | |
772 | + | |
773 | + if (convfail1 ^ convfail2) | |
774 | + break; | |
775 | + else if (convfail1 && convfail2 && c1 != c2) | |
776 | + break; | |
777 | + else if (!convfail1 && !convfail2 && wc1 != wc2) | |
778 | + break; | |
779 | + } | |
780 | + else | |
781 | + { | |
782 | + t1_bak = t1; t2_bak = t2; | |
783 | + wc1_bak = wc1; wc2_bak = wc2; | |
784 | + state1_bak = state1; state2_bak = state2; | |
785 | + convfail1_bak = convfail1; convfail2_bak = convfail2; | |
786 | + | |
787 | + t1 += mblen1; t2 += mblen2; | |
788 | + } | |
789 | + | |
790 | + if (!convfail1 && wc1 == L'\n') | |
791 | + return 0; | |
792 | + | |
793 | + column += convfail1 ? 1 : | |
794 | + (wc1 == L'\t') ? TAB_WIDTH - column % TAB_WIDTH : wcwidth (wc1); | |
795 | + } | |
796 | + | |
797 | + return 1; | |
798 | +} | |
799 | +#endif | |
800 | \f | |
801 | /* Find the consecutive changes at the start of the script START. | |
802 | Return the last link before the first gap. */ |