]>
Commit | Line | Data |
---|---|---|
a83d6396 SS |
1 | --- diffutils-2.9/src/diff.c 2010-02-11 10:39:17.000000000 +0100 |
2 | +++ diffutils-2.9.mod/src/diff.c 2010-02-13 15:28:22.268208253 +0100 | |
3 | @@ -284,6 +284,13 @@ | |
4 | re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING); | |
5 | excluded = new_exclude (); | |
6 | ||
7 | +#ifdef HANDLE_MULTIBYTE | |
8 | + if (MB_CUR_MAX > 1) | |
9 | + lines_differ = lines_differ_multibyte; | |
10 | + else | |
11 | +#endif | |
12 | + lines_differ = lines_differ_singlebyte; | |
13 | + | |
14 | /* Decode the options. */ | |
15 | ||
16 | while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) | |
17 | --- diffutils-2.9/src/diff.h 2010-02-11 10:05:57.000000000 +0100 | |
18 | +++ diffutils-2.9.mod/src/diff.h 2010-02-13 15:28:22.269208190 +0100 | |
19 | @@ -23,6 +23,19 @@ | |
20 | #include <stdio.h> | |
21 | #include <unlocked-io.h> | |
22 | ||
23 | +/* On platforms that provide the ISO C Amendment 1 wide-character interfaces | |
24 | + (<wchar.h>, <wctype.h> and mbrtowc), define HANDLE_MULTIBYTE. */ | |
25 | +#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H | |
26 | +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ | |
27 | +# include <wchar.h> | |
28 | +# include <wctype.h> | |
29 | +# if defined (HAVE_MBRTOWC) | |
30 | +# define HANDLE_MULTIBYTE 1 | |
31 | +# endif | |
32 | +#endif | |
33 | + | |
34 | +#define TAB_WIDTH 8 /* Distance between tab stops, in columns. */ | |
35 | + | |
36 | /* What kind of changes a hunk contains. */ | |
37 | enum changes | |
38 | { | |
39 | @@ -350,7 +363,13 @@ | |
40 | extern char const pr_program[]; | |
41 | char *concat (char const *, char const *, char const *); | |
42 | char *dir_file_pathname (char const *, char const *); | |
43 | -bool lines_differ (char const *, char const *); | |
44 | +/* Line comparator selected in diff.c. Declared via XTERN so this header does not define the pointer in every file that includes it. NOTE(review): assumes diff.h's XTERN macro is in scope at this point -- confirm. */ | |
45 | +XTERN bool (*lines_differ) (char const *, char const *); | |
46 | +bool lines_differ_singlebyte (char const *, char const *); | |
47 | +#ifdef HANDLE_MULTIBYTE | |
48 | +bool lines_differ_multibyte (char const *, char const *); | |
49 | +#endif | |
50 | + | |
51 | lin translate_line_number (struct file_data const *, lin); | |
52 | struct change *find_change (struct change *); | |
53 | struct change *find_reverse_change (struct change *); | |
54 | --- diffutils-2.9/src/io.c 2010-02-05 09:10:15.000000000 +0100 | |
55 | +++ diffutils-2.9.mod/src/io.c 2010-02-13 15:39:59.313224273 +0100 | |
56 | @@ -22,6 +22,7 @@ | |
57 | #include <cmpbuf.h> | |
58 | #include <file-type.h> | |
59 | #include <xalloc.h> | |
60 | +#include <assert.h> | |
61 | ||
62 | /* Rotate an unsigned value to the left. */ | |
63 | #define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n))) | |
64 | @@ -194,6 +195,28 @@ | |
65 | \f | |
66 | /* Split the file into lines, simultaneously computing the equivalence | |
67 | class for each line. */ | |
68 | +#ifdef HANDLE_MULTIBYTE | |
69 | +# define MBC2WC(P, END, MBLENGTH, WC, STATE, CONVFAIL) \ | |
70 | +do \ | |
71 | +{ \ | |
72 | + mbstate_t state_bak = STATE; /* restored if the conversion fails */ \ | |
73 | + /* Decode the multibyte character at P (input ends at END) into WC. */ \ | |
74 | + CONVFAIL = 0; \ | |
75 | + MBLENGTH = mbrtowc (&WC, P, END - (char const *)P, &STATE); \ | |
76 | + /* On failure or a null character, MBLENGTH is set to 1. */ \ | |
77 | + switch (MBLENGTH) \ | |
78 | + { \ | |
79 | + case (size_t)-2: /* incomplete sequence */ \ | |
80 | + case (size_t)-1: /* invalid sequence */ \ | |
81 | + STATE = state_bak; \ | |
82 | + ++CONVFAIL; \ | |
83 | + /* Fall through. */ \ | |
84 | + case 0: \ | |
85 | + MBLENGTH = 1; \ | |
86 | + } \ | |
87 | +} \ | |
88 | +while (0) | |
89 | +#endif | |
90 | ||
91 | static void | |
92 | find_and_hash_each_line (struct file_data *current) | |
93 | @@ -220,12 +243,282 @@ | |
94 | bool same_length_diff_contents_compare_anyway = | |
95 | diff_length_compare_anyway | ignore_case; | |
96 | ||
97 | +#ifdef HANDLE_MULTIBYTE | |
98 | + wchar_t wc; | |
99 | + size_t mblength; | |
100 | + mbstate_t state; | |
101 | + int convfail; | |
102 | + | |
103 | + memset (&state, '\0', sizeof (mbstate_t)); | |
104 | +#endif | |
105 | + | |
106 | while (p < suffix_begin) | |
107 | { | |
108 | char const *ip = p; | |
109 | ||
110 | h = 0; | |
111 | ||
112 | +#ifdef HANDLE_MULTIBYTE | |
113 | + if (MB_CUR_MAX > 1) | |
114 | + { | |
115 | + wchar_t lo_wc; | |
116 | + char mbc[MB_LEN_MAX]; | |
117 | + mbstate_t state_wc; | |
118 | + | |
119 | + /* Hash this line until we find a newline. */ | |
120 | + switch (ignore_white_space) | |
121 | + { | |
122 | + case IGNORE_ALL_SPACE: | |
123 | + while (1) | |
124 | + { | |
125 | + if (*p == '\n') | |
126 | + { | |
127 | + ++p; | |
128 | + break; | |
129 | + } | |
130 | + | |
131 | + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); | |
132 | + | |
133 | + if (convfail) | |
134 | + mbc[0] = *p++; | |
135 | + else if (!iswspace (wc)) | |
136 | + { | |
137 | + bool flag = 0; | |
138 | + | |
139 | + if (ignore_case) | |
140 | + { | |
141 | + lo_wc = towlower (wc); | |
142 | + if (lo_wc != wc) | |
143 | + { | |
144 | + flag = 1; | |
145 | + | |
146 | + p += mblength; | |
147 | + memset (&state_wc, '\0', sizeof(mbstate_t)); | |
148 | + mblength = wcrtomb (mbc, lo_wc, &state_wc); | |
149 | + | |
150 | + assert (mblength != (size_t)-1 && | |
151 | + mblength != (size_t)-2); | |
152 | + | |
153 | + mblength = (mblength < 1) ? 1 : mblength; | |
154 | + } | |
155 | + } | |
156 | + | |
157 | + if (!flag) | |
158 | + { | |
159 | + for (i = 0; i < mblength; i++) | |
160 | + mbc[i] = *p++; | |
161 | + } | |
162 | + } | |
163 | + else | |
164 | + { | |
165 | + p += mblength; | |
166 | + continue; | |
167 | + } | |
168 | + | |
169 | + for (i = 0; i < mblength; i++) | |
170 | + h = HASH (h, mbc[i]); | |
171 | + } | |
172 | + break; | |
173 | + | |
174 | + case IGNORE_SPACE_CHANGE: | |
175 | + while (1) | |
176 | + { | |
177 | + if (*p == '\n') | |
178 | + { | |
179 | + ++p; | |
180 | + break; | |
181 | + } | |
182 | + | |
183 | + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); | |
184 | + | |
185 | + if (!convfail && iswspace (wc)) | |
186 | + { | |
187 | + while (1) | |
188 | + { | |
189 | + if (*p == '\n') | |
190 | + { | |
191 | + ++p; | |
192 | + goto hashing_done; | |
193 | + } | |
194 | + | |
195 | + p += mblength; | |
196 | + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); | |
197 | + if (convfail || (!convfail && !iswspace (wc))) | |
198 | + break; | |
199 | + } | |
200 | + h = HASH (h, ' '); | |
201 | + } | |
202 | + | |
203 | + /* WC is now the first non-space. */ | |
204 | + if (convfail) | |
205 | + mbc[0] = *p++; | |
206 | + else | |
207 | + { | |
208 | + bool flag = 0; | |
209 | + | |
210 | + if (ignore_case) | |
211 | + { | |
212 | + lo_wc = towlower (wc); | |
213 | + if (lo_wc != wc) | |
214 | + { | |
215 | + flag = 1; | |
216 | + | |
217 | + p += mblength; | |
218 | + memset (&state_wc, '\0', sizeof(mbstate_t)); | |
219 | + mblength = wcrtomb (mbc, lo_wc, &state_wc); | |
220 | + | |
221 | + assert (mblength != (size_t)-1 && | |
222 | + mblength != (size_t)-2); | |
223 | + | |
224 | + mblength = (mblength < 1) ? 1 : mblength; | |
225 | + } | |
226 | + } | |
227 | + | |
228 | + if (!flag) | |
229 | + { | |
230 | + for (i = 0; i < mblength; i++) | |
231 | + mbc[i] = *p++; | |
232 | + } | |
233 | + } | |
234 | + | |
235 | + for (i = 0; i < mblength; i++) | |
236 | + h = HASH (h, mbc[i]); | |
237 | + } | |
238 | + break; | |
239 | + | |
240 | + case IGNORE_TAB_EXPANSION: | |
241 | + { | |
242 | + size_t column = 0; | |
243 | + | |
244 | + while (1) | |
245 | + { | |
246 | + if (*p == '\n') | |
247 | + { | |
248 | + ++p; | |
249 | + break; | |
250 | + } | |
251 | + | |
252 | + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); | |
253 | + | |
254 | + if (convfail) | |
255 | + { | |
256 | + h = HASH (h, *p++); | |
257 | + ++column; | |
258 | + } | |
259 | + else | |
260 | + { | |
261 | + bool flag; | |
262 | + | |
263 | + switch (wc) | |
264 | + { | |
265 | + case L'\b': | |
266 | + column -= 0 < column; | |
267 | + h = HASH (h, '\b'); | |
268 | + ++p; | |
269 | + break; | |
270 | + | |
271 | + case L'\t': | |
272 | + { | |
273 | + int repetitions; | |
274 | + | |
275 | + repetitions = TAB_WIDTH - column % TAB_WIDTH; | |
276 | + column += repetitions; | |
277 | + do | |
278 | + h = HASH (h, ' '); | |
279 | + while (--repetitions != 0); | |
280 | + ++p; | |
281 | + } | |
282 | + break; | |
283 | + | |
284 | + case L'\r': | |
285 | + column = 0; | |
286 | + h = HASH (h, '\r'); | |
287 | + ++p; | |
288 | + break; | |
289 | + | |
290 | + default: | |
291 | + flag = 0; | |
292 | + column += wcwidth (wc); | |
293 | + if (ignore_case) | |
294 | + { | |
295 | + lo_wc = towlower (wc); | |
296 | + if (lo_wc != wc) | |
297 | + { | |
298 | + flag = 1; | |
299 | + p += mblength; | |
300 | + memset (&state_wc, '\0', sizeof(mbstate_t)); | |
301 | + mblength = wcrtomb (mbc, lo_wc, &state_wc); | |
302 | + | |
303 | + assert (mblength != (size_t)-1 && | |
304 | + mblength != (size_t)-2); | |
305 | + | |
306 | + mblength = (mblength < 1) ? 1 : mblength; | |
307 | + } | |
308 | + } | |
309 | + | |
310 | + if (!flag) | |
311 | + { | |
312 | + for (i = 0; i < mblength; i++) | |
313 | + mbc[i] = *p++; | |
314 | + } | |
315 | + | |
316 | + for (i = 0; i < mblength; i++) | |
317 | + h = HASH (h, mbc[i]); | |
318 | + } | |
319 | + } | |
320 | + } | |
321 | + } | |
322 | + break; | |
323 | + | |
324 | + default: | |
325 | + while (1) | |
326 | + { | |
327 | + if (*p == '\n') | |
328 | + { | |
329 | + ++p; | |
330 | + break; | |
331 | + } | |
332 | + | |
333 | + MBC2WC (p, suffix_begin, mblength, wc, state, convfail); | |
334 | + | |
335 | + if (convfail) | |
336 | + mbc[0] = *p++; | |
337 | + else | |
338 | + { | |
339 | + int flag = 0; | |
340 | + | |
341 | + if (ignore_case) | |
342 | + { | |
343 | + lo_wc = towlower (wc); | |
344 | + if (lo_wc != wc) | |
345 | + { | |
346 | + flag = 1; | |
347 | + p += mblength; | |
348 | + memset (&state_wc, '\0', sizeof(mbstate_t)); | |
349 | + mblength = wcrtomb (mbc, lo_wc, &state_wc); | |
350 | + | |
351 | + assert (mblength != (size_t)-1 && | |
352 | + mblength != (size_t)-2); | |
353 | + | |
354 | + mblength = (mblength < 1) ? 1 : mblength; | |
355 | + } | |
356 | + } | |
357 | + | |
358 | + if (!flag) | |
359 | + { | |
360 | + for (i = 0; i < mblength; i++) | |
361 | + mbc[i] = *p++; | |
362 | + } | |
363 | + } | |
364 | + | |
365 | + for (i = 0; i < mblength; i++) | |
366 | + h = HASH (h, mbc[i]); | |
367 | + } | |
368 | + } | |
369 | + } | |
370 | + else | |
371 | +#endif | |
372 | + | |
373 | /* Hash this line until we find a newline. */ | |
374 | if (ignore_case) | |
375 | switch (ignore_white_space) | |
376 | --- diffutils-2.9/src/side.c 2010-02-05 09:10:15.000000000 +0100 | |
377 | +++ diffutils-2.9.mod/src/side.c 2010-02-13 15:51:32.647221551 +0100 | |
378 | @@ -77,11 +77,74 @@ | |
379 | register char const *text_limit = line[1]; | |
380 | mbstate_t mbstate = { 0 }; | |
381 | ||
382 | +#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H | |
383 | + unsigned char mbc[MB_LEN_MAX]; | |
384 | + wchar_t wc; | |
385 | + mbstate_t state, state_bak; | |
386 | + size_t mbc_pos, mblength; | |
387 | + int mbc_loading_flag = 0; | |
388 | + int wc_width; | |
389 | + | |
390 | + memset (&state, '\0', sizeof (mbstate_t)); | |
391 | +#endif | |
392 | + | |
393 | while (text_pointer < text_limit) | |
394 | { | |
395 | char const *tp0 = text_pointer; | |
396 | register char c = *text_pointer++; | |
397 | ||
398 | +#if defined HAVE_WCHAR_H && defined HAVE_WCTYPE_H | |
399 | + if (MB_CUR_MAX > 1 && mbc_loading_flag) | |
400 | + { | |
401 | + mbc_loading_flag = 0; | |
402 | + state_bak = state; | |
403 | + mbc[mbc_pos++] = c; | |
404 | + | |
405 | +process_mbc: | |
406 | + mblength = mbrtowc (&wc, mbc, mbc_pos, &state); | |
407 | + | |
408 | + switch (mblength) | |
409 | + { | |
410 | + case (size_t)-2: /* Incomplete multibyte character. */ | |
411 | + mbc_loading_flag = 1; | |
412 | + state = state_bak; | |
413 | + break; | |
414 | + | |
415 | + case (size_t)-1: /* Invalid as a multibyte character. */ | |
416 | + if (in_position++ < out_bound) | |
417 | + { | |
418 | + out_position = in_position; | |
419 | + putc (mbc[0], out); | |
420 | + } | |
421 | + memmove (mbc, mbc + 1, --mbc_pos); | |
422 | + if (mbc_pos > 0) | |
423 | + { | |
424 | + mbc[mbc_pos] = '\0'; | |
425 | + goto process_mbc; | |
426 | + } | |
427 | + break; | |
428 | + | |
429 | + default: | |
430 | + wc_width = wcwidth (wc); | |
431 | + if (wc_width < 1) /* Unprintable multibyte character. */ | |
432 | + { | |
433 | + if (in_position <= out_bound) | |
434 | + fprintf (out, "%lc", (wint_t)wc); | |
435 | + } | |
436 | + else /* Printable multibyte character. */ | |
437 | + { | |
438 | + in_position += wc_width; | |
439 | + if (in_position <= out_bound) | |
440 | + { | |
441 | + out_position = in_position; | |
442 | + fprintf (out, "%lc", (wint_t)wc); | |
443 | + } | |
444 | + } | |
445 | + } | |
446 | + continue; | |
447 | + } | |
448 | +#endif | |
449 | + | |
450 | switch (c) | |
451 | { | |
452 | case '\t': | |
453 | --- diffutils-2.9/src/util.c 2010-02-11 10:39:17.000000000 +0100 | |
454 | +++ diffutils-2.9.mod/src/util.c 2010-02-13 16:08:16.065232588 +0100 | |
455 | @@ -309,7 +309,7 @@ | |
456 | ||
457 | outfile = 0; | |
458 | } | |
459 | -\f | |
460 | + | |
461 | /* Compare two lines (typically one from each input file) | |
462 | according to the command line options. | |
463 | For efficiency, this is invoked only when the lines do not match exactly | |
464 | @@ -317,7 +317,7 @@ | |
465 | Return nonzero if the lines differ. */ | |
466 | ||
467 | bool | |
468 | -lines_differ (char const *s1, char const *s2) | |
469 | +lines_differ_singlebyte (char const *s1, char const *s2) | |
470 | { | |
471 | register char const *t1 = s1; | |
472 | register char const *t2 = s2; | |
473 | @@ -446,7 +446,294 @@ | |
474 | ||
475 | return true; | |
476 | } | |
477 | -\f | |
478 | + | |
479 | +#ifdef HANDLE_MULTIBYTE | |
480 | +# define MBC2WC(T, END, MBLENGTH, WC, STATE, CONVFAIL) \ | |
481 | +do \ | |
482 | +{ \ | |
483 | + mbstate_t bak = (STATE); /* restored if the conversion fails */ \ | |
484 | + /* Decode the multibyte character at T (input ends at END) into WC. */ \ | |
485 | + (CONVFAIL) = 0; \ | |
486 | + (MBLENGTH) = mbrtowc (&(WC), (char const *) (T), (END) - (T), &(STATE)); \ | |
487 | + /* On failure or a null character, MBLENGTH is set to 1. */ \ | |
488 | + switch (MBLENGTH) \ | |
489 | + { \ | |
490 | + case (size_t)-2: /* incomplete sequence */ \ | |
491 | + case (size_t)-1: /* invalid sequence */ \ | |
492 | + (STATE) = bak; \ | |
493 | + ++(CONVFAIL); \ | |
494 | + /* Fall through. */ \ | |
495 | + case 0: \ | |
496 | + (MBLENGTH) = 1; \ | |
497 | + } \ | |
498 | +} \ | |
499 | +while (0) | |
500 | +/* Multibyte counterpart of lines_differ_singlebyte: return nonzero if the newline-terminated lines S1 and S2 differ under the current ignore_white_space and ignore_case settings. */ | |
501 | +bool | |
502 | +lines_differ_multibyte (char const *s1, char const *s2) | |
503 | +{ | |
504 | + unsigned char const *end1, *end2; | |
505 | + unsigned char c1, c2; | |
506 | + wchar_t wc1, wc2, wc1_bak, wc2_bak; | |
507 | + size_t mblen1, mblen2; | |
508 | + mbstate_t state1, state2, state1_bak, state2_bak; | |
509 | + int convfail1, convfail2, convfail1_bak, convfail2_bak; | |
510 | + | |
511 | + unsigned char const *t1 = (unsigned char const *) s1; | |
512 | + unsigned char const *t2 = (unsigned char const *) s2; | |
513 | + unsigned char const *t1_bak, *t2_bak; | |
514 | + size_t column = 0; | |
515 | + | |
516 | + if (ignore_white_space == IGNORE_NO_WHITE_SPACE && !ignore_case) | |
517 | + { | |
518 | + while (*t1 == *t2 && *t1 != '\n') | |
519 | + t1++, t2++; | |
520 | + /* Equal only if both lines reached '\n' together. */ | |
521 | + return *t1 != *t2; | |
522 | + } | |
523 | + | |
524 | + memset (&state1, '\0', sizeof (mbstate_t)); | |
525 | + memset (&state2, '\0', sizeof (mbstate_t)); | |
526 | + /* NOTE(review): strlen stops at a NUL byte, not at the line's newline; confirm this is the intended bound. */ | |
527 | + end1 = t1 + strlen (s1); | |
528 | + end2 = t2 + strlen (s2); | |
529 | + | |
530 | + while (1) | |
531 | + { | |
532 | + c1 = *t1; | |
533 | + c2 = *t2; | |
534 | + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); | |
535 | + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); | |
536 | + | |
537 | + /* Test for exact char equality first, since it's a common case. */ | |
538 | + if (convfail1 ^ convfail2) | |
539 | + break; | |
540 | + else if (convfail1 && convfail2 && c1 != c2) | |
541 | + break; | |
542 | + else if (!convfail1 && !convfail2 && wc1 != wc2) | |
543 | + { | |
544 | + switch (ignore_white_space) | |
545 | + { | |
546 | + case IGNORE_ALL_SPACE: | |
547 | + /* For -w, just skip past any white space. */ | |
548 | + while (1) | |
549 | + { | |
550 | + if (convfail1) | |
551 | + break; | |
552 | + else if (wc1 == L'\n' || !iswspace (wc1)) | |
553 | + break; | |
554 | + | |
555 | + t1 += mblen1; | |
556 | + c1 = *t1; | |
557 | + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); | |
558 | + } | |
559 | + | |
560 | + while (1) | |
561 | + { | |
562 | + if (convfail2) | |
563 | + break; | |
564 | + else if (wc2 == L'\n' || !iswspace (wc2)) | |
565 | + break; | |
566 | + | |
567 | + t2 += mblen2; | |
568 | + c2 = *t2; | |
569 | + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); | |
570 | + } | |
571 | + t1 += mblen1; | |
572 | + t2 += mblen2; | |
573 | + break; | |
574 | + | |
575 | + case IGNORE_SPACE_CHANGE: | |
576 | + /* For -b, advance past any sequence of white space in | |
577 | + line 1 and consider it just one space, or nothing at | |
578 | + all if it is at the end of the line. */ | |
579 | + if (wc1 != L'\n' && iswspace (wc1)) | |
580 | + { | |
581 | + size_t mblen_bak; | |
582 | + mbstate_t state_bak; | |
583 | + | |
584 | + do | |
585 | + { | |
586 | + t1 += mblen1; | |
587 | + mblen_bak = mblen1; | |
588 | + state_bak = state1; | |
589 | + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); | |
590 | + } | |
591 | + while (!convfail1 && (wc1 != L'\n' && iswspace (wc1))); | |
592 | + | |
593 | + state1 = state_bak; | |
594 | + mblen1 = mblen_bak; | |
595 | + t1 -= mblen1; | |
596 | + convfail1 = 0; | |
597 | + wc1 = L' '; | |
598 | + } | |
599 | + | |
600 | + /* Likewise for line 2. */ | |
601 | + if (wc2 != L'\n' && iswspace (wc2)) | |
602 | + { | |
603 | + size_t mblen_bak; | |
604 | + mbstate_t state_bak; | |
605 | + | |
606 | + do | |
607 | + { | |
608 | + t2 += mblen2; | |
609 | + mblen_bak = mblen2; | |
610 | + state_bak = state2; | |
611 | + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); | |
612 | + } | |
613 | + while (!convfail2 && (wc2 != L'\n' && iswspace (wc2))); | |
614 | + | |
615 | + state2 = state_bak; | |
616 | + mblen2 = mblen_bak; | |
617 | + t2 -= mblen2; | |
618 | + convfail2 = 0; | |
619 | + wc2 = L' '; | |
620 | + } | |
621 | + | |
622 | + if (wc1 != wc2) | |
623 | + { | |
624 | + if (wc2 == L' ' && wc1 != L'\n' && | |
625 | + t1 > (unsigned char const *)s1 && | |
626 | + !convfail1_bak && iswspace (wc1_bak)) | |
627 | + { | |
628 | + t1 = t1_bak; | |
629 | + wc1 = wc1_bak; | |
630 | + state1 = state1_bak; | |
631 | + convfail1 = convfail1_bak; | |
632 | + continue; | |
633 | + } | |
634 | + if (wc1 == L' ' && wc2 != L'\n' | |
635 | + && t2 > (unsigned char const *)s2 | |
636 | + && !convfail2_bak && iswspace (wc2_bak)) | |
637 | + { | |
638 | + t2 = t2_bak; | |
639 | + wc2 = wc2_bak; | |
640 | + state2 = state2_bak; | |
641 | + convfail2 = convfail2_bak; | |
642 | + continue; | |
643 | + } | |
644 | + } | |
645 | + | |
646 | + t1_bak = t1; t2_bak = t2; | |
647 | + wc1_bak = wc1; wc2_bak = wc2; | |
648 | + state1_bak = state1; state2_bak = state2; | |
649 | + convfail1_bak = convfail1; convfail2_bak = convfail2; | |
650 | + | |
651 | + if (wc1 == L'\n') | |
652 | + wc1 = L' '; | |
653 | + else | |
654 | + t1 += mblen1; | |
655 | + | |
656 | + if (wc2 == L'\n') | |
657 | + wc2 = L' '; | |
658 | + else | |
659 | + t2 += mblen2; | |
660 | + | |
661 | + break; | |
662 | + | |
663 | + case IGNORE_TAB_EXPANSION: | |
664 | + if ((wc1 == L' ' && wc2 == L'\t') | |
665 | + || (wc1 == L'\t' && wc2 == L' ')) | |
666 | + { | |
667 | + size_t column2 = column; | |
668 | + | |
669 | + while (1) | |
670 | + { | |
671 | + if (convfail1) | |
672 | + { | |
673 | + ++t1; | |
674 | + break; | |
675 | + } | |
676 | + else if (wc1 == L' ') | |
677 | + column++; | |
678 | + else if (wc1 == L'\t') | |
679 | + column += TAB_WIDTH - column % TAB_WIDTH; | |
680 | + else | |
681 | + { | |
682 | + t1 += mblen1; | |
683 | + break; | |
684 | + } | |
685 | + | |
686 | + t1 += mblen1; | |
687 | + c1 = *t1; | |
688 | + MBC2WC (t1, end1, mblen1, wc1, state1, convfail1); | |
689 | + } | |
690 | + | |
691 | + while (1) | |
692 | + { | |
693 | + if (convfail2) | |
694 | + { | |
695 | + ++t2; | |
696 | + break; | |
697 | + } | |
698 | + else if (wc2 == L' ') | |
699 | + column2++; | |
700 | + else if (wc2 == L'\t') | |
701 | + column2 += TAB_WIDTH - column2 % TAB_WIDTH; | |
702 | + else | |
703 | + { | |
704 | + t2 += mblen2; | |
705 | + break; | |
706 | + } | |
707 | + | |
708 | + t2 += mblen2; | |
709 | + c2 = *t2; | |
710 | + MBC2WC (t2, end2, mblen2, wc2, state2, convfail2); | |
711 | + } | |
712 | + | |
713 | + if (column != column2) | |
714 | + return 1; | |
715 | + } | |
716 | + else | |
717 | + { | |
718 | + t1 += mblen1; | |
719 | + t2 += mblen2; | |
720 | + } | |
721 | + break; | |
722 | + | |
723 | + case IGNORE_NO_WHITE_SPACE: | |
724 | + t1 += mblen1; | |
725 | + t2 += mblen2; | |
726 | + break; | |
727 | + } | |
728 | + | |
729 | + /* Lowercase all letters if -i is specified. */ | |
730 | + if (ignore_case) | |
731 | + { | |
732 | + if (!convfail1) | |
733 | + wc1 = towlower (wc1); | |
734 | + if (!convfail2) | |
735 | + wc2 = towlower (wc2); | |
736 | + } | |
737 | + | |
738 | + if (convfail1 ^ convfail2) | |
739 | + break; | |
740 | + else if (convfail1 && convfail2 && c1 != c2) | |
741 | + break; | |
742 | + else if (!convfail1 && !convfail2 && wc1 != wc2) | |
743 | + break; | |
744 | + } | |
745 | + else | |
746 | + { | |
747 | + t1_bak = t1; t2_bak = t2; | |
748 | + wc1_bak = wc1; wc2_bak = wc2; | |
749 | + state1_bak = state1; state2_bak = state2; | |
750 | + convfail1_bak = convfail1; convfail2_bak = convfail2; | |
751 | + | |
752 | + t1 += mblen1; t2 += mblen2; | |
753 | + } | |
754 | + | |
755 | + if (!convfail1 && wc1 == L'\n') | |
756 | + return 0; | |
757 | + /* NOTE(review): wcwidth can return -1 for non-printable characters, which wraps the size_t column; confirm intended handling. */ | |
758 | + column += convfail1 ? 1 : | |
759 | + (wc1 == L'\t') ? TAB_WIDTH - column % TAB_WIDTH : wcwidth (wc1); | |
760 | + } | |
761 | + | |
762 | + return 1; | |
763 | +} | |
764 | +#endif | |
765 | + | |
766 | /* Find the consecutive changes at the start of the script START. | |
767 | Return the last link before the first gap. */ | |
768 |