]> git.ipfire.org Git - ipfire-2.x.git/blob - src/patches/coreutils-5.96-i18n-1.patch
Update isdn modul blacklisting.
[ipfire-2.x.git] / src / patches / coreutils-5.96-i18n-1.patch
1 Submitted by: Alexander E. Patrakov
2 Date: 2005-11-12
3 Initial Package Version: 5.93
4 Upstream Status: Not accepted, but planned for 6.0
5 Origin: RedHat CVS, see below how to regenerate
6 http://cvs.fedora.redhat.com/viewcvs/*checkout*/devel/coreutils/coreutils-i18n.patch?rev=1.14
7 Description: This patch fixes various problems with multibyte character support.
8 LSB >= 2.0 tests for features added by this patch, but only Coreutils-5.2.1 plus
9 http://www.linuxfromscratch.org/~alexander/patches/coreutils-5.2.1-i18n_fixes-1.patch
10 actually pass the Li18nux2000-level1 testsuite.
11
12 To regenerate:
13 Get http://cvs.fedora.redhat.com/viewcvs/*checkout*/devel/coreutils/coreutils-i18n.patch?rev=1.14
14 Replace the coreutils-5.93/tests/sort/Makefile.in hunk with that from rev 1.13
15 Add this header.
16
17 --- coreutils-5.93/lib/linebuffer.h.i18n 2005-05-14 08:58:06.000000000 +0100
18 +++ coreutils-5.93/lib/linebuffer.h 2005-12-23 08:53:01.000000000 +0000
19 @@ -22,6 +22,11 @@
20
21 # include <stdio.h>
22
23 +/* Get mbstate_t. */
24 +# if HAVE_WCHAR_H
25 +# include <wchar.h>
26 +# endif
27 +
28 /* A `struct linebuffer' holds a line of text. */
29
30 struct linebuffer
31 @@ -29,6 +34,9 @@
32 size_t size; /* Allocated. */
33 size_t length; /* Used. */
34 char *buffer;
35 +# if HAVE_WCHAR_H
36 + mbstate_t state;
37 +# endif
38 };
39
40 /* Initialize linebuffer LINEBUFFER for use. */
41 --- coreutils-5.93/src/cut.c.i18n 2005-08-12 08:16:25.000000000 +0100
42 +++ coreutils-5.93/src/cut.c 2005-12-23 08:53:01.000000000 +0000
43 @@ -29,6 +29,11 @@
44 #include <assert.h>
45 #include <getopt.h>
46 #include <sys/types.h>
47 +
48 +/* Get mbstate_t, mbrtowc(). */
49 +#if HAVE_WCHAR_H
50 +# include <wchar.h>
51 +#endif
52 #include "system.h"
53
54 #include "error.h"
55 @@ -37,6 +42,18 @@
56 #include "quote.h"
57 #include "xstrndup.h"
58
59 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
60 + installation; work around this configuration error. */
61 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
62 +# undef MB_LEN_MAX
63 +# define MB_LEN_MAX 16
64 +#endif
65 +
66 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
67 +#if HAVE_MBRTOWC && defined mbstate_t
68 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
69 +#endif
70 +
71 /* The official name of this program (e.g., no `g' prefix). */
72 #define PROGRAM_NAME "cut"
73
74 @@ -67,6 +84,52 @@
75 } \
76 while (0)
77
78 +/* When fewer than MB_LEN_MAX bytes are pending and STREAM has neither hit EOF nor an error, move the pending bytes to the start of BUF, append up to BUFSIZ more bytes from STREAM and reset BUFPOS to BUF. */
79 +#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
80 + do \
81 + { \
82 + if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
83 + { \
84 + memmove (BUF, BUFPOS, BUFLEN); /* Keep the unread tail. */ \
85 + BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
86 + BUFPOS = BUF; \
87 + } \
88 + } \
89 + while (0)
90 +
91 +/* Decode the character at BUFPOS into WC and store its byte length in MBLENGTH; BUFPOS and BUFLEN are not advanced. WC is WEOF when BUFLEN is 0.
92 + CONVFAIL is 1 if the bytes are not a valid, complete character (MBLENGTH is then 1 and mbrtowc has not updated WC). Otherwise 0. */
93 +#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
94 + do \
95 + { \
96 + mbstate_t state_bak; \
97 + \
98 + CONVFAIL = 0; /* Set before the early exit so callers never read it unset. */ \
99 + if (BUFLEN < 1) \
100 + { \
101 + WC = WEOF; \
102 + break; \
103 + } \
104 + \
105 + /* Get a wide character. */ \
106 + state_bak = STATE; \
107 + MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
108 + \
109 + switch (MBLENGTH) \
110 + { \
111 + case (size_t)-1: \
112 + case (size_t)-2: \
113 + CONVFAIL++; \
114 + STATE = state_bak; \
115 + /* Fall through. */ \
116 + \
117 + case 0: \
118 + MBLENGTH = 1; \
119 + break; \
120 + } \
121 + } \
122 + while (0)
123 +
124 struct range_pair
125 {
126 size_t lo;
127 @@ -85,7 +148,7 @@
128 /* The number of bytes allocated for FIELD_1_BUFFER. */
129 static size_t field_1_bufsize;
130
131 -/* The largest field or byte index used as an endpoint of a closed
132 +/* The largest byte, character or field index used as an endpoint of a closed
133 or degenerate range specification; this doesn't include the starting
134 index of right-open-ended ranges. For example, with either range spec
135 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
136 @@ -97,10 +160,11 @@
137
138 /* This is a bit vector.
139 In byte mode, which bytes to output.
140 + In character mode, which characters to output.
141 In field mode, which DELIM-separated fields to output.
142 - Both bytes and fields are numbered starting with 1,
143 + Bytes, characters and fields are numbered starting with 1,
144 so the zeroth bit of this array is unused.
145 - A field or byte K has been selected if
146 + A byte, character or field K has been selected if
147 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
148 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
149 static unsigned char *printable_field;
150 @@ -109,9 +173,12 @@
151 {
152 undefined_mode,
153
154 - /* Output characters that are in the given bytes. */
155 + /* Output bytes that are at the given positions. */
156 byte_mode,
157
158 + /* Output characters that are at the given positions. */
159 + character_mode,
160 +
161 /* Output the given delimeter-separated fields. */
162 field_mode
163 };
164 @@ -121,6 +188,13 @@
165
166 static enum operating_mode operating_mode;
167
168 +/* If nonzero, when in byte mode, don't split multibyte characters. */
169 +static int byte_mode_character_aware;
170 +
171 +/* If nonzero, use the functions for single byte locales
172 + even if this program runs in a multibyte locale. */
173 +static int force_singlebyte_mode;
174 +
175 /* If true do not output lines containing no delimeter characters.
176 Otherwise, all such lines are printed. This option is valid only
177 with field mode. */
178 @@ -132,6 +206,9 @@
179
180 /* The delimeter character for field mode. */
181 static unsigned char delim;
182 +#if HAVE_WCHAR_H
183 +static wchar_t wcdelim;
184 +#endif
185
186 /* True if the --output-delimiter=STRING option was specified. */
187 static bool output_delimiter_specified;
188 @@ -205,7 +282,7 @@
189 -f, --fields=LIST select only these fields; also print any line\n\
190 that contains no delimiter character, unless\n\
191 the -s option is specified\n\
192 - -n (ignored)\n\
193 + -n with -b: don't split multibyte characters\n\
194 "), stdout);
195 fputs (_("\
196 --complement complement the set of selected bytes, characters\n\
197 @@ -360,7 +437,7 @@
198 in_digits = false;
199 /* Starting a range. */
200 if (dash_found)
201 - FATAL_ERROR (_("invalid byte or field list"));
202 + FATAL_ERROR (_("invalid byte, character or field list"));
203 dash_found = true;
204 fieldstr++;
205
206 @@ -385,14 +462,16 @@
207 if (value == 0)
208 {
209 /* `n-'. From `initial' to end of line. */
210 - eol_range_start = initial;
211 + if (eol_range_start == 0 ||
212 + (eol_range_start != 0 && eol_range_start > initial))
213 + eol_range_start = initial;
214 field_found = true;
215 }
216 else
217 {
218 /* `m-n' or `-n' (1-n). */
219 if (value < initial)
220 - FATAL_ERROR (_("invalid byte or field list"));
221 + FATAL_ERROR (_("invalid byte, character or field list"));
222
223 /* Is there already a range going to end of line? */
224 if (eol_range_start != 0)
225 @@ -465,6 +544,9 @@
226 if (operating_mode == byte_mode)
227 error (0, 0,
228 _("byte offset %s is too large"), quote (bad_num));
229 + else if (operating_mode == character_mode)
230 + error (0, 0,
231 + _("character offset %s is too large"), quote (bad_num));
232 else
233 error (0, 0,
234 _("field number %s is too large"), quote (bad_num));
235 @@ -475,7 +557,7 @@
236 fieldstr++;
237 }
238 else
239 - FATAL_ERROR (_("invalid byte or field list"));
240 + FATAL_ERROR (_("invalid byte, character or field list"));
241 }
242
243 max_range_endpoint = 0;
244 @@ -568,6 +650,63 @@
245 }
246 }
247
248 +#if HAVE_MBRTOWC
249 +/* This function is used in the following two cases.
250 +
251 + 1. Read from the stream STREAM, printing to standard output any selected
252 + characters.
253 +
254 + 2. Read from stream STREAM, printing to standard output any selected bytes,
255 + without splitting multibyte characters. */
256 +
257 +static void
258 +cut_characters_or_cut_bytes_no_split (FILE *stream)
259 +{
260 + int idx; /* number of bytes or characters in the line so far. */
261 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
262 + char *bufpos; /* Next read position of BUF. */
263 + size_t buflen; /* The length of the byte sequence in buf. */
264 + wint_t wc = 0; /* A gotten wide character; not updated when conversion fails. */
265 + size_t mblength; /* The byte size of a multibyte character which shows
266 + as same character as WC. */
267 + mbstate_t state; /* State of the stream. */
268 + int convfail; /* 1, when conversion is failed. Otherwise 0. */
269 +
270 + idx = 0;
271 + buflen = 0;
272 + bufpos = buf;
273 + memset (&state, '\0', sizeof(mbstate_t));
274 +
275 + while (1)
276 + {
277 + REFILL_BUFFER (buf, bufpos, buflen, stream);
278 +
279 + GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
280 +
281 + if (wc == WEOF)
282 + {
283 + if (idx > 0) /* Terminate an unfinished last line. */
284 + putchar ('\n');
285 + break;
286 + }
287 + else if (!convfail && wc == L'\n') /* WC is stale after a failed conversion. */
288 + {
289 + putchar ('\n');
290 + idx = 0;
291 + }
292 + else
293 + {
294 + idx += (operating_mode == byte_mode) ? mblength : 1;
295 + if (print_kth (idx, NULL))
296 + fwrite (bufpos, mblength, sizeof(char), stdout);
297 + }
298 +
299 + buflen -= mblength;
300 + bufpos += mblength;
301 + }
302 +}
303 +#endif
304 +
305 /* Read from stream STREAM, printing to standard output any selected fields. */
306
307 static void
308 @@ -689,13 +828,192 @@
309 }
310 }
311
312 +#if HAVE_MBRTOWC /* cut_fields_mb: field-mode reader that cut_stream uses in multibyte locales; fields are split on WCDELIM. */
313 +static void
314 +cut_fields_mb (FILE *stream)
315 +{
316 + int c;
317 + unsigned int field_idx;
318 + int found_any_selected_field;
319 + int buffer_first_field;
320 + int empty_input;
321 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
322 + char *bufpos; /* Next read position of BUF. */
323 + size_t buflen; /* The length of the byte sequence in buf. */
324 + wint_t wc = 0; /* A gotten wide character. */
325 + size_t mblength; /* The byte size of a multibyte character which shows
326 + as same character as WC. */
327 + mbstate_t state; /* State of the stream. */
328 + int convfail = 0; /* 1, when conversion is failed. Otherwise 0. Initialized because it is tested below even when nothing was decoded. */
329 +
330 + found_any_selected_field = 0;
331 + field_idx = 1;
332 + bufpos = buf;
333 + buflen = 0;
334 + memset (&state, '\0', sizeof(mbstate_t));
335 +
336 + c = getc (stream);
337 + empty_input = (c == EOF);
338 + if (c != EOF)
339 + ungetc (c, stream);
340 + else
341 + wc = WEOF;
342 +
343 + /* To support the semantics of the -s flag, we may have to buffer
344 + all of the first field to determine whether it is `delimited.'
345 + But that is unnecessary if all non-delimited lines must be printed
346 + and the first field has been selected, or if non-delimited lines
347 + must be suppressed and the first field has *not* been selected.
348 + That is because a non-delimited line has exactly one field. */
349 + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
350 +
351 + while (1)
352 + {
353 + if (field_idx == 1 && buffer_first_field)
354 + {
355 + int len = 0;
356 +
357 + while (1)
358 + {
359 + REFILL_BUFFER (buf, bufpos, buflen, stream);
360 +
361 + GET_NEXT_WC_FROM_BUFFER
362 + (wc, bufpos, buflen, mblength, state, convfail);
363 +
364 + if (wc == WEOF)
365 + break;
366 +
367 + field_1_buffer = xrealloc (field_1_buffer, len + mblength);
368 + memcpy (field_1_buffer + len, bufpos, mblength);
369 + len += mblength;
370 + buflen -= mblength;
371 + bufpos += mblength;
372 +
373 + if (!convfail && (wc == L'\n' || wc == wcdelim))
374 + break;
375 + }
376 +
377 + if (wc == WEOF)
378 + break;
379 +
380 + /* If the first field extends to the end of line (it is not
381 + delimited) and we are printing all non-delimited lines,
382 + print this one. */
383 + if (convfail || (!convfail && wc != wcdelim))
384 + {
385 + if (suppress_non_delimited)
386 + {
387 + /* Empty. */
388 + }
389 + else
390 + {
391 + fwrite (field_1_buffer, sizeof (char), len, stdout);
392 + /* Make sure the output line is newline terminated. */
393 + if (convfail || (!convfail && wc != L'\n'))
394 + putchar ('\n');
395 + }
396 + continue;
397 + }
398 +
399 + if (print_kth (1, NULL))
400 + {
401 + /* Print the field, but not the trailing delimiter, which occupies the last MBLENGTH bytes of the buffer. */
402 + fwrite (field_1_buffer, sizeof (char), len - mblength, stdout);
403 + found_any_selected_field = 1;
404 + }
405 + ++field_idx;
406 + }
407 +
408 + if (wc != WEOF)
409 + {
410 + if (print_kth (field_idx, NULL))
411 + {
412 + if (found_any_selected_field)
413 + {
414 + fwrite (output_delimiter_string, sizeof (char),
415 + output_delimiter_length, stdout);
416 + }
417 + found_any_selected_field = 1;
418 + }
419 +
420 + while (1)
421 + {
422 + REFILL_BUFFER (buf, bufpos, buflen, stream);
423 +
424 + GET_NEXT_WC_FROM_BUFFER
425 + (wc, bufpos, buflen, mblength, state, convfail);
426 +
427 + if (wc == WEOF)
428 + break;
429 + else if (!convfail && (wc == wcdelim || wc == L'\n'))
430 + {
431 + buflen -= mblength;
432 + bufpos += mblength;
433 + break;
434 + }
435 +
436 + if (print_kth (field_idx, NULL))
437 + fwrite (bufpos, mblength, sizeof(char), stdout);
438 +
439 + buflen -= mblength;
440 + bufpos += mblength;
441 + }
442 + }
443 +
444 + if ((!convfail || wc == L'\n') && buflen < 1)
445 + wc = WEOF;
446 +
447 + if (!convfail && wc == wcdelim)
448 + ++field_idx;
449 + else if (wc == WEOF || (!convfail && wc == L'\n'))
450 + {
451 + if (found_any_selected_field
452 + || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
453 + putchar ('\n');
454 + if (wc == WEOF)
455 + break;
456 + field_idx = 1;
457 + found_any_selected_field = 0;
458 + }
459 + }
460 +}
461 +#endif
462 +
463 static void
464 cut_stream (FILE *stream)
465 {
466 - if (operating_mode == byte_mode)
467 - cut_bytes (stream);
468 +#if HAVE_MBRTOWC
469 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode) /* Multibyte locale and no single-byte fallback requested. */
470 + {
471 + switch (operating_mode)
472 + {
473 + case byte_mode:
474 + if (byte_mode_character_aware) /* Set by the -n option. */
475 + cut_characters_or_cut_bytes_no_split (stream);
476 + else
477 + cut_bytes (stream);
478 + break;
479 +
480 + case character_mode:
481 + cut_characters_or_cut_bytes_no_split (stream);
482 + break;
483 +
484 + case field_mode:
485 + cut_fields_mb (stream);
486 + break;
487 +
488 + default: /* Any other mode (i.e. undefined_mode) is treated as an internal error. */
489 + abort ();
490 + }
491 + }
492 else
493 - cut_fields (stream);
494 +#endif
495 + { /* Single-byte path: character_mode is handled by cut_bytes, like byte_mode. */
496 + if (operating_mode == field_mode)
497 + cut_fields (stream);
498 + else
499 + cut_bytes (stream);
500 + }
501 }
502
503 /* Process file FILE to standard output.
504 @@ -745,6 +1063,8 @@
505 bool ok;
506 bool delim_specified = false;
507 char *spec_list_string IF_LINT(= NULL);
508 + char mbdelim[MB_LEN_MAX + 1] = { '\0' }; /* Bytes of the -d delimiter; zero-filled so the memcpy'd bytes stay NUL-terminated when passed to xstrdup. */
509 + size_t delimlen = 0; /* Byte length of the delimiter stored in MBDELIM. */
510
511 initialize_main (&argc, &argv);
512 program_name = argv[0];
513 @@ -767,7 +1087,6 @@
514 switch (optc)
515 {
516 case 'b':
517 - case 'c':
518 /* Build the byte list. */
519 if (operating_mode != undefined_mode)
520 FATAL_ERROR (_("only one type of list may be specified"));
521 @@ -775,6 +1094,14 @@
522 spec_list_string = optarg;
523 break;
524
525 + case 'c':
526 + /* Build the character list. */
527 + if (operating_mode != undefined_mode)
528 + FATAL_ERROR (_("only one type of list may be specified"));
529 + operating_mode = character_mode;
530 + spec_list_string = optarg;
531 + break;
532 +
533 case 'f':
534 /* Build the field list. */
535 if (operating_mode != undefined_mode)
536 @@ -786,10 +1113,35 @@
537 case 'd':
538 /* New delimiter. */
539 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
540 - if (optarg[0] != '\0' && optarg[1] != '\0')
541 - FATAL_ERROR (_("the delimiter must be a single character"));
542 - delim = optarg[0];
543 - delim_specified = true;
544 +#if HAVE_MBRTOWC
545 + {
546 + if(MB_CUR_MAX > 1)
547 + {
548 + mbstate_t state;
549 +
550 + memset (&state, '\0', sizeof(mbstate_t));
551 + delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
552 +
553 + if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
554 + ++force_singlebyte_mode;
555 + else
556 + {
557 + delimlen = (delimlen < 1) ? 1 : delimlen;
558 + if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
559 + FATAL_ERROR (_("the delimiter must be a single character"));
560 + memcpy (mbdelim, optarg, delimlen);
561 + }
562 + }
563 +
564 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
565 +#endif
566 + {
567 + if (optarg[0] != '\0' && optarg[1] != '\0')
568 + FATAL_ERROR (_("the delimiter must be a single character"));
569 + delim = (unsigned char) optarg[0];
570 + }
571 + delim_specified = true;
572 + }
573 break;
574
575 case OUTPUT_DELIMITER_OPTION:
576 @@ -802,6 +1154,7 @@
577 break;
578
579 case 'n':
580 + byte_mode_character_aware = 1;
581 break;
582
583 case 's':
584 @@ -824,7 +1177,7 @@
585 if (operating_mode == undefined_mode)
586 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
587
588 - if (delim != '\0' && operating_mode != field_mode)
589 + if (delim_specified && operating_mode != field_mode)
590 FATAL_ERROR (_("an input delimiter may be specified only\
591 when operating on fields"));
592
593 @@ -851,15 +1204,34 @@
594 }
595
596 if (!delim_specified)
597 - delim = '\t';
598 + {
599 + delim = '\t';
600 +#ifdef HAVE_MBRTOWC
601 + wcdelim = L'\t';
602 + mbdelim[0] = '\t';
603 + mbdelim[1] = '\0';
604 + delimlen = 1;
605 +#endif
606 + }
607
608 if (output_delimiter_string == NULL)
609 {
610 - static char dummy[2];
611 - dummy[0] = delim;
612 - dummy[1] = '\0';
613 - output_delimiter_string = dummy;
614 - output_delimiter_length = 1;
615 +#ifdef HAVE_MBRTOWC
616 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
617 + {
618 + output_delimiter_string = xstrdup(mbdelim);
619 + output_delimiter_length = delimlen;
620 + }
621 +
622 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
623 +#endif
624 + {
625 + static char dummy[2];
626 + dummy[0] = delim;
627 + dummy[1] = '\0';
628 + output_delimiter_string = dummy;
629 + output_delimiter_length = 1;
630 + }
631 }
632
633 if (optind == argc)
634 --- coreutils-5.93/src/pr.c.i18n 2005-09-16 08:50:33.000000000 +0100
635 +++ coreutils-5.93/src/pr.c 2005-12-23 08:53:01.000000000 +0000
636 @@ -313,6 +313,32 @@
637
638 #include <getopt.h>
639 #include <sys/types.h>
640 +
641 +/* Get MB_LEN_MAX. */
642 +#include <limits.h>
643 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
644 + installation; work around this configuration error. */
645 +#if !defined MB_LEN_MAX || MB_LEN_MAX == 1
646 +# define MB_LEN_MAX 16
647 +#endif
648 +
649 +/* Get MB_CUR_MAX. */
650 +#include <stdlib.h>
651 +
652 +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
653 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
654 +#if HAVE_WCHAR_H
655 +# include <wchar.h>
656 +#endif
657 +
658 +/* Get iswprint(). -- for wcwidth(). */
659 +#if HAVE_WCTYPE_H
660 +# include <wctype.h>
661 +#endif
662 +#if !defined iswprint && !HAVE_ISWPRINT
663 +# define iswprint(wc) 1
664 +#endif
665 +
666 #include "system.h"
667 #include "error.h"
668 #include "hard-locale.h"
669 @@ -324,6 +350,18 @@
670 #include "strftime.h"
671 #include "xstrtol.h"
672
673 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
674 +#if HAVE_MBRTOWC && defined mbstate_t
675 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
676 +#endif
677 +
678 +#ifndef HAVE_DECL_WCWIDTH
679 +"this configure-time declaration test was not run"
680 +#endif
681 +#if !HAVE_DECL_WCWIDTH
682 +extern int wcwidth ();
683 +#endif
684 +
685 /* The official name of this program (e.g., no `g' prefix). */
686 #define PROGRAM_NAME "pr"
687
688 @@ -416,7 +454,20 @@
689
690 #define NULLCOL (COLUMN *)0
691
692 -static int char_to_clump (char c);
693 +/* Function pointers to switch functions for single byte locale or for
694 + multibyte locale. If multibyte functions do not exist in your system,
695 + these pointers always point to the function for single byte locale. */
696 +static void (*print_char) (char c);
697 +static int (*char_to_clump) (char c);
698 +
699 +/* Functions for single byte locale. */
700 +static void print_char_single (char c);
701 +static int char_to_clump_single (char c);
702 +
703 +/* Functions for multibyte locale. */
704 +static void print_char_multi (char c);
705 +static int char_to_clump_multi (char c);
706 +
707 static bool read_line (COLUMN *p);
708 static bool print_page (void);
709 static bool print_stored (COLUMN *p);
710 @@ -426,6 +477,7 @@
711 static void pad_across_to (int position);
712 static void add_line_number (COLUMN *p);
713 static void getoptarg (char *arg, char switch_char, char *character,
714 + int *character_length, int *character_width,
715 int *number);
716 void usage (int status);
717 static void print_files (int number_of_files, char **av);
718 @@ -440,7 +492,6 @@
719 static void pad_down (int lines);
720 static void read_rest_of_line (COLUMN *p);
721 static void skip_read (COLUMN *p, int column_number);
722 -static void print_char (char c);
723 static void cleanup (void);
724 static void print_sep_string (void);
725 static void separator_string (const char *optarg_S);
726 @@ -455,7 +506,7 @@
727 we store the leftmost columns contiguously in buff.
728 To print a line from buff, get the index of the first character
729 from line_vector[i], and print up to line_vector[i + 1]. */
730 -static char *buff;
731 +static unsigned char *buff;
732
733 /* Index of the position in buff where the next character
734 will be stored. */
735 @@ -559,7 +610,7 @@
736 static bool untabify_input = false;
737
738 /* (-e) The input tab character. */
739 -static char input_tab_char = '\t';
740 +static char input_tab_char[MB_LEN_MAX] = "\t";
741
742 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ...
743 where the leftmost column is 1. */
744 @@ -569,7 +620,10 @@
745 static bool tabify_output = false;
746
747 /* (-i) The output tab character. */
748 -static char output_tab_char = '\t';
749 +static char output_tab_char[MB_LEN_MAX] = "\t";
750 +
751 +/* (-i) The byte length of output tab character. */
752 +static int output_tab_char_length = 1;
753
754 /* (-i) The width of the output tab. */
755 static int chars_per_output_tab = 8;
756 @@ -643,7 +697,13 @@
757 static bool numbered_lines = false;
758
759 /* (-n) Character which follows each line number. */
760 -static char number_separator = '\t';
761 +static char number_separator[MB_LEN_MAX] = "\t";
762 +
763 +/* (-n) The byte length of the character which follows each line number. */
764 +static int number_separator_length = 1;
765 +
766 +/* (-n) The character width of the character which follows each line number. */
767 +static int number_separator_width = 0;
768
769 /* (-n) line counting starts with 1st line of input file (not with 1st
770 line of 1st page printed). */
771 @@ -696,6 +756,7 @@
772 -a|COLUMN|-m is a `space' and with the -J option a `tab'. */
773 static char *col_sep_string = "";
774 static int col_sep_length = 0;
775 +static int col_sep_width = 0;
776 static char *column_separator = " ";
777 static char *line_separator = "\t";
778
779 @@ -852,6 +913,13 @@
780 col_sep_length = (int) strlen (optarg_S);
781 col_sep_string = xmalloc (col_sep_length + 1);
782 strcpy (col_sep_string, optarg_S);
783 +
784 +#if HAVE_MBRTOWC
785 + if (MB_CUR_MAX > 1)
786 + col_sep_width = mbswidth (col_sep_string, 0);
787 + else
788 +#endif
789 + col_sep_width = col_sep_length;
790 }
791
792 int
793 @@ -877,6 +945,21 @@
794
795 atexit (close_stdout);
796
797 +/* Define which functions are used, the ones for single byte locale or the ones
798 + for multibyte locale. */
799 +#if HAVE_MBRTOWC
800 + if (MB_CUR_MAX > 1)
801 + {
802 + print_char = print_char_multi;
803 + char_to_clump = char_to_clump_multi;
804 + }
805 + else
806 +#endif
807 + {
808 + print_char = print_char_single;
809 + char_to_clump = char_to_clump_single;
810 + }
811 +
812 n_files = 0;
813 file_names = (argc > 1
814 ? xmalloc ((argc - 1) * sizeof (char *))
815 @@ -949,8 +1032,12 @@
816 break;
817 case 'e':
818 if (optarg)
819 - getoptarg (optarg, 'e', &input_tab_char,
820 - &chars_per_input_tab);
821 + {
822 + int dummy_length, dummy_width;
823 +
824 + getoptarg (optarg, 'e', input_tab_char, &dummy_length,
825 + &dummy_width, &chars_per_input_tab);
826 + }
827 /* Could check tab width > 0. */
828 untabify_input = true;
829 break;
830 @@ -963,8 +1050,12 @@
831 break;
832 case 'i':
833 if (optarg)
834 - getoptarg (optarg, 'i', &output_tab_char,
835 - &chars_per_output_tab);
836 + {
837 + int dummy_width;
838 +
839 + getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length,
840 + &dummy_width, &chars_per_output_tab);
841 + }
842 /* Could check tab width > 0. */
843 tabify_output = true;
844 break;
845 @@ -991,8 +1082,8 @@
846 case 'n':
847 numbered_lines = true;
848 if (optarg)
849 - getoptarg (optarg, 'n', &number_separator,
850 - &chars_per_number);
851 + getoptarg (optarg, 'n', number_separator, &number_separator_length,
852 + &number_separator_width, &chars_per_number);
853 break;
854 case 'N':
855 skip_count = false;
856 @@ -1031,7 +1122,7 @@
857 old_s = false;
858 /* Reset an additional input of -s, -S dominates -s */
859 col_sep_string = "";
860 - col_sep_length = 0;
861 + col_sep_length = col_sep_width = 0;
862 use_col_separator = true;
863 if (optarg)
864 separator_string (optarg);
865 @@ -1188,10 +1279,45 @@
866 a number. */
867
868 static void
869 -getoptarg (char *arg, char switch_char, char *character, int *number)
870 +getoptarg (char *arg, char switch_char, char *character, int *character_length,
871 + int *character_width, int *number)
872 {
873 if (!ISDIGIT (*arg))
874 - *character = *arg++;
875 + {
876 +#ifdef HAVE_MBRTOWC
877 + if (MB_CUR_MAX > 1) /* for multibyte locale. */
878 + {
879 + wchar_t wc;
880 + size_t mblength;
881 + int width;
882 + mbstate_t state = {'\0'};
883 +
884 + mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state);
885 +
886 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
887 + {
888 + *character_length = 1;
889 + *character_width = 1;
890 + }
891 + else
892 + {
893 + *character_length = (mblength < 1) ? 1 : mblength;
894 + width = wcwidth (wc);
895 + *character_width = (width < 0) ? 0 : width;
896 + }
897 +
898 + strncpy (character, arg, *character_length);
899 + arg += *character_length;
900 + }
901 + else /* for single byte locale. */
902 +#endif
903 + {
904 + *character = *arg++;
905 + *character_length = 1;
906 + *character_width = 1;
907 + }
908 + }
909 +
910 if (*arg)
911 {
912 long int tmp_long;
913 @@ -1256,7 +1382,7 @@
914 else
915 col_sep_string = column_separator;
916
917 - col_sep_length = 1;
918 + col_sep_length = col_sep_width = 1;
919 use_col_separator = true;
920 }
921 /* It's rather pointless to define a TAB separator with column
922 @@ -1288,11 +1414,11 @@
923 TAB_WIDTH (chars_per_input_tab, chars_per_number); */
924
925 /* Estimate chars_per_text without any margin and keep it constant. */
926 - if (number_separator == '\t')
927 + if (number_separator[0] == '\t')
928 number_width = chars_per_number +
929 TAB_WIDTH (chars_per_default_tab, chars_per_number);
930 else
931 - number_width = chars_per_number + 1;
932 + number_width = chars_per_number + number_separator_width;
933
934 /* The number is part of the column width unless we are
935 printing files in parallel. */
936 @@ -1307,7 +1433,7 @@
937 }
938
939 chars_per_column = (chars_per_line - chars_used_by_number -
940 - (columns - 1) * col_sep_length) / columns;
941 + (columns - 1) * col_sep_width) / columns;
942
943 if (chars_per_column < 1)
944 error (EXIT_FAILURE, 0, _("page width too narrow"));
945 @@ -1432,7 +1558,7 @@
946
947 /* Enlarge p->start_position of first column to use the same form of
948 padding_not_printed with all columns. */
949 - h = h + col_sep_length;
950 + h = h + col_sep_width;
951
952 /* This loop takes care of all but the rightmost column. */
953
954 @@ -1466,7 +1592,7 @@
955 }
956 else
957 {
958 - h = h_next + col_sep_length;
959 + h = h_next + col_sep_width;
960 h_next = h + chars_per_column;
961 }
962 }
963 @@ -1756,9 +1882,9 @@
964 align_column (COLUMN *p)
965 {
966 padding_not_printed = p->start_position;
967 - if (padding_not_printed - col_sep_length > 0)
968 + if (padding_not_printed - col_sep_width > 0)
969 {
970 - pad_across_to (padding_not_printed - col_sep_length);
971 + pad_across_to (padding_not_printed - col_sep_width);
972 padding_not_printed = ANYWHERE;
973 }
974
975 @@ -2029,13 +2155,13 @@
976 /* May be too generous. */
977 buff = X2REALLOC (buff, &buff_allocated);
978 }
979 - buff[buff_current++] = c;
980 + buff[buff_current++] = (unsigned char) c;
981 }
982
983 static void
984 add_line_number (COLUMN *p)
985 {
986 - int i;
987 + int i, j;
988 char *s;
989 int left_cut;
990
991 @@ -2058,22 +2184,24 @@
992 /* Tabification is assumed for multiple columns, also for n-separators,
993 but `default n-separator = TAB' hasn't been given priority over
994 equal column_width also specified by POSIX. */
995 - if (number_separator == '\t')
996 + if (number_separator[0] == '\t')
997 {
998 i = number_width - chars_per_number;
999 while (i-- > 0)
1000 (p->char_func) (' ');
1001 }
1002 else
1003 - (p->char_func) (number_separator);
1004 + for (j = 0; j < number_separator_length; j++)
1005 + (p->char_func) (number_separator[j]);
1006 }
1007 else
1008 /* To comply with POSIX, we avoid any expansion of default TAB
1009 separator with a single column output. No column_width requirement
1010 has to be considered. */
1011 {
1012 - (p->char_func) (number_separator);
1013 - if (number_separator == '\t')
1014 + for (j = 0; j < number_separator_length; j++)
1015 + (p->char_func) (number_separator[j]);
1016 + if (number_separator[0] == '\t')
1017 output_position = POS_AFTER_TAB (chars_per_output_tab,
1018 output_position);
1019 }
1020 @@ -2234,7 +2362,7 @@
1021 while (goal - h_old > 1
1022 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
1023 {
1024 - putchar (output_tab_char);
1025 + fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout);
1026 h_old = h_new;
1027 }
1028 while (++h_old <= goal)
1029 @@ -2254,6 +2382,7 @@
1030 {
1031 char *s;
1032 int l = col_sep_length;
1033 + int not_space_flag;
1034
1035 s = col_sep_string;
1036
1037 @@ -2267,6 +2396,7 @@
1038 {
1039 for (; separators_not_printed > 0; --separators_not_printed)
1040 {
1041 + not_space_flag = 0;
1042 while (l-- > 0)
1043 {
1044 /* 3 types of sep_strings: spaces only, spaces and chars,
1045 @@ -2280,12 +2410,15 @@
1046 }
1047 else
1048 {
1049 + not_space_flag = 1;
1050 if (spaces_not_printed > 0)
1051 print_white_space ();
1052 putchar (*s++);
1053 - ++output_position;
1054 }
1055 }
1056 + if (not_space_flag)
1057 + output_position += col_sep_width;
1058 +
1059 /* sep_string ends with some spaces */
1060 if (spaces_not_printed > 0)
1061 print_white_space ();
1062 @@ -2313,7 +2446,7 @@
1063 required number of tabs and spaces. */
1064
1065 static void
1066 -print_char (char c)
1067 +print_char_single (char c)
1068 {
1069 if (tabify_output)
1070 {
1071 @@ -2337,6 +2470,74 @@
1072 putchar (c);
1073 }
1074
1075 +#ifdef HAVE_MBRTOWC /* print_char_multi: multibyte variant of print_char; with tabify_output it collects bytes in MBC until they form a character. */
1076 +static void
1077 +print_char_multi (char c)
1078 +{
1079 + static size_t mbc_pos = 0;
1080 + static unsigned char mbc[MB_LEN_MAX] = {'\0'};
1081 + static mbstate_t state = {'\0'};
1082 + mbstate_t state_bak;
1083 + wchar_t wc;
1084 + size_t mblength;
1085 + int width;
1086 +
1087 + if (tabify_output)
1088 + {
1089 + state_bak = state;
1090 + mbc[mbc_pos++] = (unsigned char)c;
1091 + mblength = mbrtowc (&wc, (char *) mbc, mbc_pos, &state);
1092 +
1093 + while (mbc_pos > 0)
1094 + {
1095 + switch (mblength)
1096 + {
1097 + case (size_t)-2: /* Incomplete character: keep the bytes and wait for the next call. */
1098 + state = state_bak;
1099 + return;
1100 +
1101 + case (size_t)-1: /* Invalid sequence: emit the first pending byte as one column. */
1102 + state = state_bak;
1103 + ++output_position;
1104 + putchar (mbc[0]);
1105 + memmove (mbc, mbc + 1, MB_CUR_MAX - 1);
1106 + --mbc_pos;
1107 + break;
1108 +
1109 + case 0: /* NUL character: count it as one byte, then fall through. */
1110 + mblength = 1;
1111 +
1112 + default:
1113 + if (wc == L' ')
1114 + {
1115 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
1116 + --mbc_pos;
1117 + ++spaces_not_printed;
1118 + return;
1119 + }
1120 + else if (spaces_not_printed > 0)
1121 + print_white_space ();
1122 +
1123 + /* Nonprintables are assumed to have width 0, except L'\b'. */
1124 + if ((width = wcwidth (wc)) < 1)
1125 + {
1126 + if (wc == L'\b')
1127 + --output_position;
1128 + }
1129 + else
1130 + output_position += width;
1131 +
1132 + fwrite (mbc, sizeof(char), mblength, stdout);
1133 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
1134 + mbc_pos -= mblength;
1135 + }
1136 + }
1137 + return;
1138 + }
1139 + putchar (c); /* No tabification: pass the byte through unchanged. */
1140 +}
1141 +#endif
1142 +
1143 /* Skip to page PAGE before printing.
1144 PAGE may be larger than total number of pages. */
1145
1146 @@ -2517,9 +2718,9 @@
1147 align_empty_cols = false;
1148 }
1149
1150 - if (padding_not_printed - col_sep_length > 0)
1151 + if (padding_not_printed - col_sep_width > 0)
1152 {
1153 - pad_across_to (padding_not_printed - col_sep_length);
1154 + pad_across_to (padding_not_printed - col_sep_width);
1155 padding_not_printed = ANYWHERE;
1156 }
1157
1158 @@ -2620,9 +2821,9 @@
1159 }
1160 }
1161
1162 - if (padding_not_printed - col_sep_length > 0)
1163 + if (padding_not_printed - col_sep_width > 0)
1164 {
1165 - pad_across_to (padding_not_printed - col_sep_length);
1166 + pad_across_to (padding_not_printed - col_sep_width);
1167 padding_not_printed = ANYWHERE;
1168 }
1169
1170 @@ -2635,8 +2836,8 @@
1171 if (spaces_not_printed == 0)
1172 {
1173 output_position = p->start_position + end_vector[line];
1174 - if (p->start_position - col_sep_length == chars_per_margin)
1175 - output_position -= col_sep_length;
1176 + if (p->start_position - col_sep_width == chars_per_margin)
1177 + output_position -= col_sep_width;
1178 }
1179
1180 return true;
1181 @@ -2655,7 +2856,7 @@
1182 number of characters is 1.) */
1183
1184 static int
1185 -char_to_clump (char c)
1186 +char_to_clump_single (char c)
1187 {
1188 unsigned char uc = c;
1189 char *s = clump_buff;
1190 @@ -2665,10 +2866,10 @@
1191 int chars;
1192 int chars_per_c = 8;
1193
1194 - if (c == input_tab_char)
1195 + if (c == input_tab_char[0])
1196 chars_per_c = chars_per_input_tab;
1197
1198 - if (c == input_tab_char || c == '\t')
1199 + if (c == input_tab_char[0] || c == '\t')
1200 {
1201 width = TAB_WIDTH (chars_per_c, input_position);
1202
1203 @@ -2739,6 +2940,154 @@
1204 return chars;
1205 }
1206
1207 +#ifdef HAVE_MBRTOWC
1208 +static int /* Returns the number of bytes stored in clump_buff (0 while a character is incomplete). */
1209 +char_to_clump_multi (char c)
1210 +{
1211 + static size_t mbc_pos = 0; /* Bytes of the pending character held in MBC. */
1212 + static char mbc[MB_LEN_MAX] = {'\0'};
1213 + static mbstate_t state = {'\0'};
1214 + mbstate_t state_bak;
1215 + wchar_t wc;
1216 + size_t mblength;
1217 + int wc_width;
1218 + register char *s = clump_buff; /* Byte pointer, as in char_to_clump_single. */
1219 + register int i, j;
1220 + char esc_buff[4];
1221 + int width;
1222 + int chars;
1223 + int chars_per_c = 8;
1224 +
1225 + state_bak = state;
1226 + mbc[mbc_pos++] = c;
1227 + mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
1228 +
1229 + width = 0;
1230 + chars = 0;
1231 + while (mbc_pos > 0)
1232 + {
1233 + switch (mblength)
1234 + {
1235 + case (size_t)-2: /* Incomplete sequence: keep the bytes, wait for more. */
1236 + state = state_bak;
1237 + return 0;
1238 +
1239 + case (size_t)-1: /* Invalid sequence: emit the first byte on its own. */
1240 + state = state_bak;
1241 + mblength = 1;
1242 +
1243 + if (use_esc_sequence || use_cntrl_prefix)
1244 + {
1245 + width += 4;
1246 + chars += 4;
1247 + *s++ = '\\';
1248 + sprintf (esc_buff, "%03o", (unsigned char) mbc[0]); /* 3 digits + NUL fit only for 0..255. */
1249 + for (i = 0; i <= 2; ++i)
1250 + *s++ = esc_buff[i];
1251 + }
1252 + else
1253 + {
1254 + width += 1;
1255 + chars += 1;
1256 + *s++ = mbc[0];
1257 + }
1258 + break;
1259 +
1260 + case 0:
1261 + mblength = 1;
1262 + /* Fall through */
1263 +
1264 + default:
1265 + if (memcmp (mbc, input_tab_char, mblength) == 0)
1266 + chars_per_c = chars_per_input_tab;
1267 +
1268 + if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t')
1269 + {
1270 + int width_inc;
1271 +
1272 + width_inc = TAB_WIDTH (chars_per_c, input_position);
1273 + width += width_inc;
1274 +
1275 + if (untabify_input)
1276 + {
1277 + for (i = width_inc; i; --i)
1278 + *s++ = ' ';
1279 + chars += width_inc;
1280 + }
1281 + else
1282 + {
1283 + for (i = 0; i < mblength; i++)
1284 + *s++ = mbc[i];
1285 + chars += mblength;
1286 + }
1287 + }
1288 + else if ((wc_width = wcwidth (wc)) < 1)
1289 + {
1290 + if (use_esc_sequence)
1291 + {
1292 + for (i = 0; i < mblength; i++)
1293 + {
1294 + width += 4;
1295 + chars += 4;
1296 + *s++ = '\\';
1297 + sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); /* Escape each byte of the character. */
1298 + for (j = 0; j <= 2; ++j)
1299 + *s++ = esc_buff[j];
1300 + }
1301 + }
1302 + else if (use_cntrl_prefix)
1303 + {
1304 + if (wc < 0200)
1305 + {
1306 + width += 2;
1307 + chars += 2;
1308 + *s++ = '^';
1309 + *s++ = wc ^ 0100;
1310 + }
1311 + else
1312 + {
1313 + for (i = 0; i < mblength; i++)
1314 + {
1315 + width += 4;
1316 + chars += 4;
1317 + *s++ = '\\';
1318 + sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); /* Escape each byte of the character. */
1319 + for (j = 0; j <= 2; ++j)
1320 + *s++ = esc_buff[j];
1321 + }
1322 + }
1323 + }
1324 + else if (wc == L'\b')
1325 + {
1326 + width += -1;
1327 + chars += 1;
1328 + *s++ = c;
1329 + }
1330 + else
1331 + {
1332 + width += 0;
1333 + chars += mblength;
1334 + for (i = 0; i < mblength; i++)
1335 + *s++ = mbc[i];
1336 + }
1337 + }
1338 + else
1339 + {
1340 + width += wc_width;
1341 + chars += mblength;
1342 + for (i = 0; i < mblength; i++)
1343 + *s++ = mbc[i];
1344 + }
1345 + }
1346 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); /* Drop the consumed bytes. */
1347 + mbc_pos -= mblength;
1348 + }
1349 +
1350 + input_position += width;
1351 + return chars;
1352 +}
1353 +#endif
1354 +
1355 /* We've just printed some files and need to clean up things before
1356 looking for more options and printing the next batch of files.
1357
1358 --- coreutils-5.93/src/uniq.c.i18n 2005-07-05 07:32:54.000000000 +0100
1359 +++ coreutils-5.93/src/uniq.c 2005-12-23 08:53:01.000000000 +0000
1360 @@ -23,6 +23,16 @@
1361 #include <getopt.h>
1362 #include <sys/types.h>
1363
1364 +/* Get mbstate_t, mbrtowc(). */
1365 +#if HAVE_WCHAR_H
1366 +# include <wchar.h>
1367 +#endif
1368 +
1369 +/* Get isw* functions. */
1370 +#if HAVE_WCTYPE_H
1371 +# include <wctype.h>
1372 +#endif
1373 +
1374 #include "system.h"
1375 #include "argmatch.h"
1376 #include "linebuffer.h"
1377 @@ -32,7 +42,19 @@
1378 #include "quote.h"
1379 #include "xmemcoll.h"
1380 #include "xstrtol.h"
1381 -#include "memcasecmp.h"
1382 +#include "xmemcoll.h"
1383 +
1384 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1385 + installation; work around this configuration error. */
1386 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1387 +# define MB_LEN_MAX 16
1388 +#endif
1389 +
1390 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1391 +#if HAVE_MBRTOWC && defined mbstate_t
1392 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1393 +#endif
1394 +
1395
1396 /* The official name of this program (e.g., no `g' prefix). */
1397 #define PROGRAM_NAME "uniq"
1398 @@ -109,6 +131,10 @@
1399 /* Select whether/how to delimit groups of duplicate lines. */
1400 static enum delimit_method delimit_groups;
1401
1402 +/* Function pointers. */
1403 +static char *
1404 +(*find_field) (struct linebuffer *line);
1405 +
1406 static struct option const longopts[] =
1407 {
1408 {"count", no_argument, NULL, 'c'},
1409 @@ -189,7 +215,7 @@
1410 return a pointer to the beginning of the line's field to be compared. */
1411
1412 static char *
1413 -find_field (const struct linebuffer *line)
1414 +find_field_uni (struct linebuffer *line)
1415 {
1416 size_t count;
1417 char *lp = line->buffer;
1418 @@ -210,6 +236,83 @@
1419 return lp + i;
1420 }
1421
1422 +#if HAVE_MBRTOWC
1423 +
1424 +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
1425 + do \
1426 + { \
1427 + mbstate_t state_bak; \
1428 + \
1429 + CONVFAIL = 0; \
1430 + state_bak = *STATEP; \
1431 + \
1432 + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
1433 + \
1434 + switch (MBLENGTH) \
1435 + { \
1436 + case (size_t)-2: \
1437 + case (size_t)-1: \
1438 + *STATEP = state_bak; \
1439 + CONVFAIL++; \
1440 + /* Fall through */ \
1441 + case 0: \
1442 + MBLENGTH = 1; \
1443 + } \
1444 + } \
1445 + while (0)
1446 +
1447 +static char *
1448 +find_field_multi (struct linebuffer *line)
1449 +{
1450 + size_t count;
1451 + char *lp = line->buffer;
1452 + size_t size = line->length - 1;
1453 + size_t pos;
1454 + size_t mblength;
1455 + wchar_t wc;
1456 + mbstate_t *statep;
1457 + int convfail;
1458 +
1459 + pos = 0;
1460 + statep = &(line->state);
1461 +
1462 + /* skip fields. */
1463 + for (count = 0; count < skip_fields && pos < size; count++)
1464 + {
1465 + while (pos < size)
1466 + {
1467 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
1468 +
1469 + if (convfail || !iswblank (wc))
1470 + {
1471 + pos += mblength;
1472 + break;
1473 + }
1474 + pos += mblength;
1475 + }
1476 +
1477 + while (pos < size)
1478 + {
1479 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
1480 +
1481 + if (!convfail && iswblank (wc))
1482 + break;
1483 +
1484 + pos += mblength;
1485 + }
1486 + }
1487 +
1488 + /* skip chars. */
1489 + for (count = 0; count < skip_chars && pos < size; count++)
1490 + {
1491 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
1492 + pos += mblength;
1493 + }
1494 +
1495 + return lp + pos;
1496 +}
1497 +#endif
1498 +
1499 /* Return false if two strings OLD and NEW match, true if not.
1500 OLD and NEW point not to the beginnings of the lines
1501 but rather to the beginnings of the fields to compare.
1502 @@ -218,6 +321,8 @@
1503 static bool
1504 different (char *old, char *new, size_t oldlen, size_t newlen)
1505 {
1506 + char *copy_old, *copy_new; /* Strings actually handed to xmemcoll. */
1507 +
1508 if (check_chars < oldlen)
1509 oldlen = check_chars;
1510 if (check_chars < newlen)
1511 @@ -225,14 +330,92 @@
1512
1513 if (ignore_case)
1514 {
1515 - /* FIXME: This should invoke strcoll somehow. */
1516 - return oldlen != newlen || memcasecmp (old, new, oldlen);
1517 + size_t i;
1518 +
1519 + copy_old = alloca (oldlen + 1);
1520 + copy_new = alloca (newlen + 1); /* Sized by NEW's own length, not OLD's. */
1521 +
1522 + for (i = 0; i < oldlen; i++)
1523 + copy_old[i] = toupper ((unsigned char) old[i]);
1524 + for (i = 0; i < newlen; i++)
1525 + copy_new[i] = toupper ((unsigned char) new[i]);
1526 + /* Each copy now holds exactly its own field, upper-cased. */
1527 }
1528 - else if (hard_LC_COLLATE)
1529 - return xmemcoll (old, oldlen, new, newlen) != 0;
1530 else
1531 - return oldlen != newlen || memcmp (old, new, oldlen);
1532 + {
1533 + copy_old = (char *)old;
1534 + copy_new = (char *)new;
1535 + }
1536 +
1537 + return xmemcoll (copy_old, oldlen, copy_new, newlen); /* Nonzero means the fields differ. */
1538 +}
1539 +
1540 +#if HAVE_MBRTOWC
1541 +static int
1542 +different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
1543 +{
1544 + size_t i, j, chars;
1545 + const char *str[2];
1546 + char *copy[2];
1547 + size_t len[2];
1548 + mbstate_t state[2];
1549 + size_t mblength;
1550 + wchar_t wc, uwc;
1551 + mbstate_t state_bak;
1552 +
1553 + str[0] = old;
1554 + str[1] = new;
1555 + len[0] = oldlen;
1556 + len[1] = newlen;
1557 + state[0] = oldstate;
1558 + state[1] = newstate;
1559 +
1560 + for (i = 0; i < 2; i++)
1561 + {
1562 + copy[i] = alloca (len[i] + 1);
1563 +
1564 + for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
1565 + {
1566 + state_bak = state[i];
1567 + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
1568 +
1569 + switch (mblength)
1570 + {
1571 + case (size_t)-1:
1572 + case (size_t)-2:
1573 + state[i] = state_bak;
1574 + /* Fall through */
1575 + case 0:
1576 + mblength = 1;
1577 + break;
1578 +
1579 + default:
1580 + if (ignore_case)
1581 + {
1582 + uwc = towupper (wc);
1583 +
1584 + if (uwc != wc)
1585 + {
1586 + mbstate_t state_wc;
1587 +
1588 + memset (&state_wc, '\0', sizeof(mbstate_t));
1589 + wcrtomb (copy[i] + j, uwc, &state_wc);
1590 + }
1591 + else
1592 + memcpy (copy[i] + j, str[i] + j, mblength);
1593 + }
1594 + else
1595 + memcpy (copy[i] + j, str[i] + j, mblength);
1596 + }
1597 + j += mblength;
1598 + }
1599 + copy[i][j] = '\0';
1600 + len[i] = j;
1601 + }
1602 +
1603 + return xmemcoll (copy[0], len[0], copy[1], len[1]);
1604 }
1605 +#endif
1606
1607 /* Output the line in linebuffer LINE to standard output
1608 provided that the switches say it should be output.
1609 @@ -286,15 +469,43 @@
1610 {
1611 char *prevfield IF_LINT (= NULL);
1612 size_t prevlen IF_LINT (= 0);
1613 +#if HAVE_MBRTOWC
1614 + mbstate_t prevstate;
1615 +
1616 + memset (&prevstate, '\0', sizeof (mbstate_t));
1617 +#endif
1618
1619 while (!feof (stdin))
1620 {
1621 char *thisfield;
1622 size_t thislen;
1623 +#if HAVE_MBRTOWC
1624 + mbstate_t thisstate;
1625 +#endif
1626 +
1627 if (readlinebuffer (thisline, stdin) == 0)
1628 break;
1629 thisfield = find_field (thisline);
1630 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1631 +#if HAVE_MBRTOWC
1632 + if (MB_CUR_MAX > 1)
1633 + {
1634 + thisstate = thisline->state;
1635 +
1636 + if (prevline->length == 0 || different_multi
1637 + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
1638 + {
1639 + fwrite (thisline->buffer, sizeof (char),
1640 + thisline->length, stdout);
1641 +
1642 + SWAP_LINES (prevline, thisline);
1643 + prevfield = thisfield;
1644 + prevlen = thislen;
1645 + prevstate = thisstate;
1646 + }
1647 + }
1648 + else
1649 +#endif
1650 if (prevline->length == 0
1651 || different (thisfield, prevfield, thislen, prevlen))
1652 {
1653 @@ -313,17 +524,26 @@
1654 size_t prevlen;
1655 uintmax_t match_count = 0;
1656 bool first_delimiter = true;
1657 +#if HAVE_MBRTOWC
1658 + mbstate_t prevstate;
1659 +#endif
1660
1661 if (readlinebuffer (prevline, stdin) == 0)
1662 goto closefiles;
1663 prevfield = find_field (prevline);
1664 prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
1665 +#if HAVE_MBRTOWC
1666 + prevstate = prevline->state;
1667 +#endif
1668
1669 while (!feof (stdin))
1670 {
1671 bool match;
1672 char *thisfield;
1673 size_t thislen;
1674 +#if HAVE_MBRTOWC
1675 + mbstate_t thisstate;
1676 +#endif
1677 if (readlinebuffer (thisline, stdin) == 0)
1678 {
1679 if (ferror (stdin))
1680 @@ -332,6 +552,15 @@
1681 }
1682 thisfield = find_field (thisline);
1683 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1684 +#if HAVE_MBRTOWC
1685 + if (MB_CUR_MAX > 1)
1686 + {
1687 + thisstate = thisline->state;
1688 + match = !different_multi (thisfield, prevfield,
1689 + thislen, prevlen, thisstate, prevstate);
1690 + }
1691 + else
1692 +#endif
1693 match = !different (thisfield, prevfield, thislen, prevlen);
1694 match_count += match;
1695
1696 @@ -364,6 +593,9 @@
1697 SWAP_LINES (prevline, thisline);
1698 prevfield = thisfield;
1699 prevlen = thislen;
1700 +#if HAVE_MBRTOWC
1701 + prevstate = thisstate;
1702 +#endif
1703 if (!match)
1704 match_count = 0;
1705 }
1706 @@ -408,6 +640,19 @@
1707
1708 atexit (close_stdout);
1709
1710 +#if HAVE_MBRTOWC
1711 + if (MB_CUR_MAX > 1)
1712 + {
1713 + find_field = find_field_multi;
1714 + }
1715 + else
1716 +#endif
1717 + {
1718 + find_field = find_field_uni;
1719 + }
1720 +
1721 +
1722 +
1723 skip_chars = 0;
1724 skip_fields = 0;
1725 check_chars = SIZE_MAX;
1726 --- coreutils-5.93/src/expand.c.i18n 2005-08-12 08:16:25.000000000 +0100
1727 +++ coreutils-5.93/src/expand.c 2005-12-23 08:53:01.000000000 +0000
1728 @@ -38,11 +38,28 @@
1729 #include <stdio.h>
1730 #include <getopt.h>
1731 #include <sys/types.h>
1732 +
1733 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1734 +#if HAVE_WCHAR_H
1735 +# include <wchar.h>
1736 +#endif
1737 +
1738 #include "system.h"
1739 #include "error.h"
1740 #include "quote.h"
1741 #include "xstrndup.h"
1742
1743 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1744 + installation; work around this configuration error. */
1745 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1746 +# define MB_LEN_MAX 16
1747 +#endif
1748 +
1749 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1750 +#if HAVE_MBRTOWC && defined mbstate_t
1751 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1752 +#endif
1753 +
1754 /* The official name of this program (e.g., no `g' prefix). */
1755 #define PROGRAM_NAME "expand"
1756
1757 @@ -182,6 +199,7 @@
1758 stops = num_start + len - 1;
1759 }
1760 }
1761 +
1762 else
1763 {
1764 error (0, 0, _("tab size contains invalid character(s): %s"),
1765 @@ -364,6 +382,142 @@
1766 }
1767 }
1768
1769 +#if HAVE_MBRTOWC
1770 +static void
1771 +expand_multibyte (void)
1772 +{
1773 + FILE *fp; /* Input stream. */
1774 + mbstate_t i_state; /* Current shift state of the input stream. */
1775 + mbstate_t i_state_bak; /* Back up the I_STATE. */
1776 + mbstate_t o_state; /* Current shift state of the output stream. */
1777 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
1778 + char *bufpos; /* Next read position of BUF. */
1779 + size_t buflen = 0; /* The length of the byte sequence in buf. */
1780 + wchar_t wc; /* A gotten wide character. */
1781 + size_t mblength; /* The byte size of a multibyte character
1782 + which shows as same character as WC. */
1783 + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
1784 + int column = 0; /* Column on screen of the next char. */
1785 + int next_tab_column; /* Column the next tab stop is on. */
1786 + int convert = 1; /* If nonzero, perform translations. */
1787 +
1788 + fp = next_file ((FILE *) NULL);
1789 + if (fp == NULL)
1790 + return;
1791 +
1792 + memset (&o_state, '\0', sizeof(mbstate_t));
1793 + memset (&i_state, '\0', sizeof(mbstate_t));
1794 +
1795 + for (;;)
1796 + {
1797 + /* Refill the buffer BUF. */
1798 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
1799 + {
1800 + memmove (buf, bufpos, buflen);
1801 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
1802 + bufpos = buf;
1803 + }
1804 +
1805 + /* No character is left in BUF. */
1806 + if (buflen < 1)
1807 + {
1808 + fp = next_file (fp);
1809 +
1810 + if (fp == NULL)
1811 + break; /* No more files. */
1812 + else
1813 + {
1814 + memset (&i_state, '\0', sizeof(mbstate_t));
1815 + continue;
1816 + }
1817 + }
1818 +
1819 + /* Get a wide character. */
1820 + i_state_bak = i_state;
1821 + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
1822 +
1823 + switch (mblength)
1824 + {
1825 + case (size_t)-1: /* illegal byte sequence. */
1826 + case (size_t)-2:
1827 + mblength = 1;
1828 + i_state = i_state_bak;
1829 + if (convert)
1830 + {
1831 + ++column;
1832 + if (convert_entire_line == 0)
1833 + convert = 0;
1834 + }
1835 + putchar (*bufpos);
1836 + break;
1837 +
1838 + case 0: /* null. */
1839 + mblength = 1;
1840 + if (convert && convert_entire_line == 0)
1841 + convert = 0;
1842 + putchar ('\0');
1843 + break;
1844 +
1845 + default:
1846 + if (wc == L'\n') /* LF. */
1847 + {
1848 + tab_index = 0;
1849 + column = 0;
1850 + convert = 1;
1851 + putchar ('\n');
1852 + }
1853 + else if (wc == L'\t' && convert) /* Tab. */
1854 + {
1855 + if (tab_size == 0)
1856 + {
1857 + /* Do not let tab_index == first_free_tab;
1858 + stop when it is 1 less. */
1859 + while (tab_index < first_free_tab - 1
1860 + && column >= tab_list[tab_index])
1861 + tab_index++;
1862 + next_tab_column = tab_list[tab_index];
1863 + if (tab_index < first_free_tab - 1)
1864 + tab_index++;
1865 + if (column >= next_tab_column)
1866 + next_tab_column = column + 1;
1867 + }
1868 + else
1869 + next_tab_column = column + tab_size - column % tab_size;
1870 +
1871 + while (column < next_tab_column)
1872 + {
1873 + putchar (' ');
1874 + ++column;
1875 + }
1876 + }
1877 + else /* Others. */
1878 + {
1879 + if (convert)
1880 + {
1881 + if (wc == L'\b')
1882 + {
1883 + if (column > 0)
1884 + --column;
1885 + }
1886 + else
1887 + {
1888 + int width; /* The width of WC. */
1889 +
1890 + width = wcwidth (wc);
1891 + column += (width > 0) ? width : 0;
1892 + if (convert_entire_line == 0)
1893 + convert = 0;
1894 + }
1895 + }
1896 + fwrite (bufpos, sizeof(char), mblength, stdout);
1897 + }
1898 + }
1899 + buflen -= mblength;
1900 + bufpos += mblength;
1901 + }
1902 +}
1903 +#endif
1904 +
1905 int
1906 main (int argc, char **argv)
1907 {
1908 @@ -428,7 +582,12 @@
1909
1910 file_list = (optind < argc ? &argv[optind] : stdin_argv);
1911
1912 - expand ();
1913 +#if HAVE_MBRTOWC
1914 + if (MB_CUR_MAX > 1)
1915 + expand_multibyte ();
1916 + else
1917 +#endif
1918 + expand ();
1919
1920 if (have_read_stdin && fclose (stdin) != 0)
1921 error (EXIT_FAILURE, errno, "-");
1922 --- coreutils-5.93/src/fold.c.i18n 2005-08-12 08:29:38.000000000 +0100
1923 +++ coreutils-5.93/src/fold.c 2005-12-23 08:53:01.000000000 +0000
1924 @@ -23,11 +23,33 @@
1925 #include <getopt.h>
1926 #include <sys/types.h>
1927
1928 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1929 +#if HAVE_WCHAR_H
1930 +# include <wchar.h>
1931 +#endif
1932 +
1933 +/* Get iswprint(), iswblank(), wcwidth(). */
1934 +#if HAVE_WCTYPE_H
1935 +# include <wctype.h>
1936 +#endif
1937 +
1938 #include "system.h"
1939 #include "error.h"
1940 #include "quote.h"
1941 #include "xstrtol.h"
1942
1943 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1944 + installation; work around this configuration error. */
1945 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1946 +# undef MB_LEN_MAX
1947 +# define MB_LEN_MAX 16
1948 +#endif
1949 +
1950 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1951 +#if HAVE_MBRTOWC && defined mbstate_t
1952 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1953 +#endif
1954 +
1955 #define TAB_WIDTH 8
1956
1957 /* The official name of this program (e.g., no `g' prefix). */
1958 @@ -35,23 +57,44 @@
1959
1960 #define AUTHORS "David MacKenzie"
1961
1962 +#define FATAL_ERROR(Message) \
1963 + do \
1964 + { \
1965 + error (0, 0, (Message)); \
1966 + usage (2); \
1967 + } \
1968 + while (0)
1969 +
1970 +enum operating_mode
1971 +{
1972 + /* Fold texts by columns that are at the given positions. */
1973 + column_mode,
1974 +
1975 + /* Fold texts by bytes that are at the given positions. */
1976 + byte_mode,
1977 +
1978 + /* Fold texts by characters that are at the given positions. */
1979 + character_mode,
1980 +};
1981 +
1982 /* The name this program was run with. */
1983 char *program_name;
1984
1985 +/* The argument shows current mode. (Default: column_mode) */
1986 +static enum operating_mode operating_mode;
1987 +
1988 /* If nonzero, try to break on whitespace. */
1989 static bool break_spaces;
1990
1991 -/* If nonzero, count bytes, not column positions. */
1992 -static bool count_bytes;
1993 -
1994 /* If nonzero, at least one of the files we read was standard input. */
1995 static bool have_read_stdin;
1996
1997 -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
1998 +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
1999
2000 static struct option const longopts[] =
2001 {
2002 {"bytes", no_argument, NULL, 'b'},
2003 + {"characters", no_argument, NULL, 'c'},
2004 {"spaces", no_argument, NULL, 's'},
2005 {"width", required_argument, NULL, 'w'},
2006 {GETOPT_HELP_OPTION_DECL},
2007 @@ -81,6 +124,7 @@
2008 "), stdout);
2009 fputs (_("\
2010 -b, --bytes count bytes rather than columns\n\
2011 + -c, --characters count characters rather than columns\n\
2012 -s, --spaces break at spaces\n\
2013 -w, --width=WIDTH use WIDTH columns instead of 80\n\
2014 "), stdout);
2015 @@ -98,7 +142,7 @@
2016 static size_t
2017 adjust_column (size_t column, char c)
2018 {
2019 - if (!count_bytes)
2020 + if (operating_mode != byte_mode)
2021 {
2022 if (c == '\b')
2023 {
2024 @@ -117,35 +161,14 @@
2025 return column;
2026 }
2027
2028 -/* Fold file FILENAME, or standard input if FILENAME is "-",
2029 - to stdout, with maximum line length WIDTH.
2030 - Return true if successful. */
2031 -
2032 -static bool
2033 -fold_file (char *filename, size_t width)
2034 +static void
2035 +fold_text (FILE *istream, size_t width, int *saved_errno)
2036 {
2037 - FILE *istream;
2038 int c;
2039 size_t column = 0; /* Screen column where next char will go. */
2040 size_t offset_out = 0; /* Index in `line_out' for next char. */
2041 static char *line_out = NULL;
2042 static size_t allocated_out = 0;
2043 - int saved_errno;
2044 -
2045 - if (STREQ (filename, "-"))
2046 - {
2047 - istream = stdin;
2048 - have_read_stdin = true;
2049 - }
2050 - else
2051 - istream = fopen (filename, "r");
2052 -
2053 - if (istream == NULL)
2054 - {
2055 - error (0, errno, "%s", filename);
2056 - return false;
2057 - }
2058 -
2059 while ((c = getc (istream)) != EOF)
2060 {
2061 if (offset_out + 1 >= allocated_out)
2062 @@ -172,6 +195,15 @@
2063 bool found_blank = false;
2064 size_t logical_end = offset_out;
2065
2066 + /* If LINE_OUT holds no character yet,
2067 + put the new character C in LINE_OUT
2068 + even if column is bigger than width. */
2069 + if (offset_out == 0)
2070 + {
2071 + line_out[offset_out++] = c;
2072 + continue;
2073 + }
2074 +
2075 /* Look for the last blank. */
2076 while (logical_end)
2077 {
2078 @@ -218,11 +250,225 @@
2079 line_out[offset_out++] = c;
2080 }
2081
2082 - saved_errno = errno;
2083 + *saved_errno = errno; /* Hand the read status back to the caller. */
2084 +
2085 + if (offset_out)
2086 + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
2087 +
2088 + /* line_out is static and reused by the next call: it must not be freed. */
2089 +}
2090 +
2091 +#if HAVE_MBRTOWC
2092 +static void
2093 +fold_multibyte_text (FILE *istream, int width, int *saved_errno)
2094 +{
2095 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
2096 + size_t buflen = 0; /* The length of the byte sequence in buf. */
2097 + char *bufpos; /* Next read position of BUF. */
2098 + wint_t wc; /* A gotten wide character. */
2099 + size_t mblength; /* The byte size of a multibyte character which shows
2100 + as same character as WC. */
2101 + mbstate_t state, state_bak; /* State of the stream. */
2102 + int convfail; /* 1, when conversion is failed. Otherwise 0. */
2103 +
2104 + char *line_out = NULL;
2105 + size_t offset_out = 0; /* Index in `line_out' for next char. */
2106 + size_t allocated_out = 0;
2107 +
2108 + int increment;
2109 + size_t column = 0;
2110 +
2111 + size_t last_blank_pos;
2112 + size_t last_blank_column;
2113 + int is_blank_seen;
2114 + int last_blank_increment;
2115 + int is_bs_following_last_blank;
2116 + size_t bs_following_last_blank_num;
2117 + int is_cr_after_last_blank;
2118 +
2119 +#define CLEAR_FLAGS \
2120 + do \
2121 + { \
2122 + last_blank_pos = 0; \
2123 + last_blank_column = 0; \
2124 + is_blank_seen = 0; \
2125 + is_bs_following_last_blank = 0; \
2126 + bs_following_last_blank_num = 0; \
2127 + is_cr_after_last_blank = 0; \
2128 + } \
2129 + while (0)
2130 +
2131 +#define START_NEW_LINE \
2132 + do \
2133 + { \
2134 + putchar ('\n'); \
2135 + column = 0; \
2136 + offset_out = 0; \
2137 + CLEAR_FLAGS; \
2138 + } \
2139 + while (0)
2140 +
2141 + CLEAR_FLAGS;
2142 + memset (&state, '\0', sizeof(mbstate_t));
2143 +
2144 + for (;; bufpos += mblength, buflen -= mblength)
2145 + {
2146 + if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
2147 + {
2148 + memmove (buf, bufpos, buflen);
2149 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
2150 + bufpos = buf;
2151 + }
2152 +
2153 + if (buflen < 1)
2154 + break;
2155 +
2156 + /* Get a wide character. */
2157 + convfail = 0;
2158 + state_bak = state;
2159 + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
2160 +
2161 + switch (mblength)
2162 + {
2163 + case (size_t)-1:
2164 + case (size_t)-2:
2165 + convfail++;
2166 + state = state_bak;
2167 + /* Fall through. */
2168 +
2169 + case 0:
2170 + mblength = 1;
2171 + break;
2172 + }
2173 +
2174 +rescan:
2175 + if (operating_mode == byte_mode) /* byte mode */
2176 + increment = mblength;
2177 + else if (operating_mode == character_mode) /* character mode */
2178 + increment = 1;
2179 + else /* column mode */
2180 + {
2181 + if (convfail)
2182 + increment = 1;
2183 + else
2184 + {
2185 + switch (wc)
2186 + {
2187 + case L'\n':
2188 + fwrite (line_out, sizeof(char), offset_out, stdout);
2189 + START_NEW_LINE;
2190 + continue;
2191 +
2192 + case L'\b':
2193 + increment = (column > 0) ? -1 : 0;
2194 + break;
2195 +
2196 + case L'\r':
2197 + increment = -1 * column;
2198 + break;
2199 +
2200 + case L'\t':
2201 + increment = 8 - column % 8;
2202 + break;
2203 +
2204 + default:
2205 + increment = wcwidth (wc);
2206 + increment = (increment < 0) ? 0 : increment;
2207 + }
2208 + }
2209 + }
2210 +
2211 + if (column + increment > width && break_spaces && last_blank_pos)
2212 + {
2213 + fwrite (line_out, sizeof(char), last_blank_pos, stdout);
2214 + putchar ('\n');
2215 +
2216 + offset_out = offset_out - last_blank_pos;
2217 + column = column - last_blank_column + ((is_cr_after_last_blank)
2218 + ? last_blank_increment : bs_following_last_blank_num);
2219 + memmove (line_out, line_out + last_blank_pos, offset_out);
2220 + CLEAR_FLAGS;
2221 + goto rescan;
2222 + }
2223 +
2224 + if (column + increment > width && column != 0)
2225 + {
2226 + fwrite (line_out, sizeof(char), offset_out, stdout);
2227 + START_NEW_LINE;
2228 + goto rescan;
2229 + }
2230 +
2231 + if (allocated_out < offset_out + mblength)
2232 + {
2233 + allocated_out += 1024;
2234 + line_out = xrealloc (line_out, allocated_out);
2235 + }
2236 +
2237 + memcpy (line_out + offset_out, bufpos, mblength);
2238 + offset_out += mblength;
2239 + column += increment;
2240 +
2241 + if (is_blank_seen && !convfail && wc == L'\r')
2242 + is_cr_after_last_blank = 1;
2243 +
2244 + if (is_bs_following_last_blank && !convfail && wc == L'\b')
2245 + ++bs_following_last_blank_num;
2246 + else
2247 + is_bs_following_last_blank = 0;
2248 +
2249 + if (break_spaces && !convfail && iswblank (wc))
2250 + {
2251 + last_blank_pos = offset_out;
2252 + last_blank_column = column;
2253 + is_blank_seen = 1;
2254 + last_blank_increment = increment;
2255 + is_bs_following_last_blank = 1;
2256 + bs_following_last_blank_num = 0;
2257 + is_cr_after_last_blank = 0;
2258 + }
2259 + }
2260 +
2261 + *saved_errno = errno;
2262
2263 if (offset_out)
2264 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
2265
2266 + free(line_out);
2267 +}
2268 +#endif
2269 +
2270 +/* Fold file FILENAME, or standard input if FILENAME is "-",
2271 + to stdout, with maximum line length WIDTH.
2272 + Return 0 if successful, 1 if an error occurs. */
2273 +
2274 +static int
2275 +fold_file (char *filename, int width)
2276 +{
2277 + FILE *istream;
2278 + int saved_errno;
2279 +
2280 + if (STREQ (filename, "-"))
2281 + {
2282 + istream = stdin;
2283 + have_read_stdin = 1;
2284 + }
2285 + else
2286 + istream = fopen (filename, "r");
2287 +
2288 + if (istream == NULL)
2289 + {
2290 + error (0, errno, "%s", filename);
2291 + return 1;
2292 + }
2293 +
2294 + /* Define how ISTREAM is being folded. */
2295 +#if HAVE_MBRTOWC
2296 + if (MB_CUR_MAX > 1)
2297 + fold_multibyte_text (istream, width, &saved_errno);
2298 + else
2299 +#endif
2300 + fold_text (istream, width, &saved_errno);
2301 +
2302 if (ferror (istream))
2303 {
2304 error (0, saved_errno, "%s", filename);
2305 @@ -255,7 +501,8 @@
2306
2307 atexit (close_stdout);
2308
2309 - break_spaces = count_bytes = have_read_stdin = false;
2310 + operating_mode = column_mode;
2311 + break_spaces = have_read_stdin = false;
2312
2313 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
2314 {
2315 @@ -264,7 +511,15 @@
2316 switch (optc)
2317 {
2318 case 'b': /* Count bytes rather than columns. */
2319 - count_bytes = true;
2320 + if (operating_mode != column_mode)
2321 + FATAL_ERROR (_("only one way of folding may be specified"));
2322 + operating_mode = byte_mode;
2323 + break;
2324 +
2325 + case 'c':
2326 + if (operating_mode != column_mode)
2327 + FATAL_ERROR (_("only one way of folding may be specified"));
2328 + operating_mode = character_mode;
2329 break;
2330
2331 case 's': /* Break at word boundaries. */
2332 --- coreutils-5.93/src/join.c.i18n 2005-08-12 08:16:25.000000000 +0100
2333 +++ coreutils-5.93/src/join.c 2005-12-23 08:53:01.000000000 +0000
2334 @@ -23,16 +23,30 @@
2335 #include <sys/types.h>
2336 #include <getopt.h>
2337
2338 +/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
2339 +#if HAVE_WCHAR_H
2340 +# include <wchar.h>
2341 +#endif
2342 +
2343 +/* Get iswblank(), towupper(). */
2344 +#if HAVE_WCTYPE_H
2345 +# include <wctype.h>
2346 +#endif
2347 +
2348 #include "system.h"
2349 #include "error.h"
2350 #include "hard-locale.h"
2351 #include "linebuffer.h"
2352 -#include "memcasecmp.h"
2353 #include "quote.h"
2354 #include "stdio--.h"
2355 #include "xmemcoll.h"
2356 #include "xstrtol.h"
2357
2358 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
2359 +#if HAVE_MBRTOWC && defined mbstate_t
2360 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
2361 +#endif
2362 +
2363 /* The official name of this program (e.g., no `g' prefix). */
2364 #define PROGRAM_NAME "join"
2365
2366 @@ -104,10 +118,12 @@
2367 /* Last element in `outlist', where a new element can be added. */
2368 static struct outlist *outlist_end = &outlist_head;
2369
2370 -/* Tab character separating fields. If negative, fields are separated
2371 - by any nonempty string of blanks, otherwise by exactly one
2372 - tab character whose value (when cast to unsigned char) equals TAB. */
2373 -static int tab = -1;
2374 +/* Tab character separating fields. If NULL, fields are separated
2375 + by any nonempty string of blanks. */
2376 +static char *tab = NULL;
2377 +
2378 +/* The number of bytes used for tab. */
2379 +static size_t tablen = 0;
2380
2381 static struct option const longopts[] =
2382 {
2383 @@ -197,6 +213,8 @@
2384
2385 /* Fill in the `fields' structure in LINE. */
2386
2387 +/* Fill in the `fields' structure in LINE. */
2388 +
2389 static void
2390 xfields (struct line *line)
2391 {
2392 @@ -206,10 +224,11 @@
2393 if (ptr == lim)
2394 return;
2395
2396 - if (0 <= tab)
2397 + if (tab != NULL)
2398 {
2399 + unsigned char t = tab[0];
2400 char *sep;
2401 - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
2402 + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
2403 extract_field (line, ptr, sep - ptr);
2404 }
2405 else
2406 @@ -236,6 +255,148 @@
2407 extract_field (line, ptr, lim - ptr);
2408 }
2409
2410 +#if HAVE_MBRTOWC
2411 +static void
2412 +xfields_multibyte (struct line *line)
2413 +{
2414 + char *ptr = line->buf.buffer;
2415 + char const *lim = ptr + line->buf.length - 1;
2416 + wchar_t wc = 0;
2417 + size_t mblength = 1;
2418 + mbstate_t state, state_bak;
2419 +
2420 + memset (&state, 0, sizeof (mbstate_t));
2421 +
2422 + if (ptr == lim)
2423 + return;
2424 +
2425 + if (tab != NULL)
2426 + {
2427 + unsigned char t = tab[0];
2428 + char *sep = ptr;
2429 + for (; ptr < lim; ptr = sep + mblength)
2430 + {
2431 + sep = ptr;
2432 + while (sep < lim)
2433 + {
2434 + state_bak = state;
2435 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
2436 +
2437 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2438 + {
2439 + mblength = 1;
2440 + state = state_bak;
2441 + }
2442 + mblength = (mblength < 1) ? 1 : mblength;
2443 +
2444 + if (mblength == tablen && !memcmp (sep, tab, mblength))
2445 + break;
2446 + else
2447 + {
2448 + sep += mblength;
2449 + continue;
2450 + }
2451 + }
2452 +
2453 + if (sep == lim)
2454 + break;
2455 +
2456 + extract_field (line, ptr, sep - ptr);
2457 + }
2458 + }
2459 + else
2460 + {
2461 + /* Skip leading blanks before the first field. */
2462 + while(ptr < lim)
2463 + {
2464 + state_bak = state;
2465 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
2466 +
2467 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2468 + {
2469 + mblength = 1;
2470 + state = state_bak;
2471 + break;
2472 + }
2473 + mblength = (mblength < 1) ? 1 : mblength;
2474 +
2475 + if (!iswblank(wc))
2476 + break;
2477 + ptr += mblength;
2478 + }
2479 +
2480 + do
2481 + {
2482 + char *sep;
2483 + state_bak = state;
2484 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
2485 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2486 + {
2487 + mblength = 1;
2488 + state = state_bak;
2489 + break;
2490 + }
2491 + mblength = (mblength < 1) ? 1 : mblength;
2492 +
2493 + sep = ptr + mblength;
2494 + while (sep != lim)
2495 + {
2496 + state_bak = state;
2497 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
2498 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2499 + {
2500 + mblength = 1;
2501 + state = state_bak;
2502 + break;
2503 + }
2504 + mblength = (mblength < 1) ? 1 : mblength;
2505 +
2506 + if (iswblank (wc))
2507 + break;
2508 +
2509 + sep += mblength;
2510 + }
2511 +
2512 + extract_field (line, ptr, sep - ptr);
2513 + if (sep == lim)
2514 + return;
2515 +
2516 + state_bak = state;
2517 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
2518 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2519 + {
2520 + mblength = 1;
2521 + state = state_bak;
2522 + break;
2523 + }
2524 + mblength = (mblength < 1) ? 1 : mblength;
2525 +
2526 + ptr = sep + mblength;
2527 + while (ptr != lim)
2528 + {
2529 + state_bak = state;
2530 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
2531 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2532 + {
2533 + mblength = 1;
2534 + state = state_bak;
2535 + break;
2536 + }
2537 + mblength = (mblength < 1) ? 1 : mblength;
2538 +
2539 + if (!iswblank (wc))
2540 + break;
2541 +
2542 + ptr += mblength;
2543 + }
2544 + }
2545 + while (ptr != lim);
2546 + }
2547 +
2548 + extract_field (line, ptr, lim - ptr);
2549 +}
2550 +#endif
2551 +
2552 /* Read a line from FP into LINE and split it into fields.
2553 Return true if successful. */
2554
2555 @@ -256,6 +417,11 @@
2556 line->nfields_allocated = 0;
2557 line->nfields = 0;
2558 line->fields = NULL;
2559 +#if HAVE_MBRTOWC
2560 + if (MB_CUR_MAX > 1)
2561 + xfields_multibyte (line);
2562 + else
2563 +#endif
2564 xfields (line);
2565 return true;
2566 }
2567 @@ -310,56 +476,114 @@
2568 keycmp (struct line const *line1, struct line const *line2)
2569 {
2570 /* Start of field to compare in each file. */
2571 - char *beg1;
2572 - char *beg2;
2573 -
2574 - size_t len1;
2575 - size_t len2; /* Length of fields to compare. */
2576 + char *beg[2];
2577 + char *copy[2];
2578 + size_t len[2]; /* Length of fields to compare. */
2579 int diff;
2580 + int i, j;
2581
2582 if (join_field_1 < line1->nfields)
2583 {
2584 - beg1 = line1->fields[join_field_1].beg;
2585 - len1 = line1->fields[join_field_1].len;
2586 + beg[0] = line1->fields[join_field_1].beg;
2587 + len[0] = line1->fields[join_field_1].len;
2588 }
2589 else
2590 {
2591 - beg1 = NULL;
2592 - len1 = 0;
2593 + beg[0] = NULL;
2594 + len[0] = 0;
2595 }
2596
2597 if (join_field_2 < line2->nfields)
2598 {
2599 - beg2 = line2->fields[join_field_2].beg;
2600 - len2 = line2->fields[join_field_2].len;
2601 + beg[1] = line2->fields[join_field_2].beg;
2602 + len[1] = line2->fields[join_field_2].len;
2603 }
2604 else
2605 {
2606 - beg2 = NULL;
2607 - len2 = 0;
2608 + beg[1] = NULL;
2609 + len[1] = 0;
2610 }
2611
2612 - if (len1 == 0)
2613 - return len2 == 0 ? 0 : -1;
2614 - if (len2 == 0)
2615 + if (len[0] == 0)
2616 + return len[1] == 0 ? 0 : -1;
2617 + if (len[1] == 0)
2618 return 1;
2619
2620 if (ignore_case)
2621 {
2622 - /* FIXME: ignore_case does not work with NLS (in particular,
2623 - with multibyte chars). */
2624 - diff = memcasecmp (beg1, beg2, MIN (len1, len2));
2625 +#ifdef HAVE_MBRTOWC
2626 + if (MB_CUR_MAX > 1)
2627 + {
2628 + size_t mblength;
2629 + wchar_t wc, uwc;
2630 + mbstate_t state, state_bak;
2631 +
2632 + memset (&state, '\0', sizeof (mbstate_t));
2633 +
2634 + for (i = 0; i < 2; i++)
2635 + {
2636 + copy[i] = memset (alloca (len[i] + 1), '\0', len[i] + 1);
2637 +
2638 + for (j = 0; j < MIN (len[0], len[1]);)
2639 + {
2640 + state_bak = state;
2641 + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
2642 +
2643 + switch (mblength)
2644 + {
2645 + case (size_t) -1:
2646 + case (size_t) -2:
2647 + state = state_bak;
2648 + /* Fall through */
2649 + case 0:
2650 + mblength = 1;
2651 + break;
2652 +
2653 + default:
2654 + uwc = towupper (wc);
2655 +
2656 + if (uwc != wc)
2657 + {
2658 + mbstate_t state_wc;
2659 +
2660 + memset (&state_wc, '\0', sizeof (mbstate_t));
2661 + wcrtomb (copy[i] + j, uwc, &state_wc);
2662 + }
2663 + else
2664 + memcpy (copy[i] + j, beg[i] + j, mblength);
2665 + }
2666 + j += mblength;
2667 + }
2668 + copy[i][j] = '\0';
2669 + }
2670 + }
2671 + else
2672 +#endif
2673 + {
2674 + for (i = 0; i < 2; i++)
2675 + {
2676 + copy[i] = memset (alloca (len[i] + 1), '\0', len[i] + 1);
2677 +
2678 + for (j = 0; j < MIN (len[0], len[1]); j++)
2679 + copy[i][j] = toupper ((unsigned char) beg[i][j]);
2680 +
2681 + copy[i][j] = '\0';
2682 + }
2683 + }
2684 }
2685 else
2686 {
2687 - if (hard_LC_COLLATE)
2688 - return xmemcoll (beg1, len1, beg2, len2);
2689 - diff = memcmp (beg1, beg2, MIN (len1, len2));
2690 + copy[0] = beg[0];
2691 + copy[1] = beg[1];
2692 }
2693
2694 + if (HAVE_SETLOCALE && hard_LC_COLLATE)
2695 + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
2696 + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
2697 +
2698 if (diff)
2699 return diff;
2700 - return len1 < len2 ? -1 : len1 != len2;
2701 + return len[0] < len[1] ? -1 : len[0] != len[1];
2702 }
2703
2704 /* Print field N of LINE if it exists and is nonempty, otherwise
2703
2704 /* Print field N of LINE if it exists and is nonempty, otherwise
2705 @@ -384,11 +608,18 @@
2706
2707 /* Print the join of LINE1 and LINE2. */
2708
2709 +#define PUT_TAB_CHAR \
2710 + do \
2711 + { \
2712 + (tab != NULL) ? \
2713 + fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
2714 + } \
2715 + while (0)
2716 +
2717 static void
2718 prjoin (struct line const *line1, struct line const *line2)
2719 {
2720 const struct outlist *outlist;
2721 - char output_separator = tab < 0 ? ' ' : tab;
2722
2723 outlist = outlist_head.next;
2724 if (outlist)
2725 @@ -404,12 +635,12 @@
2726 if (o->file == 0)
2727 {
2728 if (line1 == &uni_blank)
2729 - {
2730 + {
2731 line = line2;
2732 field = join_field_2;
2733 }
2734 else
2735 - {
2736 + {
2737 line = line1;
2738 field = join_field_1;
2739 }
2740 @@ -423,7 +654,7 @@
2741 o = o->next;
2742 if (o == NULL)
2743 break;
2744 - putchar (output_separator);
2745 + PUT_TAB_CHAR;
2746 }
2747 putchar ('\n');
2748 }
2749 @@ -441,23 +672,23 @@
2750 prfield (join_field_1, line1);
2751 for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
2752 {
2753 - putchar (output_separator);
2754 + PUT_TAB_CHAR;
2755 prfield (i, line1);
2756 }
2757 for (i = join_field_1 + 1; i < line1->nfields; ++i)
2758 {
2759 - putchar (output_separator);
2760 + PUT_TAB_CHAR;
2761 prfield (i, line1);
2762 }
2763
2764 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
2765 {
2766 - putchar (output_separator);
2767 + PUT_TAB_CHAR;
2768 prfield (i, line2);
2769 }
2770 for (i = join_field_2 + 1; i < line2->nfields; ++i)
2771 {
2772 - putchar (output_separator);
2773 + PUT_TAB_CHAR;
2774 prfield (i, line2);
2775 }
2776 putchar ('\n');
2777 @@ -869,20 +1100,41 @@
2778
2779 case 't':
2780 {
2781 - unsigned char newtab = optarg[0];
2782 - if (! newtab)
2783 + char *newtab;
2784 + size_t newtablen;
2785 + if (! optarg[0])
2786 error (EXIT_FAILURE, 0, _("empty tab"));
2787 - if (optarg[1])
2788 + newtab = xstrdup (optarg);
2789 +#if HAVE_MBRTOWC
2790 + if (MB_CUR_MAX > 1)
2791 + {
2792 + mbstate_t state;
2793 +
2794 + memset (&state, 0, sizeof (mbstate_t));
2795 + newtablen = mbrtowc (NULL, newtab,
2796 + strnlen (newtab, MB_LEN_MAX),
2797 + &state);
2798 + if (newtablen == (size_t) 0
2799 + || newtablen == (size_t) -1
2800 + || newtablen == (size_t) -2)
2801 + newtablen = 1;
2802 + }
2803 + else
2804 +#endif
2805 + newtablen = 1;
2806 +
2807 + if (newtablen == 1 && newtab[1])
2808 + {
2809 + if (STREQ (newtab, "\\0"))
2810 + newtab[0] = '\0';
2811 + }
2812 + if (tab != NULL && strcmp (tab, newtab))
2813 {
2814 - if (STREQ (optarg, "\\0"))
2815 - newtab = '\0';
2816 - else
2817 - error (EXIT_FAILURE, 0, _("multi-character tab %s"),
2818 - quote (optarg));
2819 + free (newtab);
2820 + error (EXIT_FAILURE, 0, _("incompatible tabs"));
2821 }
2822 - if (0 <= tab && tab != newtab)
2823 - error (EXIT_FAILURE, 0, _("incompatible tabs"));
2824 tab = newtab;
2825 + tablen = newtablen;
2826 }
2827 break;
2828
2829 --- coreutils-5.93/src/unexpand.c.i18n 2005-08-12 08:16:25.000000000 +0100
2830 +++ coreutils-5.93/src/unexpand.c 2005-12-23 08:53:01.000000000 +0000
2831 @@ -39,11 +39,28 @@
2832 #include <stdio.h>
2833 #include <getopt.h>
2834 #include <sys/types.h>
2835 +
2836 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
2837 +#if HAVE_WCHAR_H
2838 +# include <wchar.h>
2839 +#endif
2840 +
2841 #include "system.h"
2842 #include "error.h"
2843 #include "quote.h"
2844 #include "xstrndup.h"
2845
2846 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
2847 + installation; work around this configuration error. */
2848 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
2849 +# define MB_LEN_MAX 16
2850 +#endif
2851 +
2852 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
2853 +#if HAVE_MBRTOWC && defined mbstate_t
2854 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
2855 +#endif
2856 +
2857 /* The official name of this program (e.g., no `g' prefix). */
2858 #define PROGRAM_NAME "unexpand"
2859
2860 @@ -110,6 +127,208 @@
2861 {NULL, 0, NULL, 0}
2862 };
2863
2864 +static FILE *next_file (FILE *fp);
2865 +
2866 +#if HAVE_MBRTOWC
2867 +static void
2868 +unexpand_multibyte (void)
2869 +{
2870 + FILE *fp; /* Input stream. */
2871 + mbstate_t i_state; /* Current shift state of the input stream. */
2872 + mbstate_t i_state_bak; /* Back up the I_STATE. */
2873 + mbstate_t o_state; /* Current shift state of the output stream. */
2874 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
2875 + char *bufpos = buf; /* Next read position of BUF. */
2876 + size_t buflen = 0; /* The length of the byte sequence in buf. */
2877 + wint_t wc; /* The wide character just decoded, or WEOF. */
2878 + size_t mblength; /* The byte length of the multibyte sequence
2879 + from which WC was decoded. */
2880 +
2881 + /* Index in `tab_list' of next tabstop: */
2882 + int tab_index = 0; /* For calculating width of pending tabs. */
2883 + int print_tab_index = 0; /* For printing as many tabs as possible. */
2884 + unsigned int column = 0; /* Column on screen of next char. */
2885 + int next_tab_column; /* Column the next tab stop is on. */
2886 + int convert = 1; /* If nonzero, perform translations. */
2887 + unsigned int pending = 0; /* Pending columns of blanks. */
2888 +
2889 + fp = next_file ((FILE *) NULL);
2890 + if (fp == NULL)
2891 + return;
2892 +
2893 + memset (&o_state, '\0', sizeof(mbstate_t));
2894 + memset (&i_state, '\0', sizeof(mbstate_t));
2895 +
2896 + for (;;)
2897 + {
2898 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
2899 + {
2900 + memmove (buf, bufpos, buflen);
2901 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
2902 + bufpos = buf;
2903 + }
2904 +
2905 + /* Get a wide character. */
2906 + if (buflen < 1)
2907 + {
2908 + mblength = 1;
2909 + wc = WEOF;
2910 + }
2911 + else
2912 + {
2913 + i_state_bak = i_state;
2914 + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state);
2915 + }
2916 +
2917 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2918 + {
2919 + i_state = i_state_bak;
2920 + wc = L'\0';
2921 + }
2922 +
2923 + if (wc == L' ' && convert && column < INT_MAX)
2924 + {
2925 + ++pending;
2926 + ++column;
2927 + }
2928 + else if (wc == L'\t' && convert)
2929 + {
2930 + if (tab_size == 0)
2931 + {
2932 + /* Do not let tab_index == first_free_tab;
2933 + stop when it is 1 less. */
2934 + while (tab_index < first_free_tab - 1
2935 + && column >= tab_list[tab_index])
2936 + tab_index++;
2937 + next_tab_column = tab_list[tab_index];
2938 + if (tab_index < first_free_tab - 1)
2939 + tab_index++;
2940 + if (column >= next_tab_column)
2941 + {
2942 + convert = 0; /* Ran out of tab stops. */
2943 + goto flush_pend_mb;
2944 + }
2945 + }
2946 + else
2947 + {
2948 + next_tab_column = column + tab_size - column % tab_size;
2949 + }
2950 + pending += next_tab_column - column;
2951 + column = next_tab_column;
2952 + }
2953 + else
2954 + {
2955 +flush_pend_mb:
2956 + /* Flush pending spaces. Print as many tabs as possible,
2957 + then print the rest as spaces. */
2958 + if (pending == 1)
2959 + {
2960 + putchar (' ');
2961 + pending = 0;
2962 + }
2963 + column -= pending;
2964 + while (pending > 0)
2965 + {
2966 + if (tab_size == 0)
2967 + {
2968 + /* Do not let print_tab_index == first_free_tab;
2969 + stop when it is 1 less. */
2970 + while (print_tab_index < first_free_tab - 1
2971 + && column >= tab_list[print_tab_index])
2972 + print_tab_index++;
2973 + next_tab_column = tab_list[print_tab_index];
2974 + if (print_tab_index < first_free_tab - 1)
2975 + print_tab_index++;
2976 + }
2977 + else
2978 + {
2979 + next_tab_column =
2980 + column + tab_size - column % tab_size;
2981 + }
2982 + if (next_tab_column - column <= pending)
2983 + {
2984 + putchar ('\t');
2985 + pending -= next_tab_column - column;
2986 + column = next_tab_column;
2987 + }
2988 + else
2989 + {
2990 + --print_tab_index;
2991 + column += pending;
2992 + while (pending != 0)
2993 + {
2994 + putchar (' ');
2995 + pending--;
2996 + }
2997 + }
2998 + }
2999 +
3000 + if (wc == WEOF)
3001 + {
3002 + fp = next_file (fp);
3003 + if (fp == NULL)
3004 + break; /* No more files. */
3005 + else
3006 + {
3007 + memset (&i_state, '\0', sizeof(mbstate_t));
3008 + continue;
3009 + }
3010 + }
3011 +
3012 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
3013 + {
3014 + if (convert)
3015 + {
3016 + ++column;
3017 + if (convert_entire_line == 0)
3018 + convert = 0;
3019 + }
3020 + mblength = 1;
3021 + putchar (*bufpos);
3022 + }
3023 + else if (mblength == 0)
3024 + {
3025 + if (convert && convert_entire_line == 0)
3026 + convert = 0;
3027 + mblength = 1;
3028 + putchar ('\0');
3029 + }
3030 + else
3031 + {
3032 + if (convert)
3033 + {
3034 + if (wc == L'\b')
3035 + {
3036 + if (column > 0)
3037 + --column;
3038 + }
3039 + else
3040 + {
3041 + int width; /* The width of WC. */
3042 +
3043 + width = wcwidth (wc);
3044 + column += (width > 0) ? width : 0;
3045 + if (convert_entire_line == 0)
3046 + convert = 0;
3047 + }
3048 + }
3049 +
3050 + if (wc == L'\n')
3051 + {
3052 + tab_index = print_tab_index = 0;
3053 + column = pending = 0;
3054 + convert = 1;
3055 + }
3056 + fwrite (bufpos, sizeof(char), mblength, stdout);
3057 + }
3058 + }
3059 + buflen -= mblength;
3060 + bufpos += mblength;
3061 + }
3062 +}
3063 +#endif
3064 +
3065 +
3066 void
3067 usage (int status)
3068 {
3069 @@ -532,7 +751,12 @@
3070
3071 file_list = (optind < argc ? &argv[optind] : stdin_argv);
3072
3073 - unexpand ();
3074 +#if HAVE_MBRTOWC
3075 + if (MB_CUR_MAX > 1)
3076 + unexpand_multibyte ();
3077 + else
3078 +#endif
3079 + unexpand ();
3080
3081 if (have_read_stdin && fclose (stdin) != 0)
3082 error (EXIT_FAILURE, errno, "-");
3083 --- coreutils-5.93/src/sort.c.i18n 2005-10-07 19:48:28.000000000 +0100
3084 +++ coreutils-5.93/src/sort.c 2005-12-23 10:38:44.000000000 +0000
3085 @@ -23,9 +23,18 @@
3086
3087 #include <config.h>
3088
3089 +#include <assert.h>
3090 #include <getopt.h>
3091 #include <sys/types.h>
3092 #include <signal.h>
3093 +#if HAVE_WCHAR_H
3094 +# include <wchar.h>
3095 +#endif
3096 +/* Get isw* functions. */
3097 +#if HAVE_WCTYPE_H
3098 +# include <wctype.h>
3099 +#endif
3100 +
3101 #include "system.h"
3102 #include "error.h"
3103 #include "hard-locale.h"
3104 @@ -95,14 +104,38 @@
3105 /* Thousands separator; if -1, then there isn't one. */
3106 static int thousands_sep;
3107
3108 +static int force_general_numcompare = 0;
3109 +
3110 /* Nonzero if the corresponding locales are hard. */
3111 static bool hard_LC_COLLATE;
3112 -#if HAVE_NL_LANGINFO
3113 +#if HAVE_LANGINFO_CODESET
3114 static bool hard_LC_TIME;
3115 #endif
3116
3117 #define NONZERO(x) ((x) != 0)
3118
3119 +/* Set MBLENGTH to the byte length of the character at PTR (1 if invalid). */
3120 +#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
3121 + do \
3122 + { \
3123 + wchar_t wc; \
3124 + mbstate_t state_bak; \
3125 + \
3126 + state_bak = (STATE); \
3127 + (MBLENGTH) = mbrtowc (&wc, (PTR), (LIM) - (PTR), &(STATE)); \
3128 + \
3129 + switch (MBLENGTH) \
3130 + { \
3131 + case (size_t)-1: \
3132 + case (size_t)-2: \
3133 + (STATE) = state_bak; \
3134 + /* Fall through. */ \
3135 + case 0: \
3136 + (MBLENGTH) = 1; \
3137 + } \
3138 + } \
3139 + while (0)
3140 +
3141 /* The kind of blanks for '-b' to skip in various options. */
3142 enum blanktype { bl_start, bl_end, bl_both };
3143
3144 @@ -239,13 +272,11 @@
3145 they were read if all keys compare equal. */
3146 static bool stable;
3147
3148 -/* If TAB has this value, blanks separate fields. */
3149 -enum { TAB_DEFAULT = CHAR_MAX + 1 };
3150 -
3151 -/* Tab character separating fields. If TAB_DEFAULT, then fields are
3152 +/* Tab character separating fields. If tab_length is 0, then fields are
3153 separated by the empty string between a non-blank character and a blank
3154 character. */
3155 -static int tab = TAB_DEFAULT;
3156 +static char tab[MB_LEN_MAX + 1];
3157 +static size_t tab_length = 0;
3158
3159 /* Flag to remove consecutive duplicate lines from the output.
3160 Only the last of a sequence of equal lines will be output. */
3161 @@ -392,6 +423,44 @@
3162 static struct tempnode *volatile temphead;
3163 static struct tempnode *volatile *temptail = &temphead;
3164
3165 +/* Function pointers. */
3166 +static void
3167 +(*inittables) (void);
3168 +static char *
3169 +(*begfield) (const struct line*, const struct keyfield *);
3170 +static char *
3171 +(*limfield) (const struct line*, const struct keyfield *);
3172 +static int
3173 +(*getmonth) (char const *, size_t);
3174 +static int
3175 +(*keycompare) (const struct line *, const struct line *);
3176 +static int
3177 +(*numcompare) (const char *, const char *);
3178 +
3179 +/* Test whether the multibyte character at the start of STR is a blank.
3180 + Set *LENGTH to the byte length of the examined multibyte character. */
3181 +#if HAVE_MBRTOWC
3182 +static int
3183 +ismbblank (const char *str, size_t len, size_t *length)
3184 +{
3185 + size_t mblength;
3186 + wchar_t wc;
3187 + mbstate_t state;
3188 +
3189 + memset (&state, '\0', sizeof(mbstate_t));
3190 + mblength = mbrtowc (&wc, str, len, &state);
3191 +
3192 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
3193 + {
3194 + *length = 1;
3195 + return 0;
3196 + }
3197 +
3198 + *length = (mblength < 1) ? 1 : mblength;
3199 + return iswblank (wc);
3200 +}
3201 +#endif
3202 +
3203 /* Clean up any remaining temporary files. */
3204
3205 static void
3206 @@ -545,7 +614,7 @@
3207 free (node);
3208 }
3209
3210 -#if HAVE_NL_LANGINFO
3211 +#if HAVE_LANGINFO_CODESET
3212
3213 static int
3214 struct_month_cmp (const void *m1, const void *m2)
3215 @@ -560,7 +629,7 @@
3216 /* Initialize the character class tables. */
3217
3218 static void
3219 -inittables (void)
3220 +inittables_uni (void)
3221 {
3222 size_t i;
3223
3224 @@ -572,7 +641,7 @@
3225 fold_toupper[i] = (ISLOWER (i) ? toupper (i) : i);
3226 }
3227
3228 -#if HAVE_NL_LANGINFO
3229 +#if HAVE_LANGINFO_CODESET
3230 /* If we're not in the "C" locale, read different names for months. */
3231 if (hard_LC_TIME)
3232 {
3233 @@ -598,6 +667,64 @@
3234 #endif
3235 }
3236
3237 +#if HAVE_MBRTOWC
3238 +static void
3239 +inittables_mb (void)
3240 +{
3241 + int i, j, k, l;
3242 + char *name, *s;
3243 + size_t s_len, mblength;
3244 + char mbc[MB_LEN_MAX];
3245 + wchar_t wc, pwc;
3246 + mbstate_t state_mb, state_wc;
3247 +
3248 + for (i = 0; i < MONTHS_PER_YEAR; i++)
3249 + {
3250 + s = (char *) nl_langinfo (ABMON_1 + i);
3251 + s_len = strlen (s);
3252 + monthtab[i].name = name = (char *) xmalloc (s_len + 1);
3253 + monthtab[i].val = i + 1;
3254 +
3255 + memset (&state_mb, '\0', sizeof (mbstate_t));
3256 + memset (&state_wc, '\0', sizeof (mbstate_t));
3257 +
3258 + for (j = 0; j < s_len;)
3259 + {
3260 + if (!ismbblank (s + j, s_len - j, &mblength))
3261 + break;
3262 + j += mblength;
3263 + }
3264 +
3265 + for (k = 0; j < s_len;)
3266 + {
3267 + mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
3268 + assert (mblength != (size_t)-1 && mblength != (size_t)-2);
3269 + if (mblength == 0)
3270 + break;
3271 +
3272 + pwc = towupper (wc);
3273 + if (pwc == wc)
3274 + {
3275 + memcpy (mbc, s + j, mblength);
3276 + j += mblength;
3277 + }
3278 + else
3279 + {
3280 + j += mblength;
3281 + mblength = wcrtomb (mbc, pwc, &state_wc);
3282 + assert (mblength != (size_t)0 && mblength != (size_t)-1);
3283 + }
3284 +
3285 + for (l = 0; l < mblength; l++)
3286 + name[k++] = mbc[l];
3287 + }
3288 + name[k] = '\0';
3289 + }
3290 + qsort ((void *) monthtab, MONTHS_PER_YEAR,
3291 + sizeof (struct month), struct_month_cmp);
3292 +}
3293 +#endif
3294 +
3295 /* Specify the amount of main memory to use when sorting. */
3296 static void
3297 specify_sort_size (char const *s)
3298 @@ -808,7 +935,7 @@
3299 by KEY in LINE. */
3300
3301 static char *
3302 -begfield (const struct line *line, const struct keyfield *key)
3303 +begfield_uni (const struct line *line, const struct keyfield *key)
3304 {
3305 char *ptr = line->text, *lim = ptr + line->length - 1;
3306 size_t sword = key->sword;
3307 @@ -818,10 +945,10 @@
3308 /* The leading field separator itself is included in a field when -t
3309 is absent. */
3310
3311 - if (tab != TAB_DEFAULT)
3312 + if (tab_length)
3313 while (ptr < lim && sword--)
3314 {
3315 - while (ptr < lim && *ptr != tab)
3316 + while (ptr < lim && *ptr != tab[0])
3317 ++ptr;
3318 if (ptr < lim)
3319 ++ptr;
3320 @@ -849,11 +976,70 @@
3321 return ptr;
3322 }
3323
3324 +#if HAVE_MBRTOWC
3325 +static char *
3326 +begfield_mb (const struct line *line, const struct keyfield *key)
3327 +{
3328 + int i;
3329 + char *ptr = line->text, *lim = ptr + line->length - 1;
3330 + size_t sword = key->sword;
3331 + size_t schar = key->schar;
3332 + size_t mblength;
3333 + mbstate_t state;
3334 +
3335 + memset (&state, '\0', sizeof(mbstate_t));
3336 +
3337 + if (tab_length)
3338 + while (ptr < lim && sword--)
3339 + {
3340 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
3341 + {
3342 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3343 + ptr += mblength;
3344 + }
3345 + if (ptr < lim)
3346 + {
3347 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3348 + ptr += mblength;
3349 + }
3350 + }
3351 + else
3352 + while (ptr < lim && sword--)
3353 + {
3354 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
3355 + ptr += mblength;
3356 + if (ptr < lim)
3357 + {
3358 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3359 + ptr += mblength;
3360 + }
3361 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
3362 + ptr += mblength;
3363 + }
3364 +
3365 + if (key->skipsblanks)
3366 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
3367 + ptr += mblength;
3368 +
3369 + for (i = 0; i < schar; i++)
3370 + {
3371 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3372 +
3373 + if (ptr + mblength > lim)
3374 + break;
3375 + else
3376 + ptr += mblength;
3377 + }
3378 +
3379 + return ptr;
3380 +}
3381 +#endif
3382 +
3383 /* Return the limit of (a pointer to the first character after) the field
3384 in LINE specified by KEY. */
3385
3386 static char *
3387 -limfield (const struct line *line, const struct keyfield *key)
3388 +limfield_uni (const struct line *line, const struct keyfield *key)
3389 {
3390 char *ptr = line->text, *lim = ptr + line->length - 1;
3391 size_t eword = key->eword, echar = key->echar;
3392 @@ -866,10 +1052,10 @@
3393 `beginning' is the first character following the delimiting TAB.
3394 Otherwise, leave PTR pointing at the first `blank' character after
3395 the preceding field. */
3396 - if (tab != TAB_DEFAULT)
3397 + if (tab_length)
3398 while (ptr < lim && eword--)
3399 {
3400 - while (ptr < lim && *ptr != tab)
3401 + while (ptr < lim && *ptr != tab[0])
3402 ++ptr;
3403 if (ptr < lim && (eword | echar))
3404 ++ptr;
3405 @@ -915,10 +1101,10 @@
3406 */
3407
3408 /* Make LIM point to the end of (one byte past) the current field. */
3409 - if (tab != TAB_DEFAULT)
3410 + if (tab_length)
3411 {
3412 char *newlim;
3413 - newlim = memchr (ptr, tab, lim - ptr);
3414 + newlim = memchr (ptr, tab[0], lim - ptr);
3415 if (newlim)
3416 lim = newlim;
3417 }
3418 @@ -951,6 +1137,107 @@
3419 return ptr;
3420 }
3421
3422 +#if HAVE_MBRTOWC
3423 +static char *
3424 +limfield_mb (const struct line *line, const struct keyfield *key)
3425 +{
3426 + char *ptr = line->text, *lim = ptr + line->length - 1;
3427 + size_t eword = key->eword, echar = key->echar;
3428 + int i;
3429 + size_t mblength;
3430 + mbstate_t state;
3431 +
3432 + memset (&state, '\0', sizeof(mbstate_t));
3433 +
3434 + if (tab_length)
3435 + while (ptr < lim && eword--)
3436 + {
3437 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
3438 + {
3439 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3440 + ptr += mblength;
3441 + }
3442 + if (ptr < lim && (eword | echar))
3443 + {
3444 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3445 + ptr += mblength;
3446 + }
3447 + }
3448 + else
3449 + while (ptr < lim && eword--)
3450 + {
3451 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
3452 + ptr += mblength;
3453 + if (ptr < lim)
3454 + {
3455 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3456 + ptr += mblength;
3457 + }
3458 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
3459 + ptr += mblength;
3460 + }
3461 +
3462 +
3463 +# ifdef POSIX_UNSPECIFIED
3464 + /* Make LIM point to the end of (one byte past) the current field. */
3465 + if (tab_length)
3466 + {
3467 + char *newlim, *p;
3468 +
3469 + newlim = NULL;
3470 + for (p = ptr; p < lim;)
3471 + {
3472 + if (memcmp (p, tab, tab_length) == 0)
3473 + {
3474 + newlim = p;
3475 + break;
3476 + }
3477 +
3478 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3479 + p += mblength;
3480 + }
3481 + }
3482 + else
3483 + {
3484 + char *newlim;
3485 + newlim = ptr;
3486 +
3487 + while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
3488 + newlim += mblength;
3489 + if (ptr < lim)
3490 + {
3491 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3492 + ptr += mblength;
3493 + }
3494 + while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
3495 + newlim += mblength;
3496 + lim = newlim;
3497 + }
3498 +# endif
3499 +
3500 + /* If we're skipping leading blanks, don't start counting characters
3501 + * until after skipping past any leading blanks. */
3502 + if (key->skipsblanks)
3503 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
3504 + ptr += mblength;
3505 +
3506 + memset (&state, '\0', sizeof(mbstate_t));
3507 +
3508 + /* Advance PTR by ECHAR (if possible), but no further than LIM. */
3509 + for (i = 0; i < echar; i++)
3510 + {
3511 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
3512 +
3513 + if (ptr + mblength > lim)
3514 + break;
3515 + else
3516 + ptr += mblength;
3517 + }
3518 +
3519 + return ptr;
3520 +}
3521 +#endif
3522 +
3523 /* Fill BUF reading from FP, moving buf->left bytes from the end
3524 of buf->buf to the beginning first. If EOF is reached and the
3525 file wasn't terminated by a newline, supply one. Set up BUF's line
3526 @@ -1067,7 +1354,7 @@
3527 hideously fast. */
3528
3529 static int
3530 -numcompare (const char *a, const char *b)
3531 +numcompare_uni (const char *a, const char *b)
3532 {
3533 while (blanks[to_uchar (*a)])
3534 a++;
3535 @@ -1077,6 +1364,25 @@
3536 return strnumcmp (a, b, decimal_point, thousands_sep);
3537 }
3538
3539 +#if HAVE_MBRTOWC
3540 +static int
3541 +numcompare_mb (const char *a, const char *b)
3542 +{
3543 + size_t mblength, len;
3544 + /* Skip leading blanks in A and B; LEN tracks the bytes left to scan. */
3545 + for (len = strlen (a); /* okay for UTF-8 */
3546 + *a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength);
3547 + len -= mblength)
3548 + a += mblength;
3549 + for (len = strlen (b); /* okay for UTF-8 */
3550 + *b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength);
3551 + len -= mblength)
3552 + b += mblength;
3553 +
3554 + return strnumcmp (a, b, decimal_point, thousands_sep);
3555 +}
3556 +#endif /* HAVE_MBRTOWC */
3557 +
3558 static int
3559 general_numcompare (const char *sa, const char *sb)
3560 {
3561 @@ -1110,7 +1416,7 @@
3562 Return 0 if the name in S is not recognized. */
3563
3564 static int
3565 -getmonth (char const *month, size_t len)
3566 +getmonth_uni (char const *month, size_t len)
3567 {
3568 size_t lo = 0;
3569 size_t hi = MONTHS_PER_YEAR;
3570 @@ -1152,11 +1458,79 @@
3571 return 0;
3572 }
3573
3574 +#if HAVE_MBRTOWC
3575 +/* Multibyte getmonth: skip leading blanks in the LEN bytes at S, upper-case
3576 + the first word and look it up in monthtab. Return the month's value,
3577 + or 0 if S is blank, undecodable in this locale, or not a month name. */
3578 +static int
3579 +getmonth_mb (const char *s, size_t len)
3580 +{
3581 + char *month, *tmp;
3582 + register size_t i;
3583 + register int lo = 0, hi = MONTHS_PER_YEAR;
3584 + size_t wclength, mblength, month_size;
3585 + const char **pp;
3586 + const wchar_t **wpp;
3587 + wchar_t *month_wcs;
3588 + mbstate_t state;
3589 +
3590 + while (len > 0 && ismbblank (s, len, &mblength))
3591 + {
3592 + s += mblength;
3593 + len -= mblength;
3594 + }
3595 + if (len == 0)
3596 + return 0;
3597 +
3598 + tmp = (char *) alloca (len + 1);
3599 + memcpy (tmp, s, len);
3600 + tmp[len] = '\0';
3601 + pp = (const char **)&tmp;
3602 + month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t));
3603 + memset (&state, '\0', sizeof(mbstate_t));
3604 +
3605 + /* Undecodable input cannot name a month; do not abort the whole sort. */
3606 + wclength = mbsrtowcs (month_wcs, pp, len + 1, &state);
3607 + if (wclength == (size_t)-1 || *pp != NULL)
3608 + return 0;
3609 +
3610 + for (i = 0; i < wclength; i++)
3611 + {
3612 + month_wcs[i] = towupper(month_wcs[i]);
3613 + if (iswblank (month_wcs[i]))
3614 + {
3615 + month_wcs[i] = L'\0';
3616 + break;
3617 + }
3618 + }
3619 +
3620 + /* Upper-casing can lengthen an encoding, so size MONTH for the worst case. */
3621 + month_size = wclength * MB_CUR_MAX + 1;
3622 + month = (char *) alloca (month_size);
3623 + wpp = (const wchar_t **)&month_wcs;
3624 + mblength = wcsrtombs (month, wpp, month_size, &state);
3625 + if (mblength == (size_t)-1 || *wpp != NULL)
3626 + return 0;
3627 + do
3628 + {
3629 + int ix = (lo + hi) / 2;
3630 + if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
3631 + hi = ix;
3632 + else
3633 + lo = ix;
3634 + }
3635 + while (hi - lo > 1);
3636 +
3637 + return (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
3638 + ? monthtab[lo].val : 0);
3639 +}
3640 +#endif
3641 +
3642 /* Compare two lines A and B trying every key in sequence until there
3643 are no more keys or a difference is found. */
3644
3645 static int
3646 -keycompare (const struct line *a, const struct line *b)
3647 +keycompare_uni (const struct line *a, const struct line *b)
3648 {
3649 struct keyfield const *key = keylist;
3650
3651 @@ -1326,6 +1700,177 @@
3652 return key->reverse ? -diff : diff;
3653 }
3654
3655 +#if HAVE_MBRTOWC
3656 +/* Multibyte counterpart of keycompare_uni: compare lines A and B on each key
3657 + in turn; return the first nonzero difference (negated if reversed) or 0. */
3658 +static int
3659 +keycompare_mb (const struct line *a, const struct line *b)
3660 +{
3661 + struct keyfield *key = keylist;
3662 + /* For the first iteration only, the key positions have been
3663 + precomputed for us. */
3664 + char *texta = a->keybeg;
3665 + char *textb = b->keybeg;
3666 + char *lima = a->keylim;
3667 + char *limb = b->keylim;
3668 + size_t mblength_a, mblength_b;
3669 + wchar_t wc_a, wc_b;
3670 + mbstate_t state_a, state_b;
3671 +
3672 + int diff;
3673 +
3674 + memset (&state_a, '\0', sizeof(mbstate_t));
3675 + memset (&state_b, '\0', sizeof(mbstate_t));
3676 +
3677 + for (;;)
3678 + {
3679 + unsigned char *translate = (unsigned char *) key->translate;
3680 + bool const *ignore = key->ignore;
3681 +
3682 + /* Find the lengths. */
3683 + size_t lena = lima <= texta ? 0 : lima - texta;
3684 + size_t lenb = limb <= textb ? 0 : limb - textb;
3685 +
3686 + /* Actually compare the fields. */
3687 + if (key->numeric | key->general_numeric)
3688 + {
3689 + char savea = *lima, saveb = *limb;
3690 +
3691 + *lima = *limb = '\0';
3692 + if (force_general_numcompare)
3693 + diff = general_numcompare (texta, textb);
3694 + else
3695 + diff = ((key->numeric ? numcompare : general_numcompare)
3696 + (texta, textb));
3697 + *lima = savea, *limb = saveb;
3698 + }
3699 + else if (key->month)
3700 + diff = getmonth (texta, lena) - getmonth (textb, lenb);
3701 + else
3702 + {
3703 + if (ignore || translate)
3704 + {
3705 + /* Case translation may lengthen a character's encoding. */
3706 + char *copy_a = (char *) alloca ((lena + lenb) * MB_CUR_MAX + 2);
3707 + char *copy_b = copy_a + lena * MB_CUR_MAX + 1;
3708 + size_t new_len_a, new_len_b;
3709 + size_t i, j;
3710 + /* Ignore and/or translate chars before comparing. */
3711 +# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \
3712 + do \
3713 + { \
3714 + wchar_t uwc; \
3715 + char mbc[MB_LEN_MAX]; \
3716 + mbstate_t state_wc; \
3717 + \
3718 + for (NEW_LEN = i = 0; i < LEN;) \
3719 + { \
3720 + mbstate_t state_bak; \
3721 + \
3722 + state_bak = STATE; \
3723 + MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \
3724 + \
3725 + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \
3726 + || MBLENGTH == 0) \
3727 + { \
3728 + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \
3729 + STATE = state_bak; \
3730 + if (!ignore) \
3731 + COPY[NEW_LEN++] = TEXT[i]; \
3732 + i++; /* always advance, else an ignored bad byte loops forever */ \
3733 + continue; \
3734 + } \
3735 + \
3736 + if (ignore) \
3737 + { \
3738 + if ((ignore == nonprinting && !iswprint (WC)) \
3739 + || (ignore == nondictionary \
3740 + && !iswalnum (WC) && !iswblank (WC))) \
3741 + { \
3742 + i += MBLENGTH; \
3743 + continue; \
3744 + } \
3745 + } \
3746 + \
3747 + if (translate) \
3748 + { \
3749 + uwc = towupper(WC); \
3750 + if (WC == uwc) \
3751 + { \
3752 + memcpy (mbc, TEXT + i, MBLENGTH); \
3753 + i += MBLENGTH; \
3754 + } \
3755 + else \
3756 + { \
3757 + i += MBLENGTH; \
3758 + WC = uwc; \
3759 + memset (&state_wc, '\0', sizeof (mbstate_t)); \
3760 + \
3761 + MBLENGTH = wcrtomb (mbc, WC, &state_wc); \
3762 + assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \
3763 + } \
3764 + \
3765 + for (j = 0; j < MBLENGTH; j++) \
3766 + COPY[NEW_LEN++] = mbc[j]; \
3767 + } \
3768 + else \
3769 + for (j = 0; j < MBLENGTH; j++) \
3770 + COPY[NEW_LEN++] = TEXT[i++]; \
3771 + } \
3772 + COPY[NEW_LEN] = '\0'; \
3773 + } \
3774 + while (0)
3775 + IGNORE_CHARS (new_len_a, lena, texta, copy_a,
3776 + wc_a, mblength_a, state_a);
3777 + IGNORE_CHARS (new_len_b, lenb, textb, copy_b,
3778 + wc_b, mblength_b, state_b);
3779 + diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
3780 + }
3781 + else if (lena == 0)
3782 + diff = - NONZERO (lenb);
3783 + else if (lenb == 0)
3784 + goto greater;
3785 + else
3786 + diff = xmemcoll (texta, lena, textb, lenb);
3787 + }
3788 +
3789 + if (diff)
3790 + goto not_equal;
3791 +
3792 + key = key->next;
3793 + if (! key)
3794 + break;
3795 +
3796 + /* Find the beginning and limit of the next field. */
3797 + if (key->eword != -1)
3798 + lima = limfield (a, key), limb = limfield (b, key);
3799 + else
3800 + lima = a->text + a->length - 1, limb = b->text + b->length - 1;
3801 +
3802 + if (key->sword != -1)
3803 + texta = begfield (a, key), textb = begfield (b, key);
3804 + else
3805 + {
3806 + texta = a->text, textb = b->text;
3807 + if (key->skipsblanks)
3808 + {
3809 + while (texta < lima && ismbblank (texta, lima - texta, &mblength_a))
3810 + texta += mblength_a;
3811 + while (textb < limb && ismbblank (textb, limb - textb, &mblength_b))
3812 + textb += mblength_b;
3813 + }
3814 + }
3815 + }
3816 +
3817 + return 0;
3818 +
3819 +greater:
3820 + diff = 1;
3821 +not_equal:
3822 + return key->reverse ? -diff : diff;
3823 +}
3824 +#endif
3825 +
3826 /* Compare two lines A and B, returning negative, zero, or positive
3827 depending on whether A compares less than, equal to, or greater than B. */
3828
3829 @@ -2127,7 +2672,7 @@
3830 atexit (close_stdout);
3831
3832 hard_LC_COLLATE = hard_locale (LC_COLLATE);
3833 -#if HAVE_NL_LANGINFO
3834 +#if HAVE_LANGINFO_CODESET
3835 hard_LC_TIME = hard_locale (LC_TIME);
3836 #endif
3837
3838 @@ -2148,6 +2693,27 @@
3839 thousands_sep = -1;
3840 }
3841
3842 +#if HAVE_MBRTOWC
3843 + if (MB_CUR_MAX > 1)
3844 + {
3845 + inittables = inittables_mb;
3846 + begfield = begfield_mb;
3847 + limfield = limfield_mb;
3848 + getmonth = getmonth_mb;
3849 + keycompare = keycompare_mb;
3850 + numcompare = numcompare_mb;
3851 + }
3852 + else
3853 +#endif
3854 + {
3855 + inittables = inittables_uni;
3856 + begfield = begfield_uni;
3857 + limfield = limfield_uni;
3858 + getmonth = getmonth_uni;
3859 + keycompare = keycompare_uni;
3860 + numcompare = numcompare_uni;
3861 + }
3862 +
3863 have_read_stdin = false;
3864 inittables ();
3865
3866 @@ -2349,13 +2915,35 @@
3867
3868 case 't':
3869 {
3870 - char newtab = optarg[0];
3871 - if (! newtab)
3872 + char newtab[MB_LEN_MAX + 1];
3873 + size_t newtab_length = 1;
3874 + strncpy (newtab, optarg, MB_LEN_MAX);
3875 + if (! newtab[0])
3876 error (SORT_FAILURE, 0, _("empty tab"));
3877 - if (optarg[1])
3878 +#if HAVE_MBRTOWC
3879 + if (MB_CUR_MAX > 1)
3880 + {
3881 + wchar_t wc;
3882 + mbstate_t state;
3883 + size_t i;
3884 +
3885 + memset (&state, '\0', sizeof (mbstate_t));
3886 + newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
3887 + MB_LEN_MAX),
3888 + &state);
3889 + switch (newtab_length)
3890 + {
3891 + case (size_t) -1:
3892 + case (size_t) -2:
3893 + case 0:
3894 + newtab_length = 1;
3895 + }
3896 + }
3897 +#endif
3898 + if (newtab_length == 1 && optarg[1])
3899 {
3900 if (STREQ (optarg, "\\0"))
3901 - newtab = '\0';
3902 + newtab[0] = '\0';
3903 else
3904 {
3905 /* Provoke with `sort -txx'. Complain about
3906 @@ -2366,9 +2954,12 @@
3907 quote (optarg));
3908 }
3909 }
3910 - if (tab != TAB_DEFAULT && tab != newtab)
3911 + if (tab_length
3912 + && (tab_length != newtab_length
3913 + || memcmp (tab, newtab, tab_length) != 0))
3914 error (SORT_FAILURE, 0, _("incompatible tabs"));
3915 - tab = newtab;
3916 + memcpy (tab, newtab, newtab_length);
3917 + tab_length = newtab_length;
3918 }
3919 break;
3920
3921 --- /dev/null 2005-10-10 09:36:06.437701000 +0100
3922 +++ coreutils-5.93/tests/sort/sort-mb-tests 2005-12-23 08:53:01.000000000 +0000
3923 @@ -0,0 +1,58 @@
3924 +#! /bin/sh
3925 +case $# in
3926 + 0) xx='../../src/sort';;
3927 + *) xx="$1";;
3928 +esac
3929 +test "$VERBOSE" && echo=echo || echo=:
3930 +$echo testing program: $xx
3931 +errors=0
3932 +test "$srcdir" || srcdir=.
3933 +test "$VERBOSE" && $xx --version 2> /dev/null
3934 +
3935 +# These tests need a UTF-8 locale; exit 77 (Automake "skipped") without one.
3936 +export LC_ALL=en_US.UTF-8
3937 +locale -k LC_CTYPE 2>&1 | grep -q 'charmap.*UTF-8' || exit 77
3938 +
3939 +$xx -t @ -k2 -n $srcdir/mb1.I > mb1.O
3940 +code=$?
3941 +if test $code != 0; then
3942 + $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
3943 + errors=`expr $errors + 1`
3944 +else
3945 + cmp mb1.O $srcdir/mb1.X > /dev/null 2>&1
3946 + case $? in
3947 + 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
3948 + 1) $echo "Test mb1 failed: files mb1.O and $srcdir/mb1.X differ" 1>&2
3949 + (diff -c mb1.O $srcdir/mb1.X) 2> /dev/null
3950 + errors=`expr $errors + 1`;;
3951 + 2) $echo "Test mb1 may have failed." 1>&2
3952 + $echo The command "cmp mb1.O $srcdir/mb1.X" failed. 1>&2
3953 + errors=`expr $errors + 1`;;
3954 + esac
3955 +fi
3956 +
3957 +$xx -t @ -k4 -n $srcdir/mb2.I > mb2.O
3958 +code=$?
3959 +if test $code != 0; then
3960 + $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
3961 + errors=`expr $errors + 1`
3962 +else
3963 + cmp mb2.O $srcdir/mb2.X > /dev/null 2>&1
3964 + case $? in
3965 + 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
3966 + 1) $echo "Test mb2 failed: files mb2.O and $srcdir/mb2.X differ" 1>&2
3967 + (diff -c mb2.O $srcdir/mb2.X) 2> /dev/null
3968 + errors=`expr $errors + 1`;;
3969 + 2) $echo "Test mb2 may have failed." 1>&2
3970 + $echo The command "cmp mb2.O $srcdir/mb2.X" failed. 1>&2
3971 + errors=`expr $errors + 1`;;
3972 + esac
3973 +fi
3974 +
3975 +if test $errors = 0; then
3976 + $echo Passed all 2 tests. 1>&2
3977 +else
3978 + $echo Failed $errors tests. 1>&2
3979 +fi
3980 +test $errors = 0 || errors=1
3981 +exit $errors
3982 --- /dev/null 2005-10-10 09:36:06.437701000 +0100
3983 +++ coreutils-5.93/tests/sort/mb1.I 2005-12-23 08:53:01.000000000 +0000
3984 @@ -0,0 +1,4 @@
3985 +Apple@10
3986 +Banana@5
3987 +Citrus@20
3988 +Cherry@30
3989 --- /dev/null 2005-10-10 09:36:06.437701000 +0100
3990 +++ coreutils-5.93/tests/sort/mb2.I 2005-12-23 08:53:01.000000000 +0000
3991 @@ -0,0 +1,4 @@
3992 +Apple@AA10@@20
3993 +Banana@AA5@@30
3994 +Citrus@AA20@@5
3995 +Cherry@AA30@@10
3996 --- /dev/null 2005-10-10 09:36:06.437701000 +0100
3997 +++ coreutils-5.93/tests/sort/mb1.X 2005-12-23 08:53:01.000000000 +0000
3998 @@ -0,0 +1,4 @@
3999 +Banana@5
4000 +Apple@10
4001 +Citrus@20
4002 +Cherry@30
4003 --- /dev/null 2005-10-10 09:36:06.437701000 +0100
4004 +++ coreutils-5.93/tests/sort/mb2.X 2005-12-23 08:53:01.000000000 +0000
4005 @@ -0,0 +1,4 @@
4006 +Citrus@AA20@@5
4007 +Cherry@AA30@@10
4008 +Apple@AA10@@20
4009 +Banana@AA5@@30
4010 --- coreutils-5.93/tests/sort/Makefile.am.i18n 2005-10-24 22:02:25.000000000 +0100
4011 +++ coreutils-5.93/tests/sort/Makefile.am 2005-12-23 08:53:01.000000000 +0000
4012 @@ -43,14 +43,16 @@
4013 nul-nls.E use-nl.O use-nl.E o2.O o2.E nul-tab.O nul-tab.E
4014 ##test-files-end
4015
4016 -EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen)
4017 -noinst_SCRIPTS = $x-tests
4018 +run_gen += mb1.O mb2.O
4019 +
4020 +EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen) mb1.I mb1.X mb2.I mb2.X
4021 +noinst_SCRIPTS = $x-tests # $x-mb-tests
4022 TESTS_ENVIRONMENT = \
4023 PATH="`pwd`/../../src$(PATH_SEPARATOR)$$PATH"
4024
4025 editpl = sed -e 's,@''PERL''@,$(PERL),g' -e 's,@''srcdir''@,$(srcdir),g'
4026
4027 -TESTS = $x-tests
4028 +TESTS = $x-tests $x-mb-tests
4029
4030 mk_script = $(srcdir)/../mk-script
4031 $(srcdir)/$x-tests: $(mk_script) Test.pm Makefile.am
4032 --- coreutils-5.93/tests/sort/Makefile.in.i18n 2005-10-22 19:13:23.000000000 +0100
4033 +++ coreutils-5.93/tests/sort/Makefile.in 2005-10-26 22:17:04.000000000 +0100
4034 @@ -338,13 +338,15 @@
4035 o-no-file1.E create-empty.O create-empty.E neg-nls.O neg-nls.E nul-nls.O \
4036 nul-nls.E use-nl.O use-nl.E o2.O o2.E nul-tab.O nul-tab.E
4037
4038 -EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen)
4039 -noinst_SCRIPTS = $x-tests
4040 +run_gen += mb1.O mb2.O
4041 +
4042 +EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen) mb1.I mb1.X mb2.I mb2.X
4043 +noinst_SCRIPTS = $x-tests # $x-mb-tests
4044 TESTS_ENVIRONMENT = \
4045 PATH="`pwd`/../../src$(PATH_SEPARATOR)$$PATH"
4046
4047 editpl = sed -e 's,@''PERL''@,$(PERL),g' -e 's,@''srcdir''@,$(srcdir),g'
4048 -TESTS = $x-tests
4049 +TESTS = $x-tests $x-mb-tests
4050 mk_script = $(srcdir)/../mk-script
4051 MAINTAINERCLEANFILES = $x-tests $(maint_gen)
4052 CLEANFILES = $(run_gen)