]> git.ipfire.org Git - people/ms/ipfire-3.x.git/blob - coreutils/patches/coreutils-i18n.patch
Merge remote-tracking branch 'maniacikarus/samba'
[people/ms/ipfire-3.x.git] / coreutils / patches / coreutils-i18n.patch
1 diff -urNp coreutils-8.13-orig/lib/linebuffer.h coreutils-8.13/lib/linebuffer.h
2 --- coreutils-8.13-orig/lib/linebuffer.h 2011-04-24 19:21:45.000000000 +0200
3 +++ coreutils-8.13/lib/linebuffer.h 2011-09-09 10:23:14.163704760 +0200
4 @@ -21,6 +21,11 @@
5
6 # include <stdio.h>
7
8 +/* Get mbstate_t. */
9 +# if HAVE_WCHAR_H
10 +# include <wchar.h>
11 +# endif
12 +
13 /* A 'struct linebuffer' holds a line of text. */
14
15 struct linebuffer
16 @@ -28,6 +33,9 @@ struct linebuffer
17 size_t size; /* Allocated. */
18 size_t length; /* Used. */
19 char *buffer;
20 +# if HAVE_WCHAR_H
21 + mbstate_t state;
22 +# endif
23 };
24
25 /* Initialize linebuffer LINEBUFFER for use. */
26 diff -urNp coreutils-8.13-orig/src/cut.c coreutils-8.13/src/cut.c
27 --- coreutils-8.13-orig/src/cut.c 2011-07-28 12:38:27.000000000 +0200
28 +++ coreutils-8.13/src/cut.c 2011-09-09 10:23:14.165701039 +0200
29 @@ -28,6 +28,11 @@
30 #include <assert.h>
31 #include <getopt.h>
32 #include <sys/types.h>
33 +
34 +/* Get mbstate_t, mbrtowc(). */
35 +#if HAVE_WCHAR_H
36 +# include <wchar.h>
37 +#endif
38 #include "system.h"
39
40 #include "error.h"
41 @@ -37,6 +42,18 @@
42 #include "quote.h"
43 #include "xstrndup.h"
44
45 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
46 + installation; work around this configuration error. */
47 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
48 +# undef MB_LEN_MAX
49 +# define MB_LEN_MAX 16
50 +#endif
51 +
52 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
53 +#if HAVE_MBRTOWC && defined mbstate_t
54 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
55 +#endif
56 +
57 /* The official name of this program (e.g., no `g' prefix). */
58 #define PROGRAM_NAME "cut"
59
60 @@ -72,6 +89,52 @@
61 } \
62 while (0)
63
64 +/* Top up BUF from STREAM once fewer than MB_LEN_MAX bytes are left. */
65 +#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
66 + do \
67 + { \
68 + if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
69 + { \
70 + memmove (BUF, BUFPOS, BUFLEN); \
71 + BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
72 + BUFPOS = BUF; \
73 + } \
74 + } \
75 + while (0)
76 +
77 +/* Decode the character at BUFPOS into WC; BUFPOS itself is not advanced.
78 + WC is WEOF if BUFLEN is 0. Else CONVFAIL is 1 for an invalid sequence, 0 if valid. */
79 +#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
80 + do \
81 + { \
82 + mbstate_t state_bak; \
83 + \
84 + if (BUFLEN < 1) \
85 + { \
86 + WC = WEOF; \
87 + break; \
88 + } \
89 + \
90 + /* Get a wide character. */ \
91 + CONVFAIL = 0; \
92 + state_bak = STATE; \
93 + MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
94 + \
95 + switch (MBLENGTH) \
96 + { \
97 + case (size_t)-1: \
98 + case (size_t)-2: \
99 + CONVFAIL++; \
100 + STATE = state_bak; \
101 + /* Fall through. */ \
102 + \
103 + case 0: \
104 + MBLENGTH = 1; \
105 + break; \
106 + } \
107 + } \
108 + while (0)
109 +
110 struct range_pair
111 {
112 size_t lo;
113 @@ -90,7 +153,7 @@ static char *field_1_buffer;
114 /* The number of bytes allocated for FIELD_1_BUFFER. */
115 static size_t field_1_bufsize;
116
117 -/* The largest field or byte index used as an endpoint of a closed
118 +/* The largest byte, character or field index used as an endpoint of a closed
119 or degenerate range specification; this doesn't include the starting
120 index of right-open-ended ranges. For example, with either range spec
121 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
122 @@ -102,10 +165,11 @@ static size_t eol_range_start;
123
124 /* This is a bit vector.
125 In byte mode, which bytes to output.
126 + In character mode, which characters to output.
127 In field mode, which DELIM-separated fields to output.
128 - Both bytes and fields are numbered starting with 1,
129 + Bytes, characters and fields are numbered starting with 1,
130 so the zeroth bit of this array is unused.
131 - A field or byte K has been selected if
132 + A byte, character or field K has been selected if
133 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
134 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
135 static unsigned char *printable_field;
136 @@ -114,15 +178,25 @@ enum operating_mode
137 {
138 undefined_mode,
139
140 - /* Output characters that are in the given bytes. */
141 + /* Output bytes that are at the given positions. */
142 byte_mode,
143
144 + /* Output characters that are at the given positions. */
145 + character_mode,
146 +
147 /* Output the given delimeter-separated fields. */
148 field_mode
149 };
150
151 static enum operating_mode operating_mode;
152
153 +/* If nonzero, when in byte mode, don't split multibyte characters. */
154 +static int byte_mode_character_aware;
155 +
156 +/* If nonzero, use the single-byte functions even when this program
157 + runs in a multibyte locale. */
158 +static int force_singlebyte_mode;
159 +
160 /* If true do not output lines containing no delimeter characters.
161 Otherwise, all such lines are printed. This option is valid only
162 with field mode. */
163 @@ -134,6 +208,9 @@ static bool complement;
164
165 /* The delimeter character for field mode. */
166 static unsigned char delim;
167 +#if HAVE_WCHAR_H
168 +static wchar_t wcdelim;
169 +#endif
170
171 /* True if the --output-delimiter=STRING option was specified. */
172 static bool output_delimiter_specified;
173 @@ -207,7 +284,7 @@ Mandatory arguments to long options are
174 -f, --fields=LIST select only these fields; also print any line\n\
175 that contains no delimiter character, unless\n\
176 the -s option is specified\n\
177 - -n (ignored)\n\
178 + -n with -b: don't split multibyte characters\n\
179 "), stdout);
180 fputs (_("\
181 --complement complement the set of selected bytes, characters\n\
182 @@ -366,7 +443,7 @@ set_fields (const char *fieldstr)
183 in_digits = false;
184 /* Starting a range. */
185 if (dash_found)
186 - FATAL_ERROR (_("invalid byte or field list"));
187 + FATAL_ERROR (_("invalid byte, character or field list"));
188 dash_found = true;
189 fieldstr++;
190
191 @@ -390,14 +467,16 @@ set_fields (const char *fieldstr)
192 if (!rhs_specified)
193 {
194 /* `n-'. From `initial' to end of line. */
195 - eol_range_start = initial;
196 + if (eol_range_start == 0 ||
197 + (eol_range_start != 0 && eol_range_start > initial))
198 + eol_range_start = initial;
199 field_found = true;
200 }
201 else
202 {
203 /* `m-n' or `-n' (1-n). */
204 if (value < initial)
205 - FATAL_ERROR (_("invalid decreasing range"));
206 + FATAL_ERROR (_("invalid byte, character or field list"));
207
208 /* Is there already a range going to end of line? */
209 if (eol_range_start != 0)
210 @@ -477,6 +556,9 @@ set_fields (const char *fieldstr)
211 if (operating_mode == byte_mode)
212 error (0, 0,
213 _("byte offset %s is too large"), quote (bad_num));
214 + else if (operating_mode == character_mode)
215 + error (0, 0,
216 + _("character offset %s is too large"), quote (bad_num));
217 else
218 error (0, 0,
219 _("field number %s is too large"), quote (bad_num));
220 @@ -487,7 +569,7 @@ set_fields (const char *fieldstr)
221 fieldstr++;
222 }
223 else
224 - FATAL_ERROR (_("invalid byte or field list"));
225 + FATAL_ERROR (_("invalid byte, character or field list"));
226 }
227
228 max_range_endpoint = 0;
229 @@ -582,6 +664,77 @@ cut_bytes (FILE *stream)
230 }
231 }
232
233 +#if HAVE_MBRTOWC
234 +/* Read from stream STREAM and print the selected parts of each line
235 + to standard output, decoding the input as multibyte characters.
236 + This function serves two cases:
237 +
238 + 1. Character mode: the list selects characters.
239 + 2. Byte mode with -n: the list selects bytes, but a multibyte
240 + character is printed whole or not at all, never split. */
241 +
242 +static void
243 +cut_characters_or_cut_bytes_no_split (FILE *stream)
244 +{
245 + int idx; /* number of bytes or characters in the line so far. */
246 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
247 + char *bufpos; /* Next read position of BUF. */
248 + size_t buflen; /* The length of the byte sequence in buf. */
249 + wint_t wc = 0; /* Last decoded wide character; stale if CONVFAIL. */
250 + size_t mblength; /* The byte size of a multibyte character which shows
251 + as same character as WC. */
252 + mbstate_t state; /* State of the stream. */
253 + int convfail = 0; /* 1, when conversion is failed. Otherwise 0. */
254 + /* Whether to begin printing delimiters between ranges for the current line.
255 + Set after we've begun printing data corresponding to the first range. */
256 + bool print_delimiter = false;
257 +
258 + idx = 0;
259 + buflen = 0;
260 + bufpos = buf;
261 + memset (&state, '\0', sizeof(mbstate_t));
262 +
263 + while (1)
264 + {
265 + REFILL_BUFFER (buf, bufpos, buflen, stream);
266 +
267 + GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
268 +
269 + if (wc == WEOF)
270 + {
271 + if (idx > 0)
272 + putchar ('\n');
273 + break;
274 + }
275 + else if (!convfail && wc == L'\n') /* WC is stale when CONVFAIL. */
276 + {
277 + putchar ('\n');
278 + idx = 0;
279 + print_delimiter = false;
280 + }
281 + else
282 + {
283 + bool range_start;
284 + bool *rs = output_delimiter_specified ? &range_start : NULL;
285 + idx += (operating_mode == byte_mode) ? mblength : 1;
286 + if (print_kth (idx, rs))
287 + {
288 + if (rs && *rs && print_delimiter)
289 + {
290 + fwrite (output_delimiter_string, sizeof (char),
291 + output_delimiter_length, stdout);
292 + }
293 + print_delimiter = true;
294 + fwrite (bufpos, sizeof (char), mblength, stdout);
295 + }
296 + }
297 +
298 + buflen -= mblength;
299 + bufpos += mblength;
300 + }
301 +}
302 +#endif
303 +
304 /* Read from stream STREAM, printing to standard output any selected fields. */
305
306 static void
307 @@ -704,13 +843,195 @@ cut_fields (FILE *stream)
308 }
309 }
310
311 +#if HAVE_MBRTOWC
312 +/* Multibyte counterpart of cut_fields: read from stream STREAM, printing
313 + to standard output any selected fields. Fields are separated by the
314 + wide character WCDELIM; bytes that do not form a valid character are
315 + never taken for a delimiter or a newline. */
316 +static void
317 +cut_fields_mb (FILE *stream)
318 +{
319 + int c;
320 + unsigned int field_idx;
321 + int found_any_selected_field;
322 + int buffer_first_field;
323 + int empty_input;
324 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
325 + char *bufpos; /* Next read position of BUF. */
326 + size_t buflen; /* The length of the byte sequence in buf. */
327 + wint_t wc = 0; /* A gotten wide character. */
328 + size_t mblength; /* The byte size of a multibyte character which shows
329 + as same character as WC. */
330 + mbstate_t state; /* State of the stream. */
331 + int convfail = 0; /* 1, when conversion is failed. Otherwise 0. */
332 +
333 + found_any_selected_field = 0;
334 + field_idx = 1;
335 + bufpos = buf;
336 + buflen = 0;
337 + memset (&state, '\0', sizeof(mbstate_t));
338 +
339 + c = getc (stream);
340 + empty_input = (c == EOF);
341 + if (c != EOF)
342 + {
343 + ungetc (c, stream);
344 + wc = 0;
345 + }
346 + else
347 + wc = WEOF;
348 +
349 + /* To support the semantics of the -s flag, we may have to buffer
350 + all of the first field to determine whether it is `delimited.'
351 + But that is unnecessary if all non-delimited lines must be printed
352 + and the first field has been selected, or if non-delimited lines
353 + must be suppressed and the first field has *not* been selected.
354 + That is because a non-delimited line has exactly one field. */
355 + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
356 +
357 + while (1)
358 + {
359 + if (field_idx == 1 && buffer_first_field)
360 + {
361 + int len = 0;
362 +
363 + while (1)
364 + {
365 + REFILL_BUFFER (buf, bufpos, buflen, stream);
366 +
367 + GET_NEXT_WC_FROM_BUFFER
368 + (wc, bufpos, buflen, mblength, state, convfail);
369 +
370 + if (wc == WEOF)
371 + break;
372 +
373 + field_1_buffer = xrealloc (field_1_buffer, len + mblength);
374 + memcpy (field_1_buffer + len, bufpos, mblength);
375 + len += mblength;
376 + buflen -= mblength;
377 + bufpos += mblength;
378 +
379 + if (!convfail && (wc == L'\n' || wc == wcdelim))
380 + break;
381 + }
382 +
383 + if (wc == WEOF)
384 + break;
385 +
386 + /* If the first field extends to the end of line (it is not
387 + delimited) and we are printing all non-delimited lines,
388 + print this one. */
389 + if (convfail || (!convfail && wc != wcdelim))
390 + {
391 + if (!suppress_non_delimited)
392 + {
393 + fwrite (field_1_buffer, sizeof (char), len, stdout);
394 + /* Make sure the output line is newline terminated. */
395 + if (convfail || (!convfail && wc != L'\n'))
396 + putchar ('\n');
397 + }
398 + continue;
399 + }
400 +
401 + if (print_kth (1, NULL))
402 + {
403 + /* Print the field, minus the MBLENGTH-byte trailing delimiter. */
404 + fwrite (field_1_buffer, sizeof (char), len - mblength, stdout);
405 + found_any_selected_field = 1;
406 + }
407 + ++field_idx;
408 + }
409 +
410 + if (wc != WEOF)
411 + {
412 + if (print_kth (field_idx, NULL))
413 + {
414 + if (found_any_selected_field)
415 + {
416 + fwrite (output_delimiter_string, sizeof (char),
417 + output_delimiter_length, stdout);
418 + }
419 + found_any_selected_field = 1;
420 + }
421 +
422 + while (1)
423 + {
424 + REFILL_BUFFER (buf, bufpos, buflen, stream);
425 +
426 + GET_NEXT_WC_FROM_BUFFER
427 + (wc, bufpos, buflen, mblength, state, convfail);
428 +
429 + if (wc == WEOF)
430 + break;
431 + else if (!convfail && (wc == wcdelim || wc == L'\n'))
432 + {
433 + buflen -= mblength;
434 + bufpos += mblength;
435 + break;
436 + }
437 +
438 + if (print_kth (field_idx, NULL))
439 + fwrite (bufpos, mblength, sizeof(char), stdout);
440 +
441 + buflen -= mblength;
442 + bufpos += mblength;
443 + }
444 + }
445 +
446 + if ((!convfail || wc == L'\n') && buflen < 1)
447 + wc = WEOF;
448 +
449 + if (!convfail && wc == wcdelim)
450 + ++field_idx;
451 + else if (wc == WEOF || (!convfail && wc == L'\n'))
452 + {
453 + if (found_any_selected_field
454 + || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
455 + putchar ('\n');
456 + if (wc == WEOF)
457 + break;
458 + field_idx = 1;
459 + found_any_selected_field = 0;
460 + }
461 + }
462 +}
463 +#endif
464 +
465 static void
466 cut_stream (FILE *stream)
467 {
468 - if (operating_mode == byte_mode)
469 - cut_bytes (stream);
470 +#if HAVE_MBRTOWC
471 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
472 + {
473 + switch (operating_mode)
474 + {
475 + case byte_mode:
476 + if (byte_mode_character_aware)
477 + cut_characters_or_cut_bytes_no_split (stream);
478 + else
479 + cut_bytes (stream);
480 + break;
481 +
482 + case character_mode:
483 + cut_characters_or_cut_bytes_no_split (stream);
484 + break;
485 +
486 + case field_mode:
487 + cut_fields_mb (stream);
488 + break;
489 +
490 + default:
491 + abort ();
492 + }
493 + }
494 else
495 - cut_fields (stream);
496 +#endif
497 + {
498 + if (operating_mode == field_mode)
499 + cut_fields (stream);
500 + else
501 + cut_bytes (stream);
502 + }
503 }
504
505 /* Process file FILE to standard output.
506 @@ -762,6 +1080,8 @@ main (int argc, char **argv)
507 bool ok;
508 bool delim_specified = false;
509 char *spec_list_string IF_LINT ( = NULL);
510 + char mbdelim[MB_LEN_MAX + 1];
511 + size_t delimlen = 0;
512
513 initialize_main (&argc, &argv);
514 set_program_name (argv[0]);
515 @@ -784,7 +1104,6 @@ main (int argc, char **argv)
516 switch (optc)
517 {
518 case 'b':
519 - case 'c':
520 /* Build the byte list. */
521 if (operating_mode != undefined_mode)
522 FATAL_ERROR (_("only one type of list may be specified"));
523 @@ -792,6 +1111,14 @@ main (int argc, char **argv)
524 spec_list_string = optarg;
525 break;
526
527 + case 'c':
528 + /* Build the character list. */
529 + if (operating_mode != undefined_mode)
530 + FATAL_ERROR (_("only one type of list may be specified"));
531 + operating_mode = character_mode;
532 + spec_list_string = optarg;
533 + break;
534 +
535 case 'f':
536 /* Build the field list. */
537 if (operating_mode != undefined_mode)
538 @@ -803,10 +1130,35 @@ main (int argc, char **argv)
539 case 'd':
540 /* New delimiter. */
541 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
542 - if (optarg[0] != '\0' && optarg[1] != '\0')
543 - FATAL_ERROR (_("the delimiter must be a single character"));
544 - delim = optarg[0];
545 - delim_specified = true;
546 + {
547 +#if HAVE_MBRTOWC
548 + if(MB_CUR_MAX > 1)
549 + {
550 + mbstate_t state;
551 + memset (&state, '\0', sizeof(mbstate_t));
552 + delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
553 +
554 + if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
555 + ++force_singlebyte_mode;
556 + else
557 + {
558 + delimlen = (delimlen < 1) ? 1 : delimlen;
559 + if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
560 + FATAL_ERROR (_("the delimiter must be a single character"));
561 + /* NUL-terminate: MBDELIM is later copied with xstrdup. */
562 + memcpy (mbdelim, optarg, delimlen);
563 + mbdelim[delimlen] = '\0';
564 + }
565 + }
566 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
567 +#endif
568 + {
569 + if (optarg[0] != '\0' && optarg[1] != '\0')
570 + FATAL_ERROR (_("the delimiter must be a single character"));
571 + delim = (unsigned char) optarg[0];
572 + }
573 + delim_specified = true;
574 + }
575 break;
576
577 case OUTPUT_DELIMITER_OPTION:
578 @@ -819,6 +1171,7 @@ main (int argc, char **argv)
579 break;
580
581 case 'n':
582 + byte_mode_character_aware = 1;
583 break;
584
585 case 's':
586 @@ -841,7 +1194,7 @@ main (int argc, char **argv)
587 if (operating_mode == undefined_mode)
588 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
589
590 - if (delim != '\0' && operating_mode != field_mode)
591 + if (delim_specified && operating_mode != field_mode)
592 FATAL_ERROR (_("an input delimiter may be specified only\
593 when operating on fields"));
594
595 @@ -868,15 +1221,34 @@ main (int argc, char **argv)
596 }
597
598 if (!delim_specified)
599 - delim = '\t';
600 + {
601 + delim = '\t';
602 +#ifdef HAVE_MBRTOWC
603 + wcdelim = L'\t';
604 + mbdelim[0] = '\t';
605 + mbdelim[1] = '\0';
606 + delimlen = 1;
607 +#endif
608 + }
609
610 if (output_delimiter_string == NULL)
611 {
612 - static char dummy[2];
613 - dummy[0] = delim;
614 - dummy[1] = '\0';
615 - output_delimiter_string = dummy;
616 - output_delimiter_length = 1;
617 +#ifdef HAVE_MBRTOWC
618 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
619 + {
620 + output_delimiter_string = xstrdup(mbdelim);
621 + output_delimiter_length = delimlen;
622 + }
623 +
624 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
625 +#endif
626 + {
627 + static char dummy[2];
628 + dummy[0] = delim;
629 + dummy[1] = '\0';
630 + output_delimiter_string = dummy;
631 + output_delimiter_length = 1;
632 + }
633 }
634
635 if (optind == argc)
636 diff -urNp coreutils-8.13-orig/src/expand.c coreutils-8.13/src/expand.c
637 --- coreutils-8.13-orig/src/expand.c 2011-07-28 12:38:27.000000000 +0200
638 +++ coreutils-8.13/src/expand.c 2011-09-09 10:23:14.167583399 +0200
639 @@ -38,12 +38,29 @@
640 #include <stdio.h>
641 #include <getopt.h>
642 #include <sys/types.h>
643 +
644 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
645 +#if HAVE_WCHAR_H
646 +# include <wchar.h>
647 +#endif
648 +
649 #include "system.h"
650 #include "error.h"
651 #include "fadvise.h"
652 #include "quote.h"
653 #include "xstrndup.h"
654
655 +/* Work around GCC installations that wrongly define MB_LEN_MAX as 1. */
656 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
657 +# undef MB_LEN_MAX
658 +# define MB_LEN_MAX 16
659 +#endif
660 +
661 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
662 +#if HAVE_MBRTOWC && defined mbstate_t
663 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
664 +#endif
665 +
666 /* The official name of this program (e.g., no `g' prefix). */
667 #define PROGRAM_NAME "expand"
668
669 @@ -360,6 +377,142 @@ expand (void)
670 }
671 }
672
673 +#if HAVE_MBRTOWC
674 +/* Like expand, but decode the input as multibyte characters. */
675 +static void
676 +expand_multibyte (void)
677 +{
678 + FILE *fp; /* Input stream. */
679 + mbstate_t i_state; /* Current shift state of the input stream. */
680 + mbstate_t i_state_bak; /* Copy of I_STATE taken before each conversion. */
681 + mbstate_t o_state; /* Current shift state of the output stream. */
682 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
683 + char *bufpos = buf; /* Next read position of BUF. */
684 + size_t buflen = 0; /* The length of the byte sequence in buf. */
685 + wchar_t wc; /* The most recently decoded wide character. */
686 + size_t mblength; /* Byte length of the character decoded into WC. */
687 + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
688 + int column = 0; /* Column on screen of the next char. */
689 + int next_tab_column; /* Column the next tab stop is on. */
690 + int convert = 1; /* If nonzero, perform translations. */
691 +
692 + fp = next_file ((FILE *) NULL);
693 + if (fp == NULL)
694 + return;
695 +
696 + memset (&o_state, '\0', sizeof(mbstate_t));
697 + memset (&i_state, '\0', sizeof(mbstate_t));
698 +
699 + for (;;)
700 + {
701 + /* Refill the buffer BUF. */
702 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
703 + {
704 + memmove (buf, bufpos, buflen);
705 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
706 + bufpos = buf;
707 + }
708 +
709 + /* No character is left in BUF. */
710 + if (buflen < 1)
711 + {
712 + fp = next_file (fp);
713 +
714 + if (fp == NULL)
715 + break; /* No more files. */
716 + else
717 + {
718 + memset (&i_state, '\0', sizeof(mbstate_t));
719 + continue;
720 + }
721 + }
722 +
723 + /* Get a wide character. */
724 + i_state_bak = i_state;
725 + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
726 +
727 + switch (mblength)
728 + {
729 + case (size_t)-1: /* Invalid byte sequence. */
730 + case (size_t)-2: /* Incomplete byte sequence. */
731 + mblength = 1;
732 + i_state = i_state_bak;
733 + if (convert)
734 + {
735 + ++column;
736 + if (convert_entire_line == 0)
737 + convert = 0;
738 + }
739 + putchar (*bufpos);
740 + break;
741 +
742 + case 0: /* Null character. */
743 + mblength = 1;
744 + if (convert && convert_entire_line == 0)
745 + convert = 0;
746 + putchar ('\0');
747 + break;
748 +
749 + default:
750 + if (wc == L'\n') /* LF. */
751 + {
752 + tab_index = 0;
753 + column = 0;
754 + convert = 1;
755 + putchar ('\n');
756 + }
757 + else if (wc == L'\t' && convert) /* Tab. */
758 + {
759 + if (tab_size == 0)
760 + {
761 + /* Do not let tab_index == first_free_tab;
762 + stop when it is 1 less. */
763 + while (tab_index < first_free_tab - 1
764 + && column >= tab_list[tab_index])
765 + tab_index++;
766 + next_tab_column = tab_list[tab_index];
767 + if (tab_index < first_free_tab - 1)
768 + tab_index++;
769 + if (column >= next_tab_column)
770 + next_tab_column = column + 1;
771 + }
772 + else
773 + next_tab_column = column + tab_size - column % tab_size;
774 +
775 + while (column < next_tab_column)
776 + {
777 + putchar (' ');
778 + ++column;
779 + }
780 + }
781 + else /* Others. */
782 + {
783 + if (convert)
784 + {
785 + if (wc == L'\b')
786 + {
787 + if (column > 0)
788 + --column;
789 + }
790 + else
791 + {
792 + int width; /* The width of WC. */
793 +
794 + width = wcwidth (wc);
795 + column += (width > 0) ? width : 0;
796 + if (convert_entire_line == 0)
797 + convert = 0;
798 + }
799 + }
800 + fwrite (bufpos, sizeof(char), mblength, stdout);
801 + }
802 + }
803 + buflen -= mblength;
804 + bufpos += mblength;
805 + }
806 +}
807 +#endif
808 +
809 int
810 main (int argc, char **argv)
811 {
812 @@ -424,7 +577,12 @@ main (int argc, char **argv)
813
814 file_list = (optind < argc ? &argv[optind] : stdin_argv);
815
816 - expand ();
817 +#if HAVE_MBRTOWC
818 + if (MB_CUR_MAX > 1)
819 + expand_multibyte ();
820 + else
821 +#endif
822 + expand ();
823
824 if (have_read_stdin && fclose (stdin) != 0)
825 error (EXIT_FAILURE, errno, "-");
826 diff -urNp coreutils-8.13-orig/src/fold.c coreutils-8.13/src/fold.c
827 --- coreutils-8.13-orig/src/fold.c 2011-07-28 12:38:27.000000000 +0200
828 +++ coreutils-8.13/src/fold.c 2011-09-09 10:23:14.169583741 +0200
829 @@ -22,12 +22,34 @@
830 #include <getopt.h>
831 #include <sys/types.h>
832
833 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
834 +#if HAVE_WCHAR_H
835 +# include <wchar.h>
836 +#endif
837 +
838 +/* Get iswprint(), iswblank(), wcwidth(). */
839 +#if HAVE_WCTYPE_H
840 +# include <wctype.h>
841 +#endif
842 +
843 #include "system.h"
844 #include "error.h"
845 #include "fadvise.h"
846 #include "quote.h"
847 #include "xstrtol.h"
848
849 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
850 + installation; work around this configuration error. */
851 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
852 +# undef MB_LEN_MAX
853 +# define MB_LEN_MAX 16
854 +#endif
855 +
856 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
857 +#if HAVE_MBRTOWC && defined mbstate_t
858 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
859 +#endif
860 +
861 #define TAB_WIDTH 8
862
863 /* The official name of this program (e.g., no `g' prefix). */
864 @@ -35,20 +57,41 @@
865
866 #define AUTHORS proper_name ("David MacKenzie")
867
868 +#define FATAL_ERROR(Message) \
869 + do \
870 + { \
871 + error (0, 0, (Message)); \
872 + usage (2); \
873 + } \
874 + while (0)
875 +
876 +enum operating_mode
877 +{
878 + /* Measure the line width in display columns. */
879 + column_mode,
880 +
881 + /* Measure the line width in bytes. */
882 + byte_mode,
883 +
884 + /* Measure the line width in characters. */
885 + character_mode,
886 +};
887 +
888 +/* The current operating mode. (Default: column_mode) */
889 +static enum operating_mode operating_mode;
890 +
891 /* If nonzero, try to break on whitespace. */
892 static bool break_spaces;
893
894 -/* If nonzero, count bytes, not column positions. */
895 -static bool count_bytes;
896 -
897 /* If nonzero, at least one of the files we read was standard input. */
898 static bool have_read_stdin;
899
900 -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
901 +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
902
903 static struct option const longopts[] =
904 {
905 {"bytes", no_argument, NULL, 'b'},
906 + {"characters", no_argument, NULL, 'c'},
907 {"spaces", no_argument, NULL, 's'},
908 {"width", required_argument, NULL, 'w'},
909 {GETOPT_HELP_OPTION_DECL},
910 @@ -78,6 +121,7 @@ Mandatory arguments to long options are
911 "), stdout);
912 fputs (_("\
913 -b, --bytes count bytes rather than columns\n\
914 + -c, --characters count characters rather than columns\n\
915 -s, --spaces break at spaces\n\
916 -w, --width=WIDTH use WIDTH columns instead of 80\n\
917 "), stdout);
918 @@ -95,7 +139,7 @@ Mandatory arguments to long options are
919 static size_t
920 adjust_column (size_t column, char c)
921 {
922 - if (!count_bytes)
923 + if (operating_mode != byte_mode)
924 {
925 if (c == '\b')
926 {
927 @@ -118,30 +162,14 @@ adjust_column (size_t column, char c)
928 to stdout, with maximum line length WIDTH.
929 Return true if successful. */
930
931 -static bool
932 -fold_file (char const *filename, size_t width)
933 +static void
934 +fold_text (FILE *istream, size_t width, int *saved_errno)
935 {
936 - FILE *istream;
937 int c;
938 size_t column = 0; /* Screen column where next char will go. */
939 size_t offset_out = 0; /* Index in `line_out' for next char. */
940 static char *line_out = NULL;
941 static size_t allocated_out = 0;
942 - int saved_errno;
943 -
944 - if (STREQ (filename, "-"))
945 - {
946 - istream = stdin;
947 - have_read_stdin = true;
948 - }
949 - else
950 - istream = fopen (filename, "r");
951 -
952 - if (istream == NULL)
953 - {
954 - error (0, errno, "%s", filename);
955 - return false;
956 - }
957
958 fadvise (istream, FADVISE_SEQUENTIAL);
959
960 @@ -171,6 +199,15 @@ fold_file (char const *filename, size_t
961 bool found_blank = false;
962 size_t logical_end = offset_out;
963
964 + /* If LINE_OUT is empty, there is nothing to break off yet:
965 + store the character in LINE_OUT even though the column
966 + is bigger than the width. */
967 + if (offset_out == 0)
968 + {
969 + line_out[offset_out++] = c;
970 + continue;
971 + }
972 +
973 /* Look for the last blank. */
974 while (logical_end)
975 {
976 @@ -217,11 +254,221 @@ fold_file (char const *filename, size_t
977 line_out[offset_out++] = c;
978 }
979
980 - saved_errno = errno;
981 + *saved_errno = errno;
982
983 if (offset_out)
984 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
985
986 +}
987 +
988 +#if HAVE_MBRTOWC
989 +/* Fold the text read from ISTREAM, decoded as multibyte characters, so
990 + that no output line exceeds WIDTH bytes, characters or columns as
991 + selected by OPERATING_MODE. Store the final errno in *SAVED_ERRNO. */
992 +static void
993 +fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
994 +{
995 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
996 + size_t buflen = 0; /* The length of the byte sequence in buf. */
997 + char *bufpos = buf; /* Next read position of BUF. */
998 + wint_t wc; /* A gotten wide character. */
999 + size_t mblength; /* The byte size of a multibyte character which shows
1000 + as same character as WC. */
1001 + mbstate_t state, state_bak; /* State of the stream. */
1002 + int convfail = 0; /* 1, when conversion is failed. Otherwise 0. */
1003 +
1004 + static char *line_out = NULL;
1005 + size_t offset_out = 0; /* Index in `line_out' for next char. */
1006 + static size_t allocated_out = 0;
1007 +
1008 + int increment;
1009 + size_t column = 0;
1010 +
1011 + size_t last_blank_pos;
1012 + size_t last_blank_column;
1013 + int is_blank_seen;
1014 + int last_blank_increment = 0;
1015 + int is_bs_following_last_blank;
1016 + size_t bs_following_last_blank_num;
1017 + int is_cr_after_last_blank;
1018 +
1019 +#define CLEAR_FLAGS \
1020 + do \
1021 + { \
1022 + last_blank_pos = 0; \
1023 + last_blank_column = 0; \
1024 + is_blank_seen = 0; \
1025 + is_bs_following_last_blank = 0; \
1026 + bs_following_last_blank_num = 0; \
1027 + is_cr_after_last_blank = 0; \
1028 + } \
1029 + while (0)
1030 +
1031 +#define START_NEW_LINE \
1032 + do \
1033 + { \
1034 + putchar ('\n'); \
1035 + column = 0; \
1036 + offset_out = 0; \
1037 + CLEAR_FLAGS; \
1038 + } \
1039 + while (0)
1040 +
1041 + CLEAR_FLAGS;
1042 + memset (&state, '\0', sizeof(mbstate_t));
1043 +
1044 + for (;; bufpos += mblength, buflen -= mblength)
1045 + {
1046 + if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
1047 + {
1048 + memmove (buf, bufpos, buflen);
1049 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
1050 + bufpos = buf;
1051 + }
1052 +
1053 + if (buflen < 1)
1054 + break;
1055 +
1056 + /* Get a wide character. CONVFAIL describes this character only. */
1057 + convfail = 0;
1058 + state_bak = state;
1059 + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
1060 +
1061 + switch (mblength)
1062 + {
1063 + case (size_t)-1:
1064 + case (size_t)-2:
1065 + convfail++;
1066 + state = state_bak;
1067 + /* Fall through. */
1068 +
1069 + case 0:
1070 + mblength = 1;
1071 + break;
1072 + }
1073 +
1074 +rescan:
1075 + if (convfail)
1076 + increment = 1;
1077 + else if (wc == L'\n') /* A newline ends the line in every mode. */
1078 + {
1079 + fwrite (line_out, sizeof(char), offset_out, stdout);
1080 + START_NEW_LINE;
1081 + continue;
1082 + }
1083 + else if (operating_mode == byte_mode) /* byte mode */
1084 + increment = mblength;
1085 + else if (operating_mode == character_mode) /* character mode */
1086 + increment = 1;
1087 + else /* column mode */
1088 + {
1089 + switch (wc)
1090 + {
1091 + case L'\b':
1092 + increment = (column > 0) ? -1 : 0;
1093 + break;
1094 +
1095 + case L'\r':
1096 + increment = -1 * column;
1097 + break;
1098 +
1099 + case L'\t':
1100 + increment = 8 - column % 8;
1101 + break;
1102 +
1103 + default:
1104 + increment = wcwidth (wc);
1105 + increment = (increment < 0) ? 0 : increment;
1106 + }
1107 + }
1108 +
1109 + if (column + increment > width && break_spaces && last_blank_pos)
1110 + {
1111 + fwrite (line_out, sizeof(char), last_blank_pos, stdout);
1112 + putchar ('\n');
1113 +
1114 + offset_out = offset_out - last_blank_pos;
1115 + column = column - last_blank_column + ((is_cr_after_last_blank)
1116 + ? last_blank_increment : bs_following_last_blank_num);
1117 + memmove (line_out, line_out + last_blank_pos, offset_out);
1118 + CLEAR_FLAGS;
1119 + goto rescan;
1120 + }
1121 +
1122 + if (column + increment > width && column != 0)
1123 + {
1124 + fwrite (line_out, sizeof(char), offset_out, stdout);
1125 + START_NEW_LINE;
1126 + goto rescan;
1127 + }
1128 +
1129 + if (allocated_out < offset_out + mblength)
1130 + line_out = X2REALLOC (line_out, &allocated_out);
1131 +
1132 + memcpy (line_out + offset_out, bufpos, mblength);
1133 + offset_out += mblength;
1134 + column += increment;
1135 +
1136 + if (is_blank_seen && !convfail && wc == L'\r')
1137 + is_cr_after_last_blank = 1;
1138 +
1139 + if (is_bs_following_last_blank && !convfail && wc == L'\b')
1140 + ++bs_following_last_blank_num;
1141 + else
1142 + is_bs_following_last_blank = 0;
1143 +
1144 + if (break_spaces && !convfail && iswblank (wc))
1145 + {
1146 + last_blank_pos = offset_out;
1147 + last_blank_column = column;
1148 + is_blank_seen = 1;
1149 + last_blank_increment = increment;
1150 + is_bs_following_last_blank = 1;
1151 + bs_following_last_blank_num = 0;
1152 + is_cr_after_last_blank = 0;
1153 + }
1154 + }
1155 +
1156 + *saved_errno = errno;
1157 +
1158 + if (offset_out)
1159 + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1160 +
1161 +}
1162 +#endif
1163 +
1164 +/* Fold file FILENAME, or standard input if FILENAME is "-",
1165 + to stdout, with maximum line length WIDTH.
1166 + Return 0 if successful, 1 if an error occurs. */
1167 +
1168 +static bool
1169 +fold_file (char *filename, size_t width)
1170 +{
1171 + FILE *istream;
1172 + int saved_errno;
1173 +
1174 + if (STREQ (filename, "-"))
1175 + {
1176 + istream = stdin;
1177 + have_read_stdin = 1;
1178 + }
1179 + else
1180 + istream = fopen (filename, "r");
1181 +
1182 + if (istream == NULL)
1183 + {
1184 + error (0, errno, "%s", filename);
1185 + return 1;
1186 + }
1187 +
1188 + /* Define how ISTREAM is being folded. */
1189 +#if HAVE_MBRTOWC
1190 + if (MB_CUR_MAX > 1)
1191 + fold_multibyte_text (istream, width, &saved_errno);
1192 + else
1193 +#endif
1194 + fold_text (istream, width, &saved_errno);
1195 +
1196 if (ferror (istream))
1197 {
1198 error (0, saved_errno, "%s", filename);
1199 @@ -254,7 +501,8 @@ main (int argc, char **argv)
1200
1201 atexit (close_stdout);
1202
1203 - break_spaces = count_bytes = have_read_stdin = false;
1204 + operating_mode = column_mode;
1205 + break_spaces = have_read_stdin = false;
1206
1207 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
1208 {
1209 @@ -263,7 +511,15 @@ main (int argc, char **argv)
1210 switch (optc)
1211 {
1212 case 'b': /* Count bytes rather than columns. */
1213 - count_bytes = true;
1214 + if (operating_mode != column_mode)
1215 + FATAL_ERROR (_("only one way of folding may be specified"));
1216 + operating_mode = byte_mode;
1217 + break;
1218 +
1219 + case 'c':
1220 + if (operating_mode != column_mode)
1221 + FATAL_ERROR (_("only one way of folding may be specified"));
1222 + operating_mode = character_mode;
1223 break;
1224
1225 case 's': /* Break at word boundaries. */
1226 diff -urNp coreutils-8.13-orig/src/join.c coreutils-8.13/src/join.c
1227 --- coreutils-8.13-orig/src/join.c 2011-08-08 10:16:09.000000000 +0200
1228 +++ coreutils-8.13/src/join.c 2011-09-09 10:23:14.172687087 +0200
1229 @@ -22,18 +22,32 @@
1230 #include <sys/types.h>
1231 #include <getopt.h>
1232
1233 +/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
1234 +#if HAVE_WCHAR_H
1235 +# include <wchar.h>
1236 +#endif
1237 +
1238 +/* Get iswblank(), towupper(). */
1239 +#if HAVE_WCTYPE_H
1240 +# include <wctype.h>
1241 +#endif
1242 +
1243 #include "system.h"
1244 #include "error.h"
1245 #include "fadvise.h"
1246 #include "hard-locale.h"
1247 #include "linebuffer.h"
1248 -#include "memcasecmp.h"
1249 #include "quote.h"
1250 #include "stdio--.h"
1251 #include "xmemcoll.h"
1252 #include "xstrtol.h"
1253 #include "argmatch.h"
1254
1255 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1256 +#if HAVE_MBRTOWC && defined mbstate_t
1257 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1258 +#endif
1259 +
1260 /* The official name of this program (e.g., no `g' prefix). */
1261 #define PROGRAM_NAME "join"
1262
1263 @@ -135,10 +149,12 @@ static struct outlist outlist_head;
1264 /* Last element in `outlist', where a new element can be added. */
1265 static struct outlist *outlist_end = &outlist_head;
1266
1267 -/* Tab character separating fields. If negative, fields are separated
1268 - by any nonempty string of blanks, otherwise by exactly one
1269 - tab character whose value (when cast to unsigned char) equals TAB. */
1270 -static int tab = -1;
1271 +/* Tab character separating fields. If NULL, fields are separated
1272 + by any nonempty string of blanks. */
1273 +static char *tab = NULL;
1274 +
1275 +/* The number of bytes used for tab. */
1276 +static size_t tablen = 0;
1277
1278 /* If nonzero, check that the input is correctly ordered. */
1279 static enum
1280 @@ -263,13 +279,14 @@ xfields (struct line *line)
1281 if (ptr == lim)
1282 return;
1283
1284 - if (0 <= tab && tab != '\n')
1285 + if (tab != NULL)
1286 {
1287 + unsigned char t = tab[0];
1288 char *sep;
1289 - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
1290 + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
1291 extract_field (line, ptr, sep - ptr);
1292 }
1293 - else if (tab < 0)
1294 + else
1295 {
1296 /* Skip leading blanks before the first field. */
1297 while (isblank (to_uchar (*ptr)))
1298 @@ -293,6 +310,148 @@ xfields (struct line *line)
1299 extract_field (line, ptr, lim - ptr);
1300 }
1301
1302 +#if HAVE_MBRTOWC
1303 +static void
1304 +xfields_multibyte (struct line *line)
1305 +{
1306 + char *ptr = line->buf.buffer;
1307 + char const *lim = ptr + line->buf.length - 1;
1308 + wchar_t wc = 0;
1309 + size_t mblength = 1;
1310 + mbstate_t state, state_bak;
1311 +
1312 + memset (&state, 0, sizeof (mbstate_t));
1313 +
1314 + if (ptr >= lim)
1315 + return;
1316 +
1317 + if (tab != NULL)
1318 + {
1319 + unsigned char t = tab[0];
1320 + char *sep = ptr;
1321 + for (; ptr < lim; ptr = sep + mblength)
1322 + {
1323 + sep = ptr;
1324 + while (sep < lim)
1325 + {
1326 + state_bak = state;
1327 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1328 +
1329 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1330 + {
1331 + mblength = 1;
1332 + state = state_bak;
1333 + }
1334 + mblength = (mblength < 1) ? 1 : mblength;
1335 +
1336 + if (mblength == tablen && !memcmp (sep, tab, mblength))
1337 + break;
1338 + else
1339 + {
1340 + sep += mblength;
1341 + continue;
1342 + }
1343 + }
1344 +
1345 + if (sep >= lim)
1346 + break;
1347 +
1348 + extract_field (line, ptr, sep - ptr);
1349 + }
1350 + }
1351 + else
1352 + {
1353 + /* Skip leading blanks before the first field. */
1354 + while(ptr < lim)
1355 + {
1356 + state_bak = state;
1357 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1358 +
1359 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1360 + {
1361 + mblength = 1;
1362 + state = state_bak;
1363 + break;
1364 + }
1365 + mblength = (mblength < 1) ? 1 : mblength;
1366 +
1367 + if (!iswblank(wc))
1368 + break;
1369 + ptr += mblength;
1370 + }
1371 +
1372 + do
1373 + {
1374 + char *sep;
1375 + state_bak = state;
1376 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1377 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1378 + {
1379 + mblength = 1;
1380 + state = state_bak;
1381 + break;
1382 + }
1383 + mblength = (mblength < 1) ? 1 : mblength;
1384 +
1385 + sep = ptr + mblength;
1386 + while (sep < lim)
1387 + {
1388 + state_bak = state;
1389 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1390 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1391 + {
1392 + mblength = 1;
1393 + state = state_bak;
1394 + break;
1395 + }
1396 + mblength = (mblength < 1) ? 1 : mblength;
1397 +
1398 + if (iswblank (wc))
1399 + break;
1400 +
1401 + sep += mblength;
1402 + }
1403 +
1404 + extract_field (line, ptr, sep - ptr);
1405 + if (sep >= lim)
1406 + return;
1407 +
1408 + state_bak = state;
1409 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1410 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1411 + {
1412 + mblength = 1;
1413 + state = state_bak;
1414 + break;
1415 + }
1416 + mblength = (mblength < 1) ? 1 : mblength;
1417 +
1418 + ptr = sep + mblength;
1419 + while (ptr < lim)
1420 + {
1421 + state_bak = state;
1422 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1423 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1424 + {
1425 + mblength = 1;
1426 + state = state_bak;
1427 + break;
1428 + }
1429 + mblength = (mblength < 1) ? 1 : mblength;
1430 +
1431 + if (!iswblank (wc))
1432 + break;
1433 +
1434 + ptr += mblength;
1435 + }
1436 + }
1437 + while (ptr < lim);
1438 + }
1439 +
1440 + extract_field (line, ptr, lim - ptr);
1441 +}
1442 +#endif
1443 +
1444 static void
1445 freeline (struct line *line)
1446 {
1447 @@ -314,56 +473,115 @@ keycmp (struct line const *line1, struct
1448 size_t jf_1, size_t jf_2)
1449 {
1450 /* Start of field to compare in each file. */
1451 - char *beg1;
1452 - char *beg2;
1453 -
1454 - size_t len1;
1455 - size_t len2; /* Length of fields to compare. */
1456 + char *beg[2];
1457 + char *copy[2];
1458 + size_t len[2]; /* Length of fields to compare. */
1459 int diff;
1460 + int i, j;
1461
1462 if (jf_1 < line1->nfields)
1463 {
1464 - beg1 = line1->fields[jf_1].beg;
1465 - len1 = line1->fields[jf_1].len;
1466 + beg[0] = line1->fields[jf_1].beg;
1467 + len[0] = line1->fields[jf_1].len;
1468 }
1469 else
1470 {
1471 - beg1 = NULL;
1472 - len1 = 0;
1473 + beg[0] = NULL;
1474 + len[0] = 0;
1475 }
1476
1477 if (jf_2 < line2->nfields)
1478 {
1479 - beg2 = line2->fields[jf_2].beg;
1480 - len2 = line2->fields[jf_2].len;
1481 + beg[1] = line2->fields[jf_2].beg;
1482 + len[1] = line2->fields[jf_2].len;
1483 }
1484 else
1485 {
1486 - beg2 = NULL;
1487 - len2 = 0;
1488 + beg[1] = NULL;
1489 + len[1] = 0;
1490 }
1491
1492 - if (len1 == 0)
1493 - return len2 == 0 ? 0 : -1;
1494 - if (len2 == 0)
1495 + if (len[0] == 0)
1496 + return len[1] == 0 ? 0 : -1;
1497 + if (len[1] == 0)
1498 return 1;
1499
1500 if (ignore_case)
1501 {
1502 - /* FIXME: ignore_case does not work with NLS (in particular,
1503 - with multibyte chars). */
1504 - diff = memcasecmp (beg1, beg2, MIN (len1, len2));
1505 +#ifdef HAVE_MBRTOWC
1506 + if (MB_CUR_MAX > 1)
1507 + {
1508 + size_t mblength;
1509 + wchar_t wc, uwc;
1510 + mbstate_t state, state_bak;
1511 +
1512 + memset (&state, '\0', sizeof (mbstate_t));
1513 +
1514 + for (i = 0; i < 2; i++)
1515 + {
1516 + copy[i] = alloca (len[i] + 1);
1517 +
1518 + for (j = 0; j < MIN (len[0], len[1]);)
1519 + {
1520 + state_bak = state;
1521 + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
1522 +
1523 + switch (mblength)
1524 + {
1525 + case (size_t) -1:
1526 + case (size_t) -2:
1527 + state = state_bak;
1528 + /* Fall through */
1529 + case 0:
1530 + mblength = 1;
1531 + break;
1532 +
1533 + default:
1534 + uwc = towupper (wc);
1535 +
1536 + if (uwc != wc)
1537 + {
1538 + mbstate_t state_wc;
1539 +
1540 + memset (&state_wc, '\0', sizeof (mbstate_t));
1541 + wcrtomb (copy[i] + j, uwc, &state_wc);
1542 + }
1543 + else
1544 + memcpy (copy[i] + j, beg[i] + j, mblength);
1545 + }
1546 + j += mblength;
1547 + }
1548 + copy[i][j] = '\0';
1549 + }
1550 + }
1551 + else
1552 +#endif
1553 + {
1554 + for (i = 0; i < 2; i++)
1555 + {
1556 + copy[i] = alloca (len[i] + 1);
1557 +
1558 + for (j = 0; j < MIN (len[0], len[1]); j++)
1559 + copy[i][j] = toupper (beg[i][j]);
1560 +
1561 + copy[i][j] = '\0';
1562 + }
1563 + }
1564 }
1565 else
1566 {
1567 - if (hard_LC_COLLATE)
1568 - return xmemcoll (beg1, len1, beg2, len2);
1569 - diff = memcmp (beg1, beg2, MIN (len1, len2));
1570 + copy[0] = (unsigned char *) beg[0];
1571 + copy[1] = (unsigned char *) beg[1];
1572 }
1573
1574 + if (hard_LC_COLLATE)
1575 + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
1576 + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
1577 +
1578 +
1579 if (diff)
1580 return diff;
1581 - return len1 < len2 ? -1 : len1 != len2;
1582 + return len[0] - len[1];
1583 }
1584
1585 /* Check that successive input lines PREV and CURRENT from input file
1586 @@ -455,6 +673,11 @@ get_line (FILE *fp, struct line **linep,
1587 }
1588 ++line_no[which - 1];
1589
1590 +#if HAVE_MBRTOWC
1591 + if (MB_CUR_MAX > 1)
1592 + xfields_multibyte (line);
1593 + else
1594 +#endif
1595 xfields (line);
1596
1597 if (prevline[which - 1])
1598 @@ -554,21 +777,28 @@ prfield (size_t n, struct line const *li
1599
1600 /* Output all the fields in line, other than the join field. */
1601
1602 +#define PUT_TAB_CHAR \
1603 + do \
1604 + { \
1605 + (tab != NULL) ? \
1606 + fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
1607 + } \
1608 + while (0)
1609 +
1610 static void
1611 prfields (struct line const *line, size_t join_field, size_t autocount)
1612 {
1613 size_t i;
1614 size_t nfields = autoformat ? autocount : line->nfields;
1615 - char output_separator = tab < 0 ? ' ' : tab;
1616
1617 for (i = 0; i < join_field && i < nfields; ++i)
1618 {
1619 - putchar (output_separator);
1620 + PUT_TAB_CHAR;
1621 prfield (i, line);
1622 }
1623 for (i = join_field + 1; i < nfields; ++i)
1624 {
1625 - putchar (output_separator);
1626 + PUT_TAB_CHAR;
1627 prfield (i, line);
1628 }
1629 }
1630 @@ -579,7 +809,6 @@ static void
1631 prjoin (struct line const *line1, struct line const *line2)
1632 {
1633 const struct outlist *outlist;
1634 - char output_separator = tab < 0 ? ' ' : tab;
1635 size_t field;
1636 struct line const *line;
1637
1638 @@ -613,7 +842,7 @@ prjoin (struct line const *line1, struct
1639 o = o->next;
1640 if (o == NULL)
1641 break;
1642 - putchar (output_separator);
1643 + PUT_TAB_CHAR;
1644 }
1645 putchar ('\n');
1646 }
1647 @@ -1091,21 +1320,46 @@ main (int argc, char **argv)
1648
1649 case 't':
1650 {
1651 - unsigned char newtab = optarg[0];
1652 + char *newtab = NULL;
1653 + size_t newtablen;
1654 + newtab = xstrdup (optarg);
1655 +#if HAVE_MBRTOWC
1656 + if (MB_CUR_MAX > 1)
1657 + {
1658 + mbstate_t state;
1659 +
1660 + memset (&state, 0, sizeof (mbstate_t));
1661 + newtablen = mbrtowc (NULL, newtab,
1662 + strnlen (newtab, MB_LEN_MAX),
1663 + &state);
1664 + if (newtablen == (size_t) 0
1665 + || newtablen == (size_t) -1
1666 + || newtablen == (size_t) -2)
1667 + newtablen = 1;
1668 + }
1669 + else
1670 +#endif
1671 + newtablen = 1;
1672 if (! newtab)
1673 + {
1674 - newtab = '\n'; /* '' => process the whole line. */
1675 + newtab = "\n"; /* '' => process the whole line. */
1676 + }
1677 else if (optarg[1])
1678 {
1679 - if (STREQ (optarg, "\\0"))
1680 - newtab = '\0';
1681 - else
1682 - error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1683 - quote (optarg));
1684 + if (newtablen == 1 && newtab[1])
1685 + {
1686 + if (STREQ (newtab, "\\0"))
1687 + newtab[0] = '\0';
1688 + }
1689 + }
1690 + if (tab != NULL && strcmp (tab, newtab))
1691 + {
1692 + free (newtab);
1693 + error (EXIT_FAILURE, 0, _("incompatible tabs"));
1694 }
1695 - if (0 <= tab && tab != newtab)
1696 - error (EXIT_FAILURE, 0, _("incompatible tabs"));
1697 tab = newtab;
1698 - }
1699 + tablen = newtablen;
1700 + }
1701 break;
1702
1703 case NOCHECK_ORDER_OPTION:
1704 diff -urNp coreutils-8.13-orig/src/pr.c coreutils-8.13/src/pr.c
1705 --- coreutils-8.13-orig/src/pr.c 2011-08-30 23:01:40.000000000 +0200
1706 +++ coreutils-8.13/src/pr.c 2011-09-09 10:23:14.177658905 +0200
1707 @@ -312,6 +312,32 @@
1708
1709 #include <getopt.h>
1710 #include <sys/types.h>
1711 +
1712 +/* Get MB_LEN_MAX. */
1713 +#include <limits.h>
1714 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1715 + installation; work around this configuration error. */
1716 +#if !defined MB_LEN_MAX || MB_LEN_MAX == 1
1717 +# define MB_LEN_MAX 16
1718 +#endif
1719 +
1720 +/* Get MB_CUR_MAX. */
1721 +#include <stdlib.h>
1722 +
1723 +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
1724 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1725 +#if HAVE_WCHAR_H
1726 +# include <wchar.h>
1727 +#endif
1728 +
1729 +/* Get iswprint(). -- for wcwidth(). */
1730 +#if HAVE_WCTYPE_H
1731 +# include <wctype.h>
1732 +#endif
1733 +#if !defined iswprint && !HAVE_ISWPRINT
1734 +# define iswprint(wc) 1
1735 +#endif
1736 +
1737 #include "system.h"
1738 #include "error.h"
1739 #include "fadvise.h"
1740 @@ -323,6 +349,18 @@
1741 #include "strftime.h"
1742 #include "xstrtol.h"
1743
1744 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1745 +#if HAVE_MBRTOWC && defined mbstate_t
1746 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1747 +#endif
1748 +
1749 +#ifndef HAVE_DECL_WCWIDTH
1750 +"this configure-time declaration test was not run"
1751 +#endif
1752 +#if !HAVE_DECL_WCWIDTH
1753 +extern int wcwidth ();
1754 +#endif
1755 +
1756 /* The official name of this program (e.g., no `g' prefix). */
1757 #define PROGRAM_NAME "pr"
1758
1759 @@ -415,7 +453,20 @@ struct COLUMN
1760
1761 typedef struct COLUMN COLUMN;
1762
1763 -static int char_to_clump (char c);
1764 +/* Function pointers to switch functions for single byte locale or for
1765 + multibyte locale. If multibyte functions do not exist in your system,
1766 + these pointers always point to the functions for single byte locale. */
1767 +static void (*print_char) (char c);
1768 +static int (*char_to_clump) (char c);
1769 +
1770 +/* Functions for single byte locale. */
1771 +static void print_char_single (char c);
1772 +static int char_to_clump_single (char c);
1773 +
1774 +/* Functions for multibyte locale. */
1775 +static void print_char_multi (char c);
1776 +static int char_to_clump_multi (char c);
1777 +
1778 static bool read_line (COLUMN *p);
1779 static bool print_page (void);
1780 static bool print_stored (COLUMN *p);
1781 @@ -425,6 +476,7 @@ static void print_header (void);
1782 static void pad_across_to (int position);
1783 static void add_line_number (COLUMN *p);
1784 static void getoptarg (char *arg, char switch_char, char *character,
1785 + int *character_length, int *character_width,
1786 int *number);
1787 static void print_files (int number_of_files, char **av);
1788 static void init_parameters (int number_of_files);
1789 @@ -439,7 +491,6 @@ static void store_char (char c);
1790 static void pad_down (int lines);
1791 static void read_rest_of_line (COLUMN *p);
1792 static void skip_read (COLUMN *p, int column_number);
1793 -static void print_char (char c);
1794 static void cleanup (void);
1795 static void print_sep_string (void);
1796 static void separator_string (const char *optarg_S);
1797 @@ -451,7 +502,7 @@ static COLUMN *column_vector;
1798 we store the leftmost columns contiguously in buff.
1799 To print a line from buff, get the index of the first character
1800 from line_vector[i], and print up to line_vector[i + 1]. */
1801 -static char *buff;
1802 +static unsigned char *buff;
1803
1804 /* Index of the position in buff where the next character
1805 will be stored. */
1806 @@ -555,7 +606,7 @@ static int chars_per_column;
1807 static bool untabify_input = false;
1808
1809 /* (-e) The input tab character. */
1810 -static char input_tab_char = '\t';
1811 +static char input_tab_char[MB_LEN_MAX] = "\t";
1812
1813 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ...
1814 where the leftmost column is 1. */
1815 @@ -565,7 +616,10 @@ static int chars_per_input_tab = 8;
1816 static bool tabify_output = false;
1817
1818 /* (-i) The output tab character. */
1819 -static char output_tab_char = '\t';
1820 +static char output_tab_char[MB_LEN_MAX] = "\t";
1821 +
1822 +/* (-i) The byte length of output tab character. */
1823 +static int output_tab_char_length = 1;
1824
1825 /* (-i) The width of the output tab. */
1826 static int chars_per_output_tab = 8;
1827 @@ -639,7 +693,13 @@ static int power_10;
1828 static bool numbered_lines = false;
1829
1830 /* (-n) Character which follows each line number. */
1831 -static char number_separator = '\t';
1832 +static char number_separator[MB_LEN_MAX] = "\t";
1833 +
1834 +/* (-n) The byte length of the character which follows each line number. */
1835 +static int number_separator_length = 1;
1836 +
1837 +/* (-n) The character width of the character which follows each line number. */
1838 +static int number_separator_width = 0;
1839
1840 /* (-n) line counting starts with 1st line of input file (not with 1st
1841 line of 1st page printed). */
1842 @@ -692,6 +752,7 @@ static bool use_col_separator = false;
1843 -a|COLUMN|-m is a `space' and with the -J option a `tab'. */
1844 static char *col_sep_string = (char *) "";
1845 static int col_sep_length = 0;
1846 +static int col_sep_width = 0;
1847 static char *column_separator = (char *) " ";
1848 static char *line_separator = (char *) "\t";
1849
1850 @@ -848,6 +909,13 @@ separator_string (const char *optarg_S)
1851 col_sep_length = (int) strlen (optarg_S);
1852 col_sep_string = xmalloc (col_sep_length + 1);
1853 strcpy (col_sep_string, optarg_S);
1854 +
1855 +#if HAVE_MBRTOWC
1856 + if (MB_CUR_MAX > 1)
1857 + col_sep_width = mbswidth (col_sep_string, 0);
1858 + else
1859 +#endif
1860 + col_sep_width = col_sep_length;
1861 }
1862
1863 int
1864 @@ -872,6 +940,21 @@ main (int argc, char **argv)
1865
1866 atexit (close_stdout);
1867
1868 +/* Define which functions are used, the ones for single byte locale or the ones
1869 + for multibyte locale. */
1870 +#if HAVE_MBRTOWC
1871 + if (MB_CUR_MAX > 1)
1872 + {
1873 + print_char = print_char_multi;
1874 + char_to_clump = char_to_clump_multi;
1875 + }
1876 + else
1877 +#endif
1878 + {
1879 + print_char = print_char_single;
1880 + char_to_clump = char_to_clump_single;
1881 + }
1882 +
1883 n_files = 0;
1884 file_names = (argc > 1
1885 ? xmalloc ((argc - 1) * sizeof (char *))
1886 @@ -948,8 +1031,12 @@ main (int argc, char **argv)
1887 break;
1888 case 'e':
1889 if (optarg)
1890 - getoptarg (optarg, 'e', &input_tab_char,
1891 - &chars_per_input_tab);
1892 + {
1893 + int dummy_length, dummy_width;
1894 +
1895 + getoptarg (optarg, 'e', input_tab_char, &dummy_length,
1896 + &dummy_width, &chars_per_input_tab);
1897 + }
1898 /* Could check tab width > 0. */
1899 untabify_input = true;
1900 break;
1901 @@ -962,8 +1049,12 @@ main (int argc, char **argv)
1902 break;
1903 case 'i':
1904 if (optarg)
1905 - getoptarg (optarg, 'i', &output_tab_char,
1906 - &chars_per_output_tab);
1907 + {
1908 + int dummy_width;
1909 +
1910 + getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length,
1911 + &dummy_width, &chars_per_output_tab);
1912 + }
1913 /* Could check tab width > 0. */
1914 tabify_output = true;
1915 break;
1916 @@ -990,8 +1081,8 @@ main (int argc, char **argv)
1917 case 'n':
1918 numbered_lines = true;
1919 if (optarg)
1920 - getoptarg (optarg, 'n', &number_separator,
1921 - &chars_per_number);
1922 + getoptarg (optarg, 'n', number_separator, &number_separator_length,
1923 + &number_separator_width, &chars_per_number);
1924 break;
1925 case 'N':
1926 skip_count = false;
1927 @@ -1030,7 +1121,7 @@ main (int argc, char **argv)
1928 old_s = false;
1929 /* Reset an additional input of -s, -S dominates -s */
1930 col_sep_string = bad_cast ("");
1931 - col_sep_length = 0;
1932 + col_sep_length = col_sep_width = 0;
1933 use_col_separator = true;
1934 if (optarg)
1935 separator_string (optarg);
1936 @@ -1187,10 +1278,45 @@ main (int argc, char **argv)
1937 a number. */
1938
1939 static void
1940 -getoptarg (char *arg, char switch_char, char *character, int *number)
1941 +getoptarg (char *arg, char switch_char, char *character, int *character_length,
1942 + int *character_width, int *number)
1943 {
1944 if (!ISDIGIT (*arg))
1945 - *character = *arg++;
1946 + {
1947 +#ifdef HAVE_MBRTOWC
1948 + if (MB_CUR_MAX > 1) /* for multibyte locale. */
1949 + {
1950 + wchar_t wc;
1951 + size_t mblength;
1952 + int width;
1953 + mbstate_t state = {'\0'};
1954 +
1955 + mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state);
1956 +
1957 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1958 + {
1959 + *character_length = 1;
1960 + *character_width = 1;
1961 + }
1962 + else
1963 + {
1964 + *character_length = (mblength < 1) ? 1 : mblength;
1965 + width = wcwidth (wc);
1966 + *character_width = (width < 0) ? 0 : width;
1967 + }
1968 +
1969 + strncpy (character, arg, *character_length);
1970 + arg += *character_length;
1971 + }
1972 + else /* for single byte locale. */
1973 +#endif
1974 + {
1975 + *character = *arg++;
1976 + *character_length = 1;
1977 + *character_width = 1;
1978 + }
1979 + }
1980 +
1981 if (*arg)
1982 {
1983 long int tmp_long;
1984 @@ -1249,7 +1375,7 @@ init_parameters (int number_of_files)
1985 else
1986 col_sep_string = column_separator;
1987
1988 - col_sep_length = 1;
1989 + col_sep_length = col_sep_width = 1;
1990 use_col_separator = true;
1991 }
1992 /* It's rather pointless to define a TAB separator with column
1993 @@ -1280,11 +1406,11 @@ init_parameters (int number_of_files)
1994 TAB_WIDTH (chars_per_input_tab, chars_per_number); */
1995
1996 /* Estimate chars_per_text without any margin and keep it constant. */
1997 - if (number_separator == '\t')
1998 + if (number_separator[0] == '\t')
1999 number_width = chars_per_number +
2000 TAB_WIDTH (chars_per_default_tab, chars_per_number);
2001 else
2002 - number_width = chars_per_number + 1;
2003 + number_width = chars_per_number + number_separator_width;
2004
2005 /* The number is part of the column width unless we are
2006 printing files in parallel. */
2007 @@ -1299,7 +1425,7 @@ init_parameters (int number_of_files)
2008 }
2009
2010 chars_per_column = (chars_per_line - chars_used_by_number -
2011 - (columns - 1) * col_sep_length) / columns;
2012 + (columns - 1) * col_sep_width) / columns;
2013
2014 if (chars_per_column < 1)
2015 error (EXIT_FAILURE, 0, _("page width too narrow"));
2016 @@ -1424,7 +1550,7 @@ init_funcs (void)
2017
2018 /* Enlarge p->start_position of first column to use the same form of
2019 padding_not_printed with all columns. */
2020 - h = h + col_sep_length;
2021 + h = h + col_sep_width;
2022
2023 /* This loop takes care of all but the rightmost column. */
2024
2025 @@ -1458,7 +1584,7 @@ init_funcs (void)
2026 }
2027 else
2028 {
2029 - h = h_next + col_sep_length;
2030 + h = h_next + col_sep_width;
2031 h_next = h + chars_per_column;
2032 }
2033 }
2034 @@ -1749,9 +1875,9 @@ static void
2035 align_column (COLUMN *p)
2036 {
2037 padding_not_printed = p->start_position;
2038 - if (padding_not_printed - col_sep_length > 0)
2039 + if (padding_not_printed - col_sep_width > 0)
2040 {
2041 - pad_across_to (padding_not_printed - col_sep_length);
2042 + pad_across_to (padding_not_printed - col_sep_width);
2043 padding_not_printed = ANYWHERE;
2044 }
2045
2046 @@ -2022,13 +2148,13 @@ store_char (char c)
2047 /* May be too generous. */
2048 buff = X2REALLOC (buff, &buff_allocated);
2049 }
2050 - buff[buff_current++] = c;
2051 + buff[buff_current++] = (unsigned char) c;
2052 }
2053
2054 static void
2055 add_line_number (COLUMN *p)
2056 {
2057 - int i;
2058 + int i, j;
2059 char *s;
2060 int left_cut;
2061
2062 @@ -2051,22 +2177,24 @@ add_line_number (COLUMN *p)
2063 /* Tabification is assumed for multiple columns, also for n-separators,
2064 but `default n-separator = TAB' hasn't been given priority over
2065 equal column_width also specified by POSIX. */
2066 - if (number_separator == '\t')
2067 + if (number_separator[0] == '\t')
2068 {
2069 i = number_width - chars_per_number;
2070 while (i-- > 0)
2071 (p->char_func) (' ');
2072 }
2073 else
2074 - (p->char_func) (number_separator);
2075 + for (j = 0; j < number_separator_length; j++)
2076 + (p->char_func) (number_separator[j]);
2077 }
2078 else
2079 /* To comply with POSIX, we avoid any expansion of default TAB
2080 separator with a single column output. No column_width requirement
2081 has to be considered. */
2082 {
2083 - (p->char_func) (number_separator);
2084 - if (number_separator == '\t')
2085 + for (j = 0; j < number_separator_length; j++)
2086 + (p->char_func) (number_separator[j]);
2087 + if (number_separator[0] == '\t')
2088 output_position = POS_AFTER_TAB (chars_per_output_tab,
2089 output_position);
2090 }
2091 @@ -2227,7 +2355,7 @@ print_white_space (void)
2092 while (goal - h_old > 1
2093 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
2094 {
2095 - putchar (output_tab_char);
2096 + fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout);
2097 h_old = h_new;
2098 }
2099 while (++h_old <= goal)
2100 @@ -2247,6 +2375,7 @@ print_sep_string (void)
2101 {
2102 char *s;
2103 int l = col_sep_length;
2104 + int not_space_flag;
2105
2106 s = col_sep_string;
2107
2108 @@ -2260,6 +2389,7 @@ print_sep_string (void)
2109 {
2110 for (; separators_not_printed > 0; --separators_not_printed)
2111 {
2112 + not_space_flag = 0;
2113 while (l-- > 0)
2114 {
2115 /* 3 types of sep_strings: spaces only, spaces and chars,
2116 @@ -2273,12 +2403,15 @@ print_sep_string (void)
2117 }
2118 else
2119 {
2120 + not_space_flag = 1;
2121 if (spaces_not_printed > 0)
2122 print_white_space ();
2123 putchar (*s++);
2124 - ++output_position;
2125 }
2126 }
2127 + if (not_space_flag)
2128 + output_position += col_sep_width;
2129 +
2130 /* sep_string ends with some spaces */
2131 if (spaces_not_printed > 0)
2132 print_white_space ();
2133 @@ -2306,7 +2439,7 @@ print_clump (COLUMN *p, int n, char *clu
2134 required number of tabs and spaces. */
2135
2136 static void
2137 -print_char (char c)
2138 +print_char_single (char c)
2139 {
2140 if (tabify_output)
2141 {
2142 @@ -2330,6 +2463,74 @@ print_char (char c)
2143 putchar (c);
2144 }
2145
2146 +#ifdef HAVE_MBRTOWC
2147 +static void
2148 +print_char_multi (char c)
2149 +{
2150 + static size_t mbc_pos = 0;
2151 + static char mbc[MB_LEN_MAX] = {'\0'};
2152 + static mbstate_t state = {'\0'};
2153 + mbstate_t state_bak;
2154 + wchar_t wc;
2155 + size_t mblength;
2156 + int width;
2157 +
2158 + if (tabify_output)
2159 + {
2160 + state_bak = state;
2161 + mbc[mbc_pos++] = c;
2162 + mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
2163 +
2164 + while (mbc_pos > 0)
2165 + {
2166 + switch (mblength)
2167 + {
2168 + case (size_t)-2:
2169 + state = state_bak;
2170 + return;
2171 +
2172 + case (size_t)-1:
2173 + state = state_bak;
2174 + ++output_position;
2175 + putchar (mbc[0]);
2176 + memmove (mbc, mbc + 1, MB_CUR_MAX - 1);
2177 + --mbc_pos;
2178 + break;
2179 +
2180 + case 0:
2181 + mblength = 1;
2182 +
2183 + default:
2184 + if (wc == L' ')
2185 + {
2186 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
2187 + --mbc_pos;
2188 + ++spaces_not_printed;
2189 + return;
2190 + }
2191 + else if (spaces_not_printed > 0)
2192 + print_white_space ();
2193 +
2194 + /* Nonprintables are assumed to have width 0, except L'\b'. */
2195 + if ((width = wcwidth (wc)) < 1)
2196 + {
2197 + if (wc == L'\b')
2198 + --output_position;
2199 + }
2200 + else
2201 + output_position += width;
2202 +
2203 + fwrite (mbc, sizeof(char), mblength, stdout);
2204 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
2205 + mbc_pos -= mblength;
2206 + }
2207 + }
2208 + return;
2209 + }
2210 + putchar (c);
2211 +}
2212 +#endif
2213 +
2214 /* Skip to page PAGE before printing.
2215 PAGE may be larger than total number of pages. */
2216
2217 @@ -2509,9 +2710,9 @@ read_line (COLUMN *p)
2218 align_empty_cols = false;
2219 }
2220
2221 - if (padding_not_printed - col_sep_length > 0)
2222 + if (padding_not_printed - col_sep_width > 0)
2223 {
2224 - pad_across_to (padding_not_printed - col_sep_length);
2225 + pad_across_to (padding_not_printed - col_sep_width);
2226 padding_not_printed = ANYWHERE;
2227 }
2228
2229 @@ -2612,9 +2813,9 @@ print_stored (COLUMN *p)
2230 }
2231 }
2232
2233 - if (padding_not_printed - col_sep_length > 0)
2234 + if (padding_not_printed - col_sep_width > 0)
2235 {
2236 - pad_across_to (padding_not_printed - col_sep_length);
2237 + pad_across_to (padding_not_printed - col_sep_width);
2238 padding_not_printed = ANYWHERE;
2239 }
2240
2241 @@ -2627,8 +2828,8 @@ print_stored (COLUMN *p)
2242 if (spaces_not_printed == 0)
2243 {
2244 output_position = p->start_position + end_vector[line];
2245 - if (p->start_position - col_sep_length == chars_per_margin)
2246 - output_position -= col_sep_length;
2247 + if (p->start_position - col_sep_width == chars_per_margin)
2248 + output_position -= col_sep_width;
2249 }
2250
2251 return true;
2252 @@ -2647,7 +2848,7 @@ print_stored (COLUMN *p)
2253 number of characters is 1.) */
2254
2255 static int
2256 -char_to_clump (char c)
2257 +char_to_clump_single (char c)
2258 {
2259 unsigned char uc = c;
2260 char *s = clump_buff;
2261 @@ -2657,10 +2858,10 @@ char_to_clump (char c)
2262 int chars;
2263 int chars_per_c = 8;
2264
2265 - if (c == input_tab_char)
2266 + if (c == input_tab_char[0])
2267 chars_per_c = chars_per_input_tab;
2268
2269 - if (c == input_tab_char || c == '\t')
2270 + if (c == input_tab_char[0] || c == '\t')
2271 {
2272 width = TAB_WIDTH (chars_per_c, input_position);
2273
2274 @@ -2741,6 +2942,155 @@ char_to_clump (char c)
2275 return chars;
2276 }
2277
2278 +#ifdef HAVE_MBRTOWC
2279 +static int
2280 +char_to_clump_multi (char c)
2281 +{
2282 + /* Feed byte C to the decoder; once a character completes, write its clump to clump_buff, add its width to input_position and return the byte count (0 while incomplete). */
2283 + static size_t mbc_pos = 0;
2284 + static char mbc[MB_LEN_MAX] = {'\0'};
2285 + static mbstate_t state = {'\0'};
2286 + mbstate_t state_bak;
2287 + wchar_t wc;
2288 + size_t mblength;
2289 + int wc_width;
2290 + register char *s = clump_buff;
2291 + register int i, j;
2292 + char esc_buff[4];
2293 + int width;
2294 + int chars;
2295 + int chars_per_c = 8;
2296 +
2297 + state_bak = state;
2298 + mbc[mbc_pos++] = c;
2299 + mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
2300 +
2301 + width = 0;
2302 + chars = 0;
2303 + while (mbc_pos > 0)
2304 + {
2305 + switch (mblength)
2306 + {
2307 + case (size_t)-2: /* Incomplete sequence: keep the bytes and wait for more. */
2308 + state = state_bak;
2309 + return 0;
2310 +
2311 + case (size_t)-1: /* Invalid sequence: emit the first pending byte on its own. */
2312 + state = state_bak;
2313 + mblength = 1;
2314 +
2315 + if (use_esc_sequence || use_cntrl_prefix)
2316 + {
2317 + width += 4; /* accumulate, like every other branch */
2318 + chars += 4;
2319 + *s++ = '\\';
2320 + sprintf (esc_buff, "%03o", (unsigned char) mbc[0]); /* cast: a negative char would sign-extend and overflow esc_buff */
2321 + for (i = 0; i <= 2; ++i)
2322 + *s++ = (int) esc_buff[i];
2323 + }
2324 + else
2325 + {
2326 + width += 1;
2327 + chars += 1;
2328 + *s++ = mbc[0];
2329 + }
2330 + break;
2331 +
2332 + case 0:
2333 + mblength = 1;
2334 + /* Fall through */
2335 +
2336 + default:
2337 + if (memcmp (mbc, input_tab_char, mblength) == 0)
2338 + chars_per_c = chars_per_input_tab;
2339 +
2340 + if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t')
2341 + {
2342 + int width_inc;
2343 +
2344 + width_inc = TAB_WIDTH (chars_per_c, input_position);
2345 + width += width_inc;
2346 +
2347 + if (untabify_input)
2348 + {
2349 + for (i = width_inc; i; --i)
2350 + *s++ = ' ';
2351 + chars += width_inc;
2352 + }
2353 + else
2354 + {
2355 + for (i = 0; i < mblength; i++)
2356 + *s++ = mbc[i];
2357 + chars += mblength;
2358 + }
2359 + }
2360 + else if ((wc_width = wcwidth (wc)) < 1)
2361 + {
2362 + if (use_esc_sequence)
2363 + {
2364 + for (i = 0; i < mblength; i++)
2365 + {
2366 + width += 4;
2367 + chars += 4;
2368 + *s++ = '\\';
2369 + sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); /* escape each byte of the character, not just the last one read */
2370 + for (j = 0; j <= 2; ++j)
2371 + *s++ = (int) esc_buff[j];
2372 + }
2373 + }
2374 + else if (use_cntrl_prefix)
2375 + {
2376 + if (wc < 0200)
2377 + {
2378 + width += 2;
2379 + chars += 2;
2380 + *s++ = '^';
2381 + *s++ = wc ^ 0100;
2382 + }
2383 + else
2384 + {
2385 + for (i = 0; i < mblength; i++)
2386 + {
2387 + width += 4;
2388 + chars += 4;
2389 + *s++ = '\\';
2390 + sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); /* escape each byte of the character, not just the last one read */
2391 + for (j = 0; j <= 2; ++j)
2392 + *s++ = (int) esc_buff[j];
2393 + }
2394 + }
2395 + }
2396 + else if (wc == L'\b')
2397 + {
2398 + width += -1;
2399 + chars += 1;
2400 + *s++ = c;
2401 + }
2402 + else
2403 + {
2404 + width += 0;
2405 + chars += mblength;
2406 + for (i = 0; i < mblength; i++)
2407 + *s++ = mbc[i];
2408 + }
2409 + }
2410 + else
2411 + {
2412 + width += wc_width;
2413 + chars += mblength;
2414 + for (i = 0; i < mblength; i++)
2415 + *s++ = mbc[i];
2416 + }
2417 + }
2418 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); /* drop the bytes just consumed */
2419 + mbc_pos -= mblength;
2420 + }
2421 +
2422 + input_position += width;
2423 + return chars;
2424 +}
2425 +#endif
2426 +
2427 /* We've just printed some files and need to clean up things before
2428 looking for more options and printing the next batch of files.
2429
2430 diff -urNp coreutils-8.13-orig/src/sort.c coreutils-8.13/src/sort.c
2431 --- coreutils-8.13-orig/src/sort.c 2011-07-29 10:12:25.000000000 +0200
2432 +++ coreutils-8.13/src/sort.c 2011-09-09 10:23:14.183686800 +0200
2433 @@ -22,11 +22,20 @@
2434
2435 #include <config.h>
2436
2437 +#include <assert.h>
2438 #include <getopt.h>
2439 #include <pthread.h>
2440 #include <sys/types.h>
2441 #include <sys/wait.h>
2442 #include <signal.h>
2443 +#if HAVE_WCHAR_H
2444 +# include <wchar.h>
2445 +#endif
2446 +/* Get isw* functions. */
2447 +#if HAVE_WCTYPE_H
2448 +# include <wctype.h>
2449 +#endif
2450 +
2451 #include "system.h"
2452 #include "argmatch.h"
2453 #include "error.h"
2454 @@ -167,12 +176,34 @@ static int thousands_sep;
2455
2456 /* Nonzero if the corresponding locales are hard. */
2457 static bool hard_LC_COLLATE;
2458 -#if HAVE_NL_LANGINFO
2459 +#if HAVE_LANGINFO_CODESET
2460 static bool hard_LC_TIME;
2461 #endif
2462
2463 #define NONZERO(x) ((x) != 0)
2464
2465 +/* Set MBLENGTH to the byte length of the character at PTR (bounded by LIM); invalid or incomplete input counts as 1 byte with STATE restored, NUL counts as 1 byte. */
2466 +#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
2467 + do \
2468 + { \
2469 + wchar_t wc; \
2470 + mbstate_t state_bak; \
2471 + \
2472 + state_bak = STATE; \
2473 + (MBLENGTH) = mbrtowc (&wc, (PTR), (LIM) - (PTR), &(STATE)); \
2474 + \
2475 + switch (MBLENGTH) \
2476 + { \
2477 + case (size_t)-1: \
2478 + case (size_t)-2: \
2479 + STATE = state_bak; \
2480 + /* Fall through. */ \
2481 + case 0: \
2482 + MBLENGTH = 1; \
2483 + } \
2484 + } \
2485 + while (0)
2486 +
2487 /* The kind of blanks for '-b' to skip in various options. */
2488 enum blanktype { bl_start, bl_end, bl_both };
2489
2490 @@ -343,13 +374,11 @@ static bool reverse;
2491 they were read if all keys compare equal. */
2492 static bool stable;
2493
2494 -/* If TAB has this value, blanks separate fields. */
2495 -enum { TAB_DEFAULT = CHAR_MAX + 1 };
2496 -
2497 -/* Tab character separating fields. If TAB_DEFAULT, then fields are
2498 +/* Tab character separating fields. If tab_length is 0, then fields are
2499 separated by the empty string between a non-blank character and a blank
2500 character. */
2501 -static int tab = TAB_DEFAULT;
2502 +static char tab[MB_LEN_MAX + 1];
2503 +static size_t tab_length = 0;
2504
2505 /* Flag to remove consecutive duplicate lines from the output.
2506 Only the last of a sequence of equal lines will be output. */
2507 @@ -783,6 +812,46 @@ reap_all (void)
2508 reap (-1);
2509 }
2510
2511 +/* Function pointers; main binds each to its _mb variant when MB_CUR_MAX > 1, otherwise to its _uni variant. */
2512 +static void
2513 +(*inittables) (void);
2514 +static char *
2515 +(*begfield) (const struct line*, const struct keyfield *);
2516 +static char *
2517 +(*limfield) (const struct line*, const struct keyfield *);
2518 +static void
2519 +(*skipblanks) (char **ptr, char *lim);
2520 +static int
2521 +(*getmonth) (char const *, size_t, char **);
2522 +static int
2523 +(*keycompare) (const struct line *, const struct line *);
2524 +static int
2525 +(*numcompare) (const char *, const char *);
2526 +
2527 +/* Return nonzero if the multibyte character at STR (at most LEN bytes) is blank.
2528 + Store its byte length in *LENGTH; invalid, incomplete or NUL input counts as 1. */
2529 +#if HAVE_MBRTOWC
2530 +static int
2531 +ismbblank (const char *str, size_t len, size_t *length)
2532 +{
2533 + size_t mblength;
2534 + wchar_t wc;
2535 + mbstate_t state;
2536 +
2537 + memset (&state, '\0', sizeof(mbstate_t)); /* every call decodes from the initial shift state */
2538 + mblength = mbrtowc (&wc, str, len, &state);
2539 +
2540 + if (mblength == (size_t)-1 || mblength == (size_t)-2) /* invalid or incomplete: never blank */
2541 + {
2542 + *length = 1;
2543 + return 0;
2544 + }
2545 +
2546 + *length = (mblength < 1) ? 1 : mblength; /* a decoded NUL (0) still occupies one byte */
2547 + return iswblank (wc);
2548 +}
2549 +#endif
2550 +
2551 /* Clean up any remaining temporary files. */
2552
2553 static void
2554 @@ -1215,7 +1284,7 @@ zaptemp (char const *name)
2555 free (node);
2556 }
2557
2558 -#if HAVE_NL_LANGINFO
2559 +#if HAVE_LANGINFO_CODESET
2560
2561 static int
2562 struct_month_cmp (void const *m1, void const *m2)
2563 @@ -1230,7 +1299,7 @@ struct_month_cmp (void const *m1, void c
2564 /* Initialize the character class tables. */
2565
2566 static void
2567 -inittables (void)
2568 +inittables_uni (void)
2569 {
2570 size_t i;
2571
2572 @@ -1242,7 +1311,7 @@ inittables (void)
2573 fold_toupper[i] = toupper (i);
2574 }
2575
2576 -#if HAVE_NL_LANGINFO
2577 +#if HAVE_LANGINFO_CODESET
2578 /* If we're not in the "C" locale, read different names for months. */
2579 if (hard_LC_TIME)
2580 {
2581 @@ -1324,6 +1393,84 @@ specify_nmerge (int oi, char c, char con
2582 xstrtol_fatal (e, oi, c, long_options, s);
2583 }
2584
2585 +#if HAVE_MBRTOWC
2586 +static void
2587 +inittables_mb (void)
2588 +{ /* Fill monthtab with the upper-cased, blank-stripped abbreviated month names of the locale, then sort it. */
2589 + int i, j, k, l;
2590 + char *name, *s, *lc_time, *lc_ctype;
2591 + size_t s_len, mblength;
2592 + char mbc[MB_LEN_MAX];
2593 + wchar_t wc, pwc;
2594 + mbstate_t state_mb, state_wc;
2595 +
2596 + lc_time = setlocale (LC_TIME, "");
2597 + if (lc_time)
2598 + lc_time = xstrdup (lc_time); /* copy the returned name before the next setlocale call */
2599 +
2600 + lc_ctype = setlocale (LC_CTYPE, "");
2601 + if (lc_ctype)
2602 + lc_ctype = xstrdup (lc_ctype);
2603 +
2604 + if (lc_time && lc_ctype)
2605 + /* Temporarily set LC_CTYPE to match LC_TIME, so that we can convert
2606 + * the names of months to upper case.  */
2607 + setlocale (LC_CTYPE, lc_time);
2608 +
2609 + for (i = 0; i < MONTHS_PER_YEAR; i++)
2610 + {
2611 + s = (char *) nl_langinfo (ABMON_1 + i);
2612 + s_len = strlen (s);
2613 + monthtab[i].name = name = (char *) xmalloc (s_len + 1); /* NOTE(review): sized for the source bytes; assumes the upper-cased form is no longer -- confirm */
2614 + monthtab[i].val = i + 1;
2615 +
2616 + memset (&state_mb, '\0', sizeof (mbstate_t));
2617 + memset (&state_wc, '\0', sizeof (mbstate_t));
2618 +
2619 + for (j = 0; j < s_len;) /* skip leading blanks of the month name */
2620 + {
2621 + if (!ismbblank (s + j, s_len - j, &mblength))
2622 + break;
2623 + j += mblength;
2624 + }
2625 +
2626 + for (k = 0; j < s_len;) /* copy the rest, upper-casing character by character */
2627 + {
2628 + mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
2629 + assert (mblength != (size_t)-1 && mblength != (size_t)-2);
2630 + if (mblength == 0)
2631 + break;
2632 +
2633 + pwc = towupper (wc);
2634 + if (pwc == wc) /* already upper case: copy the original bytes */
2635 + {
2636 + memcpy (mbc, s + j, mblength);
2637 + j += mblength;
2638 + }
2639 + else /* re-encode the upper-case form */
2640 + {
2641 + j += mblength;
2642 + mblength = wcrtomb (mbc, pwc, &state_wc);
2643 + assert (mblength != (size_t)0 && mblength != (size_t)-1);
2644 + }
2645 +
2646 + for (l = 0; l < mblength; l++)
2647 + name[k++] = mbc[l];
2648 + }
2649 + name[k] = '\0';
2650 + }
2651 + qsort ((void *) monthtab, MONTHS_PER_YEAR,
2652 + sizeof (struct month), struct_month_cmp);
2653 +
2654 + if (lc_time && lc_ctype)
2655 + /* Restore the original LC_CTYPE locale.  */
2656 + setlocale (LC_CTYPE, lc_ctype);
2657 +
2658 + free (lc_ctype);
2659 + free (lc_time);
2660 +}
2661 +#endif
2662 +
2663 /* Specify the amount of main memory to use when sorting. */
2664 static void
2665 specify_sort_size (int oi, char c, char const *s)
2666 @@ -1552,7 +1699,7 @@ buffer_linelim (struct buffer const *buf
2667 by KEY in LINE. */
2668
2669 static char *
2670 -begfield (struct line const *line, struct keyfield const *key)
2671 +begfield_uni (const struct line *line, const struct keyfield *key)
2672 {
2673 char *ptr = line->text, *lim = ptr + line->length - 1;
2674 size_t sword = key->sword;
2675 @@ -1561,10 +1708,10 @@ begfield (struct line const *line, struc
2676 /* The leading field separator itself is included in a field when -t
2677 is absent. */
2678
2679 - if (tab != TAB_DEFAULT)
2680 + if (tab_length)
2681 while (ptr < lim && sword--)
2682 {
2683 - while (ptr < lim && *ptr != tab)
2684 + while (ptr < lim && *ptr != tab[0])
2685 ++ptr;
2686 if (ptr < lim)
2687 ++ptr;
2688 @@ -1590,11 +1737,70 @@ begfield (struct line const *line, struc
2689 return ptr;
2690 }
2691
2692 +#if HAVE_MBRTOWC
2693 +static char *
2694 +begfield_mb (const struct line *line, const struct keyfield *key)
2695 +{ /* Multibyte counterpart of begfield_uni: return the start of the field KEY selects in LINE. */
2696 + int i;
2697 + char *ptr = line->text, *lim = ptr + line->length - 1;
2698 + size_t sword = key->sword;
2699 + size_t schar = key->schar;
2700 + size_t mblength;
2701 + mbstate_t state;
2702 +
2703 + memset (&state, '\0', sizeof(mbstate_t));
2704 +
2705 + if (tab_length) /* an explicit separator was given: skip SWORD separated fields */
2706 + while (ptr < lim && sword--)
2707 + {
2708 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
2709 + {
2710 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2711 + ptr += mblength;
2712 + }
2713 + if (ptr < lim) /* step over the separator itself */
2714 + {
2715 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2716 + ptr += mblength;
2717 + }
2718 + }
2719 + else /* no separator: fields are runs of blanks followed by non-blanks */
2720 + while (ptr < lim && sword--)
2721 + {
2722 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2723 + ptr += mblength;
2724 + if (ptr < lim)
2725 + {
2726 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2727 + ptr += mblength;
2728 + }
2729 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
2730 + ptr += mblength;
2731 + }
2732 +
2733 + if (key->skipsblanks)
2734 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2735 + ptr += mblength;
2736 +
2737 + for (i = 0; i < schar; i++) /* advance SCHAR characters, never past LIM */
2738 + {
2739 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2740 +
2741 + if (ptr + mblength > lim)
2742 + break;
2743 + else
2744 + ptr += mblength;
2745 + }
2746 +
2747 + return ptr;
2748 +}
2749 +#endif
2750 +
2751 /* Return the limit of (a pointer to the first character after) the field
2752 in LINE specified by KEY. */
2753
2754 static char *
2755 -limfield (struct line const *line, struct keyfield const *key)
2756 +limfield_uni (const struct line *line, const struct keyfield *key)
2757 {
2758 char *ptr = line->text, *lim = ptr + line->length - 1;
2759 size_t eword = key->eword, echar = key->echar;
2760 @@ -1609,10 +1815,10 @@ limfield (struct line const *line, struc
2761 `beginning' is the first character following the delimiting TAB.
2762 Otherwise, leave PTR pointing at the first `blank' character after
2763 the preceding field. */
2764 - if (tab != TAB_DEFAULT)
2765 + if (tab_length)
2766 while (ptr < lim && eword--)
2767 {
2768 - while (ptr < lim && *ptr != tab)
2769 + while (ptr < lim && *ptr != tab[0])
2770 ++ptr;
2771 if (ptr < lim && (eword || echar))
2772 ++ptr;
2773 @@ -1658,10 +1864,10 @@ limfield (struct line const *line, struc
2774 */
2775
2776 /* Make LIM point to the end of (one byte past) the current field. */
2777 - if (tab != TAB_DEFAULT)
2778 + if (tab_length)
2779 {
2780 char *newlim;
2781 - newlim = memchr (ptr, tab, lim - ptr);
2782 + newlim = memchr (ptr, tab[0], lim - ptr);
2783 if (newlim)
2784 lim = newlim;
2785 }
2786 @@ -1692,6 +1898,130 @@ limfield (struct line const *line, struc
2787 return ptr;
2788 }
2789
2790 +#if HAVE_MBRTOWC
2791 +static char *
2792 +limfield_mb (const struct line *line, const struct keyfield *key)
2793 +{ /* Multibyte counterpart of limfield_uni: return the limit of the field KEY selects in LINE. */
2794 + char *ptr = line->text, *lim = ptr + line->length - 1;
2795 + size_t eword = key->eword, echar = key->echar;
2796 + int i;
2797 + size_t mblength;
2798 + mbstate_t state;
2799 +
2800 + if (echar == 0)
2801 + eword++; /* skip all of end field. */
2802 +
2803 + memset (&state, '\0', sizeof(mbstate_t));
2804 +
2805 + if (tab_length)
2806 + while (ptr < lim && eword--)
2807 + {
2808 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
2809 + {
2810 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2811 + ptr += mblength;
2812 + }
2813 + if (ptr < lim && (eword | echar))
2814 + {
2815 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2816 + ptr += mblength;
2817 + }
2818 + }
2819 + else
2820 + while (ptr < lim && eword--)
2821 + {
2822 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2823 + ptr += mblength;
2824 + if (ptr < lim)
2825 + {
2826 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2827 + ptr += mblength;
2828 + }
2829 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
2830 + ptr += mblength;
2831 + }
2832 +
2833 +
2834 +# ifdef POSIX_UNSPECIFIED
2835 + /* Make LIM point to the end of (one byte past) the current field. */
2836 + if (tab_length)
2837 + {
2838 + char *newlim, *p;
2839 +
2840 + newlim = lim; /* no separator found: LIM stays where it is */
2841 + for (p = ptr; p < lim;)
2842 + {
2843 + if (memcmp (p, tab, tab_length) == 0)
2844 + {
2845 + newlim = p;
2846 + break;
2847 + }
2848 + GET_BYTELEN_OF_CHAR (lim, p, mblength, state); /* measure the character at the scan position P, not at PTR */
2849 + p += mblength;
2850 + }
2851 + lim = newlim; /* the field ends at the next separator, as in limfield_uni */
2852 + }
2853 + else
2854 + {
2855 + char *newlim;
2856 + newlim = ptr;
2857 +
2858 + while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
2859 + newlim += mblength;
2860 + if (newlim < lim) /* only NEWLIM scans ahead; PTR must stay at the field start */
2861 + {
2862 + GET_BYTELEN_OF_CHAR (lim, newlim, mblength, state);
2863 + newlim += mblength;
2864 + }
2865 + while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
2866 + newlim += mblength;
2867 + lim = newlim;
2868 + }
2869 +# endif
2870 +
2871 + if (echar != 0)
2872 + {
2873 + /* If we're skipping leading blanks, don't start counting characters
2874 + * until after skipping past any leading blanks. */
2875 + if (key->skipsblanks)
2876 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2877 + ptr += mblength;
2878 +
2879 + memset (&state, '\0', sizeof(mbstate_t));
2880 +
2881 + /* Advance PTR by ECHAR (if possible), but no further than LIM. */
2882 + for (i = 0; i < echar; i++)
2883 + {
2884 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2885 +
2886 + if (ptr + mblength > lim)
2887 + break;
2888 + else
2889 + ptr += mblength;
2890 + }
2891 + }
2892 +
2893 + return ptr;
2894 +}
2895 +#endif
2896 +
2897 +static void
2898 +skipblanks_uni (char **ptr, char *lim)
2899 +{ /* Advance *PTR past bytes flagged in the blanks table, stopping at LIM. */
2900 + while (*ptr < lim && blanks[to_uchar (**ptr)])
2901 + ++(*ptr);
2902 +}
2903 +
2904 +#if HAVE_MBRTOWC
2905 +static void
2906 +skipblanks_mb (char **ptr, char *lim)
2907 +{ /* Advance *PTR past multibyte blank characters, stopping at LIM. */
2908 + size_t mblength;
2909 + while (*ptr < lim && ismbblank (*ptr, lim - *ptr, &mblength))
2910 + (*ptr) += mblength;
2911 +}
2912 +#endif
2913 +
2914 /* Fill BUF reading from FP, moving buf->left bytes from the end
2915 of buf->buf to the beginning first. If EOF is reached and the
2916 file wasn't terminated by a newline, supply one. Set up BUF's line
2917 @@ -1778,8 +2108,22 @@ fillbuf (struct buffer *buf, FILE *fp, c
2918 else
2919 {
2920 if (key->skipsblanks)
2921 - while (blanks[to_uchar (*line_start)])
2922 - line_start++;
2923 + {
2924 +#if HAVE_MBRTOWC
2925 + if (MB_CUR_MAX > 1)
2926 + {
2927 + size_t mblength;
2928 + while (line_start < line->keylim &&
2929 + ismbblank (line_start,
2930 + line->keylim - line_start,
2931 + &mblength))
2932 + line_start += mblength;
2933 + }
2934 + else
2935 +#endif
2936 + while (blanks[to_uchar (*line_start)])
2937 + line_start++;
2938 + }
2939 line->keybeg = line_start;
2940 }
2941 }
2942 @@ -1900,7 +2244,7 @@ human_numcompare (char const *a, char co
2943 hideously fast. */
2944
2945 static int
2946 -numcompare (char const *a, char const *b)
2947 +numcompare_uni (const char *a, const char *b)
2948 {
2949 while (blanks[to_uchar (*a)])
2950 a++;
2951 @@ -1910,6 +2254,25 @@ numcompare (char const *a, char const *b
2952 return strnumcmp (a, b, decimal_point, thousands_sep);
2953 }
2954
2955 +#if HAVE_MBRTOWC
2956 +static int
2957 +numcompare_mb (const char *a, const char *b)
2958 +{ /* Skip leading multibyte blanks in A and B, then compare them as numbers with strnumcmp. */
2959 + size_t mblength, len;
2960 + len = strlen (a); /* okay for UTF-8 */
2961 + while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
2962 + {
2963 + a += mblength;
2964 + len -= mblength;
2965 + }
2966 + len = strlen (b); /* okay for UTF-8 */
2967 + while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
2968 + b += mblength, len -= mblength; /* keep LEN in step with B, as is done for A */
2969 +
2970 + return strnumcmp (a, b, decimal_point, thousands_sep);
2971 +}
2972 +#endif /* HAVE_MBRTOWC */
2973 +
2974 /* Work around a problem whereby the long double value returned by glibc's
2975 strtold ("NaN", ...) contains uninitialized bits: clear all bytes of
2976 A and B before calling strtold. FIXME: remove this function once
2977 @@ -1942,7 +2305,7 @@ general_numcompare (char const *sa, char
2978 Return 0 if the name in S is not recognized. */
2979
2980 static int
2981 -getmonth (char const *month, char **ea)
2982 +getmonth_uni (char const *month, size_t len, char **ea)
2983 {
2984 size_t lo = 0;
2985 size_t hi = MONTHS_PER_YEAR;
2986 @@ -2217,15 +2580,14 @@ debug_key (struct line const *line, stru
2987 char saved = *lim;
2988 *lim = '\0';
2989
2990 - while (blanks[to_uchar (*beg)])
2991 - beg++;
2992 + skipblanks (&beg, lim);
2993
2994 char *tighter_lim = beg;
2995
2996 if (lim < beg)
2997 tighter_lim = lim;
2998 else if (key->month)
2999 - getmonth (beg, &tighter_lim);
3000 + getmonth (beg, lim-beg, &tighter_lim);
3001 else if (key->general_numeric)
3002 ignore_value (strtold (beg, &tighter_lim));
3003 else if (key->numeric || key->human_numeric)
3004 @@ -2369,7 +2731,7 @@ key_warnings (struct keyfield const *gke
3005 bool maybe_space_aligned = !hard_LC_COLLATE && default_key_compare (key)
3006 && !(key->schar || key->echar);
3007 bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */
3008 - if (!gkey_only && tab == TAB_DEFAULT && !line_offset
3009 + if (!gkey_only && !tab_length && !line_offset
3010 && ((!key->skipsblanks && !(implicit_skip || maybe_space_aligned))
3011 || (!key->skipsblanks && key->schar)
3012 || (!key->skipeblanks && key->echar)))
3013 @@ -2427,11 +2789,83 @@ key_warnings (struct keyfield const *gke
3014 error (0, 0, _("option `-r' only applies to last-resort comparison"));
3015 }
3016
3017 +#if HAVE_MBRTOWC
3018 +static int
3019 +getmonth_mb (const char *s, size_t len, char **ea)
3020 +{ /* Return the monthtab value of the month name that starts S (after leading blanks), or 0 if none matches; on a match set *EA just past the name. */
3021 + char *month;
3022 + register size_t i;
3023 + register int lo = 0, hi = MONTHS_PER_YEAR, result;
3024 + char *tmp;
3025 + size_t wclength, mblength;
3026 + const char **pp;
3027 + const wchar_t **wpp;
3028 + wchar_t *month_wcs;
3029 + mbstate_t state;
3030 +
3031 + while (len > 0 && ismbblank (s, len, &mblength))
3032 + {
3033 + s += mblength;
3034 + len -= mblength;
3035 + }
3036 +
3037 + if (len == 0)
3038 + return 0;
3039 +
3040 + month = (char *) alloca (len + 1);
3041 +
3042 + tmp = (char *) alloca (len + 1);
3043 + memcpy (tmp, s, len);
3044 + tmp[len] = '\0';
3045 + pp = (const char **)&tmp;
3046 + month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t));
3047 + memset (&state, '\0', sizeof(mbstate_t));
3048 +
3049 + wclength = mbsrtowcs (month_wcs, pp, len + 1, &state);
3050 + if (wclength == (size_t)-1 || *pp != NULL)
3051 + error (SORT_FAILURE, 0, _("Invalid multibyte input %s."), quote(s));
3052 +
3053 + for (i = 0; i < wclength; i++) /* upper-case the key and cut it at the first blank */
3054 + {
3055 + month_wcs[i] = towupper(month_wcs[i]);
3056 + if (iswblank (month_wcs[i]))
3057 + {
3058 + month_wcs[i] = L'\0';
3059 + break;
3060 + }
3061 + }
3062 +
3063 + wpp = (const wchar_t **)&month_wcs;
3064 +
3065 + mblength = wcsrtombs (month, wpp, len + 1, &state);
3066 + assert (mblength != (-1) && *wpp == NULL);
3067 +
3068 + do /* binary search of the sorted monthtab */
3069 + {
3070 + int ix = (lo + hi) / 2;
3071 +
3072 + if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
3073 + hi = ix;
3074 + else
3075 + lo = ix;
3076 + }
3077 + while (hi - lo > 1);
3078 +
3079 + result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
3080 + ? monthtab[lo].val : 0);
3081 +
3082 + if (ea && result) /* MONTH is a stack copy; report a position inside the caller's text, and only on a match */
3083 + *ea = (char *) s + strlen (monthtab[lo].name);
3084 +
3085 + return result;
3086 +}
3087 +#endif
3088 +
3089 /* Compare two lines A and B trying every key in sequence until there
3090 are no more keys or a difference is found. */
3091
3092 static int
3093 -keycompare (struct line const *a, struct line const *b)
3094 +keycompare_uni (const struct line *a, const struct line *b)
3095 {
3096 struct keyfield *key = keylist;
3097
3098 @@ -2516,7 +2950,7 @@ keycompare (struct line const *a, struct
3099 else if (key->human_numeric)
3100 diff = human_numcompare (ta, tb);
3101 else if (key->month)
3102 - diff = getmonth (ta, NULL) - getmonth (tb, NULL);
3103 + diff = getmonth (ta, tlena, NULL) - getmonth (tb, tlenb, NULL);
3104 else if (key->random)
3105 diff = compare_random (ta, tlena, tb, tlenb);
3106 else if (key->version)
3107 @@ -2632,6 +3066,180 @@ keycompare (struct line const *a, struct
3108 return key->reverse ? -diff : diff;
3109 }
3110
3111 +#if HAVE_MBRTOWC
3112 +static int
3113 +keycompare_mb (const struct line *a, const struct line *b)
3114 +{ /* Multibyte counterpart of keycompare_uni: compare A and B key by key until a key differs or keys run out. */
3115 + struct keyfield *key = keylist;
3116 +
3117 + /* For the first iteration only, the key positions have been
3118 + precomputed for us. */
3119 + char *texta = a->keybeg;
3120 + char *textb = b->keybeg;
3121 + char *lima = a->keylim;
3122 + char *limb = b->keylim;
3123 +
3124 + size_t mblength_a, mblength_b;
3125 + wchar_t wc_a, wc_b;
3126 + mbstate_t state_a, state_b;
3127 +
3128 + int diff;
3129 +
3130 + memset (&state_a, '\0', sizeof(mbstate_t));
3131 + memset (&state_b, '\0', sizeof(mbstate_t));
3132 +
3133 + for (;;)
3134 + {
3135 + char const *translate = key->translate;
3136 + bool const *ignore = key->ignore;
3137 +
3138 + /* Find the lengths. */
3139 + size_t lena = lima <= texta ? 0 : lima - texta;
3140 + size_t lenb = limb <= textb ? 0 : limb - textb;
3141 +
3142 + /* Actually compare the fields. */
3143 + if (key->random)
3144 + diff = compare_random (texta, lena, textb, lenb);
3145 + else if (key->numeric | key->general_numeric | key->human_numeric)
3146 + {
3147 + char savea = *lima, saveb = *limb;
3148 +
3149 + *lima = *limb = '\0'; /* NUL-terminate both keys for the numeric comparers; restored below */
3150 + diff = (key->numeric ? numcompare (texta, textb)
3151 + : key->general_numeric ? general_numcompare (texta, textb)
3152 + : human_numcompare (texta, textb));
3153 + *lima = savea, *limb = saveb;
3154 + }
3155 + else if (key->version)
3156 + diff = filevercmp (texta, textb);
3157 + else if (key->month)
3158 + diff = getmonth (texta, lena, NULL) - getmonth (textb, lenb, NULL);
3159 + else
3160 + {
3161 + if (ignore || translate)
3162 + {
3163 + char *copy_a = (char *) alloca (lena + 1 + lenb + 1); /* NOTE(review): stack buffer sized for the source bytes; assumes translated text is no longer and keys are short -- confirm */
3164 + char *copy_b = copy_a + lena + 1;
3165 + size_t new_len_a, new_len_b;
3166 + size_t i, j;
3167 +
3168 + /* Ignore and/or translate chars before comparing. */
3169 +# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \
3170 + do \
3171 + { \
3172 + wchar_t uwc; \
3173 + char mbc[MB_LEN_MAX]; \
3174 + mbstate_t state_wc; \
3175 + \
3176 + for (NEW_LEN = i = 0; i < LEN;) \
3177 + { \
3178 + mbstate_t state_bak; \
3179 + \
3180 + state_bak = STATE; \
3181 + MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \
3182 + \
3183 + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \
3184 + || MBLENGTH == 0) \
3185 + { \
3186 + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \
3187 + STATE = state_bak; \
3188 + if (!ignore) \
3189 + COPY[NEW_LEN++] = TEXT[i]; \
3190 + i++; \
3191 + continue; \
3192 + } \
3193 + \
3194 + if (ignore) \
3195 + { \
3196 + if ((ignore == nonprinting && !iswprint (WC)) \
3197 + || (ignore == nondictionary \
3198 + && !iswalnum (WC) && !iswblank (WC))) \
3199 + { \
3200 + i += MBLENGTH; \
3201 + continue; \
3202 + } \
3203 + } \
3204 + \
3205 + if (translate) \
3206 + { \
3207 + \
3208 + uwc = towupper(WC); \
3209 + if (WC == uwc) \
3210 + { \
3211 + memcpy (mbc, TEXT + i, MBLENGTH); \
3212 + i += MBLENGTH; \
3213 + } \
3214 + else \
3215 + { \
3216 + i += MBLENGTH; \
3217 + WC = uwc; \
3218 + memset (&state_wc, '\0', sizeof (mbstate_t)); \
3219 + \
3220 + MBLENGTH = wcrtomb (mbc, WC, &state_wc); \
3221 + assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \
3222 + } \
3223 + \
3224 + for (j = 0; j < MBLENGTH; j++) \
3225 + COPY[NEW_LEN++] = mbc[j]; \
3226 + } \
3227 + else \
3228 + for (j = 0; j < MBLENGTH; j++) \
3229 + COPY[NEW_LEN++] = TEXT[i++]; \
3230 + } \
3231 + COPY[NEW_LEN] = '\0'; \
3232 + } \
3233 + while (0)
3234 + IGNORE_CHARS (new_len_a, lena, texta, copy_a,
3235 + wc_a, mblength_a, state_a);
3236 + IGNORE_CHARS (new_len_b, lenb, textb, copy_b,
3237 + wc_b, mblength_b, state_b);
3238 + diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
3239 + }
3240 + else if (lena == 0)
3241 + diff = - NONZERO (lenb);
3242 + else if (lenb == 0)
3243 + goto greater;
3244 + else
3245 + diff = xmemcoll (texta, lena, textb, lenb);
3246 + }
3247 +
3248 + if (diff)
3249 + goto not_equal;
3250 +
3251 + key = key->next;
3252 + if (! key)
3253 + break;
3254 +
3255 + /* Find the beginning and limit of the next field. */
3256 + if (key->eword != -1)
3257 + lima = limfield (a, key), limb = limfield (b, key);
3258 + else
3259 + lima = a->text + a->length - 1, limb = b->text + b->length - 1;
3260 +
3261 + if (key->sword != -1)
3262 + texta = begfield (a, key), textb = begfield (b, key);
3263 + else
3264 + {
3265 + texta = a->text, textb = b->text;
3266 + if (key->skipsblanks)
3267 + {
3268 + while (texta < lima && ismbblank (texta, lima - texta, &mblength_a))
3269 + texta += mblength_a;
3270 + while (textb < limb && ismbblank (textb, limb - textb, &mblength_b))
3271 + textb += mblength_b;
3272 + }
3273 + }
3274 + }
3275 +
3276 + return 0; /* every key compared equal */
3277 +
3278 +greater:
3279 + diff = 1;
3280 +not_equal:
3281 + return key->reverse ? -diff : diff;
3282 +}
3283 +#endif
3284 +
3285 /* Compare two lines A and B, returning negative, zero, or positive
3286 depending on whether A compares less than, equal to, or greater than B. */
3287
3288 @@ -4095,7 +4702,7 @@ main (int argc, char **argv)
3289 initialize_exit_failure (SORT_FAILURE);
3290
3291 hard_LC_COLLATE = hard_locale (LC_COLLATE);
3292 -#if HAVE_NL_LANGINFO
3293 +#if HAVE_LANGINFO_CODESET
3294 hard_LC_TIME = hard_locale (LC_TIME);
3295 #endif
3296
3297 @@ -4116,6 +4723,29 @@ main (int argc, char **argv)
3298 thousands_sep = -1;
3299 }
3300
3301 +#if HAVE_MBRTOWC
3302 + if (MB_CUR_MAX > 1)
3303 + {
3304 + inittables = inittables_mb;
3305 + begfield = begfield_mb;
3306 + limfield = limfield_mb;
3307 + skipblanks = skipblanks_mb;
3308 + getmonth = getmonth_mb;
3309 + keycompare = keycompare_mb;
3310 + numcompare = numcompare_mb;
3311 + }
3312 + else
3313 +#endif
3314 + {
3315 + inittables = inittables_uni;
3316 + begfield = begfield_uni;
3317 + limfield = limfield_uni;
3318 + skipblanks = skipblanks_uni;
3319 + getmonth = getmonth_uni;
3320 + keycompare = keycompare_uni;
3321 + numcompare = numcompare_uni;
3322 + }
3323 +
3324 have_read_stdin = false;
3325 inittables ();
3326
3327 @@ -4386,13 +5016,34 @@ main (int argc, char **argv)
3328
3329 case 't':
3330 {
3331 - char newtab = optarg[0];
3332 - if (! newtab)
3333 + char newtab[MB_LEN_MAX + 1];
3334 + size_t newtab_length = 1;
3335 + strncpy (newtab, optarg, MB_LEN_MAX);
3336 + if (! newtab[0])
3337 error (SORT_FAILURE, 0, _("empty tab"));
3338 - if (optarg[1])
3339 +#if HAVE_MBRTOWC
3340 + if (MB_CUR_MAX > 1)
3341 + {
3342 + wchar_t wc;
3343 + mbstate_t state;
3344 +
3345 + memset (&state, '\0', sizeof (mbstate_t));
3346 + newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
3347 + MB_LEN_MAX),
3348 + &state);
3349 + switch (newtab_length)
3350 + {
3351 + case (size_t) -1:
3352 + case (size_t) -2:
3353 + case 0:
3354 + newtab_length = 1;
3355 + }
3356 + }
3357 +#endif
3358 + if (newtab_length == 1 && optarg[1])
3359 {
3360 if (STREQ (optarg, "\\0"))
3361 - newtab = '\0';
3362 + newtab[0] = '\0';
3363 else
3364 {
3365 /* Provoke with `sort -txx'. Complain about
3366 @@ -4403,9 +5054,12 @@ main (int argc, char **argv)
3367 quote (optarg));
3368 }
3369 }
3370 - if (tab != TAB_DEFAULT && tab != newtab)
3371 + if (tab_length
3372 + && (tab_length != newtab_length
3373 + || memcmp (tab, newtab, tab_length) != 0))
3374 error (SORT_FAILURE, 0, _("incompatible tabs"));
3375 - tab = newtab;
3376 + memcpy (tab, newtab, newtab_length);
3377 + tab_length = newtab_length;
3378 }
3379 break;
3380
3381 diff -urNp coreutils-8.13-orig/src/unexpand.c coreutils-8.13/src/unexpand.c
3382 --- coreutils-8.13-orig/src/unexpand.c 2011-07-28 12:38:27.000000000 +0200
3383 +++ coreutils-8.13/src/unexpand.c 2011-09-09 10:23:14.185647633 +0200
3384 @@ -39,12 +39,29 @@
3385 #include <stdio.h>
3386 #include <getopt.h>
3387 #include <sys/types.h>
3388 +
3389 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
3390 +#if HAVE_WCHAR_H
3391 +# include <wchar.h>
3392 +#endif
3393 +
3394 #include "system.h"
3395 #include "error.h"
3396 #include "fadvise.h"
3397 #include "quote.h"
3398 #include "xstrndup.h"
3399
3400 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
3401 + installation; work around this configuration error. */
3402 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
3403 +# define MB_LEN_MAX 16
3404 +#endif
3405 +
3406 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
3407 +#if HAVE_MBRTOWC && defined mbstate_t
3408 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
3409 +#endif
3410 +
3411 /* The official name of this program (e.g., no `g' prefix). */
3412 #define PROGRAM_NAME "unexpand"
3413
3414 @@ -104,6 +121,208 @@ static struct option const longopts[] =
3415 {NULL, 0, NULL, 0}
3416 };
3417
3418 +static FILE *next_file (FILE *fp);
3419 +/* Multibyte counterpart of unexpand (): turn pending blanks into tabs, counting columns per decoded character. */
3420 +#if HAVE_MBRTOWC
3421 +static void
3422 +unexpand_multibyte (void)
3423 +{
3424 + FILE *fp; /* Input stream. */
3425 + mbstate_t i_state; /* Current shift state of the input stream. */
3426 + mbstate_t i_state_bak; /* Back up the I_STATE. */
3427 + mbstate_t o_state; /* Current shift state of the output stream. */
3428 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
3429 + char *bufpos = buf; /* Next read position of BUF. */
3430 + size_t buflen = 0; /* The length of the byte sequence in buf. */
3431 + wint_t wc; /* A gotten wide character. */
3432 + size_t mblength; /* The byte size of a multibyte character
3433 + which shows as same character as WC. */
3434 +
3435 + /* Index in `tab_list' of next tabstop: */
3436 + int tab_index = 0; /* For calculating width of pending tabs. */
3437 + int print_tab_index = 0; /* For printing as many tabs as possible. */
3438 + unsigned int column = 0; /* Column on screen of next char. */
3439 + int next_tab_column; /* Column the next tab stop is on. */
3440 + int convert = 1; /* If nonzero, perform translations. */
3441 + unsigned int pending = 0; /* Pending columns of blanks. */
3442 +
3443 + fp = next_file ((FILE *) NULL);
3444 + if (fp == NULL)
3445 + return;
3446 +
3447 + memset (&o_state, '\0', sizeof(mbstate_t));
3448 + memset (&i_state, '\0', sizeof(mbstate_t));
3449 +
3450 + for (;;)
3451 + {
3452 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
3453 + {
3454 + memmove (buf, bufpos, buflen);
3455 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
3456 + bufpos = buf;
3457 + }
3458 +
3459 + /* Get a wide character. */
3460 + if (buflen < 1)
3461 + {
3462 + mblength = 1;
3463 + wc = WEOF;
3464 + }
3465 + else
3466 + {
3467 + i_state_bak = i_state;
3468 + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state);
3469 + }
3470 +
3471 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
3472 + {
3473 + i_state = i_state_bak;
3474 + wc = L'\0';
3475 + }
3476 +
3477 + if (wc == L' ' && convert && column < INT_MAX)
3478 + {
3479 + ++pending;
3480 + ++column;
3481 + }
3482 + else if (wc == L'\t' && convert)
3483 + {
3484 + if (tab_size == 0)
3485 + {
3486 + /* Do not let tab_index == first_free_tab;
3487 + stop when it is 1 less. */
3488 + while (tab_index < first_free_tab - 1
3489 + && column >= tab_list[tab_index])
3490 + tab_index++;
3491 + next_tab_column = tab_list[tab_index];
3492 + if (tab_index < first_free_tab - 1)
3493 + tab_index++;
3494 + if (column >= next_tab_column)
3495 + {
3496 + convert = 0; /* Ran out of tab stops. */
3497 + goto flush_pend_mb;
3498 + }
3499 + }
3500 + else
3501 + {
3502 + next_tab_column = column + tab_size - column % tab_size;
3503 + }
3504 + pending += next_tab_column - column;
3505 + column = next_tab_column;
3506 + }
3507 + else
3508 + {
3509 +flush_pend_mb:
3510 + /* Flush pending spaces. Print as many tabs as possible,
3511 + then print the rest as spaces. */
3512 + if (pending == 1)
3513 + {
3514 + putchar (' ');
3515 + pending = 0;
3516 + }
3517 + column -= pending;
3518 + while (pending > 0)
3519 + {
3520 + if (tab_size == 0)
3521 + {
3522 + /* Do not let print_tab_index == first_free_tab;
3523 + stop when it is 1 less. */
3524 + while (print_tab_index < first_free_tab - 1
3525 + && column >= tab_list[print_tab_index])
3526 + print_tab_index++;
3527 + next_tab_column = tab_list[print_tab_index];
3528 + if (print_tab_index < first_free_tab - 1)
3529 + print_tab_index++;
3530 + }
3531 + else
3532 + {
3533 + next_tab_column =
3534 + column + tab_size - column % tab_size;
3535 + }
3536 + if (next_tab_column - column <= pending)
3537 + {
3538 + putchar ('\t');
3539 + pending -= next_tab_column - column;
3540 + column = next_tab_column;
3541 + }
3542 + else
3543 + {
3544 + --print_tab_index;
3545 + column += pending;
3546 + while (pending != 0)
3547 + {
3548 + putchar (' ');
3549 + pending--;
3550 + }
3551 + }
3552 + }
3553 +
3554 + if (wc == WEOF)
3555 + {
3556 + fp = next_file (fp);
3557 + if (fp == NULL)
3558 + break; /* No more files. */
3559 + else
3560 + {
3561 + memset (&i_state, '\0', sizeof(mbstate_t));
3562 + continue;
3563 + }
3564 + }
3565 +
3566 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
3567 + {
3568 + if (convert)
3569 + {
3570 + ++column;
3571 + if (convert_entire_line == 0)
3572 + convert = 0;
3573 + }
3574 + mblength = 1;
3575 + putchar (*bufpos); /* The undecodable byte is at BUFPOS, not BUF[0]. */
3576 + }
3577 + else if (mblength == 0)
3578 + {
3579 + if (convert && convert_entire_line == 0)
3580 + convert = 0;
3581 + mblength = 1;
3582 + putchar ('\0');
3583 + }
3584 + else
3585 + {
3586 + if (convert)
3587 + {
3588 + if (wc == L'\b')
3589 + {
3590 + if (column > 0)
3591 + --column;
3592 + }
3593 + else
3594 + {
3595 + int width; /* The width of WC. */
3596 +
3597 + width = wcwidth (wc);
3598 + column += (width > 0) ? width : 0;
3599 + if (convert_entire_line == 0)
3600 + convert = 0;
3601 + }
3602 + }
3603 +
3604 + if (wc == L'\n')
3605 + {
3606 + tab_index = print_tab_index = 0;
3607 + column = pending = 0;
3608 + convert = 1;
3609 + }
3610 + fwrite (bufpos, sizeof(char), mblength, stdout);
3611 + }
3612 + }
3613 + buflen -= mblength;
3614 + bufpos += mblength;
3615 + }
3616 +}
3617 +#endif
3618 +
3619 +
3620 void
3621 usage (int status)
3622 {
3623 @@ -526,7 +745,12 @@ main (int argc, char **argv)
3624
3625 file_list = (optind < argc ? &argv[optind] : stdin_argv);
3626
3627 - unexpand ();
3628 +#if HAVE_MBRTOWC
3629 + if (MB_CUR_MAX > 1)
3630 + unexpand_multibyte ();
3631 + else
3632 +#endif
3633 + unexpand ();
3634
3635 if (have_read_stdin && fclose (stdin) != 0)
3636 error (EXIT_FAILURE, errno, "-");
3637 diff -urNp coreutils-8.13-orig/src/uniq.c coreutils-8.13/src/uniq.c
3638 --- coreutils-8.13-orig/src/uniq.c 2011-07-28 12:38:27.000000000 +0200
3639 +++ coreutils-8.13/src/uniq.c 2011-09-09 10:24:19.631560964 +0200
3640 @@ -21,6 +21,16 @@
3641 #include <getopt.h>
3642 #include <sys/types.h>
3643
3644 +/* Get mbstate_t, mbrtowc(). */
3645 +#if HAVE_WCHAR_H
3646 +# include <wchar.h>
3647 +#endif
3648 +
3649 +/* Get isw* functions. */
3650 +#if HAVE_WCTYPE_H
3651 +# include <wctype.h>
3652 +#endif
3653 +
3654 #include "system.h"
3655 #include "argmatch.h"
3656 #include "linebuffer.h"
3657 @@ -32,7 +42,19 @@
3658 #include "stdio--.h"
3659 #include "xmemcoll.h"
3660 #include "xstrtol.h"
3661 -#include "memcasecmp.h"
3662 +#include "xmemcoll.h"
3663 +
3664 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
3665 + installation; work around this configuration error. */
3666 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
3667 +# define MB_LEN_MAX 16
3668 +#endif
3669 +
3670 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
3671 +#if HAVE_MBRTOWC && defined mbstate_t
3672 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
3673 +#endif
3674 +
3675
3676 /* The official name of this program (e.g., no `g' prefix). */
3677 #define PROGRAM_NAME "uniq"
3678 @@ -108,6 +130,10 @@ static enum delimit_method const delimit
3679 /* Select whether/how to delimit groups of duplicate lines. */
3680 static enum delimit_method delimit_groups;
3681
3682 +/* Function pointers. */
3683 +static char *
3684 +(*find_field) (struct linebuffer *line);
3685 +
3686 static struct option const longopts[] =
3687 {
3688 {"count", no_argument, NULL, 'c'},
3689 @@ -207,7 +233,7 @@ size_opt (char const *opt, char const *m
3690 return a pointer to the beginning of the line's field to be compared. */
3691
3692 static char * _GL_ATTRIBUTE_PURE
3693 -find_field (struct linebuffer const *line)
3694 +find_field_uni (struct linebuffer *line)
3695 {
3696 size_t count;
3697 char const *lp = line->buffer;
3698 @@ -227,6 +253,83 @@ find_field (struct linebuffer const *lin
3699 return line->buffer + i;
3700 }
3701
3702 +#if HAVE_MBRTOWC
3703 +/* Decode one character at LP + POS into WC; on failure restore *STATEP, set CONVFAIL and consume one byte. */
3704 +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
3705 + do \
3706 + { \
3707 + mbstate_t state_bak; /* Shift state to restore on failure. */ \
3708 + \
3709 + (CONVFAIL) = 0; \
3710 + state_bak = *(STATEP); \
3711 + \
3712 + (MBLENGTH) = mbrtowc (&(WC), (LP) + (POS), (SIZE) - (POS), (STATEP)); \
3713 + \
3714 + switch (MBLENGTH) \
3715 + { \
3716 + case (size_t)-2: \
3717 + case (size_t)-1: \
3718 + *(STATEP) = state_bak; \
3719 + (CONVFAIL)++; \
3720 + /* Fall through */ \
3721 + case 0: \
3722 + (MBLENGTH) = 1; /* Always advance by at least one byte. */ \
3723 + } \
3724 + } \
3725 + while (0)
3726 +/* Multibyte find_field: return a pointer past the first skip_fields fields and then skip_chars characters of LINE. */
3727 +static char *
3728 +find_field_multi (struct linebuffer *line)
3729 +{
3730 + size_t count;
3731 + char *lp = line->buffer;
3732 + size_t size = line->length - 1;
3733 + size_t pos;
3734 + size_t mblength;
3735 + wchar_t wc;
3736 + mbstate_t *statep;
3737 + int convfail = 0;
3738 +
3739 + pos = 0;
3740 + statep = &(line->state);
3741 +
3742 + /* Skip fields: each is a run of blanks followed by a run of non-blanks. */
3743 + for (count = 0; count < skip_fields && pos < size; count++)
3744 + {
3745 + while (pos < size)
3746 + {
3747 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3748 +
3749 + if (convfail || !iswblank (wc))
3750 + {
3751 + pos += mblength;
3752 + break;
3753 + }
3754 + pos += mblength;
3755 + }
3756 +
3757 + while (pos < size)
3758 + {
3759 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3760 +
3761 + if (!convfail && iswblank (wc))
3762 + break;
3763 +
3764 + pos += mblength;
3765 + }
3766 + }
3767 +
3768 + /* Skip chars: one decoded character (or one undecodable byte) per step. */
3769 + for (count = 0; count < skip_chars && pos < size; count++)
3770 + {
3771 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3772 + pos += mblength;
3773 + }
3774 +
3775 + return lp + pos;
3776 +}
3777 +#endif
3778 +
3779 /* Return false if two strings OLD and NEW match, true if not.
3780 OLD and NEW point not to the beginnings of the lines
3781 but rather to the beginnings of the fields to compare.
3782 @@ -235,6 +338,8 @@ find_field (struct linebuffer const *lin
3783 static bool
3784 different (char *old, char *new, size_t oldlen, size_t newlen)
3785 {
3786 + char *copy_old, *copy_new;
3787 +
3788 if (check_chars < oldlen)
3789 oldlen = check_chars;
3790 if (check_chars < newlen)
3791 @@ -242,14 +347,92 @@ different (char *old, char *new, size_t
3792
3793 if (ignore_case)
3794 {
3795 - /* FIXME: This should invoke strcoll somehow. */
3796 - return oldlen != newlen || memcasecmp (old, new, oldlen);
3797 + size_t i;
3798 +
3799 + copy_old = alloca (oldlen + 1);
3800 + copy_new = alloca (oldlen + 1);
3801 +
3802 + for (i = 0; i < oldlen; i++)
3803 + {
3804 + copy_old[i] = toupper (old[i]);
3805 + copy_new[i] = toupper (new[i]);
3806 + }
3807 }
3808 - else if (hard_LC_COLLATE)
3809 - return xmemcoll (old, oldlen, new, newlen) != 0;
3810 else
3811 - return oldlen != newlen || memcmp (old, new, oldlen);
3812 + {
3813 + copy_old = (char *)old;
3814 + copy_new = (char *)new;
3815 + }
3816 +
3817 + return xmemcoll (copy_old, oldlen, copy_new, newlen);
3818 +}
3819 +/* Multibyte different (): compare at most check_chars characters of OLD and NEW, upper-casing first if ignore_case. */
3820 +#if HAVE_MBRTOWC
3821 +static int
3822 +different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
3823 +{
3824 + size_t i, j, chars;
3825 + const char *str[2];
3826 + char *copy[2];
3827 + size_t len[2];
3828 + mbstate_t state[2];
3829 + size_t mblength;
3830 + wchar_t wc, uwc;
3831 + mbstate_t state_bak;
3832 +
3833 + str[0] = old;
3834 + str[1] = new;
3835 + len[0] = oldlen;
3836 + len[1] = newlen;
3837 + state[0] = oldstate;
3838 + state[1] = newstate;
3839 +
3840 + for (i = 0; i < 2; i++)
3841 + {
3842 + copy[i] = alloca (len[i] + 1);
3843 +
3844 + for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
3845 + {
3846 + state_bak = state[i];
3847 + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
3848 +
3849 + switch (mblength)
3850 + {
3851 + case (size_t)-1:
3852 + case (size_t)-2:
3853 + state[i] = state_bak;
3854 + /* Fall through */
3855 + case 0:
3856 + /* Invalid, truncated or NUL: keep the raw byte. */
3857 + copy[i][j] = str[i][j];
3858 + mblength = 1;
3859 + break;
3860 +
3861 + default:
3862 + memcpy (copy[i] + j, str[i] + j, mblength);
3863 + if (ignore_case && (uwc = towupper (wc)) != wc)
3864 + {
3865 + char ubuf[MB_LEN_MAX];
3866 + mbstate_t state_wc;
3867 +
3868 + memset (&state_wc, '\0', sizeof(mbstate_t));
3869 + /* Substitute the upper-case form only when it has the
3870 + same byte length, so offsets stay aligned and COPY
3871 + cannot overflow. */
3872 + if (wcrtomb (ubuf, uwc, &state_wc) == mblength)
3873 + memcpy (copy[i] + j, ubuf, mblength);
3874 + }
3875 + break;
3876 + }
3877 + j += mblength;
3878 + }
3879 + copy[i][j] = '\0';
3880 + len[i] = j;
3881 + }
3882 +
3883 + return xmemcoll (copy[0], len[0], copy[1], len[1]);
3884 }
3885 +#endif
3886
3887 /* Output the line in linebuffer LINE to standard output
3888 provided that the switches say it should be output.
3889 @@ -305,15 +488,43 @@ check_file (const char *infile, const ch
3890 {
3891 char *prevfield IF_LINT ( = NULL);
3892 size_t prevlen IF_LINT ( = 0);
3893 +#if HAVE_MBRTOWC
3894 + mbstate_t prevstate;
3895 +
3896 + memset (&prevstate, '\0', sizeof (mbstate_t));
3897 +#endif
3898
3899 while (!feof (stdin))
3900 {
3901 char *thisfield;
3902 size_t thislen;
3903 +#if HAVE_MBRTOWC
3904 + mbstate_t thisstate;
3905 +#endif
3906 +
3907 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
3908 break;
3909 thisfield = find_field (thisline);
3910 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
3911 +#if HAVE_MBRTOWC
3912 + if (MB_CUR_MAX > 1)
3913 + {
3914 + thisstate = thisline->state;
3915 +
3916 + if (prevline->length == 0 || different_multi
3917 + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
3918 + {
3919 + fwrite (thisline->buffer, sizeof (char),
3920 + thisline->length, stdout);
3921 +
3922 + SWAP_LINES (prevline, thisline);
3923 + prevfield = thisfield;
3924 + prevlen = thislen;
3925 + prevstate = thisstate;
3926 + }
3927 + }
3928 + else
3929 +#endif
3930 if (prevline->length == 0
3931 || different (thisfield, prevfield, thislen, prevlen))
3932 {
3933 @@ -332,17 +543,26 @@ check_file (const char *infile, const ch
3934 size_t prevlen;
3935 uintmax_t match_count = 0;
3936 bool first_delimiter = true;
3937 +#if HAVE_MBRTOWC
3938 + mbstate_t prevstate;
3939 +#endif
3940
3941 if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
3942 goto closefiles;
3943 prevfield = find_field (prevline);
3944 prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
3945 +#if HAVE_MBRTOWC
3946 + prevstate = prevline->state;
3947 +#endif
3948
3949 while (!feof (stdin))
3950 {
3951 bool match;
3952 char *thisfield;
3953 size_t thislen;
3954 +#if HAVE_MBRTOWC
3955 + mbstate_t thisstate = thisline->state;
3956 +#endif
3957 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
3958 {
3959 if (ferror (stdin))
3960 @@ -351,6 +571,14 @@ check_file (const char *infile, const ch
3961 }
3962 thisfield = find_field (thisline);
3963 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
3964 +#if HAVE_MBRTOWC
3965 + if (MB_CUR_MAX > 1)
3966 + {
3967 + match = !different_multi (thisfield, prevfield,
3968 + thislen, prevlen, thisstate, prevstate);
3969 + }
3970 + else
3971 +#endif
3972 match = !different (thisfield, prevfield, thislen, prevlen);
3973 match_count += match;
3974
3975 @@ -383,6 +611,9 @@ check_file (const char *infile, const ch
3976 SWAP_LINES (prevline, thisline);
3977 prevfield = thisfield;
3978 prevlen = thislen;
3979 +#if HAVE_MBRTOWC
3980 + prevstate = thisstate;
3981 +#endif
3982 if (!match)
3983 match_count = 0;
3984 }
3985 @@ -428,6 +659,19 @@ main (int argc, char **argv)
3986
3987 atexit (close_stdout);
3988
3989 +#if HAVE_MBRTOWC
3990 + if (MB_CUR_MAX > 1)
3991 + {
3992 + find_field = find_field_multi;
3993 + }
3994 + else
3995 +#endif
3996 + {
3997 + find_field = find_field_uni;
3998 + }
3999 +
4000 +
4001 +
4002 skip_chars = 0;
4003 skip_fields = 0;
4004 check_chars = SIZE_MAX;
4005 diff -urNp coreutils-8.13-orig/tests/Makefile.am coreutils-8.13/tests/Makefile.am
4006 --- coreutils-8.13-orig/tests/Makefile.am 2011-09-09 10:22:43.352561668 +0200
4007 +++ coreutils-8.13/tests/Makefile.am 2011-09-09 10:23:14.189688942 +0200
4008 @@ -238,6 +238,7 @@ TESTS = \
4009 misc/sort-debug-keys \
4010 misc/sort-debug-warn \
4011 misc/sort-files0-from \
4012 + misc/sort-mb-tests \
4013 misc/sort-float \
4014 misc/sort-merge \
4015 misc/sort-merge-fdlimit \
4016 @@ -518,6 +519,10 @@ TESTS = \
4017 $(root_tests)
4018
4019 pr_data = \
4020 + misc/mb1.X \
4021 + misc/mb1.I \
4022 + misc/mb2.X \
4023 + misc/mb2.I \
4024 pr/0F \
4025 pr/0FF \
4026 pr/0FFnt \
4027 diff -urNp coreutils-8.13-orig/tests/misc/cut coreutils-8.13/tests/misc/cut
4028 --- coreutils-8.13-orig/tests/misc/cut 2011-09-02 14:08:40.000000000 +0200
4029 +++ coreutils-8.13/tests/misc/cut 2011-09-09 10:23:14.190686793 +0200
4030 @@ -23,14 +23,15 @@ my $mb_locale = $ENV{LOCALE_FR_UTF8};
4031 # Turn off localization of executable's output.
4032 @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
4033
4034 -my $mb_locale = $ENV{LOCALE_FR_UTF8};
4035 -! defined $mb_locale || $mb_locale eq 'none'
4036 - and $mb_locale = 'C';
4037 +#my $mb_locale = $ENV{LOCALE_FR_UTF8};
4038 +#! defined $mb_locale || $mb_locale eq 'none'
4039 +# and $mb_locale = 'C';
4040 +my $mb_locale = 'C';
4041
4042 my $prog = 'cut';
4043 my $try = "Try \`$prog --help' for more information.\n";
4044 my $from_1 = "$prog: fields and positions are numbered from 1\n$try";
4045 -my $inval = "$prog: invalid byte or field list\n$try";
4046 +my $inval = "$prog: invalid byte, character or field list\n$try";
4047 my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try";
4048
4049 my @Tests =
4050 @@ -147,7 +147,7 @@ my @Tests =
4051
4052 # None of the following invalid ranges provoked an error up to coreutils-6.9.
4053 ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
4054 - {ERR=>"$prog: invalid decreasing range\n$try"}],
4055 + {ERR=>"$prog: invalid byte, character or field list\n$try"}],
4056 ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
4057 ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
4058 ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1},
4059 diff -urNp coreutils-8.13-orig/tests/misc/mb1.I coreutils-8.13/tests/misc/mb1.I
4060 --- coreutils-8.13-orig/tests/misc/mb1.I 1970-01-01 01:00:00.000000000 +0100
4061 +++ coreutils-8.13/tests/misc/mb1.I 2011-09-09 10:23:14.191687037 +0200
4062 @@ -0,0 +1,4 @@
4063 +Apple@10
4064 +Banana@5
4065 +Citrus@20
4066 +Cherry@30
4067 diff -urNp coreutils-8.13-orig/tests/misc/mb1.X coreutils-8.13/tests/misc/mb1.X
4068 --- coreutils-8.13-orig/tests/misc/mb1.X 1970-01-01 01:00:00.000000000 +0100
4069 +++ coreutils-8.13/tests/misc/mb1.X 2011-09-09 10:23:14.192581910 +0200
4070 @@ -0,0 +1,4 @@
4071 +Banana@5
4072 +Apple@10
4073 +Citrus@20
4074 +Cherry@30
4075 diff -urNp coreutils-8.13-orig/tests/misc/mb2.I coreutils-8.13/tests/misc/mb2.I
4076 --- coreutils-8.13-orig/tests/misc/mb2.I 1970-01-01 01:00:00.000000000 +0100
4077 +++ coreutils-8.13/tests/misc/mb2.I 2011-09-09 10:23:14.192581910 +0200
4078 @@ -0,0 +1,4 @@
4079 +Apple@AA10@@20
4080 +Banana@AA5@@30
4081 +Citrus@AA20@@5
4082 +Cherry@AA30@@10
4083 diff -urNp coreutils-8.13-orig/tests/misc/mb2.X coreutils-8.13/tests/misc/mb2.X
4084 --- coreutils-8.13-orig/tests/misc/mb2.X 1970-01-01 01:00:00.000000000 +0100
4085 +++ coreutils-8.13/tests/misc/mb2.X 2011-09-09 10:23:14.193687456 +0200
4086 @@ -0,0 +1,4 @@
4087 +Citrus@AA20@@5
4088 +Cherry@AA30@@10
4089 +Apple@AA10@@20
4090 +Banana@AA5@@30
4091 diff -urNp coreutils-8.13-orig/tests/misc/sort-mb-tests coreutils-8.13/tests/misc/sort-mb-tests
4092 --- coreutils-8.13-orig/tests/misc/sort-mb-tests 1970-01-01 01:00:00.000000000 +0100
4093 +++ coreutils-8.13/tests/misc/sort-mb-tests 2011-09-09 10:23:14.194687565 +0200
4094 @@ -0,0 +1,58 @@
4095 +#! /bin/sh
4096 +case $# in
4097 + 0) xx='../src/sort';;
4098 + *) xx="$1";;
4099 +esac
4100 +test "$VERBOSE" && echo=echo || echo=:
4101 +$echo testing program: $xx
4102 +errors=0
4103 +test "$srcdir" || srcdir=.
4104 +test "$VERBOSE" && $xx --version 2> /dev/null
4105 +# Run sort -t/-k/-n under a UTF-8 locale and compare with the expected *.X files.
4106 +export LC_ALL=en_US.UTF-8
4107 +locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
4108 +# Status 77 above reports the test as skipped when no UTF-8 locale exists.
4109 +
4110 +$xx -t @ -k2 -n $srcdir/misc/mb1.I > misc/mb1.O
4111 +code=$?
4112 +if test $code != 0; then
4113 + $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
4114 + errors=`expr $errors + 1`
4115 +else
4116 + cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1
4117 + case $? in
4118 + 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
4119 + 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2
4120 + (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null
4121 + errors=`expr $errors + 1`;;
4122 + 2) $echo "Test mb1 may have failed." 1>&2
4123 + $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2
4124 + errors=`expr $errors + 1`;;
4125 + esac
4126 +fi
4127 +
4128 +$xx -t @ -k4 -n $srcdir/misc/mb2.I > misc/mb2.O
4129 +code=$?
4130 +if test $code != 0; then
4131 + $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
4132 + errors=`expr $errors + 1`
4133 +else
4134 + cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1
4135 + case $? in
4136 + 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
4137 + 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2
4138 + (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null
4139 + errors=`expr $errors + 1`;;
4140 + 2) $echo "Test mb2 may have failed." 1>&2
4141 + $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2
4142 + errors=`expr $errors + 1`;;
4143 + esac
4144 +fi
4145 +
4146 +if test $errors = 0; then
4147 + $echo Passed all 2 tests. 1>&2
4148 +else
4149 + $echo Failed $errors tests. 1>&2
4150 +fi
4151 +test $errors = 0 || errors=1
4152 +exit $errors