]> git.ipfire.org Git - people/amarx/ipfire-3.x.git/blame - pkgs/coreutils/patches/coreutils-i18n.patch
Merge remote-tracking branch 'stevee/sat-solver'
[people/amarx/ipfire-3.x.git] / pkgs / coreutils / patches / coreutils-i18n.patch
CommitLineData
407c5be3
SS
1diff -urNp coreutils-8.9-orig/lib/linebuffer.h coreutils-8.9/lib/linebuffer.h
2--- coreutils-8.9-orig/lib/linebuffer.h 2011-01-01 22:19:27.000000000 +0100
3+++ coreutils-8.9/lib/linebuffer.h 2011-01-04 17:41:55.358888521 +0100
56ae3f82
SS
4@@ -21,6 +21,11 @@
5
6 # include <stdio.h>
7
8+/* Get mbstate_t. */
9+# if HAVE_WCHAR_H
10+# include <wchar.h>
11+# endif
12+
13 /* A `struct linebuffer' holds a line of text. */
14
15 struct linebuffer
16@@ -28,6 +33,9 @@ struct linebuffer
17 size_t size; /* Allocated. */
18 size_t length; /* Used. */
19 char *buffer;
20+# if HAVE_WCHAR_H
21+ mbstate_t state;
22+# endif
23 };
24
25 /* Initialize linebuffer LINEBUFFER for use. */
407c5be3
SS
26diff -urNp coreutils-8.9-orig/src/cut.c coreutils-8.9/src/cut.c
27--- coreutils-8.9-orig/src/cut.c 2011-01-01 22:19:23.000000000 +0100
28+++ coreutils-8.9/src/cut.c 2011-01-04 17:41:55.361888730 +0100
56ae3f82
SS
29@@ -28,6 +28,11 @@
30 #include <assert.h>
31 #include <getopt.h>
32 #include <sys/types.h>
33+
34+/* Get mbstate_t, mbrtowc(). */
35+#if HAVE_WCHAR_H
36+# include <wchar.h>
37+#endif
38 #include "system.h"
39
40 #include "error.h"
1555d43c 41@@ -37,6 +42,18 @@
56ae3f82
SS
42 #include "quote.h"
43 #include "xstrndup.h"
44
45+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
46+ installation; work around this configuration error. */
47+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
48+# undef MB_LEN_MAX
49+# define MB_LEN_MAX 16
50+#endif
51+
52+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
53+#if HAVE_MBRTOWC && defined mbstate_t
54+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
55+#endif
56+
57 /* The official name of this program (e.g., no `g' prefix). */
58 #define PROGRAM_NAME "cut"
59
1555d43c 60@@ -72,6 +89,52 @@
56ae3f82
SS
61 } \
62 while (0)
63
64+/* When fewer than MB_LEN_MAX unread bytes remain (BUFLEN) and STREAM has hit neither EOF nor an error, move them to the start of BUF, read up to BUFSIZ more bytes from STREAM, and reset BUFPOS to BUF. */
65+#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
66+ do \
67+ { \
68+ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
69+ { \
70+ memmove (BUF, BUFPOS, BUFLEN); \
71+ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
72+ BUFPOS = BUF; \
73+ } \
74+ } \
75+ while (0)
76+
77+/* Decode the wide character at BUFPOS into WC and its byte length into MBLENGTH; BUFPOS itself is not advanced. If BUFLEN is 0, WC is WEOF and MBLENGTH and CONVFAIL are left untouched.
78+ If the byte sequence is not a valid character, CONVFAIL is 1, STATE is restored and MBLENGTH is 1 (WC is not assigned in that case). Otherwise CONVFAIL is 0. */
79+#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
80+ do \
81+ { \
82+ mbstate_t state_bak; \
83+ \
84+ if (BUFLEN < 1) \
85+ { \
86+ WC = WEOF; \
87+ break; \
88+ } \
89+ \
90+ /* Get a wide character. */ \
91+ CONVFAIL = 0; \
92+ state_bak = STATE; \
93+ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
94+ \
95+ switch (MBLENGTH) \
96+ { \
97+ case (size_t)-1: \
98+ case (size_t)-2: \
99+ CONVFAIL++; \
100+ STATE = state_bak; \
101+ /* Fall through. */ \
102+ \
103+ case 0: \
104+ MBLENGTH = 1; \
105+ break; \
106+ } \
107+ } \
108+ while (0)
109+
110 struct range_pair
111 {
112 size_t lo;
1555d43c 113@@ -90,7 +153,7 @@ static char *field_1_buffer;
56ae3f82
SS
114 /* The number of bytes allocated for FIELD_1_BUFFER. */
115 static size_t field_1_bufsize;
116
117-/* The largest field or byte index used as an endpoint of a closed
118+/* The largest byte, character or field index used as an endpoint of a closed
119 or degenerate range specification; this doesn't include the starting
120 index of right-open-ended ranges. For example, with either range spec
121 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
1555d43c 122@@ -102,10 +165,11 @@ static size_t eol_range_start;
56ae3f82
SS
123
124 /* This is a bit vector.
125 In byte mode, which bytes to output.
126+ In character mode, which characters to output.
127 In field mode, which DELIM-separated fields to output.
128- Both bytes and fields are numbered starting with 1,
129+ Bytes, characters and fields are numbered starting with 1,
130 so the zeroth bit of this array is unused.
131- A field or byte K has been selected if
132+ A byte, character or field K has been selected if
133 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
134 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
135 static unsigned char *printable_field;
1555d43c 136@@ -114,15 +178,25 @@ enum operating_mode
56ae3f82
SS
137 {
138 undefined_mode,
139
140- /* Output characters that are in the given bytes. */
141+ /* Output bytes that are at the given positions. */
142 byte_mode,
143
144+ /* Output characters that are at the given positions. */
145+ character_mode,
146+
147 /* Output the given delimeter-separated fields. */
148 field_mode
149 };
150
151 static enum operating_mode operating_mode;
152
153+/* If nonzero, when in byte mode, don't split multibyte characters. */
154+static int byte_mode_character_aware;
155+
156+/* If nonzero, use the single-byte functions even when
157+ this program runs in a multibyte locale. */
158+static int force_singlebyte_mode;
159+
160 /* If true do not output lines containing no delimeter characters.
161 Otherwise, all such lines are printed. This option is valid only
162 with field mode. */
1555d43c 163@@ -134,6 +208,9 @@ static bool complement;
56ae3f82
SS
164
165 /* The delimeter character for field mode. */
166 static unsigned char delim;
167+#if HAVE_WCHAR_H
168+static wchar_t wcdelim;
169+#endif
170
171 /* True if the --output-delimiter=STRING option was specified. */
172 static bool output_delimiter_specified;
1555d43c 173@@ -207,7 +284,7 @@ Mandatory arguments to long options are
56ae3f82
SS
174 -f, --fields=LIST select only these fields; also print any line\n\
175 that contains no delimiter character, unless\n\
176 the -s option is specified\n\
177- -n (ignored)\n\
178+ -n with -b: don't split multibyte characters\n\
179 "), stdout);
180 fputs (_("\
181 --complement complement the set of selected bytes, characters\n\
1555d43c 182@@ -366,7 +443,7 @@ set_fields (const char *fieldstr)
56ae3f82
SS
183 in_digits = false;
184 /* Starting a range. */
185 if (dash_found)
186- FATAL_ERROR (_("invalid byte or field list"));
187+ FATAL_ERROR (_("invalid byte, character or field list"));
188 dash_found = true;
189 fieldstr++;
190
1555d43c 191@@ -390,14 +467,16 @@ set_fields (const char *fieldstr)
56ae3f82
SS
192 if (!rhs_specified)
193 {
194 /* `n-'. From `initial' to end of line. */
195- eol_range_start = initial;
196+ if (eol_range_start == 0 ||
197+ (eol_range_start != 0 && eol_range_start > initial))
198+ eol_range_start = initial;
199 field_found = true;
200 }
201 else
202 {
203 /* `m-n' or `-n' (1-n). */
204 if (value < initial)
205- FATAL_ERROR (_("invalid decreasing range"));
206+ FATAL_ERROR (_("invalid byte, character or field list"));
207
208 /* Is there already a range going to end of line? */
209 if (eol_range_start != 0)
1555d43c 210@@ -477,6 +556,9 @@ set_fields (const char *fieldstr)
56ae3f82
SS
211 if (operating_mode == byte_mode)
212 error (0, 0,
213 _("byte offset %s is too large"), quote (bad_num));
214+ else if (operating_mode == character_mode)
215+ error (0, 0,
216+ _("character offset %s is too large"), quote (bad_num));
217 else
218 error (0, 0,
219 _("field number %s is too large"), quote (bad_num));
1555d43c 220@@ -487,7 +569,7 @@ set_fields (const char *fieldstr)
56ae3f82
SS
221 fieldstr++;
222 }
223 else
224- FATAL_ERROR (_("invalid byte or field list"));
225+ FATAL_ERROR (_("invalid byte, character or field list"));
226 }
227
228 max_range_endpoint = 0;
1555d43c 229@@ -580,6 +662,63 @@ cut_bytes (FILE *stream)
56ae3f82
SS
230 }
231 }
232
233+#if HAVE_MBRTOWC
234+/* This function handles the following two cases.
235+
236+ 1. Read from the stream STREAM, printing to standard output any selected
237+ characters.
238+
239+ 2. Read from stream STREAM, printing to standard output any selected bytes,
240+ without splitting multibyte characters. */
241+
242+static void
243+cut_characters_or_cut_bytes_no_split (FILE *stream)
244+{
245+ int idx; /* number of bytes or characters in the line so far. */
246+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
247+ char *bufpos; /* Next read position of BUF. */
248+ size_t buflen; /* The length of the byte sequence in buf. */
249+ wint_t wc; /* The wide character decoded at BUFPOS. */
250+ size_t mblength; /* The byte size of a multibyte character which shows
251+ as same character as WC. */
252+ mbstate_t state; /* State of the stream. */
253+ int convfail; /* 1 when the conversion failed, otherwise 0. */
254+
255+ idx = 0;
256+ buflen = 0;
257+ bufpos = buf;
258+ memset (&state, '\0', sizeof(mbstate_t));
259+
260+ while (1)
261+ {
262+ REFILL_BUFFER (buf, bufpos, buflen, stream);
263+
264+ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
265+
266+ if (wc == WEOF)
267+ {
268+ if (idx > 0) /* terminate a final line that lacked a newline */
269+ putchar ('\n');
270+ break;
271+ }
272+ else if (!convfail && wc == L'\n') /* WC is stale after a failed conversion, so it must not be trusted then */
273+ {
274+ putchar ('\n');
275+ idx = 0;
276+ }
277+ else
278+ {
279+ idx += (operating_mode == byte_mode) ? mblength : 1; /* byte mode counts every byte of the character */
280+ if (print_kth (idx, NULL))
281+ fwrite (bufpos, mblength, sizeof(char), stdout);
282+ }
283+
284+ buflen -= mblength;
285+ bufpos += mblength;
286+ }
287+}
288+#endif
289+
290 /* Read from stream STREAM, printing to standard output any selected fields. */
291
292 static void
1555d43c 293@@ -702,13 +841,192 @@ cut_fields (FILE *stream)
56ae3f82
SS
294 }
295 }
296
297+#if HAVE_MBRTOWC
298+static void
299+cut_fields_mb (FILE *stream)
300+{
301+ int c;
302+ unsigned int field_idx;
303+ int found_any_selected_field;
304+ int buffer_first_field;
305+ int empty_input;
306+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
307+ char *bufpos; /* Next read position of BUF. */
308+ size_t buflen; /* The length of the byte sequence in buf. */
309+ wint_t wc = 0; /* A gotten wide character. */
310+ size_t mblength; /* The byte size of a multibyte character which shows
311+ as same character as WC. */
312+ mbstate_t state; /* State of the stream. */
313+ int convfail; /* 1, when conversion is failed. Otherwise 0. */
314+
315+ found_any_selected_field = 0;
316+ field_idx = 1;
317+ bufpos = buf;
318+ buflen = 0;
319+ memset (&state, '\0', sizeof(mbstate_t));
320+
321+ c = getc (stream);
322+ empty_input = (c == EOF);
323+ if (c != EOF)
324+ ungetc (c, stream);
325+ else
326+ wc = WEOF;
327+
328+ /* To support the semantics of the -s flag, we may have to buffer
329+ all of the first field to determine whether it is `delimited.'
330+ But that is unnecessary if all non-delimited lines must be printed
331+ and the first field has been selected, or if non-delimited lines
332+ must be suppressed and the first field has *not* been selected.
333+ That is because a non-delimited line has exactly one field. */
334+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
335+
336+ while (1)
337+ {
338+ if (field_idx == 1 && buffer_first_field)
339+ {
340+ int len = 0;
341+
342+ while (1)
343+ {
344+ REFILL_BUFFER (buf, bufpos, buflen, stream);
345+
346+ GET_NEXT_WC_FROM_BUFFER
347+ (wc, bufpos, buflen, mblength, state, convfail);
348+
349+ if (wc == WEOF)
350+ break;
351+
352+ field_1_buffer = xrealloc (field_1_buffer, len + mblength);
353+ memcpy (field_1_buffer + len, bufpos, mblength);
354+ len += mblength;
355+ buflen -= mblength;
356+ bufpos += mblength;
357+
358+ if (!convfail && (wc == L'\n' || wc == wcdelim))
359+ break;
360+ }
361+
362+ if (wc == WEOF)
363+ break;
364+
365+ /* If the first field extends to the end of line (it is not
366+ delimited) and we are printing all non-delimited lines,
367+ print this one. */
368+ if (convfail || (!convfail && wc != wcdelim))
369+ {
370+ if (suppress_non_delimited)
371+ {
372+ /* Empty. */
373+ }
374+ else
375+ {
376+ fwrite (field_1_buffer, sizeof (char), len, stdout);
377+ /* Make sure the output line is newline terminated. */
378+ if (convfail || (!convfail && wc != L'\n'))
379+ putchar ('\n');
380+ }
381+ continue;
382+ }
383+
384+ if (print_kth (1, NULL))
385+ {
386+ /* Print the field, but not the trailing delimiter. */
387+ fwrite (field_1_buffer, sizeof (char), len - 1, stdout);
388+ found_any_selected_field = 1;
389+ }
390+ ++field_idx;
391+ }
392+
393+ if (wc != WEOF)
394+ {
395+ if (print_kth (field_idx, NULL))
396+ {
397+ if (found_any_selected_field)
398+ {
399+ fwrite (output_delimiter_string, sizeof (char),
400+ output_delimiter_length, stdout);
401+ }
402+ found_any_selected_field = 1;
403+ }
404+
405+ while (1)
406+ {
407+ REFILL_BUFFER (buf, bufpos, buflen, stream);
408+
409+ GET_NEXT_WC_FROM_BUFFER
410+ (wc, bufpos, buflen, mblength, state, convfail);
411+
412+ if (wc == WEOF)
413+ break;
414+ else if (!convfail && (wc == wcdelim || wc == L'\n'))
415+ {
416+ buflen -= mblength;
417+ bufpos += mblength;
418+ break;
419+ }
420+
421+ if (print_kth (field_idx, NULL))
422+ fwrite (bufpos, mblength, sizeof(char), stdout);
423+
424+ buflen -= mblength;
425+ bufpos += mblength;
426+ }
427+ }
428+
429+ if ((!convfail || wc == L'\n') && buflen < 1)
430+ wc = WEOF;
431+
432+ if (!convfail && wc == wcdelim)
433+ ++field_idx;
434+ else if (wc == WEOF || (!convfail && wc == L'\n'))
435+ {
436+ if (found_any_selected_field
437+ || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
438+ putchar ('\n');
439+ if (wc == WEOF)
440+ break;
441+ field_idx = 1;
442+ found_any_selected_field = 0;
443+ }
444+ }
445+}
446+#endif
447+
448 static void
449 cut_stream (FILE *stream)
450 {
451- if (operating_mode == byte_mode)
452- cut_bytes (stream);
453+#if HAVE_MBRTOWC
454+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode) /* multibyte locale, and the -d argument (if any) was decodable */
455+ {
456+ switch (operating_mode)
457+ {
458+ case byte_mode:
459+ if (byte_mode_character_aware) /* -n given: do not split multibyte characters */
460+ cut_characters_or_cut_bytes_no_split (stream);
461+ else
462+ cut_bytes (stream);
463+ break;
464+
465+ case character_mode:
466+ cut_characters_or_cut_bytes_no_split (stream);
467+ break;
468+
469+ case field_mode:
470+ cut_fields_mb (stream);
471+ break;
472+
473+ default:
474+ abort ();
475+ }
476+ }
477 else
478- cut_fields (stream);
479+#endif /* NOTE(review): when HAVE_MBRTOWC is not set, the `else' kept above has no matching `if' -- confirm this builds without mbrtowc. */
480+ { /* single-byte locale, or single-byte mode forced */
481+ if (operating_mode == field_mode)
482+ cut_fields (stream);
483+ else
484+ cut_bytes (stream);
485+ }
486 }
487
488 /* Process file FILE to standard output.
1555d43c 489@@ -760,6 +1078,8 @@ main (int argc, char **argv)
56ae3f82
SS
490 bool ok;
491 bool delim_specified = false;
1555d43c 492 char *spec_list_string IF_LINT ( = NULL);
56ae3f82
SS
493+ char mbdelim[MB_LEN_MAX + 1];
494+ size_t delimlen = 0;
495
496 initialize_main (&argc, &argv);
497 set_program_name (argv[0]);
1555d43c 498@@ -782,7 +1102,6 @@ main (int argc, char **argv)
56ae3f82
SS
499 switch (optc)
500 {
501 case 'b':
502- case 'c':
503 /* Build the byte list. */
504 if (operating_mode != undefined_mode)
505 FATAL_ERROR (_("only one type of list may be specified"));
1555d43c 506@@ -790,6 +1109,14 @@ main (int argc, char **argv)
56ae3f82
SS
507 spec_list_string = optarg;
508 break;
509
510+ case 'c':
511+ /* Build the character list. */
512+ if (operating_mode != undefined_mode)
513+ FATAL_ERROR (_("only one type of list may be specified"));
514+ operating_mode = character_mode;
515+ spec_list_string = optarg;
516+ break;
517+
518 case 'f':
519 /* Build the field list. */
520 if (operating_mode != undefined_mode)
1555d43c 521@@ -801,10 +1128,35 @@ main (int argc, char **argv)
56ae3f82
SS
522 case 'd':
523 /* New delimiter. */
524 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
525- if (optarg[0] != '\0' && optarg[1] != '\0')
526- FATAL_ERROR (_("the delimiter must be a single character"));
527- delim = optarg[0];
528- delim_specified = true;
529+ {
530+#if HAVE_MBRTOWC
531+ if(MB_CUR_MAX > 1)
532+ {
533+ mbstate_t state;
534+
535+ memset (&state, '\0', sizeof(mbstate_t));
536+ delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
537+
538+ if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
539+ ++force_singlebyte_mode;
540+ else
541+ {
542+ delimlen = (delimlen < 1) ? 1 : delimlen;
543+ if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
544+ FATAL_ERROR (_("the delimiter must be a single character"));
545+ memcpy (mbdelim, optarg, delimlen);
546+ }
547+ }
548+
549+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
550+#endif
551+ {
552+ if (optarg[0] != '\0' && optarg[1] != '\0')
553+ FATAL_ERROR (_("the delimiter must be a single character"));
554+ delim = (unsigned char) optarg[0];
555+ }
556+ delim_specified = true;
557+ }
558 break;
559
560 case OUTPUT_DELIMITER_OPTION:
1555d43c 561@@ -817,6 +1169,7 @@ main (int argc, char **argv)
56ae3f82
SS
562 break;
563
564 case 'n':
565+ byte_mode_character_aware = 1;
566 break;
567
568 case 's':
1555d43c 569@@ -839,7 +1192,7 @@ main (int argc, char **argv)
56ae3f82
SS
570 if (operating_mode == undefined_mode)
571 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
572
573- if (delim != '\0' && operating_mode != field_mode)
574+ if (delim_specified && operating_mode != field_mode)
575 FATAL_ERROR (_("an input delimiter may be specified only\
576 when operating on fields"));
577
1555d43c 578@@ -866,15 +1219,34 @@ main (int argc, char **argv)
56ae3f82
SS
579 }
580
581 if (!delim_specified)
582- delim = '\t';
583+ {
584+ delim = '\t';
585+#ifdef HAVE_MBRTOWC
586+ wcdelim = L'\t';
587+ mbdelim[0] = '\t';
588+ mbdelim[1] = '\0';
589+ delimlen = 1;
590+#endif
591+ }
592
593 if (output_delimiter_string == NULL)
594 {
595- static char dummy[2];
596- dummy[0] = delim;
597- dummy[1] = '\0';
598- output_delimiter_string = dummy;
599- output_delimiter_length = 1;
600+#ifdef HAVE_MBRTOWC
601+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
602+ {
603+ output_delimiter_string = xstrdup(mbdelim);
604+ output_delimiter_length = delimlen;
605+ }
606+
607+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
608+#endif
609+ {
610+ static char dummy[2];
611+ dummy[0] = delim;
612+ dummy[1] = '\0';
613+ output_delimiter_string = dummy;
614+ output_delimiter_length = 1;
615+ }
616 }
617
618 if (optind == argc)
407c5be3
SS
619diff -urNp coreutils-8.9-orig/src/expand.c coreutils-8.9/src/expand.c
620--- coreutils-8.9-orig/src/expand.c 2011-01-01 22:19:23.000000000 +0100
621+++ coreutils-8.9/src/expand.c 2011-01-04 17:41:55.363905562 +0100
1555d43c 622@@ -38,12 +38,29 @@
56ae3f82
SS
623 #include <stdio.h>
624 #include <getopt.h>
625 #include <sys/types.h>
626+
627+/* Get mbstate_t, mbrtowc(), wcwidth(). */
628+#if HAVE_WCHAR_H
629+# include <wchar.h>
630+#endif
631+
632 #include "system.h"
633 #include "error.h"
1555d43c 634 #include "fadvise.h"
56ae3f82
SS
635 #include "quote.h"
636 #include "xstrndup.h"
637
638+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
639+ installation; work around this configuration error. */
640+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
641+# define MB_LEN_MAX 16
642+#endif
643+
644+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
645+#if HAVE_MBRTOWC && defined mbstate_t
646+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
647+#endif
648+
649 /* The official name of this program (e.g., no `g' prefix). */
650 #define PROGRAM_NAME "expand"
651
1555d43c 652@@ -360,6 +377,142 @@ expand (void)
56ae3f82
SS
653 }
654 }
655
656+#if HAVE_MBRTOWC
657+static void /* Multibyte-locale counterpart of expand (): copies every input file to standard output, converting tabs to spaces and measuring columns with wcwidth (). */
658+expand_multibyte (void)
659+{
660+ FILE *fp; /* Input stream. */
661+ mbstate_t i_state; /* Current shift state of the input stream. */
662+ mbstate_t i_state_bak; /* Back up the I_STATE. */
663+ mbstate_t o_state; /* Current shift state of the output stream. */
664+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
665+ char *bufpos = buf; /* Next read position of BUF; must be valid before the first memmove below. */
666+ size_t buflen = 0; /* The length of the byte sequence in buf. */
667+ wchar_t wc; /* The wide character decoded at BUFPOS. */
668+ size_t mblength; /* The byte size of a multibyte character
669+ which shows as same character as WC. */
670+ int tab_index = 0; /* Index in `tab_list' of next tabstop. */
671+ int column = 0; /* Column on screen of the next char. */
672+ int next_tab_column; /* Column the next tab stop is on. */
673+ int convert = 1; /* If nonzero, perform translations. */
674+
675+ fp = next_file ((FILE *) NULL);
676+ if (fp == NULL)
677+ return;
678+
679+ memset (&o_state, '\0', sizeof(mbstate_t));
680+ memset (&i_state, '\0', sizeof(mbstate_t));
681+
682+ for (;;)
683+ {
684+ /* Refill the buffer BUF. */
685+ if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
686+ {
687+ memmove (buf, bufpos, buflen);
688+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
689+ bufpos = buf;
690+ }
691+
692+ /* No character is left in BUF. */
693+ if (buflen < 1)
694+ {
695+ fp = next_file (fp);
696+
697+ if (fp == NULL)
698+ break; /* No more files. */
699+ else
700+ {
701+ memset (&i_state, '\0', sizeof(mbstate_t));
702+ continue;
703+ }
704+ }
705+
706+ /* Get a wide character. */
707+ i_state_bak = i_state;
708+ mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
709+
710+ switch (mblength)
711+ {
712+ case (size_t)-1: /* illegal byte sequence. */
713+ case (size_t)-2:
714+ mblength = 1;
715+ i_state = i_state_bak;
716+ if (convert)
717+ {
718+ ++column;
719+ if (convert_entire_line == 0)
720+ convert = 0;
721+ }
722+ putchar (*bufpos);
723+ break;
724+
725+ case 0: /* null. */
726+ mblength = 1;
727+ if (convert && convert_entire_line == 0)
728+ convert = 0;
729+ putchar ('\0');
730+ break;
731+
732+ default:
733+ if (wc == L'\n') /* LF. */
734+ {
735+ tab_index = 0;
736+ column = 0;
737+ convert = 1;
738+ putchar ('\n');
739+ }
740+ else if (wc == L'\t' && convert) /* Tab. */
741+ {
742+ if (tab_size == 0)
743+ {
744+ /* Do not let tab_index == first_free_tab;
745+ stop when it is 1 less. */
746+ while (tab_index < first_free_tab - 1
747+ && column >= tab_list[tab_index])
748+ tab_index++;
749+ next_tab_column = tab_list[tab_index];
750+ if (tab_index < first_free_tab - 1)
751+ tab_index++;
752+ if (column >= next_tab_column)
753+ next_tab_column = column + 1;
754+ }
755+ else
756+ next_tab_column = column + tab_size - column % tab_size;
757+
758+ while (column < next_tab_column)
759+ {
760+ putchar (' ');
761+ ++column;
762+ }
763+ }
764+ else /* Others. */
765+ {
766+ if (convert)
767+ {
768+ if (wc == L'\b')
769+ {
770+ if (column > 0)
771+ --column;
772+ }
773+ else
774+ {
775+ int width; /* The width of WC. */
776+
777+ width = wcwidth (wc);
778+ column += (width > 0) ? width : 0;
779+ if (convert_entire_line == 0)
780+ convert = 0;
781+ }
782+ }
783+ fwrite (bufpos, sizeof(char), mblength, stdout);
784+ }
785+ }
786+ buflen -= mblength;
787+ bufpos += mblength;
788+ }
789+}
790+#endif
791+
792 int
793 main (int argc, char **argv)
794 {
1555d43c 795@@ -424,7 +577,12 @@ main (int argc, char **argv)
56ae3f82
SS
796
797 file_list = (optind < argc ? &argv[optind] : stdin_argv);
798
799- expand ();
800+#if HAVE_MBRTOWC
801+ if (MB_CUR_MAX > 1)
802+ expand_multibyte ();
803+ else
804+#endif
805+ expand ();
806
807 if (have_read_stdin && fclose (stdin) != 0)
808 error (EXIT_FAILURE, errno, "-");
407c5be3
SS
809diff -urNp coreutils-8.9-orig/src/fold.c coreutils-8.9/src/fold.c
810--- coreutils-8.9-orig/src/fold.c 2011-01-01 22:19:23.000000000 +0100
811+++ coreutils-8.9/src/fold.c 2011-01-04 17:41:55.366888520 +0100
1555d43c 812@@ -22,12 +22,34 @@
56ae3f82
SS
813 #include <getopt.h>
814 #include <sys/types.h>
815
816+/* Get mbstate_t, mbrtowc(), wcwidth(). */
817+#if HAVE_WCHAR_H
818+# include <wchar.h>
819+#endif
820+
821+/* Get iswprint(), iswblank(), wcwidth(). */
822+#if HAVE_WCTYPE_H
823+# include <wctype.h>
824+#endif
825+
826 #include "system.h"
827 #include "error.h"
1555d43c 828 #include "fadvise.h"
56ae3f82
SS
829 #include "quote.h"
830 #include "xstrtol.h"
831
832+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
833+ installation; work around this configuration error. */
834+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
835+# undef MB_LEN_MAX
836+# define MB_LEN_MAX 16
837+#endif
838+
839+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
840+#if HAVE_MBRTOWC && defined mbstate_t
841+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
842+#endif
843+
844 #define TAB_WIDTH 8
845
846 /* The official name of this program (e.g., no `g' prefix). */
1555d43c 847@@ -35,20 +57,41 @@
56ae3f82
SS
848
849 #define AUTHORS proper_name ("David MacKenzie")
850
851+#define FATAL_ERROR(Message) /* Report Message with error (), then call usage (2). */ \
852+ do \
853+ { \
854+ error (0, 0, (Message)); \
855+ usage (2); \
856+ } \
857+ while (0)
858+
859+enum operating_mode
860+{
861+ /* Measure the line width in screen columns. */
862+ column_mode,
863+
864+ /* Measure the line width in bytes rather than columns (-b). */
865+ byte_mode,
866+
867+ /* Measure the line width in characters rather than columns (-c). */
868+ character_mode,
869+};
870+
871+/* The current operating mode. (Default: column_mode) */
872+static enum operating_mode operating_mode;
873+
874 /* If nonzero, try to break on whitespace. */
875 static bool break_spaces;
876
877-/* If nonzero, count bytes, not column positions. */
878-static bool count_bytes;
879-
880 /* If nonzero, at least one of the files we read was standard input. */
881 static bool have_read_stdin;
882
883-static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
884+static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
885
886 static struct option const longopts[] =
887 {
888 {"bytes", no_argument, NULL, 'b'},
889+ {"characters", no_argument, NULL, 'c'},
890 {"spaces", no_argument, NULL, 's'},
891 {"width", required_argument, NULL, 'w'},
892 {GETOPT_HELP_OPTION_DECL},
1555d43c 893@@ -78,6 +121,7 @@ Mandatory arguments to long options are
56ae3f82
SS
894 "), stdout);
895 fputs (_("\
896 -b, --bytes count bytes rather than columns\n\
897+ -c, --characters count characters rather than columns\n\
898 -s, --spaces break at spaces\n\
899 -w, --width=WIDTH use WIDTH columns instead of 80\n\
900 "), stdout);
1555d43c 901@@ -95,7 +139,7 @@ Mandatory arguments to long options are
56ae3f82
SS
902 static size_t
903 adjust_column (size_t column, char c)
904 {
905- if (!count_bytes)
906+ if (operating_mode != byte_mode)
907 {
908 if (c == '\b')
909 {
1555d43c 910@@ -118,30 +162,14 @@ adjust_column (size_t column, char c)
56ae3f82
SS
911 to stdout, with maximum line length WIDTH.
912 Return true if successful. */
913
914-static bool
915-fold_file (char const *filename, size_t width)
916+static void
917+fold_text (FILE *istream, size_t width, int *saved_errno)
918 {
919- FILE *istream;
920 int c;
921 size_t column = 0; /* Screen column where next char will go. */
922 size_t offset_out = 0; /* Index in `line_out' for next char. */
923 static char *line_out = NULL;
924 static size_t allocated_out = 0;
925- int saved_errno;
926-
927- if (STREQ (filename, "-"))
928- {
929- istream = stdin;
930- have_read_stdin = true;
931- }
932- else
933- istream = fopen (filename, "r");
934-
935- if (istream == NULL)
936- {
937- error (0, errno, "%s", filename);
938- return false;
939- }
940
1555d43c
SS
941 fadvise (istream, FADVISE_SEQUENTIAL);
942
943@@ -171,6 +199,15 @@ fold_file (char const *filename, size_t
56ae3f82
SS
944 bool found_blank = false;
945 size_t logical_end = offset_out;
946
947+ /* If LINE_OUT is still empty,
948+ store the new byte C in LINE_OUT
949+ even though column is bigger than width. */
950+ if (offset_out == 0)
951+ {
952+ line_out[offset_out++] = c;
953+ continue;
954+ }
955+
956 /* Look for the last blank. */
957 while (logical_end)
958 {
1555d43c 959@@ -217,11 +254,222 @@ fold_file (char const *filename, size_t
56ae3f82
SS
960 line_out[offset_out++] = c;
961 }
962
963- saved_errno = errno;
964+ *saved_errno = errno;
965
966 if (offset_out)
967 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
968
969+}
970+
971+#if HAVE_MBRTOWC
972+static void /* Multibyte-locale counterpart of fold_text: folds ISTREAM to stdout at WIDTH and stores errno, as seen after reading, in *SAVED_ERRNO. */
973+fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
974+{
975+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
976+ size_t buflen = 0; /* The length of the byte sequence in buf. */
977+ char *bufpos = buf; /* Next read position of BUF; must be a valid pointer before the first memmove below. */
978+ wint_t wc; /* The wide character decoded at BUFPOS. */
979+ size_t mblength; /* The byte size of a multibyte character which shows
980+ as same character as WC. */
981+ mbstate_t state, state_bak; /* State of the stream. */
982+ int convfail; /* 1 when the conversion failed, otherwise 0. */
983+
984+ static char *line_out = NULL;
985+ size_t offset_out = 0; /* Index in `line_out' for next char. */
986+ static size_t allocated_out = 0;
987+
988+ int increment;
989+ size_t column = 0;
990+
991+ size_t last_blank_pos;
992+ size_t last_blank_column;
993+ int is_blank_seen;
994+ int last_blank_increment = 0;
995+ int is_bs_following_last_blank;
996+ size_t bs_following_last_blank_num;
997+ int is_cr_after_last_blank;
998+
999+#define CLEAR_FLAGS \
1000+ do \
1001+ { \
1002+ last_blank_pos = 0; \
1003+ last_blank_column = 0; \
1004+ is_blank_seen = 0; \
1005+ is_bs_following_last_blank = 0; \
1006+ bs_following_last_blank_num = 0; \
1007+ is_cr_after_last_blank = 0; \
1008+ } \
1009+ while (0)
1010+
1011+#define START_NEW_LINE \
1012+ do \
1013+ { \
1014+ putchar ('\n'); \
1015+ column = 0; \
1016+ offset_out = 0; \
1017+ CLEAR_FLAGS; \
1018+ } \
1019+ while (0)
1020+
1021+ CLEAR_FLAGS;
1022+ memset (&state, '\0', sizeof(mbstate_t));
1023+
1024+ for (;; bufpos += mblength, buflen -= mblength)
1025+ {
1026+ if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
1027+ {
1028+ memmove (buf, bufpos, buflen);
1029+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
1030+ bufpos = buf;
1031+ }
1032+
1033+ if (buflen < 1)
1034+ break;
1035+
1036+ /* Get a wide character. */
1037+ convfail = 0;
1038+ state_bak = state;
1039+ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
1040+
1041+ switch (mblength)
1042+ {
1043+ case (size_t)-1:
1044+ case (size_t)-2:
1045+ convfail++;
1046+ state = state_bak;
1047+ /* Fall through. */
1048+
1049+ case 0:
1050+ mblength = 1;
1051+ break;
1052+ }
1053+
1054+rescan:
1055+ /* A newline ends the output line in every operating mode, not only in
1056+ column mode; otherwise -b and -c would keep counting across lines. */
1057+ if (!convfail && wc == L'\n')
1058+ {
1059+ fwrite (line_out, sizeof(char), offset_out, stdout);
1060+ START_NEW_LINE;
1061+ continue;
1062+ }
1063+ else if (operating_mode == byte_mode) /* byte mode */
1064+ increment = mblength;
1065+ else if (operating_mode == character_mode) /* character mode */
1066+ increment = 1;
1067+ else if (convfail) /* column mode, undecodable byte */
1068+ increment = 1;
1069+ else /* column mode */
1070+ {
1071+ switch (wc)
1072+ {
1073+ case L'\b':
1074+ increment = (column > 0) ? -1 : 0;
1075+ break;
1076+
1077+ case L'\r':
1078+ increment = -1 * column;
1079+ break;
1080+
1081+ case L'\t':
1082+ increment = TAB_WIDTH - column % TAB_WIDTH;
1083+ break;
1084+
1085+ default:
1086+ increment = wcwidth (wc);
1087+ increment = (increment < 0) ? 0 : increment;
1088+ }
1089+ }
1090+
1091+ if (column + increment > width && break_spaces && last_blank_pos)
1092+ {
1093+ fwrite (line_out, sizeof(char), last_blank_pos, stdout);
1094+ putchar ('\n');
1095+
1096+ offset_out = offset_out - last_blank_pos;
1097+ column = column - last_blank_column + ((is_cr_after_last_blank)
1098+ ? last_blank_increment : bs_following_last_blank_num);
1099+ memmove (line_out, line_out + last_blank_pos, offset_out);
1100+ CLEAR_FLAGS;
1101+ goto rescan;
1102+ }
1103+
1104+ if (column + increment > width && column != 0)
1105+ {
1106+ fwrite (line_out, sizeof(char), offset_out, stdout);
1107+ START_NEW_LINE;
1108+ goto rescan;
1109+ }
1110+
1111+ if (allocated_out < offset_out + mblength)
1112+ {
1113+ line_out = X2REALLOC (line_out, &allocated_out);
1114+ }
1115+
1116+ memcpy (line_out + offset_out, bufpos, mblength);
1117+ offset_out += mblength;
1118+ column += increment;
1119+
1120+ if (is_blank_seen && !convfail && wc == L'\r')
1121+ is_cr_after_last_blank = 1;
1122+
1123+ if (is_bs_following_last_blank && !convfail && wc == L'\b')
1124+ ++bs_following_last_blank_num;
1125+ else
1126+ is_bs_following_last_blank = 0;
1127+
1128+ if (break_spaces && !convfail && iswblank (wc))
1129+ {
1130+ last_blank_pos = offset_out;
1131+ last_blank_column = column;
1132+ is_blank_seen = 1;
1133+ last_blank_increment = increment;
1134+ is_bs_following_last_blank = 1;
1135+ bs_following_last_blank_num = 0;
1136+ is_cr_after_last_blank = 0;
1137+ }
1138+ }
1139+
1140+ *saved_errno = errno;
1141+
1142+ if (offset_out)
1143+ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1144+
1145+}
1146+#endif
1147+
1148+/* Fold file FILENAME, or standard input if FILENAME is "-",
1149+ to stdout, with maximum line length WIDTH.
1150+ Return 0 if successful, 1 if an error occurs. */
1151+
1152+static bool
1153+fold_file (char *filename, size_t width)
1154+{
1155+ FILE *istream;
1156+ int saved_errno;
1157+
1158+ if (STREQ (filename, "-"))
1159+ {
1160+ istream = stdin;
1161+ have_read_stdin = 1;
1162+ }
1163+ else
1164+ istream = fopen (filename, "r");
1165+
1166+ if (istream == NULL)
1167+ {
1168+ error (0, errno, "%s", filename);
1169+ return 1;
1170+ }
1171+
1172+ /* Define how ISTREAM is being folded. */
1173+#if HAVE_MBRTOWC
1174+ if (MB_CUR_MAX > 1)
1175+ fold_multibyte_text (istream, width, &saved_errno);
1176+ else
1177+#endif
1178+ fold_text (istream, width, &saved_errno);
1179+
1180 if (ferror (istream))
1181 {
1182 error (0, saved_errno, "%s", filename);
1555d43c 1183@@ -254,7 +502,8 @@ main (int argc, char **argv)
56ae3f82
SS
1184
1185 atexit (close_stdout);
1186
1187- break_spaces = count_bytes = have_read_stdin = false;
1188+ operating_mode = column_mode;
1189+ break_spaces = have_read_stdin = false;
1190
1191 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
1192 {
1555d43c 1193@@ -263,7 +512,15 @@ main (int argc, char **argv)
56ae3f82
SS
1194 switch (optc)
1195 {
1196 case 'b': /* Count bytes rather than columns. */
1197- count_bytes = true;
1198+ if (operating_mode != column_mode)
1199+ FATAL_ERROR (_("only one way of folding may be specified"));
1200+ operating_mode = byte_mode;
1201+ break;
1202+
1203+ case 'c':
1204+ if (operating_mode != column_mode)
1205+ FATAL_ERROR (_("only one way of folding may be specified"));
1206+ operating_mode = character_mode;
1207 break;
1208
1209 case 's': /* Break at word boundaries. */
407c5be3
SS
1210diff -urNp coreutils-8.9-orig/src/join.c coreutils-8.9/src/join.c
1211--- coreutils-8.9-orig/src/join.c 2011-01-01 22:19:23.000000000 +0100
1212+++ coreutils-8.9/src/join.c 2011-01-04 17:41:55.369888660 +0100
1555d43c 1213@@ -22,18 +22,32 @@
56ae3f82
SS
1214 #include <sys/types.h>
1215 #include <getopt.h>
1216
1217+/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
1218+#if HAVE_WCHAR_H
1219+# include <wchar.h>
1220+#endif
1221+
1222+/* Get iswblank(), towupper(). */
1223+#if HAVE_WCTYPE_H
1224+# include <wctype.h>
1225+#endif
1226+
1227 #include "system.h"
1228 #include "error.h"
1555d43c 1229 #include "fadvise.h"
56ae3f82
SS
1230 #include "hard-locale.h"
1231 #include "linebuffer.h"
1232-#include "memcasecmp.h"
1233 #include "quote.h"
1234 #include "stdio--.h"
1235 #include "xmemcoll.h"
1236 #include "xstrtol.h"
1237 #include "argmatch.h"
1238
1239+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1240+#if HAVE_MBRTOWC && defined mbstate_t
1241+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1242+#endif
1243+
1244 /* The official name of this program (e.g., no `g' prefix). */
1245 #define PROGRAM_NAME "join"
1246
1555d43c 1247@@ -122,10 +136,12 @@ static struct outlist outlist_head;
56ae3f82
SS
1248 /* Last element in `outlist', where a new element can be added. */
1249 static struct outlist *outlist_end = &outlist_head;
1250
1251-/* Tab character separating fields. If negative, fields are separated
1252- by any nonempty string of blanks, otherwise by exactly one
1253- tab character whose value (when cast to unsigned char) equals TAB. */
1254-static int tab = -1;
1255+/* Tab character separating fields. If NULL, fields are separated
1256+ by any nonempty string of blanks. */
1257+static char *tab = NULL;
1258+
1259+/* The number of bytes used for tab. */
1260+static size_t tablen = 0;
1261
1262 /* If nonzero, check that the input is correctly ordered. */
1263 static enum
1555d43c 1264@@ -249,13 +265,14 @@ xfields (struct line *line)
56ae3f82
SS
1265 if (ptr == lim)
1266 return;
1267
1555d43c 1268- if (0 <= tab && tab != '\n')
56ae3f82
SS
1269+ if (tab != NULL)
1270 {
1271+ unsigned char t = tab[0];
1272 char *sep;
1273- for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
1274+ for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
1275 extract_field (line, ptr, sep - ptr);
1276 }
1555d43c
SS
1277- else if (tab < 0)
1278+ else
1279 {
1280 /* Skip leading blanks before the first field. */
1281 while (isblank (to_uchar (*ptr)))
1282@@ -279,6 +296,148 @@ xfields (struct line *line)
56ae3f82
SS
1283 extract_field (line, ptr, lim - ptr);
1284 }
1285
1286+#if HAVE_MBRTOWC
1287+static void
1288+xfields_multibyte (struct line *line)
1289+{
1290+ char *ptr = line->buf.buffer;
1291+ char const *lim = ptr + line->buf.length - 1;
1292+ wchar_t wc = 0;
1293+ size_t mblength = 1;
1294+ mbstate_t state, state_bak;
1295+
1296+ memset (&state, 0, sizeof (mbstate_t));
1297+
1298+ if (ptr >= lim)
1299+ return;
1300+
1301+ if (tab != NULL)
1302+ {
1303+ unsigned char t = tab[0];
1304+ char *sep = ptr;
1305+ for (; ptr < lim; ptr = sep + mblength)
1306+ {
1307+ sep = ptr;
1308+ while (sep < lim)
1309+ {
1310+ state_bak = state;
1311+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1312+
1313+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
1314+ {
1315+ mblength = 1;
1316+ state = state_bak;
1317+ }
1318+ mblength = (mblength < 1) ? 1 : mblength;
1319+
1320+ if (mblength == tablen && !memcmp (sep, tab, mblength))
1321+ break;
1322+ else
1323+ {
1324+ sep += mblength;
1325+ continue;
1326+ }
1327+ }
1328+
1329+ if (sep >= lim)
1330+ break;
1331+
1332+ extract_field (line, ptr, sep - ptr);
1333+ }
1334+ }
1335+ else
1336+ {
1337+ /* Skip leading blanks before the first field. */
1338+ while(ptr < lim)
1339+ {
1340+ state_bak = state;
1341+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1342+
1343+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
1344+ {
1345+ mblength = 1;
1346+ state = state_bak;
1347+ break;
1348+ }
1349+ mblength = (mblength < 1) ? 1 : mblength;
1350+
1351+ if (!iswblank(wc))
1352+ break;
1353+ ptr += mblength;
1354+ }
1355+
1356+ do
1357+ {
1358+ char *sep;
1359+ state_bak = state;
1360+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1361+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
1362+ {
1363+ mblength = 1;
1364+ state = state_bak;
1365+ break;
1366+ }
1367+ mblength = (mblength < 1) ? 1 : mblength;
1368+
1369+ sep = ptr + mblength;
1370+ while (sep < lim)
1371+ {
1372+ state_bak = state;
1373+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1374+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
1375+ {
1376+ mblength = 1;
1377+ state = state_bak;
1378+ break;
1379+ }
1380+ mblength = (mblength < 1) ? 1 : mblength;
1381+
1382+ if (iswblank (wc))
1383+ break;
1384+
1385+ sep += mblength;
1386+ }
1387+
1388+ extract_field (line, ptr, sep - ptr);
1389+ if (sep >= lim)
1390+ return;
1391+
1392+ state_bak = state;
1393+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1394+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
1395+ {
1396+ mblength = 1;
1397+ state = state_bak;
1398+ break;
1399+ }
1400+ mblength = (mblength < 1) ? 1 : mblength;
1401+
1402+ ptr = sep + mblength;
1403+ while (ptr < lim)
1404+ {
1405+ state_bak = state;
1406+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1407+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
1408+ {
1409+ mblength = 1;
1410+ state = state_bak;
1411+ break;
1412+ }
1413+ mblength = (mblength < 1) ? 1 : mblength;
1414+
1415+ if (!iswblank (wc))
1416+ break;
1417+
1418+ ptr += mblength;
1419+ }
1420+ }
1421+ while (ptr < lim);
1422+ }
1423+
1424+ extract_field (line, ptr, lim - ptr);
1425+}
1426+#endif
1427+
1428 static void
1429 freeline (struct line *line)
1430 {
1555d43c 1431@@ -300,56 +459,115 @@ keycmp (struct line const *line1, struct
56ae3f82
SS
1432 size_t jf_1, size_t jf_2)
1433 {
1434 /* Start of field to compare in each file. */
1435- char *beg1;
1436- char *beg2;
1437-
1438- size_t len1;
1439- size_t len2; /* Length of fields to compare. */
1440+ char *beg[2];
1441+ char *copy[2];
1442+ size_t len[2]; /* Length of fields to compare. */
1443 int diff;
1444+ int i, j;
1445
1446 if (jf_1 < line1->nfields)
1447 {
1448- beg1 = line1->fields[jf_1].beg;
1449- len1 = line1->fields[jf_1].len;
1450+ beg[0] = line1->fields[jf_1].beg;
1451+ len[0] = line1->fields[jf_1].len;
1452 }
1453 else
1454 {
1455- beg1 = NULL;
1456- len1 = 0;
1457+ beg[0] = NULL;
1458+ len[0] = 0;
1459 }
1460
1461 if (jf_2 < line2->nfields)
1462 {
1463- beg2 = line2->fields[jf_2].beg;
1464- len2 = line2->fields[jf_2].len;
1465+ beg[1] = line2->fields[jf_2].beg;
1466+ len[1] = line2->fields[jf_2].len;
1467 }
1468 else
1469 {
1470- beg2 = NULL;
1471- len2 = 0;
1472+ beg[1] = NULL;
1473+ len[1] = 0;
1474 }
1475
1476- if (len1 == 0)
1477- return len2 == 0 ? 0 : -1;
1478- if (len2 == 0)
1479+ if (len[0] == 0)
1480+ return len[1] == 0 ? 0 : -1;
1481+ if (len[1] == 0)
1482 return 1;
1483
1484 if (ignore_case)
1485 {
1486- /* FIXME: ignore_case does not work with NLS (in particular,
1487- with multibyte chars). */
1488- diff = memcasecmp (beg1, beg2, MIN (len1, len2));
1489+#ifdef HAVE_MBRTOWC
1490+ if (MB_CUR_MAX > 1)
1491+ {
1492+ size_t mblength;
1493+ wchar_t wc, uwc;
1494+ mbstate_t state, state_bak;
1495+
1496+ memset (&state, '\0', sizeof (mbstate_t));
1497+
1498+ for (i = 0; i < 2; i++)
1499+ {
1500+ copy[i] = alloca (len[i] + 1);
1501+
1502+ for (j = 0; j < MIN (len[0], len[1]);)
1503+ {
1504+ state_bak = state;
1505+ mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
1506+
1507+ switch (mblength)
1508+ {
1509+ case (size_t) -1:
1510+ case (size_t) -2:
1511+ state = state_bak;
1512+ /* Fall through */
1513+ case 0:
1514+ mblength = 1;
1515+ break;
1516+
1517+ default:
1518+ uwc = towupper (wc);
1519+
1520+ if (uwc != wc)
1521+ {
1522+ mbstate_t state_wc;
1523+
1524+ memset (&state_wc, '\0', sizeof (mbstate_t));
1525+ wcrtomb (copy[i] + j, uwc, &state_wc);
1526+ }
1527+ else
1528+ memcpy (copy[i] + j, beg[i] + j, mblength);
1529+ }
1530+ j += mblength;
1531+ }
1532+ copy[i][j] = '\0';
1533+ }
1534+ }
1535+ else
1536+#endif
1537+ {
1538+ for (i = 0; i < 2; i++)
1539+ {
1540+ copy[i] = alloca (len[i] + 1);
1541+
1542+ for (j = 0; j < MIN (len[0], len[1]); j++)
1543+ copy[i][j] = toupper (beg[i][j]);
1544+
1545+ copy[i][j] = '\0';
1546+ }
1547+ }
1548 }
1549 else
1550 {
1551- if (hard_LC_COLLATE)
1552- return xmemcoll (beg1, len1, beg2, len2);
1553- diff = memcmp (beg1, beg2, MIN (len1, len2));
1554+ copy[0] = (unsigned char *) beg[0];
1555+ copy[1] = (unsigned char *) beg[1];
1556 }
1557
1558+ if (hard_LC_COLLATE)
1559+ return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
1560+ diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
1561+
1562+
1563 if (diff)
1564 return diff;
1565- return len1 < len2 ? -1 : len1 != len2;
1566+ return len[0] - len[1];
1567 }
1568
1569 /* Check that successive input lines PREV and CURRENT from input file
1555d43c 1570@@ -430,6 +648,11 @@ get_line (FILE *fp, struct line **linep,
56ae3f82
SS
1571 return false;
1572 }
1573
1574+#if HAVE_MBRTOWC
1575+ if (MB_CUR_MAX > 1)
1576+ xfields_multibyte (line);
1577+ else
1578+#endif
1579 xfields (line);
1580
1581 if (prevline[which - 1])
1555d43c 1582@@ -529,11 +752,18 @@ prfield (size_t n, struct line const *li
56ae3f82
SS
1583
1584 /* Print the join of LINE1 and LINE2. */
1585
1586+#define PUT_TAB_CHAR \
1587+ do \
1588+ { \
1589+ (tab != NULL) ? \
1590+ fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
1591+ } \
1592+ while (0)
1593+
1594 static void
1595 prjoin (struct line const *line1, struct line const *line2)
1596 {
1597 const struct outlist *outlist;
1598- char output_separator = tab < 0 ? ' ' : tab;
1599
1600 outlist = outlist_head.next;
1601 if (outlist)
1555d43c 1602@@ -568,7 +798,7 @@ prjoin (struct line const *line1, struct
56ae3f82
SS
1603 o = o->next;
1604 if (o == NULL)
1605 break;
1606- putchar (output_separator);
1607+ PUT_TAB_CHAR;
1608 }
1609 putchar ('\n');
1610 }
1555d43c 1611@@ -586,23 +816,23 @@ prjoin (struct line const *line1, struct
56ae3f82
SS
1612 prfield (join_field_1, line1);
1613 for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
1614 {
1615- putchar (output_separator);
1616+ PUT_TAB_CHAR;
1617 prfield (i, line1);
1618 }
1619 for (i = join_field_1 + 1; i < line1->nfields; ++i)
1620 {
1621- putchar (output_separator);
1622+ PUT_TAB_CHAR;
1623 prfield (i, line1);
1624 }
1625
1626 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
1627 {
1628- putchar (output_separator);
1629+ PUT_TAB_CHAR;
1630 prfield (i, line2);
1631 }
1632 for (i = join_field_2 + 1; i < line2->nfields; ++i)
1633 {
1634- putchar (output_separator);
1635+ PUT_TAB_CHAR;
1636 prfield (i, line2);
1637 }
1638 putchar ('\n');
1555d43c 1639@@ -1043,21 +1273,46 @@ main (int argc, char **argv)
56ae3f82
SS
1640
1641 case 't':
1642 {
1643- unsigned char newtab = optarg[0];
1644+ char *newtab;
1645+ size_t newtablen;
1646+ newtab = xstrdup (optarg);
1647+#if HAVE_MBRTOWC
1648+ if (MB_CUR_MAX > 1)
1649+ {
1650+ mbstate_t state;
1651+
1652+ memset (&state, 0, sizeof (mbstate_t));
1653+ newtablen = mbrtowc (NULL, newtab,
1654+ strnlen (newtab, MB_LEN_MAX),
1655+ &state);
1656+ if (newtablen == (size_t) 0
1657+ || newtablen == (size_t) -1
1658+ || newtablen == (size_t) -2)
1659+ newtablen = 1;
1660+ }
1661+ else
1662+#endif
1663+ newtablen = 1;
1664 if (! newtab)
1665+ {
1666 newtab = '\n'; /* '' => process the whole line. */
1667+ }
1668 else if (optarg[1])
1669 {
1670- if (STREQ (optarg, "\\0"))
1671- newtab = '\0';
1672- else
1673- error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1674- quote (optarg));
1675+ if (newtablen == 1 && newtab[1])
1676+ {
1677+ if (STREQ (newtab, "\\0"))
1678+ newtab[0] = '\0';
1679+ }
1680+ }
1681+ if (tab != NULL && strcmp (tab, newtab))
1682+ {
1683+ free (newtab);
1684+ error (EXIT_FAILURE, 0, _("incompatible tabs"));
1685 }
1686- if (0 <= tab && tab != newtab)
1687- error (EXIT_FAILURE, 0, _("incompatible tabs"));
1688 tab = newtab;
1689- }
1690+ tablen = newtablen;
1691+ }
1692 break;
1693
1694 case NOCHECK_ORDER_OPTION:
407c5be3
SS
1695diff -urNp coreutils-8.9-orig/src/pr.c coreutils-8.9/src/pr.c
1696--- coreutils-8.9-orig/src/pr.c 2011-01-01 22:19:23.000000000 +0100
1697+++ coreutils-8.9/src/pr.c 2011-01-04 17:41:55.377138275 +0100
56ae3f82
SS
1698@@ -312,6 +312,32 @@
1699
1700 #include <getopt.h>
1701 #include <sys/types.h>
1702+
1703+/* Get MB_LEN_MAX. */
1704+#include <limits.h>
1705+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1706+ installation; work around this configuration error. */
1707+#if !defined MB_LEN_MAX || MB_LEN_MAX == 1
1708+# define MB_LEN_MAX 16
1709+#endif
1710+
1711+/* Get MB_CUR_MAX. */
1712+#include <stdlib.h>
1713+
1714+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
1715+/* Get mbstate_t, mbrtowc(), wcwidth(). */
1716+#if HAVE_WCHAR_H
1717+# include <wchar.h>
1718+#endif
1719+
1720+/* Get iswprint(). -- for wcwidth(). */
1721+#if HAVE_WCTYPE_H
1722+# include <wctype.h>
1723+#endif
1724+#if !defined iswprint && !HAVE_ISWPRINT
1725+# define iswprint(wc) 1
1726+#endif
1727+
1728 #include "system.h"
1729 #include "error.h"
1555d43c
SS
1730 #include "fadvise.h"
1731@@ -323,6 +349,18 @@
56ae3f82
SS
1732 #include "strftime.h"
1733 #include "xstrtol.h"
1734
1735+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1736+#if HAVE_MBRTOWC && defined mbstate_t
1737+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1738+#endif
1739+
1740+#ifndef HAVE_DECL_WCWIDTH
1741+"this configure-time declaration test was not run"
1742+#endif
1743+#if !HAVE_DECL_WCWIDTH
1744+extern int wcwidth ();
1745+#endif
1746+
1747 /* The official name of this program (e.g., no `g' prefix). */
1748 #define PROGRAM_NAME "pr"
1749
1555d43c 1750@@ -415,7 +453,20 @@ struct COLUMN
56ae3f82
SS
1751
1752 typedef struct COLUMN COLUMN;
1753
1754-static int char_to_clump (char c);
1755+/* Function pointers to switch functions for single byte locale or for
1756+ multibyte locale. If multibyte functions do not exist in your system,
1757+ these pointers always point to the functions for single byte locale. */
1758+static void (*print_char) (char c);
1759+static int (*char_to_clump) (char c);
1760+
1761+/* Functions for single byte locale. */
1762+static void print_char_single (char c);
1763+static int char_to_clump_single (char c);
1764+
1765+/* Functions for multibyte locale. */
1766+static void print_char_multi (char c);
1767+static int char_to_clump_multi (char c);
1768+
1769 static bool read_line (COLUMN *p);
1770 static bool print_page (void);
1771 static bool print_stored (COLUMN *p);
1555d43c 1772@@ -425,6 +476,7 @@ static void print_header (void);
56ae3f82
SS
1773 static void pad_across_to (int position);
1774 static void add_line_number (COLUMN *p);
1775 static void getoptarg (char *arg, char switch_char, char *character,
1776+ int *character_length, int *character_width,
1777 int *number);
1778 void usage (int status);
1779 static void print_files (int number_of_files, char **av);
1555d43c 1780@@ -439,7 +491,6 @@ static void store_char (char c);
56ae3f82
SS
1781 static void pad_down (int lines);
1782 static void read_rest_of_line (COLUMN *p);
1783 static void skip_read (COLUMN *p, int column_number);
1784-static void print_char (char c);
1785 static void cleanup (void);
1786 static void print_sep_string (void);
1787 static void separator_string (const char *optarg_S);
1555d43c 1788@@ -451,7 +502,7 @@ static COLUMN *column_vector;
56ae3f82
SS
1789 we store the leftmost columns contiguously in buff.
1790 To print a line from buff, get the index of the first character
1791 from line_vector[i], and print up to line_vector[i + 1]. */
1792-static char *buff;
1793+static unsigned char *buff;
1794
1795 /* Index of the position in buff where the next character
1796 will be stored. */
1555d43c 1797@@ -555,7 +606,7 @@ static int chars_per_column;
56ae3f82
SS
1798 static bool untabify_input = false;
1799
1800 /* (-e) The input tab character. */
1801-static char input_tab_char = '\t';
1802+static char input_tab_char[MB_LEN_MAX] = "\t";
1803
1804 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ...
1805 where the leftmost column is 1. */
1555d43c 1806@@ -565,7 +616,10 @@ static int chars_per_input_tab = 8;
56ae3f82
SS
1807 static bool tabify_output = false;
1808
1809 /* (-i) The output tab character. */
1810-static char output_tab_char = '\t';
1811+static char output_tab_char[MB_LEN_MAX] = "\t";
1812+
1813+/* (-i) The byte length of output tab character. */
1814+static int output_tab_char_length = 1;
1815
1816 /* (-i) The width of the output tab. */
1817 static int chars_per_output_tab = 8;
1555d43c 1818@@ -639,7 +693,13 @@ static int power_10;
56ae3f82
SS
1819 static bool numbered_lines = false;
1820
1821 /* (-n) Character which follows each line number. */
1822-static char number_separator = '\t';
1823+static char number_separator[MB_LEN_MAX] = "\t";
1824+
1825+/* (-n) The byte length of the character which follows each line number. */
1826+static int number_separator_length = 1;
1827+
1828+/* (-n) The character width of the character which follows each line number. */
1829+static int number_separator_width = 0;
1830
1831 /* (-n) line counting starts with 1st line of input file (not with 1st
1832 line of 1st page printed). */
1555d43c 1833@@ -692,6 +752,7 @@ static bool use_col_separator = false;
56ae3f82
SS
1834 -a|COLUMN|-m is a `space' and with the -J option a `tab'. */
1835 static char *col_sep_string = (char *) "";
1836 static int col_sep_length = 0;
1837+static int col_sep_width = 0;
1838 static char *column_separator = (char *) " ";
1839 static char *line_separator = (char *) "\t";
1840
1555d43c 1841@@ -848,6 +909,13 @@ separator_string (const char *optarg_S)
56ae3f82
SS
1842 col_sep_length = (int) strlen (optarg_S);
1843 col_sep_string = xmalloc (col_sep_length + 1);
1844 strcpy (col_sep_string, optarg_S);
1845+
1846+#if HAVE_MBRTOWC
1847+ if (MB_CUR_MAX > 1)
1848+ col_sep_width = mbswidth (col_sep_string, 0);
1849+ else
1850+#endif
1851+ col_sep_width = col_sep_length;
1852 }
1853
1854 int
1555d43c 1855@@ -872,6 +940,21 @@ main (int argc, char **argv)
56ae3f82
SS
1856
1857 atexit (close_stdout);
1858
1859+/* Define which functions are used, the ones for single byte locale or the ones
1860+ for multibyte locale. */
1861+#if HAVE_MBRTOWC
1862+ if (MB_CUR_MAX > 1)
1863+ {
1864+ print_char = print_char_multi;
1865+ char_to_clump = char_to_clump_multi;
1866+ }
1867+ else
1868+#endif
1869+ {
1870+ print_char = print_char_single;
1871+ char_to_clump = char_to_clump_single;
1872+ }
1873+
1874 n_files = 0;
1875 file_names = (argc > 1
1876 ? xmalloc ((argc - 1) * sizeof (char *))
1555d43c 1877@@ -948,8 +1031,12 @@ main (int argc, char **argv)
56ae3f82
SS
1878 break;
1879 case 'e':
1880 if (optarg)
1881- getoptarg (optarg, 'e', &input_tab_char,
1882- &chars_per_input_tab);
1883+ {
1884+ int dummy_length, dummy_width;
1885+
1886+ getoptarg (optarg, 'e', input_tab_char, &dummy_length,
1887+ &dummy_width, &chars_per_input_tab);
1888+ }
1889 /* Could check tab width > 0. */
1890 untabify_input = true;
1891 break;
1555d43c 1892@@ -962,8 +1049,12 @@ main (int argc, char **argv)
56ae3f82
SS
1893 break;
1894 case 'i':
1895 if (optarg)
1896- getoptarg (optarg, 'i', &output_tab_char,
1897- &chars_per_output_tab);
1898+ {
1899+ int dummy_width;
1900+
1901+ getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length,
1902+ &dummy_width, &chars_per_output_tab);
1903+ }
1904 /* Could check tab width > 0. */
1905 tabify_output = true;
1906 break;
1555d43c 1907@@ -990,8 +1081,8 @@ main (int argc, char **argv)
56ae3f82
SS
1908 case 'n':
1909 numbered_lines = true;
1910 if (optarg)
1911- getoptarg (optarg, 'n', &number_separator,
1912- &chars_per_number);
1913+ getoptarg (optarg, 'n', number_separator, &number_separator_length,
1914+ &number_separator_width, &chars_per_number);
1915 break;
1916 case 'N':
1917 skip_count = false;
1555d43c 1918@@ -1030,7 +1121,7 @@ main (int argc, char **argv)
56ae3f82
SS
1919 old_s = false;
1920 /* Reset an additional input of -s, -S dominates -s */
1921 col_sep_string = bad_cast ("");
1922- col_sep_length = 0;
1923+ col_sep_length = col_sep_width = 0;
1924 use_col_separator = true;
1925 if (optarg)
1926 separator_string (optarg);
1555d43c 1927@@ -1187,10 +1278,45 @@ main (int argc, char **argv)
56ae3f82
SS
1928 a number. */
1929
1930 static void
1931-getoptarg (char *arg, char switch_char, char *character, int *number)
1932+getoptarg (char *arg, char switch_char, char *character, int *character_length,
1933+ int *character_width, int *number)
1934 {
1935 if (!ISDIGIT (*arg))
1936- *character = *arg++;
1937+ {
1938+#ifdef HAVE_MBRTOWC
1939+ if (MB_CUR_MAX > 1) /* for multibyte locale. */
1940+ {
1941+ wchar_t wc;
1942+ size_t mblength;
1943+ int width;
1944+ mbstate_t state = {'\0'};
1945+
1946+ mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state);
1947+
1948+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
1949+ {
1950+ *character_length = 1;
1951+ *character_width = 1;
1952+ }
1953+ else
1954+ {
1955+ *character_length = (mblength < 1) ? 1 : mblength;
1956+ width = wcwidth (wc);
1957+ *character_width = (width < 0) ? 0 : width;
1958+ }
1959+
1960+ strncpy (character, arg, *character_length);
1961+ arg += *character_length;
1962+ }
1963+ else /* for single byte locale. */
1964+#endif
1965+ {
1966+ *character = *arg++;
1967+ *character_length = 1;
1968+ *character_width = 1;
1969+ }
1970+ }
1971+
1972 if (*arg)
1973 {
1974 long int tmp_long;
1555d43c 1975@@ -1249,7 +1375,7 @@ init_parameters (int number_of_files)
56ae3f82
SS
1976 else
1977 col_sep_string = column_separator;
1978
1979- col_sep_length = 1;
1980+ col_sep_length = col_sep_width = 1;
1981 use_col_separator = true;
1982 }
1983 /* It's rather pointless to define a TAB separator with column
1555d43c 1984@@ -1280,11 +1406,11 @@ init_parameters (int number_of_files)
56ae3f82
SS
1985 TAB_WIDTH (chars_per_input_tab, chars_per_number); */
1986
1987 /* Estimate chars_per_text without any margin and keep it constant. */
1988- if (number_separator == '\t')
1989+ if (number_separator[0] == '\t')
1990 number_width = chars_per_number +
1991 TAB_WIDTH (chars_per_default_tab, chars_per_number);
1992 else
1993- number_width = chars_per_number + 1;
1994+ number_width = chars_per_number + number_separator_width;
1995
1996 /* The number is part of the column width unless we are
1997 printing files in parallel. */
1555d43c 1998@@ -1299,7 +1425,7 @@ init_parameters (int number_of_files)
56ae3f82
SS
1999 }
2000
2001 chars_per_column = (chars_per_line - chars_used_by_number -
2002- (columns - 1) * col_sep_length) / columns;
2003+ (columns - 1) * col_sep_width) / columns;
2004
2005 if (chars_per_column < 1)
2006 error (EXIT_FAILURE, 0, _("page width too narrow"));
1555d43c 2007@@ -1424,7 +1550,7 @@ init_funcs (void)
56ae3f82
SS
2008
2009 /* Enlarge p->start_position of first column to use the same form of
2010 padding_not_printed with all columns. */
2011- h = h + col_sep_length;
2012+ h = h + col_sep_width;
2013
2014 /* This loop takes care of all but the rightmost column. */
2015
1555d43c 2016@@ -1458,7 +1584,7 @@ init_funcs (void)
56ae3f82
SS
2017 }
2018 else
2019 {
2020- h = h_next + col_sep_length;
2021+ h = h_next + col_sep_width;
2022 h_next = h + chars_per_column;
2023 }
2024 }
1555d43c 2025@@ -1749,9 +1875,9 @@ static void
56ae3f82
SS
2026 align_column (COLUMN *p)
2027 {
2028 padding_not_printed = p->start_position;
2029- if (padding_not_printed - col_sep_length > 0)
2030+ if (padding_not_printed - col_sep_width > 0)
2031 {
2032- pad_across_to (padding_not_printed - col_sep_length);
2033+ pad_across_to (padding_not_printed - col_sep_width);
2034 padding_not_printed = ANYWHERE;
2035 }
2036
1555d43c 2037@@ -2022,13 +2148,13 @@ store_char (char c)
56ae3f82
SS
2038 /* May be too generous. */
2039 buff = X2REALLOC (buff, &buff_allocated);
2040 }
2041- buff[buff_current++] = c;
2042+ buff[buff_current++] = (unsigned char) c;
2043 }
2044
2045 static void
2046 add_line_number (COLUMN *p)
2047 {
2048- int i;
2049+ int i, j;
2050 char *s;
2051 int left_cut;
2052
1555d43c 2053@@ -2051,22 +2177,24 @@ add_line_number (COLUMN *p)
56ae3f82
SS
2054 /* Tabification is assumed for multiple columns, also for n-separators,
2055 but `default n-separator = TAB' hasn't been given priority over
2056 equal column_width also specified by POSIX. */
2057- if (number_separator == '\t')
2058+ if (number_separator[0] == '\t')
2059 {
2060 i = number_width - chars_per_number;
2061 while (i-- > 0)
2062 (p->char_func) (' ');
2063 }
2064 else
2065- (p->char_func) (number_separator);
2066+ for (j = 0; j < number_separator_length; j++)
2067+ (p->char_func) (number_separator[j]);
2068 }
2069 else
2070 /* To comply with POSIX, we avoid any expansion of default TAB
2071 separator with a single column output. No column_width requirement
2072 has to be considered. */
2073 {
2074- (p->char_func) (number_separator);
2075- if (number_separator == '\t')
2076+ for (j = 0; j < number_separator_length; j++)
2077+ (p->char_func) (number_separator[j]);
2078+ if (number_separator[0] == '\t')
2079 output_position = POS_AFTER_TAB (chars_per_output_tab,
2080 output_position);
2081 }
1555d43c 2082@@ -2227,7 +2355,7 @@ print_white_space (void)
56ae3f82
SS
2083 while (goal - h_old > 1
2084 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
2085 {
2086- putchar (output_tab_char);
2087+ fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout);
2088 h_old = h_new;
2089 }
2090 while (++h_old <= goal)
1555d43c 2091@@ -2247,6 +2375,7 @@ print_sep_string (void)
56ae3f82
SS
2092 {
2093 char *s;
2094 int l = col_sep_length;
2095+ int not_space_flag;
2096
2097 s = col_sep_string;
2098
1555d43c 2099@@ -2260,6 +2389,7 @@ print_sep_string (void)
56ae3f82
SS
2100 {
2101 for (; separators_not_printed > 0; --separators_not_printed)
2102 {
2103+ not_space_flag = 0;
2104 while (l-- > 0)
2105 {
2106 /* 3 types of sep_strings: spaces only, spaces and chars,
1555d43c 2107@@ -2273,12 +2403,15 @@ print_sep_string (void)
56ae3f82
SS
2108 }
2109 else
2110 {
2111+ not_space_flag = 1;
2112 if (spaces_not_printed > 0)
2113 print_white_space ();
2114 putchar (*s++);
2115- ++output_position;
2116 }
2117 }
2118+ if (not_space_flag)
2119+ output_position += col_sep_width;
2120+
2121 /* sep_string ends with some spaces */
2122 if (spaces_not_printed > 0)
2123 print_white_space ();
1555d43c 2124@@ -2306,7 +2439,7 @@ print_clump (COLUMN *p, int n, char *clu
56ae3f82
SS
2125 required number of tabs and spaces. */
2126
2127 static void
2128-print_char (char c)
2129+print_char_single (char c)
2130 {
2131 if (tabify_output)
2132 {
1555d43c 2133@@ -2330,6 +2463,74 @@ print_char (char c)
56ae3f82
SS
2134 putchar (c);
2135 }
2136
2137+#ifdef HAVE_MBRTOWC
2138+static void
2139+print_char_multi (char c)
2140+{
2141+ static size_t mbc_pos = 0;
2142+ static char mbc[MB_LEN_MAX] = {'\0'};
2143+ static mbstate_t state = {'\0'};
2144+ mbstate_t state_bak;
2145+ wchar_t wc;
2146+ size_t mblength;
2147+ int width;
2148+
2149+ if (tabify_output)
2150+ {
2151+ state_bak = state;
2152+ mbc[mbc_pos++] = c;
2153+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
2154+
2155+ while (mbc_pos > 0)
2156+ {
2157+ switch (mblength)
2158+ {
2159+ case (size_t)-2:
2160+ state = state_bak;
2161+ return;
2162+
2163+ case (size_t)-1:
2164+ state = state_bak;
2165+ ++output_position;
2166+ putchar (mbc[0]);
2167+ memmove (mbc, mbc + 1, MB_CUR_MAX - 1);
2168+ --mbc_pos;
2169+ break;
2170+
2171+ case 0:
2172+ mblength = 1;
2173+
2174+ default:
2175+ if (wc == L' ')
2176+ {
2177+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
2178+ --mbc_pos;
2179+ ++spaces_not_printed;
2180+ return;
2181+ }
2182+ else if (spaces_not_printed > 0)
2183+ print_white_space ();
2184+
2185+ /* Nonprintables are assumed to have width 0, except L'\b'. */
2186+ if ((width = wcwidth (wc)) < 1)
2187+ {
2188+ if (wc == L'\b')
2189+ --output_position;
2190+ }
2191+ else
2192+ output_position += width;
2193+
2194+ fwrite (mbc, sizeof(char), mblength, stdout);
2195+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
2196+ mbc_pos -= mblength;
2197+ }
2198+ }
2199+ return;
2200+ }
2201+ putchar (c);
2202+}
2203+#endif
2204+
2205 /* Skip to page PAGE before printing.
2206 PAGE may be larger than total number of pages. */
2207
1555d43c 2208@@ -2509,9 +2710,9 @@ read_line (COLUMN *p)
56ae3f82
SS
2209 align_empty_cols = false;
2210 }
2211
2212- if (padding_not_printed - col_sep_length > 0)
2213+ if (padding_not_printed - col_sep_width > 0)
2214 {
2215- pad_across_to (padding_not_printed - col_sep_length);
2216+ pad_across_to (padding_not_printed - col_sep_width);
2217 padding_not_printed = ANYWHERE;
2218 }
2219
1555d43c 2220@@ -2612,9 +2813,9 @@ print_stored (COLUMN *p)
56ae3f82
SS
2221 }
2222 }
2223
2224- if (padding_not_printed - col_sep_length > 0)
2225+ if (padding_not_printed - col_sep_width > 0)
2226 {
2227- pad_across_to (padding_not_printed - col_sep_length);
2228+ pad_across_to (padding_not_printed - col_sep_width);
2229 padding_not_printed = ANYWHERE;
2230 }
2231
1555d43c 2232@@ -2627,8 +2828,8 @@ print_stored (COLUMN *p)
56ae3f82
SS
2233 if (spaces_not_printed == 0)
2234 {
2235 output_position = p->start_position + end_vector[line];
2236- if (p->start_position - col_sep_length == chars_per_margin)
2237- output_position -= col_sep_length;
2238+ if (p->start_position - col_sep_width == chars_per_margin)
2239+ output_position -= col_sep_width;
2240 }
2241
2242 return true;
1555d43c 2243@@ -2647,7 +2848,7 @@ print_stored (COLUMN *p)
56ae3f82
SS
2244 number of characters is 1.) */
2245
2246 static int
2247-char_to_clump (char c)
2248+char_to_clump_single (char c)
2249 {
2250 unsigned char uc = c;
2251 char *s = clump_buff;
1555d43c 2252@@ -2657,10 +2858,10 @@ char_to_clump (char c)
56ae3f82
SS
2253 int chars;
2254 int chars_per_c = 8;
2255
2256- if (c == input_tab_char)
2257+ if (c == input_tab_char[0])
2258 chars_per_c = chars_per_input_tab;
2259
2260- if (c == input_tab_char || c == '\t')
2261+ if (c == input_tab_char[0] || c == '\t')
2262 {
2263 width = TAB_WIDTH (chars_per_c, input_position);
2264
1555d43c 2265@@ -2741,6 +2942,154 @@ char_to_clump (char c)
56ae3f82
SS
2266 return chars;
2267 }
2268
2269+#ifdef HAVE_MBRTOWC
2270+static int
2271+char_to_clump_multi (char c)
2272+{
2273+ static size_t mbc_pos = 0;
2274+ static char mbc[MB_LEN_MAX] = {'\0'};
2275+ static mbstate_t state = {'\0'};
2276+ mbstate_t state_bak;
2277+ wchar_t wc;
2278+ size_t mblength;
2279+ int wc_width;
2280+ register char *s = clump_buff;
2281+ register int i, j;
2282+ char esc_buff[4];
2283+ int width;
2284+ int chars;
2285+ int chars_per_c = 8;
2286+
2287+ state_bak = state;
2288+ mbc[mbc_pos++] = c;
2289+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
2290+
2291+ width = 0;
2292+ chars = 0;
2293+ while (mbc_pos > 0)
2294+ {
2295+ switch (mblength)
2296+ {
2297+ case (size_t)-2:
2298+ state = state_bak;
2299+ return 0;
2300+
2301+ case (size_t)-1:
2302+ state = state_bak;
2303+ mblength = 1;
2304+
2305+ if (use_esc_sequence || use_cntrl_prefix)
2306+ {
2307+ width += 4;
2308+ chars += 4;
2309+ *s++ = '\\';
2310+ sprintf (esc_buff, "%03o", (unsigned char) mbc[0]); /* unsigned: at most 3 digits, fits esc_buff */
2311+ for (i = 0; i <= 2; ++i)
2312+ *s++ = (int) esc_buff[i];
2313+ }
2314+ else
2315+ {
2316+ width += 1;
2317+ chars += 1;
2318+ *s++ = mbc[0];
2319+ }
2320+ break;
2321+
2322+ case 0:
2323+ mblength = 1;
2324+ /* Fall through */
2325+
2326+ default:
2327+ if (memcmp (mbc, input_tab_char, mblength) == 0)
2328+ chars_per_c = chars_per_input_tab;
2329+
2330+ if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t')
2331+ {
2332+ int width_inc;
2333+
2334+ width_inc = TAB_WIDTH (chars_per_c, input_position);
2335+ width += width_inc;
2336+
2337+ if (untabify_input)
2338+ {
2339+ for (i = width_inc; i; --i)
2340+ *s++ = ' ';
2341+ chars += width_inc;
2342+ }
2343+ else
2344+ {
2345+ for (i = 0; i < mblength; i++)
2346+ *s++ = mbc[i];
2347+ chars += mblength;
2348+ }
2349+ }
2350+ else if ((wc_width = wcwidth (wc)) < 1)
2351+ {
2352+ if (use_esc_sequence)
2353+ {
2354+ for (i = 0; i < mblength; i++)
2355+ {
2356+ width += 4;
2357+ chars += 4;
2358+ *s++ = '\\';
2359+ sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); /* escape each byte of the character */
2360+ for (j = 0; j <= 2; ++j)
2361+ *s++ = (int) esc_buff[j];
2362+ }
2363+ }
2364+ else if (use_cntrl_prefix)
2365+ {
2366+ if (wc < 0200)
2367+ {
2368+ width += 2;
2369+ chars += 2;
2370+ *s++ = '^';
2371+ *s++ = wc ^ 0100;
2372+ }
2373+ else
2374+ {
2375+ for (i = 0; i < mblength; i++)
2376+ {
2377+ width += 4;
2378+ chars += 4;
2379+ *s++ = '\\';
2380+ sprintf (esc_buff, "%03o", (unsigned char) mbc[i]); /* escape each byte of the character */
2381+ for (j = 0; j <= 2; ++j)
2382+ *s++ = (int) esc_buff[j];
2383+ }
2384+ }
2385+ }
2386+ else if (wc == L'\b')
2387+ {
2388+ width += -1;
2389+ chars += 1;
2390+ *s++ = c;
2391+ }
2392+ else
2393+ {
2394+ width += 0;
2395+ chars += mblength;
2396+ for (i = 0; i < mblength; i++)
2397+ *s++ = mbc[i];
2398+ }
2399+ }
2400+ else
2401+ {
2402+ width += wc_width;
2403+ chars += mblength;
2404+ for (i = 0; i < mblength; i++)
2405+ *s++ = mbc[i];
2406+ }
2407+ }
2408+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
2409+ mbc_pos -= mblength;
2410+ }
2411+
2412+ input_position += width;
2413+ return chars;
2414+}
2415+#endif
2416+
2417 /* We've just printed some files and need to clean up things before
2418 looking for more options and printing the next batch of files.
2419
407c5be3
SS
2420diff -urNp coreutils-8.9-orig/src/sort.c coreutils-8.9/src/sort.c
2421--- coreutils-8.9-orig/src/sort.c 2011-01-01 22:19:23.000000000 +0100
2422+++ coreutils-8.9/src/sort.c 2011-01-04 17:41:55.384888730 +0100
1555d43c 2423@@ -22,11 +22,20 @@
56ae3f82
SS
2424
2425 #include <config.h>
2426
2427+#include <assert.h>
2428 #include <getopt.h>
1555d43c 2429 #include <pthread.h>
56ae3f82
SS
2430 #include <sys/types.h>
2431 #include <sys/wait.h>
2432 #include <signal.h>
2433+#if HAVE_WCHAR_H
2434+# include <wchar.h>
2435+#endif
2436+/* Get isw* functions. */
2437+#if HAVE_WCTYPE_H
2438+# include <wctype.h>
2439+#endif
2440+
2441 #include "system.h"
2442 #include "argmatch.h"
2443 #include "error.h"
407c5be3 2444@@ -163,12 +172,34 @@ static int thousands_sep;
56ae3f82 2445
56ae3f82
SS
2446 /* Nonzero if the corresponding locales are hard. */
2447 static bool hard_LC_COLLATE;
2448-#if HAVE_NL_LANGINFO
2449+#if HAVE_LANGINFO_CODESET
2450 static bool hard_LC_TIME;
2451 #endif
2452
2453 #define NONZERO(x) ((x) != 0)
2454
2455+/* Store in MBLENGTH the byte length of the multibyte character at PTR (1 if invalid, incomplete or NUL). */
2456+#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
2457+ do \
2458+ { \
2459+ wchar_t wc; \
2460+ mbstate_t state_bak; \
2461+ \
2462+ state_bak = STATE; \
2463+ MBLENGTH = mbrtowc (&wc, (PTR), (LIM) - (PTR), &(STATE)); \
2464+ \
2465+ switch (MBLENGTH) \
2466+ { \
2467+ case (size_t)-1: \
2468+ case (size_t)-2: \
2469+ STATE = state_bak; \
2470+ /* Fall through. */ \
2471+ case 0: \
2472+ MBLENGTH = 1; \
2473+ } \
2474+ } \
2475+ while (0)
2476+
2477 /* The kind of blanks for '-b' to skip in various options. */
2478 enum blanktype { bl_start, bl_end, bl_both };
2479
407c5be3 2480@@ -335,13 +366,11 @@ static bool reverse;
56ae3f82
SS
2481 they were read if all keys compare equal. */
2482 static bool stable;
2483
2484-/* If TAB has this value, blanks separate fields. */
2485-enum { TAB_DEFAULT = CHAR_MAX + 1 };
2486-
2487-/* Tab character separating fields. If TAB_DEFAULT, then fields are
2488+/* Tab character separating fields. If tab_length is 0, then fields are
2489 separated by the empty string between a non-blank character and a blank
2490 character. */
2491-static int tab = TAB_DEFAULT;
2492+static char tab[MB_LEN_MAX + 1];
2493+static size_t tab_length = 0;
2494
2495 /* Flag to remove consecutive duplicate lines from the output.
2496 Only the last of a sequence of equal lines will be output. */
407c5be3
SS
2497@@ -775,6 +804,46 @@ reap_all (void)
2498 reap (-1);
56ae3f82
SS
2499 }
2500
2501+/* Function pointers. */
2502+static void
2503+(*inittables) (void);
2504+static char *
2505+(*begfield) (const struct line*, const struct keyfield *);
2506+static char *
2507+(*limfield) (const struct line*, const struct keyfield *);
1555d43c
SS
2508+static void
2509+(*skipblanks) (char **ptr, char *lim);
56ae3f82 2510+static int
1555d43c 2511+(*getmonth) (char const *, size_t, char **);
56ae3f82
SS
2512+static int
2513+(*keycompare) (const struct line *, const struct line *);
2514+static int
2515+(*numcompare) (const char *, const char *);
2516+
2517+/* Test for white space multibyte character.
2518+ Set LENGTH to the byte length of the investigated multibyte character. */
2519+#if HAVE_MBRTOWC
2520+static int
2521+ismbblank (const char *str, size_t len, size_t *length)
2522+{
2523+ size_t mblength;
2524+ wchar_t wc;
2525+ mbstate_t state;
2526+
2527+ memset (&state, '\0', sizeof(mbstate_t));
2528+ mblength = mbrtowc (&wc, str, len, &state);
2529+
2530+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
2531+ {
2532+ *length = 1;
2533+ return 0;
2534+ }
2535+
2536+ *length = (mblength < 1) ? 1 : mblength;
2537+ return iswblank (wc);
2538+}
2539+#endif
2540+
2541 /* Clean up any remaining temporary files. */
2542
2543 static void
407c5be3 2544@@ -1207,7 +1276,7 @@ zaptemp (char const *name)
56ae3f82
SS
2545 free (node);
2546 }
2547
2548-#if HAVE_NL_LANGINFO
2549+#if HAVE_LANGINFO_CODESET
2550
2551 static int
1555d43c 2552 struct_month_cmp (void const *m1, void const *m2)
407c5be3 2553@@ -1222,7 +1291,7 @@ struct_month_cmp (void const *m1, void c
56ae3f82
SS
2554 /* Initialize the character class tables. */
2555
2556 static void
2557-inittables (void)
2558+inittables_uni (void)
2559 {
2560 size_t i;
2561
407c5be3 2562@@ -1234,7 +1303,7 @@ inittables (void)
56ae3f82
SS
2563 fold_toupper[i] = toupper (i);
2564 }
2565
2566-#if HAVE_NL_LANGINFO
2567+#if HAVE_LANGINFO_CODESET
2568 /* If we're not in the "C" locale, read different names for months. */
2569 if (hard_LC_TIME)
2570 {
407c5be3 2571@@ -1316,6 +1385,84 @@ specify_nmerge (int oi, char c, char con
56ae3f82
SS
2572 xstrtol_fatal (e, oi, c, long_options, s);
2573 }
2574
2575+#if HAVE_MBRTOWC
2576+static void
2577+inittables_mb (void)
2578+{
2579+ int i, j, k, l;
1555d43c 2580+ char *name, *s, *lc_time, *lc_ctype;
56ae3f82
SS
2581+ size_t s_len, mblength;
2582+ char mbc[MB_LEN_MAX];
2583+ wchar_t wc, pwc;
2584+ mbstate_t state_mb, state_wc;
2585+
1555d43c
SS
2586+ lc_time = setlocale (LC_TIME, "");
2587+ if (lc_time)
2588+ lc_time = xstrdup (lc_time);
2589+
2590+ lc_ctype = setlocale (LC_CTYPE, "");
2591+ if (lc_ctype)
2592+ lc_ctype = xstrdup (lc_ctype);
2593+
2594+ if (lc_time && lc_ctype)
2595+ /* temporarily set LC_CTYPE to match LC_TIME, so that we can convert
2596+ * the names of months to upper case */
2597+ setlocale (LC_CTYPE, lc_time);
2598+
56ae3f82
SS
2599+ for (i = 0; i < MONTHS_PER_YEAR; i++)
2600+ {
2601+ s = (char *) nl_langinfo (ABMON_1 + i);
2602+ s_len = strlen (s);
2603+ monthtab[i].name = name = (char *) xmalloc (s_len + 1);
2604+ monthtab[i].val = i + 1;
2605+
2606+ memset (&state_mb, '\0', sizeof (mbstate_t));
2607+ memset (&state_wc, '\0', sizeof (mbstate_t));
2608+
2609+ for (j = 0; j < s_len;)
2610+ {
2611+ if (!ismbblank (s + j, s_len - j, &mblength))
2612+ break;
2613+ j += mblength;
2614+ }
2615+
2616+ for (k = 0; j < s_len;)
2617+ {
2618+ mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
2619+ assert (mblength != (size_t)-1 && mblength != (size_t)-2);
2620+ if (mblength == 0)
2621+ break;
2622+
2623+ pwc = towupper (wc);
2624+ if (pwc == wc)
2625+ {
2626+ memcpy (mbc, s + j, mblength);
2627+ j += mblength;
2628+ }
2629+ else
2630+ {
2631+ j += mblength;
2632+ mblength = wcrtomb (mbc, pwc, &state_wc);
2633+ assert (mblength != (size_t)0 && mblength != (size_t)-1);
2634+ }
2635+
2636+ for (l = 0; l < mblength; l++)
2637+ name[k++] = mbc[l];
2638+ }
2639+ name[k] = '\0';
2640+ }
2641+ qsort ((void *) monthtab, MONTHS_PER_YEAR,
2642+ sizeof (struct month), struct_month_cmp);
1555d43c
SS
2643+
2644+ if (lc_time && lc_ctype)
2645+ /* restore the original locales */
2646+ setlocale (LC_CTYPE, lc_ctype);
2647+
2648+ free (lc_ctype);
2649+ free (lc_time);
56ae3f82
SS
2650+}
2651+#endif
2652+
2653 /* Specify the amount of main memory to use when sorting. */
2654 static void
2655 specify_sort_size (int oi, char c, char const *s)
407c5be3 2656@@ -1544,7 +1691,7 @@ buffer_linelim (struct buffer const *buf
56ae3f82
SS
2657 by KEY in LINE. */
2658
2659 static char *
1555d43c 2660-begfield (struct line const *line, struct keyfield const *key)
56ae3f82
SS
2661+begfield_uni (const struct line *line, const struct keyfield *key)
2662 {
2663 char *ptr = line->text, *lim = ptr + line->length - 1;
2664 size_t sword = key->sword;
407c5be3 2665@@ -1553,10 +1700,10 @@ begfield (struct line const *line, struc
56ae3f82
SS
2666 /* The leading field separator itself is included in a field when -t
2667 is absent. */
2668
2669- if (tab != TAB_DEFAULT)
2670+ if (tab_length)
2671 while (ptr < lim && sword--)
2672 {
2673- while (ptr < lim && *ptr != tab)
2674+ while (ptr < lim && *ptr != tab[0])
2675 ++ptr;
2676 if (ptr < lim)
2677 ++ptr;
407c5be3 2678@@ -1582,11 +1729,70 @@ begfield (struct line const *line, struc
56ae3f82
SS
2679 return ptr;
2680 }
2681
2682+#if HAVE_MBRTOWC
2683+static char *
2684+begfield_mb (const struct line *line, const struct keyfield *key)
2685+{
2686+ int i;
2687+ char *ptr = line->text, *lim = ptr + line->length - 1;
2688+ size_t sword = key->sword;
2689+ size_t schar = key->schar;
2690+ size_t mblength;
2691+ mbstate_t state;
2692+
2693+ memset (&state, '\0', sizeof(mbstate_t));
2694+
2695+ if (tab_length)
2696+ while (ptr < lim && sword--)
2697+ {
2698+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
2699+ {
2700+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2701+ ptr += mblength;
2702+ }
2703+ if (ptr < lim)
2704+ {
2705+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2706+ ptr += mblength;
2707+ }
2708+ }
2709+ else
2710+ while (ptr < lim && sword--)
2711+ {
2712+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2713+ ptr += mblength;
2714+ if (ptr < lim)
2715+ {
2716+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2717+ ptr += mblength;
2718+ }
2719+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
2720+ ptr += mblength;
2721+ }
2722+
2723+ if (key->skipsblanks)
2724+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2725+ ptr += mblength;
2726+
2727+ for (i = 0; i < schar; i++)
2728+ {
2729+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2730+
2731+ if (ptr + mblength > lim)
2732+ break;
2733+ else
2734+ ptr += mblength;
2735+ }
2736+
2737+ return ptr;
2738+}
2739+#endif
2740+
2741 /* Return the limit of (a pointer to the first character after) the field
2742 in LINE specified by KEY. */
2743
2744 static char *
1555d43c 2745-limfield (struct line const *line, struct keyfield const *key)
56ae3f82
SS
2746+limfield_uni (const struct line *line, const struct keyfield *key)
2747 {
2748 char *ptr = line->text, *lim = ptr + line->length - 1;
2749 size_t eword = key->eword, echar = key->echar;
407c5be3 2750@@ -1601,10 +1807,10 @@ limfield (struct line const *line, struc
56ae3f82
SS
2751 `beginning' is the first character following the delimiting TAB.
2752 Otherwise, leave PTR pointing at the first `blank' character after
2753 the preceding field. */
2754- if (tab != TAB_DEFAULT)
2755+ if (tab_length)
2756 while (ptr < lim && eword--)
2757 {
2758- while (ptr < lim && *ptr != tab)
2759+ while (ptr < lim && *ptr != tab[0])
2760 ++ptr;
2761 if (ptr < lim && (eword || echar))
2762 ++ptr;
407c5be3 2763@@ -1650,10 +1856,10 @@ limfield (struct line const *line, struc
56ae3f82
SS
2764 */
2765
2766 /* Make LIM point to the end of (one byte past) the current field. */
2767- if (tab != TAB_DEFAULT)
2768+ if (tab_length)
2769 {
2770 char *newlim;
2771- newlim = memchr (ptr, tab, lim - ptr);
2772+ newlim = memchr (ptr, tab[0], lim - ptr);
2773 if (newlim)
2774 lim = newlim;
2775 }
407c5be3 2776@@ -1684,6 +1890,130 @@ limfield (struct line const *line, struc
56ae3f82
SS
2777 return ptr;
2778 }
2779
2780+#if HAVE_MBRTOWC
2781+static char *
2782+limfield_mb (const struct line *line, const struct keyfield *key)
2783+{
2784+ char *ptr = line->text, *lim = ptr + line->length - 1;
2785+ size_t eword = key->eword, echar = key->echar;
2786+ int i;
2787+ size_t mblength;
2788+ mbstate_t state;
2789+
2790+ if (echar == 0)
2791+ eword++; /* skip all of end field. */
2792+
2793+ memset (&state, '\0', sizeof(mbstate_t));
2794+
2795+ if (tab_length)
2796+ while (ptr < lim && eword--)
2797+ {
2798+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
2799+ {
2800+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2801+ ptr += mblength;
2802+ }
2803+ if (ptr < lim && (eword | echar))
2804+ {
2805+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2806+ ptr += mblength;
2807+ }
2808+ }
2809+ else
2810+ while (ptr < lim && eword--)
2811+ {
2812+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2813+ ptr += mblength;
2814+ if (ptr < lim)
2815+ {
2816+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2817+ ptr += mblength;
2818+ }
2819+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
2820+ ptr += mblength;
2821+ }
2822+
2823+
2824+# ifdef POSIX_UNSPECIFIED
2825+ /* Make LIM point to the end of (one byte past) the current field. */
2826+ if (tab_length)
2827+ {
2828+ char *p;
2829+
2830+ /* Pull LIM back to the first TAB at or after PTR, if there is one. */
2831+ for (p = ptr; p < lim;)
2832+ {
2833+ if (memcmp (p, tab, tab_length) == 0)
2834+ {
2835+ lim = p;
2836+ break;
2837+ }
2838+
2839+ GET_BYTELEN_OF_CHAR (lim, p, mblength, state);
2840+ p += mblength;
2841+ }
2842+ }
2843+ else
2844+ {
2845+ char *newlim;
2846+ newlim = ptr;
2847+
2848+ while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
2849+ newlim += mblength;
2850+ if (newlim < lim)
2851+ {
2852+ GET_BYTELEN_OF_CHAR (lim, newlim, mblength, state);
2853+ newlim += mblength; /* advance the scan pointer, not PTR */
2854+ }
2855+ while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
2856+ newlim += mblength;
2857+ lim = newlim;
2858+ }
2859+# endif
2860+
2861+ if (echar != 0)
2862+ {
2863+ /* If we're skipping leading blanks, don't start counting characters
2864+ * until after skipping past any leading blanks. */
2865+ if (key->skipsblanks)
2866+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2867+ ptr += mblength;
2868+
2869+ memset (&state, '\0', sizeof(mbstate_t));
2870+
2871+ /* Advance PTR by ECHAR (if possible), but no further than LIM. */
2872+ for (i = 0; i < echar; i++)
2873+ {
2874+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2875+
2876+ if (ptr + mblength > lim)
2877+ break;
2878+ else
2879+ ptr += mblength;
2880+ }
2881+ }
2882+
2883+ return ptr;
2884+}
2885+#endif
1555d43c
SS
2886+
2887+static void
2888+skipblanks_uni (char **ptr, char *lim)
2889+{
2890+ while (*ptr < lim && blanks[to_uchar (**ptr)])
2891+ ++(*ptr);
2892+}
2893+
2894+#if HAVE_MBRTOWC
2895+static void
2896+skipblanks_mb (char **ptr, char *lim)
2897+{
2898+ size_t mblength;
2899+ while (*ptr < lim && ismbblank (*ptr, lim - *ptr, &mblength))
2900+ (*ptr) += mblength;
2901+}
2902+#endif
56ae3f82
SS
2903+
2904 /* Fill BUF reading from FP, moving buf->left bytes from the end
2905 of buf->buf to the beginning first. If EOF is reached and the
2906 file wasn't terminated by a newline, supply one. Set up BUF's line
407c5be3 2907@@ -1770,8 +2100,22 @@ fillbuf (struct buffer *buf, FILE *fp, c
56ae3f82
SS
2908 else
2909 {
2910 if (key->skipsblanks)
2911- while (blanks[to_uchar (*line_start)])
2912- line_start++;
2913+ {
2914+#if HAVE_MBRTOWC
2915+ if (MB_CUR_MAX > 1)
2916+ {
2917+ size_t mblength;
56ae3f82
SS
2918+ while (line_start < line->keylim &&
2919+ ismbblank (line_start,
2920+ line->keylim - line_start,
2921+ &mblength))
2922+ line_start += mblength;
2923+ }
2924+ else
2925+#endif
2926+ while (blanks[to_uchar (*line_start)])
2927+ line_start++;
2928+ }
2929 line->keybeg = line_start;
2930 }
2931 }
407c5be3 2932@@ -1892,7 +2236,7 @@ human_numcompare (char const *a, char co
56ae3f82
SS
2933 hideously fast. */
2934
2935 static int
1555d43c 2936-numcompare (char const *a, char const *b)
56ae3f82
SS
2937+numcompare_uni (const char *a, const char *b)
2938 {
2939 while (blanks[to_uchar (*a)])
2940 a++;
407c5be3 2941@@ -1902,6 +2246,25 @@ numcompare (char const *a, char const *b
1555d43c 2942 return strnumcmp (a, b, decimal_point, thousands_sep);
56ae3f82
SS
2943 }
2944
2945+#if HAVE_MBRTOWC
2946+static int
2947+numcompare_mb (const char *a, const char *b)
2948+{
2949+ size_t mblength, len;
2950+ len = strlen (a); /* okay for UTF-8 */
2951+ while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
2952+ {
2953+ a += mblength;
2954+ len -= mblength;
2955+ }
2956+ len = strlen (b); /* okay for UTF-8 */
2957+ while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
2958+ b += mblength;
2959+
2960+ return strnumcmp (a, b, decimal_point, thousands_sep);
2961+}
2962+#endif /* HAVE_MBRTOWC */
2963+
2964 static int
1555d43c 2965 general_numcompare (char const *sa, char const *sb)
56ae3f82 2966 {
407c5be3 2967@@ -1934,7 +2297,7 @@ general_numcompare (char const *sa, char
56ae3f82
SS
2968 Return 0 if the name in S is not recognized. */
2969
2970 static int
1555d43c
SS
2971-getmonth (char const *month, char **ea)
2972+getmonth_uni (char const *month, size_t len, char **ea)
56ae3f82
SS
2973 {
2974 size_t lo = 0;
2975 size_t hi = MONTHS_PER_YEAR;
407c5be3
SS
2976@@ -2209,13 +2572,12 @@ debug_key (struct line const *line, stru
2977 char saved = *lim;
2978 *lim = '\0';
1555d43c
SS
2979
2980- while (blanks[to_uchar (*beg)])
2981- beg++;
2982+ skipblanks (&beg, lim);
2983
2984 char *tighter_lim = beg;
2985
2986 if (key->month)
2987- getmonth (beg, &tighter_lim);
2988+ getmonth (beg, lim-beg, &tighter_lim);
2989 else if (key->general_numeric)
2990 ignore_value (strtold (beg, &tighter_lim));
2991 else if (key->numeric || key->human_numeric)
407c5be3 2992@@ -2359,7 +2721,7 @@ key_warnings (struct keyfield const *gke
1555d43c
SS
2993 bool maybe_space_aligned = !hard_LC_COLLATE && default_key_compare (key)
2994 && !(key->schar || key->echar);
2995 bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */
2996- if (!gkey_only && tab == TAB_DEFAULT && !line_offset
2997+ if (!gkey_only && !tab_length && !line_offset
2998 && ((!key->skipsblanks && !(implicit_skip || maybe_space_aligned))
2999 || (!key->skipsblanks && key->schar)
3000 || (!key->skipeblanks && key->echar)))
407c5be3 3001@@ -2417,11 +2779,83 @@ key_warnings (struct keyfield const *gke
1555d43c 3002 error (0, 0, _("option `-r' only applies to last-resort comparison"));
56ae3f82
SS
3003 }
3004
3005+#if HAVE_MBRTOWC
3006+static int
1555d43c 3007+getmonth_mb (const char *s, size_t len, char **ea)
56ae3f82
SS
3008+{
3009+ char *month;
3010+ register size_t i;
3011+ register int lo = 0, hi = MONTHS_PER_YEAR, result;
3012+ char *tmp;
3013+ size_t wclength, mblength;
3014+ const char **pp;
3015+ const wchar_t **wpp;
3016+ wchar_t *month_wcs;
3017+ mbstate_t state;
3018+
3019+ while (len > 0 && ismbblank (s, len, &mblength))
3020+ {
3021+ s += mblength;
3022+ len -= mblength;
3023+ }
3024+
3025+ if (len == 0)
3026+ return 0;
3027+
3028+ month = (char *) alloca (len + 1);
3029+
3030+ tmp = (char *) alloca (len + 1);
3031+ memcpy (tmp, s, len);
3032+ tmp[len] = '\0';
3033+ pp = (const char **)&tmp;
3034+ month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t));
3035+ memset (&state, '\0', sizeof(mbstate_t));
3036+
3037+ wclength = mbsrtowcs (month_wcs, pp, len + 1, &state);
1555d43c
SS
3038+ if (wclength == (size_t)-1 || *pp != NULL)
3039+ error (SORT_FAILURE, 0, _("Invalid multibyte input %s."), quote(s));
56ae3f82
SS
3040+
3041+ for (i = 0; i < wclength; i++)
3042+ {
3043+ month_wcs[i] = towupper(month_wcs[i]);
3044+ if (iswblank (month_wcs[i]))
3045+ {
3046+ month_wcs[i] = L'\0';
3047+ break;
3048+ }
3049+ }
3050+
3051+ wpp = (const wchar_t **)&month_wcs;
3052+
3053+ mblength = wcsrtombs (month, wpp, len + 1, &state);
3054+ assert (mblength != (-1) && *wpp == NULL);
3055+
3056+ do
3057+ {
3058+ int ix = (lo + hi) / 2;
3059+
3060+ if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
3061+ hi = ix;
3062+ else
3063+ lo = ix;
3064+ }
3065+ while (hi - lo > 1);
3066+
1555d43c
SS
3067+ result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
3068+ ? monthtab[lo].val : 0);
3069+
56ae3f82
SS
3070+ if (ea && result) /* report the end inside the caller's string; MONTH is alloca'd scratch */
3071+ *ea = (char *) s + strlen (monthtab[lo].name);
3072+
3073+ return result;
3074+}
3075+#endif
3076+
3077 /* Compare two lines A and B trying every key in sequence until there
3078 are no more keys or a difference is found. */
3079
3080 static int
1555d43c 3081-keycompare (struct line const *a, struct line const *b)
56ae3f82
SS
3082+keycompare_uni (const struct line *a, const struct line *b)
3083 {
3084 struct keyfield *key = keylist;
3085
407c5be3 3086@@ -2506,7 +2940,7 @@ keycompare (struct line const *a, struct
1555d43c
SS
3087 else if (key->human_numeric)
3088 diff = human_numcompare (ta, tb);
3089 else if (key->month)
3090- diff = getmonth (ta, NULL) - getmonth (tb, NULL);
3091+ diff = getmonth (ta, tlena, NULL) - getmonth (tb, tlenb, NULL);
3092 else if (key->random)
3093 diff = compare_random (ta, tlena, tb, tlenb);
3094 else if (key->version)
407c5be3 3095@@ -2622,6 +3056,179 @@ keycompare (struct line const *a, struct
56ae3f82
SS
3096 return key->reverse ? -diff : diff;
3097 }
3098
3099+#if HAVE_MBRTOWC
3100+static int
3101+keycompare_mb (const struct line *a, const struct line *b)
3102+{
3103+ struct keyfield *key = keylist;
3104+
3105+ /* For the first iteration only, the key positions have been
3106+ precomputed for us. */
3107+ char *texta = a->keybeg;
3108+ char *textb = b->keybeg;
3109+ char *lima = a->keylim;
3110+ char *limb = b->keylim;
3111+
3112+ size_t mblength_a, mblength_b;
3113+ wchar_t wc_a, wc_b;
3114+ mbstate_t state_a, state_b;
3115+
3116+ int diff;
3117+
3118+ memset (&state_a, '\0', sizeof(mbstate_t));
3119+ memset (&state_b, '\0', sizeof(mbstate_t));
3120+
3121+ for (;;)
3122+ {
3123+ char const *translate = key->translate;
3124+ bool const *ignore = key->ignore;
3125+
3126+ /* Find the lengths. */
3127+ size_t lena = lima <= texta ? 0 : lima - texta;
3128+ size_t lenb = limb <= textb ? 0 : limb - textb;
3129+
3130+ /* Actually compare the fields. */
3131+ if (key->random)
3132+ diff = compare_random (texta, lena, textb, lenb);
3133+ else if (key->numeric | key->general_numeric | key->human_numeric)
3134+ {
3135+ char savea = *lima, saveb = *limb;
3136+
3137+ *lima = *limb = '\0';
3138+ diff = (key->numeric ? numcompare (texta, textb)
3139+ : key->general_numeric ? general_numcompare (texta, textb)
1555d43c 3140+ : human_numcompare (texta, textb));
56ae3f82
SS
3141+ *lima = savea, *limb = saveb;
3142+ }
3143+ else if (key->version)
1555d43c 3144+ diff = filevercmp (texta, textb);
56ae3f82 3145+ else if (key->month)
1555d43c 3146+ diff = getmonth (texta, lena, NULL) - getmonth (textb, lenb, NULL);
56ae3f82
SS
3147+ else
3148+ {
3149+ if (ignore || translate)
3150+ {
3151+ char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
3152+ char *copy_b = copy_a + lena + 1;
3153+ size_t new_len_a, new_len_b;
3154+ size_t i, j;
3155+
3156+ /* Ignore and/or translate chars before comparing. */
3157+# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \
3158+ do \
3159+ { \
3160+ wchar_t uwc; \
3161+ char mbc[MB_LEN_MAX]; \
3162+ mbstate_t state_wc; \
3163+ \
3164+ for (NEW_LEN = i = 0; i < LEN;) \
3165+ { \
3166+ mbstate_t state_bak; \
3167+ \
3168+ state_bak = STATE; \
3169+ MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \
3170+ \
3171+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \
3172+ || MBLENGTH == 0) \
3173+ { \
3174+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \
3175+ STATE = state_bak; \
3176+ if (!ignore) \
3177+ COPY[NEW_LEN++] = TEXT[i]; \
3178+ i++; continue; /* always step past the byte, or this loops forever */ \
3179+ } \
3180+ \
3181+ if (ignore) \
3182+ { \
3183+ if ((ignore == nonprinting && !iswprint (WC)) \
3184+ || (ignore == nondictionary \
3185+ && !iswalnum (WC) && !iswblank (WC))) \
3186+ { \
3187+ i += MBLENGTH; \
3188+ continue; \
3189+ } \
3190+ } \
3191+ \
3192+ if (translate) \
3193+ { \
3194+ \
3195+ uwc = towupper(WC); \
3196+ if (WC == uwc) \
3197+ { \
3198+ memcpy (mbc, TEXT + i, MBLENGTH); \
3199+ i += MBLENGTH; \
3200+ } \
3201+ else \
3202+ { \
3203+ i += MBLENGTH; \
3204+ WC = uwc; \
3205+ memset (&state_wc, '\0', sizeof (mbstate_t)); \
3206+ \
3207+ MBLENGTH = wcrtomb (mbc, WC, &state_wc); \
3208+ assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \
3209+ } \
3210+ \
3211+ for (j = 0; j < MBLENGTH; j++) \
3212+ COPY[NEW_LEN++] = mbc[j]; \
3213+ } \
3214+ else \
3215+ for (j = 0; j < MBLENGTH; j++) \
3216+ COPY[NEW_LEN++] = TEXT[i++]; \
3217+ } \
3218+ COPY[NEW_LEN] = '\0'; \
3219+ } \
3220+ while (0)
3221+ IGNORE_CHARS (new_len_a, lena, texta, copy_a,
3222+ wc_a, mblength_a, state_a);
3223+ IGNORE_CHARS (new_len_b, lenb, textb, copy_b,
3224+ wc_b, mblength_b, state_b);
3225+ diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
3226+ }
3227+ else if (lena == 0)
3228+ diff = - NONZERO (lenb);
3229+ else if (lenb == 0)
3230+ goto greater;
3231+ else
3232+ diff = xmemcoll (texta, lena, textb, lenb);
3233+ }
3234+
3235+ if (diff)
3236+ goto not_equal;
3237+
3238+ key = key->next;
3239+ if (! key)
3240+ break;
3241+
3242+ /* Find the beginning and limit of the next field. */
3243+ if (key->eword != -1)
3244+ lima = limfield (a, key), limb = limfield (b, key);
3245+ else
3246+ lima = a->text + a->length - 1, limb = b->text + b->length - 1;
3247+
3248+ if (key->sword != -1)
3249+ texta = begfield (a, key), textb = begfield (b, key);
3250+ else
3251+ {
3252+ texta = a->text, textb = b->text;
3253+ if (key->skipsblanks)
3254+ {
3255+ while (texta < lima && ismbblank (texta, lima - texta, &mblength_a))
3256+ texta += mblength_a;
3257+ while (textb < limb && ismbblank (textb, limb - textb, &mblength_b))
3258+ textb += mblength_b;
3259+ }
3260+ }
3261+ }
3262+
3263+ return 0;
3264+
3265+greater:
3266+ diff = 1;
3267+not_equal:
3268+ return key->reverse ? -diff : diff;
3269+}
3270+#endif
3272 /* Compare two lines A and B, returning negative, zero, or positive
3273 depending on whether A compares less than, equal to, or greater than B. */
3274
407c5be3 3275@@ -4084,7 +4691,7 @@ main (int argc, char **argv)
56ae3f82
SS
3276 initialize_exit_failure (SORT_FAILURE);
3277
3278 hard_LC_COLLATE = hard_locale (LC_COLLATE);
3279-#if HAVE_NL_LANGINFO
3280+#if HAVE_LANGINFO_CODESET
3281 hard_LC_TIME = hard_locale (LC_TIME);
3282 #endif
3283
407c5be3 3284@@ -4105,6 +4712,29 @@ main (int argc, char **argv)
56ae3f82
SS
3285 thousands_sep = -1;
3286 }
3287
3288+#if HAVE_MBRTOWC
3289+ if (MB_CUR_MAX > 1)
3290+ {
3291+ inittables = inittables_mb;
3292+ begfield = begfield_mb;
3293+ limfield = limfield_mb;
1555d43c 3294+ skipblanks = skipblanks_mb;
56ae3f82
SS
3295+ getmonth = getmonth_mb;
3296+ keycompare = keycompare_mb;
3297+ numcompare = numcompare_mb;
3298+ }
3299+ else
3300+#endif
3301+ {
3302+ inittables = inittables_uni;
3303+ begfield = begfield_uni;
3304+ limfield = limfield_uni;
1555d43c 3305+ skipblanks = skipblanks_uni;
56ae3f82
SS
3306+ getmonth = getmonth_uni;
3307+ keycompare = keycompare_uni;
3308+ numcompare = numcompare_uni;
3309+ }
3310+
3311 have_read_stdin = false;
3312 inittables ();
3313
407c5be3 3314@@ -4375,13 +5005,34 @@ main (int argc, char **argv)
56ae3f82
SS
3315
3316 case 't':
3317 {
3318- char newtab = optarg[0];
3319- if (! newtab)
3320+ char newtab[MB_LEN_MAX + 1];
3321+ size_t newtab_length = 1;
3322+ strncpy (newtab, optarg, MB_LEN_MAX);
3323+ if (! newtab[0])
3324 error (SORT_FAILURE, 0, _("empty tab"));
3325- if (optarg[1])
3326+#if HAVE_MBRTOWC
3327+ if (MB_CUR_MAX > 1)
3328+ {
3329+ wchar_t wc;
3330+ mbstate_t state;
56ae3f82
SS
3331+
3332+ memset (&state, '\0', sizeof (mbstate_t));
3333+ newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
3334+ MB_LEN_MAX),
3335+ &state);
3336+ switch (newtab_length)
3337+ {
3338+ case (size_t) -1:
3339+ case (size_t) -2:
3340+ case 0:
3341+ newtab_length = 1;
3342+ }
3343+ }
3344+#endif
3345+ if (newtab_length == 1 && optarg[1])
3346 {
3347 if (STREQ (optarg, "\\0"))
3348- newtab = '\0';
3349+ newtab[0] = '\0';
3350 else
3351 {
3352 /* Provoke with `sort -txx'. Complain about
407c5be3 3353@@ -4392,9 +5043,12 @@ main (int argc, char **argv)
56ae3f82
SS
3354 quote (optarg));
3355 }
3356 }
3357- if (tab != TAB_DEFAULT && tab != newtab)
3358+ if (tab_length
3359+ && (tab_length != newtab_length
3360+ || memcmp (tab, newtab, tab_length) != 0))
3361 error (SORT_FAILURE, 0, _("incompatible tabs"));
3362- tab = newtab;
3363+ memcpy (tab, newtab, newtab_length);
3364+ tab_length = newtab_length;
3365 }
3366 break;
3367
407c5be3
SS
3368diff -urNp coreutils-8.9-orig/src/unexpand.c coreutils-8.9/src/unexpand.c
3369--- coreutils-8.9-orig/src/unexpand.c 2011-01-01 22:19:23.000000000 +0100
3370+++ coreutils-8.9/src/unexpand.c 2011-01-04 17:41:55.387888171 +0100
1555d43c 3371@@ -39,12 +39,29 @@
56ae3f82
SS
3372 #include <stdio.h>
3373 #include <getopt.h>
3374 #include <sys/types.h>
3375+
3376+/* Get mbstate_t, mbrtowc(), wcwidth(). */
3377+#if HAVE_WCHAR_H
3378+# include <wchar.h>
3379+#endif
3380+
3381 #include "system.h"
3382 #include "error.h"
1555d43c 3383 #include "fadvise.h"
56ae3f82
SS
3384 #include "quote.h"
3385 #include "xstrndup.h"
3386
3387+/* Work around GCC installations that wrongly define MB_LEN_MAX as 1. */
3388+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
3389+# undef MB_LEN_MAX
3390+# define MB_LEN_MAX 16
3391+#endif
3392+
3393+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
3394+#if HAVE_MBRTOWC && defined mbstate_t
3395+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
3396+#endif
3397+
3398 /* The official name of this program (e.g., no `g' prefix). */
3399 #define PROGRAM_NAME "unexpand"
3400
1555d43c 3401@@ -104,6 +121,208 @@ static struct option const longopts[] =
56ae3f82
SS
3402 {NULL, 0, NULL, 0}
3403 };
3404
3405+static FILE *next_file (FILE *fp);
3406+
3407+#if HAVE_MBRTOWC
3408+static void
3409+unexpand_multibyte (void) /* Used by main () instead of unexpand () when MB_CUR_MAX > 1. */
3410+{
3411+ FILE *fp; /* Input stream. */
3412+ mbstate_t i_state; /* Current shift state of the input stream. */
3413+ mbstate_t i_state_bak; /* Back up the I_STATE. */
3414+ mbstate_t o_state; /* Current shift state of the output stream. */
3415+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
3416+ char *bufpos = buf; /* Next read position of BUF; must be valid before the first memmove. */
3417+ size_t buflen = 0; /* The length of the byte sequence in buf. */
3418+ wint_t wc; /* A gotten wide character. */
3419+ size_t mblength; /* The byte size of a multibyte character
3420+ which shows as same character as WC. */
3421+
3422+ /* Index in `tab_list' of next tabstop: */
3423+ int tab_index = 0; /* For calculating width of pending tabs. */
3424+ int print_tab_index = 0; /* For printing as many tabs as possible. */
3425+ unsigned int column = 0; /* Column on screen of next char. */
3426+ int next_tab_column; /* Column the next tab stop is on. */
3427+ int convert = 1; /* If nonzero, perform translations. */
3428+ unsigned int pending = 0; /* Pending columns of blanks. */
3429+
3430+ fp = next_file ((FILE *) NULL);
3431+ if (fp == NULL)
3432+ return;
3433+
3434+ memset (&o_state, '\0', sizeof(mbstate_t));
3435+ memset (&i_state, '\0', sizeof(mbstate_t));
3436+
3437+ for (;;)
3438+ {
3439+ if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
3440+ {
3441+ memmove (buf, bufpos, buflen); /* Slide the unread tail to the front before refilling. */
3442+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
3443+ bufpos = buf;
3444+ }
3445+
3446+ /* Get a wide character. */
3447+ if (buflen < 1)
3448+ {
3449+ mblength = 1;
3450+ wc = WEOF;
3451+ }
3452+ else
3453+ {
3454+ i_state_bak = i_state;
3455+ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state);
3456+ }
3457+
3458+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
3459+ {
3460+ i_state = i_state_bak;
3461+ wc = L'\0';
3462+ }
3463+
3464+ if (wc == L' ' && convert && column < INT_MAX)
3465+ {
3466+ ++pending;
3467+ ++column;
3468+ }
3469+ else if (wc == L'\t' && convert)
3470+ {
3471+ if (tab_size == 0)
3472+ {
3473+ /* Do not let tab_index == first_free_tab;
3474+ stop when it is 1 less. */
3475+ while (tab_index < first_free_tab - 1
3476+ && column >= tab_list[tab_index])
3477+ tab_index++;
3478+ next_tab_column = tab_list[tab_index];
3479+ if (tab_index < first_free_tab - 1)
3480+ tab_index++;
3481+ if (column >= next_tab_column)
3482+ {
3483+ convert = 0; /* Ran out of tab stops. */
3484+ goto flush_pend_mb;
3485+ }
3486+ }
3487+ else
3488+ {
3489+ next_tab_column = column + tab_size - column % tab_size;
3490+ }
3491+ pending += next_tab_column - column;
3492+ column = next_tab_column;
3493+ }
3494+ else
3495+ {
3496+flush_pend_mb:
3497+ /* Flush pending spaces. Print as many tabs as possible,
3498+ then print the rest as spaces. */
3499+ if (pending == 1)
3500+ {
3501+ putchar (' ');
3502+ pending = 0;
3503+ }
3504+ column -= pending;
3505+ while (pending > 0)
3506+ {
3507+ if (tab_size == 0)
3508+ {
3509+ /* Do not let print_tab_index == first_free_tab;
3510+ stop when it is 1 less. */
3511+ while (print_tab_index < first_free_tab - 1
3512+ && column >= tab_list[print_tab_index])
3513+ print_tab_index++;
3514+ next_tab_column = tab_list[print_tab_index];
3515+ if (print_tab_index < first_free_tab - 1)
3516+ print_tab_index++;
3517+ }
3518+ else
3519+ {
3520+ next_tab_column =
3521+ column + tab_size - column % tab_size;
3522+ }
3523+ if (next_tab_column - column <= pending)
3524+ {
3525+ putchar ('\t');
3526+ pending -= next_tab_column - column;
3527+ column = next_tab_column;
3528+ }
3529+ else
3530+ {
3531+ --print_tab_index;
3532+ column += pending;
3533+ while (pending != 0)
3534+ {
3535+ putchar (' ');
3536+ pending--;
3537+ }
3538+ }
3539+ }
3540+
3541+ if (wc == WEOF)
3542+ {
3543+ fp = next_file (fp);
3544+ if (fp == NULL)
3545+ break; /* No more files. */
3546+ else
3547+ {
3548+ memset (&i_state, '\0', sizeof(mbstate_t));
3549+ continue;
3550+ }
3551+ }
3552+
3553+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
3554+ {
3555+ if (convert)
3556+ {
3557+ ++column;
3558+ if (convert_entire_line == 0)
3559+ convert = 0;
3560+ }
3561+ mblength = 1;
3562+ putchar (*bufpos); /* Emit the undecodable byte itself, not the start of BUF. */
3563+ }
3564+ else if (mblength == 0)
3565+ {
3566+ if (convert && convert_entire_line == 0)
3567+ convert = 0;
3568+ mblength = 1;
3569+ putchar ('\0');
3570+ }
3571+ else
3572+ {
3573+ if (convert)
3574+ {
3575+ if (wc == L'\b')
3576+ {
3577+ if (column > 0)
3578+ --column;
3579+ }
3580+ else
3581+ {
3582+ int width; /* The width of WC. */
3583+
3584+ width = wcwidth (wc);
3585+ column += (width > 0) ? width : 0;
3586+ if (convert_entire_line == 0)
3587+ convert = 0;
3588+ }
3589+ }
3590+
3591+ if (wc == L'\n')
3592+ {
3593+ tab_index = print_tab_index = 0;
3594+ column = pending = 0;
3595+ convert = 1;
3596+ }
3597+ fwrite (bufpos, sizeof(char), mblength, stdout);
3598+ }
3599+ }
3600+ buflen -= mblength;
3601+ bufpos += mblength;
3602+ }
3603+}
3604+#endif
3605+
3606+
3607 void
3608 usage (int status)
3609 {
1555d43c 3610@@ -526,7 +745,12 @@ main (int argc, char **argv)
56ae3f82
SS
3611
3612 file_list = (optind < argc ? &argv[optind] : stdin_argv);
3613
3614- unexpand ();
3615+#if HAVE_MBRTOWC
3616+ if (MB_CUR_MAX > 1)
3617+ unexpand_multibyte ();
3618+ else
3619+#endif
3620+ unexpand ();
3621
3622 if (have_read_stdin && fclose (stdin) != 0)
3623 error (EXIT_FAILURE, errno, "-");
407c5be3
SS
3624diff -urNp coreutils-8.9-orig/src/uniq.c coreutils-8.9/src/uniq.c
3625--- coreutils-8.9-orig/src/uniq.c 2011-01-01 22:19:23.000000000 +0100
3626+++ coreutils-8.9/src/uniq.c 2011-01-04 17:41:55.391888381 +0100
56ae3f82
SS
3627@@ -21,6 +21,16 @@
3628 #include <getopt.h>
3629 #include <sys/types.h>
3630
3631+/* Get mbstate_t, mbrtowc(). */
3632+#if HAVE_WCHAR_H
3633+# include <wchar.h>
3634+#endif
3635+
3636+/* Get isw* functions. */
3637+#if HAVE_WCTYPE_H
3638+# include <wctype.h>
3639+#endif
3640+
3641 #include "system.h"
3642 #include "argmatch.h"
3643 #include "linebuffer.h"
1555d43c 3644@@ -32,7 +42,19 @@
56ae3f82
SS
3645 #include "stdio--.h"
3646 #include "xmemcoll.h"
3647 #include "xstrtol.h"
3648-#include "memcasecmp.h"
3649+#include "xmemcoll.h"
3650+
3651+/* Work around GCC installations that wrongly define MB_LEN_MAX as 1. */
3652+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
3653+# undef MB_LEN_MAX
3654+# define MB_LEN_MAX 16
3655+#endif
3656+
3657+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
3658+#if HAVE_MBRTOWC && defined mbstate_t
3659+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
3660+#endif
3661+
3662
3663 /* The official name of this program (e.g., no `g' prefix). */
3664 #define PROGRAM_NAME "uniq"
1555d43c 3665@@ -108,6 +130,10 @@ static enum delimit_method const delimit
56ae3f82
SS
3666 /* Select whether/how to delimit groups of duplicate lines. */
3667 static enum delimit_method delimit_groups;
3668
3669+/* Function pointers. */
3670+static char *
3671+(*find_field) (struct linebuffer *line);
3672+
3673 static struct option const longopts[] =
3674 {
3675 {"count", no_argument, NULL, 'c'},
1555d43c 3676@@ -207,7 +233,7 @@ size_opt (char const *opt, char const *m
56ae3f82
SS
3677 return a pointer to the beginning of the line's field to be compared. */
3678
3679 static char *
3680-find_field (struct linebuffer const *line)
3681+find_field_uni (struct linebuffer *line)
3682 {
3683 size_t count;
3684 char const *lp = line->buffer;
1555d43c 3685@@ -228,6 +254,83 @@ find_field (struct linebuffer const *lin
56ae3f82
SS
3686 return line->buffer + i;
3687 }
3688
3689+#if HAVE_MBRTOWC
3690+
3691+# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
3692+ do \
3693+ { \
3694+ mbstate_t state_bak; \
3695+ \
3696+ CONVFAIL = 0; \
3697+ state_bak = *STATEP; \
3698+ \
3699+ MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
3700+ \
3701+ switch (MBLENGTH) \
3702+ { \
3703+ case (size_t)-2: \
3704+ case (size_t)-1: \
3705+ *STATEP = state_bak; \
3706+ CONVFAIL++; \
3707+ /* Fall through */ \
3708+ case 0: \
3709+ MBLENGTH = 1; \
3710+ } \
3711+ } \
3712+ while (0)
3713+
3714+static char *
3715+find_field_multi (struct linebuffer *line) /* Installed as find_field by main () when MB_CUR_MAX > 1. */
3716+{
3717+ size_t count;
3718+ char *lp = line->buffer;
3719+ size_t size = line->length - 1; /* Leaves out the last byte (presumably the line delimiter -- confirm). */
3720+ size_t pos;
3721+ size_t mblength;
3722+ wchar_t wc;
3723+ mbstate_t *statep;
3724+ int convfail;
3725+
3726+ pos = 0;
3727+ statep = &(line->state);
3728+
3729+ /* Skip up to SKIP_FIELDS fields: a run of blanks, then a run of non-blanks. */
3730+ for (count = 0; count < skip_fields && pos < size; count++)
3731+ {
3732+ while (pos < size)
3733+ {
3734+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3735+
3736+ if (convfail || !iswblank (wc))
3737+ {
3738+ pos += mblength;
3739+ break;
3740+ }
3741+ pos += mblength;
3742+ }
3743+
3744+ while (pos < size)
3745+ {
3746+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3747+
3748+ if (!convfail && iswblank (wc))
3749+ break;
3750+
3751+ pos += mblength;
3752+ }
3753+ }
3754+
3755+ /* Then skip up to SKIP_CHARS characters (an undecodable byte counts as one). */
3756+ for (count = 0; count < skip_chars && pos < size; count++)
3757+ {
3758+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3759+ pos += mblength;
3760+ }
3761+
3762+ return lp + pos;
3763+}
3764+#endif
3765+
3766 /* Return false if two strings OLD and NEW match, true if not.
3767 OLD and NEW point not to the beginnings of the lines
3768 but rather to the beginnings of the fields to compare.
1555d43c 3769@@ -236,6 +339,8 @@ find_field (struct linebuffer const *lin
56ae3f82
SS
3770 static bool
3771 different (char *old, char *new, size_t oldlen, size_t newlen)
3772 {
3773+ char *copy_old, *copy_new;
3774+
3775 if (check_chars < oldlen)
3776 oldlen = check_chars;
3777 if (check_chars < newlen)
1555d43c 3778@@ -243,14 +348,92 @@ different (char *old, char *new, size_t
56ae3f82
SS
3779
3780 if (ignore_case)
3781 {
3782- /* FIXME: This should invoke strcoll somehow. */
3783- return oldlen != newlen || memcasecmp (old, new, oldlen);
3784+ size_t i;
3785+
3786+ /* Size and fill each copy by its own length: OLDLEN and NEWLEN may differ. */
3787+ copy_old = alloca (oldlen + 1);
3788+ copy_new = alloca (newlen + 1);
3789+
3790+ for (i = 0; i < oldlen; i++)
3791+ copy_old[i] = toupper ((unsigned char) old[i]);
3792+ for (i = 0; i < newlen; i++)
3793+ copy_new[i] = toupper ((unsigned char) new[i]);
3794 }
3795- else if (hard_LC_COLLATE)
3796- return xmemcoll (old, oldlen, new, newlen) != 0;
3797 else
3798- return oldlen != newlen || memcmp (old, new, oldlen);
3799+ {
3800+ copy_old = (char *)old;
3801+ copy_new = (char *)new;
3802+ }
3803+
3804+ return xmemcoll (copy_old, oldlen, copy_new, newlen);
3805+}
3806+
3807+#if HAVE_MBRTOWC
3808+static int
3809+different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
3810+{
3811+ size_t i, j, chars;
3812+ const char *str[2];
3813+ char *copy[2];
3814+ size_t len[2];
3815+ mbstate_t state[2];
3816+ size_t mblength;
3817+ wchar_t wc, uwc;
3818+ mbstate_t state_bak;
3819+
3820+ str[0] = old;
3821+ str[1] = new;
3822+ len[0] = oldlen;
3823+ len[1] = newlen;
3824+ state[0] = oldstate;
3825+ state[1] = newstate;
3826+
3827+ for (i = 0; i < 2; i++)
3828+ {
3829+ copy[i] = alloca (len[i] + 1);
3830+
3831+ for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
3832+ {
3833+ state_bak = state[i];
3834+ mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
3835+
3836+ switch (mblength)
3837+ {
3838+ case (size_t)-1:
3839+ case (size_t)-2:
3840+ state[i] = state_bak;
3841+ /* Fall through */
3842+ case 0:
3843+ mblength = 1;
3844+ copy[i][j] = str[i][j]; /* Carry the raw byte over; it was left unset before. */
3845+ break;
3846+ default:
3847+ {
3848+ /* Fold case only when the upper-case form occupies exactly
3849+ MBLENGTH bytes; otherwise keep the original bytes so that
3850+ J indexes STR[I] and COPY[I] alike and COPY[I] cannot overflow. */
3851+ char up[MB_LEN_MAX];
3852+ const char *src = str[i] + j;
3853+
3854+ if (ignore_case && (uwc = towupper (wc)) != wc)
3855+ {
3856+ mbstate_t state_wc;
3857+ memset (&state_wc, '\0', sizeof(mbstate_t));
3858+ if (wcrtomb (up, uwc, &state_wc) == mblength)
3859+ src = up;
3860+ }
3861+ memcpy (copy[i] + j, src, mblength);
3862+ }
3863+ }
3864+ j += mblength;
3865+ }
3866+ copy[i][j] = '\0';
3867+ len[i] = j;
3868+ }
3869+
3870+ return xmemcoll (copy[0], len[0], copy[1], len[1]);
3871 }
3872+#endif
3873
3874 /* Output the line in linebuffer LINE to standard output
3875 provided that the switches say it should be output.
1555d43c 3876@@ -306,15 +489,43 @@ check_file (const char *infile, const ch
56ae3f82 3877 {
1555d43c
SS
3878 char *prevfield IF_LINT ( = NULL);
3879 size_t prevlen IF_LINT ( = 0);
56ae3f82
SS
3880+#if HAVE_MBRTOWC
3881+ mbstate_t prevstate;
3882+
3883+ memset (&prevstate, '\0', sizeof (mbstate_t));
3884+#endif
3885
3886 while (!feof (stdin))
3887 {
3888 char *thisfield;
3889 size_t thislen;
3890+#if HAVE_MBRTOWC
3891+ mbstate_t thisstate;
3892+#endif
3893+
3894 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
3895 break;
3896 thisfield = find_field (thisline);
3897 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
3898+#if HAVE_MBRTOWC
3899+ if (MB_CUR_MAX > 1)
3900+ {
3901+ thisstate = thisline->state;
3902+
3903+ if (prevline->length == 0 || different_multi
3904+ (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
3905+ {
3906+ fwrite (thisline->buffer, sizeof (char),
3907+ thisline->length, stdout);
3908+
3909+ SWAP_LINES (prevline, thisline);
3910+ prevfield = thisfield;
3911+ prevlen = thislen;
3912+ prevstate = thisstate;
3913+ }
3914+ }
3915+ else
3916+#endif
3917 if (prevline->length == 0
3918 || different (thisfield, prevfield, thislen, prevlen))
3919 {
1555d43c 3920@@ -333,17 +544,26 @@ check_file (const char *infile, const ch
56ae3f82
SS
3921 size_t prevlen;
3922 uintmax_t match_count = 0;
3923 bool first_delimiter = true;
3924+#if HAVE_MBRTOWC
3925+ mbstate_t prevstate;
3926+#endif
3927
3928 if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
3929 goto closefiles;
3930 prevfield = find_field (prevline);
3931 prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
3932+#if HAVE_MBRTOWC
3933+ prevstate = prevline->state;
3934+#endif
3935
3936 while (!feof (stdin))
3937 {
3938 bool match;
3939 char *thisfield;
3940 size_t thislen;
3941+#if HAVE_MBRTOWC
3942+ mbstate_t thisstate;
3943+#endif
3944 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
3945 {
3946 if (ferror (stdin))
1555d43c 3947@@ -352,6 +572,15 @@ check_file (const char *infile, const ch
56ae3f82
SS
3948 }
3949 thisfield = find_field (thisline);
3950 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
3951+#if HAVE_MBRTOWC
3952+ if (MB_CUR_MAX > 1)
3953+ {
3954+ thisstate = thisline->state;
3955+ match = !different_multi (thisfield, prevfield,
3956+ thislen, prevlen, thisstate, prevstate);
3957+ }
3958+ else
3959+#endif
3960 match = !different (thisfield, prevfield, thislen, prevlen);
3961 match_count += match;
3962
1555d43c 3963@@ -384,6 +613,9 @@ check_file (const char *infile, const ch
56ae3f82
SS
3964 SWAP_LINES (prevline, thisline);
3965 prevfield = thisfield;
3966 prevlen = thislen;
3967+#if HAVE_MBRTOWC
3968+ prevstate = thisstate;
3969+#endif
3970 if (!match)
3971 match_count = 0;
3972 }
1555d43c 3973@@ -429,6 +661,19 @@ main (int argc, char **argv)
56ae3f82
SS
3974
3975 atexit (close_stdout);
3976
3977+#if HAVE_MBRTOWC
3978+ if (MB_CUR_MAX > 1)
3979+ {
3980+ find_field = find_field_multi;
3981+ }
3982+ else
3983+#endif
3984+ {
3985+ find_field = find_field_uni;
3986+ }
3987+
3988+
3989+
3990 skip_chars = 0;
3991 skip_fields = 0;
3992 check_chars = SIZE_MAX;
407c5be3
SS
3993diff -urNp coreutils-8.9-orig/tests/Makefile.am coreutils-8.9/tests/Makefile.am
3994--- coreutils-8.9-orig/tests/Makefile.am 2011-01-04 17:41:12.682173268 +0100
3995+++ coreutils-8.9/tests/Makefile.am 2011-01-04 17:41:55.392900534 +0100
3996@@ -233,6 +233,7 @@ TESTS = \
1555d43c
SS
3997 misc/sort-debug-keys \
3998 misc/sort-debug-warn \
56ae3f82
SS
3999 misc/sort-files0-from \
4000+ misc/sort-mb-tests \
1555d43c 4001 misc/sort-float \
56ae3f82
SS
4002 misc/sort-merge \
4003 misc/sort-merge-fdlimit \
407c5be3 4004@@ -498,6 +499,10 @@ TESTS = \
56ae3f82
SS
4005 $(root_tests)
4006
4007 pr_data = \
4008+ misc/mb1.X \
4009+ misc/mb1.I \
4010+ misc/mb2.X \
4011+ misc/mb2.I \
4012 pr/0F \
4013 pr/0FF \
4014 pr/0FFnt \
407c5be3
SS
4015diff -urNp coreutils-8.9-orig/tests/misc/cut coreutils-8.9/tests/misc/cut
4016--- coreutils-8.9-orig/tests/misc/cut 2011-01-01 22:19:23.000000000 +0100
4017+++ coreutils-8.9/tests/misc/cut 2011-01-04 17:41:55.393887822 +0100
56ae3f82
SS
4018@@ -26,7 +26,7 @@ use strict;
4019 my $prog = 'cut';
4020 my $try = "Try \`$prog --help' for more information.\n";
4021 my $from_1 = "$prog: fields and positions are numbered from 1\n$try";
4022-my $inval = "$prog: invalid byte or field list\n$try";
4023+my $inval = "$prog: invalid byte, character or field list\n$try";
4024 my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try";
4025
4026 my @Tests =
407c5be3 4027@@ -143,7 +143,7 @@ my @Tests =
56ae3f82
SS
4028
4029 # None of the following invalid ranges provoked an error up to coreutils-6.9.
4030 ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
4031- {ERR=>"$prog: invalid decreasing range\n$try"}],
4032+ {ERR=>"$prog: invalid byte, character or field list\n$try"}],
4033 ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
4034 ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
407c5be3
SS
4035 ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1},
4036diff -urNp coreutils-8.9-orig/tests/misc/mb1.I coreutils-8.9/tests/misc/mb1.I
4037--- coreutils-8.9-orig/tests/misc/mb1.I 1970-01-01 01:00:00.000000000 +0100
4038+++ coreutils-8.9/tests/misc/mb1.I 2011-01-04 17:41:55.394899835 +0100
56ae3f82
SS
4039@@ -0,0 +1,4 @@
4040+Apple@10
4041+Banana@5
4042+Citrus@20
4043+Cherry@30
407c5be3
SS
4044diff -urNp coreutils-8.9-orig/tests/misc/mb1.X coreutils-8.9/tests/misc/mb1.X
4045--- coreutils-8.9-orig/tests/misc/mb1.X 1970-01-01 01:00:00.000000000 +0100
4046+++ coreutils-8.9/tests/misc/mb1.X 2011-01-04 17:41:55.395888102 +0100
56ae3f82
SS
4047@@ -0,0 +1,4 @@
4048+Banana@5
4049+Apple@10
4050+Citrus@20
4051+Cherry@30
407c5be3
SS
4052diff -urNp coreutils-8.9-orig/tests/misc/mb2.I coreutils-8.9/tests/misc/mb2.I
4053--- coreutils-8.9-orig/tests/misc/mb2.I 1970-01-01 01:00:00.000000000 +0100
4054+++ coreutils-8.9/tests/misc/mb2.I 2011-01-04 17:41:55.395888102 +0100
56ae3f82
SS
4055@@ -0,0 +1,4 @@
4056+Apple@AA10@@20
4057+Banana@AA5@@30
4058+Citrus@AA20@@5
4059+Cherry@AA30@@10
407c5be3
SS
4060diff -urNp coreutils-8.9-orig/tests/misc/mb2.X coreutils-8.9/tests/misc/mb2.X
4061--- coreutils-8.9-orig/tests/misc/mb2.X 1970-01-01 01:00:00.000000000 +0100
4062+++ coreutils-8.9/tests/misc/mb2.X 2011-01-04 17:41:55.396892432 +0100
56ae3f82
SS
4063@@ -0,0 +1,4 @@
4064+Citrus@AA20@@5
4065+Cherry@AA30@@10
4066+Apple@AA10@@20
4067+Banana@AA5@@30
407c5be3
SS
4068diff -urNp coreutils-8.9-orig/tests/misc/sort-mb-tests coreutils-8.9/tests/misc/sort-mb-tests
4069--- coreutils-8.9-orig/tests/misc/sort-mb-tests 1970-01-01 01:00:00.000000000 +0100
4070+++ coreutils-8.9/tests/misc/sort-mb-tests 2011-01-04 17:41:55.396892432 +0100
56ae3f82
SS
4071@@ -0,0 +1,58 @@
4072+#! /bin/sh
4073+case $# in
4074+ 0) xx='../src/sort';;
4075+ *) xx="$1";;
4076+esac
4077+test "$VERBOSE" && echo=echo || echo=:
4078+$echo testing program: $xx
4079+errors=0
4080+test "$srcdir" || srcdir=.
4081+test "$VERBOSE" && $xx --version 2> /dev/null
4082+
4083+export LC_ALL=en_US.UTF-8
4084+# Skip (exit status 77) unless the UTF-8 locale is really in effect.
4085+locale -k LC_CTYPE 2>&1 | grep -q 'charmap.*UTF-8' || exit 77
4086+
4087+$xx -t @ -k2 -n $srcdir/misc/mb1.I > misc/mb1.O
4088+code=$?
4089+if test $code != 0; then
4090+ $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
4091+ errors=`expr $errors + 1`
4092+else
4093+ cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1
4094+ case $? in
4095+ 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
4096+ 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2
4097+ (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null
4098+ errors=`expr $errors + 1`;;
4099+ 2) $echo "Test mb1 may have failed." 1>&2
4100+ $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2
4101+ errors=`expr $errors + 1`;;
4102+ esac
4103+fi
4104+
4105+$xx -t @ -k4 -n $srcdir/misc/mb2.I > misc/mb2.O
4106+code=$?
4107+if test $code != 0; then
4108+ $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
4109+ errors=`expr $errors + 1`
4110+else
4111+ cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1
4112+ case $? in
4113+ 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
4114+ 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2
4115+ (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null
4116+ errors=`expr $errors + 1`;;
4117+ 2) $echo "Test mb2 may have failed." 1>&2
4118+ $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2
4119+ errors=`expr $errors + 1`;;
4120+ esac
4121+fi
4122+
4123+if test $errors = 0; then
4124+ $echo Passed all 2 tests. 1>&2
4125+else
4126+ $echo Failed $errors tests. 1>&2
4127+fi
4128+test $errors = 0 || errors=1
4129+exit $errors