]> git.ipfire.org Git - people/arne_f/ipfire-3.x.git/blame - coreutils/patches/coreutils-i18n.patch
coreutils: Update to 8.16.
[people/arne_f/ipfire-3.x.git] / coreutils / patches / coreutils-i18n.patch
CommitLineData
6987acf5
MT
1diff -urNp coreutils-8.16-orig/lib/linebuffer.h coreutils-8.16/lib/linebuffer.h
2--- coreutils-8.16-orig/lib/linebuffer.h 2012-01-06 10:14:31.000000000 +0100
3+++ coreutils-8.16/lib/linebuffer.h 2012-03-26 18:02:00.993889446 +0200
56ae3f82
SS
4@@ -21,6 +21,11 @@
5
6 # include <stdio.h>
7
8+/* Get mbstate_t. */
9+# if HAVE_WCHAR_H
10+# include <wchar.h>
11+# endif
12+
fa4603be 13 /* A 'struct linebuffer' holds a line of text. */
56ae3f82
SS
14
15 struct linebuffer
16@@ -28,6 +33,9 @@ struct linebuffer
17 size_t size; /* Allocated. */
18 size_t length; /* Used. */
19 char *buffer;
20+# if HAVE_WCHAR_H
21+ mbstate_t state;
22+# endif
23 };
24
25 /* Initialize linebuffer LINEBUFFER for use. */
6987acf5
MT
26diff -urNp coreutils-8.16-orig/src/cut.c coreutils-8.16/src/cut.c
27--- coreutils-8.16-orig/src/cut.c 2012-03-24 21:26:51.000000000 +0100
28+++ coreutils-8.16/src/cut.c 2012-03-26 17:46:48.000000000 +0200
56ae3f82
SS
29@@ -28,6 +28,11 @@
30 #include <assert.h>
31 #include <getopt.h>
32 #include <sys/types.h>
33+
34+/* Get mbstate_t, mbrtowc(). */
35+#if HAVE_WCHAR_H
36+# include <wchar.h>
37+#endif
38 #include "system.h"
39
40 #include "error.h"
1555d43c 41@@ -37,6 +42,18 @@
56ae3f82
SS
42 #include "quote.h"
43 #include "xstrndup.h"
44
45+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
46+ installation; work around this configuration error. */
47+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
48+# undef MB_LEN_MAX
49+# define MB_LEN_MAX 16
50+#endif
51+
52+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
53+#if HAVE_MBRTOWC && defined mbstate_t
54+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
55+#endif
56+
6987acf5 57 /* The official name of this program (e.g., no 'g' prefix). */
56ae3f82
SS
58 #define PROGRAM_NAME "cut"
59
1555d43c 60@@ -72,6 +89,52 @@
56ae3f82
SS
61 } \
62 while (0)
63
64+/* Refill the buffer BUF to get a multibyte character. */
65+#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
66+ do \
67+ { \
68+ if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
69+ { \
70+ memmove (BUF, BUFPOS, BUFLEN); \
71+ BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
72+ BUFPOS = BUF; \
73+ } \
74+ } \
75+ while (0)
76+
77+/* Decode the wide character at BUFPOS into WC (WEOF if BUFLEN is 0); BUFPOS is not advanced.
e7f6ab54 78+ CONVFAIL is set to 1 if the byte sequence is not a valid character, otherwise to 0. */
56ae3f82
SS
79+#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
80+ do \
81+ { \
82+ mbstate_t state_bak; \
83+ \
84+ if (BUFLEN < 1) \
85+ { \
86+ WC = WEOF; \
87+ break; \
88+ } \
89+ \
90+ /* Get a wide character. */ \
91+ CONVFAIL = 0; \
92+ state_bak = STATE; \
93+ MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
94+ \
95+ switch (MBLENGTH) \
96+ { \
97+ case (size_t)-1: \
98+ case (size_t)-2: \
99+ CONVFAIL++; \
100+ STATE = state_bak; \
101+ /* Fall through. */ \
102+ \
103+ case 0: \
104+ MBLENGTH = 1; \
105+ break; \
106+ } \
107+ } \
108+ while (0)
109+
110 struct range_pair
111 {
112 size_t lo;
1555d43c 113@@ -90,7 +153,7 @@ static char *field_1_buffer;
56ae3f82
SS
114 /* The number of bytes allocated for FIELD_1_BUFFER. */
115 static size_t field_1_bufsize;
116
117-/* The largest field or byte index used as an endpoint of a closed
118+/* The largest byte, character or field index used as an endpoint of a closed
119 or degenerate range specification; this doesn't include the starting
120 index of right-open-ended ranges. For example, with either range spec
6987acf5 121 '2-5,9-', '2-3,5,9-' this variable would be set to 5. */
1555d43c 122@@ -102,10 +165,11 @@ static size_t eol_range_start;
56ae3f82
SS
123
124 /* This is a bit vector.
125 In byte mode, which bytes to output.
126+ In character mode, which characters to output.
127 In field mode, which DELIM-separated fields to output.
128- Both bytes and fields are numbered starting with 1,
129+ Bytes, characters and fields are numbered starting with 1,
130 so the zeroth bit of this array is unused.
131- A field or byte K has been selected if
132+ A byte, character or field K has been selected if
133 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
134 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
135 static unsigned char *printable_field;
1555d43c 136@@ -114,15 +178,25 @@ enum operating_mode
56ae3f82
SS
137 {
138 undefined_mode,
139
140- /* Output characters that are in the given bytes. */
141+ /* Output bytes that are at the given positions. */
142 byte_mode,
143
144+ /* Output characters that are at the given positions. */
145+ character_mode,
146+
147 /* Output the given delimeter-separated fields. */
148 field_mode
149 };
150
151 static enum operating_mode operating_mode;
152
153+/* If nonzero, when in byte mode, don't split multibyte characters. */
154+static int byte_mode_character_aware;
155+
156+/* If nonzero, use the functions for single-byte locales
157+ even when this program runs in a multibyte locale. */
158+static int force_singlebyte_mode;
159+
160 /* If true do not output lines containing no delimeter characters.
161 Otherwise, all such lines are printed. This option is valid only
162 with field mode. */
1555d43c 163@@ -134,6 +208,9 @@ static bool complement;
56ae3f82
SS
164
165 /* The delimeter character for field mode. */
166 static unsigned char delim;
167+#if HAVE_WCHAR_H
168+static wchar_t wcdelim;
169+#endif
170
171 /* True if the --output-delimiter=STRING option was specified. */
172 static bool output_delimiter_specified;
6987acf5 173@@ -206,7 +283,7 @@ Mandatory arguments to long options are
56ae3f82
SS
174 -f, --fields=LIST select only these fields; also print any line\n\
175 that contains no delimiter character, unless\n\
176 the -s option is specified\n\
177- -n (ignored)\n\
178+ -n with -b: don't split multibyte characters\n\
179 "), stdout);
180 fputs (_("\
181 --complement complement the set of selected bytes, characters\n\
6987acf5 182@@ -365,7 +442,7 @@ set_fields (const char *fieldstr)
56ae3f82
SS
183 in_digits = false;
184 /* Starting a range. */
185 if (dash_found)
186- FATAL_ERROR (_("invalid byte or field list"));
187+ FATAL_ERROR (_("invalid byte, character or field list"));
188 dash_found = true;
189 fieldstr++;
190
6987acf5 191@@ -389,14 +466,16 @@ set_fields (const char *fieldstr)
56ae3f82
SS
192 if (!rhs_specified)
193 {
6987acf5 194 /* 'n-'. From 'initial' to end of line. */
56ae3f82
SS
195- eol_range_start = initial;
196+ if (eol_range_start == 0 ||
197+ (eol_range_start != 0 && eol_range_start > initial))
198+ eol_range_start = initial;
199 field_found = true;
200 }
201 else
202 {
6987acf5 203 /* 'm-n' or '-n' (1-n). */
56ae3f82
SS
204 if (value < initial)
205- FATAL_ERROR (_("invalid decreasing range"));
206+ FATAL_ERROR (_("invalid byte, character or field list"));
207
208 /* Is there already a range going to end of line? */
209 if (eol_range_start != 0)
6987acf5 210@@ -476,6 +555,9 @@ set_fields (const char *fieldstr)
56ae3f82
SS
211 if (operating_mode == byte_mode)
212 error (0, 0,
213 _("byte offset %s is too large"), quote (bad_num));
214+ else if (operating_mode == character_mode)
215+ error (0, 0,
216+ _("character offset %s is too large"), quote (bad_num));
217 else
218 error (0, 0,
219 _("field number %s is too large"), quote (bad_num));
6987acf5 220@@ -486,7 +568,7 @@ set_fields (const char *fieldstr)
56ae3f82
SS
221 fieldstr++;
222 }
223 else
224- FATAL_ERROR (_("invalid byte or field list"));
225+ FATAL_ERROR (_("invalid byte, character or field list"));
226 }
227
228 max_range_endpoint = 0;
6987acf5 229@@ -581,6 +663,77 @@ cut_bytes (FILE *stream)
56ae3f82
SS
230 }
231 }
232
233+#if HAVE_MBRTOWC
234+/* This function is in use for the following case.
235+
236+ 1. Read from the stream STREAM, printing to standard output any selected
e7f6ab54 237+ characters.
56ae3f82
SS
238+
239+ 2. Read from stream STREAM, printing to standard output any selected bytes,
240+ without splitting multibyte characters. */
e7f6ab54 241+
56ae3f82
SS
242+static void
243+cut_characters_or_cut_bytes_no_split (FILE *stream)
244+{
245+ int idx; /* number of bytes or characters in the line so far. */
246+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
247+ char *bufpos; /* Next read position of BUF. */
248+ size_t buflen; /* The length of the byte sequence in buf. */
249+ wint_t wc; /* A gotten wide character. */
250+ size_t mblength; /* The byte size of a multibyte character which shows
251+ as same character as WC. */
252+ mbstate_t state; /* State of the stream. */
3badd2da 253+ int convfail = 0; /* 1, when conversion is failed. Otherwise 0. */
e7f6ab54
SS
254+ /* Whether to begin printing delimiters between ranges for the current line.
255+ Set after we've begun printing data corresponding to the first range. */
256+ bool print_delimiter = false;
56ae3f82
SS
257+
258+ idx = 0;
259+ buflen = 0;
260+ bufpos = buf;
261+ memset (&state, '\0', sizeof(mbstate_t));
262+
263+ while (1)
264+ {
265+ REFILL_BUFFER (buf, bufpos, buflen, stream);
266+
267+ GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
268+
269+ if (wc == WEOF)
270+ {
271+ if (idx > 0)
272+ putchar ('\n');
273+ break;
274+ }
275+ else if (wc == L'\n')
276+ {
277+ putchar ('\n');
278+ idx = 0;
e7f6ab54 279+ print_delimiter = false;
56ae3f82
SS
280+ }
281+ else
282+ {
e7f6ab54
SS
283+ bool range_start;
284+ bool *rs = output_delimiter_specified ? &range_start : NULL;
56ae3f82 285+ idx += (operating_mode == byte_mode) ? mblength : 1;
e7f6ab54
SS
286+ if (print_kth (idx, rs))
287+ {
288+ if (rs && *rs && print_delimiter)
289+ {
290+ fwrite (output_delimiter_string, sizeof (char),
291+ output_delimiter_length, stdout);
292+ }
293+ print_delimiter = true;
294+ fwrite (bufpos, mblength, sizeof(char), stdout);
295+ }
56ae3f82
SS
296+ }
297+
298+ buflen -= mblength;
299+ bufpos += mblength;
300+ }
301+}
302+#endif
e7f6ab54 303+
56ae3f82
SS
304 /* Read from stream STREAM, printing to standard output any selected fields. */
305
306 static void
6987acf5 307@@ -703,13 +856,195 @@ cut_fields (FILE *stream)
56ae3f82
SS
308 }
309 }
310
311+#if HAVE_MBRTOWC
312+static void
313+cut_fields_mb (FILE *stream)
314+{
315+ int c;
316+ unsigned int field_idx;
317+ int found_any_selected_field;
318+ int buffer_first_field;
319+ int empty_input;
320+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
321+ char *bufpos; /* Next read position of BUF. */
322+ size_t buflen; /* The length of the byte sequence in buf. */
323+ wint_t wc = 0; /* The wide character decoded most recently. */
324+ size_t mblength; /* The number of bytes in the multibyte sequence
325+ that was decoded into WC. */
326+ mbstate_t state; /* State of the stream. */
e7f6ab54 327+ int convfail = 0; /* 1 when the last conversion failed, otherwise 0. */
56ae3f82
SS
328+ /* Read STREAM and print the selected WCDELIM-separated fields, decoding multibyte characters. */
329+ found_any_selected_field = 0;
330+ field_idx = 1;
331+ bufpos = buf;
332+ buflen = 0;
333+ memset (&state, '\0', sizeof(mbstate_t));
334+
335+ c = getc (stream);
336+ empty_input = (c == EOF);
337+ if (c != EOF)
e7f6ab54 338+ {
56ae3f82 339+ ungetc (c, stream);
e7f6ab54
SS
340+ wc = 0;
341+ }
56ae3f82
SS
342+ else
343+ wc = WEOF;
344+
345+ /* To support the semantics of the -s flag, we may have to buffer
346+ all of the first field to determine whether it is `delimited.'
347+ But that is unnecessary if all non-delimited lines must be printed
348+ and the first field has been selected, or if non-delimited lines
349+ must be suppressed and the first field has *not* been selected.
350+ That is because a non-delimited line has exactly one field. */
351+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
352+
353+ while (1)
354+ {
355+ if (field_idx == 1 && buffer_first_field)
356+ {
357+ int len = 0;
358+
359+ while (1)
360+ {
361+ REFILL_BUFFER (buf, bufpos, buflen, stream);
362+
363+ GET_NEXT_WC_FROM_BUFFER
364+ (wc, bufpos, buflen, mblength, state, convfail);
365+
366+ if (wc == WEOF)
367+ break;
368+
369+ field_1_buffer = xrealloc (field_1_buffer, len + mblength);
370+ memcpy (field_1_buffer + len, bufpos, mblength);
371+ len += mblength;
372+ buflen -= mblength;
373+ bufpos += mblength;
374+
375+ if (!convfail && (wc == L'\n' || wc == wcdelim))
376+ break;
377+ }
378+
379+ if (wc == WEOF)
380+ break;
381+
382+ /* If the first field extends to the end of line (it is not
383+ delimited) and we are printing all non-delimited lines,
384+ print this one. */
385+ if (convfail || (!convfail && wc != wcdelim))
386+ {
387+ if (suppress_non_delimited)
388+ {
389+ /* Empty. */
390+ }
391+ else
392+ {
393+ fwrite (field_1_buffer, sizeof (char), len, stdout);
394+ /* Make sure the output line is newline terminated. */
395+ if (convfail || (!convfail && wc != L'\n'))
396+ putchar ('\n');
397+ }
398+ continue;
399+ }
400+
401+ if (print_kth (1, NULL))
402+ {
403+ /* Print the field, but not the trailing delimiter, which is MBLENGTH bytes long. */
404+ fwrite (field_1_buffer, sizeof (char), len - mblength, stdout);
405+ found_any_selected_field = 1;
406+ }
407+ ++field_idx;
408+ }
409+
410+ if (wc != WEOF)
411+ {
412+ if (print_kth (field_idx, NULL))
413+ {
414+ if (found_any_selected_field)
415+ {
416+ fwrite (output_delimiter_string, sizeof (char),
417+ output_delimiter_length, stdout);
418+ }
419+ found_any_selected_field = 1;
420+ }
421+
422+ while (1)
423+ {
424+ REFILL_BUFFER (buf, bufpos, buflen, stream);
425+
426+ GET_NEXT_WC_FROM_BUFFER
427+ (wc, bufpos, buflen, mblength, state, convfail);
428+
429+ if (wc == WEOF)
430+ break;
431+ else if (!convfail && (wc == wcdelim || wc == L'\n'))
432+ {
433+ buflen -= mblength;
434+ bufpos += mblength;
435+ break;
436+ }
437+
438+ if (print_kth (field_idx, NULL))
439+ fwrite (bufpos, mblength, sizeof(char), stdout);
440+
441+ buflen -= mblength;
442+ bufpos += mblength;
443+ }
444+ }
445+
446+ if ((!convfail || wc == L'\n') && buflen < 1)
447+ wc = WEOF;
448+
449+ if (!convfail && wc == wcdelim)
450+ ++field_idx;
451+ else if (wc == WEOF || (!convfail && wc == L'\n'))
452+ {
453+ if (found_any_selected_field
454+ || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
455+ putchar ('\n');
456+ if (wc == WEOF)
457+ break;
458+ field_idx = 1;
459+ found_any_selected_field = 0;
460+ }
461+ }
462+}
463+#endif
464+
465 static void
466 cut_stream (FILE *stream)
467 {
468- if (operating_mode == byte_mode)
469- cut_bytes (stream);
470+#if HAVE_MBRTOWC
471+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
472+ {
473+ switch (operating_mode)
474+ {
475+ case byte_mode:
476+ if (byte_mode_character_aware)
477+ cut_characters_or_cut_bytes_no_split (stream);
478+ else
479+ cut_bytes (stream);
480+ break;
481+
482+ case character_mode:
483+ cut_characters_or_cut_bytes_no_split (stream);
484+ break;
485+
486+ case field_mode:
487+ cut_fields_mb (stream);
488+ break;
489+
490+ default:
491+ abort ();
492+ }
493+ }
494 else
495- cut_fields (stream);
496+#endif
497+ {
498+ if (operating_mode == field_mode)
499+ cut_fields (stream);
500+ else
501+ cut_bytes (stream);
502+ }
503 }
504
505 /* Process file FILE to standard output.
6987acf5 506@@ -761,6 +1096,8 @@ main (int argc, char **argv)
56ae3f82
SS
507 bool ok;
508 bool delim_specified = false;
1555d43c 509 char *spec_list_string IF_LINT ( = NULL);
56ae3f82
SS
510+ char mbdelim[MB_LEN_MAX + 1];
511+ size_t delimlen = 0;
512
513 initialize_main (&argc, &argv);
514 set_program_name (argv[0]);
6987acf5 515@@ -783,7 +1120,6 @@ main (int argc, char **argv)
56ae3f82
SS
516 switch (optc)
517 {
518 case 'b':
519- case 'c':
520 /* Build the byte list. */
521 if (operating_mode != undefined_mode)
522 FATAL_ERROR (_("only one type of list may be specified"));
6987acf5 523@@ -791,6 +1127,14 @@ main (int argc, char **argv)
56ae3f82
SS
524 spec_list_string = optarg;
525 break;
526
527+ case 'c':
528+ /* Build the character list. */
529+ if (operating_mode != undefined_mode)
530+ FATAL_ERROR (_("only one type of list may be specified"));
531+ operating_mode = character_mode;
532+ spec_list_string = optarg;
533+ break;
534+
535 case 'f':
536 /* Build the field list. */
537 if (operating_mode != undefined_mode)
6987acf5 538@@ -802,10 +1146,35 @@ main (int argc, char **argv)
56ae3f82
SS
539 case 'd':
540 /* New delimiter. */
6987acf5 541 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
56ae3f82
SS
542- if (optarg[0] != '\0' && optarg[1] != '\0')
543- FATAL_ERROR (_("the delimiter must be a single character"));
544- delim = optarg[0];
545- delim_specified = true;
546+ {
547+#if HAVE_MBRTOWC
548+ if(MB_CUR_MAX > 1)
549+ {
550+ mbstate_t state;
551+
552+ memset (&state, '\0', sizeof(mbstate_t));
553+ delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
554+
555+ if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
556+ ++force_singlebyte_mode;
557+ else
558+ {
559+ delimlen = (delimlen < 1) ? 1 : delimlen;
560+ if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
561+ FATAL_ERROR (_("the delimiter must be a single character"));
562+ memcpy (mbdelim, optarg, delimlen);
563+ }
564+ }
565+
566+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
567+#endif
568+ {
569+ if (optarg[0] != '\0' && optarg[1] != '\0')
570+ FATAL_ERROR (_("the delimiter must be a single character"));
571+ delim = (unsigned char) optarg[0];
572+ }
573+ delim_specified = true;
574+ }
575 break;
576
577 case OUTPUT_DELIMITER_OPTION:
6987acf5 578@@ -818,6 +1187,7 @@ main (int argc, char **argv)
56ae3f82
SS
579 break;
580
581 case 'n':
582+ byte_mode_character_aware = 1;
583 break;
584
585 case 's':
6987acf5 586@@ -840,7 +1210,7 @@ main (int argc, char **argv)
56ae3f82
SS
587 if (operating_mode == undefined_mode)
588 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
589
590- if (delim != '\0' && operating_mode != field_mode)
591+ if (delim_specified && operating_mode != field_mode)
592 FATAL_ERROR (_("an input delimiter may be specified only\
593 when operating on fields"));
594
6987acf5 595@@ -867,15 +1237,34 @@ main (int argc, char **argv)
56ae3f82
SS
596 }
597
598 if (!delim_specified)
599- delim = '\t';
600+ {
601+ delim = '\t';
602+#ifdef HAVE_MBRTOWC
603+ wcdelim = L'\t';
604+ mbdelim[0] = '\t';
605+ mbdelim[1] = '\0';
606+ delimlen = 1;
607+#endif
608+ }
609
610 if (output_delimiter_string == NULL)
611 {
612- static char dummy[2];
613- dummy[0] = delim;
614- dummy[1] = '\0';
615- output_delimiter_string = dummy;
616- output_delimiter_length = 1;
617+#ifdef HAVE_MBRTOWC
618+ if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
619+ {
620+ output_delimiter_string = xstrdup(mbdelim);
621+ output_delimiter_length = delimlen;
622+ }
623+
624+ if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
625+#endif
626+ {
e7f6ab54 627+ static char dummy[2];
56ae3f82
SS
628+ dummy[0] = delim;
629+ dummy[1] = '\0';
630+ output_delimiter_string = dummy;
631+ output_delimiter_length = 1;
632+ }
633 }
634
635 if (optind == argc)
6987acf5
MT
636diff -urNp coreutils-8.16-orig/src/expand.c coreutils-8.16/src/expand.c
637--- coreutils-8.16-orig/src/expand.c 2012-03-24 21:26:51.000000000 +0100
638+++ coreutils-8.16/src/expand.c 2012-03-26 17:42:56.000000000 +0200
639@@ -37,12 +37,29 @@
56ae3f82
SS
640 #include <stdio.h>
641 #include <getopt.h>
642 #include <sys/types.h>
643+
644+/* Get mbstate_t, mbrtowc(), wcwidth(). */
645+#if HAVE_WCHAR_H
646+# include <wchar.h>
647+#endif
648+
649 #include "system.h"
650 #include "error.h"
1555d43c 651 #include "fadvise.h"
56ae3f82
SS
652 #include "quote.h"
653 #include "xstrndup.h"
654
655+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
656+ installation; work around this configuration error. */
657+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
658+# define MB_LEN_MAX 16
659+#endif
660+
661+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
662+#if HAVE_MBRTOWC && defined mbstate_t
663+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
664+#endif
665+
6987acf5 666 /* The official name of this program (e.g., no 'g' prefix). */
56ae3f82
SS
667 #define PROGRAM_NAME "expand"
668
6987acf5 669@@ -358,6 +375,142 @@ expand (void)
56ae3f82
SS
670 }
671 }
672
673+#if HAVE_MBRTOWC
674+static void
675+expand_multibyte (void)
676+{
677+ FILE *fp; /* Input stream. */
678+ mbstate_t i_state; /* Current shift state of the input stream. */
679+ mbstate_t i_state_bak; /* Back up the I_STATE. */
680+ mbstate_t o_state; /* Current shift state of the output stream. */
681+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
3badd2da 682+ char *bufpos = buf; /* Next read position of BUF. */
56ae3f82
SS
683+ size_t buflen = 0; /* The length of the byte sequence in buf. */
684+ wchar_t wc; /* A gotten wide character. */
685+ size_t mblength; /* The byte size of a multibyte character
686+ which shows as same character as WC. */
687+ int tab_index = 0; /* Index in `tab_list' of next tabstop. */
688+ int column = 0; /* Column on screen of the next char. */
689+ int next_tab_column; /* Column the next tab stop is on. */
690+ int convert = 1; /* If nonzero, perform translations. */
691+
692+ fp = next_file ((FILE *) NULL);
693+ if (fp == NULL)
694+ return;
695+
696+ memset (&o_state, '\0', sizeof(mbstate_t));
697+ memset (&i_state, '\0', sizeof(mbstate_t));
698+
699+ for (;;)
700+ {
701+ /* Refill the buffer BUF. */
702+ if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
703+ {
704+ memmove (buf, bufpos, buflen);
705+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
706+ bufpos = buf;
707+ }
708+
709+ /* No character is left in BUF. */
710+ if (buflen < 1)
711+ {
712+ fp = next_file (fp);
713+
714+ if (fp == NULL)
715+ break; /* No more files. */
716+ else
717+ {
718+ memset (&i_state, '\0', sizeof(mbstate_t));
719+ continue;
720+ }
721+ }
722+
723+ /* Get a wide character. */
724+ i_state_bak = i_state;
725+ mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
726+
727+ switch (mblength)
728+ {
729+ case (size_t)-1: /* illegal byte sequence. */
730+ case (size_t)-2:
731+ mblength = 1;
732+ i_state = i_state_bak;
733+ if (convert)
734+ {
735+ ++column;
736+ if (convert_entire_line == 0)
737+ convert = 0;
738+ }
739+ putchar (*bufpos);
740+ break;
741+
742+ case 0: /* null. */
743+ mblength = 1;
744+ if (convert && convert_entire_line == 0)
745+ convert = 0;
746+ putchar ('\0');
747+ break;
748+
749+ default:
750+ if (wc == L'\n') /* LF. */
751+ {
752+ tab_index = 0;
753+ column = 0;
754+ convert = 1;
755+ putchar ('\n');
756+ }
757+ else if (wc == L'\t' && convert) /* Tab. */
758+ {
759+ if (tab_size == 0)
760+ {
761+ /* Do not let tab_index == first_free_tab;
762+ stop when it is 1 less. */
763+ while (tab_index < first_free_tab - 1
764+ && column >= tab_list[tab_index])
765+ tab_index++;
766+ next_tab_column = tab_list[tab_index];
767+ if (tab_index < first_free_tab - 1)
768+ tab_index++;
769+ if (column >= next_tab_column)
770+ next_tab_column = column + 1;
771+ }
772+ else
773+ next_tab_column = column + tab_size - column % tab_size;
774+
775+ while (column < next_tab_column)
776+ {
777+ putchar (' ');
778+ ++column;
779+ }
780+ }
781+ else /* Others. */
782+ {
783+ if (convert)
784+ {
785+ if (wc == L'\b')
786+ {
787+ if (column > 0)
788+ --column;
789+ }
790+ else
791+ {
792+ int width; /* The width of WC. */
793+
794+ width = wcwidth (wc);
795+ column += (width > 0) ? width : 0;
796+ if (convert_entire_line == 0)
797+ convert = 0;
798+ }
799+ }
800+ fwrite (bufpos, sizeof(char), mblength, stdout);
801+ }
802+ }
803+ buflen -= mblength;
804+ bufpos += mblength;
805+ }
806+}
807+#endif
808+
809 int
810 main (int argc, char **argv)
811 {
6987acf5 812@@ -422,7 +575,12 @@ main (int argc, char **argv)
56ae3f82
SS
813
814 file_list = (optind < argc ? &argv[optind] : stdin_argv);
815
816- expand ();
817+#if HAVE_MBRTOWC
818+ if (MB_CUR_MAX > 1)
819+ expand_multibyte ();
820+ else
821+#endif
822+ expand ();
823
824 if (have_read_stdin && fclose (stdin) != 0)
825 error (EXIT_FAILURE, errno, "-");
6987acf5
MT
826diff -urNp coreutils-8.16-orig/src/fold.c coreutils-8.16/src/fold.c
827--- coreutils-8.16-orig/src/fold.c 2012-03-24 19:22:13.000000000 +0100
828+++ coreutils-8.16/src/fold.c 2012-03-26 17:48:37.000000000 +0200
1555d43c 829@@ -22,12 +22,34 @@
56ae3f82
SS
830 #include <getopt.h>
831 #include <sys/types.h>
832
833+/* Get mbstate_t, mbrtowc(), wcwidth(). */
834+#if HAVE_WCHAR_H
835+# include <wchar.h>
836+#endif
837+
838+/* Get iswprint(), iswblank(), wcwidth(). */
839+#if HAVE_WCTYPE_H
840+# include <wctype.h>
841+#endif
842+
843 #include "system.h"
844 #include "error.h"
1555d43c 845 #include "fadvise.h"
56ae3f82
SS
846 #include "quote.h"
847 #include "xstrtol.h"
848
849+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
850+ installation; work around this configuration error. */
851+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
852+# undef MB_LEN_MAX
853+# define MB_LEN_MAX 16
854+#endif
855+
856+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
857+#if HAVE_MBRTOWC && defined mbstate_t
858+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
859+#endif
860+
861 #define TAB_WIDTH 8
862
6987acf5 863 /* The official name of this program (e.g., no 'g' prefix). */
1555d43c 864@@ -35,20 +57,41 @@
56ae3f82
SS
865
866 #define AUTHORS proper_name ("David MacKenzie")
867
868+#define FATAL_ERROR(Message) \
869+ do \
870+ { \
871+ error (0, 0, (Message)); \
872+ usage (2); \
873+ } \
874+ while (0)
875+
876+enum operating_mode
877+{
878+ /* Fold texts by columns that are at the given positions. */
879+ column_mode,
880+
881+ /* Fold texts by bytes that are at the given positions. */
882+ byte_mode,
883+
884+ /* Fold texts by characters that are at the given positions. */
885+ character_mode,
886+};
887+
888+/* The current operating mode. (Default: column_mode) */
889+static enum operating_mode operating_mode;
890+
891 /* If nonzero, try to break on whitespace. */
892 static bool break_spaces;
893
894-/* If nonzero, count bytes, not column positions. */
895-static bool count_bytes;
896-
897 /* If nonzero, at least one of the files we read was standard input. */
898 static bool have_read_stdin;
899
900-static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
901+static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
902
903 static struct option const longopts[] =
904 {
905 {"bytes", no_argument, NULL, 'b'},
906+ {"characters", no_argument, NULL, 'c'},
907 {"spaces", no_argument, NULL, 's'},
908 {"width", required_argument, NULL, 'w'},
909 {GETOPT_HELP_OPTION_DECL},
6987acf5 910@@ -77,6 +120,7 @@ Mandatory arguments to long options are
56ae3f82
SS
911 "), stdout);
912 fputs (_("\
913 -b, --bytes count bytes rather than columns\n\
914+ -c, --characters count characters rather than columns\n\
915 -s, --spaces break at spaces\n\
916 -w, --width=WIDTH use WIDTH columns instead of 80\n\
917 "), stdout);
6987acf5 918@@ -94,7 +138,7 @@ Mandatory arguments to long options are
56ae3f82
SS
919 static size_t
920 adjust_column (size_t column, char c)
921 {
922- if (!count_bytes)
923+ if (operating_mode != byte_mode)
924 {
925 if (c == '\b')
926 {
6987acf5 927@@ -117,30 +161,14 @@ adjust_column (size_t column, char c)
56ae3f82
SS
928 to stdout, with maximum line length WIDTH.
929 Return true if successful. */
930
931-static bool
932-fold_file (char const *filename, size_t width)
933+static void
934+fold_text (FILE *istream, size_t width, int *saved_errno)
935 {
936- FILE *istream;
937 int c;
938 size_t column = 0; /* Screen column where next char will go. */
6987acf5 939 size_t offset_out = 0; /* Index in 'line_out' for next char. */
56ae3f82
SS
940 static char *line_out = NULL;
941 static size_t allocated_out = 0;
942- int saved_errno;
943-
944- if (STREQ (filename, "-"))
945- {
946- istream = stdin;
947- have_read_stdin = true;
948- }
949- else
950- istream = fopen (filename, "r");
951-
952- if (istream == NULL)
953- {
954- error (0, errno, "%s", filename);
955- return false;
956- }
957
1555d43c
SS
958 fadvise (istream, FADVISE_SEQUENTIAL);
959
6987acf5 960@@ -170,6 +198,15 @@ fold_file (char const *filename, size_t
56ae3f82
SS
961 bool found_blank = false;
962 size_t logical_end = offset_out;
963
964+ /* If LINE_OUT is still empty,
965+ store the new character in LINE_OUT
966+ even if column is bigger than width. */
967+ if (offset_out == 0)
968+ {
969+ line_out[offset_out++] = c;
970+ continue;
971+ }
972+
973 /* Look for the last blank. */
974 while (logical_end)
975 {
6987acf5 976@@ -216,11 +253,221 @@ fold_file (char const *filename, size_t
56ae3f82
SS
977 line_out[offset_out++] = c;
978 }
979
980- saved_errno = errno;
981+ *saved_errno = errno;
982
983 if (offset_out)
984 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
985
986+}
987+/* Multibyte variant of fold_text: fold ISTREAM to stdout at WIDTH; errno is stored in *SAVED_ERRNO. */
988+#if HAVE_MBRTOWC
989+static void
990+fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
991+{
992+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
993+ size_t buflen = 0; /* The length of the byte sequence in buf. */
3badd2da 994+ char *bufpos = buf; /* Next read position of BUF. */
56ae3f82
SS
995+ wint_t wc; /* The wide character decoded most recently. */
996+ size_t mblength; /* The number of bytes in the multibyte sequence
997+ that was decoded into WC. */
998+ mbstate_t state, state_bak; /* State of the stream. */
3badd2da 999+ int convfail = 0; /* Nonzero when the current character failed to convert. */
56ae3f82
SS
1000+
1001+ static char *line_out = NULL;
1002+ size_t offset_out = 0; /* Index in `line_out' for next char. */
1003+ static size_t allocated_out = 0;
1004+
1005+ int increment;
1006+ size_t column = 0;
1007+
1008+ size_t last_blank_pos;
1009+ size_t last_blank_column;
1010+ int is_blank_seen;
1011+ int last_blank_increment = 0;
1012+ int is_bs_following_last_blank;
1013+ size_t bs_following_last_blank_num;
1014+ int is_cr_after_last_blank;
1015+
1016+#define CLEAR_FLAGS \
1017+ do \
1018+ { \
1019+ last_blank_pos = 0; \
1020+ last_blank_column = 0; \
1021+ is_blank_seen = 0; \
1022+ is_bs_following_last_blank = 0; \
1023+ bs_following_last_blank_num = 0; \
1024+ is_cr_after_last_blank = 0; \
1025+ } \
1026+ while (0)
1027+
1028+#define START_NEW_LINE \
1029+ do \
1030+ { \
1031+ putchar ('\n'); \
1032+ column = 0; \
1033+ offset_out = 0; \
1034+ CLEAR_FLAGS; \
1035+ } \
1036+ while (0)
1037+
1038+ CLEAR_FLAGS;
1039+ memset (&state, '\0', sizeof(mbstate_t));
1040+
1041+ for (;; bufpos += mblength, buflen -= mblength)
1042+ {
1043+ if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
1044+ {
1045+ memmove (buf, bufpos, buflen);
1046+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
1047+ bufpos = buf;
1048+ }
1049+
1050+ if (buflen < 1)
1051+ break;
1052+
1053+ convfail = 0; /* Get a wide character; CONVFAIL describes only this one. */
56ae3f82
SS
1054+ state_bak = state;
1055+ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
1056+
1057+ switch (mblength)
1058+ {
1059+ case (size_t)-1:
1060+ case (size_t)-2:
1061+ convfail++;
1062+ state = state_bak;
1063+ /* Fall through. */
1064+
1065+ case 0:
1066+ mblength = 1;
1067+ break;
1068+ }
1069+
1070+rescan:
1071+ if (operating_mode == byte_mode) /* byte mode */
1072+ increment = mblength;
1073+ else if (operating_mode == character_mode) /* character mode */
1074+ increment = 1;
1075+ else /* column mode */
1076+ {
1077+ if (convfail)
1078+ increment = 1;
1079+ else
1080+ {
1081+ switch (wc)
1082+ {
1083+ case L'\n':
1084+ fwrite (line_out, sizeof(char), offset_out, stdout);
1085+ START_NEW_LINE;
1086+ continue;
1087+
1088+ case L'\b':
1089+ increment = (column > 0) ? -1 : 0;
1090+ break;
1091+
1092+ case L'\r':
1093+ increment = -1 * column;
1094+ break;
1095+
1096+ case L'\t':
1097+ increment = 8 - column % 8;
1098+ break;
1099+
1100+ default:
1101+ increment = wcwidth (wc);
1102+ increment = (increment < 0) ? 0 : increment;
1103+ }
1104+ }
1105+ }
1106+
1107+ if (column + increment > width && break_spaces && last_blank_pos)
1108+ {
1109+ fwrite (line_out, sizeof(char), last_blank_pos, stdout);
1110+ putchar ('\n');
1111+
1112+ offset_out = offset_out - last_blank_pos;
1113+ column = column - last_blank_column + ((is_cr_after_last_blank)
1114+ ? last_blank_increment : bs_following_last_blank_num);
1115+ memmove (line_out, line_out + last_blank_pos, offset_out);
1116+ CLEAR_FLAGS;
1117+ goto rescan;
1118+ }
1119+
1120+ if (column + increment > width && column != 0)
1121+ {
1122+ fwrite (line_out, sizeof(char), offset_out, stdout);
1123+ START_NEW_LINE;
1124+ goto rescan;
1125+ }
1126+
1127+ if (allocated_out < offset_out + mblength)
1128+ {
1129+ line_out = X2REALLOC (line_out, &allocated_out);
1130+ }
1131+
1132+ memcpy (line_out + offset_out, bufpos, mblength);
1133+ offset_out += mblength;
1134+ column += increment;
1135+
1136+ if (is_blank_seen && !convfail && wc == L'\r')
1137+ is_cr_after_last_blank = 1;
1138+
1139+ if (is_bs_following_last_blank && !convfail && wc == L'\b')
1140+ ++bs_following_last_blank_num;
1141+ else
1142+ is_bs_following_last_blank = 0;
1143+
1144+ if (break_spaces && !convfail && iswblank (wc))
1145+ {
1146+ last_blank_pos = offset_out;
1147+ last_blank_column = column;
1148+ is_blank_seen = 1;
1149+ last_blank_increment = increment;
1150+ is_bs_following_last_blank = 1;
1151+ bs_following_last_blank_num = 0;
1152+ is_cr_after_last_blank = 0;
1153+ }
1154+ }
1155+
1156+ *saved_errno = errno;
1157+
1158+ if (offset_out)
1159+ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1160+
1161+}
1162+#endif
1163+
1164+/* Fold file FILENAME, or standard input if FILENAME is "-",
1165+ to stdout, with maximum line length WIDTH.
1166+ Return 0 if successful, 1 if an error occurs. */
1167+
1168+static bool
1169+fold_file (char *filename, size_t width)
1170+{
1171+ FILE *istream;
1172+ int saved_errno;
1173+
1174+ if (STREQ (filename, "-"))
1175+ {
1176+ istream = stdin;
1177+ have_read_stdin = 1;
1178+ }
1179+ else
1180+ istream = fopen (filename, "r");
1181+
1182+ if (istream == NULL)
1183+ {
1184+ error (0, errno, "%s", filename);
1185+ return 1;
1186+ }
1187+
1188+ /* Define how ISTREAM is being folded. */
1189+#if HAVE_MBRTOWC
1190+ if (MB_CUR_MAX > 1)
1191+ fold_multibyte_text (istream, width, &saved_errno);
1192+ else
1193+#endif
1194+ fold_text (istream, width, &saved_errno);
1195+
1196 if (ferror (istream))
1197 {
1198 error (0, saved_errno, "%s", filename);
6987acf5 1199@@ -253,7 +500,8 @@ main (int argc, char **argv)
56ae3f82
SS
1200
1201 atexit (close_stdout);
1202
1203- break_spaces = count_bytes = have_read_stdin = false;
1204+ operating_mode = column_mode;
1205+ break_spaces = have_read_stdin = false;
1206
1207 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
1208 {
6987acf5 1209@@ -262,7 +510,15 @@ main (int argc, char **argv)
56ae3f82
SS
1210 switch (optc)
1211 {
1212 case 'b': /* Count bytes rather than columns. */
1213- count_bytes = true;
1214+ if (operating_mode != column_mode)
1215+ FATAL_ERROR (_("only one way of folding may be specified"));
1216+ operating_mode = byte_mode;
1217+ break;
1218+
1219+ case 'c':
1220+ if (operating_mode != column_mode)
1221+ FATAL_ERROR (_("only one way of folding may be specified"));
1222+ operating_mode = character_mode;
1223 break;
1224
1225 case 's': /* Break at word boundaries. */
6987acf5
MT
1226diff -urNp coreutils-8.16-orig/src/join.c coreutils-8.16/src/join.c
1227--- coreutils-8.16-orig/src/join.c 2012-03-24 21:26:51.000000000 +0100
1228+++ coreutils-8.16/src/join.c 2012-03-26 17:50:02.000000000 +0200
1555d43c 1229@@ -22,18 +22,32 @@
56ae3f82
SS
1230 #include <sys/types.h>
1231 #include <getopt.h>
1232
1233+/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
1234+#if HAVE_WCHAR_H
1235+# include <wchar.h>
1236+#endif
1237+
1238+/* Get iswblank(), towupper(). */
1239+#if HAVE_WCTYPE_H
1240+# include <wctype.h>
1241+#endif
1242+
1243 #include "system.h"
1244 #include "error.h"
1555d43c 1245 #include "fadvise.h"
56ae3f82
SS
1246 #include "hard-locale.h"
1247 #include "linebuffer.h"
1248-#include "memcasecmp.h"
1249 #include "quote.h"
1250 #include "stdio--.h"
1251 #include "xmemcoll.h"
1252 #include "xstrtol.h"
1253 #include "argmatch.h"
1254
1255+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1256+#if HAVE_MBRTOWC && defined mbstate_t
1257+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1258+#endif
1259+
6987acf5 1260 /* The official name of this program (e.g., no 'g' prefix). */
56ae3f82
SS
1261 #define PROGRAM_NAME "join"
1262
e7f6ab54 1263@@ -135,10 +149,12 @@ static struct outlist outlist_head;
6987acf5 1264 /* Last element in 'outlist', where a new element can be added. */
56ae3f82
SS
1265 static struct outlist *outlist_end = &outlist_head;
1266
1267-/* Tab character separating fields. If negative, fields are separated
1268- by any nonempty string of blanks, otherwise by exactly one
1269- tab character whose value (when cast to unsigned char) equals TAB. */
1270-static int tab = -1;
1271+/* Tab character separating fields. If NULL, fields are separated
1272+ by any nonempty string of blanks. */
1273+static char *tab = NULL;
1274+
1275+/* The number of bytes used for tab. */
1276+static size_t tablen = 0;
1277
1278 /* If nonzero, check that the input is correctly ordered. */
1279 static enum
6987acf5 1280@@ -262,13 +278,14 @@ xfields (struct line *line)
56ae3f82
SS
1281 if (ptr == lim)
1282 return;
1283
1555d43c 1284- if (0 <= tab && tab != '\n')
56ae3f82
SS
1285+ if (tab != NULL)
1286 {
1287+ unsigned char t = tab[0];
1288 char *sep;
1289- for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
1290+ for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
1291 extract_field (line, ptr, sep - ptr);
1292 }
1555d43c
SS
1293- else if (tab < 0)
1294+ else
1295 {
1296 /* Skip leading blanks before the first field. */
1297 while (isblank (to_uchar (*ptr)))
6987acf5 1298@@ -292,6 +309,148 @@ xfields (struct line *line)
56ae3f82
SS
1299 extract_field (line, ptr, lim - ptr);
1300 }
1301
1302+#if HAVE_MBRTOWC /* Multibyte variant; get_line calls it instead of xfields when MB_CUR_MAX > 1. */
1303+static void
1304+xfields_multibyte (struct line *line) /* Split LINE's buffer into fields, decoding characters with mbrtowc. */
1305+{
1306+ char *ptr = line->buf.buffer; /* Start of the field currently being scanned. */
1307+ char const *lim = ptr + line->buf.length - 1; /* Scan limit: the last buffer byte is excluded (presumably the line terminator -- TODO confirm). */
1308+ wchar_t wc = 0;
1309+ size_t mblength = 1; /* Byte length of the most recently decoded character. */
1310+ mbstate_t state, state_bak; /* state_bak lets a failed conversion be rolled back. */
1311+
1312+ memset (&state, 0, sizeof (mbstate_t));
1313+
1314+ if (ptr >= lim) /* Nothing to split. */
1315+ return;
1316+
1317+ if (tab != NULL) /* Explicit separator: fields are delimited by the TABLEN-byte character TAB. */
1318+ {
1319+ unsigned char t = tab[0]; /* NOTE(review): t is not referenced again in this function. */
1320+ char *sep = ptr;
1321+ for (; ptr < lim; ptr = sep + mblength) /* Resume just past the separator that ended the previous field. */
1322+ {
1323+ sep = ptr;
1324+ while (sep < lim) /* Advance character by character until a separator is found. */
1325+ {
1326+ state_bak = state;
1327+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1328+
1329+ if (mblength == (size_t)-1 || mblength == (size_t)-2) /* Invalid or incomplete sequence: treat it as one byte. */
1330+ {
1331+ mblength = 1;
1332+ state = state_bak;
1333+ }
1334+ mblength = (mblength < 1) ? 1 : mblength; /* mbrtowc returned 0: still advance by one byte. */
1335+
1336+ if (mblength == tablen && !memcmp (sep, tab, mblength)) /* Same byte length and same bytes as TAB. */
1337+ break;
1338+ else
1339+ {
1340+ sep += mblength;
1341+ continue;
1342+ }
1343+ }
1344+
1345+ if (sep >= lim) /* No further separator; the trailing field is extracted after the if/else. */
1346+ break;
1347+
1348+ extract_field (line, ptr, sep - ptr);
1349+ }
1350+ }
1351+ else /* No explicit separator: fields are delimited by runs of blanks (iswblank). */
1352+ {
1353+ /* Skip leading blanks before the first field. */
1354+ while(ptr < lim)
1355+ {
1356+ state_bak = state;
1357+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1358+
1359+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
1360+ {
1361+ mblength = 1;
1362+ state = state_bak;
1363+ break; /* Stop skipping at an undecodable byte. */
1364+ }
1365+ mblength = (mblength < 1) ? 1 : mblength;
1366+
1367+ if (!iswblank(wc))
1368+ break;
1369+ ptr += mblength;
1370+ }
1371+
1372+ do /* One iteration per field: find its end, extract it, skip the blanks after it. */
1373+ {
1374+ char *sep;
1375+ state_bak = state;
1376+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); /* Decode the first character of the field. */
1377+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
1378+ {
1379+ mblength = 1;
1380+ state = state_bak;
1381+ break; /* Leaves the do-while; the rest is extracted by the final extract_field call. */
1382+ }
1383+ mblength = (mblength < 1) ? 1 : mblength;
1384+
1385+ sep = ptr + mblength;
1386+ while (sep < lim) /* Find the end of the field: the next blank. */
1387+ {
1388+ state_bak = state;
1389+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1390+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
1391+ {
1392+ mblength = 1;
1393+ state = state_bak;
1394+ break; /* Exits only this inner scan; sep stays at the undecodable byte. */
1395+ }
1396+ mblength = (mblength < 1) ? 1 : mblength;
1397+
1398+ if (iswblank (wc))
1399+ break;
1400+
1401+ sep += mblength;
1402+ }
1403+
1404+ extract_field (line, ptr, sep - ptr);
1405+ if (sep >= lim) /* The field ran up to the limit: no more fields. */
1406+ return;
1407+
1408+ state_bak = state;
1409+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); /* Re-decode the character at sep to learn its length. */
1410+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
1411+ {
1412+ mblength = 1;
1413+ state = state_bak;
1414+ break;
1415+ }
1416+ mblength = (mblength < 1) ? 1 : mblength;
1417+
1418+ ptr = sep + mblength;
1419+ while (ptr < lim) /* Skip the remaining blanks before the next field. */
1420+ {
1421+ state_bak = state;
1422+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1423+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
1424+ {
1425+ mblength = 1;
1426+ state = state_bak;
1427+ break;
1428+ }
1429+ mblength = (mblength < 1) ? 1 : mblength;
1430+
1431+ if (!iswblank (wc))
1432+ break;
1433+
1434+ ptr += mblength;
1435+ }
1436+ }
1437+ while (ptr < lim);
1438+ }
1439+
1440+ extract_field (line, ptr, lim - ptr); /* Whatever lies between ptr and lim is extracted as the final field. */
1441+}
1442+#endif
1443+
1444 static void
1445 freeline (struct line *line)
1446 {
6987acf5 1447@@ -313,56 +472,115 @@ keycmp (struct line const *line1, struct
56ae3f82
SS
1448 size_t jf_1, size_t jf_2)
1449 {
1450 /* Start of field to compare in each file. */
1451- char *beg1;
1452- char *beg2;
1453-
1454- size_t len1;
1455- size_t len2; /* Length of fields to compare. */
1456+ char *beg[2];
1457+ char *copy[2];
1458+ size_t len[2]; /* Length of fields to compare. */
1459 int diff;
1460+ int i, j;
1461
1462 if (jf_1 < line1->nfields)
1463 {
1464- beg1 = line1->fields[jf_1].beg;
1465- len1 = line1->fields[jf_1].len;
1466+ beg[0] = line1->fields[jf_1].beg;
1467+ len[0] = line1->fields[jf_1].len;
1468 }
1469 else
1470 {
1471- beg1 = NULL;
1472- len1 = 0;
1473+ beg[0] = NULL;
1474+ len[0] = 0;
1475 }
1476
1477 if (jf_2 < line2->nfields)
1478 {
1479- beg2 = line2->fields[jf_2].beg;
1480- len2 = line2->fields[jf_2].len;
1481+ beg[1] = line2->fields[jf_2].beg;
1482+ len[1] = line2->fields[jf_2].len;
1483 }
1484 else
1485 {
1486- beg2 = NULL;
1487- len2 = 0;
1488+ beg[1] = NULL;
1489+ len[1] = 0;
1490 }
1491
1492- if (len1 == 0)
1493- return len2 == 0 ? 0 : -1;
1494- if (len2 == 0)
1495+ if (len[0] == 0)
1496+ return len[1] == 0 ? 0 : -1;
1497+ if (len[1] == 0)
1498 return 1;
1499
1500 if (ignore_case)
1501 {
1502- /* FIXME: ignore_case does not work with NLS (in particular,
1503- with multibyte chars). */
1504- diff = memcasecmp (beg1, beg2, MIN (len1, len2));
1505+#ifdef HAVE_MBRTOWC
1506+ if (MB_CUR_MAX > 1)
1507+ {
1508+ size_t mblength;
1509+ wchar_t wc, uwc;
1510+ mbstate_t state, state_bak;
1511+
1512+ memset (&state, '\0', sizeof (mbstate_t));
1513+
1514+ for (i = 0; i < 2; i++)
1515+ {
1516+ copy[i] = alloca (len[i] + 1);
1517+
1518+ for (j = 0; j < MIN (len[0], len[1]);)
1519+ {
1520+ state_bak = state;
1521+ mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
1522+
1523+ switch (mblength)
1524+ {
1525+ case (size_t) -1:
1526+ case (size_t) -2:
1527+ state = state_bak;
1528+ /* Fall through */
1529+ case 0:
1530+ mblength = 1;
1531+ break;
1532+
1533+ default:
1534+ uwc = towupper (wc);
1535+
1536+ if (uwc != wc)
1537+ {
1538+ mbstate_t state_wc;
1539+
1540+ memset (&state_wc, '\0', sizeof (mbstate_t));
1541+ wcrtomb (copy[i] + j, uwc, &state_wc);
1542+ }
1543+ else
1544+ memcpy (copy[i] + j, beg[i] + j, mblength);
1545+ }
1546+ j += mblength;
1547+ }
1548+ copy[i][j] = '\0';
1549+ }
1550+ }
1551+ else
1552+#endif
1553+ {
1554+ for (i = 0; i < 2; i++)
1555+ {
1556+ copy[i] = alloca (len[i] + 1);
1557+
1558+ for (j = 0; j < MIN (len[0], len[1]); j++)
1559+ copy[i][j] = toupper (beg[i][j]);
1560+
1561+ copy[i][j] = '\0';
1562+ }
1563+ }
1564 }
1565 else
1566 {
1567- if (hard_LC_COLLATE)
1568- return xmemcoll (beg1, len1, beg2, len2);
1569- diff = memcmp (beg1, beg2, MIN (len1, len2));
1570+ copy[0] = (unsigned char *) beg[0];
e7f6ab54 1571+ copy[1] = (unsigned char *) beg[1];
56ae3f82
SS
1572 }
1573
1574+ if (hard_LC_COLLATE)
1575+ return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
1576+ diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
1577+
1578+
1579 if (diff)
1580 return diff;
1581- return len1 < len2 ? -1 : len1 != len2;
1582+ return len[0] - len[1];
1583 }
1584
1585 /* Check that successive input lines PREV and CURRENT from input file
6987acf5 1586@@ -454,6 +672,11 @@ get_line (FILE *fp, struct line **linep,
56ae3f82 1587 }
e7f6ab54 1588 ++line_no[which - 1];
56ae3f82
SS
1589
1590+#if HAVE_MBRTOWC
1591+ if (MB_CUR_MAX > 1)
1592+ xfields_multibyte (line);
1593+ else
1594+#endif
1595 xfields (line);
1596
1597 if (prevline[which - 1])
6987acf5 1598@@ -553,21 +776,28 @@ prfield (size_t n, struct line const *li
56ae3f82 1599
3badd2da 1600 /* Output all the fields in line, other than the join field. */
56ae3f82
SS
1601
1602+#define PUT_TAB_CHAR \
1603+ do \
1604+ { \
1605+ (tab != NULL) ? \
1606+ fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
1607+ } \
3badd2da 1608+ while (0)
56ae3f82
SS
1609+
1610 static void
3badd2da
SS
1611 prfields (struct line const *line, size_t join_field, size_t autocount)
1612 {
1613 size_t i;
1614 size_t nfields = autoformat ? autocount : line->nfields;
1615- char output_separator = tab < 0 ? ' ' : tab;
1616
1617 for (i = 0; i < join_field && i < nfields; ++i)
1618 {
1619- putchar (output_separator);
1620+ PUT_TAB_CHAR;
1621 prfield (i, line);
1622 }
1623 for (i = join_field + 1; i < nfields; ++i)
1624 {
1625- putchar (output_separator);
1626+ PUT_TAB_CHAR;
1627 prfield (i, line);
1628 }
1629 }
6987acf5 1630@@ -578,7 +808,6 @@ static void
56ae3f82
SS
1631 prjoin (struct line const *line1, struct line const *line2)
1632 {
1633 const struct outlist *outlist;
1634- char output_separator = tab < 0 ? ' ' : tab;
3badd2da
SS
1635 size_t field;
1636 struct line const *line;
56ae3f82 1637
6987acf5 1638@@ -612,7 +841,7 @@ prjoin (struct line const *line1, struct
56ae3f82
SS
1639 o = o->next;
1640 if (o == NULL)
1641 break;
1642- putchar (output_separator);
1643+ PUT_TAB_CHAR;
1644 }
1645 putchar ('\n');
1646 }
6987acf5 1647@@ -1090,21 +1319,46 @@ main (int argc, char **argv)
56ae3f82
SS
1648
1649 case 't':
1650 {
1651- unsigned char newtab = optarg[0];
e7f6ab54 1652+ char *newtab = NULL;
56ae3f82
SS
1653+ size_t newtablen;
1654+ newtab = xstrdup (optarg);
1655+#if HAVE_MBRTOWC
1656+ if (MB_CUR_MAX > 1)
1657+ {
1658+ mbstate_t state;
1659+
1660+ memset (&state, 0, sizeof (mbstate_t));
1661+ newtablen = mbrtowc (NULL, newtab,
1662+ strnlen (newtab, MB_LEN_MAX),
1663+ &state);
1664+ if (newtablen == (size_t) 0
1665+ || newtablen == (size_t) -1
1666+ || newtablen == (size_t) -2)
1667+ newtablen = 1;
1668+ }
1669+ else
1670+#endif
1671+ newtablen = 1;
1672 if (! newtab)
e7f6ab54 1673- newtab = '\n'; /* '' => process the whole line. */
6987acf5 1674+ {
e7f6ab54 1675+ newtab = "\n"; /* '' => process the whole line. */
56ae3f82
SS
1676+ }
1677 else if (optarg[1])
1678 {
1679- if (STREQ (optarg, "\\0"))
1680- newtab = '\0';
1681- else
1682- error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1683- quote (optarg));
1684+ if (newtablen == 1 && newtab[1])
1685+ {
1686+ if (STREQ (newtab, "\\0"))
1687+ newtab[0] = '\0';
1688+ }
1689+ }
1690+ if (tab != NULL && strcmp (tab, newtab))
1691+ {
1692+ free (newtab);
1693+ error (EXIT_FAILURE, 0, _("incompatible tabs"));
1694 }
1695- if (0 <= tab && tab != newtab)
1696- error (EXIT_FAILURE, 0, _("incompatible tabs"));
1697 tab = newtab;
1698- }
1699+ tablen = newtablen;
1700+ }
1701 break;
1702
1703 case NOCHECK_ORDER_OPTION:
6987acf5
MT
1704diff -urNp coreutils-8.16-orig/src/pr.c coreutils-8.16/src/pr.c
1705--- coreutils-8.16-orig/src/pr.c 2012-03-24 21:26:51.000000000 +0100
1706+++ coreutils-8.16/src/pr.c 2012-03-26 17:50:48.000000000 +0200
56ae3f82
SS
1707@@ -312,6 +312,32 @@
1708
1709 #include <getopt.h>
1710 #include <sys/types.h>
1711+
1712+/* Get MB_LEN_MAX. */
1713+#include <limits.h>
1714+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1715+ installation; work around this configuration error. */
1716+#if !defined MB_LEN_MAX || MB_LEN_MAX == 1
1717+# define MB_LEN_MAX 16
1718+#endif
1719+
1720+/* Get MB_CUR_MAX. */
1721+#include <stdlib.h>
1722+
1723+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
1724+/* Get mbstate_t, mbrtowc(), wcwidth(). */
1725+#if HAVE_WCHAR_H
1726+# include <wchar.h>
1727+#endif
1728+
1729+/* Get iswprint(). -- for wcwidth(). */
1730+#if HAVE_WCTYPE_H
1731+# include <wctype.h>
1732+#endif
1733+#if !defined iswprint && !HAVE_ISWPRINT
1734+# define iswprint(wc) 1
1735+#endif
1736+
1737 #include "system.h"
1738 #include "error.h"
1555d43c
SS
1739 #include "fadvise.h"
1740@@ -323,6 +349,18 @@
56ae3f82
SS
1741 #include "strftime.h"
1742 #include "xstrtol.h"
1743
1744+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1745+#if HAVE_MBRTOWC && defined mbstate_t
1746+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1747+#endif
1748+
1749+#ifndef HAVE_DECL_WCWIDTH
1750+"this configure-time declaration test was not run"
1751+#endif
1752+#if !HAVE_DECL_WCWIDTH
1753+extern int wcwidth ();
1754+#endif
1755+
6987acf5 1756 /* The official name of this program (e.g., no 'g' prefix). */
56ae3f82
SS
1757 #define PROGRAM_NAME "pr"
1758
1555d43c 1759@@ -415,7 +453,20 @@ struct COLUMN
56ae3f82
SS
1760
1761 typedef struct COLUMN COLUMN;
1762
1763-static int char_to_clump (char c);
1764+/* Function pointers to switch functions for single byte locale or for
1765+ multibyte locale. If multibyte functions do not exist in your system,
1766+ these pointers always point to the functions for single byte locale. */
1767+static void (*print_char) (char c);
1768+static int (*char_to_clump) (char c);
1769+
1770+/* Functions for single byte locale. */
1771+static void print_char_single (char c);
1772+static int char_to_clump_single (char c);
1773+
1774+/* Functions for multibyte locale. */
1775+static void print_char_multi (char c);
1776+static int char_to_clump_multi (char c);
1777+
1778 static bool read_line (COLUMN *p);
1779 static bool print_page (void);
1780 static bool print_stored (COLUMN *p);
1555d43c 1781@@ -425,6 +476,7 @@ static void print_header (void);
56ae3f82
SS
1782 static void pad_across_to (int position);
1783 static void add_line_number (COLUMN *p);
1784 static void getoptarg (char *arg, char switch_char, char *character,
1785+ int *character_length, int *character_width,
1786 int *number);
56ae3f82 1787 static void print_files (int number_of_files, char **av);
fa4603be 1788 static void init_parameters (int number_of_files);
6987acf5 1789@@ -438,7 +490,6 @@ static void store_char (char c);
56ae3f82
SS
1790 static void pad_down (int lines);
1791 static void read_rest_of_line (COLUMN *p);
1792 static void skip_read (COLUMN *p, int column_number);
1793-static void print_char (char c);
1794 static void cleanup (void);
1795 static void print_sep_string (void);
1796 static void separator_string (const char *optarg_S);
6987acf5 1797@@ -450,7 +501,7 @@ static COLUMN *column_vector;
56ae3f82
SS
1798 we store the leftmost columns contiguously in buff.
1799 To print a line from buff, get the index of the first character
1800 from line_vector[i], and print up to line_vector[i + 1]. */
1801-static char *buff;
1802+static unsigned char *buff;
1803
1804 /* Index of the position in buff where the next character
1805 will be stored. */
6987acf5 1806@@ -554,7 +605,7 @@ static int chars_per_column;
56ae3f82
SS
1807 static bool untabify_input = false;
1808
1809 /* (-e) The input tab character. */
1810-static char input_tab_char = '\t';
1811+static char input_tab_char[MB_LEN_MAX] = "\t";
1812
1813 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ...
1814 where the leftmost column is 1. */
6987acf5 1815@@ -564,7 +615,10 @@ static int chars_per_input_tab = 8;
56ae3f82
SS
1816 static bool tabify_output = false;
1817
1818 /* (-i) The output tab character. */
1819-static char output_tab_char = '\t';
1820+static char output_tab_char[MB_LEN_MAX] = "\t";
1821+
1822+/* (-i) The byte length of output tab character. */
1823+static int output_tab_char_length = 1;
1824
1825 /* (-i) The width of the output tab. */
1826 static int chars_per_output_tab = 8;
6987acf5 1827@@ -638,7 +692,13 @@ static int power_10;
56ae3f82
SS
1828 static bool numbered_lines = false;
1829
1830 /* (-n) Character which follows each line number. */
1831-static char number_separator = '\t';
1832+static char number_separator[MB_LEN_MAX] = "\t";
1833+
1834+/* (-n) The byte length of the character which follows each line number. */
1835+static int number_separator_length = 1;
1836+
1837+/* (-n) The character width of the character which follows each line number. */
1838+static int number_separator_width = 0;
1839
1840 /* (-n) line counting starts with 1st line of input file (not with 1st
1841 line of 1st page printed). */
6987acf5
MT
1842@@ -691,6 +751,7 @@ static bool use_col_separator = false;
1843 -a|COLUMN|-m is a 'space' and with the -J option a 'tab'. */
56ae3f82
SS
1844 static char *col_sep_string = (char *) "";
1845 static int col_sep_length = 0;
1846+static int col_sep_width = 0;
1847 static char *column_separator = (char *) " ";
1848 static char *line_separator = (char *) "\t";
1849
6987acf5 1850@@ -847,6 +908,13 @@ separator_string (const char *optarg_S)
56ae3f82
SS
1851 col_sep_length = (int) strlen (optarg_S);
1852 col_sep_string = xmalloc (col_sep_length + 1);
1853 strcpy (col_sep_string, optarg_S);
1854+
1855+#if HAVE_MBRTOWC
1856+ if (MB_CUR_MAX > 1)
1857+ col_sep_width = mbswidth (col_sep_string, 0);
1858+ else
1859+#endif
1860+ col_sep_width = col_sep_length;
1861 }
1862
1863 int
6987acf5 1864@@ -871,6 +939,21 @@ main (int argc, char **argv)
56ae3f82
SS
1865
1866 atexit (close_stdout);
1867
1868+/* Define which functions are used, the ones for single byte locale or the ones
1869+ for multibyte locale. */
1870+#if HAVE_MBRTOWC
1871+ if (MB_CUR_MAX > 1)
1872+ {
1873+ print_char = print_char_multi;
1874+ char_to_clump = char_to_clump_multi;
1875+ }
1876+ else
1877+#endif
1878+ {
1879+ print_char = print_char_single;
1880+ char_to_clump = char_to_clump_single;
1881+ }
1882+
1883 n_files = 0;
1884 file_names = (argc > 1
1885 ? xmalloc ((argc - 1) * sizeof (char *))
6987acf5 1886@@ -947,8 +1030,12 @@ main (int argc, char **argv)
56ae3f82
SS
1887 break;
1888 case 'e':
1889 if (optarg)
1890- getoptarg (optarg, 'e', &input_tab_char,
1891- &chars_per_input_tab);
1892+ {
1893+ int dummy_length, dummy_width;
1894+
1895+ getoptarg (optarg, 'e', input_tab_char, &dummy_length,
1896+ &dummy_width, &chars_per_input_tab);
1897+ }
1898 /* Could check tab width > 0. */
1899 untabify_input = true;
1900 break;
6987acf5 1901@@ -961,8 +1048,12 @@ main (int argc, char **argv)
56ae3f82
SS
1902 break;
1903 case 'i':
1904 if (optarg)
1905- getoptarg (optarg, 'i', &output_tab_char,
1906- &chars_per_output_tab);
1907+ {
1908+ int dummy_width;
1909+
1910+ getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length,
1911+ &dummy_width, &chars_per_output_tab);
1912+ }
1913 /* Could check tab width > 0. */
1914 tabify_output = true;
1915 break;
6987acf5 1916@@ -989,8 +1080,8 @@ main (int argc, char **argv)
56ae3f82
SS
1917 case 'n':
1918 numbered_lines = true;
1919 if (optarg)
1920- getoptarg (optarg, 'n', &number_separator,
1921- &chars_per_number);
1922+ getoptarg (optarg, 'n', number_separator, &number_separator_length,
1923+ &number_separator_width, &chars_per_number);
1924 break;
1925 case 'N':
1926 skip_count = false;
6987acf5 1927@@ -1029,7 +1120,7 @@ main (int argc, char **argv)
56ae3f82
SS
1928 old_s = false;
1929 /* Reset an additional input of -s, -S dominates -s */
1930 col_sep_string = bad_cast ("");
1931- col_sep_length = 0;
1932+ col_sep_length = col_sep_width = 0;
1933 use_col_separator = true;
1934 if (optarg)
1935 separator_string (optarg);
6987acf5 1936@@ -1186,10 +1277,45 @@ main (int argc, char **argv)
56ae3f82
SS
1937 a number. */
1938
1939 static void
1940-getoptarg (char *arg, char switch_char, char *character, int *number)
1941+getoptarg (char *arg, char switch_char, char *character, int *character_length,
1942+ int *character_width, int *number)
1943 {
1944 if (!ISDIGIT (*arg))
1945- *character = *arg++;
1946+ {
1947+#ifdef HAVE_MBRTOWC
1948+ if (MB_CUR_MAX > 1) /* for multibyte locale. */
1949+ {
1950+ wchar_t wc;
1951+ size_t mblength;
1952+ int width;
1953+ mbstate_t state = {'\0'};
1954+
1955+ mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state);
1956+
1957+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
1958+ {
1959+ *character_length = 1;
1960+ *character_width = 1;
1961+ }
1962+ else
1963+ {
1964+ *character_length = (mblength < 1) ? 1 : mblength;
1965+ width = wcwidth (wc);
1966+ *character_width = (width < 0) ? 0 : width;
1967+ }
1968+
1969+ strncpy (character, arg, *character_length);
1970+ arg += *character_length;
1971+ }
1972+ else /* for single byte locale. */
1973+#endif
1974+ {
1975+ *character = *arg++;
1976+ *character_length = 1;
1977+ *character_width = 1;
1978+ }
1979+ }
1980+
1981 if (*arg)
1982 {
1983 long int tmp_long;
6987acf5
MT
1984@@ -1211,6 +1337,11 @@ static void
1985 init_parameters (int number_of_files)
1986 {
1987 int chars_used_by_number = 0;
1988+ int mb_len = 1;
1989+#if HAVE_MBRTOWC
1990+ if (MB_CUR_MAX > 1)
1991+ mb_len = MB_LEN_MAX;
1992+#endif
1993
1994 lines_per_body = lines_per_page - lines_per_header - lines_per_footer;
1995 if (lines_per_body <= 0)
1996@@ -1248,7 +1379,7 @@ init_parameters (int number_of_files)
56ae3f82
SS
1997 else
1998 col_sep_string = column_separator;
1999
2000- col_sep_length = 1;
2001+ col_sep_length = col_sep_width = 1;
2002 use_col_separator = true;
2003 }
2004 /* It's rather pointless to define a TAB separator with column
6987acf5 2005@@ -1279,11 +1410,11 @@ init_parameters (int number_of_files)
56ae3f82
SS
2006 TAB_WIDTH (chars_per_input_tab, chars_per_number); */
2007
2008 /* Estimate chars_per_text without any margin and keep it constant. */
2009- if (number_separator == '\t')
2010+ if (number_separator[0] == '\t')
2011 number_width = chars_per_number +
2012 TAB_WIDTH (chars_per_default_tab, chars_per_number);
2013 else
2014- number_width = chars_per_number + 1;
2015+ number_width = chars_per_number + number_separator_width;
2016
2017 /* The number is part of the column width unless we are
2018 printing files in parallel. */
6987acf5 2019@@ -1298,7 +1429,7 @@ init_parameters (int number_of_files)
56ae3f82
SS
2020 }
2021
2022 chars_per_column = (chars_per_line - chars_used_by_number -
2023- (columns - 1) * col_sep_length) / columns;
2024+ (columns - 1) * col_sep_width) / columns;
2025
2026 if (chars_per_column < 1)
2027 error (EXIT_FAILURE, 0, _("page width too narrow"));
6987acf5
MT
2028@@ -1315,7 +1446,7 @@ init_parameters (int number_of_files)
2029 We've to use 8 as the lower limit, if we use chars_per_default_tab = 8
2030 to expand a tab which is not an input_tab-char. */
2031 free (clump_buff);
2032- clump_buff = xmalloc (MAX (8, chars_per_input_tab));
2033+ clump_buff = xmalloc (mb_len * MAX (8, chars_per_input_tab));
2034 }
2035 \f
2036 /* Open the necessary files,
2037@@ -1423,7 +1554,7 @@ init_funcs (void)
56ae3f82
SS
2038
2039 /* Enlarge p->start_position of first column to use the same form of
2040 padding_not_printed with all columns. */
2041- h = h + col_sep_length;
2042+ h = h + col_sep_width;
2043
2044 /* This loop takes care of all but the rightmost column. */
2045
6987acf5 2046@@ -1457,7 +1588,7 @@ init_funcs (void)
56ae3f82
SS
2047 }
2048 else
2049 {
2050- h = h_next + col_sep_length;
2051+ h = h_next + col_sep_width;
2052 h_next = h + chars_per_column;
2053 }
2054 }
6987acf5 2055@@ -1748,9 +1879,9 @@ static void
56ae3f82
SS
2056 align_column (COLUMN *p)
2057 {
2058 padding_not_printed = p->start_position;
2059- if (padding_not_printed - col_sep_length > 0)
2060+ if (padding_not_printed - col_sep_width > 0)
2061 {
2062- pad_across_to (padding_not_printed - col_sep_length);
2063+ pad_across_to (padding_not_printed - col_sep_width);
2064 padding_not_printed = ANYWHERE;
2065 }
2066
6987acf5 2067@@ -2021,13 +2152,13 @@ store_char (char c)
56ae3f82
SS
2068 /* May be too generous. */
2069 buff = X2REALLOC (buff, &buff_allocated);
2070 }
2071- buff[buff_current++] = c;
2072+ buff[buff_current++] = (unsigned char) c;
2073 }
2074
2075 static void
2076 add_line_number (COLUMN *p)
2077 {
2078- int i;
2079+ int i, j;
2080 char *s;
2081 int left_cut;
2082
6987acf5 2083@@ -2050,22 +2181,24 @@ add_line_number (COLUMN *p)
56ae3f82 2084 /* Tabification is assumed for multiple columns, also for n-separators,
6987acf5 2085 but 'default n-separator = TAB' hasn't been given priority over
56ae3f82
SS
2086 equal column_width also specified by POSIX. */
2087- if (number_separator == '\t')
2088+ if (number_separator[0] == '\t')
2089 {
2090 i = number_width - chars_per_number;
2091 while (i-- > 0)
2092 (p->char_func) (' ');
2093 }
2094 else
2095- (p->char_func) (number_separator);
2096+ for (j = 0; j < number_separator_length; j++)
2097+ (p->char_func) (number_separator[j]);
2098 }
2099 else
2100 /* To comply with POSIX, we avoid any expansion of default TAB
2101 separator with a single column output. No column_width requirement
2102 has to be considered. */
2103 {
2104- (p->char_func) (number_separator);
2105- if (number_separator == '\t')
2106+ for (j = 0; j < number_separator_length; j++)
2107+ (p->char_func) (number_separator[j]);
2108+ if (number_separator[0] == '\t')
2109 output_position = POS_AFTER_TAB (chars_per_output_tab,
2110 output_position);
2111 }
6987acf5 2112@@ -2226,7 +2359,7 @@ print_white_space (void)
56ae3f82
SS
2113 while (goal - h_old > 1
2114 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
2115 {
2116- putchar (output_tab_char);
2117+ fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout);
2118 h_old = h_new;
2119 }
2120 while (++h_old <= goal)
6987acf5 2121@@ -2246,6 +2379,7 @@ print_sep_string (void)
56ae3f82
SS
2122 {
2123 char *s;
2124 int l = col_sep_length;
2125+ int not_space_flag;
2126
2127 s = col_sep_string;
2128
6987acf5 2129@@ -2259,6 +2393,7 @@ print_sep_string (void)
56ae3f82
SS
2130 {
2131 for (; separators_not_printed > 0; --separators_not_printed)
2132 {
2133+ not_space_flag = 0;
2134 while (l-- > 0)
2135 {
2136 /* 3 types of sep_strings: spaces only, spaces and chars,
6987acf5 2137@@ -2272,12 +2407,15 @@ print_sep_string (void)
56ae3f82
SS
2138 }
2139 else
2140 {
2141+ not_space_flag = 1;
2142 if (spaces_not_printed > 0)
2143 print_white_space ();
2144 putchar (*s++);
2145- ++output_position;
2146 }
2147 }
2148+ if (not_space_flag)
2149+ output_position += col_sep_width;
2150+
2151 /* sep_string ends with some spaces */
2152 if (spaces_not_printed > 0)
2153 print_white_space ();
6987acf5 2154@@ -2305,7 +2443,7 @@ print_clump (COLUMN *p, int n, char *clu
56ae3f82
SS
2155 required number of tabs and spaces. */
2156
2157 static void
2158-print_char (char c)
2159+print_char_single (char c)
2160 {
2161 if (tabify_output)
2162 {
6987acf5 2163@@ -2329,6 +2467,74 @@ print_char (char c)
56ae3f82
SS
2164 putchar (c);
2165 }
2166
2167+#ifdef HAVE_MBRTOWC
2168+static void
2169+print_char_multi (char c) /* Output byte C; when tabifying, assemble multibyte characters across calls. */
2170+{
2171+ static size_t mbc_pos = 0; /* Number of pending bytes held in mbc. */
2172+ static char mbc[MB_LEN_MAX] = {'\0'}; /* Bytes received so far that have not been written. */
2173+ static mbstate_t state = {'\0'}; /* Conversion state kept between calls. */
2174+ mbstate_t state_bak; /* Copy of state used to undo a failed conversion. */
2175+ wchar_t wc;
2176+ size_t mblength;
2177+ int width;
2178+
2179+ if (tabify_output)
2180+ {
2181+ state_bak = state;
2182+ mbc[mbc_pos++] = c; /* Append the new byte to the pending sequence. */
2183+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
2184+
2185+ while (mbc_pos > 0) /* NOTE(review): mblength is not recomputed inside this loop. */
2186+ {
2187+ switch (mblength)
2188+ {
2189+ case (size_t)-2: /* Incomplete character: keep the bytes and wait for the next call. */
2190+ state = state_bak;
2191+ return;
2192+
2193+ case (size_t)-1: /* Invalid sequence: emit the first pending byte and count one column. */
2194+ state = state_bak;
2195+ ++output_position;
2196+ putchar (mbc[0]);
2197+ memmove (mbc, mbc + 1, MB_CUR_MAX - 1);
2198+ --mbc_pos;
2199+ break;
2200+
2201+ case 0: /* mbrtowc returned 0: handle as a one-byte character. */
2202+ mblength = 1; /* Falls through into the default case. */
2203+
2204+ default:
2205+ if (wc == L' ') /* Spaces are only counted here, not written. */
2206+ {
2207+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
2208+ --mbc_pos; /* NOTE(review): decrements by 1, not by mblength. */
2209+ ++spaces_not_printed;
2210+ return;
2211+ }
2212+ else if (spaces_not_printed > 0) /* Deferred spaces go out first via print_white_space. */
2213+ print_white_space ();
2214+
2215+ /* Nonprintables are assumed to have width 0, except L'\b'. */
2216+ if ((width = wcwidth (wc)) < 1)
2217+ {
2218+ if (wc == L'\b')
2219+ --output_position;
2220+ }
2221+ else
2222+ output_position += width;
2223+
2224+ fwrite (mbc, sizeof(char), mblength, stdout); /* Write the character's bytes... */
2225+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength); /* ...and drop them from the pending buffer. */
2226+ mbc_pos -= mblength;
2227+ }
2228+ }
2229+ return;
2230+ }
2231+ putchar (c); /* Not tabifying: pass the byte through unchanged. */
2232+}
2233+#endif
2234+
2235 /* Skip to page PAGE before printing.
2236 PAGE may be larger than total number of pages. */
2237
6987acf5 2238@@ -2508,9 +2714,9 @@ read_line (COLUMN *p)
56ae3f82
SS
2239 align_empty_cols = false;
2240 }
2241
2242- if (padding_not_printed - col_sep_length > 0)
2243+ if (padding_not_printed - col_sep_width > 0)
2244 {
2245- pad_across_to (padding_not_printed - col_sep_length);
2246+ pad_across_to (padding_not_printed - col_sep_width);
2247 padding_not_printed = ANYWHERE;
2248 }
2249
6987acf5 2250@@ -2611,9 +2817,9 @@ print_stored (COLUMN *p)
56ae3f82
SS
2251 }
2252 }
2253
2254- if (padding_not_printed - col_sep_length > 0)
2255+ if (padding_not_printed - col_sep_width > 0)
2256 {
2257- pad_across_to (padding_not_printed - col_sep_length);
2258+ pad_across_to (padding_not_printed - col_sep_width);
2259 padding_not_printed = ANYWHERE;
2260 }
2261
6987acf5 2262@@ -2626,8 +2832,8 @@ print_stored (COLUMN *p)
56ae3f82
SS
2263 if (spaces_not_printed == 0)
2264 {
2265 output_position = p->start_position + end_vector[line];
2266- if (p->start_position - col_sep_length == chars_per_margin)
2267- output_position -= col_sep_length;
2268+ if (p->start_position - col_sep_width == chars_per_margin)
2269+ output_position -= col_sep_width;
2270 }
2271
2272 return true;
6987acf5 2273@@ -2646,7 +2852,7 @@ print_stored (COLUMN *p)
56ae3f82
SS
2274 number of characters is 1.) */
2275
2276 static int
2277-char_to_clump (char c)
2278+char_to_clump_single (char c)
2279 {
2280 unsigned char uc = c;
2281 char *s = clump_buff;
6987acf5 2282@@ -2656,10 +2862,10 @@ char_to_clump (char c)
56ae3f82
SS
2283 int chars;
2284 int chars_per_c = 8;
2285
2286- if (c == input_tab_char)
2287+ if (c == input_tab_char[0])
2288 chars_per_c = chars_per_input_tab;
2289
2290- if (c == input_tab_char || c == '\t')
2291+ if (c == input_tab_char[0] || c == '\t')
2292 {
2293 width = TAB_WIDTH (chars_per_c, input_position);
2294
6987acf5 2295@@ -2740,6 +2946,154 @@ char_to_clump (char c)
56ae3f82
SS
2296 return chars;
2297 }
2298
2299+#ifdef HAVE_MBRTOWC
2300+static int
2301+char_to_clump_multi (char c)
2302+{
2303+ static size_t mbc_pos = 0;
2304+ static char mbc[MB_LEN_MAX] = {'\0'};
2305+ static mbstate_t state = {'\0'};
2306+ mbstate_t state_bak;
2307+ wchar_t wc;
2308+ size_t mblength;
2309+ int wc_width;
2310+ register char *s = clump_buff;
2311+ register int i, j;
2312+ char esc_buff[4];
2313+ int width;
2314+ int chars;
2315+ int chars_per_c = 8;
2316+
2317+ state_bak = state;
2318+ mbc[mbc_pos++] = c;
2319+ mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
2320+
2321+ width = 0;
2322+ chars = 0;
2323+ while (mbc_pos > 0)
2324+ {
2325+ switch (mblength)
2326+ {
2327+ case (size_t)-2:
2328+ state = state_bak;
2329+ return 0;
2330+
2331+ case (size_t)-1:
2332+ state = state_bak;
2333+ mblength = 1;
2334+
2335+ if (use_esc_sequence || use_cntrl_prefix)
2336+ {
2337+ width = +4;
2338+ chars = +4;
2339+ *s++ = '\\';
6987acf5 2340+ sprintf (esc_buff, "%03o", (unsigned char) mbc[0]);
56ae3f82
SS
2341+ for (i = 0; i <= 2; ++i)
2342+ *s++ = (int) esc_buff[i];
2343+ }
2344+ else
2345+ {
2346+ width += 1;
2347+ chars += 1;
2348+ *s++ = mbc[0];
2349+ }
2350+ break;
2351+
2352+ case 0:
2353+ mblength = 1;
2354+ /* Fall through */
2355+
2356+ default:
2357+ if (memcmp (mbc, input_tab_char, mblength) == 0)
2358+ chars_per_c = chars_per_input_tab;
2359+
2360+ if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t')
2361+ {
2362+ int width_inc;
2363+
2364+ width_inc = TAB_WIDTH (chars_per_c, input_position);
2365+ width += width_inc;
2366+
2367+ if (untabify_input)
2368+ {
2369+ for (i = width_inc; i; --i)
2370+ *s++ = ' ';
2371+ chars += width_inc;
2372+ }
2373+ else
2374+ {
2375+ for (i = 0; i < mblength; i++)
2376+ *s++ = mbc[i];
2377+ chars += mblength;
2378+ }
2379+ }
2380+ else if ((wc_width = wcwidth (wc)) < 1)
2381+ {
2382+ if (use_esc_sequence)
2383+ {
2384+ for (i = 0; i < mblength; i++)
2385+ {
2386+ width += 4;
2387+ chars += 4;
2388+ *s++ = '\\';
6987acf5 2389+ sprintf (esc_buff, "%03o", (unsigned char) mbc[i]);
56ae3f82
SS
2390+ for (j = 0; j <= 2; ++j)
2391+ *s++ = (int) esc_buff[j];
2392+ }
2393+ }
2394+ else if (use_cntrl_prefix)
2395+ {
2396+ if (wc < 0200)
2397+ {
2398+ width += 2;
2399+ chars += 2;
2400+ *s++ = '^';
2401+ *s++ = wc ^ 0100;
2402+ }
2403+ else
2404+ {
2405+ for (i = 0; i < mblength; i++)
2406+ {
2407+ width += 4;
2408+ chars += 4;
2409+ *s++ = '\\';
6987acf5 2410+ sprintf (esc_buff, "%03o", (unsigned char) mbc[i]);
56ae3f82
SS
2411+ for (j = 0; j <= 2; ++j)
2412+ *s++ = (int) esc_buff[j];
2413+ }
2414+ }
2415+ }
2416+ else if (wc == L'\b')
2417+ {
2418+ width += -1;
2419+ chars += 1;
2420+ *s++ = c;
2421+ }
2422+ else
2423+ {
2424+ width += 0;
2425+ chars += mblength;
2426+ for (i = 0; i < mblength; i++)
2427+ *s++ = mbc[i];
2428+ }
2429+ }
2430+ else
2431+ {
2432+ width += wc_width;
2433+ chars += mblength;
2434+ for (i = 0; i < mblength; i++)
2435+ *s++ = mbc[i];
2436+ }
2437+ }
2438+ memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
2439+ mbc_pos -= mblength;
2440+ }
2441+
2442+ input_position += width;
2443+ return chars;
2444+}
2445+#endif
2446+
2447 /* We've just printed some files and need to clean up things before
2448 looking for more options and printing the next batch of files.
2449
6987acf5
MT
2450diff -urNp coreutils-8.16-orig/src/sort.c coreutils-8.16/src/sort.c
2451--- coreutils-8.16-orig/src/sort.c 2012-03-24 21:26:51.000000000 +0100
2452+++ coreutils-8.16/src/sort.c 2012-03-26 17:35:09.000000000 +0200
1555d43c 2453@@ -22,11 +22,20 @@
56ae3f82
SS
2454
2455 #include <config.h>
2456
2457+#include <assert.h>
2458 #include <getopt.h>
1555d43c 2459 #include <pthread.h>
56ae3f82
SS
2460 #include <sys/types.h>
2461 #include <sys/wait.h>
2462 #include <signal.h>
2463+#if HAVE_WCHAR_H
2464+# include <wchar.h>
2465+#endif
2466+/* Get isw* functions. */
2467+#if HAVE_WCTYPE_H
2468+# include <wctype.h>
2469+#endif
2470+
2471 #include "system.h"
2472 #include "argmatch.h"
2473 #include "error.h"
e7f6ab54 2474@@ -167,12 +176,34 @@ static int thousands_sep;
56ae3f82 2475
56ae3f82
SS
2476 /* Nonzero if the corresponding locales are hard. */
2477 static bool hard_LC_COLLATE;
2478-#if HAVE_NL_LANGINFO
2479+#if HAVE_LANGINFO_CODESET
2480 static bool hard_LC_TIME;
2481 #endif
2482
2483 #define NONZERO(x) ((x) != 0)
2484
2485+/* Get a multibyte character's byte length. */
2486+#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
2487+ do \
2488+ { \
2489+ wchar_t wc; \
2490+ mbstate_t state_bak; \
2491+ \
2492+ state_bak = STATE; \
2493+ mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \
2494+ \
2495+ switch (MBLENGTH) \
2496+ { \
2497+ case (size_t)-1: \
2498+ case (size_t)-2: \
2499+ STATE = state_bak; \
2500+ /* Fall through. */ \
2501+ case 0: \
2502+ MBLENGTH = 1; \
2503+ } \
2504+ } \
2505+ while (0)
2506+
2507 /* The kind of blanks for '-b' to skip in various options. */
2508 enum blanktype { bl_start, bl_end, bl_both };
2509
e7f6ab54 2510@@ -343,13 +374,11 @@ static bool reverse;
56ae3f82
SS
2511 they were read if all keys compare equal. */
2512 static bool stable;
2513
2514-/* If TAB has this value, blanks separate fields. */
2515-enum { TAB_DEFAULT = CHAR_MAX + 1 };
2516-
2517-/* Tab character separating fields. If TAB_DEFAULT, then fields are
2518+/* Tab character separating fields. If tab_length is 0, then fields are
2519 separated by the empty string between a non-blank character and a blank
2520 character. */
2521-static int tab = TAB_DEFAULT;
2522+static char tab[MB_LEN_MAX + 1];
2523+static size_t tab_length = 0;
2524
2525 /* Flag to remove consecutive duplicate lines from the output.
2526 Only the last of a sequence of equal lines will be output. */
6987acf5 2527@@ -782,6 +811,46 @@ reap_all (void)
407c5be3 2528 reap (-1);
56ae3f82
SS
2529 }
2530
2531+/* Function pointers. */
2532+static void
2533+(*inittables) (void);
2534+static char *
2535+(*begfield) (const struct line*, const struct keyfield *);
2536+static char *
2537+(*limfield) (const struct line*, const struct keyfield *);
1555d43c
SS
2538+static void
2539+(*skipblanks) (char **ptr, char *lim);
56ae3f82 2540+static int
1555d43c 2541+(*getmonth) (char const *, size_t, char **);
56ae3f82
SS
2542+static int
2543+(*keycompare) (const struct line *, const struct line *);
2544+static int
2545+(*numcompare) (const char *, const char *);
2546+
2547+/* Test for a white space multibyte character.
2548+ Set *LENGTH to the byte length of the investigated multibyte character. */
2549+#if HAVE_MBRTOWC
2550+static int
2551+ismbblank (const char *str, size_t len, size_t *length)
2552+{
2553+ size_t mblength;
2554+ wchar_t wc;
2555+ mbstate_t state;
2556+
2557+ memset (&state, '\0', sizeof(mbstate_t));
2558+ mblength = mbrtowc (&wc, str, len, &state);
2559+
2560+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
2561+ {
2562+ *length = 1;
2563+ return 0;
2564+ }
2565+
2566+ *length = (mblength < 1) ? 1 : mblength;
2567+ return iswblank (wc);
2568+}
2569+#endif
2570+
2571 /* Clean up any remaining temporary files. */
2572
2573 static void
6987acf5 2574@@ -1214,7 +1283,7 @@ zaptemp (char const *name)
56ae3f82
SS
2575 free (node);
2576 }
2577
2578-#if HAVE_NL_LANGINFO
2579+#if HAVE_LANGINFO_CODESET
2580
2581 static int
1555d43c 2582 struct_month_cmp (void const *m1, void const *m2)
6987acf5 2583@@ -1229,7 +1298,7 @@ struct_month_cmp (void const *m1, void c
56ae3f82
SS
2584 /* Initialize the character class tables. */
2585
2586 static void
2587-inittables (void)
2588+inittables_uni (void)
2589 {
2590 size_t i;
2591
6987acf5 2592@@ -1241,7 +1310,7 @@ inittables (void)
56ae3f82
SS
2593 fold_toupper[i] = toupper (i);
2594 }
2595
2596-#if HAVE_NL_LANGINFO
2597+#if HAVE_LANGINFO_CODESET
2598 /* If we're not in the "C" locale, read different names for months. */
2599 if (hard_LC_TIME)
2600 {
6987acf5 2601@@ -1323,6 +1392,84 @@ specify_nmerge (int oi, char c, char con
56ae3f82
SS
2602 xstrtol_fatal (e, oi, c, long_options, s);
2603 }
2604
2605+#if HAVE_MBRTOWC
2606+static void
2607+inittables_mb (void)
2608+{
2609+ int i, j, k, l;
1555d43c 2610+ char *name, *s, *lc_time, *lc_ctype;
56ae3f82
SS
2611+ size_t s_len, mblength;
2612+ char mbc[MB_LEN_MAX];
2613+ wchar_t wc, pwc;
2614+ mbstate_t state_mb, state_wc;
2615+
1555d43c
SS
2616+ lc_time = setlocale (LC_TIME, "");
2617+ if (lc_time)
2618+ lc_time = xstrdup (lc_time);
2619+
2620+ lc_ctype = setlocale (LC_CTYPE, "");
2621+ if (lc_ctype)
2622+ lc_ctype = xstrdup (lc_ctype);
2623+
2624+ if (lc_time && lc_ctype)
2625+ /* temporarily set LC_CTYPE to match LC_TIME, so that we can convert
2626+ * the names of months to upper case */
2627+ setlocale (LC_CTYPE, lc_time);
2628+
56ae3f82
SS
2629+ for (i = 0; i < MONTHS_PER_YEAR; i++)
2630+ {
2631+ s = (char *) nl_langinfo (ABMON_1 + i);
2632+ s_len = strlen (s);
2633+ monthtab[i].name = name = (char *) xmalloc (s_len + 1);
2634+ monthtab[i].val = i + 1;
2635+
2636+ memset (&state_mb, '\0', sizeof (mbstate_t));
2637+ memset (&state_wc, '\0', sizeof (mbstate_t));
2638+
2639+ for (j = 0; j < s_len;)
2640+ {
2641+ if (!ismbblank (s + j, s_len - j, &mblength))
2642+ break;
2643+ j += mblength;
2644+ }
2645+
2646+ for (k = 0; j < s_len;)
2647+ {
2648+ mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
2649+ assert (mblength != (size_t)-1 && mblength != (size_t)-2);
2650+ if (mblength == 0)
2651+ break;
2652+
2653+ pwc = towupper (wc);
2654+ if (pwc == wc)
2655+ {
2656+ memcpy (mbc, s + j, mblength);
2657+ j += mblength;
2658+ }
2659+ else
2660+ {
2661+ j += mblength;
2662+ mblength = wcrtomb (mbc, pwc, &state_wc);
2663+ assert (mblength != (size_t)0 && mblength != (size_t)-1);
2664+ }
2665+
2666+ for (l = 0; l < mblength; l++)
2667+ name[k++] = mbc[l];
2668+ }
2669+ name[k] = '\0';
2670+ }
2671+ qsort ((void *) monthtab, MONTHS_PER_YEAR,
2672+ sizeof (struct month), struct_month_cmp);
1555d43c
SS
2673+
2674+ if (lc_time && lc_ctype)
2675+ /* restore the original locales */
2676+ setlocale (LC_CTYPE, lc_ctype);
2677+
2678+ free (lc_ctype);
2679+ free (lc_time);
56ae3f82
SS
2680+}
2681+#endif
2682+
2683 /* Specify the amount of main memory to use when sorting. */
2684 static void
2685 specify_sort_size (int oi, char c, char const *s)
6987acf5 2686@@ -1551,7 +1698,7 @@ buffer_linelim (struct buffer const *buf
56ae3f82
SS
2687 by KEY in LINE. */
2688
2689 static char *
1555d43c 2690-begfield (struct line const *line, struct keyfield const *key)
56ae3f82
SS
2691+begfield_uni (const struct line *line, const struct keyfield *key)
2692 {
2693 char *ptr = line->text, *lim = ptr + line->length - 1;
2694 size_t sword = key->sword;
6987acf5 2695@@ -1560,10 +1707,10 @@ begfield (struct line const *line, struc
56ae3f82
SS
2696 /* The leading field separator itself is included in a field when -t
2697 is absent. */
2698
2699- if (tab != TAB_DEFAULT)
2700+ if (tab_length)
2701 while (ptr < lim && sword--)
2702 {
2703- while (ptr < lim && *ptr != tab)
2704+ while (ptr < lim && *ptr != tab[0])
2705 ++ptr;
2706 if (ptr < lim)
2707 ++ptr;
6987acf5 2708@@ -1589,11 +1736,70 @@ begfield (struct line const *line, struc
56ae3f82
SS
2709 return ptr;
2710 }
2711
2712+#if HAVE_MBRTOWC
2713+static char *
2714+begfield_mb (const struct line *line, const struct keyfield *key)
2715+{
2716+ int i;
2717+ char *ptr = line->text, *lim = ptr + line->length - 1;
2718+ size_t sword = key->sword;
2719+ size_t schar = key->schar;
2720+ size_t mblength;
2721+ mbstate_t state;
2722+
2723+ memset (&state, '\0', sizeof(mbstate_t));
2724+
2725+ if (tab_length)
2726+ while (ptr < lim && sword--)
2727+ {
2728+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
2729+ {
2730+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2731+ ptr += mblength;
2732+ }
2733+ if (ptr < lim)
2734+ {
2735+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2736+ ptr += mblength;
2737+ }
2738+ }
2739+ else
2740+ while (ptr < lim && sword--)
2741+ {
2742+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2743+ ptr += mblength;
2744+ if (ptr < lim)
2745+ {
2746+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2747+ ptr += mblength;
2748+ }
2749+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
2750+ ptr += mblength;
2751+ }
2752+
2753+ if (key->skipsblanks)
2754+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2755+ ptr += mblength;
2756+
2757+ for (i = 0; i < schar; i++)
2758+ {
2759+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2760+
2761+ if (ptr + mblength > lim)
2762+ break;
2763+ else
2764+ ptr += mblength;
2765+ }
2766+
2767+ return ptr;
2768+}
2769+#endif
2770+
2771 /* Return the limit of (a pointer to the first character after) the field
2772 in LINE specified by KEY. */
2773
2774 static char *
1555d43c 2775-limfield (struct line const *line, struct keyfield const *key)
56ae3f82
SS
2776+limfield_uni (const struct line *line, const struct keyfield *key)
2777 {
2778 char *ptr = line->text, *lim = ptr + line->length - 1;
2779 size_t eword = key->eword, echar = key->echar;
6987acf5
MT
2780@@ -1608,10 +1814,10 @@ limfield (struct line const *line, struc
2781 'beginning' is the first character following the delimiting TAB.
2782 Otherwise, leave PTR pointing at the first 'blank' character after
56ae3f82
SS
2783 the preceding field. */
2784- if (tab != TAB_DEFAULT)
2785+ if (tab_length)
2786 while (ptr < lim && eword--)
2787 {
2788- while (ptr < lim && *ptr != tab)
2789+ while (ptr < lim && *ptr != tab[0])
2790 ++ptr;
2791 if (ptr < lim && (eword || echar))
2792 ++ptr;
6987acf5 2793@@ -1657,10 +1863,10 @@ limfield (struct line const *line, struc
56ae3f82
SS
2794 */
2795
2796 /* Make LIM point to the end of (one byte past) the current field. */
2797- if (tab != TAB_DEFAULT)
2798+ if (tab_length)
2799 {
2800 char *newlim;
2801- newlim = memchr (ptr, tab, lim - ptr);
2802+ newlim = memchr (ptr, tab[0], lim - ptr);
2803 if (newlim)
2804 lim = newlim;
2805 }
6987acf5 2806@@ -1691,6 +1897,130 @@ limfield (struct line const *line, struc
56ae3f82
SS
2807 return ptr;
2808 }
2809
2810+#if HAVE_MBRTOWC
2811+static char *
2812+limfield_mb (const struct line *line, const struct keyfield *key)
2813+{
2814+ char *ptr = line->text, *lim = ptr + line->length - 1;
2815+ size_t eword = key->eword, echar = key->echar;
2816+ int i;
2817+ size_t mblength;
2818+ mbstate_t state;
2819+
2820+ if (echar == 0)
2821+ eword++; /* skip all of end field. */
2822+
2823+ memset (&state, '\0', sizeof(mbstate_t));
2824+
2825+ if (tab_length)
2826+ while (ptr < lim && eword--)
2827+ {
2828+ while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
2829+ {
2830+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2831+ ptr += mblength;
2832+ }
2833+ if (ptr < lim && (eword | echar))
2834+ {
2835+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2836+ ptr += mblength;
2837+ }
2838+ }
2839+ else
2840+ while (ptr < lim && eword--)
2841+ {
2842+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2843+ ptr += mblength;
2844+ if (ptr < lim)
2845+ {
2846+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2847+ ptr += mblength;
2848+ }
2849+ while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
2850+ ptr += mblength;
2851+ }
2852+
2853+
2854+# ifdef POSIX_UNSPECIFIED
2855+ /* Make LIM point to the end of (one byte past) the current field. */
2856+ if (tab_length)
2857+ {
2858+ char *newlim, *p;
2859+
2860+ newlim = NULL;
2861+ for (p = ptr; p < lim;)
2862+ {
2863+ if (memcmp (p, tab, tab_length) == 0)
2864+ {
2865+ newlim = p;
2866+ break;
2867+ }
2868+
2869+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2870+ p += mblength;
2871+ }
2872+ }
2873+ else
2874+ {
2875+ char *newlim;
2876+ newlim = ptr;
2877+
2878+ while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
2879+ newlim += mblength;
2880+ if (ptr < lim)
2881+ {
2882+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2883+ ptr += mblength;
2884+ }
2885+ while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
2886+ newlim += mblength;
2887+ lim = newlim;
2888+ }
2889+# endif
2890+
2891+ if (echar != 0)
2892+ {
2893+ /* If we're skipping leading blanks, don't start counting characters
2894+ * until after skipping past any leading blanks. */
2895+ if (key->skipsblanks)
2896+ while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2897+ ptr += mblength;
2898+
2899+ memset (&state, '\0', sizeof(mbstate_t));
2900+
2901+ /* Advance PTR by ECHAR (if possible), but no further than LIM. */
2902+ for (i = 0; i < echar; i++)
2903+ {
2904+ GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2905+
2906+ if (ptr + mblength > lim)
2907+ break;
2908+ else
2909+ ptr += mblength;
2910+ }
2911+ }
2912+
2913+ return ptr;
2914+}
2915+#endif
1555d43c
SS
2916+
2917+static void
2918+skipblanks_uni (char **ptr, char *lim)
2919+{
2920+ while (*ptr < lim && blanks[to_uchar (**ptr)])
2921+ ++(*ptr);
2922+}
2923+
2924+#if HAVE_MBRTOWC
2925+static void
2926+skipblanks_mb (char **ptr, char *lim)
2927+{
2928+ size_t mblength;
2929+ while (*ptr < lim && ismbblank (*ptr, lim - *ptr, &mblength))
2930+ (*ptr) += mblength;
2931+}
2932+#endif
56ae3f82
SS
2933+
2934 /* Fill BUF reading from FP, moving buf->left bytes from the end
2935 of buf->buf to the beginning first. If EOF is reached and the
2936 file wasn't terminated by a newline, supply one. Set up BUF's line
6987acf5 2937@@ -1777,8 +2107,22 @@ fillbuf (struct buffer *buf, FILE *fp, c
56ae3f82
SS
2938 else
2939 {
2940 if (key->skipsblanks)
2941- while (blanks[to_uchar (*line_start)])
2942- line_start++;
2943+ {
2944+#if HAVE_MBRTOWC
2945+ if (MB_CUR_MAX > 1)
2946+ {
2947+ size_t mblength;
56ae3f82
SS
2948+ while (line_start < line->keylim &&
2949+ ismbblank (line_start,
2950+ line->keylim - line_start,
2951+ &mblength))
2952+ line_start += mblength;
2953+ }
2954+ else
2955+#endif
2956+ while (blanks[to_uchar (*line_start)])
2957+ line_start++;
2958+ }
2959 line->keybeg = line_start;
2960 }
2961 }
6987acf5 2962@@ -1899,7 +2243,7 @@ human_numcompare (char const *a, char co
56ae3f82
SS
2963 hideously fast. */
2964
2965 static int
1555d43c 2966-numcompare (char const *a, char const *b)
56ae3f82
SS
2967+numcompare_uni (const char *a, const char *b)
2968 {
2969 while (blanks[to_uchar (*a)])
2970 a++;
6987acf5 2971@@ -1909,6 +2253,25 @@ numcompare (char const *a, char const *b
1555d43c 2972 return strnumcmp (a, b, decimal_point, thousands_sep);
56ae3f82
SS
2973 }
2974
2975+#if HAVE_MBRTOWC
2976+static int
2977+numcompare_mb (const char *a, const char *b)
2978+{
2979+ size_t mblength, len;
2980+ len = strlen (a); /* okay for UTF-8 */
2981+ while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
2982+ {
2983+ a += mblength;
2984+ len -= mblength;
2985+ }
2986+ len = strlen (b); /* okay for UTF-8 */
2987+ while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
2988+ b += mblength;
2989+
2990+ return strnumcmp (a, b, decimal_point, thousands_sep);
2991+}
2992+#endif /* HAVE_MBRTOWC */
2993+
fa4603be
SS
2994 /* Work around a problem whereby the long double value returned by glibc's
2995 strtold ("NaN", ...) contains uninitialized bits: clear all bytes of
2996 A and B before calling strtold. FIXME: remove this function once
6987acf5 2997@@ -1959,7 +2322,7 @@ general_numcompare (char const *sa, char
56ae3f82
SS
2998 Return 0 if the name in S is not recognized. */
2999
3000 static int
1555d43c
SS
3001-getmonth (char const *month, char **ea)
3002+getmonth_uni (char const *month, size_t len, char **ea)
56ae3f82
SS
3003 {
3004 size_t lo = 0;
3005 size_t hi = MONTHS_PER_YEAR;
6987acf5 3006@@ -2234,15 +2597,14 @@ debug_key (struct line const *line, stru
407c5be3
SS
3007 char saved = *lim;
3008 *lim = '\0';
1555d43c
SS
3009
3010- while (blanks[to_uchar (*beg)])
3011- beg++;
3012+ skipblanks (&beg, lim);
3013
3014 char *tighter_lim = beg;
3015
3badd2da
SS
3016 if (lim < beg)
3017 tighter_lim = lim;
3018 else if (key->month)
1555d43c
SS
3019- getmonth (beg, &tighter_lim);
3020+ getmonth (beg, lim-beg, &tighter_lim);
3021 else if (key->general_numeric)
3022 ignore_value (strtold (beg, &tighter_lim));
3023 else if (key->numeric || key->human_numeric)
6987acf5 3024@@ -2386,7 +2748,7 @@ key_warnings (struct keyfield const *gke
1555d43c
SS
3025 bool maybe_space_aligned = !hard_LC_COLLATE && default_key_compare (key)
3026 && !(key->schar || key->echar);
3027 bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */
3028- if (!gkey_only && tab == TAB_DEFAULT && !line_offset
3029+ if (!gkey_only && !tab_length && !line_offset
3030 && ((!key->skipsblanks && !(implicit_skip || maybe_space_aligned))
3031 || (!key->skipsblanks && key->schar)
3032 || (!key->skipeblanks && key->echar)))
6987acf5
MT
3033@@ -2444,11 +2806,83 @@ key_warnings (struct keyfield const *gke
3034 error (0, 0, _("option '-r' only applies to last-resort comparison"));
56ae3f82
SS
3035 }
3036
3037+#if HAVE_MBRTOWC
3038+static int
1555d43c 3039+getmonth_mb (const char *s, size_t len, char **ea)
56ae3f82
SS
3040+{
3041+ char *month;
3042+ register size_t i;
3043+ register int lo = 0, hi = MONTHS_PER_YEAR, result;
3044+ char *tmp;
3045+ size_t wclength, mblength;
3046+ const char **pp;
3047+ const wchar_t **wpp;
3048+ wchar_t *month_wcs;
3049+ mbstate_t state;
3050+
3051+ while (len > 0 && ismbblank (s, len, &mblength))
3052+ {
3053+ s += mblength;
3054+ len -= mblength;
3055+ }
3056+
3057+ if (len == 0)
3058+ return 0;
3059+
3060+ month = (char *) alloca (len + 1);
3061+
3062+ tmp = (char *) alloca (len + 1);
3063+ memcpy (tmp, s, len);
3064+ tmp[len] = '\0';
3065+ pp = (const char **)&tmp;
3066+ month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t));
3067+ memset (&state, '\0', sizeof(mbstate_t));
3068+
3069+ wclength = mbsrtowcs (month_wcs, pp, len + 1, &state);
1555d43c
SS
3070+ if (wclength == (size_t)-1 || *pp != NULL)
3071+ error (SORT_FAILURE, 0, _("Invalid multibyte input %s."), quote(s));
56ae3f82
SS
3072+
3073+ for (i = 0; i < wclength; i++)
3074+ {
3075+ month_wcs[i] = towupper(month_wcs[i]);
3076+ if (iswblank (month_wcs[i]))
3077+ {
3078+ month_wcs[i] = L'\0';
3079+ break;
3080+ }
3081+ }
3082+
3083+ wpp = (const wchar_t **)&month_wcs;
3084+
3085+ mblength = wcsrtombs (month, wpp, len + 1, &state);
3086+ assert (mblength != (-1) && *wpp == NULL);
3087+
3088+ do
3089+ {
3090+ int ix = (lo + hi) / 2;
3091+
3092+ if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
3093+ hi = ix;
3094+ else
3095+ lo = ix;
3096+ }
3097+ while (hi - lo > 1);
3098+
3099+ result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
3100+ ? monthtab[lo].val : 0);
3101+
6987acf5
MT
3102+ if (ea && result)
3103+ *ea = s + strlen (monthtab[lo].name);
3104+
56ae3f82
SS
3105+ return result;
3106+}
3107+#endif
3108+
3109 /* Compare two lines A and B trying every key in sequence until there
3110 are no more keys or a difference is found. */
3111
3112 static int
1555d43c 3113-keycompare (struct line const *a, struct line const *b)
56ae3f82
SS
3114+keycompare_uni (const struct line *a, const struct line *b)
3115 {
3116 struct keyfield *key = keylist;
3117
6987acf5 3118@@ -2533,7 +2967,7 @@ keycompare (struct line const *a, struct
1555d43c
SS
3119 else if (key->human_numeric)
3120 diff = human_numcompare (ta, tb);
3121 else if (key->month)
3122- diff = getmonth (ta, NULL) - getmonth (tb, NULL);
3123+ diff = getmonth (ta, tlena, NULL) - getmonth (tb, tlenb, NULL);
3124 else if (key->random)
3125 diff = compare_random (ta, tlena, tb, tlenb);
3126 else if (key->version)
6987acf5 3127@@ -2649,6 +3083,180 @@ keycompare (struct line const *a, struct
56ae3f82
SS
3128 return key->reverse ? -diff : diff;
3129 }
3130
3131+#if HAVE_MBRTOWC
3132+static int
3133+keycompare_mb (const struct line *a, const struct line *b)
3134+{
3135+ struct keyfield *key = keylist;
3136+
3137+ /* For the first iteration only, the key positions have been
3138+ precomputed for us. */
3139+ char *texta = a->keybeg;
3140+ char *textb = b->keybeg;
3141+ char *lima = a->keylim;
3142+ char *limb = b->keylim;
3143+
3144+ size_t mblength_a, mblength_b;
3145+ wchar_t wc_a, wc_b;
3146+ mbstate_t state_a, state_b;
3147+
3148+ int diff;
3149+
3150+ memset (&state_a, '\0', sizeof(mbstate_t));
3151+ memset (&state_b, '\0', sizeof(mbstate_t));
3152+
3153+ for (;;)
3154+ {
3155+ char const *translate = key->translate;
3156+ bool const *ignore = key->ignore;
3157+
3158+ /* Find the lengths. */
3159+ size_t lena = lima <= texta ? 0 : lima - texta;
3160+ size_t lenb = limb <= textb ? 0 : limb - textb;
3161+
3162+ /* Actually compare the fields. */
3163+ if (key->random)
3164+ diff = compare_random (texta, lena, textb, lenb);
3165+ else if (key->numeric | key->general_numeric | key->human_numeric)
3166+ {
3167+ char savea = *lima, saveb = *limb;
3168+
3169+ *lima = *limb = '\0';
3170+ diff = (key->numeric ? numcompare (texta, textb)
3171+ : key->general_numeric ? general_numcompare (texta, textb)
1555d43c 3172+ : human_numcompare (texta, textb));
56ae3f82
SS
3173+ *lima = savea, *limb = saveb;
3174+ }
3175+ else if (key->version)
1555d43c 3176+ diff = filevercmp (texta, textb);
56ae3f82 3177+ else if (key->month)
1555d43c 3178+ diff = getmonth (texta, lena, NULL) - getmonth (textb, lenb, NULL);
56ae3f82
SS
3179+ else
3180+ {
3181+ if (ignore || translate)
3182+ {
3183+ char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
3184+ char *copy_b = copy_a + lena + 1;
3185+ size_t new_len_a, new_len_b;
3186+ size_t i, j;
3187+
3188+ /* Ignore and/or translate chars before comparing. */
3189+# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \
3190+ do \
3191+ { \
3192+ wchar_t uwc; \
3193+ char mbc[MB_LEN_MAX]; \
3194+ mbstate_t state_wc; \
3195+ \
3196+ for (NEW_LEN = i = 0; i < LEN;) \
3197+ { \
3198+ mbstate_t state_bak; \
3199+ \
3200+ state_bak = STATE; \
3201+ MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \
3202+ \
3203+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \
3204+ || MBLENGTH == 0) \
3205+ { \
3206+ if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \
3207+ STATE = state_bak; \
3208+ if (!ignore) \
e7f6ab54
SS
3209+ COPY[NEW_LEN++] = TEXT[i]; \
3210+ i++; \
56ae3f82
SS
3211+ continue; \
3212+ } \
3213+ \
3214+ if (ignore) \
3215+ { \
3216+ if ((ignore == nonprinting && !iswprint (WC)) \
3217+ || (ignore == nondictionary \
3218+ && !iswalnum (WC) && !iswblank (WC))) \
3219+ { \
3220+ i += MBLENGTH; \
3221+ continue; \
3222+ } \
3223+ } \
3224+ \
3225+ if (translate) \
3226+ { \
3227+ \
3228+ uwc = towupper(WC); \
3229+ if (WC == uwc) \
3230+ { \
3231+ memcpy (mbc, TEXT + i, MBLENGTH); \
3232+ i += MBLENGTH; \
3233+ } \
3234+ else \
3235+ { \
3236+ i += MBLENGTH; \
3237+ WC = uwc; \
3238+ memset (&state_wc, '\0', sizeof (mbstate_t)); \
3239+ \
3240+ MBLENGTH = wcrtomb (mbc, WC, &state_wc); \
3241+ assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \
3242+ } \
3243+ \
3244+ for (j = 0; j < MBLENGTH; j++) \
3245+ COPY[NEW_LEN++] = mbc[j]; \
3246+ } \
3247+ else \
3248+ for (j = 0; j < MBLENGTH; j++) \
3249+ COPY[NEW_LEN++] = TEXT[i++]; \
3250+ } \
3251+ COPY[NEW_LEN] = '\0'; \
3252+ } \
3253+ while (0)
3254+ IGNORE_CHARS (new_len_a, lena, texta, copy_a,
3255+ wc_a, mblength_a, state_a);
3256+ IGNORE_CHARS (new_len_b, lenb, textb, copy_b,
3257+ wc_b, mblength_b, state_b);
3258+ diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
3259+ }
3260+ else if (lena == 0)
3261+ diff = - NONZERO (lenb);
3262+ else if (lenb == 0)
3263+ goto greater;
3264+ else
3265+ diff = xmemcoll (texta, lena, textb, lenb);
3266+ }
3267+
3268+ if (diff)
3269+ goto not_equal;
3270+
3271+ key = key->next;
3272+ if (! key)
3273+ break;
3274+
3275+ /* Find the beginning and limit of the next field. */
3276+ if (key->eword != -1)
3277+ lima = limfield (a, key), limb = limfield (b, key);
3278+ else
3279+ lima = a->text + a->length - 1, limb = b->text + b->length - 1;
3280+
3281+ if (key->sword != -1)
3282+ texta = begfield (a, key), textb = begfield (b, key);
3283+ else
3284+ {
3285+ texta = a->text, textb = b->text;
3286+ if (key->skipsblanks)
3287+ {
3288+ while (texta < lima && ismbblank (texta, lima - texta, &mblength_a))
3289+ texta += mblength_a;
3290+ while (textb < limb && ismbblank (textb, limb - textb, &mblength_b))
3291+ textb += mblength_b;
3292+ }
3293+ }
3294+ }
3295+
3296+ return 0;
3297+
3298+greater:
3299+ diff = 1;
3300+not_equal:
3301+ return key->reverse ? -diff : diff;
3302+}
3303+#endif
3304+
3305 /* Compare two lines A and B, returning negative, zero, or positive
3306 depending on whether A compares less than, equal to, or greater than B. */
3307
6987acf5 3308@@ -4109,7 +4717,7 @@ main (int argc, char **argv)
56ae3f82
SS
3309 initialize_exit_failure (SORT_FAILURE);
3310
3311 hard_LC_COLLATE = hard_locale (LC_COLLATE);
3312-#if HAVE_NL_LANGINFO
3313+#if HAVE_LANGINFO_CODESET
3314 hard_LC_TIME = hard_locale (LC_TIME);
3315 #endif
3316
6987acf5 3317@@ -4130,6 +4738,29 @@ main (int argc, char **argv)
56ae3f82
SS
3318 thousands_sep = -1;
3319 }
3320
3321+#if HAVE_MBRTOWC
3322+ if (MB_CUR_MAX > 1)
3323+ {
3324+ inittables = inittables_mb;
3325+ begfield = begfield_mb;
3326+ limfield = limfield_mb;
1555d43c 3327+ skipblanks = skipblanks_mb;
56ae3f82
SS
3328+ getmonth = getmonth_mb;
3329+ keycompare = keycompare_mb;
3330+ numcompare = numcompare_mb;
3331+ }
3332+ else
3333+#endif
3334+ {
3335+ inittables = inittables_uni;
3336+ begfield = begfield_uni;
3337+ limfield = limfield_uni;
1555d43c 3338+ skipblanks = skipblanks_uni;
56ae3f82
SS
3339+ getmonth = getmonth_uni;
3340+ keycompare = keycompare_uni;
3341+ numcompare = numcompare_uni;
3342+ }
3343+
3344 have_read_stdin = false;
3345 inittables ();
3346
6987acf5 3347@@ -4400,13 +5031,34 @@ main (int argc, char **argv)
56ae3f82
SS
3348
3349 case 't':
3350 {
3351- char newtab = optarg[0];
3352- if (! newtab)
3353+ char newtab[MB_LEN_MAX + 1];
3354+ size_t newtab_length = 1;
3355+ strncpy (newtab, optarg, MB_LEN_MAX);
3356+ if (! newtab[0])
3357 error (SORT_FAILURE, 0, _("empty tab"));
3358- if (optarg[1])
3359+#if HAVE_MBRTOWC
3360+ if (MB_CUR_MAX > 1)
3361+ {
3362+ wchar_t wc;
3363+ mbstate_t state;
56ae3f82
SS
3364+
3365+ memset (&state, '\0', sizeof (mbstate_t));
3366+ newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
3367+ MB_LEN_MAX),
3368+ &state);
3369+ switch (newtab_length)
3370+ {
3371+ case (size_t) -1:
3372+ case (size_t) -2:
3373+ case 0:
3374+ newtab_length = 1;
3375+ }
3376+ }
3377+#endif
3378+ if (newtab_length == 1 && optarg[1])
3379 {
3380 if (STREQ (optarg, "\\0"))
3381- newtab = '\0';
3382+ newtab[0] = '\0';
3383 else
3384 {
6987acf5
MT
3385 /* Provoke with 'sort -txx'. Complain about
3386@@ -4417,9 +5069,12 @@ main (int argc, char **argv)
56ae3f82
SS
3387 quote (optarg));
3388 }
3389 }
3390- if (tab != TAB_DEFAULT && tab != newtab)
3391+ if (tab_length
3392+ && (tab_length != newtab_length
3393+ || memcmp (tab, newtab, tab_length) != 0))
3394 error (SORT_FAILURE, 0, _("incompatible tabs"));
3395- tab = newtab;
3396+ memcpy (tab, newtab, newtab_length);
3397+ tab_length = newtab_length;
3398 }
3399 break;
3400
6987acf5
MT
3401diff -urNp coreutils-8.16-orig/src/unexpand.c coreutils-8.16/src/unexpand.c
3402--- coreutils-8.16-orig/src/unexpand.c 2012-03-24 21:26:51.000000000 +0100
3403+++ coreutils-8.16/src/unexpand.c 2012-03-26 17:51:46.000000000 +0200
3404@@ -38,12 +38,29 @@
56ae3f82
SS
3405 #include <stdio.h>
3406 #include <getopt.h>
3407 #include <sys/types.h>
3408+
3409+/* Get mbstate_t, mbrtowc(), wcwidth(). */
3410+#if HAVE_WCHAR_H
3411+# include <wchar.h>
3412+#endif
3413+
3414 #include "system.h"
3415 #include "error.h"
1555d43c 3416 #include "fadvise.h"
56ae3f82
SS
3417 #include "quote.h"
3418 #include "xstrndup.h"
3419
3420+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
3421+ installation; work around this configuration error. */
3422+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
3423+# define MB_LEN_MAX 16
3424+#endif
3425+
3426+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
3427+#if HAVE_MBRTOWC && defined mbstate_t
3428+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
3429+#endif
3430+
6987acf5 3431 /* The official name of this program (e.g., no 'g' prefix). */
56ae3f82
SS
3432 #define PROGRAM_NAME "unexpand"
3433
6987acf5 3434@@ -103,6 +120,208 @@ static struct option const longopts[] =
56ae3f82
SS
3435 {NULL, 0, NULL, 0}
3436 };
3437
3438+static FILE *next_file (FILE *fp);
3439+
3440+#if HAVE_MBRTOWC
3441+static void
3442+unexpand_multibyte (void)
3443+{
3444+ FILE *fp; /* Input stream. */
3445+ mbstate_t i_state; /* Current shift state of the input stream. */
3446+ mbstate_t i_state_bak; /* Back up the I_STATE. */
3447+ mbstate_t o_state; /* Current shift state of the output stream. */
3448+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
3badd2da 3449+ char *bufpos = buf; /* Next read position of BUF. */
56ae3f82
SS
3450+ size_t buflen = 0; /* The length of the byte sequence in buf. */
3451+ wint_t wc; /* The wide character just read. */
3452+ size_t mblength; /* The byte length of the multibyte sequence
3453+ that was converted into WC. */
3454+
3455+ /* Index in `tab_list' of next tabstop: */
3456+ int tab_index = 0; /* For calculating width of pending tabs. */
3457+ int print_tab_index = 0; /* For printing as many tabs as possible. */
3458+ unsigned int column = 0; /* Column on screen of next char. */
3459+ int next_tab_column; /* Column the next tab stop is on. */
3460+ int convert = 1; /* If nonzero, perform translations. */
3461+ unsigned int pending = 0; /* Pending columns of blanks. */
3462+
3463+ fp = next_file ((FILE *) NULL);
3464+ if (fp == NULL)
3465+ return;
3466+
3467+ memset (&o_state, '\0', sizeof(mbstate_t));
3468+ memset (&i_state, '\0', sizeof(mbstate_t));
3469+
3470+ for (;;)
3471+ {
3472+ if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
3473+ {
3474+ memmove (buf, bufpos, buflen);
3475+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
3476+ bufpos = buf;
3477+ }
3478+
3479+ /* Get a wide character. */
3480+ if (buflen < 1)
3481+ {
3482+ mblength = 1;
3483+ wc = WEOF;
3484+ }
3485+ else
3486+ {
3487+ i_state_bak = i_state;
3488+ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state);
3489+ }
3490+
3491+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
3492+ {
3493+ i_state = i_state_bak;
3494+ wc = L'\0';
3495+ }
3496+
3497+ if (wc == L' ' && convert && column < INT_MAX)
3498+ {
3499+ ++pending;
3500+ ++column;
3501+ }
3502+ else if (wc == L'\t' && convert)
3503+ {
3504+ if (tab_size == 0)
3505+ {
3506+ /* Do not let tab_index == first_free_tab;
3507+ stop when it is 1 less. */
3508+ while (tab_index < first_free_tab - 1
3509+ && column >= tab_list[tab_index])
3510+ tab_index++;
3511+ next_tab_column = tab_list[tab_index];
3512+ if (tab_index < first_free_tab - 1)
3513+ tab_index++;
3514+ if (column >= next_tab_column)
3515+ {
3516+ convert = 0; /* Ran out of tab stops. */
3517+ goto flush_pend_mb;
3518+ }
3519+ }
3520+ else
3521+ {
3522+ next_tab_column = column + tab_size - column % tab_size;
3523+ }
3524+ pending += next_tab_column - column;
3525+ column = next_tab_column;
3526+ }
3527+ else
3528+ {
3529+flush_pend_mb:
3530+ /* Flush pending spaces. Print as many tabs as possible,
3531+ then print the rest as spaces. */
3532+ if (pending == 1)
3533+ {
3534+ putchar (' ');
3535+ pending = 0;
3536+ }
3537+ column -= pending;
3538+ while (pending > 0)
3539+ {
3540+ if (tab_size == 0)
3541+ {
3542+ /* Do not let print_tab_index == first_free_tab;
3543+ stop when it is 1 less. */
3544+ while (print_tab_index < first_free_tab - 1
3545+ && column >= tab_list[print_tab_index])
3546+ print_tab_index++;
3547+ next_tab_column = tab_list[print_tab_index];
3548+ if (print_tab_index < first_free_tab - 1)
3549+ print_tab_index++;
3550+ }
3551+ else
3552+ {
3553+ next_tab_column =
3554+ column + tab_size - column % tab_size;
3555+ }
3556+ if (next_tab_column - column <= pending)
3557+ {
3558+ putchar ('\t');
3559+ pending -= next_tab_column - column;
3560+ column = next_tab_column;
3561+ }
3562+ else
3563+ {
3564+ --print_tab_index;
3565+ column += pending;
3566+ while (pending != 0)
3567+ {
3568+ putchar (' ');
3569+ pending--;
3570+ }
3571+ }
3572+ }
3573+
3574+ if (wc == WEOF)
3575+ {
3576+ fp = next_file (fp);
3577+ if (fp == NULL)
3578+ break; /* No more files. */
3579+ else
3580+ {
3581+ memset (&i_state, '\0', sizeof(mbstate_t));
3582+ continue;
3583+ }
3584+ }
3585+
3586+ if (mblength == (size_t)-1 || mblength == (size_t)-2)
3587+ {
3588+ if (convert)
3589+ {
3590+ ++column;
3591+ if (convert_entire_line == 0)
3592+ convert = 0;
3593+ }
3594+ mblength = 1;
3595+ putchar (buf[0]);
3596+ }
3597+ else if (mblength == 0)
3598+ {
3599+ if (convert && convert_entire_line == 0)
3600+ convert = 0;
3601+ mblength = 1;
3602+ putchar ('\0');
3603+ }
3604+ else
3605+ {
3606+ if (convert)
3607+ {
3608+ if (wc == L'\b')
3609+ {
3610+ if (column > 0)
3611+ --column;
3612+ }
3613+ else
3614+ {
3615+ int width; /* The width of WC. */
3616+
3617+ width = wcwidth (wc);
3618+ column += (width > 0) ? width : 0;
3619+ if (convert_entire_line == 0)
3620+ convert = 0;
3621+ }
3622+ }
3623+
3624+ if (wc == L'\n')
3625+ {
3626+ tab_index = print_tab_index = 0;
3627+ column = pending = 0;
3628+ convert = 1;
3629+ }
3630+ fwrite (bufpos, sizeof(char), mblength, stdout);
3631+ }
3632+ }
3633+ buflen -= mblength;
3634+ bufpos += mblength;
3635+ }
3636+}
3637+#endif
3638+
3639+
3640 void
3641 usage (int status)
3642 {
6987acf5 3643@@ -524,7 +743,12 @@ main (int argc, char **argv)
56ae3f82
SS
3644
3645 file_list = (optind < argc ? &argv[optind] : stdin_argv);
3646
3647- unexpand ();
3648+#if HAVE_MBRTOWC
3649+ if (MB_CUR_MAX > 1)
3650+ unexpand_multibyte ();
3651+ else
3652+#endif
3653+ unexpand ();
3654
3655 if (have_read_stdin && fclose (stdin) != 0)
3656 error (EXIT_FAILURE, errno, "-");
6987acf5
MT
3657diff -urNp coreutils-8.16-orig/src/uniq.c coreutils-8.16/src/uniq.c
3658--- coreutils-8.16-orig/src/uniq.c 2012-03-24 21:26:51.000000000 +0100
3659+++ coreutils-8.16/src/uniq.c 2012-03-26 17:35:09.000000000 +0200
56ae3f82
SS
3660@@ -21,6 +21,16 @@
3661 #include <getopt.h>
3662 #include <sys/types.h>
3663
3664+/* Get mbstate_t, mbrtowc(). */
3665+#if HAVE_WCHAR_H
3666+# include <wchar.h>
3667+#endif
3668+
3669+/* Get isw* functions. */
3670+#if HAVE_WCTYPE_H
3671+# include <wctype.h>
3672+#endif
3673+
3674 #include "system.h"
3675 #include "argmatch.h"
3676 #include "linebuffer.h"
1555d43c 3677@@ -32,7 +42,19 @@
56ae3f82
SS
3678 #include "stdio--.h"
3679 #include "xmemcoll.h"
3680 #include "xstrtol.h"
3681-#include "memcasecmp.h"
3682+#include "xmemcoll.h"
3683+
3684+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
3685+ installation; work around this configuration error. */
3686+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
3687+# define MB_LEN_MAX 16
3688+#endif
3689+
3690+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
3691+#if HAVE_MBRTOWC && defined mbstate_t
3692+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
3693+#endif
3694+
3695
6987acf5 3696 /* The official name of this program (e.g., no 'g' prefix). */
56ae3f82 3697 #define PROGRAM_NAME "uniq"
1555d43c 3698@@ -108,6 +130,10 @@ static enum delimit_method const delimit
56ae3f82
SS
3699 /* Select whether/how to delimit groups of duplicate lines. */
3700 static enum delimit_method delimit_groups;
3701
3702+/* Function pointers. */
3703+static char *
3704+(*find_field) (struct linebuffer *line);
3705+
3706 static struct option const longopts[] =
3707 {
3708 {"count", no_argument, NULL, 'c'},
6987acf5 3709@@ -206,7 +232,7 @@ size_opt (char const *opt, char const *m
56ae3f82
SS
3710 return a pointer to the beginning of the line's field to be compared. */
3711
e7f6ab54 3712 static char * _GL_ATTRIBUTE_PURE
56ae3f82
SS
3713-find_field (struct linebuffer const *line)
3714+find_field_uni (struct linebuffer *line)
3715 {
3716 size_t count;
3717 char const *lp = line->buffer;
6987acf5 3718@@ -226,6 +252,83 @@ find_field (struct linebuffer const *lin
56ae3f82
SS
3719 return line->buffer + i;
3720 }
3721
3722+#if HAVE_MBRTOWC
3723+
3724+# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
3725+ do \
3726+ { \
3727+ mbstate_t state_bak; \
3728+ \
3729+ CONVFAIL = 0; \
3730+ state_bak = *STATEP; \
3731+ \
3732+ MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
3733+ \
3734+ switch (MBLENGTH) \
3735+ { \
3736+ case (size_t)-2: \
3737+ case (size_t)-1: \
3738+ *STATEP = state_bak; \
3739+ CONVFAIL++; \
3740+ /* Fall through */ \
3741+ case 0: \
3742+ MBLENGTH = 1; \
3743+ } \
3744+ } \
3745+ while (0)
3746+
3747+static char *
3748+find_field_multi (struct linebuffer *line)
3749+{
3750+ size_t count;
3751+ char *lp = line->buffer;
3752+ size_t size = line->length - 1;
3753+ size_t pos;
3754+ size_t mblength;
3755+ wchar_t wc;
3756+ mbstate_t *statep;
3badd2da 3757+ int convfail = 0;
56ae3f82
SS
3758+
3759+ pos = 0;
3760+ statep = &(line->state);
3761+
3762+ /* skip fields. */
3763+ for (count = 0; count < skip_fields && pos < size; count++)
3764+ {
3765+ while (pos < size)
3766+ {
3767+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3768+
3769+ if (convfail || !iswblank (wc))
3770+ {
3771+ pos += mblength;
3772+ break;
3773+ }
3774+ pos += mblength;
3775+ }
3776+
3777+ while (pos < size)
3778+ {
3779+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3780+
3781+ if (!convfail && iswblank (wc))
3782+ break;
3783+
3784+ pos += mblength;
3785+ }
3786+ }
3787+
3788+ /* skip chars. */
3789+ for (count = 0; count < skip_chars && pos < size; count++)
3790+ {
3791+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3792+ pos += mblength;
3793+ }
3794+
3795+ return lp + pos;
3796+}
3797+#endif
3798+
3799 /* Return false if two strings OLD and NEW match, true if not.
3800 OLD and NEW point not to the beginnings of the lines
3801 but rather to the beginnings of the fields to compare.
6987acf5 3802@@ -234,6 +337,8 @@ find_field (struct linebuffer const *lin
56ae3f82
SS
3803 static bool
3804 different (char *old, char *new, size_t oldlen, size_t newlen)
3805 {
3806+ char *copy_old, *copy_new;
3807+
3808 if (check_chars < oldlen)
3809 oldlen = check_chars;
3810 if (check_chars < newlen)
6987acf5 3811@@ -241,14 +346,92 @@ different (char *old, char *new, size_t
56ae3f82
SS
3812
3813 if (ignore_case)
3814 {
3815- /* FIXME: This should invoke strcoll somehow. */
3816- return oldlen != newlen || memcasecmp (old, new, oldlen);
3817+ size_t i;
3818+
3819+ copy_old = alloca (oldlen + 1);
3820+ copy_new = alloca (oldlen + 1);
3821+
3822+ for (i = 0; i < oldlen; i++)
3823+ {
3824+ copy_old[i] = toupper (old[i]);
3825+ copy_new[i] = toupper (new[i]);
3826+ }
3827 }
3828- else if (hard_LC_COLLATE)
3829- return xmemcoll (old, oldlen, new, newlen) != 0;
3830 else
3831- return oldlen != newlen || memcmp (old, new, oldlen);
3832+ {
3833+ copy_old = (char *)old;
3834+ copy_new = (char *)new;
3835+ }
3836+
3837+ return xmemcoll (copy_old, oldlen, copy_new, newlen);
3838+}
3839+
3840+#if HAVE_MBRTOWC
3841+static int
3842+different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
3843+{
3844+ size_t i, j, chars;
3845+ const char *str[2];
3846+ char *copy[2];
3847+ size_t len[2];
3848+ mbstate_t state[2];
3849+ size_t mblength;
3850+ wchar_t wc, uwc;
3851+ mbstate_t state_bak;
3852+
3853+ str[0] = old;
3854+ str[1] = new;
3855+ len[0] = oldlen;
3856+ len[1] = newlen;
3857+ state[0] = oldstate;
3858+ state[1] = newstate;
3859+
3860+ for (i = 0; i < 2; i++)
3861+ {
3862+ copy[i] = alloca (len[i] + 1);
3863+
3864+ for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
3865+ {
3866+ state_bak = state[i];
3867+ mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
3868+
3869+ switch (mblength)
3870+ {
3871+ case (size_t)-1:
3872+ case (size_t)-2:
3873+ state[i] = state_bak;
3874+ /* Fall through */
3875+ case 0:
3876+ mblength = 1;
3877+ break;
3878+
3879+ default:
3880+ if (ignore_case)
3881+ {
3882+ uwc = towupper (wc);
3883+
3884+ if (uwc != wc)
3885+ {
3886+ mbstate_t state_wc;
3887+
3888+ memset (&state_wc, '\0', sizeof(mbstate_t));
3889+ wcrtomb (copy[i] + j, uwc, &state_wc);
3890+ }
3891+ else
3892+ memcpy (copy[i] + j, str[i] + j, mblength);
3893+ }
3894+ else
3895+ memcpy (copy[i] + j, str[i] + j, mblength);
3896+ }
3897+ j += mblength;
3898+ }
3899+ copy[i][j] = '\0';
3900+ len[i] = j;
3901+ }
3902+
3903+ return xmemcoll (copy[0], len[0], copy[1], len[1]);
3904 }
3905+#endif
3906
3907 /* Output the line in linebuffer LINE to standard output
3908 provided that the switches say it should be output.
6987acf5 3909@@ -304,15 +487,43 @@ check_file (const char *infile, const ch
56ae3f82 3910 {
1555d43c
SS
3911 char *prevfield IF_LINT ( = NULL);
3912 size_t prevlen IF_LINT ( = 0);
56ae3f82
SS
3913+#if HAVE_MBRTOWC
3914+ mbstate_t prevstate;
3915+
3916+ memset (&prevstate, '\0', sizeof (mbstate_t));
3917+#endif
3918
3919 while (!feof (stdin))
3920 {
3921 char *thisfield;
3922 size_t thislen;
3923+#if HAVE_MBRTOWC
3924+ mbstate_t thisstate;
3925+#endif
3926+
3927 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
3928 break;
3929 thisfield = find_field (thisline);
3930 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
3931+#if HAVE_MBRTOWC
3932+ if (MB_CUR_MAX > 1)
3933+ {
3934+ thisstate = thisline->state;
3935+
3936+ if (prevline->length == 0 || different_multi
3937+ (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
3938+ {
3939+ fwrite (thisline->buffer, sizeof (char),
3940+ thisline->length, stdout);
3941+
3942+ SWAP_LINES (prevline, thisline);
3943+ prevfield = thisfield;
3944+ prevlen = thislen;
3945+ prevstate = thisstate;
3946+ }
3947+ }
3948+ else
3949+#endif
3950 if (prevline->length == 0
3951 || different (thisfield, prevfield, thislen, prevlen))
3952 {
6987acf5 3953@@ -331,17 +542,26 @@ check_file (const char *infile, const ch
56ae3f82
SS
3954 size_t prevlen;
3955 uintmax_t match_count = 0;
3956 bool first_delimiter = true;
3957+#if HAVE_MBRTOWC
3958+ mbstate_t prevstate;
3959+#endif
3960
3961 if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
3962 goto closefiles;
3963 prevfield = find_field (prevline);
3964 prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
3965+#if HAVE_MBRTOWC
3966+ prevstate = prevline->state;
3967+#endif
3968
3969 while (!feof (stdin))
3970 {
3971 bool match;
3972 char *thisfield;
3973 size_t thislen;
3974+#if HAVE_MBRTOWC
3badd2da 3975+ mbstate_t thisstate = thisline->state;
56ae3f82
SS
3976+#endif
3977 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
3978 {
3979 if (ferror (stdin))
6987acf5 3980@@ -350,6 +570,14 @@ check_file (const char *infile, const ch
56ae3f82
SS
3981 }
3982 thisfield = find_field (thisline);
3983 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
3984+#if HAVE_MBRTOWC
3985+ if (MB_CUR_MAX > 1)
3986+ {
56ae3f82
SS
3987+ match = !different_multi (thisfield, prevfield,
3988+ thislen, prevlen, thisstate, prevstate);
3989+ }
3990+ else
3991+#endif
3992 match = !different (thisfield, prevfield, thislen, prevlen);
3993 match_count += match;
3994
6987acf5 3995@@ -382,6 +610,9 @@ check_file (const char *infile, const ch
56ae3f82
SS
3996 SWAP_LINES (prevline, thisline);
3997 prevfield = thisfield;
3998 prevlen = thislen;
3999+#if HAVE_MBRTOWC
4000+ prevstate = thisstate;
4001+#endif
4002 if (!match)
4003 match_count = 0;
4004 }
6987acf5 4005@@ -427,6 +658,19 @@ main (int argc, char **argv)
56ae3f82
SS
4006
4007 atexit (close_stdout);
4008
4009+#if HAVE_MBRTOWC
4010+ if (MB_CUR_MAX > 1)
4011+ {
4012+ find_field = find_field_multi;
4013+ }
4014+ else
4015+#endif
4016+ {
4017+ find_field = find_field_uni;
4018+ }
4019+
4020+
4021+
4022 skip_chars = 0;
4023 skip_fields = 0;
4024 check_chars = SIZE_MAX;
6987acf5
MT
4025diff -urNp coreutils-8.16-orig/tests/Makefile.am coreutils-8.16/tests/Makefile.am
4026--- coreutils-8.16-orig/tests/Makefile.am 2012-03-26 18:01:35.564014659 +0200
4027+++ coreutils-8.16/tests/Makefile.am 2012-03-26 18:02:01.023015013 +0200
4028@@ -242,6 +242,7 @@ TESTS = \
1555d43c 4029 misc/sort-debug-warn \
6987acf5 4030 misc/sort-discrim \
56ae3f82
SS
4031 misc/sort-files0-from \
4032+ misc/sort-mb-tests \
1555d43c 4033 misc/sort-float \
56ae3f82
SS
4034 misc/sort-merge \
4035 misc/sort-merge-fdlimit \
6987acf5 4036@@ -537,6 +538,10 @@ TESTS = \
56ae3f82
SS
4037 $(root_tests)
4038
4039 pr_data = \
4040+ misc/mb1.X \
4041+ misc/mb1.I \
4042+ misc/mb2.X \
4043+ misc/mb2.I \
4044 pr/0F \
4045 pr/0FF \
4046 pr/0FFnt \
6987acf5
MT
4047diff -urNp coreutils-8.16-orig/tests/misc/cut coreutils-8.16/tests/misc/cut
4048--- coreutils-8.16-orig/tests/misc/cut 2012-02-03 10:22:06.000000000 +0100
4049+++ coreutils-8.16/tests/misc/cut 2012-03-26 17:40:49.000000000 +0200
4050@@ -23,14 +23,15 @@ use strict;
e7f6ab54
SS
4051 # Turn off localization of executable's output.
4052 @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
4053
4054-my $mb_locale = $ENV{LOCALE_FR_UTF8};
4055-! defined $mb_locale || $mb_locale eq 'none'
4056- and $mb_locale = 'C';
4057+#my $mb_locale = $ENV{LOCALE_FR_UTF8};
4058+#! defined $mb_locale || $mb_locale eq 'none'
4059+# and $mb_locale = 'C';
4060+my $mb_locale = 'C';
4061
56ae3f82 4062 my $prog = 'cut';
6987acf5 4063 my $try = "Try '$prog --help' for more information.\n";
56ae3f82
SS
4064 my $from_1 = "$prog: fields and positions are numbered from 1\n$try";
4065-my $inval = "$prog: invalid byte or field list\n$try";
4066+my $inval = "$prog: invalid byte, character or field list\n$try";
4067 my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try";
4068
4069 my @Tests =
6987acf5 4070@@ -147,7 +148,7 @@ my @Tests =
56ae3f82
SS
4071
4072 # None of the following invalid ranges provoked an error up to coreutils-6.9.
4073 ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
4074- {ERR=>"$prog: invalid decreasing range\n$try"}],
4075+ {ERR=>"$prog: invalid byte, character or field list\n$try"}],
4076 ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
4077 ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
407c5be3 4078 ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1},
6987acf5
MT
4079diff -urNp coreutils-8.16-orig/tests/misc/mb1.I coreutils-8.16/tests/misc/mb1.I
4080--- coreutils-8.16-orig/tests/misc/mb1.I 1970-01-01 01:00:00.000000000 +0100
4081+++ coreutils-8.16/tests/misc/mb1.I 2012-03-26 17:35:09.000000000 +0200
56ae3f82
SS
4082@@ -0,0 +1,4 @@
4083+Apple@10
4084+Banana@5
4085+Citrus@20
4086+Cherry@30
6987acf5
MT
4087diff -urNp coreutils-8.16-orig/tests/misc/mb1.X coreutils-8.16/tests/misc/mb1.X
4088--- coreutils-8.16-orig/tests/misc/mb1.X 1970-01-01 01:00:00.000000000 +0100
4089+++ coreutils-8.16/tests/misc/mb1.X 2012-03-26 17:35:09.000000000 +0200
56ae3f82
SS
4090@@ -0,0 +1,4 @@
4091+Banana@5
4092+Apple@10
4093+Citrus@20
4094+Cherry@30
6987acf5
MT
4095diff -urNp coreutils-8.16-orig/tests/misc/mb2.I coreutils-8.16/tests/misc/mb2.I
4096--- coreutils-8.16-orig/tests/misc/mb2.I 1970-01-01 01:00:00.000000000 +0100
4097+++ coreutils-8.16/tests/misc/mb2.I 2012-03-26 17:35:09.000000000 +0200
56ae3f82
SS
4098@@ -0,0 +1,4 @@
4099+Apple@AA10@@20
4100+Banana@AA5@@30
4101+Citrus@AA20@@5
4102+Cherry@AA30@@10
6987acf5
MT
4103diff -urNp coreutils-8.16-orig/tests/misc/mb2.X coreutils-8.16/tests/misc/mb2.X
4104--- coreutils-8.16-orig/tests/misc/mb2.X 1970-01-01 01:00:00.000000000 +0100
4105+++ coreutils-8.16/tests/misc/mb2.X 2012-03-26 17:35:09.000000000 +0200
56ae3f82
SS
4106@@ -0,0 +1,4 @@
4107+Citrus@AA20@@5
4108+Cherry@AA30@@10
4109+Apple@AA10@@20
4110+Banana@AA5@@30
6987acf5
MT
4111diff -urNp coreutils-8.16-orig/tests/misc/sort-mb-tests coreutils-8.16/tests/misc/sort-mb-tests
4112--- coreutils-8.16-orig/tests/misc/sort-mb-tests 1970-01-01 01:00:00.000000000 +0100
4113+++ coreutils-8.16/tests/misc/sort-mb-tests 2012-03-26 17:35:09.000000000 +0200
56ae3f82
SS
4114@@ -0,0 +1,58 @@
4115+#! /bin/sh
4116+case $# in
4117+ 0) xx='../src/sort';;
4118+ *) xx="$1";;
4119+esac
4120+test "$VERBOSE" && echo=echo || echo=:
4121+$echo testing program: $xx
4122+errors=0
4123+test "$srcdir" || srcdir=.
4124+test "$VERBOSE" && $xx --version 2> /dev/null
4125+
4126+export LC_ALL=en_US.UTF-8
4127+locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
4128+errors=0
4129+
4130+$xx -t @ -k2 -n misc/mb1.I > misc/mb1.O
4131+code=$?
4132+if test $code != 0; then
e7f6ab54 4133+ $echo "Test mb1 failed: $xx return code $code differs from expected value 0"
56ae3f82
SS
4134+ errors=`expr $errors + 1`
4135+else
4136+ cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1
4137+ case $? in
4138+ 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
4139+ 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2
4140+ (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null
4141+ errors=`expr $errors + 1`;;
4142+ 2) $echo "Test mb1 may have failed." 1>&2
4143+ $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2
4144+ errors=`expr $errors + 1`;;
4145+ esac
4146+fi
4147+
4148+$xx -t @ -k4 -n misc/mb2.I > misc/mb2.O
4149+code=$?
4150+if test $code != 0; then
4151+ $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
4152+ errors=`expr $errors + 1`
4153+else
4154+ cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1
4155+ case $? in
4156+ 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
4157+ 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2
4158+ (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null
4159+ errors=`expr $errors + 1`;;
4160+ 2) $echo "Test mb2 may have failed." 1>&2
4161+ $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2
4162+ errors=`expr $errors + 1`;;
4163+ esac
4164+fi
4165+
4166+if test $errors = 0; then
4167+ $echo Passed all 113 tests. 1>&2
4168+else
4169+ $echo Failed $errors tests. 1>&2
4170+fi
4171+test $errors = 0 || errors=1
4172+exit $errors