1 diff -urNp coreutils-8.16-orig/lib/linebuffer.h coreutils-8.16/lib/linebuffer.h
2 --- coreutils-8.16-orig/lib/linebuffer.h 2012-01-06 10:14:31.000000000 +0100
3 +++ coreutils-8.16/lib/linebuffer.h 2012-03-26 18:02:00.993889446 +0200
13 /* A 'struct linebuffer' holds a line of text. */
16 @@ -28,6 +33,9 @@ struct linebuffer
17 size_t size; /* Allocated. */
18 size_t length; /* Used. */
25 /* Initialize linebuffer LINEBUFFER for use. */
26 diff -urNp coreutils-8.16-orig/src/cut.c coreutils-8.16/src/cut.c
27 --- coreutils-8.16-orig/src/cut.c 2012-03-24 21:26:51.000000000 +0100
28 +++ coreutils-8.16/src/cut.c 2012-03-26 17:46:48.000000000 +0200
32 #include <sys/types.h>
34 +/* Get mbstate_t, mbrtowc(). */
45 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
46 + installation; work around this configuration error. */
47 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
49 +# define MB_LEN_MAX 16
52 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
53 +#if HAVE_MBRTOWC && defined mbstate_t
54 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
57 /* The official name of this program (e.g., no 'g' prefix). */
58 #define PROGRAM_NAME "cut"
64 +/* Refill the buffer BUF to get a multibyte character. */
65 +#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
68 + if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
70 + memmove (BUF, BUFPOS, BUFLEN); \
71 + BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
77 +/* Get the wide character at BUFPOS. BUFPOS itself is not advanced.
78 + If the byte sequence is not valid as a character, CONVFAIL is 1. Otherwise 0. */
79 +#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
82 + mbstate_t state_bak; \
90 + /* Get a wide character. */ \
92 + state_bak = STATE; \
93 + MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
100 + STATE = state_bak; \
101 + /* Fall through. */ \
113 @@ -90,7 +153,7 @@ static char *field_1_buffer;
114 /* The number of bytes allocated for FIELD_1_BUFFER. */
115 static size_t field_1_bufsize;
117 -/* The largest field or byte index used as an endpoint of a closed
118 +/* The largest byte, character or field index used as an endpoint of a closed
119 or degenerate range specification; this doesn't include the starting
120 index of right-open-ended ranges. For example, with either range spec
121 '2-5,9-', '2-3,5,9-' this variable would be set to 5. */
122 @@ -102,10 +165,11 @@ static size_t eol_range_start;
124 /* This is a bit vector.
125 In byte mode, which bytes to output.
126 + In character mode, which characters to output.
127 In field mode, which DELIM-separated fields to output.
128 - Both bytes and fields are numbered starting with 1,
129 + Bytes, characters and fields are numbered starting with 1,
130 so the zeroth bit of this array is unused.
131 - A field or byte K has been selected if
132 + A byte, character or field K has been selected if
133 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
134 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
135 static unsigned char *printable_field;
136 @@ -114,15 +178,25 @@ enum operating_mode
140 - /* Output characters that are in the given bytes. */
141 + /* Output bytes that are at the given positions. */
144 + /* Output characters that are at the given positions. */
147 /* Output the given delimeter-separated fields. */
151 static enum operating_mode operating_mode;
153 +/* If nonzero, when in byte mode, don't split multibyte characters. */
154 +static int byte_mode_character_aware;
156 +/* If nonzero, use the single-byte functions even when
157 + this program runs in a multibyte locale. */
158 +static int force_singlebyte_mode;
160 /* If true do not output lines containing no delimeter characters.
161 Otherwise, all such lines are printed. This option is valid only
163 @@ -134,6 +208,9 @@ static bool complement;
165 /* The delimeter character for field mode. */
166 static unsigned char delim;
168 +static wchar_t wcdelim;
171 /* True if the --output-delimiter=STRING option was specified. */
172 static bool output_delimiter_specified;
173 @@ -206,7 +283,7 @@ Mandatory arguments to long options are
174 -f, --fields=LIST select only these fields; also print any line\n\
175 that contains no delimiter character, unless\n\
176 the -s option is specified\n\
178 + -n with -b: don't split multibyte characters\n\
181 --complement complement the set of selected bytes, characters\n\
182 @@ -365,7 +442,7 @@ set_fields (const char *fieldstr)
184 /* Starting a range. */
186 - FATAL_ERROR (_("invalid byte or field list"));
187 + FATAL_ERROR (_("invalid byte, character or field list"));
191 @@ -389,14 +466,16 @@ set_fields (const char *fieldstr)
194 /* 'n-'. From 'initial' to end of line. */
195 - eol_range_start = initial;
196 + if (eol_range_start == 0 ||
197 + (eol_range_start != 0 && eol_range_start > initial))
198 + eol_range_start = initial;
203 /* 'm-n' or '-n' (1-n). */
205 - FATAL_ERROR (_("invalid decreasing range"));
206 + FATAL_ERROR (_("invalid byte, character or field list"));
208 /* Is there already a range going to end of line? */
209 if (eol_range_start != 0)
210 @@ -476,6 +555,9 @@ set_fields (const char *fieldstr)
211 if (operating_mode == byte_mode)
213 _("byte offset %s is too large"), quote (bad_num));
214 + else if (operating_mode == character_mode)
216 + _("character offset %s is too large"), quote (bad_num));
219 _("field number %s is too large"), quote (bad_num));
220 @@ -486,7 +568,7 @@ set_fields (const char *fieldstr)
224 - FATAL_ERROR (_("invalid byte or field list"));
225 + FATAL_ERROR (_("invalid byte, character or field list"));
228 max_range_endpoint = 0;
229 @@ -581,6 +663,77 @@ cut_bytes (FILE *stream)
234 +/* This function is used in the following cases.
236 + 1. Read from the stream STREAM, printing to standard output any selected
239 + 2. Read from stream STREAM, printing to standard output any selected bytes,
240 + without splitting multibyte characters. */
243 +cut_characters_or_cut_bytes_no_split (FILE *stream)
245 + int idx; /* number of bytes or characters in the line so far. */
246 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
247 + char *bufpos; /* Next read position of BUF. */
248 + size_t buflen; /* The length of the byte sequence in buf. */
249 + wint_t wc; /* A gotten wide character. */
250 + size_t mblength; /* The byte size of a multibyte character which shows
251 + as same character as WC. */
252 + mbstate_t state; /* State of the stream. */
253 + int convfail = 0; /* 1, when conversion is failed. Otherwise 0. */
254 + /* Whether to begin printing delimiters between ranges for the current line.
255 + Set after we've begun printing data corresponding to the first range. */
256 + bool print_delimiter = false;
261 + memset (&state, '\0', sizeof(mbstate_t));
265 + REFILL_BUFFER (buf, bufpos, buflen, stream);
267 + GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
275 + else if (wc == L'\n')
279 + print_delimiter = false;
284 + bool *rs = output_delimiter_specified ? &range_start : NULL;
285 + idx += (operating_mode == byte_mode) ? mblength : 1;
286 + if (print_kth (idx, rs))
288 + if (rs && *rs && print_delimiter)
290 + fwrite (output_delimiter_string, sizeof (char),
291 + output_delimiter_length, stdout);
293 + print_delimiter = true;
294 + fwrite (bufpos, mblength, sizeof(char), stdout);
298 + buflen -= mblength;
299 + bufpos += mblength;
304 /* Read from stream STREAM, printing to standard output any selected fields. */
307 @@ -703,13 +856,195 @@ cut_fields (FILE *stream)
313 +cut_fields_mb (FILE *stream)
316 + unsigned int field_idx;
317 + int found_any_selected_field;
318 + int buffer_first_field;
320 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
321 + char *bufpos; /* Next read position of BUF. */
322 + size_t buflen; /* The length of the byte sequence in buf. */
323 + wint_t wc = 0; /* A gotten wide character. */
324 + size_t mblength; /* The byte size of a multibyte character which shows
325 + as same character as WC. */
326 + mbstate_t state; /* State of the stream. */
327 + int convfail = 0; /* 1, when conversion is failed. Otherwise 0. */
329 + found_any_selected_field = 0;
333 + memset (&state, '\0', sizeof(mbstate_t));
336 + empty_input = (c == EOF);
339 + ungetc (c, stream);
345 + /* To support the semantics of the -s flag, we may have to buffer
346 + all of the first field to determine whether it is `delimited.'
347 + But that is unnecessary if all non-delimited lines must be printed
348 + and the first field has been selected, or if non-delimited lines
349 + must be suppressed and the first field has *not* been selected.
350 + That is because a non-delimited line has exactly one field. */
351 + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
355 + if (field_idx == 1 && buffer_first_field)
361 + REFILL_BUFFER (buf, bufpos, buflen, stream);
363 + GET_NEXT_WC_FROM_BUFFER
364 + (wc, bufpos, buflen, mblength, state, convfail);
369 + field_1_buffer = xrealloc (field_1_buffer, len + mblength);
370 + memcpy (field_1_buffer + len, bufpos, mblength);
372 + buflen -= mblength;
373 + bufpos += mblength;
375 + if (!convfail && (wc == L'\n' || wc == wcdelim))
382 + /* If the first field extends to the end of line (it is not
383 + delimited) and we are printing all non-delimited lines,
385 + if (convfail || (!convfail && wc != wcdelim))
387 + if (suppress_non_delimited)
393 + fwrite (field_1_buffer, sizeof (char), len, stdout);
394 + /* Make sure the output line is newline terminated. */
395 + if (convfail || (!convfail && wc != L'\n'))
401 + if (print_kth (1, NULL))
403 + /* Print the field, but not the trailing delimiter. */
404 + fwrite (field_1_buffer, sizeof (char), len - 1, stdout);
405 + found_any_selected_field = 1;
412 + if (print_kth (field_idx, NULL))
414 + if (found_any_selected_field)
416 + fwrite (output_delimiter_string, sizeof (char),
417 + output_delimiter_length, stdout);
419 + found_any_selected_field = 1;
424 + REFILL_BUFFER (buf, bufpos, buflen, stream);
426 + GET_NEXT_WC_FROM_BUFFER
427 + (wc, bufpos, buflen, mblength, state, convfail);
431 + else if (!convfail && (wc == wcdelim || wc == L'\n'))
433 + buflen -= mblength;
434 + bufpos += mblength;
438 + if (print_kth (field_idx, NULL))
439 + fwrite (bufpos, mblength, sizeof(char), stdout);
441 + buflen -= mblength;
442 + bufpos += mblength;
446 + if ((!convfail || wc == L'\n') && buflen < 1)
449 + if (!convfail && wc == wcdelim)
451 + else if (wc == WEOF || (!convfail && wc == L'\n'))
453 + if (found_any_selected_field
454 + || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
459 + found_any_selected_field = 0;
466 cut_stream (FILE *stream)
468 - if (operating_mode == byte_mode)
469 - cut_bytes (stream);
471 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
473 + switch (operating_mode)
476 + if (byte_mode_character_aware)
477 + cut_characters_or_cut_bytes_no_split (stream);
479 + cut_bytes (stream);
482 + case character_mode:
483 + cut_characters_or_cut_bytes_no_split (stream);
487 + cut_fields_mb (stream);
495 - cut_fields (stream);
498 + if (operating_mode == field_mode)
499 + cut_fields (stream);
501 + cut_bytes (stream);
505 /* Process file FILE to standard output.
506 @@ -761,6 +1096,8 @@ main (int argc, char **argv)
508 bool delim_specified = false;
509 char *spec_list_string IF_LINT ( = NULL);
510 + char mbdelim[MB_LEN_MAX + 1];
511 + size_t delimlen = 0;
513 initialize_main (&argc, &argv);
514 set_program_name (argv[0]);
515 @@ -783,7 +1120,6 @@ main (int argc, char **argv)
520 /* Build the byte list. */
521 if (operating_mode != undefined_mode)
522 FATAL_ERROR (_("only one type of list may be specified"));
523 @@ -791,6 +1127,14 @@ main (int argc, char **argv)
524 spec_list_string = optarg;
528 + /* Build the character list. */
529 + if (operating_mode != undefined_mode)
530 + FATAL_ERROR (_("only one type of list may be specified"));
531 + operating_mode = character_mode;
532 + spec_list_string = optarg;
536 /* Build the field list. */
537 if (operating_mode != undefined_mode)
538 @@ -802,10 +1146,35 @@ main (int argc, char **argv)
541 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
542 - if (optarg[0] != '\0' && optarg[1] != '\0')
543 - FATAL_ERROR (_("the delimiter must be a single character"));
545 - delim_specified = true;
552 + memset (&state, '\0', sizeof(mbstate_t));
553 + delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
555 + if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
556 + ++force_singlebyte_mode;
559 + delimlen = (delimlen < 1) ? 1 : delimlen;
560 + if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
561 + FATAL_ERROR (_("the delimiter must be a single character"));
562 + memcpy (mbdelim, optarg, delimlen);
566 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
569 + if (optarg[0] != '\0' && optarg[1] != '\0')
570 + FATAL_ERROR (_("the delimiter must be a single character"));
571 + delim = (unsigned char) optarg[0];
573 + delim_specified = true;
577 case OUTPUT_DELIMITER_OPTION:
578 @@ -818,6 +1187,7 @@ main (int argc, char **argv)
582 + byte_mode_character_aware = 1;
586 @@ -840,7 +1210,7 @@ main (int argc, char **argv)
587 if (operating_mode == undefined_mode)
588 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
590 - if (delim != '\0' && operating_mode != field_mode)
591 + if (delim_specified && operating_mode != field_mode)
592 FATAL_ERROR (_("an input delimiter may be specified only\
593 when operating on fields"));
595 @@ -867,15 +1237,34 @@ main (int argc, char **argv)
598 if (!delim_specified)
610 if (output_delimiter_string == NULL)
612 - static char dummy[2];
615 - output_delimiter_string = dummy;
616 - output_delimiter_length = 1;
618 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
620 + output_delimiter_string = xstrdup(mbdelim);
621 + output_delimiter_length = delimlen;
624 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
627 + static char dummy[2];
630 + output_delimiter_string = dummy;
631 + output_delimiter_length = 1;
636 diff -urNp coreutils-8.16-orig/src/expand.c coreutils-8.16/src/expand.c
637 --- coreutils-8.16-orig/src/expand.c 2012-03-24 21:26:51.000000000 +0100
638 +++ coreutils-8.16/src/expand.c 2012-03-26 17:42:56.000000000 +0200
642 #include <sys/types.h>
644 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
653 #include "xstrndup.h"
655 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
656 + installation; work around this configuration error. */
657 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
658 +# define MB_LEN_MAX 16
661 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
662 +#if HAVE_MBRTOWC && defined mbstate_t
663 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
666 /* The official name of this program (e.g., no 'g' prefix). */
667 #define PROGRAM_NAME "expand"
669 @@ -358,6 +375,142 @@ expand (void)
675 +expand_multibyte (void)
677 + FILE *fp; /* Input stream. */
678 + mbstate_t i_state; /* Current shift state of the input stream. */
679 + mbstate_t i_state_bak; /* Back up the I_STATE. */
680 + mbstate_t o_state; /* Current shift state of the output stream. */
681 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
682 + char *bufpos = buf; /* Next read position of BUF. */
683 + size_t buflen = 0; /* The length of the byte sequence in buf. */
684 + wchar_t wc; /* A gotten wide character. */
685 + size_t mblength; /* The byte size of a multibyte character
686 + which shows as same character as WC. */
687 + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
688 + int column = 0; /* Column on screen of the next char. */
689 + int next_tab_column; /* Column the next tab stop is on. */
690 + int convert = 1; /* If nonzero, perform translations. */
692 + fp = next_file ((FILE *) NULL);
696 + memset (&o_state, '\0', sizeof(mbstate_t));
697 + memset (&i_state, '\0', sizeof(mbstate_t));
701 + /* Refill the buffer BUF. */
702 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
704 + memmove (buf, bufpos, buflen);
705 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
709 + /* No character is left in BUF. */
712 + fp = next_file (fp);
715 + break; /* No more files. */
718 + memset (&i_state, '\0', sizeof(mbstate_t));
723 + /* Get a wide character. */
724 + i_state_bak = i_state;
725 + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
729 + case (size_t)-1: /* illegal byte sequence. */
732 + i_state = i_state_bak;
736 + if (convert_entire_line == 0)
742 + case 0: /* null. */
744 + if (convert && convert_entire_line == 0)
750 + if (wc == L'\n') /* LF. */
757 + else if (wc == L'\t' && convert) /* Tab. */
761 + /* Do not let tab_index == first_free_tab;
762 + stop when it is 1 less. */
763 + while (tab_index < first_free_tab - 1
764 + && column >= tab_list[tab_index])
766 + next_tab_column = tab_list[tab_index];
767 + if (tab_index < first_free_tab - 1)
769 + if (column >= next_tab_column)
770 + next_tab_column = column + 1;
773 + next_tab_column = column + tab_size - column % tab_size;
775 + while (column < next_tab_column)
792 + int width; /* The width of WC. */
794 + width = wcwidth (wc);
795 + column += (width > 0) ? width : 0;
796 + if (convert_entire_line == 0)
800 + fwrite (bufpos, sizeof(char), mblength, stdout);
803 + buflen -= mblength;
804 + bufpos += mblength;
810 main (int argc, char **argv)
812 @@ -422,7 +575,12 @@ main (int argc, char **argv)
814 file_list = (optind < argc ? &argv[optind] : stdin_argv);
818 + if (MB_CUR_MAX > 1)
819 + expand_multibyte ();
824 if (have_read_stdin && fclose (stdin) != 0)
825 error (EXIT_FAILURE, errno, "-");
826 diff -urNp coreutils-8.16-orig/src/fold.c coreutils-8.16/src/fold.c
827 --- coreutils-8.16-orig/src/fold.c 2012-03-24 19:22:13.000000000 +0100
828 +++ coreutils-8.16/src/fold.c 2012-03-26 17:48:37.000000000 +0200
831 #include <sys/types.h>
833 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
838 +/* Get iswprint(), iswblank(), wcwidth(). */
840 +# include <wctype.h>
849 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
850 + installation; work around this configuration error. */
851 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
853 +# define MB_LEN_MAX 16
856 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
857 +#if HAVE_MBRTOWC && defined mbstate_t
858 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
863 /* The official name of this program (e.g., no 'g' prefix). */
866 #define AUTHORS proper_name ("David MacKenzie")
868 +#define FATAL_ERROR(Message) \
871 + error (0, 0, (Message)); \
878 + /* Fold texts by columns that are at the given positions. */
881 + /* Fold texts by bytes that are at the given positions. */
884 + /* Fold texts by characters that are at the given positions. */
888 +/* The current operating mode. (Default: column_mode) */
889 +static enum operating_mode operating_mode;
891 /* If nonzero, try to break on whitespace. */
892 static bool break_spaces;
894 -/* If nonzero, count bytes, not column positions. */
895 -static bool count_bytes;
897 /* If nonzero, at least one of the files we read was standard input. */
898 static bool have_read_stdin;
900 -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
901 +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
903 static struct option const longopts[] =
905 {"bytes", no_argument, NULL, 'b'},
906 + {"characters", no_argument, NULL, 'c'},
907 {"spaces", no_argument, NULL, 's'},
908 {"width", required_argument, NULL, 'w'},
909 {GETOPT_HELP_OPTION_DECL},
910 @@ -77,6 +120,7 @@ Mandatory arguments to long options are
913 -b, --bytes count bytes rather than columns\n\
914 + -c, --characters count characters rather than columns\n\
915 -s, --spaces break at spaces\n\
916 -w, --width=WIDTH use WIDTH columns instead of 80\n\
918 @@ -94,7 +138,7 @@ Mandatory arguments to long options are
920 adjust_column (size_t column, char c)
923 + if (operating_mode != byte_mode)
927 @@ -117,30 +161,14 @@ adjust_column (size_t column, char c)
928 to stdout, with maximum line length WIDTH.
929 Return true if successful. */
932 -fold_file (char const *filename, size_t width)
934 +fold_text (FILE *istream, size_t width, int *saved_errno)
938 size_t column = 0; /* Screen column where next char will go. */
939 size_t offset_out = 0; /* Index in 'line_out' for next char. */
940 static char *line_out = NULL;
941 static size_t allocated_out = 0;
944 - if (STREQ (filename, "-"))
947 - have_read_stdin = true;
950 - istream = fopen (filename, "r");
952 - if (istream == NULL)
954 - error (0, errno, "%s", filename);
958 fadvise (istream, FADVISE_SEQUENTIAL);
960 @@ -170,6 +198,15 @@ fold_file (char const *filename, size_t
961 bool found_blank = false;
962 size_t logical_end = offset_out;
964 + /* If LINE_OUT has no wide character,
965 + put a new wide character in LINE_OUT
966 + if column is bigger than width. */
967 + if (offset_out == 0)
969 + line_out[offset_out++] = c;
973 /* Look for the last blank. */
976 @@ -216,11 +253,221 @@ fold_file (char const *filename, size_t
977 line_out[offset_out++] = c;
980 - saved_errno = errno;
981 + *saved_errno = errno;
984 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
990 +fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
992 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
993 + size_t buflen = 0; /* The length of the byte sequence in buf. */
994 + char *bufpos = buf; /* Next read position of BUF. */
995 + wint_t wc; /* A gotten wide character. */
996 + size_t mblength; /* The byte size of a multibyte character which shows
997 + as same character as WC. */
998 + mbstate_t state, state_bak; /* State of the stream. */
999 + int convfail = 0; /* 1, when conversion is failed. Otherwise 0. */
1001 + static char *line_out = NULL;
1002 + size_t offset_out = 0; /* Index in `line_out' for next char. */
1003 + static size_t allocated_out = 0;
1006 + size_t column = 0;
1008 + size_t last_blank_pos;
1009 + size_t last_blank_column;
1010 + int is_blank_seen;
1011 + int last_blank_increment = 0;
1012 + int is_bs_following_last_blank;
1013 + size_t bs_following_last_blank_num;
1014 + int is_cr_after_last_blank;
1016 +#define CLEAR_FLAGS \
1019 + last_blank_pos = 0; \
1020 + last_blank_column = 0; \
1021 + is_blank_seen = 0; \
1022 + is_bs_following_last_blank = 0; \
1023 + bs_following_last_blank_num = 0; \
1024 + is_cr_after_last_blank = 0; \
1028 +#define START_NEW_LINE \
1039 + memset (&state, '\0', sizeof(mbstate_t));
1041 + for (;; bufpos += mblength, buflen -= mblength)
1043 + if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
1045 + memmove (buf, bufpos, buflen);
1046 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
1053 + /* Get a wide character. */
1054 + state_bak = state;
1055 + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
1062 + state = state_bak;
1063 + /* Fall through. */
1071 + if (operating_mode == byte_mode) /* byte mode */
1072 + increment = mblength;
1073 + else if (operating_mode == character_mode) /* character mode */
1075 + else /* column mode */
1084 + fwrite (line_out, sizeof(char), offset_out, stdout);
1089 + increment = (column > 0) ? -1 : 0;
1093 + increment = -1 * column;
1097 + increment = 8 - column % 8;
1101 + increment = wcwidth (wc);
1102 + increment = (increment < 0) ? 0 : increment;
1107 + if (column + increment > width && break_spaces && last_blank_pos)
1109 + fwrite (line_out, sizeof(char), last_blank_pos, stdout);
1112 + offset_out = offset_out - last_blank_pos;
1113 + column = column - last_blank_column + ((is_cr_after_last_blank)
1114 + ? last_blank_increment : bs_following_last_blank_num);
1115 + memmove (line_out, line_out + last_blank_pos, offset_out);
1120 + if (column + increment > width && column != 0)
1122 + fwrite (line_out, sizeof(char), offset_out, stdout);
1127 + if (allocated_out < offset_out + mblength)
1129 + line_out = X2REALLOC (line_out, &allocated_out);
1132 + memcpy (line_out + offset_out, bufpos, mblength);
1133 + offset_out += mblength;
1134 + column += increment;
1136 + if (is_blank_seen && !convfail && wc == L'\r')
1137 + is_cr_after_last_blank = 1;
1139 + if (is_bs_following_last_blank && !convfail && wc == L'\b')
1140 + ++bs_following_last_blank_num;
1142 + is_bs_following_last_blank = 0;
1144 + if (break_spaces && !convfail && iswblank (wc))
1146 + last_blank_pos = offset_out;
1147 + last_blank_column = column;
1148 + is_blank_seen = 1;
1149 + last_blank_increment = increment;
1150 + is_bs_following_last_blank = 1;
1151 + bs_following_last_blank_num = 0;
1152 + is_cr_after_last_blank = 0;
1156 + *saved_errno = errno;
1159 + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1164 +/* Fold file FILENAME, or standard input if FILENAME is "-",
1165 + to stdout, with maximum line length WIDTH.
1166 + Return 0 if successful, 1 if an error occurs. */
1169 +fold_file (char *filename, size_t width)
1174 + if (STREQ (filename, "-"))
1177 + have_read_stdin = 1;
1180 + istream = fopen (filename, "r");
1182 + if (istream == NULL)
1184 + error (0, errno, "%s", filename);
1188 + /* Define how ISTREAM is being folded. */
1190 + if (MB_CUR_MAX > 1)
1191 + fold_multibyte_text (istream, width, &saved_errno);
1194 + fold_text (istream, width, &saved_errno);
1196 if (ferror (istream))
1198 error (0, saved_errno, "%s", filename);
1199 @@ -253,7 +500,8 @@ main (int argc, char **argv)
1201 atexit (close_stdout);
1203 - break_spaces = count_bytes = have_read_stdin = false;
1204 + operating_mode = column_mode;
1205 + break_spaces = have_read_stdin = false;
1207 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
1209 @@ -262,7 +510,15 @@ main (int argc, char **argv)
1212 case 'b': /* Count bytes rather than columns. */
1213 - count_bytes = true;
1214 + if (operating_mode != column_mode)
1215 + FATAL_ERROR (_("only one way of folding may be specified"));
1216 + operating_mode = byte_mode;
1220 + if (operating_mode != column_mode)
1221 + FATAL_ERROR (_("only one way of folding may be specified"));
1222 + operating_mode = character_mode;
1225 case 's': /* Break at word boundaries. */
1226 diff -urNp coreutils-8.16-orig/src/join.c coreutils-8.16/src/join.c
1227 --- coreutils-8.16-orig/src/join.c 2012-03-24 21:26:51.000000000 +0100
1228 +++ coreutils-8.16/src/join.c 2012-03-26 17:50:02.000000000 +0200
1230 #include <sys/types.h>
1233 +/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
1235 +# include <wchar.h>
1238 +/* Get iswblank(), towupper. */
1240 +# include <wctype.h>
1245 #include "fadvise.h"
1246 #include "hard-locale.h"
1247 #include "linebuffer.h"
1248 -#include "memcasecmp.h"
1250 #include "stdio--.h"
1251 #include "xmemcoll.h"
1252 #include "xstrtol.h"
1253 #include "argmatch.h"
1255 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1256 +#if HAVE_MBRTOWC && defined mbstate_t
1257 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1260 /* The official name of this program (e.g., no 'g' prefix). */
1261 #define PROGRAM_NAME "join"
1263 @@ -135,10 +149,12 @@ static struct outlist outlist_head;
1264 /* Last element in 'outlist', where a new element can be added. */
1265 static struct outlist *outlist_end = &outlist_head;
1267 -/* Tab character separating fields. If negative, fields are separated
1268 - by any nonempty string of blanks, otherwise by exactly one
1269 - tab character whose value (when cast to unsigned char) equals TAB. */
1270 -static int tab = -1;
1271 +/* Tab character separating fields. If NULL, fields are separated
1272 + by any nonempty string of blanks. */
1273 +static char *tab = NULL;
1275 +/* The number of bytes used for tab. */
1276 +static size_t tablen = 0;
1278 /* If nonzero, check that the input is correctly ordered. */
1280 @@ -262,13 +278,14 @@ xfields (struct line *line)
1284 - if (0 <= tab && tab != '\n')
1287 + unsigned char t = tab[0];
1289 - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
1290 + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
1291 extract_field (line, ptr, sep - ptr);
1296 /* Skip leading blanks before the first field. */
1297 while (isblank (to_uchar (*ptr)))
1298 @@ -292,6 +309,148 @@ xfields (struct line *line)
1299 extract_field (line, ptr, lim - ptr);
1304 +xfields_multibyte (struct line *line)
1306 + char *ptr = line->buf.buffer;
1307 + char const *lim = ptr + line->buf.length - 1;
1309 + size_t mblength = 1;
1310 + mbstate_t state, state_bak;
1312 + memset (&state, 0, sizeof (mbstate_t));
1319 + unsigned char t = tab[0];
1321 + for (; ptr < lim; ptr = sep + mblength)
1326 + state_bak = state;
1327 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1329 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1332 + state = state_bak;
1334 + mblength = (mblength < 1) ? 1 : mblength;
1336 + if (mblength == tablen && !memcmp (sep, tab, mblength))
1348 + extract_field (line, ptr, sep - ptr);
1353 + /* Skip leading blanks before the first field. */
1356 + state_bak = state;
1357 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1359 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1362 + state = state_bak;
1365 + mblength = (mblength < 1) ? 1 : mblength;
1367 + if (!iswblank(wc))
1375 + state_bak = state;
1376 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1377 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1380 + state = state_bak;
1383 + mblength = (mblength < 1) ? 1 : mblength;
1385 + sep = ptr + mblength;
1388 + state_bak = state;
1389 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1390 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1393 + state = state_bak;
1396 + mblength = (mblength < 1) ? 1 : mblength;
1398 + if (iswblank (wc))
1404 + extract_field (line, ptr, sep - ptr);
1408 + state_bak = state;
1409 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
1410 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1413 + state = state_bak;
1416 + mblength = (mblength < 1) ? 1 : mblength;
1418 + ptr = sep + mblength;
1421 + state_bak = state;
1422 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
1423 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1426 + state = state_bak;
1429 + mblength = (mblength < 1) ? 1 : mblength;
1431 + if (!iswblank (wc))
1437 + while (ptr < lim);
1440 + extract_field (line, ptr, lim - ptr);
1445 freeline (struct line *line)
1447 @@ -313,56 +472,115 @@ keycmp (struct line const *line1, struct
1448 size_t jf_1, size_t jf_2)
1450 /* Start of field to compare in each file. */
1455 - size_t len2; /* Length of fields to compare. */
1458 + size_t len[2]; /* Length of fields to compare. */
1462 if (jf_1 < line1->nfields)
1464 - beg1 = line1->fields[jf_1].beg;
1465 - len1 = line1->fields[jf_1].len;
1466 + beg[0] = line1->fields[jf_1].beg;
1467 + len[0] = line1->fields[jf_1].len;
1477 if (jf_2 < line2->nfields)
1479 - beg2 = line2->fields[jf_2].beg;
1480 - len2 = line2->fields[jf_2].len;
1481 + beg[1] = line2->fields[jf_2].beg;
1482 + len[1] = line2->fields[jf_2].len;
1493 - return len2 == 0 ? 0 : -1;
1496 + return len[1] == 0 ? 0 : -1;
1502 - /* FIXME: ignore_case does not work with NLS (in particular,
1503 - with multibyte chars). */
1504 - diff = memcasecmp (beg1, beg2, MIN (len1, len2));
1505 +#ifdef HAVE_MBRTOWC
1506 + if (MB_CUR_MAX > 1)
1510 + mbstate_t state, state_bak;
1512 + memset (&state, '\0', sizeof (mbstate_t));
1514 + for (i = 0; i < 2; i++)
1516 + copy[i] = alloca (len[i] + 1);
1518 + for (j = 0; j < MIN (len[0], len[1]);)
1520 + state_bak = state;
1521 + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
1527 + state = state_bak;
1528 + /* Fall through */
1534 + uwc = towupper (wc);
1538 + mbstate_t state_wc;
1540 + memset (&state_wc, '\0', sizeof (mbstate_t));
1541 + wcrtomb (copy[i] + j, uwc, &state_wc);
1544 + memcpy (copy[i] + j, beg[i] + j, mblength);
1548 + copy[i][j] = '\0';
1554 + for (i = 0; i < 2; i++)
1556 + copy[i] = alloca (len[i] + 1);
1558 + for (j = 0; j < MIN (len[0], len[1]); j++)
1559 + copy[i][j] = toupper (beg[i][j]);
1561 + copy[i][j] = '\0';
1567 - if (hard_LC_COLLATE)
1568 - return xmemcoll (beg1, len1, beg2, len2);
1569 - diff = memcmp (beg1, beg2, MIN (len1, len2));
1570 + copy[0] = (unsigned char *) beg[0];
1571 + copy[1] = (unsigned char *) beg[1];
1574 + if (hard_LC_COLLATE)
1575 + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
1576 + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
1581 - return len1 < len2 ? -1 : len1 != len2;
1582 + return len[0] - len[1];
1585 /* Check that successive input lines PREV and CURRENT from input file
1586 @@ -454,6 +672,11 @@ get_line (FILE *fp, struct line **linep,
1588 ++line_no[which - 1];
1591 + if (MB_CUR_MAX > 1)
1592 + xfields_multibyte (line);
1597 if (prevline[which - 1])
1598 @@ -553,21 +776,28 @@ prfield (size_t n, struct line const *li
1600 /* Output all the fields in line, other than the join field. */
1602 +#define PUT_TAB_CHAR \
1606 + fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
1611 prfields (struct line const *line, size_t join_field, size_t autocount)
1614 size_t nfields = autoformat ? autocount : line->nfields;
1615 - char output_separator = tab < 0 ? ' ' : tab;
1617 for (i = 0; i < join_field && i < nfields; ++i)
1619 - putchar (output_separator);
1623 for (i = join_field + 1; i < nfields; ++i)
1625 - putchar (output_separator);
1630 @@ -578,7 +808,6 @@ static void
1631 prjoin (struct line const *line1, struct line const *line2)
1633 const struct outlist *outlist;
1634 - char output_separator = tab < 0 ? ' ' : tab;
1636 struct line const *line;
1638 @@ -612,7 +841,7 @@ prjoin (struct line const *line1, struct
1642 - putchar (output_separator);
1647 @@ -1090,21 +1319,46 @@ main (int argc, char **argv)
1651 - unsigned char newtab = optarg[0];
1652 + char *newtab = NULL;
1654 + newtab = xstrdup (optarg);
1656 + if (MB_CUR_MAX > 1)
1660 + memset (&state, 0, sizeof (mbstate_t));
1661 + newtablen = mbrtowc (NULL, newtab,
1662 + strnlen (newtab, MB_LEN_MAX),
1664 + if (newtablen == (size_t) 0
1665 + || newtablen == (size_t) -1
1666 + || newtablen == (size_t) -2)
1673 - newtab = '\n'; /* '' => process the whole line. */
1675 + newtab = "\n"; /* '' => process the whole line. */
1679 - if (STREQ (optarg, "\\0"))
1682 - error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1684 + if (newtablen == 1 && newtab[1])
1686 + if (STREQ (newtab, "\\0"))
1690 + if (tab != NULL && strcmp (tab, newtab))
1693 + error (EXIT_FAILURE, 0, _("incompatible tabs"));
1695 - if (0 <= tab && tab != newtab)
1696 - error (EXIT_FAILURE, 0, _("incompatible tabs"));
1699 + tablen = newtablen;
1703 case NOCHECK_ORDER_OPTION:
1704 diff -urNp coreutils-8.16-orig/src/pr.c coreutils-8.16/src/pr.c
1705 --- coreutils-8.16-orig/src/pr.c 2012-03-24 21:26:51.000000000 +0100
1706 +++ coreutils-8.16/src/pr.c 2012-03-26 17:50:48.000000000 +0200
1707 @@ -312,6 +312,32 @@
1710 #include <sys/types.h>
1712 +/* Get MB_LEN_MAX. */
1713 +#include <limits.h>
1714 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1715 + installation; work around this configuration error. */
1716 +#if !defined MB_LEN_MAX || MB_LEN_MAX == 1
1717 +# define MB_LEN_MAX 16
1720 +/* Get MB_CUR_MAX. */
1721 +#include <stdlib.h>
1723 +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
1724 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1726 +# include <wchar.h>
1729 +/* Get iswprint(). -- for wcwidth(). */
1731 +# include <wctype.h>
1733 +#if !defined iswprint && !HAVE_ISWPRINT
1734 +# define iswprint(wc) 1
1739 #include "fadvise.h"
1740 @@ -323,6 +349,18 @@
1741 #include "strftime.h"
1742 #include "xstrtol.h"
1744 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1745 +#if HAVE_MBRTOWC && defined mbstate_t
1746 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1749 +#ifndef HAVE_DECL_WCWIDTH
1750 +"this configure-time declaration test was not run"
1752 +#if !HAVE_DECL_WCWIDTH
1753 +extern int wcwidth ();
1756 /* The official name of this program (e.g., no 'g' prefix). */
1757 #define PROGRAM_NAME "pr"
1759 @@ -415,7 +453,20 @@ struct COLUMN
1761 typedef struct COLUMN COLUMN;
1763 -static int char_to_clump (char c);
1764 +/* Function pointers to switch functions for single byte locale or for
1765 + multibyte locale. If multibyte functions do not exist in your system,
1766 + these pointers always point to the functions for single byte locale. */
1767 +static void (*print_char) (char c);
1768 +static int (*char_to_clump) (char c);
1770 +/* Functions for single byte locale. */
1771 +static void print_char_single (char c);
1772 +static int char_to_clump_single (char c);
1774 +/* Functions for multibyte locale. */
1775 +static void print_char_multi (char c);
1776 +static int char_to_clump_multi (char c);
1778 static bool read_line (COLUMN *p);
1779 static bool print_page (void);
1780 static bool print_stored (COLUMN *p);
1781 @@ -425,6 +476,7 @@ static void print_header (void);
1782 static void pad_across_to (int position);
1783 static void add_line_number (COLUMN *p);
1784 static void getoptarg (char *arg, char switch_char, char *character,
1785 + int *character_length, int *character_width,
1787 static void print_files (int number_of_files, char **av);
1788 static void init_parameters (int number_of_files);
1789 @@ -438,7 +490,6 @@ static void store_char (char c);
1790 static void pad_down (int lines);
1791 static void read_rest_of_line (COLUMN *p);
1792 static void skip_read (COLUMN *p, int column_number);
1793 -static void print_char (char c);
1794 static void cleanup (void);
1795 static void print_sep_string (void);
1796 static void separator_string (const char *optarg_S);
1797 @@ -450,7 +501,7 @@ static COLUMN *column_vector;
1798 we store the leftmost columns contiguously in buff.
1799 To print a line from buff, get the index of the first character
1800 from line_vector[i], and print up to line_vector[i + 1]. */
1802 +static unsigned char *buff;
1804 /* Index of the position in buff where the next character
1806 @@ -554,7 +605,7 @@ static int chars_per_column;
1807 static bool untabify_input = false;
1809 /* (-e) The input tab character. */
1810 -static char input_tab_char = '\t';
1811 +static char input_tab_char[MB_LEN_MAX] = "\t";
1813 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ...
1814 where the leftmost column is 1. */
1815 @@ -564,7 +615,10 @@ static int chars_per_input_tab = 8;
1816 static bool tabify_output = false;
1818 /* (-i) The output tab character. */
1819 -static char output_tab_char = '\t';
1820 +static char output_tab_char[MB_LEN_MAX] = "\t";
1822 +/* (-i) The byte length of output tab character. */
1823 +static int output_tab_char_length = 1;
1825 /* (-i) The width of the output tab. */
1826 static int chars_per_output_tab = 8;
1827 @@ -638,7 +692,13 @@ static int power_10;
1828 static bool numbered_lines = false;
1830 /* (-n) Character which follows each line number. */
1831 -static char number_separator = '\t';
1832 +static char number_separator[MB_LEN_MAX] = "\t";
1834 +/* (-n) The byte length of the character which follows each line number. */
1835 +static int number_separator_length = 1;
1837 +/* (-n) The character width of the character which follows each line number. */
1838 +static int number_separator_width = 0;
1840 /* (-n) line counting starts with 1st line of input file (not with 1st
1841 line of 1st page printed). */
1842 @@ -691,6 +751,7 @@ static bool use_col_separator = false;
1843 -a|COLUMN|-m is a 'space' and with the -J option a 'tab'. */
1844 static char *col_sep_string = (char *) "";
1845 static int col_sep_length = 0;
1846 +static int col_sep_width = 0;
1847 static char *column_separator = (char *) " ";
1848 static char *line_separator = (char *) "\t";
1850 @@ -847,6 +908,13 @@ separator_string (const char *optarg_S)
1851 col_sep_length = (int) strlen (optarg_S);
1852 col_sep_string = xmalloc (col_sep_length + 1);
1853 strcpy (col_sep_string, optarg_S);
1856 + if (MB_CUR_MAX > 1)
1857 + col_sep_width = mbswidth (col_sep_string, 0);
1860 + col_sep_width = col_sep_length;
1864 @@ -871,6 +939,21 @@ main (int argc, char **argv)
1866 atexit (close_stdout);
1868 +/* Define which functions are used, the ones for single byte locale or the ones
1869 + for multibyte locale. */
1871 + if (MB_CUR_MAX > 1)
1873 + print_char = print_char_multi;
1874 + char_to_clump = char_to_clump_multi;
1879 + print_char = print_char_single;
1880 + char_to_clump = char_to_clump_single;
1884 file_names = (argc > 1
1885 ? xmalloc ((argc - 1) * sizeof (char *))
1886 @@ -947,8 +1030,12 @@ main (int argc, char **argv)
1890 - getoptarg (optarg, 'e', &input_tab_char,
1891 - &chars_per_input_tab);
1893 + int dummy_length, dummy_width;
1895 + getoptarg (optarg, 'e', input_tab_char, &dummy_length,
1896 + &dummy_width, &chars_per_input_tab);
1898 /* Could check tab width > 0. */
1899 untabify_input = true;
1901 @@ -961,8 +1048,12 @@ main (int argc, char **argv)
1905 - getoptarg (optarg, 'i', &output_tab_char,
1906 - &chars_per_output_tab);
1910 + getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length,
1911 + &dummy_width, &chars_per_output_tab);
1913 /* Could check tab width > 0. */
1914 tabify_output = true;
1916 @@ -989,8 +1080,8 @@ main (int argc, char **argv)
1918 numbered_lines = true;
1920 - getoptarg (optarg, 'n', &number_separator,
1921 - &chars_per_number);
1922 + getoptarg (optarg, 'n', number_separator, &number_separator_length,
1923 + &number_separator_width, &chars_per_number);
1927 @@ -1029,7 +1120,7 @@ main (int argc, char **argv)
1929 /* Reset an additional input of -s, -S dominates -s */
1930 col_sep_string = bad_cast ("");
1931 - col_sep_length = 0;
1932 + col_sep_length = col_sep_width = 0;
1933 use_col_separator = true;
1935 separator_string (optarg);
1936 @@ -1186,10 +1277,45 @@ main (int argc, char **argv)
1940 -getoptarg (char *arg, char switch_char, char *character, int *number)
1941 +getoptarg (char *arg, char switch_char, char *character, int *character_length,
1942 + int *character_width, int *number)
1944 if (!ISDIGIT (*arg))
1945 - *character = *arg++;
1947 +#ifdef HAVE_MBRTOWC
1948 + if (MB_CUR_MAX > 1) /* for multibyte locale. */
1953 + mbstate_t state = {'\0'};
1955 + mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state);
1957 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1959 + *character_length = 1;
1960 + *character_width = 1;
1964 + *character_length = (mblength < 1) ? 1 : mblength;
1965 + width = wcwidth (wc);
1966 + *character_width = (width < 0) ? 0 : width;
1969 + strncpy (character, arg, *character_length);
1970 + arg += *character_length;
1972 + else /* for single byte locale. */
1975 + *character = *arg++;
1976 + *character_length = 1;
1977 + *character_width = 1;
1984 @@ -1211,6 +1337,11 @@ static void
1985 init_parameters (int number_of_files)
1987 int chars_used_by_number = 0;
1990 + if (MB_CUR_MAX > 1)
1991 + mb_len = MB_LEN_MAX;
1994 lines_per_body = lines_per_page - lines_per_header - lines_per_footer;
1995 if (lines_per_body <= 0)
1996 @@ -1248,7 +1379,7 @@ init_parameters (int number_of_files)
1998 col_sep_string = column_separator;
2000 - col_sep_length = 1;
2001 + col_sep_length = col_sep_width = 1;
2002 use_col_separator = true;
2004 /* It's rather pointless to define a TAB separator with column
2005 @@ -1279,11 +1410,11 @@ init_parameters (int number_of_files)
2006 TAB_WIDTH (chars_per_input_tab, chars_per_number); */
2008 /* Estimate chars_per_text without any margin and keep it constant. */
2009 - if (number_separator == '\t')
2010 + if (number_separator[0] == '\t')
2011 number_width = chars_per_number +
2012 TAB_WIDTH (chars_per_default_tab, chars_per_number);
2014 - number_width = chars_per_number + 1;
2015 + number_width = chars_per_number + number_separator_width;
2017 /* The number is part of the column width unless we are
2018 printing files in parallel. */
2019 @@ -1298,7 +1429,7 @@ init_parameters (int number_of_files)
2022 chars_per_column = (chars_per_line - chars_used_by_number -
2023 - (columns - 1) * col_sep_length) / columns;
2024 + (columns - 1) * col_sep_width) / columns;
2026 if (chars_per_column < 1)
2027 error (EXIT_FAILURE, 0, _("page width too narrow"));
2028 @@ -1315,7 +1446,7 @@ init_parameters (int number_of_files)
2029 We've to use 8 as the lower limit, if we use chars_per_default_tab = 8
2030 to expand a tab which is not an input_tab-char. */
2032 - clump_buff = xmalloc (MAX (8, chars_per_input_tab));
2033 + clump_buff = xmalloc (mb_len * MAX (8, chars_per_input_tab));
2036 /* Open the necessary files,
2037 @@ -1423,7 +1554,7 @@ init_funcs (void)
2039 /* Enlarge p->start_position of first column to use the same form of
2040 padding_not_printed with all columns. */
2041 - h = h + col_sep_length;
2042 + h = h + col_sep_width;
2044 /* This loop takes care of all but the rightmost column. */
2046 @@ -1457,7 +1588,7 @@ init_funcs (void)
2050 - h = h_next + col_sep_length;
2051 + h = h_next + col_sep_width;
2052 h_next = h + chars_per_column;
2055 @@ -1748,9 +1879,9 @@ static void
2056 align_column (COLUMN *p)
2058 padding_not_printed = p->start_position;
2059 - if (padding_not_printed - col_sep_length > 0)
2060 + if (padding_not_printed - col_sep_width > 0)
2062 - pad_across_to (padding_not_printed - col_sep_length);
2063 + pad_across_to (padding_not_printed - col_sep_width);
2064 padding_not_printed = ANYWHERE;
2067 @@ -2021,13 +2152,13 @@ store_char (char c)
2068 /* May be too generous. */
2069 buff = X2REALLOC (buff, &buff_allocated);
2071 - buff[buff_current++] = c;
2072 + buff[buff_current++] = (unsigned char) c;
2076 add_line_number (COLUMN *p)
2083 @@ -2050,22 +2181,24 @@ add_line_number (COLUMN *p)
2084 /* Tabification is assumed for multiple columns, also for n-separators,
2085 but 'default n-separator = TAB' hasn't been given priority over
2086 equal column_width also specified by POSIX. */
2087 - if (number_separator == '\t')
2088 + if (number_separator[0] == '\t')
2090 i = number_width - chars_per_number;
2092 (p->char_func) (' ');
2095 - (p->char_func) (number_separator);
2096 + for (j = 0; j < number_separator_length; j++)
2097 + (p->char_func) (number_separator[j]);
2100 /* To comply with POSIX, we avoid any expansion of default TAB
2101 separator with a single column output. No column_width requirement
2102 has to be considered. */
2104 - (p->char_func) (number_separator);
2105 - if (number_separator == '\t')
2106 + for (j = 0; j < number_separator_length; j++)
2107 + (p->char_func) (number_separator[j]);
2108 + if (number_separator[0] == '\t')
2109 output_position = POS_AFTER_TAB (chars_per_output_tab,
2112 @@ -2226,7 +2359,7 @@ print_white_space (void)
2113 while (goal - h_old > 1
2114 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
2116 - putchar (output_tab_char);
2117 + fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout);
2120 while (++h_old <= goal)
2121 @@ -2246,6 +2379,7 @@ print_sep_string (void)
2124 int l = col_sep_length;
2125 + int not_space_flag;
2129 @@ -2259,6 +2393,7 @@ print_sep_string (void)
2131 for (; separators_not_printed > 0; --separators_not_printed)
2133 + not_space_flag = 0;
2136 /* 3 types of sep_strings: spaces only, spaces and chars,
2137 @@ -2272,12 +2407,15 @@ print_sep_string (void)
2141 + not_space_flag = 1;
2142 if (spaces_not_printed > 0)
2143 print_white_space ();
2145 - ++output_position;
2148 + if (not_space_flag)
2149 + output_position += col_sep_width;
2151 /* sep_string ends with some spaces */
2152 if (spaces_not_printed > 0)
2153 print_white_space ();
2154 @@ -2305,7 +2443,7 @@ print_clump (COLUMN *p, int n, char *clu
2155 required number of tabs and spaces. */
2158 -print_char (char c)
2159 +print_char_single (char c)
2163 @@ -2329,6 +2467,74 @@ print_char (char c)
2167 +#ifdef HAVE_MBRTOWC
2169 +print_char_multi (char c)
2171 + static size_t mbc_pos = 0;
2172 + static char mbc[MB_LEN_MAX] = {'\0'};
2173 + static mbstate_t state = {'\0'};
2174 + mbstate_t state_bak;
2179 + if (tabify_output)
2181 + state_bak = state;
2182 + mbc[mbc_pos++] = c;
2183 + mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
2185 + while (mbc_pos > 0)
2190 + state = state_bak;
2194 + state = state_bak;
2195 + ++output_position;
2197 + memmove (mbc, mbc + 1, MB_CUR_MAX - 1);
2207 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
2209 + ++spaces_not_printed;
2212 + else if (spaces_not_printed > 0)
2213 + print_white_space ();
2215 + /* Nonprintables are assumed to have width 0, except L'\b'. */
2216 + if ((width = wcwidth (wc)) < 1)
2219 + --output_position;
2222 + output_position += width;
2224 + fwrite (mbc, sizeof(char), mblength, stdout);
2225 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
2226 + mbc_pos -= mblength;
2235 /* Skip to page PAGE before printing.
2236 PAGE may be larger than total number of pages. */
2238 @@ -2508,9 +2714,9 @@ read_line (COLUMN *p)
2239 align_empty_cols = false;
2242 - if (padding_not_printed - col_sep_length > 0)
2243 + if (padding_not_printed - col_sep_width > 0)
2245 - pad_across_to (padding_not_printed - col_sep_length);
2246 + pad_across_to (padding_not_printed - col_sep_width);
2247 padding_not_printed = ANYWHERE;
2250 @@ -2611,9 +2817,9 @@ print_stored (COLUMN *p)
2254 - if (padding_not_printed - col_sep_length > 0)
2255 + if (padding_not_printed - col_sep_width > 0)
2257 - pad_across_to (padding_not_printed - col_sep_length);
2258 + pad_across_to (padding_not_printed - col_sep_width);
2259 padding_not_printed = ANYWHERE;
2262 @@ -2626,8 +2832,8 @@ print_stored (COLUMN *p)
2263 if (spaces_not_printed == 0)
2265 output_position = p->start_position + end_vector[line];
2266 - if (p->start_position - col_sep_length == chars_per_margin)
2267 - output_position -= col_sep_length;
2268 + if (p->start_position - col_sep_width == chars_per_margin)
2269 + output_position -= col_sep_width;
2273 @@ -2646,7 +2852,7 @@ print_stored (COLUMN *p)
2274 number of characters is 1.) */
2277 -char_to_clump (char c)
2278 +char_to_clump_single (char c)
2280 unsigned char uc = c;
2281 char *s = clump_buff;
2282 @@ -2656,10 +2862,10 @@ char_to_clump (char c)
2284 int chars_per_c = 8;
2286 - if (c == input_tab_char)
2287 + if (c == input_tab_char[0])
2288 chars_per_c = chars_per_input_tab;
2290 - if (c == input_tab_char || c == '\t')
2291 + if (c == input_tab_char[0] || c == '\t')
2293 width = TAB_WIDTH (chars_per_c, input_position);
2295 @@ -2740,6 +2946,154 @@ char_to_clump (char c)
2299 +#ifdef HAVE_MBRTOWC
2301 +char_to_clump_multi (char c)
2303 + static size_t mbc_pos = 0;
2304 + static char mbc[MB_LEN_MAX] = {'\0'};
2305 + static mbstate_t state = {'\0'};
2306 + mbstate_t state_bak;
2310 + register char *s = clump_buff;
2311 + register int i, j;
2315 + int chars_per_c = 8;
2317 + state_bak = state;
2318 + mbc[mbc_pos++] = c;
2319 + mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
2323 + while (mbc_pos > 0)
2328 + state = state_bak;
2332 + state = state_bak;
2335 + if (use_esc_sequence || use_cntrl_prefix)
2340 + sprintf (esc_buff, "%03o", (unsigned char) mbc[0]);
2341 + for (i = 0; i <= 2; ++i)
2342 + *s++ = (int) esc_buff[i];
2354 + /* Fall through */
2357 + if (memcmp (mbc, input_tab_char, mblength) == 0)
2358 + chars_per_c = chars_per_input_tab;
2360 + if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t')
2364 + width_inc = TAB_WIDTH (chars_per_c, input_position);
2365 + width += width_inc;
2367 + if (untabify_input)
2369 + for (i = width_inc; i; --i)
2371 + chars += width_inc;
2375 + for (i = 0; i < mblength; i++)
2377 + chars += mblength;
2380 + else if ((wc_width = wcwidth (wc)) < 1)
2382 + if (use_esc_sequence)
2384 + for (i = 0; i < mblength; i++)
2389 + sprintf (esc_buff, "%03o", (unsigned char) mbc[i]);
2390 + for (j = 0; j <= 2; ++j)
2391 + *s++ = (int) esc_buff[j];
2394 + else if (use_cntrl_prefix)
2405 + for (i = 0; i < mblength; i++)
2410 + sprintf (esc_buff, "%03o", (unsigned char) mbc[i]);
2411 + for (j = 0; j <= 2; ++j)
2412 + *s++ = (int) esc_buff[j];
2416 + else if (wc == L'\b')
2425 + chars += mblength;
2426 + for (i = 0; i < mblength; i++)
2432 + width += wc_width;
2433 + chars += mblength;
2434 + for (i = 0; i < mblength; i++)
2438 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
2439 + mbc_pos -= mblength;
2442 + input_position += width;
2447 /* We've just printed some files and need to clean up things before
2448 looking for more options and printing the next batch of files.
2450 diff -urNp coreutils-8.16-orig/src/sort.c coreutils-8.16/src/sort.c
2451 --- coreutils-8.16-orig/src/sort.c 2012-03-24 21:26:51.000000000 +0100
2452 +++ coreutils-8.16/src/sort.c 2012-03-26 17:35:09.000000000 +0200
2457 +#include <assert.h>
2459 #include <pthread.h>
2460 #include <sys/types.h>
2461 #include <sys/wait.h>
2464 +# include <wchar.h>
2466 +/* Get isw* functions. */
2468 +# include <wctype.h>
2472 #include "argmatch.h"
2474 @@ -167,12 +176,34 @@ static int thousands_sep;
2476 /* Nonzero if the corresponding locales are hard. */
2477 static bool hard_LC_COLLATE;
2478 -#if HAVE_NL_LANGINFO
2479 +#if HAVE_LANGINFO_CODESET
2480 static bool hard_LC_TIME;
2483 #define NONZERO(x) ((x) != 0)
2485 +/* get a multibyte character's byte length. */
2486 +#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
2490 + mbstate_t state_bak; \
2492 + state_bak = STATE; \
2493 + mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \
2495 + switch (MBLENGTH) \
2497 + case (size_t)-1: \
2498 + case (size_t)-2: \
2499 + STATE = state_bak; \
2500 + /* Fall through. */ \
2507 /* The kind of blanks for '-b' to skip in various options. */
2508 enum blanktype { bl_start, bl_end, bl_both };
2510 @@ -343,13 +374,11 @@ static bool reverse;
2511 they were read if all keys compare equal. */
2514 -/* If TAB has this value, blanks separate fields. */
2515 -enum { TAB_DEFAULT = CHAR_MAX + 1 };
2517 -/* Tab character separating fields. If TAB_DEFAULT, then fields are
2518 +/* Tab character separating fields. If tab_length is 0, then fields are
2519 separated by the empty string between a non-blank character and a blank
2521 -static int tab = TAB_DEFAULT;
2522 +static char tab[MB_LEN_MAX + 1];
2523 +static size_t tab_length = 0;
2525 /* Flag to remove consecutive duplicate lines from the output.
2526 Only the last of a sequence of equal lines will be output. */
2527 @@ -782,6 +811,46 @@ reap_all (void)
2531 +/* Function pointers. */
2533 +(*inittables) (void);
2535 +(*begfield) (const struct line*, const struct keyfield *);
2537 +(*limfield) (const struct line*, const struct keyfield *);
2539 +(*skipblanks) (char **ptr, char *lim);
2541 +(*getmonth) (char const *, size_t, char **);
2543 +(*keycompare) (const struct line *, const struct line *);
2545 +(*numcompare) (const char *, const char *);
2547 +/* Test for white space multibyte character.
2548 + Set LENGTH to the byte length of the investigated multibyte character. */
2551 +ismbblank (const char *str, size_t len, size_t *length)
2557 + memset (&state, '\0', sizeof(mbstate_t));
2558 + mblength = mbrtowc (&wc, str, len, &state);
2560 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2566 + *length = (mblength < 1) ? 1 : mblength;
2567 + return iswblank (wc);
2571 /* Clean up any remaining temporary files. */
2574 @@ -1214,7 +1283,7 @@ zaptemp (char const *name)
2578 -#if HAVE_NL_LANGINFO
2579 +#if HAVE_LANGINFO_CODESET
2582 struct_month_cmp (void const *m1, void const *m2)
2583 @@ -1229,7 +1298,7 @@ struct_month_cmp (void const *m1, void c
2584 /* Initialize the character class tables. */
2588 +inittables_uni (void)
2592 @@ -1241,7 +1310,7 @@ inittables (void)
2593 fold_toupper[i] = toupper (i);
2596 -#if HAVE_NL_LANGINFO
2597 +#if HAVE_LANGINFO_CODESET
2598 /* If we're not in the "C" locale, read different names for months. */
2601 @@ -1323,6 +1392,84 @@ specify_nmerge (int oi, char c, char con
2602 xstrtol_fatal (e, oi, c, long_options, s);
2607 +inittables_mb (void)
2610 + char *name, *s, *lc_time, *lc_ctype;
2611 + size_t s_len, mblength;
2612 + char mbc[MB_LEN_MAX];
2614 + mbstate_t state_mb, state_wc;
2616 + lc_time = setlocale (LC_TIME, "");
2618 + lc_time = xstrdup (lc_time);
2620 + lc_ctype = setlocale (LC_CTYPE, "");
2622 + lc_ctype = xstrdup (lc_ctype);
2624 + if (lc_time && lc_ctype)
2625 + /* temporarily set LC_CTYPE to match LC_TIME, so that we can convert
2626 + * the names of months to upper case */
2627 + setlocale (LC_CTYPE, lc_time);
2629 + for (i = 0; i < MONTHS_PER_YEAR; i++)
2631 + s = (char *) nl_langinfo (ABMON_1 + i);
2632 + s_len = strlen (s);
2633 + monthtab[i].name = name = (char *) xmalloc (s_len + 1);
2634 + monthtab[i].val = i + 1;
2636 + memset (&state_mb, '\0', sizeof (mbstate_t));
2637 + memset (&state_wc, '\0', sizeof (mbstate_t));
2639 + for (j = 0; j < s_len;)
2641 + if (!ismbblank (s + j, s_len - j, &mblength))
2646 + for (k = 0; j < s_len;)
2648 + mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
2649 + assert (mblength != (size_t)-1 && mblength != (size_t)-2);
2650 + if (mblength == 0)
2653 + pwc = towupper (wc);
2656 + memcpy (mbc, s + j, mblength);
2662 + mblength = wcrtomb (mbc, pwc, &state_wc);
2663 + assert (mblength != (size_t)0 && mblength != (size_t)-1);
2666 + for (l = 0; l < mblength; l++)
2667 + name[k++] = mbc[l];
2671 + qsort ((void *) monthtab, MONTHS_PER_YEAR,
2672 + sizeof (struct month), struct_month_cmp);
2674 + if (lc_time && lc_ctype)
2675 + /* restore the original locales */
2676 + setlocale (LC_CTYPE, lc_ctype);
2683 /* Specify the amount of main memory to use when sorting. */
2685 specify_sort_size (int oi, char c, char const *s)
2686 @@ -1551,7 +1698,7 @@ buffer_linelim (struct buffer const *buf
2690 -begfield (struct line const *line, struct keyfield const *key)
2691 +begfield_uni (const struct line *line, const struct keyfield *key)
2693 char *ptr = line->text, *lim = ptr + line->length - 1;
2694 size_t sword = key->sword;
2695 @@ -1560,10 +1707,10 @@ begfield (struct line const *line, struc
2696 /* The leading field separator itself is included in a field when -t
2699 - if (tab != TAB_DEFAULT)
2701 while (ptr < lim && sword--)
2703 - while (ptr < lim && *ptr != tab)
2704 + while (ptr < lim && *ptr != tab[0])
2708 @@ -1589,11 +1736,70 @@ begfield (struct line const *line, struc
2714 +begfield_mb (const struct line *line, const struct keyfield *key)
2717 + char *ptr = line->text, *lim = ptr + line->length - 1;
2718 + size_t sword = key->sword;
2719 + size_t schar = key->schar;
2723 + memset (&state, '\0', sizeof(mbstate_t));
2726 + while (ptr < lim && sword--)
2728 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
2730 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2735 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2740 + while (ptr < lim && sword--)
2742 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2746 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2749 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
2753 + if (key->skipsblanks)
2754 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2757 + for (i = 0; i < schar; i++)
2759 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2761 + if (ptr + mblength > lim)
2771 /* Return the limit of (a pointer to the first character after) the field
2772 in LINE specified by KEY. */
2775 -limfield (struct line const *line, struct keyfield const *key)
2776 +limfield_uni (const struct line *line, const struct keyfield *key)
2778 char *ptr = line->text, *lim = ptr + line->length - 1;
2779 size_t eword = key->eword, echar = key->echar;
2780 @@ -1608,10 +1814,10 @@ limfield (struct line const *line, struc
2781 'beginning' is the first character following the delimiting TAB.
2782 Otherwise, leave PTR pointing at the first 'blank' character after
2783 the preceding field. */
2784 - if (tab != TAB_DEFAULT)
2786 while (ptr < lim && eword--)
2788 - while (ptr < lim && *ptr != tab)
2789 + while (ptr < lim && *ptr != tab[0])
2791 if (ptr < lim && (eword || echar))
2793 @@ -1657,10 +1863,10 @@ limfield (struct line const *line, struc
2796 /* Make LIM point to the end of (one byte past) the current field. */
2797 - if (tab != TAB_DEFAULT)
2801 - newlim = memchr (ptr, tab, lim - ptr);
2802 + newlim = memchr (ptr, tab[0], lim - ptr);
2806 @@ -1691,6 +1897,130 @@ limfield (struct line const *line, struc
2812 +limfield_mb (const struct line *line, const struct keyfield *key)
2814 + char *ptr = line->text, *lim = ptr + line->length - 1;
2815 + size_t eword = key->eword, echar = key->echar;
2821 + eword++; /* skip all of end field. */
2823 + memset (&state, '\0', sizeof(mbstate_t));
2826 + while (ptr < lim && eword--)
2828 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
2830 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2833 + if (ptr < lim && (eword | echar))
2835 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2840 + while (ptr < lim && eword--)
2842 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2846 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2849 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
2854 +# ifdef POSIX_UNSPECIFIED
2855 + /* Make LIM point to the end of (one byte past) the current field. */
2861 + for (p = ptr; p < lim;)
2863 + if (memcmp (p, tab, tab_length) == 0)
2869 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2878 + while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
2879 + newlim += mblength;
2882 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2885 + while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
2886 + newlim += mblength;
2893 + /* If we're skipping leading blanks, don't start counting characters
2894 + * until after skipping past any leading blanks. */
2895 + if (key->skipsblanks)
2896 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2899 + memset (&state, '\0', sizeof(mbstate_t));
2901 + /* Advance PTR by ECHAR (if possible), but no further than LIM. */
2902 + for (i = 0; i < echar; i++)
2904 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2906 + if (ptr + mblength > lim)
2918 +skipblanks_uni (char **ptr, char *lim)
2920 + while (*ptr < lim && blanks[to_uchar (**ptr)])
2926 +skipblanks_mb (char **ptr, char *lim)
2929 + while (*ptr < lim && ismbblank (*ptr, lim - *ptr, &mblength))
2930 + (*ptr) += mblength;
2934 /* Fill BUF reading from FP, moving buf->left bytes from the end
2935 of buf->buf to the beginning first. If EOF is reached and the
2936 file wasn't terminated by a newline, supply one. Set up BUF's line
2937 @@ -1777,8 +2107,22 @@ fillbuf (struct buffer *buf, FILE *fp, c
2940 if (key->skipsblanks)
2941 - while (blanks[to_uchar (*line_start)])
2945 + if (MB_CUR_MAX > 1)
2948 + while (line_start < line->keylim &&
2949 + ismbblank (line_start,
2950 + line->keylim - line_start,
2952 + line_start += mblength;
2956 + while (blanks[to_uchar (*line_start)])
2959 line->keybeg = line_start;
2962 @@ -1899,7 +2243,7 @@ human_numcompare (char const *a, char co
2966 -numcompare (char const *a, char const *b)
2967 +numcompare_uni (const char *a, const char *b)
2969 while (blanks[to_uchar (*a)])
2971 @@ -1909,6 +2253,25 @@ numcompare (char const *a, char const *b
2972 return strnumcmp (a, b, decimal_point, thousands_sep);
2977 +numcompare_mb (const char *a, const char *b)
2979 + size_t mblength, len;
2980 + len = strlen (a); /* okay for UTF-8 */
2981 + while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
2986 + len = strlen (b); /* okay for UTF-8 */
2987 + while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
2990 + return strnumcmp (a, b, decimal_point, thousands_sep);
2992 +#endif /* HAVE_MBRTOWC */
2994 /* Work around a problem whereby the long double value returned by glibc's
2995 strtold ("NaN", ...) contains uninitialized bits: clear all bytes of
2996 A and B before calling strtold. FIXME: remove this function once
2997 @@ -1959,7 +2322,7 @@ general_numcompare (char const *sa, char
2998 Return 0 if the name in S is not recognized. */
3001 -getmonth (char const *month, char **ea)
3002 +getmonth_uni (char const *month, size_t len, char **ea)
3005 size_t hi = MONTHS_PER_YEAR;
3006 @@ -2234,15 +2597,14 @@ debug_key (struct line const *line, stru
3010 - while (blanks[to_uchar (*beg)])
3012 + skipblanks (&beg, lim);
3014 char *tighter_lim = beg;
3018 else if (key->month)
3019 - getmonth (beg, &tighter_lim);
3020 + getmonth (beg, lim-beg, &tighter_lim);
3021 else if (key->general_numeric)
3022 ignore_value (strtold (beg, &tighter_lim));
3023 else if (key->numeric || key->human_numeric)
3024 @@ -2386,7 +2748,7 @@ key_warnings (struct keyfield const *gke
3025 bool maybe_space_aligned = !hard_LC_COLLATE && default_key_compare (key)
3026 && !(key->schar || key->echar);
3027 bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */
3028 - if (!gkey_only && tab == TAB_DEFAULT && !line_offset
3029 + if (!gkey_only && !tab_length && !line_offset
3030 && ((!key->skipsblanks && !(implicit_skip || maybe_space_aligned))
3031 || (!key->skipsblanks && key->schar)
3032 || (!key->skipeblanks && key->echar)))
3033 @@ -2444,11 +2806,83 @@ key_warnings (struct keyfield const *gke
3034 error (0, 0, _("option '-r' only applies to last-resort comparison"));
3039 +getmonth_mb (const char *s, size_t len, char **ea)
3042 + register size_t i;
3043 + register int lo = 0, hi = MONTHS_PER_YEAR, result;
3045 + size_t wclength, mblength;
3047 + const wchar_t **wpp;
3048 + wchar_t *month_wcs;
3051 + while (len > 0 && ismbblank (s, len, &mblength))
3060 + month = (char *) alloca (len + 1);
3062 + tmp = (char *) alloca (len + 1);
3063 + memcpy (tmp, s, len);
3065 + pp = (const char **)&tmp;
3066 + month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t));
3067 + memset (&state, '\0', sizeof(mbstate_t));
3069 + wclength = mbsrtowcs (month_wcs, pp, len + 1, &state);
3070 + if (wclength == (size_t)-1 || *pp != NULL)
3071 + error (SORT_FAILURE, 0, _("Invalid multibyte input %s."), quote(s));
3073 + for (i = 0; i < wclength; i++)
3075 + month_wcs[i] = towupper(month_wcs[i]);
3076 + if (iswblank (month_wcs[i]))
3078 + month_wcs[i] = L'\0';
3083 + wpp = (const wchar_t **)&month_wcs;
3085 + mblength = wcsrtombs (month, wpp, len + 1, &state);
3086 + assert (mblength != (-1) && *wpp == NULL);
3090 + int ix = (lo + hi) / 2;
3092 + if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
3097 + while (hi - lo > 1);
3099 + result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
3100 + ? monthtab[lo].val : 0);
3103 + *ea = s + strlen (monthtab[lo].name);
3109 /* Compare two lines A and B trying every key in sequence until there
3110 are no more keys or a difference is found. */
3113 -keycompare (struct line const *a, struct line const *b)
3114 +keycompare_uni (const struct line *a, const struct line *b)
3116 struct keyfield *key = keylist;
3118 @@ -2533,7 +2967,7 @@ keycompare (struct line const *a, struct
3119 else if (key->human_numeric)
3120 diff = human_numcompare (ta, tb);
3121 else if (key->month)
3122 - diff = getmonth (ta, NULL) - getmonth (tb, NULL);
3123 + diff = getmonth (ta, tlena, NULL) - getmonth (tb, tlenb, NULL);
3124 else if (key->random)
3125 diff = compare_random (ta, tlena, tb, tlenb);
3126 else if (key->version)
3127 @@ -2649,6 +3083,180 @@ keycompare (struct line const *a, struct
3128 return key->reverse ? -diff : diff;
3133 +keycompare_mb (const struct line *a, const struct line *b)
3135 + struct keyfield *key = keylist;
3137 + /* For the first iteration only, the key positions have been
3138 + precomputed for us. */
3139 + char *texta = a->keybeg;
3140 + char *textb = b->keybeg;
3141 + char *lima = a->keylim;
3142 + char *limb = b->keylim;
3144 + size_t mblength_a, mblength_b;
3145 + wchar_t wc_a, wc_b;
3146 + mbstate_t state_a, state_b;
3150 + memset (&state_a, '\0', sizeof(mbstate_t));
3151 + memset (&state_b, '\0', sizeof(mbstate_t));
3155 + char const *translate = key->translate;
3156 + bool const *ignore = key->ignore;
3158 + /* Find the lengths. */
3159 + size_t lena = lima <= texta ? 0 : lima - texta;
3160 + size_t lenb = limb <= textb ? 0 : limb - textb;
3162 + /* Actually compare the fields. */
3164 + diff = compare_random (texta, lena, textb, lenb);
3165 + else if (key->numeric | key->general_numeric | key->human_numeric)
3167 + char savea = *lima, saveb = *limb;
3169 + *lima = *limb = '\0';
3170 + diff = (key->numeric ? numcompare (texta, textb)
3171 + : key->general_numeric ? general_numcompare (texta, textb)
3172 + : human_numcompare (texta, textb));
3173 + *lima = savea, *limb = saveb;
3175 + else if (key->version)
3176 + diff = filevercmp (texta, textb);
3177 + else if (key->month)
3178 + diff = getmonth (texta, lena, NULL) - getmonth (textb, lenb, NULL);
3181 + if (ignore || translate)
3183 + char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
3184 + char *copy_b = copy_a + lena + 1;
3185 + size_t new_len_a, new_len_b;
3188 + /* Ignore and/or translate chars before comparing. */
3189 +# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \
3193 + char mbc[MB_LEN_MAX]; \
3194 + mbstate_t state_wc; \
3196 + for (NEW_LEN = i = 0; i < LEN;) \
3198 + mbstate_t state_bak; \
3200 + state_bak = STATE; \
3201 + MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \
3203 + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \
3204 + || MBLENGTH == 0) \
3206 + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \
3207 + STATE = state_bak; \
3209 + COPY[NEW_LEN++] = TEXT[i]; \
3216 + if ((ignore == nonprinting && !iswprint (WC)) \
3217 + || (ignore == nondictionary \
3218 + && !iswalnum (WC) && !iswblank (WC))) \
3228 + uwc = towupper(WC); \
3231 + memcpy (mbc, TEXT + i, MBLENGTH); \
3238 + memset (&state_wc, '\0', sizeof (mbstate_t)); \
3240 + MBLENGTH = wcrtomb (mbc, WC, &state_wc); \
3241 + assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \
3244 + for (j = 0; j < MBLENGTH; j++) \
3245 + COPY[NEW_LEN++] = mbc[j]; \
3248 + for (j = 0; j < MBLENGTH; j++) \
3249 + COPY[NEW_LEN++] = TEXT[i++]; \
3251 + COPY[NEW_LEN] = '\0'; \
3254 + IGNORE_CHARS (new_len_a, lena, texta, copy_a,
3255 + wc_a, mblength_a, state_a);
3256 + IGNORE_CHARS (new_len_b, lenb, textb, copy_b,
3257 + wc_b, mblength_b, state_b);
3258 + diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
3260 + else if (lena == 0)
3261 + diff = - NONZERO (lenb);
3262 + else if (lenb == 0)
3265 + diff = xmemcoll (texta, lena, textb, lenb);
3275 + /* Find the beginning and limit of the next field. */
3276 + if (key->eword != -1)
3277 + lima = limfield (a, key), limb = limfield (b, key);
3279 + lima = a->text + a->length - 1, limb = b->text + b->length - 1;
3281 + if (key->sword != -1)
3282 + texta = begfield (a, key), textb = begfield (b, key);
3285 + texta = a->text, textb = b->text;
3286 + if (key->skipsblanks)
3288 + while (texta < lima && ismbblank (texta, lima - texta, &mblength_a))
3289 + texta += mblength_a;
3290 + while (textb < limb && ismbblank (textb, limb - textb, &mblength_b))
3291 + textb += mblength_b;
3301 + return key->reverse ? -diff : diff;
3305 /* Compare two lines A and B, returning negative, zero, or positive
3306 depending on whether A compares less than, equal to, or greater than B. */
3308 @@ -4109,7 +4717,7 @@ main (int argc, char **argv)
3309 initialize_exit_failure (SORT_FAILURE);
3311 hard_LC_COLLATE = hard_locale (LC_COLLATE);
3312 -#if HAVE_NL_LANGINFO
3313 +#if HAVE_LANGINFO_CODESET
3314 hard_LC_TIME = hard_locale (LC_TIME);
3317 @@ -4130,6 +4738,29 @@ main (int argc, char **argv)
3322 + if (MB_CUR_MAX > 1)
3324 + inittables = inittables_mb;
3325 + begfield = begfield_mb;
3326 + limfield = limfield_mb;
3327 + skipblanks = skipblanks_mb;
3328 + getmonth = getmonth_mb;
3329 + keycompare = keycompare_mb;
3330 + numcompare = numcompare_mb;
3335 + inittables = inittables_uni;
3336 + begfield = begfield_uni;
3337 + limfield = limfield_uni;
3338 + skipblanks = skipblanks_uni;
3339 + getmonth = getmonth_uni;
3340 + keycompare = keycompare_uni;
3341 + numcompare = numcompare_uni;
3344 have_read_stdin = false;
3347 @@ -4400,13 +5031,34 @@ main (int argc, char **argv)
3351 - char newtab = optarg[0];
3353 + char newtab[MB_LEN_MAX + 1];
3354 + size_t newtab_length = 1;
3355 + strncpy (newtab, optarg, MB_LEN_MAX);
3357 error (SORT_FAILURE, 0, _("empty tab"));
3360 + if (MB_CUR_MAX > 1)
3365 + memset (&state, '\0', sizeof (mbstate_t));
3366 + newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
3369 + switch (newtab_length)
3374 + newtab_length = 1;
3378 + if (newtab_length == 1 && optarg[1])
3380 if (STREQ (optarg, "\\0"))
3385 /* Provoke with 'sort -txx'. Complain about
3386 @@ -4417,9 +5069,12 @@ main (int argc, char **argv)
3390 - if (tab != TAB_DEFAULT && tab != newtab)
3392 + && (tab_length != newtab_length
3393 + || memcmp (tab, newtab, tab_length) != 0))
3394 error (SORT_FAILURE, 0, _("incompatible tabs"));
3396 + memcpy (tab, newtab, newtab_length);
3397 + tab_length = newtab_length;
3401 diff -urNp coreutils-8.16-orig/src/unexpand.c coreutils-8.16/src/unexpand.c
3402 --- coreutils-8.16-orig/src/unexpand.c 2012-03-24 21:26:51.000000000 +0100
3403 +++ coreutils-8.16/src/unexpand.c 2012-03-26 17:51:46.000000000 +0200
3407 #include <sys/types.h>
3409 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
3411 +# include <wchar.h>
3416 #include "fadvise.h"
3418 #include "xstrndup.h"
3420 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
3421 + installation; work around this configuration error. */
3422 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
3423 +# define MB_LEN_MAX 16
3426 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
3427 +#if HAVE_MBRTOWC && defined mbstate_t
3428 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
3431 /* The official name of this program (e.g., no 'g' prefix). */
3432 #define PROGRAM_NAME "unexpand"
3434 @@ -103,6 +120,208 @@ static struct option const longopts[] =
3438 +static FILE *next_file (FILE *fp);
3442 +unexpand_multibyte (void)
3444 + FILE *fp; /* Input stream. */
3445 + mbstate_t i_state; /* Current shift state of the input stream. */
3446 + mbstate_t i_state_bak; /* Back up the I_STATE. */
3447 + mbstate_t o_state; /* Current shift state of the output stream. */
3448 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
3449 + char *bufpos = buf; /* Next read position of BUF. */
3450 + size_t buflen = 0; /* The length of the byte sequence in buf. */
3451 + wint_t wc; /* The wide character just read. */
3452 + size_t mblength; /* The byte length of the multibyte sequence
3453 + that mbrtowc converted into WC. */
3455 + /* Index in `tab_list' of next tabstop: */
3456 + int tab_index = 0; /* For calculating width of pending tabs. */
3457 + int print_tab_index = 0; /* For printing as many tabs as possible. */
3458 + unsigned int column = 0; /* Column on screen of next char. */
3459 + int next_tab_column; /* Column the next tab stop is on. */
3460 + int convert = 1; /* If nonzero, perform translations. */
3461 + unsigned int pending = 0; /* Pending columns of blanks. */
3463 + fp = next_file ((FILE *) NULL);
3467 + memset (&o_state, '\0', sizeof(mbstate_t));
3468 + memset (&i_state, '\0', sizeof(mbstate_t));
3472 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
3474 + memmove (buf, bufpos, buflen);
3475 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
3479 + /* Get a wide character. */
3487 + i_state_bak = i_state;
3488 + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state);
3491 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
3493 + i_state = i_state_bak;
3497 + if (wc == L' ' && convert && column < INT_MAX)
3502 + else if (wc == L'\t' && convert)
3504 + if (tab_size == 0)
3506 + /* Do not let tab_index == first_free_tab;
3507 + stop when it is 1 less. */
3508 + while (tab_index < first_free_tab - 1
3509 + && column >= tab_list[tab_index])
3511 + next_tab_column = tab_list[tab_index];
3512 + if (tab_index < first_free_tab - 1)
3514 + if (column >= next_tab_column)
3516 + convert = 0; /* Ran out of tab stops. */
3517 + goto flush_pend_mb;
3522 + next_tab_column = column + tab_size - column % tab_size;
3524 + pending += next_tab_column - column;
3525 + column = next_tab_column;
3530 + /* Flush pending spaces. Print as many tabs as possible,
3531 + then print the rest as spaces. */
3537 + column -= pending;
3538 + while (pending > 0)
3540 + if (tab_size == 0)
3542 + /* Do not let print_tab_index == first_free_tab;
3543 + stop when it is 1 less. */
3544 + while (print_tab_index < first_free_tab - 1
3545 + && column >= tab_list[print_tab_index])
3546 + print_tab_index++;
3547 + next_tab_column = tab_list[print_tab_index];
3548 + if (print_tab_index < first_free_tab - 1)
3549 + print_tab_index++;
3554 + column + tab_size - column % tab_size;
3556 + if (next_tab_column - column <= pending)
3559 + pending -= next_tab_column - column;
3560 + column = next_tab_column;
3564 + --print_tab_index;
3565 + column += pending;
3566 + while (pending != 0)
3576 + fp = next_file (fp);
3578 + break; /* No more files. */
3581 + memset (&i_state, '\0', sizeof(mbstate_t));
3586 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
3591 + if (convert_entire_line == 0)
3597 + else if (mblength == 0)
3599 + if (convert && convert_entire_line == 0)
3615 + int width; /* The width of WC. */
3617 + width = wcwidth (wc);
3618 + column += (width > 0) ? width : 0;
3619 + if (convert_entire_line == 0)
3626 + tab_index = print_tab_index = 0;
3627 + column = pending = 0;
3630 + fwrite (bufpos, sizeof(char), mblength, stdout);
3633 + buflen -= mblength;
3634 + bufpos += mblength;
3643 @@ -524,7 +743,12 @@ main (int argc, char **argv)
3645 file_list = (optind < argc ? &argv[optind] : stdin_argv);
3649 + if (MB_CUR_MAX > 1)
3650 + unexpand_multibyte ();
3655 if (have_read_stdin && fclose (stdin) != 0)
3656 error (EXIT_FAILURE, errno, "-");
3657 diff -urNp coreutils-8.16-orig/src/uniq.c coreutils-8.16/src/uniq.c
3658 --- coreutils-8.16-orig/src/uniq.c 2012-03-24 21:26:51.000000000 +0100
3659 +++ coreutils-8.16/src/uniq.c 2012-03-26 17:35:09.000000000 +0200
3662 #include <sys/types.h>
3664 +/* Get mbstate_t, mbrtowc(). */
3666 +# include <wchar.h>
3669 +/* Get isw* functions. */
3671 +# include <wctype.h>
3675 #include "argmatch.h"
3676 #include "linebuffer.h"
3678 #include "stdio--.h"
3679 #include "xmemcoll.h"
3680 #include "xstrtol.h"
3681 -#include "memcasecmp.h"
3682 +#include "xmemcoll.h"
3684 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
3685 + installation; work around this configuration error. */
3686 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
3687 +# define MB_LEN_MAX 16
3690 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
3691 +#if HAVE_MBRTOWC && defined mbstate_t
3692 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
3696 /* The official name of this program (e.g., no 'g' prefix). */
3697 #define PROGRAM_NAME "uniq"
3698 @@ -108,6 +130,10 @@ static enum delimit_method const delimit
3699 /* Select whether/how to delimit groups of duplicate lines. */
3700 static enum delimit_method delimit_groups;
3702 +/* Function pointers. */
3704 +(*find_field) (struct linebuffer *line);
3706 static struct option const longopts[] =
3708 {"count", no_argument, NULL, 'c'},
3709 @@ -206,7 +232,7 @@ size_opt (char const *opt, char const *m
3710 return a pointer to the beginning of the line's field to be compared. */
3712 static char * _GL_ATTRIBUTE_PURE
3713 -find_field (struct linebuffer const *line)
3714 +find_field_uni (struct linebuffer *line)
3717 char const *lp = line->buffer;
3718 @@ -226,6 +252,83 @@ find_field (struct linebuffer const *lin
3719 return line->buffer + i;
3724 +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
3727 + mbstate_t state_bak; \
3730 + state_bak = *STATEP; \
3732 + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
3734 + switch (MBLENGTH) \
3736 + case (size_t)-2: \
3737 + case (size_t)-1: \
3738 + *STATEP = state_bak; \
3740 + /* Fall through */ \
3748 +find_field_multi (struct linebuffer *line)
3751 + char *lp = line->buffer;
3752 + size_t size = line->length - 1;
3756 + mbstate_t *statep;
3760 + statep = &(line->state);
3762 + /* skip fields. */
3763 + for (count = 0; count < skip_fields && pos < size; count++)
3765 + while (pos < size)
3767 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3769 + if (convfail || !iswblank (wc))
3777 + while (pos < size)
3779 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3781 + if (!convfail && iswblank (wc))
3788 + /* skip chars. */
3789 + for (count = 0; count < skip_chars && pos < size; count++)
3791 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
3799 /* Return false if two strings OLD and NEW match, true if not.
3800 OLD and NEW point not to the beginnings of the lines
3801 but rather to the beginnings of the fields to compare.
3802 @@ -234,6 +337,8 @@ find_field (struct linebuffer const *lin
3804 different (char *old, char *new, size_t oldlen, size_t newlen)
3806 + char *copy_old, *copy_new;
3808 if (check_chars < oldlen)
3809 oldlen = check_chars;
3810 if (check_chars < newlen)
3811 @@ -241,14 +346,92 @@ different (char *old, char *new, size_t
3815 - /* FIXME: This should invoke strcoll somehow. */
3816 - return oldlen != newlen || memcasecmp (old, new, oldlen);
3819 + copy_old = alloca (oldlen + 1);
3820 + copy_new = alloca (oldlen + 1);
3822 + for (i = 0; i < oldlen; i++)
3824 + copy_old[i] = toupper (old[i]);
3825 + copy_new[i] = toupper (new[i]);
3828 - else if (hard_LC_COLLATE)
3829 - return xmemcoll (old, oldlen, new, newlen) != 0;
3831 - return oldlen != newlen || memcmp (old, new, oldlen);
3833 + copy_old = (char *)old;
3834 + copy_new = (char *)new;
3837 + return xmemcoll (copy_old, oldlen, copy_new, newlen);
3842 +different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
3844 + size_t i, j, chars;
3845 + const char *str[2];
3848 + mbstate_t state[2];
3851 + mbstate_t state_bak;
3857 + state[0] = oldstate;
3858 + state[1] = newstate;
3860 + for (i = 0; i < 2; i++)
3862 + copy[i] = alloca (len[i] + 1);
3864 + for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
3866 + state_bak = state[i];
3867 + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
3873 + state[i] = state_bak;
3874 + /* Fall through */
3882 + uwc = towupper (wc);
3886 + mbstate_t state_wc;
3888 + memset (&state_wc, '\0', sizeof(mbstate_t));
3889 + wcrtomb (copy[i] + j, uwc, &state_wc);
3892 + memcpy (copy[i] + j, str[i] + j, mblength);
3895 + memcpy (copy[i] + j, str[i] + j, mblength);
3899 + copy[i][j] = '\0';
3903 + return xmemcoll (copy[0], len[0], copy[1], len[1]);
3907 /* Output the line in linebuffer LINE to standard output
3908 provided that the switches say it should be output.
3909 @@ -304,15 +487,43 @@ check_file (const char *infile, const ch
3911 char *prevfield IF_LINT ( = NULL);
3912 size_t prevlen IF_LINT ( = 0);
3914 + mbstate_t prevstate;
3916 + memset (&prevstate, '\0', sizeof (mbstate_t));
3919 while (!feof (stdin))
3924 + mbstate_t thisstate;
3927 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
3929 thisfield = find_field (thisline);
3930 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
3932 + if (MB_CUR_MAX > 1)
3934 + thisstate = thisline->state;
3936 + if (prevline->length == 0 || different_multi
3937 + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
3939 + fwrite (thisline->buffer, sizeof (char),
3940 + thisline->length, stdout);
3942 + SWAP_LINES (prevline, thisline);
3943 + prevfield = thisfield;
3944 + prevlen = thislen;
3945 + prevstate = thisstate;
3950 if (prevline->length == 0
3951 || different (thisfield, prevfield, thislen, prevlen))
3953 @@ -331,17 +542,26 @@ check_file (const char *infile, const ch
3955 uintmax_t match_count = 0;
3956 bool first_delimiter = true;
3958 + mbstate_t prevstate;
3961 if (readlinebuffer_delim (prevline, stdin, delimiter) == 0)
3963 prevfield = find_field (prevline);
3964 prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
3966 + prevstate = prevline->state;
3969 while (!feof (stdin))
3975 + mbstate_t thisstate = thisline->state;
3977 if (readlinebuffer_delim (thisline, stdin, delimiter) == 0)
3980 @@ -350,6 +570,14 @@ check_file (const char *infile, const ch
3982 thisfield = find_field (thisline);
3983 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
3985 + if (MB_CUR_MAX > 1)
3987 + match = !different_multi (thisfield, prevfield,
3988 + thislen, prevlen, thisstate, prevstate);
3992 match = !different (thisfield, prevfield, thislen, prevlen);
3993 match_count += match;
3995 @@ -382,6 +610,9 @@ check_file (const char *infile, const ch
3996 SWAP_LINES (prevline, thisline);
3997 prevfield = thisfield;
4000 + prevstate = thisstate;
4005 @@ -427,6 +658,19 @@ main (int argc, char **argv)
4007 atexit (close_stdout);
4010 + if (MB_CUR_MAX > 1)
4012 + find_field = find_field_multi;
4017 + find_field = find_field_uni;
4024 check_chars = SIZE_MAX;
4025 diff -urNp coreutils-8.16-orig/tests/Makefile.am coreutils-8.16/tests/Makefile.am
4026 --- coreutils-8.16-orig/tests/Makefile.am 2012-03-26 18:01:35.564014659 +0200
4027 +++ coreutils-8.16/tests/Makefile.am 2012-03-26 18:02:01.023015013 +0200
4028 @@ -242,6 +242,7 @@ TESTS = \
4029 misc/sort-debug-warn \
4031 misc/sort-files0-from \
4032 + misc/sort-mb-tests \
4035 misc/sort-merge-fdlimit \
4036 @@ -537,6 +538,10 @@ TESTS = \
4047 diff -urNp coreutils-8.16-orig/tests/misc/cut coreutils-8.16/tests/misc/cut
4048 --- coreutils-8.16-orig/tests/misc/cut 2012-02-03 10:22:06.000000000 +0100
4049 +++ coreutils-8.16/tests/misc/cut 2012-03-26 17:40:49.000000000 +0200
4050 @@ -23,14 +23,15 @@ use strict;
4051 # Turn off localization of executable's output.
4052 @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
4054 -my $mb_locale = $ENV{LOCALE_FR_UTF8};
4055 -! defined $mb_locale || $mb_locale eq 'none'
4056 - and $mb_locale = 'C';
4057 +#my $mb_locale = $ENV{LOCALE_FR_UTF8};
4058 +#! defined $mb_locale || $mb_locale eq 'none'
4059 +# and $mb_locale = 'C';
4060 +my $mb_locale = 'C';
4063 my $try = "Try '$prog --help' for more information.\n";
4064 my $from_1 = "$prog: fields and positions are numbered from 1\n$try";
4065 -my $inval = "$prog: invalid byte or field list\n$try";
4066 +my $inval = "$prog: invalid byte, character or field list\n$try";
4067 my $no_endpoint = "$prog: invalid range with no endpoint: -\n$try";
4070 @@ -147,7 +148,7 @@ my @Tests =
4072 # None of the following invalid ranges provoked an error up to coreutils-6.9.
4073 ['inval1', qw(-f 2-0), {IN=>''}, {OUT=>''}, {EXIT=>1},
4074 - {ERR=>"$prog: invalid decreasing range\n$try"}],
4075 + {ERR=>"$prog: invalid byte, character or field list\n$try"}],
4076 ['inval2', qw(-f -), {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
4077 ['inval3', '-f', '4,-', {IN=>''}, {OUT=>''}, {EXIT=>1}, {ERR=>$no_endpoint}],
4078 ['inval4', '-f', '1-2,-', {IN=>''}, {OUT=>''}, {EXIT=>1},
4079 diff -urNp coreutils-8.16-orig/tests/misc/mb1.I coreutils-8.16/tests/misc/mb1.I
4080 --- coreutils-8.16-orig/tests/misc/mb1.I 1970-01-01 01:00:00.000000000 +0100
4081 +++ coreutils-8.16/tests/misc/mb1.I 2012-03-26 17:35:09.000000000 +0200
4087 diff -urNp coreutils-8.16-orig/tests/misc/mb1.X coreutils-8.16/tests/misc/mb1.X
4088 --- coreutils-8.16-orig/tests/misc/mb1.X 1970-01-01 01:00:00.000000000 +0100
4089 +++ coreutils-8.16/tests/misc/mb1.X 2012-03-26 17:35:09.000000000 +0200
4095 diff -urNp coreutils-8.16-orig/tests/misc/mb2.I coreutils-8.16/tests/misc/mb2.I
4096 --- coreutils-8.16-orig/tests/misc/mb2.I 1970-01-01 01:00:00.000000000 +0100
4097 +++ coreutils-8.16/tests/misc/mb2.I 2012-03-26 17:35:09.000000000 +0200
4103 diff -urNp coreutils-8.16-orig/tests/misc/mb2.X coreutils-8.16/tests/misc/mb2.X
4104 --- coreutils-8.16-orig/tests/misc/mb2.X 1970-01-01 01:00:00.000000000 +0100
4105 +++ coreutils-8.16/tests/misc/mb2.X 2012-03-26 17:35:09.000000000 +0200
4111 diff -urNp coreutils-8.16-orig/tests/misc/sort-mb-tests coreutils-8.16/tests/misc/sort-mb-tests
4112 --- coreutils-8.16-orig/tests/misc/sort-mb-tests 1970-01-01 01:00:00.000000000 +0100
4113 +++ coreutils-8.16/tests/misc/sort-mb-tests 2012-03-26 17:35:09.000000000 +0200
4117 + 0) xx='../src/sort';;
4120 +test "$VERBOSE" && echo=echo || echo=:
4121 +$echo testing program: $xx
4123 +test "$srcdir" || srcdir=.
4124 +test "$VERBOSE" && $xx --version 2> /dev/null
4126 +export LC_ALL=en_US.UTF-8
4127 +locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
4130 +$xx -t @ -k2 -n misc/mb1.I > misc/mb1.O
4132 +if test $code != 0; then
4133 + $echo "Test mb1 failed: $xx return code $code differs from expected value 0"
4134 + errors=`expr $errors + 1`
4136 + cmp misc/mb1.O $srcdir/misc/mb1.X > /dev/null 2>&1
4138 + 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
4139 + 1) $echo "Test mb1 failed: files misc/mb1.O and $srcdir/misc/mb1.X differ" 1>&2
4140 + (diff -c misc/mb1.O $srcdir/misc/mb1.X) 2> /dev/null
4141 + errors=`expr $errors + 1`;;
4142 + 2) $echo "Test mb1 may have failed." 1>&2
4143 + $echo The command "cmp misc/mb1.O $srcdir/misc/mb1.X" failed. 1>&2
4144 + errors=`expr $errors + 1`;;
4148 +$xx -t @ -k4 -n misc/mb2.I > misc/mb2.O
4150 +if test $code != 0; then
4151 + $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
4152 + errors=`expr $errors + 1`
4154 + cmp misc/mb2.O $srcdir/misc/mb2.X > /dev/null 2>&1
4156 + 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
4157 + 1) $echo "Test mb2 failed: files misc/mb2.O and $srcdir/misc/mb2.X differ" 1>&2
4158 + (diff -c misc/mb2.O $srcdir/misc/mb2.X) 2> /dev/null
4159 + errors=`expr $errors + 1`;;
4160 + 2) $echo "Test mb2 may have failed." 1>&2
4161 + $echo The command "cmp misc/mb2.O $srcdir/misc/mb2.X" failed. 1>&2
4162 + errors=`expr $errors + 1`;;
4166 +if test $errors = 0; then
4167 + $echo Passed all 113 tests. 1>&2
4169 + $echo Failed $errors tests. 1>&2
4171 +test $errors = 0 || errors=1