+++ /dev/null
-From 7a7c776a4e228d180e74614fd8c8afcad5d4bdf7 Mon Sep 17 00:00:00 2001
-From: Jakub Martisko <jamartis@redhat.com>
-Date: Thu, 7 Jul 2016 12:53:26 +0200
-Subject: [PATCH] coreutils-i18n-un-expand-BOM.patch
-
----
- src/expand-common.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++
- src/expand-common.h | 12 ++++++
- src/expand.c | 45 +++++++++++++++++++-
- src/unexpand.c | 43 ++++++++++++++++++-
- tests/expand/mb.sh | 71 ++++++++++++++++++++++++++++++++
- tests/unexpand/mb.sh | 59 ++++++++++++++++++++++++++
- 6 files changed, 342 insertions(+), 2 deletions(-)
-
-diff --git a/src/expand-common.c b/src/expand-common.c
-index 4657e46..97cbb09 100644
---- a/src/expand-common.c
-+++ b/src/expand-common.c
-@@ -19,6 +19,7 @@
- #include <assert.h>
- #include <stdio.h>
- #include <sys/types.h>
-+#include <mbfile.h>
- #include "system.h"
- #include "die.h"
- #include "error.h"
-@@ -126,6 +127,119 @@ set_increment_size (uintmax_t tabval)
- return ok;
- }
-
-+extern int
-+set_utf_locale (void)
-+{
-+ /*try using some predefined locale */
-+ const char* predef_locales[] = {"C.UTF8","en_US.UTF8","en_GB.UTF8"};
-+
-+ const int predef_locales_count=3;
-+ for (int i=0;i<predef_locales_count;i++)
-+ {
-+ if (setlocale(LC_ALL,predef_locales[i])!=NULL)
-+ {
-+ break;
-+ }
-+ else if (i==predef_locales_count-1)
-+ {
-+ return 1;
-+ error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale"));
-+ }
-+ }
-+ return 0;
-+}
-+
-+extern bool
-+check_utf_locale(void)
-+{
-+ char* locale = setlocale (LC_CTYPE , NULL);
-+ if (locale == NULL)
-+ {
-+ return false;
-+ }
-+ else if (strcasestr(locale, "utf8") == NULL && strcasestr(locale, "utf-8") == NULL)
-+ {
-+ return false;
-+ }
-+ return true;
-+}
-+
-+extern bool
-+check_bom(FILE* fp, mb_file_t *mbf)
-+{
-+ int c;
-+
-+
-+ c=fgetc(fp);
-+
-+ /*test BOM header of the first file */
-+ mbf->bufcount=0;
-+ if (c == 0xEF)
-+ {
-+ c=fgetc(fp);
-+ }
-+ else
-+ {
-+ if (c != EOF)
-+ {
-+ ungetc(c,fp);
-+ }
-+ return false;
-+ }
-+
-+ if (c == 0xBB)
-+ {
-+ c=fgetc(fp);
-+ }
-+ else
-+ {
-+ if ( c!= EOF )
-+ {
-+ mbf->buf[0]=(unsigned char) 0xEF;
-+ mbf->bufcount=1;
-+ ungetc(c,fp);
-+ return false;
-+ }
-+ else
-+ {
-+ ungetc(0xEF,fp);
-+ return false;
-+ }
-+ }
-+ if (c == 0xBF)
-+ {
-+ mbf->bufcount=0;
-+ return true;
-+ }
-+ else
-+ {
-+ if (c != EOF)
-+ {
-+ mbf->buf[0]=(unsigned char) 0xEF;
-+ mbf->buf[1]=(unsigned char) 0xBB;
-+ mbf->bufcount=2;
-+ ungetc(c,fp);
-+ return false;
-+ }
-+ else
-+ {
-+ mbf->buf[0]=(unsigned char) 0xEF;
-+ mbf->bufcount=1;
-+ ungetc(0xBB,fp);
-+ return false;
-+ }
-+ }
-+ return false;
-+}
-+
-+extern void
-+print_bom(void)
-+{
-+ putc (0xEF, stdout);
-+ putc (0xBB, stdout);
-+ putc (0xBF, stdout);
-+}
-+
- /* Add the comma or blank separated list of tab stops STOPS
- to the list of tab stops. */
- extern void
-diff --git a/src/expand-common.h b/src/expand-common.h
-index 8cb2079..763bfda 100644
---- a/src/expand-common.h
-+++ b/src/expand-common.h
-@@ -34,6 +34,18 @@ extern size_t max_column_width;
- /* The desired exit status. */
- extern int exit_status;
-
-+extern int
-+set_utf_locale (void);
-+
-+extern bool
-+check_utf_locale(void);
-+
-+extern bool
-+check_bom(FILE* fp, mb_file_t *mbf);
-+
-+extern void
-+print_bom(void);
-+
- /* Add tab stop TABVAL to the end of 'tab_list'. */
- extern void
- add_tab_stop (uintmax_t tabval);
-diff --git a/src/expand.c b/src/expand.c
-index 310b349..4136824 100644
---- a/src/expand.c
-+++ b/src/expand.c
-@@ -103,11 +103,33 @@ expand (void)
- FILE *fp = next_file (NULL);
- mb_file_t mbf;
- mbf_char_t c;
-+ /* True if the starting locale is utf8. */
-+ bool using_utf_locale;
-+
-+ /* True if the first file contains BOM header. */
-+ bool found_bom;
-+ using_utf_locale=check_utf_locale();
-
- if (!fp)
- return;
--
- mbf_init (mbf, fp);
-+ found_bom=check_bom(fp,&mbf);
-+
-+ if (using_utf_locale == false && found_bom == true)
-+ {
-+ /*try using some predefined locale */
-+
-+ if (set_utf_locale () != 0)
-+ {
-+ error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale"));
-+ }
-+ }
-+
-+
-+ if (found_bom == true)
-+ {
-+ print_bom();
-+ }
-
- while (true)
- {
-@@ -132,6 +154,27 @@ expand (void)
- if ((mb_iseof (c)) && (fp = next_file (fp)))
- {
- mbf_init (mbf, fp);
-+ if (fp!=NULL)
-+ {
-+ if (check_bom(fp,&mbf)==true)
-+ {
-+ /*Not the first file - check BOM header*/
-+ if (using_utf_locale==false && found_bom==false)
-+ {
-+ /*BOM header in subsequent file but not in the first one. */
-+ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header"));
-+ }
-+ }
-+ else
-+ {
-+ if(using_utf_locale==false && found_bom==true)
-+ {
-+ /*First file conatined BOM header - locale was switched to UTF
-+ *all subsequent files should contain BOM. */
-+ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header"));
-+ }
-+ }
-+ }
- continue;
- }
- else
-diff --git a/src/unexpand.c b/src/unexpand.c
-index 863a90a..5681b58 100644
---- a/src/unexpand.c
-+++ b/src/unexpand.c
-@@ -116,16 +116,36 @@ unexpand (void)
- include characters other than spaces, so the blanks must be
- stored, not merely counted. */
- mbf_char_t *pending_blank;
-+ /* True if the starting locale is utf8. */
-+ bool using_utf_locale;
-+
-+ /* True if the first file contains BOM header. */
-+ bool found_bom;
-+ using_utf_locale=check_utf_locale();
-
- if (!fp)
- return;
-+ mbf_init (mbf, fp);
-+ found_bom=check_bom(fp,&mbf);
-+
-+ if (using_utf_locale == false && found_bom == true)
-+ {
-+ /*try using some predefined locale */
-
-+ if (set_utf_locale () != 0)
-+ {
-+ error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale"));
-+ }
-+ }
- /* The worst case is a non-blank character, then one blank, then a
- tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
- allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
- pending_blank = xmalloc (max_column_width * sizeof (mbf_char_t));
-
-- mbf_init (mbf, fp);
-+ if (found_bom == true)
-+ {
-+ print_bom();
-+ }
-
- while (true)
- {
-@@ -169,6 +189,27 @@ unexpand (void)
- if ((mb_iseof (c)) && (fp = next_file (fp)))
- {
- mbf_init (mbf, fp);
-+ if (fp!=NULL)
-+ {
-+ if (check_bom(fp,&mbf)==true)
-+ {
-+ /*Not the first file - check BOM header*/
-+ if (using_utf_locale==false && found_bom==false)
-+ {
-+ /*BOM header in subsequent file but not in the first one. */
-+ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header"));
-+ }
-+ }
-+ else
-+ {
-+ if(using_utf_locale==false && found_bom==true)
-+ {
-+ /*First file conatined BOM header - locale was switched to UTF
-+ *all subsequent files should contain BOM. */
-+ error (EXIT_FAILURE, errno, _("combination of files with and without BOM header"));
-+ }
-+ }
-+ }
- continue;
- }
- else
-diff --git a/tests/expand/mb.sh b/tests/expand/mb.sh
-index 031be7a..1621c84 100755
---- a/tests/expand/mb.sh
-+++ b/tests/expand/mb.sh
-@@ -109,4 +109,75 @@ env printf '12345678
- expand < in > out || fail=1
- compare exp out > /dev/null 2>&1 || fail=1
-
-+
-+
-+#BOM header test 1
-+printf "\xEF\xBB\xBF" > in; cat <<\EOF >> in || framework_failure_
-+1234567812345678123456781
-+. . . .
-+a b c d
-+. . . .
-+ä ö ü ß
-+. . . .
-+EOF
-+env printf ' äöü\t. öüä. \tä xx\n' >> in || framework_failure_
-+
-+printf "\xEF\xBB\xBF" > exp; cat <<\EOF >> exp || framework_failure_
-+1234567812345678123456781
-+. . . .
-+a b c d
-+. . . .
-+ä ö ü ß
-+. . . .
-+ äöü . öüä. ä xx
-+EOF
-+
-+
-+expand < in > out || fail=1
-+compare exp out > /dev/null 2>&1 || fail=1
-+
-+LANG=C expand < in > out || fail=1
-+compare exp out > /dev/null 2>&1 || fail=1
-+
-+LC_ALL=C expand < in > out || fail=1
-+compare exp out > /dev/null 2>&1 || fail=1
-+
-+
-+printf '\xEF\xBB\xBF' > in1; cat <<\EOF >> in1 || framework_failure_
-+1234567812345678123456781
-+. . . .
-+a b c d
-+. . . .
-+ä ö ü ß
-+. . . .
-+EOF
-+env printf ' äöü\t. öüä. \tä xx\n' >> in1 || framework_failure_
-+
-+
-+printf '\xEF\xBB\xBF' > exp; cat <<\EOF >> exp || framework_failure_
-+1234567812345678123456781
-+. . . .
-+a b c d
-+. . . .
-+ä ö ü ß
-+. . . .
-+ äöü . öüä. ä xx
-+1234567812345678123456781
-+. . . .
-+a b c d
-+. . . .
-+ä ö ü ß
-+. . . .
-+ äöü . öüä. ä xx
-+EOF
-+
-+expand in1 in1 > out || fail=1
-+compare exp out > /dev/null 2>&1 || fail=1
-+
-+LANG=C expand in1 in1 > out || fail=1
-+compare exp out > /dev/null 2>&1 || fail=1
-+
-+LC_ALL=C expand in1 in1 > out || fail=1
-+compare exp out > /dev/null 2>&1 || fail=1
-+
- exit $fail
-diff --git a/tests/unexpand/mb.sh b/tests/unexpand/mb.sh
-index 8d75652..9d4ee3e 100755
---- a/tests/unexpand/mb.sh
-+++ b/tests/unexpand/mb.sh
-@@ -111,3 +111,62 @@ env printf '12345678
-
- unexpand -a < in > out || fail=1
- compare exp out > /dev/null 2>&1 || fail=1
-+
-+#BOM header test 1
-+printf "\xEF\xBB\xBF" > in; cat <<\EOF >> in || framework_failure_
-+1234567812345678123456781
-+. . . .
-+a b c d
-+. . . .
-+ä ö ü ß
-+. . . .
-+ äöü . öüä. ä xx
-+EOF
-+env printf ' äöü\t. öüä. \tä xx\n' >> in || framework_failure_
-+
-+printf "\xEF\xBB\xBF" > exp; cat <<\EOF >> exp || framework_failure_
-+1234567812345678123456781
-+. . . .
-+a b c d
-+. . . .
-+ä ö ü ß
-+. . . .
-+ äöü . öüä. ä xx
-+EOF
-+
-+unexpand < in > out || fail=1
-+compare exp out > /dev/null 2>&1 || fail=1
-+
-+LANG=C unexpand < in > out || fail=1
-+compare exp out > /dev/null 2>&1 || fail=1
-+
-+LC_ALL=C unexpand < in > out || fail=1
-+compare exp out > /dev/null 2>&1 || fail=1
-+
-+
-+printf "\xEF\xBB\xBF" > exp; cat <<\EOF >> exp || framework_failure_
-+1234567812345678123456781
-+. . . .
-+a b c d
-+. . . .
-+ä ö ü ß
-+. . . .
-+ äöü . öüä. ä xx
-+1234567812345678123456781
-+. . . .
-+a b c d
-+. . . .
-+ä ö ü ß
-+. . . .
-+ äöü . öüä. ä xx
-+EOF
-+
-+
-+unexpand in in > out || fail=1
-+compare exp out > /dev/null 2>&1 || fail=1
-+
-+LANG=C unexpand in in > out || fail=1
-+compare exp out > /dev/null 2>&1 || fail=1
-+
-+LC_ALL=C unexpand in in > out || fail=1
-+compare exp out > /dev/null 2>&1 || fail=1
---
-2.9.3
-