From: Bruno Haible Date: Wed, 29 May 2002 12:58:06 +0000 (+0000) Subject: Upgrade to Unicode 3.2. X-Git-Tag: v0.11.3~44 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=68b3954db1df5ae044b1be5c66181c4931ea970f;p=thirdparty%2Fgettext.git Upgrade to Unicode 3.2. --- diff --git a/lib/ChangeLog b/lib/ChangeLog index 6c52356b1..f7ca99942 100644 --- a/lib/ChangeLog +++ b/lib/ChangeLog @@ -1,3 +1,8 @@ +2002-05-29 Bruno Haible + + * linebreak.c (nonspacing_table_data, uc_width): Upgrade to + Unicode 3.2.0. + 2002-05-18 Bruno Haible * basename.h: Use __GLIBC__ >= 2, because BeOS (which defines __GLIBC__ diff --git a/lib/linebreak.c b/lib/linebreak.c index 1edc69f3a..f62826b2f 100644 --- a/lib/linebreak.c +++ b/lib/linebreak.c @@ -226,9 +226,13 @@ int uc_width PARAMS ((unsigned int uc, const char *encoding)); /* * Non-spacing attribute table. - * See PropList.txt, or grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt - * Control characters are also marked non-spacing here, because they are not - * printable. Zero width characters are also marked non-spacing here. + * Consists of: + * - Non-spacing characters; generated from PropList.txt or + * "grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt" + * - Format control characters; generated from + * "grep '^[^;]*;[^;]*;Cf;' UnicodeData.txt" + * - Zero width characters; generated from + * "grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt" */ static const unsigned char nonspacing_table_data[16*64] = { /* 0x0000-0x01ff */ @@ -246,7 +250,7 @@ static const unsigned char nonspacing_table_data[16*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0280-0x02bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x02c0-0x02ff */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x0300-0x033f */ - 0xff, 0x7f, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, /* 0x0340-0x037f */ + 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, /* 0x0340-0x037f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0380-0x03bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x03c0-0x03ff */ /* 0x0400-0x05ff */ @@ -317,8 +321,8 @@ static const unsigned char nonspacing_table_data[16*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1640-0x167f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1680-0x16bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x16c0-0x16ff */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1700-0x173f */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1740-0x177f */ + 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1c, 0x00, /* 0x1700-0x173f */ + 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, /* 0x1740-0x177f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, /* 0x1780-0x17bf */ 0x40, 0xfe, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x17c0-0x17ff */ /* 0x1800-0x19ff */ @@ -332,9 +336,9 @@ static const unsigned char nonspacing_table_data[16*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x19c0-0x19ff */ /* 0x2000-0x21ff */ 0x00, 0xf8, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, /* 0x2000-0x203f */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x00, /* 0x2040-0x207f */ + 0x00, 0x00, 0x00, 0x00, 0x0f, 0xfc, 0x00, 0x00, /* 0x2040-0x207f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2080-0x20bf */ - 0x00, 0x00, 0xff, 0xff, 0x0f, 0x00, 0x00, 0x00, /* 0x20c0-0x20ff */ + 0x00, 0x00, 0xff, 0xff, 0xff, 0x07, 0x00, 0x00, /* 0x20c0-0x20ff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2100-0x213f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2140-0x217f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2180-0x21bf */ @@ -358,7 +362,7 @@ static const unsigned char nonspacing_table_data[16*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xfb80-0xfbbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xfbc0-0xfbff */ /* 0xfe00-0xffff */ - 0x00, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, /* 0xfe00-0xfe3f */ + 0xff, 0xff, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, /* 0xfe00-0xfe3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xfe40-0xfe7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xfe80-0xfebf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0xfec0-0xfeff */ @@ -428,6 +432,11 @@ uc_width (uc, encoding) return 0; } } + else if ((uc >> 9) == (0xe0000 >> 9)) + { + if (uc >= 0xe0020 ? uc <= 0xe007f : uc == 0xe0001) + return 0; + } /* Test for double-width character. * Generated from "grep '^....;[WF]' EastAsianWidth.txt" * and "grep '^....;[^WF]' EastAsianWidth.txt" @@ -435,18 +444,19 @@ uc_width (uc, encoding) if (uc >= 0x1100 && ((uc < 0x1160) /* Hangul Jamo */ || (uc >= 0x2e80 && uc < 0xa4d0 /* CJK ... Yi */ - && !((uc & ~0x0013) == 0x3008 - || (uc & ~0x0001) == 0x3014 - || uc == 0x303f)) + && !(uc == 0x303f)) || (uc >= 0xac00 && uc < 0xd7a4) /* Hangul Syllables */ || (uc >= 0xf900 && uc < 0xfb00) /* CJK Compatibility Ideographs */ || (uc >= 0xfe30 && uc < 0xfe70) /* CJK Compatibility Forms */ - || (uc >= 0xff00 && uc < 0xff60) /* Fullwidth Forms */ - || (uc >= 0xffe0 && uc < 0xffe7))) + || (uc >= 0xff00 && uc < 0xff61) /* Fullwidth Forms */ + || (uc >= 0xffe0 && uc < 0xffe7) + || (uc >= 0x20000 && uc <= 0x2a6d6) /* CJK */ + || (uc >= 0x2f800 && uc <= 0x2fa1d) /* CJK Compatibility Ideographs */ + ) ) return 2; /* In ancient CJK encodings, Cyrillic and most other characters are double-width as well. */ - if (uc >= 0x00A1 && uc < 0xFF60 && uc != 0x20A9 + if (uc >= 0x00A1 && uc < 0xFF61 && uc != 0x20A9 && is_cjk_encoding (encoding)) return 2; return 1;