/*
* Non-spacing attribute table.
- * See PropList.txt, or grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt
- * Control characters are also marked non-spacing here, because they are not
- * printable. Zero width characters are also marked non-spacing here.
+ * Consists of:
+ * - Non-spacing characters; generated from PropList.txt or
+ * "grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt"
+ * - Format control characters; generated from
+ * "grep '^[^;]*;[^;]*;Cf;' UnicodeData.txt"
+ * - Zero width characters; generated from
+ * "grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt"
*/
static const unsigned char nonspacing_table_data[16*64] = {
/* 0x0000-0x01ff */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0280-0x02bf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x02c0-0x02ff */
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x0300-0x033f */
- 0xff, 0x7f, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, /* 0x0340-0x037f */
+ 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, /* 0x0340-0x037f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0380-0x03bf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x03c0-0x03ff */
/* 0x0400-0x05ff */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1640-0x167f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1680-0x16bf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x16c0-0x16ff */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1700-0x173f */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1740-0x177f */
+ 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1c, 0x00, /* 0x1700-0x173f */
+ 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, /* 0x1740-0x177f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, /* 0x1780-0x17bf */
0x40, 0xfe, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x17c0-0x17ff */
/* 0x1800-0x19ff */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x19c0-0x19ff */
/* 0x2000-0x21ff */
0x00, 0xf8, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, /* 0x2000-0x203f */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x00, /* 0x2040-0x207f */
+ 0x00, 0x00, 0x00, 0x00, 0x0f, 0xfc, 0x00, 0x00, /* 0x2040-0x207f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2080-0x20bf */
- 0x00, 0x00, 0xff, 0xff, 0x0f, 0x00, 0x00, 0x00, /* 0x20c0-0x20ff */
+ 0x00, 0x00, 0xff, 0xff, 0xff, 0x07, 0x00, 0x00, /* 0x20c0-0x20ff */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2100-0x213f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2140-0x217f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2180-0x21bf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xfb80-0xfbbf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xfbc0-0xfbff */
/* 0xfe00-0xffff */
- 0x00, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, /* 0xfe00-0xfe3f */
+ 0xff, 0xff, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, /* 0xfe00-0xfe3f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xfe40-0xfe7f */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xfe80-0xfebf */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0xfec0-0xfeff */
return 0;
}
}
+ else if ((uc >> 9) == (0xe0000 >> 9))
+ {
+ if (uc >= 0xe0020 ? uc <= 0xe007f : uc == 0xe0001)
+ return 0;
+ }
/* Test for double-width character.
* Generated from "grep '^....;[WF]' EastAsianWidth.txt"
* and "grep '^....;[^WF]' EastAsianWidth.txt"
if (uc >= 0x1100
&& ((uc < 0x1160) /* Hangul Jamo */
|| (uc >= 0x2e80 && uc < 0xa4d0 /* CJK ... Yi */
- && !((uc & ~0x0013) == 0x3008
- || (uc & ~0x0001) == 0x3014
- || uc == 0x303f))
+ && !(uc == 0x303f))
|| (uc >= 0xac00 && uc < 0xd7a4) /* Hangul Syllables */
|| (uc >= 0xf900 && uc < 0xfb00) /* CJK Compatibility Ideographs */
|| (uc >= 0xfe30 && uc < 0xfe70) /* CJK Compatibility Forms */
- || (uc >= 0xff00 && uc < 0xff60) /* Fullwidth Forms */
- || (uc >= 0xffe0 && uc < 0xffe7)))
+ || (uc >= 0xff00 && uc < 0xff61) /* Fullwidth Forms */
+ || (uc >= 0xffe0 && uc < 0xffe7)
+ || (uc >= 0x20000 && uc <= 0x2a6d6) /* CJK */
+ || (uc >= 0x2f800 && uc <= 0x2fa1d) /* CJK Compatibility Ideographs */
+ ) )
return 2;
/* In ancient CJK encodings, Cyrillic and most other characters are
double-width as well. */
- if (uc >= 0x00A1 && uc < 0xFF60 && uc != 0x20A9
+ if (uc >= 0x00A1 && uc < 0xFF61 && uc != 0x20A9
&& is_cjk_encoding (encoding))
return 2;
return 1;