]>
Commit | Line | Data |
---|---|---|
f405e86d SL |
1 | /* gunicode.c - Unicode manipulation functions |
2 | * | |
3 | * Copyright (C) 1999, 2000 Tom Tromey | |
4 | * Copyright 2000, 2005 Red Hat, Inc. | |
5 | */ | |
6 | ||
7 | #include "gunicode.h" | |
8 | ||
9 | #define unichar uint32_t | |
10 | ||
/**
 * utf8_prev_char:
 * @p: a pointer to a position within a UTF-8 encoded string
 *
 * Walks backwards from @p, one byte at a time, and stops at the first
 * byte that is not a UTF-8 continuation byte (bit pattern 10xxxxxx).
 *
 * @p itself may point into the middle of a multi-byte sequence. The only
 * property checked is the top two bits of each byte; the sequence found
 * is not validated any further.
 *
 * No lower bound is applied: the byte before @p is always read, so the
 * caller must guarantee that a character start exists somewhere before
 * @p. Do not call this with @p at the very beginning of the buffer.
 *
 * Return value: a pointer to the start byte that was found (the const
 * qualifier of @p is dropped).
 **/
char *
utf8_prev_char (const char *p)
{
  const char *cursor = p;

  /* Always step back at least once, then keep going while the byte under
   * the cursor is still a continuation byte. */
  do
    cursor--;
  while ((*cursor & 0xc0) == 0x80);

  return (char *) cursor;
}
34 | ||
35 | struct Interval | |
36 | { | |
37 | unichar start, end; | |
38 | }; | |
39 | ||
40 | static int | |
41 | interval_compare (const void *key, const void *elt) | |
42 | { | |
43 | unichar c = (unichar) (long) (key); | |
44 | struct Interval *interval = (struct Interval *)elt; | |
45 | ||
46 | if (c < interval->start) | |
47 | return -1; | |
48 | if (c > interval->end) | |
49 | return +1; | |
50 | ||
51 | return 0; | |
52 | } | |
53 | ||
/*
 * NOTE:
 *
 * The tables for g_unichar_iswide() (unichar_iswide() in this file) and
 * g_unichar_iswide_cjk() are generated from the Unicode Character
 * Database file extracted/DerivedEastAsianWidth.txt using the
 * gen-iswide-table.py script, in this way:
 *
 *   ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt | fmt
 *
 * Last update for Unicode 6.0.
 */
66 | ||
67 | /** | |
68 | * g_unichar_iswide: | |
69 | * @c: a Unicode character | |
70 | * | |
71 | * Determines if a character is typically rendered in a double-width | |
72 | * cell. | |
73 | * | |
74 | * Return value: %TRUE if the character is wide | |
75 | **/ | |
76 | bool | |
77 | unichar_iswide (unichar c) | |
78 | { | |
79 | /* See NOTE earlier for how to update this table. */ | |
80 | static const struct Interval wide[] = { | |
81 | {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, | |
82 | {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096}, | |
83 | {0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA}, | |
84 | {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE}, | |
85 | {0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C}, | |
86 | {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52}, | |
87 | {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6}, | |
88 | {0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A}, {0x1F240, | |
89 | 0x1F248}, {0x1F250, 0x1F251}, {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD} | |
90 | }; | |
91 | ||
92 | if (bsearch ((void *)(uintptr_t)c, wide, (sizeof (wide) / sizeof ((wide)[0])), sizeof wide[0], | |
93 | interval_compare)) | |
94 | return true; | |
95 | ||
96 | return false; | |
97 | } | |
98 | ||
/*
 * Table indexed by a byte value (0-255).  Entries for 0xC0-0xFD hold 2 to
 * 6, grouped by the byte's leading bit pattern; every other entry
 * (0x00-0xBF, 0xFE and 0xFF) holds 1.
 */
const char utf8_skip_data[256] = {
  /* 0x00-0x0F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x10-0x1F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x20-0x2F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x30-0x3F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x40-0x4F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x50-0x5F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x60-0x6F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x70-0x7F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x80-0x8F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x90-0x9F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xA0-0xAF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xB0-0xBF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xC0-0xCF */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0xD0-0xDF */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0xE0-0xEF */ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  /* 0xF0-0xFF */ 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};