]>
Commit | Line | Data |
---|---|---|
f405e86d SL |
1 | /* gunicode.c - Unicode manipulation functions |
2 | * | |
3 | * Copyright (C) 1999, 2000 Tom Tromey | |
4 | * Copyright 2000, 2005 Red Hat, Inc. | |
5 | */ | |
6 | ||
11c3a366 TA |
7 | #include <stdlib.h> |
8 | ||
f405e86d SL |
9 | #include "gunicode.h" |
10 | ||
11 | #define unichar uint32_t | |
12 | ||
/**
 * utf8_prev_char:
 * @p: a pointer to a position within a UTF-8 encoded string
 *
 * Finds the start of the UTF-8 character that precedes @p.
 *
 * @p does not have to point at the beginning of a character.  The
 * function steps backwards one byte at a time and stops at the first
 * byte that is not a continuation byte (bit pattern 10xxxxxx); no other
 * validation of the character is performed.
 *
 * There is no lower bound on the scan: the caller must guarantee that
 * at least one non-continuation byte exists before @p inside the same
 * buffer.  In particular, @p must not be the start of the string.
 *
 * The const qualifier of @p is dropped on return, so the caller decides
 * whether the underlying storage may be written to.
 *
 * Return value: a pointer to the first byte of the found character.
 **/
char *
utf8_prev_char (const char *p)
{
  /* Always move back at least one byte, then keep going while we are
   * still inside a multi-byte sequence (on a continuation byte). */
  do
    p--;
  while ((*p & 0xc0) == 0x80);

  return (char *)p;
}
36 | ||
37 | struct Interval | |
38 | { | |
39 | unichar start, end; | |
40 | }; | |
41 | ||
42 | static int | |
43 | interval_compare (const void *key, const void *elt) | |
44 | { | |
45 | unichar c = (unichar) (long) (key); | |
46 | struct Interval *interval = (struct Interval *)elt; | |
47 | ||
48 | if (c < interval->start) | |
49 | return -1; | |
50 | if (c > interval->end) | |
51 | return +1; | |
52 | ||
53 | return 0; | |
54 | } | |
55 | ||
56 | /* | |
57 | * NOTE: | |
58 | * | |
59 | * The tables for g_unichar_iswide() and g_unichar_iswide_cjk() are | |
60 | * generated from the Unicode Character Database's file | |
61 | * extracted/DerivedEastAsianWidth.txt using the gen-iswide-table.py | |
62 | * in this way: | |
63 | * | |
64 | * ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt | fmt | |
65 | * | |
66 | * Last update for Unicode 6.0. | |
67 | */ | |
68 | ||
69 | /** | |
70 | * g_unichar_iswide: | |
71 | * @c: a Unicode character | |
72 | * | |
73 | * Determines if a character is typically rendered in a double-width | |
74 | * cell. | |
75 | * | |
76 | * Return value: %TRUE if the character is wide | |
77 | **/ | |
78 | bool | |
79 | unichar_iswide (unichar c) | |
80 | { | |
81 | /* See NOTE earlier for how to update this table. */ | |
82 | static const struct Interval wide[] = { | |
83 | {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, | |
84 | {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096}, | |
85 | {0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA}, | |
86 | {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE}, | |
87 | {0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C}, | |
88 | {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52}, | |
89 | {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6}, | |
fb131646 ZJS |
90 | {0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A}, |
91 | {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, | |
92 | {0x1F300, 0x1F567}, /* Miscellaneous Symbols and Pictographs */ | |
93 | {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, | |
f405e86d SL |
94 | }; |
95 | ||
96 | if (bsearch ((void *)(uintptr_t)c, wide, (sizeof (wide) / sizeof ((wide)[0])), sizeof wide[0], | |
97 | interval_compare)) | |
98 | return true; | |
99 | ||
100 | return false; | |
101 | } | |
102 | ||
/*
 * Lookup table indexed by the value of a byte (0..255).
 *
 * For bytes that can start a UTF-8 sequence the entry equals the total
 * sequence length implied by the lead-byte bit pattern, including the
 * legacy 5- and 6-byte forms.  Continuation bytes (0x80..0xBF) and the
 * bytes 0xFE/0xFF map to 1.
 *
 * NOTE(review): presumably used by callers to step to the next
 * character; index it with an unsigned char value, since plain char
 * may be signed -- confirm against the users of this table.
 */
const char utf8_skip_data[256] = {
  /* 0x00 - 0x7F: single-byte (ASCII) */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x80 - 0xBF: continuation bytes */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xC0 - 0xDF: two-byte lead bytes */
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0xE0 - 0xEF: 3, 0xF0 - 0xF7: 4, 0xF8 - 0xFB: 5, 0xFC - 0xFD: 6,
   * 0xFE - 0xFF: 1 */
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};