]>
Commit | Line | Data |
---|---|---|
1c1af145 | 1 | /* |
2 | * internal.h - internal header stuff for the charset library. | |
3 | */ | |
4 | ||
5 | #ifndef charset_internal_h | |
6 | #define charset_internal_h | |
7 | ||
8 | /* Number of elements in the array x. x must be a true array, not a pointer; it appears only as a sizeof operand. */ | |
9 | #define lenof(x) ( sizeof((x)) / sizeof(*(x)) ) | |
10 | ||
11 | /* This is an invalid Unicode value, passed in place of a real character to indicate an error. NOTE(review): ERROR is a very generic macro name and may clash with a macro of the same name from a system header - confirm on each target platform. */ | |
12 | #define ERROR 0xFFFFL /* Unicode value representing error */ | |
13 | ||
14 | typedef struct charset_spec charset_spec; /* description of one character set: identifier, read/write functions, data */ | |
15 | typedef struct sbcs_data sbcs_data; /* translation tables used by the SBCS read and write functions */ | |
16 | ||
17 | struct charset_spec { | |
18 | int charset; /* numeric identifier */ | |
19 | ||
20 | /* | |
21 | * A function to read input in this character set (`input_chr') | |
22 | * and output Unicode characters. The `emit' callback expects to | |
23 | * be passed Unicode chars; it should be sent ERROR for any | |
24 | * encoding error on the input. | |
25 | */ | |
26 | void (*read)(charset_spec const *charset, long int input_chr, | |
27 | charset_state *state, | |
28 | void (*emit)(void *ctx, long int output), void *emitctx); | |
29 | /* | |
30 | * A function to read Unicode characters (`input_chr') and output | |
31 | * them in this character set. The `emit' callback expects to be | |
32 | * passed byte values; it should be sent ERROR for any | |
33 | * non-representable characters on the input. | |
34 | */ | |
35 | void (*write)(charset_spec const *charset, long int input_chr, | |
36 | charset_state *state, | |
37 | void (*emit)(void *ctx, long int output), void *emitctx); | |
38 | void const *data; /* charset-specific data for read/write; the SBCS functions expect a struct sbcs_data here */ | |
39 | }; | |
40 | ||
41 | /* | |
42 | * This is the format of `data' used by the SBCS read and write | |
43 | * functions; so it's the format used in all SBCS definitions. | |
44 | */ | |
45 | struct sbcs_data { | |
46 | /* | |
47 | * This is a simple mapping table converting each SBCS position | |
48 | * to a Unicode code point. Some positions may contain ERROR, | |
49 | * indicating that the byte value is not defined in the SBCS | |
50 | * in question and that its occurrence in input is an error. | |
51 | */ | |
52 | unsigned long sbcs2ucs[256]; | |
53 | ||
54 | /* | |
55 | * This lookup table is used to convert Unicode back to the | |
56 | * SBCS. It consists of the valid byte values in the SBCS, | |
57 | * sorted in order of their Unicode translation. So given a | |
58 | * Unicode value U, you can do a binary search on this table | |
59 | * using the above table as a lookup: when testing the Xth | |
60 | * position in this table, you branch according to whether | |
61 | * sbcs2ucs[ucs2sbcs[X]] is less than, greater than, or equal | |
62 | * to U. | |
63 | * | |
64 | * Note that since there may be fewer than 256 valid byte | |
65 | * values in a particular SBCS, we must supply the length of | |
66 | * this table (`nvalid') as well as the contents. | |
67 | */ | |
68 | unsigned char ucs2sbcs[256]; | |
69 | int nvalid; /* length of ucs2sbcs: the number of valid byte values in this SBCS */ | |
70 | }; | |
71 | ||
72 | /* | |
73 | * Prototypes for internal library functions. read_sbcs and write_sbcs take the same parameters as the `read' and `write' members of struct charset_spec. charset_state is not declared in this header, so its declaration must already be visible wherever this header is included. | |
74 | */ | |
75 | charset_spec const *charset_find_spec(int charset); /* presumably looks up the spec for a numeric charset identifier; result when none matches is not shown here - confirm against the definition */ | |
76 | void read_sbcs(charset_spec const *charset, long int input_chr, | |
77 | charset_state *state, | |
78 | void (*emit)(void *ctx, long int output), void *emitctx); | |
79 | void write_sbcs(charset_spec const *charset, long int input_chr, | |
80 | charset_state *state, | |
81 | void (*emit)(void *ctx, long int output), void *emitctx); | |
82 | ||
83 | /* | |
84 | * Placate compiler warnings about unused parameters, of which we | |
85 | * expect to have some in this library. The argument is cast to void rather than assigned to itself, so the macro also works on const-qualified and array parameters and does not provoke self-assignment warnings. Use it as a statement: UNUSEDARG(param); | |
86 | */ | |
87 | #define UNUSEDARG(x) ( (void)(x) ) | |
88 | ||
89 | #endif /* charset_internal_h */ |