]>
Commit | Line | Data |
---|---|---|
22231908 JS |
1 | #include "test-tool.h" |
2 | ||
/*
 * Emitted in place of any byte (or broken byte sequence) that cannot be
 * passed through. Spelled as a numeric character reference for U+FFFD
 * (REPLACEMENT CHARACTER) so that the literal itself is plain ASCII.
 */
static const char *utf8_replace_character = "&#xfffd;";
4 | ||
5 | /* | |
6 | * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded | |
7 | * in an XML file. | |
8 | */ | |
126e3b3d | 9 | int cmd__xml_encode(int argc UNUSED, const char **argv UNUSED) |
22231908 JS |
10 | { |
11 | unsigned char buf[1024], tmp[4], *tmp2 = NULL; | |
12 | ssize_t cur = 0, len = 1, remaining = 0; | |
13 | unsigned char ch; | |
14 | ||
15 | for (;;) { | |
16 | if (++cur == len) { | |
17 | len = xread(0, buf, sizeof(buf)); | |
18 | if (!len) | |
19 | return 0; | |
20 | if (len < 0) | |
21 | die_errno("Could not read <stdin>"); | |
22 | cur = 0; | |
23 | } | |
24 | ch = buf[cur]; | |
25 | ||
26 | if (tmp2) { | |
27 | if ((ch & 0xc0) != 0x80) { | |
28 | fputs(utf8_replace_character, stdout); | |
29 | tmp2 = NULL; | |
30 | cur--; | |
31 | continue; | |
32 | } | |
33 | *tmp2 = ch; | |
34 | tmp2++; | |
35 | if (--remaining == 0) { | |
36 | fwrite(tmp, tmp2 - tmp, 1, stdout); | |
37 | tmp2 = NULL; | |
38 | } | |
39 | continue; | |
40 | } | |
41 | ||
42 | if (!(ch & 0x80)) { | |
43 | /* 0xxxxxxx */ | |
44 | if (ch == '&') | |
45 | fputs("&", stdout); | |
46 | else if (ch == '\'') | |
47 | fputs("'", stdout); | |
48 | else if (ch == '"') | |
49 | fputs(""", stdout); | |
50 | else if (ch == '<') | |
51 | fputs("<", stdout); | |
52 | else if (ch == '>') | |
53 | fputs(">", stdout); | |
54 | else if (ch >= 0x20) | |
55 | fputc(ch, stdout); | |
56 | else if (ch == 0x09 || ch == 0x0a || ch == 0x0d) | |
57 | fprintf(stdout, "&#x%02x;", ch); | |
58 | else | |
59 | fputs(utf8_replace_character, stdout); | |
60 | } else if ((ch & 0xe0) == 0xc0) { | |
61 | /* 110XXXXx 10xxxxxx */ | |
62 | tmp[0] = ch; | |
63 | remaining = 1; | |
64 | tmp2 = tmp + 1; | |
65 | } else if ((ch & 0xf0) == 0xe0) { | |
66 | /* 1110XXXX 10Xxxxxx 10xxxxxx */ | |
67 | tmp[0] = ch; | |
68 | remaining = 2; | |
69 | tmp2 = tmp + 1; | |
70 | } else if ((ch & 0xf8) == 0xf0) { | |
71 | /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */ | |
72 | tmp[0] = ch; | |
73 | remaining = 3; | |
74 | tmp2 = tmp + 1; | |
75 | } else | |
76 | fputs(utf8_replace_character, stdout); | |
77 | } | |
78 | ||
79 | return 0; | |
80 | } |