#define SWITCH_XML_WS "\t\r\n " /* whitespace characters: tab, carriage return, line feed, space */
#define SWITCH_XML_ERRL 128 /* maximum error string length */
+/* Use UTF-8 as the general encoding: gates the multi-byte UTF-8 handling in the escaping code below, which emits decoded sequences as "&#x...;" references */
+#define USE_UTF_8_ENCODING SWITCH_TRUE
+
static int preprocess(const char *cwd, const char *file, int write_fd, int rlevel); /* forward declaration of a file-local (static) function; its body is not shown here */
typedef struct switch_xml_root *switch_xml_root_t; /* pointer type: switch_xml_root_t is a pointer to struct switch_xml_root */
{
const char *e = NULL;
int immune = 0;
+ int expecting_x_utf_8_char = 0;
+ int unicode_char = 0x000000;
if (!(s && *s))
return *dst;
*dlen += sprintf(*dst + *dlen, "
");
break;
default:
- (*dst)[(*dlen)++] = *s;
+ if (USE_UTF_8_ENCODING && expecting_x_utf_8_char == 0 && ((*s >> 8) & 0x01)) {
+ int num = 1;
+ for (;num<4;num++) {
+ if (! ((*s >> (7-num)) & 0x01)) {
+ break;
+ }
+ }
+ switch (num) {
+ case 2:
+ unicode_char = *s & 0x1f;
+ break;
+ case 3:
+ unicode_char = *s & 0x0f;
+ break;
+ case 4:
+ unicode_char = *s & 0x07;
+ break;
+ default:
+ switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Invalid UTF-8 Initial charactere, skip it\n");
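+ /* Only reached with num == 1 (bit 6 of the lead byte is clear): the byte is not copied to the output, and num - 1 below leaves zero continuation bytes expected */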
+ break;
+ }
+ expecting_x_utf_8_char = num - 1;
+
+ } else if (USE_UTF_8_ENCODING && expecting_x_utf_8_char > 0) {
+ if (((*s >> 6) & 0x03) == 0x2) {
+
+ unicode_char = unicode_char << 6;
+ unicode_char = unicode_char | (*s & 0x3f);
+ } else {
+ switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Invalid UTF-8 character to ampersand, skip it\n");
+ expecting_x_utf_8_char = 0;
+ break;
+ }
+ expecting_x_utf_8_char--;
+ if (expecting_x_utf_8_char == 0) {
+ *dlen += sprintf(*dst + *dlen, "&#x%X;", unicode_char);
+ }
+ } else {
+ (*dst)[(*dlen)++] = *s;
+ }
}
s++;
}