git.ipfire.org Git - thirdparty/freeswitch.git/commitdiff
FS-2081 --resolve
author Marc Olivier Chouinard <mochouinard@moctel.com>
Fri, 16 Dec 2011 04:36:20 +0000 (23:36 -0500)
committer Marc Olivier Chouinard <mochouinard@moctel.com>
Fri, 16 Dec 2011 04:36:20 +0000 (23:36 -0500)
src/switch_xml.c

index 7790c1761249bc9ab96d9eb1e5b87690ba1e0f5c..7e717846ef4076c1183b2892c1ab42570cce9a3d 100644 (file)
@@ -101,6 +101,9 @@ void globfree(glob_t *);
 #define SWITCH_XML_WS   "\t\r\n "      /* whitespace */
 #define SWITCH_XML_ERRL 128            /* maximum error string length */
 
+/* Use UTF-8 as the general encoding */
+#define USE_UTF_8_ENCODING SWITCH_TRUE
+
 static int preprocess(const char *cwd, const char *file, int write_fd, int rlevel);
 
 typedef struct switch_xml_root *switch_xml_root_t;
@@ -2238,6 +2241,8 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst,
 {
        const char *e = NULL;
        int immune = 0;
+       int expecting_x_utf_8_char = 0;
+       int unicode_char = 0x000000;
 
        if (!(s && *s))
                return *dst;
@@ -2290,7 +2295,47 @@ static char *switch_xml_ampencode(const char *s, switch_size_t len, char **dst,
                                *dlen += sprintf(*dst + *dlen, "&#xD;");
                                break;
                        default:
-                               (*dst)[(*dlen)++] = *s;
+                               if (USE_UTF_8_ENCODING && expecting_x_utf_8_char == 0 && ((*s >> 8) & 0x01)) {
+                                       int num = 1;
+                                       for (;num<4;num++) {
+                                               if (! ((*s >> (7-num)) & 0x01)) {
+                                                       break;
+                                               }
+                                       }
+                                       switch (num) {
+                                               case 2:
+                                                       unicode_char = *s & 0x1f;
+                                                       break;
+                                               case 3:
+                                                       unicode_char = *s & 0x0f;
+                                                       break;
+                                               case 4:
+                                                       unicode_char = *s & 0x07;
+                                                       break;
+                                               default:
+                                                       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Invalid UTF-8 Initial charactere, skip it\n");
+                                                       /* ERROR HERE */
+                                                       break;
+                                       }
+                                       expecting_x_utf_8_char = num - 1;
+
+                               } else if (USE_UTF_8_ENCODING && expecting_x_utf_8_char > 0) {
+                                       if (((*s >> 6) & 0x03) == 0x2) {
+
+                                               unicode_char = unicode_char << 6;
+                                               unicode_char = unicode_char | (*s & 0x3f);
+                                       } else {
+                                               switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Invalid UTF-8 character to ampersand, skip it\n");
+                                               expecting_x_utf_8_char = 0;
+                                               break;
+                                       }
+                                       expecting_x_utf_8_char--;
+                                       if (expecting_x_utf_8_char == 0) {
+                                               *dlen += sprintf(*dst + *dlen, "&#x%X;", unicode_char);
+                                       }
+                               } else {
+                                       (*dst)[(*dlen)++] = *s;
+                               }
                        }
                s++;
        }