Fix backslash escaping mechanism.

author Ben Schmidt <none@none>

Tue, 28 Feb 2012 13:03:46 +0000 (00:03 +1100)

committer Ben Schmidt <none@none>

Tue, 28 Feb 2012 13:03:46 +0000 (00:03 +1100)
author Ben Schmidt <none@none>
Tue, 28 Feb 2012 13:03:46 +0000 (00:03 +1100)
committer Ben Schmidt <none@none>
Tue, 28 Feb 2012 13:03:46 +0000 (00:03 +1100)
diff --git a/ChangeLog b/ChangeLog

index d45b1fcb66d76b6bd9a07416e99596002d17f831..55e7434bc5caabe76d0f18e7cf7c6eaacc1ccad5 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,6 @@
+ o Fix backslash escaping mechanism so double backslash can't effectively
+   recurse and form part of another escape sequence, other non-unicode escapes
+   aren't ignored, and first lines of included files don't 'escape' escaping.
   o Add ability to subscribe to both (normal and digest).
   o Fix access logic so subonlypost doesn't override a send access rule.
   o Make +unsubscribe remove the requester from all versions of the list.
diff --git a/include/unistr.h b/include/unistr.h

index a9f3d3d0d2308349c5c74699f240d84c2d95cfba..c1811b65816cbaadc31793c77edb38db83168ade 100644 (file)
--- a/include/unistr.h
+++ b/include/unistr.h
@@ -34,16 +34,16 @@ typedef struct _unistr {
  
  unistr *unistr_new(void);
  void unistr_free(unistr *str);
-int unistr_cmp(unistr *str1, unistr *str2);
-unistr *unistr_dup(unistr *str);
+int unistr_cmp(const unistr *str1, const unistr *str2);
+unistr *unistr_dup(const unistr *str);
  void unistr_append_char(unistr *str, unistr_char uc);
-void unistr_append_usascii(unistr *str, char *binary, size_t bin_len);
-void unistr_append_utf8(unistr *str, char *binary, size_t bin_len);
-void unistr_append_iso88591(unistr *str, char *binary, size_t bin_len);
-void unistr_dump(unistr *str);
-char *unistr_to_utf8(unistr *str);
-char *unistr_header_to_utf8(char *str);
-char *unistr_utf8_to_header(char *str);
-char *unistr_escaped_to_utf8(char *str);
+void unistr_append_usascii(unistr *str, const char *binary, size_t bin_len);
+void unistr_append_utf8(unistr *str, const char *binary, size_t bin_len);
+void unistr_append_iso88591(unistr *str, const char *binary, size_t bin_len);
+void unistr_dump(const unistr *str);
+char *unistr_to_utf8(const unistr *str);
+char *unistr_header_to_utf8(const char *str);
+char *unistr_utf8_to_header(const char *str);
+char *unistr_escaped_to_utf8(const char *str);
  
  #endif
diff --git a/src/prepstdreply.c b/src/prepstdreply.c

index 7a260db1d5048d5883b18ade7014035acd7a9c93..e0290e34b2e49b5e31b521af2b3573862b1f8f24 100644 (file)
--- a/src/prepstdreply.c
+++ b/src/prepstdreply.c
@@ -562,10 +562,10 @@ void register_formatted(text *txt, const char *token,
  
  
  static void begin_new_source_file(text *txt, char **line_p, char **pos_p,
-               const char *filename) {
+               const char *filename, int transparent) {
         char *line = *line_p;
         char *pos = *pos_p;
-       char *tmp;
+       char *tmp, *esc;
         source *src;
         int fd;
         size_t len;
@@ -593,7 +593,7 @@ static void begin_new_source_file(text *txt, char **line_p, char **pos_p,
         src->suffix = NULL;
         src->fd = fd;
         src->fmt = NULL;
-       src->transparent = 0;
+       src->transparent = transparent;
         src->limit = -1;
         txt->src = src;
         tmp = mygetline(fd);
@@ -602,6 +602,11 @@ static void begin_new_source_file(text *txt, char **line_p, char **pos_p,
                 **pos_p = '\0';
                 return;
         }
+       if (!transparent) {
+               esc = unistr_escaped_to_utf8(tmp);
+               myfree(tmp);
+               tmp = esc;
+       }
         line = concatstr(2, line, tmp);
         *pos_p = line + (*pos_p - *line_p);
         myfree(*line_p);
@@ -611,7 +616,7 @@ static void begin_new_source_file(text *txt, char **line_p, char **pos_p,
  
  
  static void begin_new_formatted_source(text *txt, char **line_p, char **pos_p,
-               char *suffix, formatted *fmt) {
+               char *suffix, formatted *fmt, int transparent) {
         char *line = *line_p;
         char *pos = *pos_p;
         const char *str;
@@ -640,7 +645,7 @@ static void begin_new_formatted_source(text *txt, char **line_p, char **pos_p,
         }
         src->fd = -1;
         src->fmt = fmt;
-       src->transparent = 0;
+       src->transparent = transparent;
         src->limit = -1;
         txt->src = src;
         str = (*fmt->get)(fmt->state);
@@ -650,6 +655,7 @@ static void begin_new_formatted_source(text *txt, char **line_p, char **pos_p,
                 *pos_p = *line_p;
                 return;
         }
+       if (!transparent) str = unistr_escaped_to_utf8(str);
         line = concatstr(2, line, str);
         /* The suffix will be added back in get_processed_text_line() */
         *pos_p = line + strlen(line);
@@ -911,7 +917,7 @@ static int handle_directive(text *txt, char **line_p, char **pos_p,
                 token = filename_token(token + 8);
                 if (token != NULL) {
                         filename = concatstr(3, listdir, "/control/", token);
-                       begin_new_source_file(txt, line_p, pos_p, filename);
+                       begin_new_source_file(txt, line_p, pos_p, filename, 0);
                         myfree(filename);
                         return 0;
                 }
@@ -919,7 +925,7 @@ static int handle_directive(text *txt, char **line_p, char **pos_p,
                 token = filename_token(token + 5);
                 if (token != NULL) {
                         filename = concatstr(3, listdir, "/text/", token);
-                       begin_new_source_file(txt, line_p, pos_p, filename);
+                       begin_new_source_file(txt, line_p, pos_p, filename, 0);
                         myfree(filename);
                         return 0;
                 }
@@ -938,8 +944,7 @@ static int handle_directive(text *txt, char **line_p, char **pos_p,
                 }
                 if (limit != 0) {
                         begin_new_source_file(txt, line_p, pos_p,
-                                       txt->mailname);
-                       txt->src->transparent = 1;
+                                       txt->mailname, 1);
                         if (limit == -1) txt->src->limit = -1;
                         else txt->src->limit = limit - 1;
                         return 0;
@@ -958,7 +963,7 @@ static int handle_directive(text *txt, char **line_p, char **pos_p,
         while (fmt != NULL) {
                 if (strcmp(token, fmt->token) == 0) {
                         begin_new_formatted_source(txt, line_p, pos_p,
-                                       endpos + 1, fmt);
+                                       endpos + 1, fmt, 0);
                         return 0;
                 }
                 fmt = fmt->next;
@@ -1004,17 +1009,25 @@ char *get_processed_text_line(text *txt, int headers,
                         }
                         if (txt->src->limit != 0) {
                                 if (txt->src->fd != -1) {
-                                       txt->src->upcoming =
-                                                       mygetline(txt->src->fd);
+                                       tmp = mygetline(txt->src->fd);
                                 } else if (txt->src->fmt != NULL) {
                                         item = (*txt->src->fmt->get)(
-                                                       txt->src->fmt->state);
-                                       if (item==NULL) txt->src->upcoming=NULL;
-                                       else txt->src->upcoming=mystrdup(item);
+                                               txt->src->fmt->state);
+                                       if (item==NULL) tmp = NULL;
+                                       else tmp = mystrdup(item);
                                 } else {
-                                       txt->src->upcoming = NULL;
+                                       tmp = NULL;
                                 }
                                 if (txt->src->limit > 0) txt->src->limit--;
+                               if (tmp == NULL) {
+                                       txt->src->upcoming = NULL;
+                               } else if (txt->src->transparent) {
+                                       txt->src->upcoming = tmp;
+                               } else {
+                                       txt->src->upcoming =
+                                               unistr_escaped_to_utf8(tmp);
+                                       myfree(tmp);
+                               }
                         } else {
                                 txt->src->upcoming = NULL;
                         }
@@ -1027,10 +1040,6 @@ char *get_processed_text_line(text *txt, int headers,
                         return NULL;
                 }
  
-               tmp = unistr_escaped_to_utf8(line);
-               myfree(line);
-               line = tmp;
-
                 if (prev != NULL) {
                         /* Wrapping */
                         len = strlen(prev);
@@ -1120,23 +1129,28 @@ char *get_processed_text_line(text *txt, int headers,
                                         spc = pos - line;
                                         spcnext = spc + 1;
                                 }
-                       } else if (*pos == '\\' && *(pos + 1) == ' ') {
-                               if (txt->skip == NULL) {
-                                       spc = pos - line - 1;
-                                       spcnext = spc + 1;
-                               }
-                               *pos = '\0';
-                               tmp = concatstr(2, line, pos + 2);
-                               pos = tmp + (pos - line);
-                               myfree(line);
-                               line = tmp;
-                               continue;
                         } else if (*pos == '\t') {
                                 /* Avoid breaking due to peeking */
                         } else if (txt->src->transparent) {
                                 /* Do nothing if the file is to be included
                                  * transparently */
                                 if (peeking && txt->skip == NULL) break;
+                       } else if (*pos == '\\' && txt->skip == NULL) {
+                               if (peeking) break;
+                               if (*(pos + 1) == ' ') {
+                                       spc = len - 1;
+                                       tmp = pos + 2;
+                               } else {
+                                       /* Includes backslash */
+                                       tmp = pos + 1;
+                               }
+                               *pos = '\0';
+                               tmp = concatstr(2, line, tmp);
+                               pos = tmp + len;
+                               myfree(line);
+                               line = tmp;
+                               skipwhite = 0;
+                               continue;
                         } else if (*pos == '$' && txt->skip == NULL) {
                                 if (peeking) break;
                                 substitute_one(&line, &pos, listaddr,
diff --git a/src/unistr.c b/src/unistr.c

index 5ce48e9e6c7fcbd1fc770e9eba6c4e98b4f43ff1..722b5c8ecfcc8982b93609d32e99230ff2a427ea 100644 (file)
--- a/src/unistr.c
+++ b/src/unistr.c
@@ -59,7 +59,7 @@ void unistr_free(unistr *str)
  }
  
  
-int unistr_cmp(unistr *str1, unistr *str2)
+int unistr_cmp(const unistr *str1, const unistr *str2)
  {
         unsigned int i;
  
@@ -77,7 +77,7 @@ int unistr_cmp(unistr *str1, unistr *str2)
  }
  
  
-unistr *unistr_dup(unistr *str)
+unistr *unistr_dup(const unistr *str)
  {
         unistr *ret;
         unsigned int i;
@@ -101,7 +101,7 @@ void unistr_append_char(unistr *str, unistr_char uc)
  }
  
  
-void unistr_append_usascii(unistr *str, char *binary, size_t bin_len)
+void unistr_append_usascii(unistr *str, const char *binary, size_t bin_len)
  {
         unsigned int i;
  
@@ -115,7 +115,7 @@ void unistr_append_usascii(unistr *str, char *binary, size_t bin_len)
  }
  
  
-void unistr_append_utf8(unistr *str, char *binary, size_t bin_len)
+void unistr_append_utf8(unistr *str, const char *binary, size_t bin_len)
  {
         unsigned int i, j;
         unistr_char ch;
@@ -166,7 +166,7 @@ void unistr_append_utf8(unistr *str, char *binary, size_t bin_len)
  }
  
  
-void unistr_append_iso88591(unistr *str, char *binary, size_t bin_len)
+void unistr_append_iso88591(unistr *str, const char *binary, size_t bin_len)
  {
         unsigned int i;
  
@@ -180,7 +180,7 @@ void unistr_append_iso88591(unistr *str, char *binary, size_t bin_len)
  }
  
  
-void unistr_dump(unistr *str)
+void unistr_dump(const unistr *str)
  {
         unsigned int i;
  
@@ -199,7 +199,7 @@ void unistr_dump(unistr *str)
  }
  
  
-char *unistr_to_utf8(unistr *str)
+char *unistr_to_utf8(const unistr *str)
  {
         unsigned int i;
         size_t len = 0;
@@ -433,7 +433,7 @@ static void header_decode_word(char *word, unistr *ret)
  /* IN: "=?iso-8859-1?Q?hyggem=F8de?= torsdag"
   * OUT: "hyggem\xC3\xB8de torsdag"
   */
-char *unistr_header_to_utf8(char *str)
+char *unistr_header_to_utf8(const char *str)
  {
         char *my_str;
         char *word;
@@ -479,11 +479,11 @@ static int is_ok_in_header(char ch)
  /* IN: "hyggem\xC3\xB8de torsdag"
   * OUT: "=?utf-8?Q?hyggem=C3=B8de_torsdag?="
   */
-char *unistr_utf8_to_header(char *str)
+char *unistr_utf8_to_header(const char *str)
  {
         unistr *us;
         char *ret;
-       char *p;
+       const char *p;
         int clean;
         char buf[4];
  
@@ -524,24 +524,21 @@ char *unistr_utf8_to_header(char *str)
  /* IN: "hyggem\\u00F8de torsdag"
   * OUT: "hyggem\xC3\xB8de torsdag"
   */
-char *unistr_escaped_to_utf8(char *str)
+char *unistr_escaped_to_utf8(const char *str)
  {
         unistr_char ch;
         unistr *us;
         char *ret;
         char u[5];
         int len;
+       int skip = 0;
  
         us = unistr_new();
  
         while (*str) {
                 if (*str == '\\') {
                         str++;
-                       if (*str == '\\') {
-                               str++;
-                               unistr_append_char(us, '\\');
-                               continue;
-                       } else if (*str == 'u') {
+                       if (*str == 'u' && !skip) {
                                 str++;
                                 if (!isxdigit(str[0]) ||
                                                 !isxdigit(str[1]) ||
@@ -559,7 +556,11 @@ char *unistr_escaped_to_utf8(char *str)
                                 unistr_append_char(us, ch);
                                 continue;
                         } else {
-                               unistr_append_char(us, '?');
+                               unistr_append_char(us, '\\');
+                               /* Avoid processing the second backslash of a
+                                * double-backslash; but if this was such a
+                                * second backslash, go back to normal */
+                               skip = !skip;
                                 continue;
                         }
                 } else {
author	Ben Schmidt <none@none>
	Tue, 28 Feb 2012 13:03:46 +0000 (00:03 +1100)
committer	Ben Schmidt <none@none>
	Tue, 28 Feb 2012 13:03:46 +0000 (00:03 +1100)
ChangeLog		patch \| blob \| blame \| history
include/unistr.h		patch \| blob \| blame \| history
src/prepstdreply.c		patch \| blob \| blame \| history
src/unistr.c		patch \| blob \| blame \| history