Improve backslash handling in JavaScript scanner

author Daiki Ueno <ueno@gnu.org>
Sun, 12 May 2013 07:54:58 +0000 (16:54 +0900)

committer Daiki Ueno <ueno@gnu.org>
Tue, 21 May 2013 06:06:03 +0000 (15:06 +0900)

diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog

index 42fc54d6eeab0f684bc1691037e912d7e0599f67..0608e15e3fca26784604c1608db1ca1b0e949379 100644 (file)
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,8 @@
+2013-05-21  Daiki Ueno  <ueno@gnu.org>
+
+       * x-javascript.c (phase7_getuc): Treat non-legitimate character
+       escape sequences more strictly.
+
  2013-05-20  Pavel Kharitonov  <ineiev@gnu.org>  (tiny change)
  
         Add --previous option to msgattrib.
diff --git a/gettext-tools/src/x-javascript.c b/gettext-tools/src/x-javascript.c

index a9296dfd9c8d2835b3cc6b8830e7b2ca6104d62e..654235507c1a2f618f2043b57f1d0665a4ae886d 100644 (file)
--- a/gettext-tools/src/x-javascript.c
+++ b/gettext-tools/src/x-javascript.c
@@ -960,7 +960,10 @@ free_token (token_ty *tp)
     sequences or deprecated octal escape sequences:
       \xXX, \OOO
     Any unicode point can be entered using Unicode escape sequences:
-     \uNNNN  */
+     \uNNNN
+   If a sequence after a backslash is not a legitimate character
+   escape sequence, the character value is the sequence itself without
+   a backslash.  For example, \xxx is treated as xxx.  */
  
  static int
  phase7_getuc (int quote_char)
@@ -976,7 +979,7 @@ phase7_getuc (int quote_char)
          return P7_EOF;
  
        if (c == quote_char)
-       return P7_STRING_END;
+        return P7_STRING_END;
  
        if (c == '\n')
          {
@@ -989,128 +992,124 @@ phase7_getuc (int quote_char)
          }
  
        if (c != '\\')
-       return UNICODE (c);
+        return UNICODE (c);
  
        /* Dispatch according to the character following the backslash.  */
        c = phase2_getc ();
        if (c == UEOF)
-       return UNICODE ('\\');
+        return P7_EOF;
  
-        switch (c)
+      switch (c)
+        {
+        case '\n':
+          continue;
+        case 'b':
+          return UNICODE ('\b');
+        case 'f':
+          return UNICODE ('\f');
+        case 'n':
+          return UNICODE ('\n');
+        case 'r':
+          return UNICODE ('\r');
+        case 't':
+          return UNICODE ('\t');
+        case 'v':
+          return UNICODE ('\v');
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7':
            {
-          case '\n':
-            continue;
-          case '\\':
-            return UNICODE (c);
-          case '\'': case '"':
-            return UNICODE (c);
-          case 'b':
-            return UNICODE ('\b');
-          case 'f':
-            return UNICODE ('\f');
-          case 'n':
-            return UNICODE ('\n');
-          case 'r':
-            return UNICODE ('\r');
-          case 't':
-            return UNICODE ('\t');
-          case 'v':
-            return UNICODE ('\v');
-          case '0': case '1': case '2': case '3': case '4':
-          case '5': case '6': case '7':
-            {
-              int n = c - '0';
+            int n = c - '0';
  
-              c = phase2_getc ();
-              if (c != UEOF)
-                {
-                  if (c >= '0' && c <= '7')
-                    {
-                      n = (n << 3) + (c - '0');
-                      c = phase2_getc ();
-                      if (c != UEOF)
-                        {
-                          if (c >= '0' && c <= '7')
-                            n = (n << 3) + (c - '0');
-                          else
-                            phase2_ungetc (c);
-                        }
-                    }
-                  else
-                    phase2_ungetc (c);
-                }
-              return UNICODE (n);
-            }
-          case 'x':
-            {
-              int c1 = phase2_getc ();
-              int n1;
-
-              if (c1 >= '0' && c1 <= '9')
-                n1 = c1 - '0';
-              else if (c1 >= 'A' && c1 <= 'F')
-                n1 = c1 - 'A' + 10;
-              else if (c1 >= 'a' && c1 <= 'f')
-                n1 = c1 - 'a' + 10;
-              else
-                n1 = -1;
+            c = phase2_getc ();
+            if (c != UEOF)
+              {
+                if (c >= '0' && c <= '7')
+                  {
+                    n = (n << 3) + (c - '0');
+                    c = phase2_getc ();
+                    if (c != UEOF)
+                      {
+                        if (c >= '0' && c <= '7')
+                          n = (n << 3) + (c - '0');
+                        else
+                          phase2_ungetc (c);
+                      }
+                  }
+                else
+                  phase2_ungetc (c);
+              }
+            return UNICODE (n);
+          }
+        case 'x':
+          {
+            int c1 = phase2_getc ();
+            int n1;
+
+            if (c1 >= '0' && c1 <= '9')
+              n1 = c1 - '0';
+            else if (c1 >= 'A' && c1 <= 'F')
+              n1 = c1 - 'A' + 10;
+            else if (c1 >= 'a' && c1 <= 'f')
+              n1 = c1 - 'a' + 10;
+            else
+              n1 = -1;
  
-              if (n1 >= 0)
-                {
-                  int c2 = phase2_getc ();
-                  int n2;
-
-                  if (c2 >= '0' && c2 <= '9')
-                    n2 = c2 - '0';
-                  else if (c2 >= 'A' && c2 <= 'F')
-                    n2 = c2 - 'A' + 10;
-                  else if (c2 >= 'a' && c2 <= 'f')
-                    n2 = c2 - 'a' + 10;
-                  else
-                    n2 = -1;
-
-                  if (n2 >= 0)
-                    {
-                      int n = (n1 << 4) + n2;
-                      return UNICODE (n);
-                    }
+            if (n1 >= 0)
+              {
+                int c2 = phase2_getc ();
+                int n2;
+
+                if (c2 >= '0' && c2 <= '9')
+                  n2 = c2 - '0';
+                else if (c2 >= 'A' && c2 <= 'F')
+                  n2 = c2 - 'A' + 10;
+                else if (c2 >= 'a' && c2 <= 'f')
+                  n2 = c2 - 'a' + 10;
+                else
+                  n2 = -1;
+
+                if (n2 >= 0)
+                  {
+                    int n = (n1 << 4) + n2;
+                    return UNICODE (n);
+                  }
  
-                  phase2_ungetc (c2);
-                }
-              phase2_ungetc (c1);
-              phase2_ungetc (c);
-              return UNICODE ('\\');
-            }
-          case 'u':
-            {
-              unsigned char buf[4];
-              unsigned int n = 0;
-              int i;
+                phase2_ungetc (c2);
+              }
+            phase2_ungetc (c1);
+            return UNICODE (c);
+          }
+        case 'u':
+          {
+            unsigned char buf[4];
+            unsigned int n = 0;
+            int i;
  
-              for (i = 0; i < 4; i++)
-                {
-                  int c1 = phase2_getc ();
-
-                  if (c1 >= '0' && c1 <= '9')
-                    n = (n << 4) + (c1 - '0');
-                  else if (c1 >= 'A' && c1 <= 'F')
-                    n = (n << 4) + (c1 - 'A' + 10);
-                  else if (c1 >= 'a' && c1 <= 'f')
-                    n = (n << 4) + (c1 - 'a' + 10);
-                  else
-                    {
-                      phase2_ungetc (c1);
-                      while (--i >= 0)
-                        phase2_ungetc (buf[i]);
-                      phase2_ungetc (c);
-                      return UNICODE ('\\');
-                    }
+            for (i = 0; i < 4; i++)
+              {
+                int c1 = phase2_getc ();
+
+                if (c1 >= '0' && c1 <= '9')
+                  n = (n << 4) + (c1 - '0');
+                else if (c1 >= 'A' && c1 <= 'F')
+                  n = (n << 4) + (c1 - 'A' + 10);
+                else if (c1 >= 'a' && c1 <= 'f')
+                  n = (n << 4) + (c1 - 'a' + 10);
+                else
+                  {
+                    phase2_ungetc (c1);
+                    while (--i >= 0)
+                      phase2_ungetc (buf[i]);
+                    return UNICODE (c);
+                  }
  
-                  buf[i] = c1;
-                }
-              return UNICODE (n);
-            }
+                buf[i] = c1;
+              }
+            return UNICODE (n);
            }
+        default:
+          return UNICODE (c);
+        }
      }
  }
  
diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog

index 3727ac4d3e09c47418f940f1ee625872c9586569..721c29a5ddd611a1d81ab93587b21c386a33256c 100644 (file)
--- a/gettext-tools/tests/ChangeLog
+++ b/gettext-tools/tests/ChangeLog
@@ -1,3 +1,7 @@
+2013-05-12  Daiki Ueno  <ueno@gnu.org>
+
+       * xgettext-javascript-4: Add a test for normal escape sequences.
+
  2013-05-12  Andreas Stricker <astricker@futurelab.ch>
  
         Improve JavaScript scanner tests.
diff --git a/gettext-tools/tests/xgettext-javascript-4 b/gettext-tools/tests/xgettext-javascript-4

index 92805e92561f486ca2e74548ec315b08f5afe7ff..3b4ff47720ae280de9e9b56570c6f4955eaf4a6f 100755 (executable)
--- a/gettext-tools/tests/xgettext-javascript-4
+++ b/gettext-tools/tests/xgettext-javascript-4
@@ -1,6 +1,6 @@
  #!/bin/sh
  
-# Test of JavaScript Unicode support.
+# Test of JavaScript escape sequences in string literals.
  
  tmpfiles=""
  trap 'rm -fr $tmpfiles' 1 2 3 15
@@ -9,6 +9,7 @@ tmpfiles="$tmpfiles xg-js-4.js"
  cat <<\EOF > xg-js-4.js
  var s1 = _("Unicode escape \u3042");
  var s2 = _("Surrogate pair \uD835\uDC9C");
+var s3 = _("Escape sequence \1411 \x622 \xxx \y");
  EOF
  
  tmpfiles="$tmpfiles xg-js-4.err xg-js-4.tmp xg-js-4.pot"
@@ -44,6 +45,9 @@ msgstr ""
  
  msgid "Surrogate pair 𝒜"
  msgstr ""
+
+msgid "Escape sequence a1 b2 xxx y"
+msgstr ""
  EOF
  
  : ${DIFF=diff}
author	Daiki Ueno <ueno@gnu.org>
	Sun, 12 May 2013 07:54:58 +0000 (16:54 +0900)
committer	Daiki Ueno <ueno@gnu.org>
	Tue, 21 May 2013 06:06:03 +0000 (15:06 +0900)
gettext-tools/src/ChangeLog		patch | blob | blame | history
gettext-tools/src/x-javascript.c		patch | blob | blame | history
gettext-tools/tests/ChangeLog		patch | blob | blame | history
gettext-tools/tests/xgettext-javascript-4		patch | blob | blame | history