From: Daiki Ueno Date: Sun, 12 May 2013 07:54:58 +0000 (+0900) Subject: Improve backslash handling in JavaScript scanner X-Git-Tag: v0.18.3~43 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4f29f02b752d2c909200ae1ea9b684026be6f558;p=thirdparty%2Fgettext.git Improve backslash handling in JavaScript scanner --- diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index 42fc54d6e..0608e15e3 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,8 @@ +2013-05-21 Daiki Ueno + + * x-javascript.c (phase7_getuc): Treat non-legitimate character + escape sequences more strictly. + 2013-05-20 Pavel Kharitonov (tiny change) Add --previous option to msgattrib. diff --git a/gettext-tools/src/x-javascript.c b/gettext-tools/src/x-javascript.c index a9296dfd9..654235507 100644 --- a/gettext-tools/src/x-javascript.c +++ b/gettext-tools/src/x-javascript.c @@ -960,7 +960,10 @@ free_token (token_ty *tp) sequences or deprecated octal escape sequences: \xXX, \OOO Any unicode point can be entered using Unicode escape sequences: - \uNNNN */ + \uNNNN + If a sequence after a backslash is not a legitimate character + escape sequence, the character value is the sequence itself without + a backslash. For example, \xxx is treated as xxx. */ static int phase7_getuc (int quote_char) @@ -976,7 +979,7 @@ phase7_getuc (int quote_char) return P7_EOF; if (c == quote_char) - return P7_STRING_END; + return P7_STRING_END; if (c == '\n') { @@ -989,128 +992,124 @@ phase7_getuc (int quote_char) } if (c != '\\') - return UNICODE (c); + return UNICODE (c); /* Dispatch according to the character following the backslash. */ c = phase2_getc (); if (c == UEOF) - return UNICODE ('\\'); + return P7_EOF; - switch (c) + switch (c) + { + case '\n': + continue; + case 'b': + return UNICODE ('\b'); + case 'f': + return UNICODE ('\f'); + case 'n': + return UNICODE ('\n'); + case 'r': + return UNICODE ('\r'); + case 't': + return UNICODE ('\t'); + case 'v': + return UNICODE ('\v'); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': { - case '\n': - continue; - case '\\': - return UNICODE (c); - case '\'': case '"': - return UNICODE (c); - case 'b': - return UNICODE ('\b'); - case 'f': - return UNICODE ('\f'); - case 'n': - return UNICODE ('\n'); - case 'r': - return UNICODE ('\r'); - case 't': - return UNICODE ('\t'); - case 'v': - return UNICODE ('\v'); - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': - { - int n = c - '0'; + int n = c - '0'; - c = phase2_getc (); - if (c != UEOF) - { - if (c >= '0' && c <= '7') - { - n = (n << 3) + (c - '0'); - c = phase2_getc (); - if (c != UEOF) - { - if (c >= '0' && c <= '7') - n = (n << 3) + (c - '0'); - else - phase2_ungetc (c); - } - } - else - phase2_ungetc (c); - } - return UNICODE (n); - } - case 'x': - { - int c1 = phase2_getc (); - int n1; - - if (c1 >= '0' && c1 <= '9') - n1 = c1 - '0'; - else if (c1 >= 'A' && c1 <= 'F') - n1 = c1 - 'A' + 10; - else if (c1 >= 'a' && c1 <= 'f') - n1 = c1 - 'a' + 10; - else - n1 = -1; + c = phase2_getc (); + if (c != UEOF) + { + if (c >= '0' && c <= '7') + { + n = (n << 3) + (c - '0'); + c = phase2_getc (); + if (c != UEOF) + { + if (c >= '0' && c <= '7') + n = (n << 3) + (c - '0'); + else + phase2_ungetc (c); + } + } + else + phase2_ungetc (c); + } + return UNICODE (n); + } + case 'x': + { + int c1 = phase2_getc (); + int n1; + + if (c1 >= '0' && c1 <= '9') + n1 = c1 - '0'; + else if (c1 >= 'A' && c1 <= 'F') + n1 = c1 - 'A' + 10; + else if (c1 >= 'a' && c1 <= 'f') + n1 = c1 - 'a' + 10; + else + n1 = -1; - if (n1 >= 0) - { - int c2 = phase2_getc (); - int n2; - - if (c2 >= '0' && c2 <= '9') - n2 = c2 - '0'; - else if (c2 >= 'A' && c2 <= 'F') - n2 = c2 - 'A' + 10; - else if (c2 >= 'a' && c2 <= 'f') - n2 = c2 - 'a' + 10; - else - n2 = -1; - - if (n2 >= 0) - { - int n = (n1 << 4) + n2; - return UNICODE (n); - } + if (n1 >= 0) + { + int c2 = phase2_getc (); + int n2; + + if (c2 >= '0' && c2 <= '9') + n2 = c2 - '0'; + else if (c2 >= 'A' && c2 <= 'F') + n2 = c2 - 'A' + 10; + else if (c2 >= 'a' && c2 <= 'f') + n2 = c2 - 'a' + 10; + else + n2 = -1; + + if (n2 >= 0) + { + int n = (n1 << 4) + n2; + return UNICODE (n); + } - phase2_ungetc (c2); - } - phase2_ungetc (c1); - phase2_ungetc (c); - return UNICODE ('\\'); - } - case 'u': - { - unsigned char buf[4]; - unsigned int n = 0; - int i; + phase2_ungetc (c2); + } + phase2_ungetc (c1); + return UNICODE (c); + } + case 'u': + { + unsigned char buf[4]; + unsigned int n = 0; + int i; - for (i = 0; i < 4; i++) - { - int c1 = phase2_getc (); - - if (c1 >= '0' && c1 <= '9') - n = (n << 4) + (c1 - '0'); - else if (c1 >= 'A' && c1 <= 'F') - n = (n << 4) + (c1 - 'A' + 10); - else if (c1 >= 'a' && c1 <= 'f') - n = (n << 4) + (c1 - 'a' + 10); - else - { - phase2_ungetc (c1); - while (--i >= 0) - phase2_ungetc (buf[i]); - phase2_ungetc (c); - return UNICODE ('\\'); - } + for (i = 0; i < 4; i++) + { + int c1 = phase2_getc (); + + if (c1 >= '0' && c1 <= '9') + n = (n << 4) + (c1 - '0'); + else if (c1 >= 'A' && c1 <= 'F') + n = (n << 4) + (c1 - 'A' + 10); + else if (c1 >= 'a' && c1 <= 'f') + n = (n << 4) + (c1 - 'a' + 10); + else + { + phase2_ungetc (c1); + while (--i >= 0) + phase2_ungetc (buf[i]); + return UNICODE (c); + } - buf[i] = c1; - } - return UNICODE (n); - } + buf[i] = c1; + } + return UNICODE (n); } + default: + return UNICODE (c); + } } } diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog index 3727ac4d3..721c29a5d 100644 --- a/gettext-tools/tests/ChangeLog +++ b/gettext-tools/tests/ChangeLog @@ -1,3 +1,7 @@ +2013-05-12 Daiki Ueno + + * xgettext-javascript-4: Add a test for normal escape sequences. + 2013-05-12 Andreas Stricker Improve JavaScript scanner tests. diff --git a/gettext-tools/tests/xgettext-javascript-4 b/gettext-tools/tests/xgettext-javascript-4 index 92805e925..3b4ff4772 100755 --- a/gettext-tools/tests/xgettext-javascript-4 +++ b/gettext-tools/tests/xgettext-javascript-4 @@ -1,6 +1,6 @@ #!/bin/sh -# Test of JavaScript Unicode support. +# Test of JavaScript escape sequences in string literals. tmpfiles="" trap 'rm -fr $tmpfiles' 1 2 3 15 @@ -9,6 +9,7 @@ tmpfiles="$tmpfiles xg-js-4.js" cat <<\EOF > xg-js-4.js var s1 = _("Unicode escape \u3042"); var s2 = _("Surrogate pair \uD835\uDC9C"); +var s3 = _("Escape sequence \1411 \x622 \xxx \y"); EOF tmpfiles="$tmpfiles xg-js-4.err xg-js-4.tmp xg-js-4.pot" @@ -44,6 +45,9 @@ msgstr "" msgid "Surrogate pair 𝒜" msgstr "" + +msgid "Escape sequence a1 b2 xxx y" +msgstr "" EOF : ${DIFF=diff}