token_type_plus, /* + */
token_type_regexp, /* /.../ */
token_type_operator, /* - * / % . < > = ~ ! | & ? : ^ */
+ token_type_equal, /* = */
token_type_string, /* "abc", 'abc' */
token_type_keyword, /* return, else */
token_type_symbol, /* symbol, number */
phase2_ungetc (c);
}
+static int xml_element_depth = 0;
+static bool inside_embedded_js_in_xml = false;
+
+/* Try to skip an E4X XML comment (<!-- ... -->), CDATA section
+   (<![CDATA[ ... ]]>) or processing instruction (<? ... ?>).
+   The caller has already consumed the leading '<'.
+
+   Return true if one of these markups was recognized and consumed up to
+   and including its closing '>'.
+   Return false if the input does not start with any of the start markers
+   (the characters read while probing are pushed back), or after emitting
+   a warning when the markup is malformed or unterminated (in which case
+   the characters consumed so far are not restored).
+
+   TP is not used; it is kept for the sake of the existing callers.  */
+static bool
+phase5_scan_xml_markup (token_ty *tp)
+{
+  static const struct
+  {
+    const char *start;
+    const char *end;
+    /* True if END must not occur inside the markup unless it is
+       immediately followed by '>'.  This holds for "--" in comments, but
+       not for "]]" in CDATA sections nor for "?" in processing
+       instructions, where only the complete "]]>" resp. "?>" sequence is
+       special.  */
+    bool end_is_reserved;
+  } markers[] =
+  {
+    { "!--", "--", true },
+    { "![CDATA[", "]]", false },
+    { "?", "?", false }
+  };
+  unsigned int i;
+
+  (void) tp;
+
+  for (i = 0; i < SIZEOF (markers); i++)
+    {
+      const char *start = markers[i].start;
+      const char *end = markers[i].end;
+      int j;
+
+      /* Look for a start marker.  */
+      for (j = 0; start[j] != '\0'; j++)
+        {
+          int c;
+
+          assert (phase2_pushback_length + j < SIZEOF (phase2_pushback));
+          c = phase2_getc ();
+          if (c == UEOF)
+            goto eof;
+          if (c != start[j])
+            {
+              int k;
+
+              /* Not this kind of markup.  Restore the input so that the
+                 next marker (or the caller) sees it unchanged.  */
+              phase2_ungetc (c);
+              for (k = j - 1; k >= 0; k--)
+                phase2_ungetc (start[k]);
+              break;
+            }
+        }
+
+      if (start[j] != '\0')
+        continue;
+
+      /* Skip until the end marker.  */
+      for (;;)
+        {
+          int c;
+          int k;
+
+          for (j = 0; end[j] != '\0'; j++)
+            {
+              assert (phase2_pushback_length + 1 < SIZEOF (phase2_pushback));
+              c = phase2_getc ();
+              if (c == UEOF)
+                goto eof;
+              if (c != end[j])
+                {
+                  /* Don't push the first character back, so that the next
+                     iteration starts from the second character.  */
+                  if (j > 0)
+                    {
+                      phase2_ungetc (c);
+                      for (k = j - 1; k > 0; k--)
+                        phase2_ungetc (end[k]);
+                    }
+                  break;
+                }
+            }
+
+          if (end[j] != '\0')
+            continue;
+
+          /* The whole end marker has been read; J is its length.  */
+          c = phase2_getc ();
+          if (c == UEOF)
+            goto eof;
+          if (c == '>')
+            return true;
+
+          if (markers[i].end_is_reserved)
+            {
+              error_with_progname = false;
+              error (0, 0,
+                     _("%s:%d: warning: %s is not allowed"),
+                     logical_file_name, line_number,
+                     end);
+              error_with_progname = true;
+              return false;
+            }
+
+          /* The end marker without a following '>' is ordinary content.
+             Resume the search one character after the place where the
+             marker started, so that input like "]]]>" or "??>" is still
+             recognized as a terminator.  */
+          assert (phase2_pushback_length + j <= SIZEOF (phase2_pushback));
+          phase2_ungetc (c);
+          for (k = j - 1; k > 0; k--)
+            phase2_ungetc (end[k]);
+        }
+    }
+  return false;
+
+ eof:
+  error_with_progname = false;
+  error (0, 0,
+         _("%s:%d: warning: unterminated XML markup"),
+         logical_file_name, line_number);
+  error_with_progname = true;
+  return false;
+}
+
static void
phase5_get (token_ty *tp)
{
/* Identify operators. The multiple character ones are simply ignored
* as they are recognized here and are otherwise not relevant. */
case '-': case '*': /* '+' and '/' are not listed here! */
- case '%': case '<': case '>': case '=':
+ case '%':
case '~': case '!': case '|': case '&': case '^':
case '?': case ':':
tp->type = last_token_type = token_type_operator;
return;
+ case '=':
+ tp->type = last_token_type = token_type_equal;
+ return;
+
+ case '<':
+ {
+ /* We assume:
+ - XMLMarkup and XMLElement are only allowed after '=' or '('
+ - embedded JavaScript expressions in XML do not recurse
+ */
+ if (xml_element_depth > 0
+ || (!inside_embedded_js_in_xml
+ && (last_token_type == token_type_equal
+ || last_token_type == token_type_lparen)))
+ {
+ /* Comments, PI, or CDATA. */
+ if (phase5_scan_xml_markup (tp))
+ return;
+ c = phase2_getc ();
+
+ /* Closing tag. */
+ if (c == '/')
+ lexical_context = lc_xml_close_tag;
+
+ /* Opening element. */
+ else
+ {
+ phase2_ungetc (c);
+ lexical_context = lc_xml_open_tag;
+ xml_element_depth++;
+ }
+
+ tp->type = last_token_type = token_type_other;
+ }
+ else
+ tp->type = last_token_type = token_type_operator;
+ }
+ return;
+
+ case '>':
+   /* While inside an XML literal (and outside an embedded {...}
+      expression) '>' ends a tag; otherwise it is a plain operator.  */
+   if (xml_element_depth > 0 && !inside_embedded_js_in_xml)
+     {
+       switch (lexical_context)
+         {
+         case lc_xml_open_tag:
+           /* End of an opening tag: element content follows.  */
+           lexical_context = lc_xml_content;
+           break;
+
+         case lc_xml_close_tag:
+           /* End of a closing tag: one element less is open.  The
+              depth must be decremented before it is tested; testing
+              the old value is always true here (we are inside the
+              xml_element_depth > 0 guard), which would make it
+              impossible to ever leave XML mode.  */
+           if (--xml_element_depth > 0)
+             lexical_context = lc_xml_content;
+           else
+             lexical_context = lc_outside;
+           break;
+
+         default:
+           break;
+         }
+       tp->type = last_token_type = token_type_other;
+     }
+   else
+     tp->type = last_token_type = token_type_operator;
+   return;
+
case '/':
+ if (xml_element_depth > 0 && !inside_embedded_js_in_xml)
+   {
+     /* If it appears in an opening tag of an XML element, it's
+        part of '/>'.  */
+     if (lexical_context == lc_xml_open_tag)
+       {
+         c = phase2_getc ();
+         if (c == '>')
+           {
+             /* '/>' closes the element whose opening '<' incremented
+                xml_element_depth, so undo that increment here.  Only
+                when the outermost element has been closed are we back
+                outside of XML; otherwise we are in the content of the
+                enclosing element.  */
+             if (--xml_element_depth > 0)
+               lexical_context = lc_xml_content;
+             else
+               lexical_context = lc_outside;
+           }
+         else
+           phase2_ungetc (c);
+       }
+     /* Inside XML a '/' is never a division operator nor the start
+        of a regular expression.  */
+     tp->type = last_token_type = token_type_other;
+     return;
+   }
+
/* Either a division operator or the start of a regular
expression literal. If the '/' token is spotted after a
symbol it's a division, otherwise it's a regular
}
return;
+ case '{':
+ if (xml_element_depth > 0 && !inside_embedded_js_in_xml)
+ inside_embedded_js_in_xml = true;
+ tp->type = last_token_type = token_type_other;
+ return;
+
+ case '}':
+ if (xml_element_depth > 0 && inside_embedded_js_in_xml)
+ inside_embedded_js_in_xml = false;
+ tp->type = last_token_type = token_type_other;
+ return;
+
case '(':
tp->type = last_token_type = token_type_lparen;
return;
case token_type_plus:
case token_type_regexp:
case token_type_operator:
+ case token_type_equal:
case token_type_other:
next_context_iter = null_context_list_iterator;
state = 0;
last_comment_line = -1;
last_non_comment_line = -1;
+ xml_element_depth = 0;
+
xgettext_current_file_source_encoding = xgettext_global_source_encoding;
#if HAVE_ICONV
xgettext_current_file_source_iconv = xgettext_global_source_iconv;
--- /dev/null
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test of JavaScript E4X support.
+# Runs xgettext on a JavaScript file that mixes E4X XML literals with
+# _("...") calls, and checks that exactly the strings marked "Expected"
+# (and none of those marked "Unexpected", which sit inside CDATA sections,
+# XML comments and processing instructions) end up in the POT file.
+
+tmpfiles=""
+# Remove the temporary files when the test is interrupted by signal
+# 1, 2, 3 or 15.
+trap 'rm -fr $tmpfiles' 1 2 3 15
+
+# Input file.  The quoted heredoc delimiter (\EOF) keeps the shell from
+# expanding anything inside it.
+tmpfiles="$tmpfiles xg-js-6.js"
+cat <<\EOF > xg-js-6.js
+var x1 = <x1></x1>;
+var s1 = _("Expected translation string #1");
+var s2 = "foo";
+var x2 = <{s2}>foo {s2} bar</{s2}>;
+var x3 = <x3 a1="/"><x4>{_("Expected translation string #2")}</x4></x3>;
+var x4 = <x5 a2='/'><x{_("Expected translation string #3")}>
+</x{_("Expected translation string #3")}></x5>;
+var x4 = <![CDATA[
+ _("Unexpected translation string #1")
+]]>;
+var x5 = <!-- - _("Unexpected translation string #2") - -->;
+var s6 = _("Expected translation string #4");
+var x6 = <? _("Unexpected translation string #3") ?>;
+var x7 = <!--- this is a comment --> <foo>
+</foo>;
+EOF
+
+# Run xgettext; its diagnostics go to xg-js-6.err and are shown only if
+# it exits with a non-zero status, in which case the test fails.
+tmpfiles="$tmpfiles xg-js-6.err xg-js-6.tmp xg-js-6.pot"
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --add-comments --no-location -o xg-js-6.tmp xg-js-6.js 2>xg-js-6.err
+test $? = 0 || { cat xg-js-6.err; rm -fr $tmpfiles; exit 1; }
+# Normalize the output before comparing: drop the POT-Creation-Date line
+# (absent from the expected file below) and strip carriage returns.
+# Don't simplify this to "grep ... < xg-js-6.tmp", otherwise OpenBSD 4.0 grep
+# only outputs "Binary file (standard input) matches".
+cat xg-js-6.tmp | grep -v 'POT-Creation-Date' | LC_ALL=C tr -d '\r' > xg-js-6.pot
+
+# Expected POT file.
+tmpfiles="$tmpfiles xg-js-6.ok"
+cat <<\EOF > xg-js-6.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "Expected translation string #1"
+msgstr ""
+
+msgid "Expected translation string #2"
+msgstr ""
+
+msgid "Expected translation string #3"
+msgstr ""
+
+msgid "Expected translation string #4"
+msgstr ""
+EOF
+
+# The test succeeds iff the normalized output equals the expected file;
+# the exit status of diff is saved before the cleanup and returned.
+: ${DIFF=diff}
+${DIFF} xg-js-6.ok xg-js-6.pot
+result=$?
+
+rm -fr $tmpfiles
+
+exit $result