New Java backend x-java.c, replaces the old Java backend x-java.l.

author Bruno Haible <bruno@clisp.org>

Wed, 8 Oct 2003 10:01:55 +0000 (10:01 +0000)

committer Bruno Haible <bruno@clisp.org>

Tue, 23 Jun 2009 10:11:02 +0000 (12:11 +0200)
author Bruno Haible <bruno@clisp.org>
Wed, 8 Oct 2003 10:01:55 +0000 (10:01 +0000)
committer Bruno Haible <bruno@clisp.org>
Tue, 23 Jun 2009 10:11:02 +0000 (12:11 +0200)
diff --git a/gettext-tools/ChangeLog b/gettext-tools/ChangeLog

index a45c20f1f47445cdf583ea4389ffa6c96226f126..dec1ad95e0d99c6beab3f2427c61ecc726958b5d 100644 (file)
--- a/gettext-tools/ChangeLog
+++ b/gettext-tools/ChangeLog
@@ -1,3 +1,7 @@
+2003-10-04  Bruno Haible  <bruno@clisp.org>
+
+       * configure.ac: Remove gt_PROG_LEX invocation.
+
  2003-09-18  Bruno Haible  <bruno@clisp.org>
  
         * windows/gettextpo.def: Add po_file_domain_header, po_header_field,
diff --git a/gettext-tools/configure.ac b/gettext-tools/configure.ac

index f7c44a1fccbfad17a7941dbb5360cf1998e435f6..35d319217c4182a790fedfb6f2bae625907a4fd5 100644 (file)
--- a/gettext-tools/configure.ac
+++ b/gettext-tools/configure.ac
@@ -33,7 +33,6 @@ dnl Checks for programs.
  AC_PROG_CC
  AC_PROG_INSTALL
  AC_PROG_YACC
-gt_PROG_LEX
  
  gt_GCJ
  if test -n "$HAVE_GCJ"; then
diff --git a/gettext-tools/m4/ChangeLog b/gettext-tools/m4/ChangeLog

index 1c1ebea4f2911f6c1c5aa24c776d07272cb55bf0..ce8f86090e7106d494103b5f6d7ebd19b4b22a6e 100644 (file)
--- a/gettext-tools/m4/ChangeLog
+++ b/gettext-tools/m4/ChangeLog
@@ -1,3 +1,8 @@
+2003-10-04  Bruno Haible  <bruno@clisp.org>
+
+       * flex.m4: Remove file.
+       * Makefile.am (EXTRA_DIST): Remove it.
+
  2003-09-04  Bruno Haible  <bruno@clisp.org>
  
         * locale-fr.m4 (gt_LOCALE_FR): Add support for Solaris 7.
diff --git a/gettext-tools/m4/Makefile.am b/gettext-tools/m4/Makefile.am

index 823951699730550d1aaf6658e50976d10c1712e3..dc9018e51f7edfb3ba2e88e685122c14a3668870 100644 (file)
--- a/gettext-tools/m4/Makefile.am
+++ b/gettext-tools/m4/Makefile.am
@@ -40,7 +40,6 @@ canonicalize.m4 \
  eaccess.m4 \
  error.m4 \
  extensions.m4 \
-flex.m4 \
  fnmatch.m4 \
  gcj.m4 \
  getline.m4 \
diff --git a/gettext-tools/m4/flex.m4 b/gettext-tools/m4/flex.m4

deleted file mode 100644 (file)

index d3409a9..0000000
--- a/gettext-tools/m4/flex.m4
+++ /dev/null
@@ -1,16 +0,0 @@
-# flex.m4 serial 2 (gettext-0.12)
-dnl Copyright (C) 2001-2003 Free Software Foundation, Inc.
-dnl This file is free software, distributed under the terms of the GNU
-dnl General Public License.  As a special exception to the GNU General
-dnl Public License, this file may be distributed as part of a program
-dnl that contains a configuration script generated by Autoconf, under
-dnl the same distribution terms as the rest of that program.
-
-# Check for flex.
-
-AC_DEFUN([gt_PROG_LEX],
-[
-  dnl Don't use AC_PROG_LEX or AM_PROG_LEX; we insist on flex.
-  dnl Thus we don't need LEXLIB.
-  AC_CHECK_PROG(LEX, flex, flex, :)
-])
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog

index eb692dba6b7913052d302e3c573ecfbe65e506ce..36df08a02cdfe7e4016848874bbee60d05b18cf5 100644 (file)
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,12 @@
+2003-10-04  Bruno Haible  <bruno@clisp.org>
+
+       New Java backend.
+       * x-java.c: New file.
+       * x-java.l: Remove file.
+       * Makefile.am (xgettext_SOURCES): Add x-java.c, remove x-java.l.
+       (x-java.c): Remove rule.
+       * FILES: Update.
+
  2003-09-22  Bruno Haible  <bruno@clisp.org>
  
         * x-glade.c (start_element_handler): Implement extract_all behaviour.
diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES

index bb48ecdcec5dfa69a3ab2040f4b78a4b2a34a1c8..9f4ad08eef089715f0a201cecea5ce800e86d02c 100644 (file)
--- a/gettext-tools/src/FILES
+++ b/gettext-tools/src/FILES
@@ -238,7 +238,7 @@ format.c        Table of the language dependent format string handlers.
  | x-smalltalk.c
  |               String extractor for Smalltalk.
  | x-java.h
-| x-java.l
+| x-java.c
  |               String extractor for Java.
  | x-awk.h
  | x-awk.c
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am

index 3293c1af73c54c7c73ffe4a45cf5065f15bc151b..29c45c310cf3d25c0a8ec2f44eb8c3def13aa30f 100644 (file)
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -120,7 +120,7 @@ msgmerge_SOURCES = msgmerge.c plural-count.c
  msgunfmt_SOURCES = msgunfmt.c read-mo.c read-java.c read-tcl.c
  xgettext_SOURCES = xgettext.c \
    x-c.c x-po.c x-sh.c x-python.c x-lisp.c x-elisp.c x-librep.c x-smalltalk.c \
-  x-java.l x-awk.c x-ycp.c x-tcl.c x-perl.c x-php.c x-rst.c x-glade.c
+  x-java.c x-awk.c x-ycp.c x-tcl.c x-perl.c x-php.c x-rst.c x-glade.c
  msgattrib_SOURCES = msgattrib.c
  msgcat_SOURCES = msgcat.c
  msgcomm_SOURCES = msgcomm.c
@@ -225,17 +225,6 @@ po-gram-gen2.h: po-gram-gen.h
         $(SED) -e 's/yy/po_gram_/g' -e 's/extern /extern DLL_VARIABLE /' $(srcdir)/po-gram-gen.h > $@-tmp
         mv $@-tmp $@
  
-# We must add the '#include <vms_jackets.h>' here, not inside x-java.l,
-# because on VMS, <vms_jackets.h> must be included before <stdio.h>.
-x-java.c: x-java.l
-       test "$(LEX)" = ":" || { \
-         $(LEX) -o$@-tmp -Px_java_yy $(srcdir)/x-java.l && \
-         (echo '#ifdef VMS'; echo '#include <vms_jackets.h>'; echo '#endif'; \
-          cat $@-tmp) > $@-tmq && \
-         rm -f $@-tmp && \
-         mv $@-tmq $@ ; \
-       }
-
  
  # Special rules for installation of auxiliary programs.
  
diff --git a/gettext-tools/src/x-java.c b/gettext-tools/src/x-java.c

index ac8bf2858c70c5a5cc4e232be3774231ab7b0d26..dd0f9cd2780e9b2e510e037f4f1a67196dffe762 100644 (file)
--- a/gettext-tools/src/x-java.c
+++ b/gettext-tools/src/x-java.c
@@ -1,420 +1,6 @@
-#ifdef VMS
-#include <vms_jackets.h>
-#endif
-#define yy_create_buffer x_java_yy_create_buffer
-#define yy_delete_buffer x_java_yy_delete_buffer
-#define yy_scan_buffer x_java_yy_scan_buffer
-#define yy_scan_string x_java_yy_scan_string
-#define yy_scan_bytes x_java_yy_scan_bytes
-#define yy_flex_debug x_java_yy_flex_debug
-#define yy_init_buffer x_java_yy_init_buffer
-#define yy_flush_buffer x_java_yy_flush_buffer
-#define yy_load_buffer_state x_java_yy_load_buffer_state
-#define yy_switch_to_buffer x_java_yy_switch_to_buffer
-#define yyin x_java_yyin
-#define yyleng x_java_yyleng
-#define yylex x_java_yylex
-#define yyout x_java_yyout
-#define yyrestart x_java_yyrestart
-#define yytext x_java_yytext
-
-#line 19 "x-java.c-tmp"
-/* A lexical scanner generated by flex */
-
-/* Scanner skeleton version:
- * $Header: /home/daffy/u0/vern/flex/RCS/flex.skl,v 2.91 96/09/10 16:58:48 vern Exp $
- */
-
-#define FLEX_SCANNER
-#define YY_FLEX_MAJOR_VERSION 2
-#define YY_FLEX_MINOR_VERSION 5
-
-#include <stdio.h>
-
-
-/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */
-#ifdef c_plusplus
-#ifndef __cplusplus
-#define __cplusplus
-#endif
-#endif
-
-
-#ifdef __cplusplus
-
-#include <stdlib.h>
-#include <unistd.h>
-
-/* Use prototypes in function declarations. */
-#define YY_USE_PROTOS
-
-/* The "const" storage-class-modifier is valid. */
-#define YY_USE_CONST
-
-#else  /* ! __cplusplus */
-
-#if __STDC__
-
-#define YY_USE_PROTOS
-#define YY_USE_CONST
-
-#endif /* __STDC__ */
-#endif /* ! __cplusplus */
-
-#ifdef __TURBOC__
- #pragma warn -rch
- #pragma warn -use
-#include <io.h>
-#include <stdlib.h>
-#define YY_USE_CONST
-#define YY_USE_PROTOS
-#endif
-
-#ifdef YY_USE_CONST
-#define yyconst const
-#else
-#define yyconst
-#endif
-
-
-#ifdef YY_USE_PROTOS
-#define YY_PROTO(proto) proto
-#else
-#define YY_PROTO(proto) ()
-#endif
-
-/* Returned upon end-of-file. */
-#define YY_NULL 0
-
-/* Promotes a possibly negative, possibly signed char to an unsigned
- * integer for use as an array index.  If the signed char is negative,
- * we want to instead treat it as an 8-bit unsigned char, hence the
- * double cast.
- */
-#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
-
-/* Enter a start condition.  This macro really ought to take a parameter,
- * but we do it the disgusting crufty way forced on us by the ()-less
- * definition of BEGIN.
- */
-#define BEGIN yy_start = 1 + 2 *
-
-/* Translate the current start state into a value that can be later handed
- * to BEGIN to return to the state.  The YYSTATE alias is for lex
- * compatibility.
- */
-#define YY_START ((yy_start - 1) / 2)
-#define YYSTATE YY_START
-
-/* Action number for EOF rule of a given start state. */
-#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
-
-/* Special action meaning "start processing a new file". */
-#define YY_NEW_FILE yyrestart( yyin )
-
-#define YY_END_OF_BUFFER_CHAR 0
-
-/* Size of default input buffer. */
-#define YY_BUF_SIZE 16384
-
-typedef struct yy_buffer_state *YY_BUFFER_STATE;
-
-extern int yyleng;
-extern FILE *yyin, *yyout;
-
-#define EOB_ACT_CONTINUE_SCAN 0
-#define EOB_ACT_END_OF_FILE 1
-#define EOB_ACT_LAST_MATCH 2
-
-/* The funky do-while in the following #define is used to turn the definition
- * int a single C statement (which needs a semi-colon terminator).  This
- * avoids problems with code like:
- *
- *     if ( condition_holds )
- *             yyless( 5 );
- *     else
- *             do_something_else();
- *
- * Prior to using the do-while the compiler would get upset at the
- * "else" because it interpreted the "if" statement as being all
- * done when it reached the ';' after the yyless() call.
- */
-
-/* Return all but the first 'n' matched characters back to the input stream. */
-
-#define yyless(n) \
-       do \
-               { \
-               /* Undo effects of setting up yytext. */ \
-               *yy_cp = yy_hold_char; \
-               YY_RESTORE_YY_MORE_OFFSET \
-               yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \
-               YY_DO_BEFORE_ACTION; /* set up yytext again */ \
-               } \
-       while ( 0 )
-
-#define unput(c) yyunput( c, yytext_ptr )
-
-/* The following is because we cannot portably get our hands on size_t
- * (without autoconf's help, which isn't available because we want
- * flex-generated scanners to compile on their own).
- */
-typedef unsigned int yy_size_t;
-
-
-struct yy_buffer_state
-       {
-       FILE *yy_input_file;
-
-       char *yy_ch_buf;                /* input buffer */
-       char *yy_buf_pos;               /* current position in input buffer */
-
-       /* Size of input buffer in bytes, not including room for EOB
-        * characters.
-        */
-       yy_size_t yy_buf_size;
-
-       /* Number of characters read into yy_ch_buf, not including EOB
-        * characters.
-        */
-       int yy_n_chars;
-
-       /* Whether we "own" the buffer - i.e., we know we created it,
-        * and can realloc() it to grow it, and should free() it to
-        * delete it.
-        */
-       int yy_is_our_buffer;
-
-       /* Whether this is an "interactive" input source; if so, and
-        * if we're using stdio for input, then we want to use getc()
-        * instead of fread(), to make sure we stop fetching input after
-        * each newline.
-        */
-       int yy_is_interactive;
-
-       /* Whether we're considered to be at the beginning of a line.
-        * If so, '^' rules will be active on the next match, otherwise
-        * not.
-        */
-       int yy_at_bol;
-
-       /* Whether to try to fill the input buffer when we reach the
-        * end of it.
-        */
-       int yy_fill_buffer;
-
-       int yy_buffer_status;
-#define YY_BUFFER_NEW 0
-#define YY_BUFFER_NORMAL 1
-       /* When an EOF's been seen but there's still some text to process
-        * then we mark the buffer as YY_EOF_PENDING, to indicate that we
-        * shouldn't try reading from the input source any more.  We might
-        * still have a bunch of tokens to match, though, because of
-        * possible backing-up.
-        *
-        * When we actually see the EOF, we change the status to "new"
-        * (via yyrestart()), so that the user can continue scanning by
-        * just pointing yyin at a new input file.
-        */
-#define YY_BUFFER_EOF_PENDING 2
-       };
-
-static YY_BUFFER_STATE yy_current_buffer = 0;
-
-/* We provide macros for accessing buffer states in case in the
- * future we want to put the buffer states in a more general
- * "scanner state".
- */
-#define YY_CURRENT_BUFFER yy_current_buffer
-
-
-/* yy_hold_char holds the character lost when yytext is formed. */
-static char yy_hold_char;
-
-static int yy_n_chars;         /* number of characters read into yy_ch_buf */
-
-
-int yyleng;
-
-/* Points to current character in buffer. */
-static char *yy_c_buf_p = (char *) 0;
-static int yy_init = 1;                /* whether we need to initialize */
-static int yy_start = 0;       /* start state number */
-
-/* Flag which is used to allow yywrap()'s to do buffer switches
- * instead of setting up a fresh yyin.  A bit of a hack ...
- */
-static int yy_did_buffer_switch_on_eof;
-
-void yyrestart YY_PROTO(( FILE *input_file ));
-
-void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer ));
-void yy_load_buffer_state YY_PROTO(( void ));
-YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size ));
-void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b ));
-void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file ));
-void yy_flush_buffer YY_PROTO(( YY_BUFFER_STATE b ));
-#define YY_FLUSH_BUFFER yy_flush_buffer( yy_current_buffer )
-
-YY_BUFFER_STATE yy_scan_buffer YY_PROTO(( char *base, yy_size_t size ));
-YY_BUFFER_STATE yy_scan_string YY_PROTO(( yyconst char *yy_str ));
-YY_BUFFER_STATE yy_scan_bytes YY_PROTO(( yyconst char *bytes, int len ));
-
-static void *yy_flex_alloc YY_PROTO(( yy_size_t ));
-static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t ));
-static void yy_flex_free YY_PROTO(( void * ));
-
-#define yy_new_buffer yy_create_buffer
-
-#define yy_set_interactive(is_interactive) \
-       { \
-       if ( ! yy_current_buffer ) \
-               yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
-       yy_current_buffer->yy_is_interactive = is_interactive; \
-       }
-
-#define yy_set_bol(at_bol) \
-       { \
-       if ( ! yy_current_buffer ) \
-               yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
-       yy_current_buffer->yy_at_bol = at_bol; \
-       }
-
-#define YY_AT_BOL() (yy_current_buffer->yy_at_bol)
-
-
-#define yywrap() 1
-#define YY_SKIP_YYWRAP
-typedef unsigned char YY_CHAR;
-FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
-typedef int yy_state_type;
-extern char *yytext;
-#define yytext_ptr yytext
-
-static yy_state_type yy_get_previous_state YY_PROTO(( void ));
-static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state ));
-static int yy_get_next_buffer YY_PROTO(( void ));
-static void yy_fatal_error YY_PROTO(( yyconst char msg[] ));
-
-/* Done after the current pattern has been matched and before the
- * corresponding action - sets up yytext.
- */
-#define YY_DO_BEFORE_ACTION \
-       yytext_ptr = yy_bp; \
-       yyleng = (int) (yy_cp - yy_bp); \
-       yy_hold_char = *yy_cp; \
-       *yy_cp = '\0'; \
-       yy_c_buf_p = yy_cp;
-
-#define YY_NUM_RULES 12
-#define YY_END_OF_BUFFER 13
-static yyconst short int yy_accept[26] =
-    {   0,
-        0,    0,   13,   11,   10,    9,    7,    6,    3,    7,
-        5,    6,    2,    4,   11,   10,    9,    1,    8,    4,
-        0,    8,    2,    2,    0
-    } ;
-
-static yyconst int yy_ec[256] =
-    {   0,
-        1,    1,    1,    1,    1,    1,    1,    1,    2,    3,
-        1,    1,    4,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    2,    5,    6,    7,    1,    8,    9,   10,   11,
-       12,   13,   14,   15,   16,   17,   18,   19,   19,   19,
-       19,   19,   19,   19,   19,   19,   19,   20,   21,   22,
-       23,   24,   25,   26,   27,   27,   27,   27,   27,   27,
-       27,   27,   27,   27,   27,   27,   27,   27,   27,   27,
-       27,   27,   27,   27,   27,   27,   27,   27,   27,   27,
-       28,   29,   30,   31,   27,   32,   27,   27,   27,   27,
-
-       27,   27,   27,   27,   27,   27,   27,   27,   27,   27,
-       27,   27,   27,   27,   27,   27,   27,   27,   27,   27,
-       27,   27,   33,   34,   35,   36,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,   37,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1
-    } ;
-
-static yyconst int yy_meta[38] =
-    {   0,
-        1,    1,    2,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1
-    } ;
-
-static yyconst short int yy_base[27] =
-    {   0,
-        0,    0,   53,   54,   50,   54,   48,   54,   54,   54,
-       54,   25,   54,   20,   31,   47,   54,   54,    0,   21,
-       25,    0,   27,   26,   54,   40
-    } ;
-
-static yyconst short int yy_def[27] =
-    {   0,
-       25,    1,   25,   25,   25,   25,   25,   25,   25,   25,
-       25,   25,   25,   25,   25,   25,   25,   25,   26,   25,
-       25,   26,   25,   25,    0,   25
-    } ;
-
-static yyconst short int yy_nxt[92] =
-    {   0,
-        4,    5,    6,    7,    8,    9,   10,    8,    8,   11,
-       11,   11,    8,    8,   11,    8,   11,   12,   13,   11,
-       11,    8,    8,    8,   11,   10,   14,   11,   11,   11,
-        8,   10,   11,    8,   11,    8,   15,   18,   20,   20,
-       22,   23,   19,   21,   24,   24,   20,   20,   16,   21,
-       17,   16,   25,    3,   25,   25,   25,   25,   25,   25,
-       25,   25,   25,   25,   25,   25,   25,   25,   25,   25,
-       25,   25,   25,   25,   25,   25,   25,   25,   25,   25,
-       25,   25,   25,   25,   25,   25,   25,   25,   25,   25,
-       25
-
-    } ;
-
-static yyconst short int yy_chk[92] =
-    {   0,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
-        1,    1,    1,    1,    1,    1,    1,   12,   14,   20,
-       26,   21,   12,   21,   24,   23,   14,   20,   16,   15,
-        7,    5,    3,   25,   25,   25,   25,   25,   25,   25,
-       25,   25,   25,   25,   25,   25,   25,   25,   25,   25,
-       25,   25,   25,   25,   25,   25,   25,   25,   25,   25,
-       25,   25,   25,   25,   25,   25,   25,   25,   25,   25,
-       25
-
-    } ;
-
-static yy_state_type yy_last_accepting_state;
-static char *yy_last_accepting_cpos;
-
-/* The intent behind this definition is that it'll catch
- * any uses of REJECT which flex missed.
- */
-#define REJECT reject_used_but_not_detected
-#define yymore() yymore_used_but_not_detected
-#define YY_MORE_ADJ 0
-#define YY_RESTORE_YY_MORE_OFFSET
-char *yytext;
-#line 1 "./x-java.l"
-#define INITIAL 0
-/* xgettext Java backend.                                      -*- C -*-
-   Copyright (C) 2001-2002 Free Software Foundation, Inc.
-   Written by Tommy Johansson <tommy.johansson@kanalen.org>, 2001.
+/* xgettext Java backend.
+   Copyright (C) 2003 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2003.
  
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -429,12 +15,13 @@ char *yytext;
     You should have received a copy of the GNU General Public License
     along with this program; if not, write to the Free Software Foundation,
     Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
-#line 20 "./x-java.l"
+
  #ifdef HAVE_CONFIG_H
-# include <config.h>
+# include "config.h"
  #endif
  
-#include <ctype.h>
+#include <errno.h>
+#include <stdbool.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
@@ -442,1776 +29,1447 @@ char *yytext;
  #include "message.h"
  #include "x-java.h"
  #include "xgettext.h"
+#include "error.h"
  #include "xmalloc.h"
-#include "strstr.h"
+#include "exit.h"
+#include "hash.h"
+#include "po-charset.h"
+#include "utf16-ucs4.h"
+#include "ucs4-utf8.h"
+#include "gettext.h"
  
-typedef enum
-{
-  JAVA_WORD,
-  JAVA_STRING,
-  JAVA_OPERATOR,
-  JAVA_FLOW,
-  JAVA_COMMENT
-} TOKEN_TYPE;
-
-typedef struct
-{
-  char *word;
-  char *string;
-  char *operator;
-  char *flow;
-  char *comment;
+#define _(s) gettext(s)
  
-  int line_no;
-} PARSER_GLOBAL;
  
-static PARSER_GLOBAL pg;
-static PARSER_GLOBAL *parser_global = &pg;
-
-typedef enum
-{
-  STATE_NONE,
-  STATE_STRING,
-  STATE_WORD,
-  STATE_APPEND,
-  STATE_INVOCATION,
-  STATE_KEYWORD
-} PARSER_STATE;
-
-typedef struct
-{
-  char *data;
-  int len;
-  int maxlen;
-} char_buf;
+/* The Java syntax is defined in the
+     Java Language Specification, Second Edition,
+     (available from http://java.sun.com/),
+     chapter 3 "Lexical Structure".  */
  
  
-typedef struct _object_list
-{
-  int num_obj;
-  int max_num_obj;
-  void **objects;
-} object_list;
+/* ====================== Keyword set customization.  ====================== */
  
-#define INITIAL_OBJECT_LIST_SIZE 10
-#define OBJECT_LIST_GROWTH 10
+/* If true extract all strings.  */
+static bool extract_all = false;
  
-typedef struct _java_keyword
-{
-  char *keyword;
-  int msgid_arg;
-  int msgid_plural_arg;
-} java_keyword;
+static hash_table keywords;
+static bool default_keywords = true;
  
  
-#define INITIAL_CHARBUF_SIZE 500
-#define CHARBUF_GROWTH 100
-static char_buf *
-create_char_buf ()
+void
+x_java_extract_all ()
  {
-  char_buf *b = (char_buf *) xmalloc (sizeof (char_buf));
-  b->data = (char *) xmalloc (INITIAL_CHARBUF_SIZE);
-  b->data[0] = '\0';
-  b->len = 0;
-  b->maxlen = INITIAL_CHARBUF_SIZE;
-  return b;
+  extract_all = true;
  }
  
-static void
-append_char_buf (char_buf *b, int c)
+
+void
+x_java_keyword (const char *name)
  {
-  if (b->len >= b->maxlen - 1)
+  if (name == NULL)
+    default_keywords = false;
+  else
      {
-      b->data = (char *) xrealloc (b->data, b->maxlen + CHARBUF_GROWTH);
-      b->maxlen += CHARBUF_GROWTH;
-    }
-  b->data[b->len++] = c;
-  b->data[b->len] = '\0';
-}
+      const char *end;
+      int argnum1;
+      int argnum2;
+      const char *colon;
  
-static char *
-get_string (char_buf *b)
-{
-  return xstrdup (b->data);
-}
+      if (keywords.table == NULL)
+       init_hash (&keywords, 100);
  
-static void
-destroy_charbuf (char_buf *b)
-{
-  free (b->data);
-  free (b);
+      split_keywordspec (name, &end, &argnum1, &argnum2);
+
+      /* The characters between name and end should form a valid Java
+        identifier sequence with dots.
+        A colon means an invalid parse in split_keywordspec().  */
+      colon = strchr (name, ':');
+      if (colon == NULL || colon >= end)
+       {
+         if (argnum1 == 0)
+           argnum1 = 1;
+         insert_entry (&keywords, name, end - name,
+                       (void *) (long) (argnum1 + (argnum2 << 10)));
+       }
+    }
  }
  
+/* Finish initializing the keywords hash table.
+   Called after argument processing, before each file is processed.  */
  static void
-update_line_no (int c)
+init_keywords ()
  {
-  if (c == '\n')
-    parser_global->line_no++;
+  if (default_keywords)
+    {
+      x_java_keyword ("GettextResource.gettext:2");    /* static method */
+      x_java_keyword ("GettextResource.ngettext:2,3"); /* static method */
+      x_java_keyword ("gettext");
+      x_java_keyword ("ngettext:1,2");
+      x_java_keyword ("getString");    /* ResourceBundle.getString */
+      default_keywords = false;
+    }
  }
  
-static void
-strip_ending_spaces (char *str)
-{
-  int len = strlen (str);
  
-  while (len > 0 && isspace ((unsigned char) str[len - 1]))
-    len--;
-  str[len] = '\0';
-}
-#line 559 "x-java.c-tmp"
+/* ======================== Reading of characters.  ======================== */
  
-/* Macros after this point can all be overridden by user definitions in
- * section 1.
- */
+/* Real filename, used in error messages about the input file.  */
+static const char *real_file_name;
  
-#ifndef YY_SKIP_YYWRAP
-#ifdef __cplusplus
-extern "C" int yywrap YY_PROTO(( void ));
-#else
-extern int yywrap YY_PROTO(( void ));
-#endif
-#endif
+/* Logical filename and line number, used to label the extracted messages.  */
+static char *logical_file_name;
+static int line_number;
  
-#ifndef YY_NO_UNPUT
-static void yyunput YY_PROTO(( int c, char *buf_ptr ));
-#endif
+/* The input file stream.  */
+static FILE *fp;
  
-#ifndef yytext_ptr
-static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int ));
-#endif
  
-#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen YY_PROTO(( yyconst char * ));
-#endif
+/* Fetch the next single-byte character from the input file.
+   Pushback can consist of an unlimited number of 'u' followed by up to 4
+   other characters.  */
  
-#ifndef YY_NO_INPUT
-#ifdef __cplusplus
-static int yyinput YY_PROTO(( void ));
-#else
-static int input YY_PROTO(( void ));
-#endif
-#endif
+/* Special coding of multiple 'u's in the pushback buffer.  */
+#define MULTIPLE_U(count) (0x1000 + (count))
  
-#if YY_STACK_USED
-static int yy_start_stack_ptr = 0;
-static int yy_start_stack_depth = 0;
-static int *yy_start_stack = 0;
-#ifndef YY_NO_PUSH_STATE
-static void yy_push_state YY_PROTO(( int new_state ));
-#endif
-#ifndef YY_NO_POP_STATE
-static void yy_pop_state YY_PROTO(( void ));
-#endif
-#ifndef YY_NO_TOP_STATE
-static int yy_top_state YY_PROTO(( void ));
-#endif
+static int phase1_pushback[5];
+static unsigned int phase1_pushback_length;
  
-#else
-#define YY_NO_PUSH_STATE 1
-#define YY_NO_POP_STATE 1
-#define YY_NO_TOP_STATE 1
-#endif
+static int
+phase1_getc ()
+{
+  int c;
  
-#ifdef YY_MALLOC_DECL
-YY_MALLOC_DECL
-#else
-#if __STDC__
-#ifndef __cplusplus
-#include <stdlib.h>
-#endif
-#else
-/* Just try to get by without declaring the routines.  This will fail
- * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int)
- * or sizeof(void*) != sizeof(int).
- */
-#endif
-#endif
+  if (phase1_pushback_length)
+    {
+      c = phase1_pushback[--phase1_pushback_length];
+      if (c >= MULTIPLE_U (0))
+       {
+         if (c > MULTIPLE_U (1))
+           phase1_pushback[phase1_pushback_length++] = c - 1;
+         return 'u';
+       }
+      else
+       return c;
+    }
  
-/* Amount of stuff to slurp up with each read. */
-#ifndef YY_READ_BUF_SIZE
-#define YY_READ_BUF_SIZE 8192
-#endif
+  c = getc (fp);
  
-/* Copy whatever the last rule matched to the standard output. */
+  if (c == EOF)
+    {
+      if (ferror (fp))
+       error (EXIT_FAILURE, errno, _("\
+error while reading \"%s\""), real_file_name);
+    }
  
-#ifndef ECHO
-/* This used to be an fputs(), but since the string might contain NUL's,
- * we now use fwrite().
- */
-#define ECHO (void) fwrite( yytext, yyleng, 1, yyout )
-#endif
+  return c;
+}
  
-/* Gets input and stuffs it into "buf".  number of characters read, or YY_NULL,
- * is returned in "result".
- */
-#ifndef YY_INPUT
-#define YY_INPUT(buf,result,max_size) \
-       if ( yy_current_buffer->yy_is_interactive ) \
-               { \
-               int c = '*', n; \
-               for ( n = 0; n < max_size && \
-                            (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
-                       buf[n] = (char) c; \
-               if ( c == '\n' ) \
-                       buf[n++] = (char) c; \
-               if ( c == EOF && ferror( yyin ) ) \
-                       YY_FATAL_ERROR( "input in flex scanner failed" ); \
-               result = n; \
-               } \
-       else if ( ((result = fread( buf, 1, max_size, yyin )) == 0) \
-                 && ferror( yyin ) ) \
-               YY_FATAL_ERROR( "input in flex scanner failed" );
-#endif
+static void
+phase1_ungetc (int c)
+{
+  if (c != EOF)
+    {
+      if (c == 'u')
+       {
+         if (phase1_pushback_length > 0
+             && phase1_pushback[phase1_pushback_length - 1] >= MULTIPLE_U (0))
+           phase1_pushback[phase1_pushback_length - 1]++;
+         else
+           phase1_pushback[phase1_pushback_length++] = MULTIPLE_U (1);
+       }
+      else
+       phase1_pushback[phase1_pushback_length++] = c;
+    }
+}
  
-/* No semi-colon after return; correct usage is to write "yyterminate();" -
- * we don't want an extra ';' after the "return" because that will cause
- * some compilers to complain about unreachable statements.
- */
-#ifndef yyterminate
-#define yyterminate() return YY_NULL
-#endif
  
-/* Number of entries by which start-condition stack grows. */
-#ifndef YY_START_STACK_INCR
-#define YY_START_STACK_INCR 25
-#endif
+/* Fetch the next single-byte character or Unicode character from the file.
+   (Here, as in the Java Language Specification, when we say "Unicode
+   character", we actually mean "UTF-16 encoding unit".)
+   Cope with potentially 2 pushback characters.  */
  
-/* Report a fatal error. */
-#ifndef YY_FATAL_ERROR
-#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
-#endif
+/* Return value of phase 2, 3, 4 when EOF is reached.  */
+#define P2_EOF 0xffff
  
-/* Default declaration of generated scanner - a define so the user can
- * easily add parameters.
- */
-#ifndef YY_DECL
-#define YY_DECL int yylex YY_PROTO(( void ))
-#endif
+/* Convert an UTF-16 code point to a return value that can be distinguished
+   from a single-byte return value.  */
+#define UNICODE(code) (0x10000 + (code))
  
-/* Code executed at the beginning of each rule, after yytext and yyleng
- * have been set up.
- */
-#ifndef YY_USER_ACTION
-#define YY_USER_ACTION
-#endif
+/* Test a return value of phase 2, 3, 4 whether it designates an UTF-16 code
+   point.  */
+#define IS_UNICODE(p2_result) ((p2_result) >= 0x10000)
  
-/* Code executed at the end of each rule. */
-#ifndef YY_BREAK
-#define YY_BREAK break;
-#endif
+/* Extract the UTF-16 code of a return value that satisfies IS_UNICODE.  */
+#define UTF16_VALUE(p2_result) ((p2_result) - 0x10000)
  
-#define YY_RULE_SETUP \
-       YY_USER_ACTION
+/* Reduces a return value of phase 2, 3, 4 by unmasking the UNICODE bit,
+   so that it can be more easily compared against an ASCII character.
+   (RED (c) == 'x')  is equivalent to  (c == 'x' || c == UNICODE ('x')).  */
+#define RED(p2_result) ((p2_result) & 0xffff)
  
-YY_DECL
-       {
-       register yy_state_type yy_current_state;
-       register char *yy_cp, *yy_bp;
-       register int yy_act;
+/* Maximum used guaranteed to be < 2.  */
+static int phase2_pushback[2];
+static int phase2_pushback_length;
+
+/* Fetch the next phase 2 character.  A value pushed back into
+   phase2_pushback is returned first; otherwise a character is read with
+   phase1_getc.  A backslash followed by one or more 'u' and four
+   hexadecimal digits is returned as UNICODE (n), where n is the value of
+   the four digits.  Returns P2_EOF when phase1_getc returns EOF.  */
+static int
+phase2_getc ()
+{
+  int c;
  
-#line 155 "./x-java.l"
+  if (phase2_pushback_length)
+    return phase2_pushback[--phase2_pushback_length];
  
+  c = phase1_getc ();
+  if (c == EOF)
+    return P2_EOF;
+  if (c == '\\')
+    {
+      c = phase1_getc ();
+      if (c == 'u')
+       {
+         /* Number of 'u' characters consumed; needed to push them all back
+            if the escape turns out to be malformed.  */
+         unsigned int u_count = 1;
+         /* The hexadecimal digits consumed so far.  */
+         unsigned char buf[4];
+         unsigned int n;
+         int i;
  
-#line 713 "x-java.c-tmp"
+         /* Skip any further 'u' characters; the first non-'u' is given
+            back to phase 1.  */
+         for (;;)
+           {
+             c = phase1_getc ();
+             if (c != 'u')
+               break;
+             u_count++;
+           }
+         phase1_ungetc (c);
  
-       if ( yy_init )
+         n = 0;
+         for (i = 0; i < 4; i++)
+           {
+             c = phase1_getc ();
+
+             if (c >= '0' && c <= '9')
+               n = (n << 4) + (c - '0');
+             else if (c >= 'A' && c <= 'F')
+               n = (n << 4) + (c - 'A' + 10);
+             else if (c >= 'a' && c <= 'f')
+               n = (n << 4) + (c - 'a' + 10);
+             else
                 {
-               yy_init = 0;
-
-#ifdef YY_USER_INIT
-               YY_USER_INIT;
-#endif
+                 /* Not a hexadecimal digit: push back this character, the
+                    digits read so far (last one first) and all the 'u's,
+                    then return only the backslash.  */
+                 phase1_ungetc (c);
+                 while (--i >= 0)
+                   phase1_ungetc (buf[i]);
+                 for (; u_count > 0; u_count--)
+                   phase1_ungetc ('u');
+                 return '\\';
+               }
  
-               if ( ! yy_start )
-                       yy_start = 1;   /* first start state */
+             buf[i] = c;
+           }
+         return UNICODE (n);
+       }
+      /* Backslash not followed by 'u': give the second character back.  */
+      phase1_ungetc (c);
+      return '\\';
+    }
+  return c;
+}
  
-               if ( ! yyin )
-                       yyin = stdin;
+#ifdef unused
+/* Push a phase 2 character back, so that the next phase2_getc call
+   returns it.  P2_EOF is never stored.  */
+static void
+phase2_ungetc (int c)
+{
+  if (c == P2_EOF)
+    return;
+
+  phase2_pushback[phase2_pushback_length] = c;
+  phase2_pushback_length++;
+}
+#endif
  
-               if ( ! yyout )
-                       yyout = stdout;
  
-               if ( ! yy_current_buffer )
-                       yy_current_buffer =
-                               yy_create_buffer( yyin, YY_BUF_SIZE );
+/* Fetch the next single-byte character or Unicode character from the file.
+   With line number handling.
+   Convert line terminators to '\n' or UNICODE ('\n').
+   Cope with potentially 2 pushback characters.  */
  
-               yy_load_buffer_state();
-               }
+/* Maximum used guaranteed to be < 2.  */
+static int phase3_pushback[2];
+static int phase3_pushback_length;
  
-       while ( 1 )             /* loops until end-of-file is reached */
-               {
-               yy_cp = yy_c_buf_p;
-
-               /* Support of yytext. */
-               *yy_cp = yy_hold_char;
-
-               /* yy_bp points to the position in yy_ch_buf of the start of
-                * the current run.
-                */
-               yy_bp = yy_cp;
-
-               yy_current_state = yy_start;
-yy_match:
-               do
-                       {
-                       register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
-                       if ( yy_accept[yy_current_state] )
-                               {
-                               yy_last_accepting_state = yy_current_state;
-                               yy_last_accepting_cpos = yy_cp;
-                               }
-                       while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
-                               {
-                               yy_current_state = (int) yy_def[yy_current_state];
-                               if ( yy_current_state >= 26 )
-                                       yy_c = yy_meta[(unsigned int) yy_c];
-                               }
-                       yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
-                       ++yy_cp;
-                       }
-               while ( yy_base[yy_current_state] != 54 );
-
-yy_find_action:
-               yy_act = yy_accept[yy_current_state];
-               if ( yy_act == 0 )
-                       { /* have to back up */
-                       yy_cp = yy_last_accepting_cpos;
-                       yy_current_state = yy_last_accepting_state;
-                       yy_act = yy_accept[yy_current_state];
-                       }
-
-               YY_DO_BEFORE_ACTION;
-
-
-do_action:     /* This label is used only to access EOF actions. */
-
-
-               switch ( yy_act )
-       { /* beginning of action switch */
-                       case 0: /* must back up */
-                       /* undo the effects of YY_DO_BEFORE_ACTION */
-                       *yy_cp = yy_hold_char;
-                       yy_cp = yy_last_accepting_cpos;
-                       yy_current_state = yy_last_accepting_state;
-                       goto yy_find_action;
-
-case 1:
-YY_RULE_SETUP
-#line 157 "./x-java.l"
+/* Fetch the next phase 3 character.  Characters pushed back with
+   phase3_ungetc are returned first.  The line terminators CR, LF and CR/LF
+   are converted to '\n', and line_number is incremented.  An escaped LF,
+   or an escaped CR that is not followed by a literal LF, is returned as
+   UNICODE ('\n') instead, without touching line_number.  */
+static int
+phase3_getc ()
  {
    int c;
-  int last;
-  char *str;
  
-  char_buf *charbuf = create_char_buf ();
-  for (;;)
+  if (phase3_pushback_length)
      {
-      c = input ();
-      last = input ();
-      update_line_no (c);
-      if ((c == '*' && last == '/') || c == EOF)
-       break;
-      unput (last);
-      append_char_buf (charbuf, c);
+      c = phase3_pushback[--phase3_pushback_length];
+      /* phase3_ungetc decremented line_number for a '\n'; redo the count.  */
+      if (c == '\n')
+       ++line_number;
+      return c;
+    }
+
+  c = phase2_getc ();
+
+  /* Handle line terminators.  */
+  if (RED (c) == '\r')
+    {
+      int c1 = phase2_getc ();
+
+      /* A character other than LF does not belong to this line terminator
+        and must be read again by the next call.  It is stored directly in
+        the phase 2 pushback buffer, because phase2_ungetc is compiled out
+        (#ifdef unused).  P2_EOF is never pushed back.  */
+      if (RED (c1) != '\n' && c1 != P2_EOF)
+       phase2_pushback[phase2_pushback_length++] = c1;
+
+      /* Seen line terminator CR or CR/LF.  */
+      if (c == '\r' || c1 == '\n')
+       {
+         ++line_number;
+         return '\n';
+       }
+      else
+       return UNICODE ('\n');
+    }
+  else if (RED (c) == '\n')
+    {
+      /* Seen line terminator LF.  */
+      if (c == '\n')
+       {
+         ++line_number;
+         return '\n';
+       }
+      else
+       return UNICODE ('\n');
      }
-  str = get_string (charbuf);
-  destroy_charbuf (charbuf);
-  strip_ending_spaces (str);
-  parser_global->comment = str;
-  return JAVA_COMMENT;
+
+  return c;
  }
-       YY_BREAK
-case 2:
-YY_RULE_SETUP
-#line 180 "./x-java.l"
-
-       YY_BREAK
-case 3:
-YY_RULE_SETUP
-#line 181 "./x-java.l"
+
+/* Push a phase 3 character back, so that the next phase3_getc call
+   returns it.  P2_EOF is ignored.  */
+static void
+phase3_ungetc (int c)
  {
-  int c;
-  char *str;
-  char_buf *charbuf = create_char_buf ();
-  while ((c = input ()) != EOF && c != '"')
+  if (c != P2_EOF)
      {
-      update_line_no (c);
-      append_char_buf (charbuf, c);
+      /* Undo the line count; phase3_getc counts the '\n' again when it
+        pops this character.  */
+      if (c == '\n')
+       --line_number;
+      phase3_pushback[phase3_pushback_length++] = c;
      }
-  str = get_string (charbuf);
-  destroy_charbuf (charbuf);
-  parser_global->string = str;
-  return JAVA_STRING;
  }
-       YY_BREAK
-case 4:
-YY_RULE_SETUP
-#line 196 "./x-java.l"
+
+
+/* ========================= Accumulating strings.  ======================== */
+
+/* A string buffer type that allows appending bytes (in the
+   xgettext_current_source_encoding) or Unicode characters.
+   Returns the entire string in UTF-8 encoding.  */
+
+struct string_buffer
  {
-  parser_global->word = yytext;
-  return JAVA_WORD;
-}
-       YY_BREAK
-case 5:
-YY_RULE_SETUP
-#line 201 "./x-java.l"
+  /* The part of the string that has already been converted to UTF-8.
+     utf8_buflen bytes are in use, utf8_allocated bytes are allocated.  */
+  char *utf8_buffer;
+  size_t utf8_buflen;
+  size_t utf8_allocated;
+  /* The first half of an UTF-16 surrogate character.
+     0 means that no half is pending.  */
+  unsigned short utf16_surr;
+  /* The part of the string that is still in the source encoding.
+     curr_buflen bytes are in use, curr_allocated bytes are allocated.  */
+  char *curr_buffer;
+  size_t curr_buflen;
+  size_t curr_allocated;
+};
+
+/* Initialize a 'struct string_buffer' to empty.  */
+static inline void
+init_string_buffer (struct string_buffer *bp)
  {
-  parser_global->flow = yytext;
-  return JAVA_FLOW;
+  /* No UTF-8 data and no storage for it yet.  */
+  bp->utf8_buffer = NULL;
+  bp->utf8_buflen = 0;
+  bp->utf8_allocated = 0;
+  /* No surrogate half is pending.  */
+  bp->utf16_surr = 0;
+  /* No source-encoding data and no storage for it yet.  */
+  bp->curr_buffer = NULL;
+  bp->curr_buflen = 0;
+  bp->curr_allocated = 0;
  }
-       YY_BREAK
-case 6:
-YY_RULE_SETUP
-#line 206 "./x-java.l"
+
+/* Auxiliary function: Append a byte to bp->curr.  */
+static inline void
+string_buffer_append_byte (struct string_buffer *bp, unsigned char c)
  {
-  parser_global->operator = yytext;
-  return JAVA_OPERATOR;
+  /* Buffer full (or not yet allocated): grow it to 2 * size + 10 bytes.  */
+  if (bp->curr_buflen == bp->curr_allocated)
+    {
+      bp->curr_allocated = 2 * bp->curr_allocated + 10;
+      bp->curr_buffer = xrealloc (bp->curr_buffer, bp->curr_allocated);
+    }
+  bp->curr_buffer[bp->curr_buflen++] = c;
  }
-       YY_BREAK
-case 7:
-YY_RULE_SETUP
-#line 211 "./x-java.l"
-/* ignore whitespace */
-       YY_BREAK
-case 8:
-YY_RULE_SETUP
-#line 213 "./x-java.l"
+
+/* Auxiliary function: Ensure count more bytes are available in bp->utf8.  */
+static inline void
+string_buffer_append_unicode_grow (struct string_buffer *bp, size_t count)
  {
-  parser_global->comment = xstrdup (yytext + 2);
-  return JAVA_COMMENT;
+  if (bp->utf8_buflen + count > bp->utf8_allocated)
+    {
+      /* Grow to 2 * size + 10 bytes, or to exactly the needed size if that
+        is still too small.  */
+      size_t new_allocated = 2 * bp->utf8_allocated + 10;
+      if (new_allocated < bp->utf8_buflen + count)
+       new_allocated = bp->utf8_buflen + count;
+      bp->utf8_allocated = new_allocated;
+      bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
+    }
  }
-       YY_BREAK
-case 9:
-YY_RULE_SETUP
-#line 217 "./x-java.l"
-parser_global->line_no++;
-       YY_BREAK
-case 10:
-YY_RULE_SETUP
-#line 218 "./x-java.l"
-
-       YY_BREAK
-case 11:
-YY_RULE_SETUP
-#line 219 "./x-java.l"
-
-       YY_BREAK
-case YY_STATE_EOF(INITIAL):
-#line 220 "./x-java.l"
-return -1;
-       YY_BREAK
-case 12:
-YY_RULE_SETUP
-#line 221 "./x-java.l"
-ECHO;
-       YY_BREAK
-#line 906 "x-java.c-tmp"
-
-       case YY_END_OF_BUFFER:
-               {
-               /* Amount of text matched not including the EOB char. */
-               int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1;
-
-               /* Undo the effects of YY_DO_BEFORE_ACTION. */
-               *yy_cp = yy_hold_char;
-               YY_RESTORE_YY_MORE_OFFSET
-
-               if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW )
-                       {
-                       /* We're scanning a new file or input source.  It's
-                        * possible that this happened because the user
-                        * just pointed yyin at a new source and called
-                        * yylex().  If so, then we have to assure
-                        * consistency between yy_current_buffer and our
-                        * globals.  Here is the right place to do so, because
-                        * this is the first action (other than possibly a
-                        * back-up) that will match for the new input source.
-                        */
-                       yy_n_chars = yy_current_buffer->yy_n_chars;
-                       yy_current_buffer->yy_input_file = yyin;
-                       yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL;
-                       }
-
-               /* Note that here we test for yy_c_buf_p "<=" to the position
-                * of the first EOB in the buffer, since yy_c_buf_p will
-                * already have been incremented past the NUL character
-                * (since all states make transitions on EOB to the
-                * end-of-buffer state).  Contrast this with the test
-                * in input().
-                */
-               if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] )
-                       { /* This was really a NUL. */
-                       yy_state_type yy_next_state;
-
-                       yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text;
-
-                       yy_current_state = yy_get_previous_state();
-
-                       /* Okay, we're now positioned to make the NUL
-                        * transition.  We couldn't have
-                        * yy_get_previous_state() go ahead and do it
-                        * for us because it doesn't know how to deal
-                        * with the possibility of jamming (and we don't
-                        * want to build jamming into it because then it
-                        * will run more slowly).
-                        */
-
-                       yy_next_state = yy_try_NUL_trans( yy_current_state );
-
-                       yy_bp = yytext_ptr + YY_MORE_ADJ;
-
-                       if ( yy_next_state )
-                               {
-                               /* Consume the NUL. */
-                               yy_cp = ++yy_c_buf_p;
-                               yy_current_state = yy_next_state;
-                               goto yy_match;
-                               }
-
-                       else
-                               {
-                               yy_cp = yy_c_buf_p;
-                               goto yy_find_action;
-                               }
-                       }
-
-               else switch ( yy_get_next_buffer() )
-                       {
-                       case EOB_ACT_END_OF_FILE:
-                               {
-                               yy_did_buffer_switch_on_eof = 0;
-
-                               if ( yywrap() )
-                                       {
-                                       /* Note: because we've taken care in
-                                        * yy_get_next_buffer() to have set up
-                                        * yytext, we can now set up
-                                        * yy_c_buf_p so that if some total
-                                        * hoser (like flex itself) wants to
-                                        * call the scanner after we return the
-                                        * YY_NULL, it'll still work - another
-                                        * YY_NULL will get returned.
-                                        */
-                                       yy_c_buf_p = yytext_ptr + YY_MORE_ADJ;
-
-                                       yy_act = YY_STATE_EOF(YY_START);
-                                       goto do_action;
-                                       }
-
-                               else
-                                       {
-                                       if ( ! yy_did_buffer_switch_on_eof )
-                                               YY_NEW_FILE;
-                                       }
-                               break;
-                               }
-
-                       case EOB_ACT_CONTINUE_SCAN:
-                               yy_c_buf_p =
-                                       yytext_ptr + yy_amount_of_matched_text;
-
-                               yy_current_state = yy_get_previous_state();
-
-                               yy_cp = yy_c_buf_p;
-                               yy_bp = yytext_ptr + YY_MORE_ADJ;
-                               goto yy_match;
-
-                       case EOB_ACT_LAST_MATCH:
-                               yy_c_buf_p =
-                               &yy_current_buffer->yy_ch_buf[yy_n_chars];
-
-                               yy_current_state = yy_get_previous_state();
-
-                               yy_cp = yy_c_buf_p;
-                               yy_bp = yytext_ptr + YY_MORE_ADJ;
-                               goto yy_find_action;
-                       }
-               break;
-               }
-
-       default:
-               YY_FATAL_ERROR(
-                       "fatal flex scanner internal error--no action found" );
-       } /* end of action switch */
-               } /* end of scanning one token */
-       } /* end of yylex */
-
-
-/* yy_get_next_buffer - try to read in a new buffer
- *
- * Returns a code representing an action:
- *     EOB_ACT_LAST_MATCH -
- *     EOB_ACT_CONTINUE_SCAN - continue scanning from current position
- *     EOB_ACT_END_OF_FILE - end of file
- */
-
-static int yy_get_next_buffer()
-       {
-       register char *dest = yy_current_buffer->yy_ch_buf;
-       register char *source = yytext_ptr;
-       register int number_to_move, i;
-       int ret_val;
-
-       if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] )
-               YY_FATAL_ERROR(
-               "fatal flex scanner internal error--end of buffer missed" );
-
-       if ( yy_current_buffer->yy_fill_buffer == 0 )
-               { /* Don't try to fill the buffer, so this is an EOF. */
-               if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 )
-                       {
-                       /* We matched a single character, the EOB, so
-                        * treat this as a final EOF.
-                        */
-                       return EOB_ACT_END_OF_FILE;
-                       }
  
-               else
-                       {
-                       /* We matched some text prior to the EOB, first
-                        * process it.
-                        */
-                       return EOB_ACT_LAST_MATCH;
-                       }
-               }
+/* Auxiliary function: Append a Unicode character to bp->utf8.
+   uc must be < 0x110000.  */
+static inline void
+string_buffer_append_unicode (struct string_buffer *bp, unsigned int uc)
+{
+  /* Encode uc into a local buffer first; count is the number of bytes
+     produced, or negative on failure.  */
+  unsigned char utf8buf[6];
+  int count = u8_uctomb (utf8buf, uc, 6);
  
-       /* Try to read more data. */
+  if (count < 0)
+    /* The caller should have ensured that uc is not out-of-range.  */
+    abort ();
  
-       /* First move last chars to start of buffer. */
-       number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1;
+  /* Make room, then copy the encoded bytes to the end of bp->utf8_buffer.  */
+  string_buffer_append_unicode_grow (bp, count);
+  memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count);
+  bp->utf8_buflen += count;
+}
  
-       for ( i = 0; i < number_to_move; ++i )
-               *(dest++) = *(source++);
+/* Auxiliary function: Flush bp->utf16_surr into bp->utf8_buffer.  */
+static inline void
+string_buffer_flush_utf16_surr (struct string_buffer *bp)
+{
+  /* No surrogate half is pending: nothing to do.  */
+  if (bp->utf16_surr == 0)
+    return;
+
+  /* A half surrogate on its own is invalid; emit U+FFFD in its place and
+     forget it.  */
+  string_buffer_append_unicode (bp, 0xfffd);
+  bp->utf16_surr = 0;
+}
  
-       if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING )
-               /* don't do the read, it's not guaranteed to return an EOF,
-                * just force an EOF
-                */
-               yy_current_buffer->yy_n_chars = yy_n_chars = 0;
+/* Auxiliary function: Flush bp->curr_buffer into bp->utf8_buffer.  */
+/* lineno is passed on to from_current_source_encoding together with
+   logical_file_name.  Does nothing when bp->curr_buffer holds no bytes.  */
+static inline void
+string_buffer_flush_curr_buffer (struct string_buffer *bp, int lineno)
+{
+  if (bp->curr_buflen > 0)
+    {
+      char *curr;
+      size_t count;
  
-       else
-               {
-               int num_to_read =
-                       yy_current_buffer->yy_buf_size - number_to_move - 1;
-
-               while ( num_to_read <= 0 )
-                       { /* Not enough room in the buffer - grow it. */
-#ifdef YY_USES_REJECT
-                       YY_FATAL_ERROR(
-"input buffer overflow, can't enlarge buffer because scanner uses REJECT" );
-#else
-
-                       /* just a shorter name for the current buffer */
-                       YY_BUFFER_STATE b = yy_current_buffer;
-
-                       int yy_c_buf_p_offset =
-                               (int) (yy_c_buf_p - b->yy_ch_buf);
-
-                       if ( b->yy_is_our_buffer )
-                               {
-                               int new_size = b->yy_buf_size * 2;
-
-                               if ( new_size <= 0 )
-                                       b->yy_buf_size += b->yy_buf_size / 8;
-                               else
-                                       b->yy_buf_size *= 2;
-
-                               b->yy_ch_buf = (char *)
-                                       /* Include room in for 2 EOB chars. */
-                                       yy_flex_realloc( (void *) b->yy_ch_buf,
-                                                        b->yy_buf_size + 2 );
-                               }
-                       else
-                               /* Can't grow it, we don't own it. */
-                               b->yy_ch_buf = 0;
-
-                       if ( ! b->yy_ch_buf )
-                               YY_FATAL_ERROR(
-                               "fatal error - scanner input buffer overflow" );
-
-                       yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset];
-
-                       num_to_read = yy_current_buffer->yy_buf_size -
-                                               number_to_move - 1;
-#endif
-                       }
+      /* NUL-terminate the accumulated bytes, so that they can be handled
+        as a C string below.  */
+      string_buffer_append_byte (bp, '\0');
  
-               if ( num_to_read > YY_READ_BUF_SIZE )
-                       num_to_read = YY_READ_BUF_SIZE;
+      /* Convert from the source encoding to UTF-8.  */
+      curr = from_current_source_encoding (bp->curr_buffer,
+                                          logical_file_name, lineno);
  
-               /* Read in more data. */
-               YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]),
-                       yy_n_chars, num_to_read );
+      /* Append it to bp->utf8_buffer.  */
+      count = strlen (curr);
+      string_buffer_append_unicode_grow (bp, count);
+      memcpy (bp->utf8_buffer + bp->utf8_buflen, curr, count);
+      bp->utf8_buflen += count;
  
-               yy_current_buffer->yy_n_chars = yy_n_chars;
-               }
+      /* The result is freed only when it is a different pointer than the
+        input buffer.  */
+      if (curr != bp->curr_buffer)
+       free (curr);
+      /* The source-encoding part is empty again; its storage is kept.  */
+      bp->curr_buflen = 0;
+    }
+}
  
-       if ( yy_n_chars == 0 )
-               {
-               if ( number_to_move == YY_MORE_ADJ )
-                       {
-                       ret_val = EOB_ACT_END_OF_FILE;
-                       yyrestart( yyin );
-                       }
+/* Append a character or Unicode character to a 'struct string_buffer'.  */
+/* c is either a single byte in the source encoding or a value for which
+   IS_UNICODE (c) holds.  Bytes are collected in bp->curr_buffer.  Unicode
+   characters are appended to bp->utf8_buffer, with a high surrogate
+   followed by a low surrogate combined into one character.  A surrogate
+   half without its partner is replaced by U+FFFD.  */
+static void
+string_buffer_append (struct string_buffer *bp, int c)
+{
+  if (IS_UNICODE (c))
+    {
+      /* Append a Unicode character.  */
  
-               else
-                       {
-                       ret_val = EOB_ACT_LAST_MATCH;
-                       yy_current_buffer->yy_buffer_status =
-                               YY_BUFFER_EOF_PENDING;
-                       }
-               }
+      /* Switch from multibyte character mode to Unicode character mode.  */
+      string_buffer_flush_curr_buffer (bp, line_number);
  
-       else
-               ret_val = EOB_ACT_CONTINUE_SCAN;
+      /* Test whether this character and the previous one form a Unicode
+        surrogate character pair.  */
+      if (bp->utf16_surr != 0
+         && (c >= UNICODE (0xdc00) && c < UNICODE (0xe000)))
+       {
+         unsigned short utf16buf[2] = { bp->utf16_surr, UTF16_VALUE (c) };
+         unsigned int uc;
  
-       yy_n_chars += number_to_move;
-       yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR;
-       yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;
+         if (u16_mbtouc_aux (&uc, utf16buf, 2) != 2)
+           abort ();
  
-       yytext_ptr = &yy_current_buffer->yy_ch_buf[0];
+         string_buffer_append_unicode (bp, uc);
+         bp->utf16_surr = 0;
+       }
+      else
+       {
+         /* A pending high surrogate that is not followed by a low
+            surrogate is flushed as U+FFFD.  */
+         string_buffer_flush_utf16_surr (bp);
  
-       return ret_val;
+         if (c >= UNICODE (0xd800) && c < UNICODE (0xdc00))
+           bp->utf16_surr = UTF16_VALUE (c);
+         else if (c >= UNICODE (0xdc00) && c < UNICODE (0xe000))
+           /* A low surrogate without a preceding high surrogate is not a
+              valid character either, therefore use U+FFFD instead of
+              passing the surrogate value on to the UTF-8 encoder.  */
+           string_buffer_append_unicode (bp, 0xfffd);
+         else
+           string_buffer_append_unicode (bp, UTF16_VALUE (c));
         }
+    }
+  else
+    {
+      /* Append a single byte.  */
  
+      /* Switch from Unicode character mode to multibyte character mode.  */
+      string_buffer_flush_utf16_surr (bp);
  
-/* yy_get_previous_state - get the state just before the EOB char was reached */
+      /* When a newline is seen, convert the accumulated multibyte sequence.
+        This ensures a correct line number in the error message in case of
+        a conversion error.  The "- 1" is to account for the newline.  */
+      if (c == '\n')
+       string_buffer_flush_curr_buffer (bp, line_number - 1);
  
-static yy_state_type yy_get_previous_state()
-       {
-       register yy_state_type yy_current_state;
-       register char *yy_cp;
+      string_buffer_append_byte (bp, (unsigned char) c);
+    }
+}
  
-       yy_current_state = yy_start;
+/* Return the string buffer's contents.  */
+static char *
+string_buffer_result (struct string_buffer *bp)
+{
+  char *result;
+
+  /* Move everything that is still pending into bp->utf8_buffer: first a
+     surrogate half, then the bytes in the source encoding.  */
+  string_buffer_flush_utf16_surr (bp);
+  string_buffer_flush_curr_buffer (bp, line_number);
+
+  /* Reserve one more byte and store the terminating NUL there; the NUL is
+     not counted in bp->utf8_buflen.  */
+  string_buffer_append_unicode_grow (bp, 1);
+  result = bp->utf8_buffer;
+  result[bp->utf8_buflen] = '\0';
+
+  return result;
+}
  
-       for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp )
-               {
-               register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
-               if ( yy_accept[yy_current_state] )
-                       {
-                       yy_last_accepting_state = yy_current_state;
-                       yy_last_accepting_cpos = yy_cp;
-                       }
-               while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
-                       {
-                       yy_current_state = (int) yy_def[yy_current_state];
-                       if ( yy_current_state >= 26 )
-                               yy_c = yy_meta[(unsigned int) yy_c];
-                       }
-               yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
-               }
+/* Free the memory pointed to by a 'struct string_buffer'.  */
+static inline void
+free_string_buffer (struct string_buffer *bp)
+{
+  /* The two buffers are independent allocations; the struct itself is not
+     freed.  */
+  free (bp->curr_buffer);
+  free (bp->utf8_buffer);
+}
  
-       return yy_current_state;
-       }
  
+/* ======================== Accumulating comments.  ======================== */
  
-/* yy_try_NUL_trans - try to make a transition on the NUL character
- *
- * synopsis
- *     next_state = yy_try_NUL_trans( current_state );
- */
  
-#ifdef YY_USE_PROTOS
-static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state )
-#else
-static yy_state_type yy_try_NUL_trans( yy_current_state )
-yy_state_type yy_current_state;
-#endif
-       {
-       register int yy_is_jam;
-       register char *yy_cp = yy_c_buf_p;
+/* In this backend we cannot use the xgettext_comment* functions directly,
+   because in multiline string expressions like
+           "string1" +
+           "string2"
+   the newline between "string1" and "string2" would cause a call to
+   xgettext_comment_reset(), thus destroying the accumulated comments
+   that we need a little later, when we have concatenated the two strings
+   and pass them to remember_a_message().
+   Instead, we do the bookkeeping of the accumulated comments directly,
+   and save a pointer to the accumulated comments when we read "string1".
+   In order to avoid excessive copying of strings, we use reference
+   counting.  */
  
-       register YY_CHAR yy_c = 1;
-       if ( yy_accept[yy_current_state] )
-               {
-               yy_last_accepting_state = yy_current_state;
-               yy_last_accepting_cpos = yy_cp;
-               }
-       while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
-               {
-               yy_current_state = (int) yy_def[yy_current_state];
-               if ( yy_current_state >= 26 )
-                       yy_c = yy_meta[(unsigned int) yy_c];
-               }
-       yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
-       yy_is_jam = (yy_current_state == 25);
+/* A string list together with a count of the references held to it.  */
+typedef struct refcounted_string_list_ty refcounted_string_list_ty;
+struct refcounted_string_list_ty
+{
+  /* Incremented by add_reference, decremented by drop_reference; the list
+     is destroyed when the last reference is dropped.  */
+  unsigned int refcount;
+  /* The accumulated comment lines.  */
+  struct string_list_ty contents;
+};
  
-       return yy_is_jam ? 0 : yy_current_state;
-       }
+static refcounted_string_list_ty *comment;
  
+/* Take one more reference to rslp, which may be NULL.  Returns rslp.  */
+static inline refcounted_string_list_ty *
+add_reference (refcounted_string_list_ty *rslp)
+{
+  if (rslp == NULL)
+    return NULL;
+
+  rslp->refcount++;
+  return rslp;
+}
  
-#ifndef YY_NO_UNPUT
-#ifdef YY_USE_PROTOS
-static void yyunput( int c, register char *yy_bp )
-#else
-static void yyunput( c, yy_bp )
-int c;
-register char *yy_bp;
-#endif
+/* Give up one reference to rslp, which may be NULL.  When the last
+   reference is dropped, the list and the structure itself are freed.  */
+static inline void
+drop_reference (refcounted_string_list_ty *rslp)
+{
+  if (rslp != NULL)
+    {
+      if (rslp->refcount > 1)
+       rslp->refcount--;
+      else
         {
-       register char *yy_cp = yy_c_buf_p;
-
-       /* undo effects of setting up yytext */
-       *yy_cp = yy_hold_char;
-
-       if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
-               { /* need to shift things up to make room */
-               /* +2 for EOB chars. */
-               register int number_to_move = yy_n_chars + 2;
-               register char *dest = &yy_current_buffer->yy_ch_buf[
-                                       yy_current_buffer->yy_buf_size + 2];
-               register char *source =
-                               &yy_current_buffer->yy_ch_buf[number_to_move];
-
-               while ( source > yy_current_buffer->yy_ch_buf )
-                       *--dest = *--source;
-
-               yy_cp += (int) (dest - source);
-               yy_bp += (int) (dest - source);
-               yy_current_buffer->yy_n_chars =
-                       yy_n_chars = yy_current_buffer->yy_buf_size;
-
-               if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
-                       YY_FATAL_ERROR( "flex scanner push-back overflow" );
-               }
-
-       *--yy_cp = (char) c;
-
-
-       yytext_ptr = yy_bp;
-       yy_hold_char = *yy_cp;
-       yy_c_buf_p = yy_cp;
+         /* This was the last reference.  */
+         string_list_destroy (&rslp->contents);
+         free (rslp);
         }
-#endif /* ifndef YY_NO_UNPUT */
-
+    }
+}
  
-#ifdef __cplusplus
-static int yyinput()
-#else
-static int input()
-#endif
-       {
-       int c;
+/* Append str to the accumulated comments.  The list is modified in place
+   only when 'comment' is its sole reference; otherwise a private list is
+   set up first (empty, or a copy of the shared one).  */
+static void
+x_java_comment_add (const char *str)
+{
+  if (comment == NULL || comment->refcount > 1)
+    {
+      refcounted_string_list_ty *shared = comment;
+      refcounted_string_list_ty *fresh =
+       (refcounted_string_list_ty *) xmalloc (sizeof (*fresh));
+
+      fresh->refcount = 1;
+      string_list_init (&fresh->contents);
+
+      if (shared != NULL)
+       {
+         /* Unshare the list: give up our reference to the shared list
+            and copy its items into the fresh one.  */
+         size_t k;
+
+         shared->refcount--;
+         for (k = 0; k < shared->contents.nitems; k++)
+           string_list_append (&fresh->contents, shared->contents.item[k]);
+       }
+
+      comment = fresh;
+    }
+  string_list_append (&comment->contents, str);
+}
  
-       *yy_c_buf_p = yy_hold_char;
+/* Forget the accumulated comments.  Only our own reference is dropped;
+   other holders of the list keep it.  */
+static void
+x_java_comment_reset ()
+{
+  refcounted_string_list_ty *old = comment;
+
+  comment = NULL;
+  drop_reference (old);
+}
  
-       if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
-               {
-               /* yy_c_buf_p now points to the character we want to return.
-                * If this occurs *before* the EOB characters, then it's a
-                * valid NUL; if not, then we've hit the end of the buffer.
-                */
-               if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] )
-                       /* This was really a NUL. */
-                       *yy_c_buf_p = '\0';
+/* Replace the xgettext comments by the contents of rslp: the xgettext
+   comments are reset, then every item of rslp (if rslp is not NULL) is
+   added in order.  */
+static void
+x_java_comment_to_xgettext_comment (refcounted_string_list_ty *rslp)
+{
+  xgettext_comment_reset ();
+  if (rslp != NULL)
+    {
+      size_t i;
  
-               else
-                       { /* need more input */
-                       int offset = yy_c_buf_p - yytext_ptr;
-                       ++yy_c_buf_p;
-
-                       switch ( yy_get_next_buffer() )
-                               {
-                               case EOB_ACT_LAST_MATCH:
-                                       /* This happens because yy_g_n_b()
-                                        * sees that we've accumulated a
-                                        * token and flags that we need to
-                                        * try matching the token before
-                                        * proceeding.  But for input(),
-                                        * there's no matching to consider.
-                                        * So convert the EOB_ACT_LAST_MATCH
-                                        * to EOB_ACT_END_OF_FILE.
-                                        */
-
-                                       /* Reset buffer status. */
-                                       yyrestart( yyin );
-
-                                       /* fall through */
-
-                               case EOB_ACT_END_OF_FILE:
-                                       {
-                                       if ( yywrap() )
-                                               return EOF;
-
-                                       if ( ! yy_did_buffer_switch_on_eof )
-                                               YY_NEW_FILE;
-#ifdef __cplusplus
-                                       return yyinput();
-#else
-                                       return input();
-#endif
-                                       }
+      for (i = 0; i < rslp->contents.nitems; i++)
+       xgettext_comment_add (rslp->contents.item[i]);
+    }
+}
  
-                               case EOB_ACT_CONTINUE_SCAN:
-                                       yy_c_buf_p = yytext_ptr + offset;
-                                       break;
-                               }
-                       }
-               }
  
-       c = *(unsigned char *) yy_c_buf_p;      /* cast for 8-bit char's */
-       *yy_c_buf_p = '\0';     /* preserve yytext */
-       yy_hold_char = *++yy_c_buf_p;
+/* Accumulating a single comment line.  */
  
+static struct string_buffer comment_buffer;
  
-       return c;
-       }
+/* Begin a new comment line: zero the three state fields of comment_buffer
+   that comment_at_start later tests.  */
+static inline void
+comment_start ()
+{
+  comment_buffer.utf8_buflen = 0;
+  comment_buffer.utf16_surr = 0;
+  comment_buffer.curr_buflen = 0;
+}
  
+/* Tell whether nothing has been accumulated for the current comment line
+   yet, i.e. whether comment_buffer is still in the state that
+   comment_start left it in.  */
+static inline bool
+comment_at_start ()
+{
+  if (comment_buffer.utf8_buflen != 0)
+    return false;
+  if (comment_buffer.utf16_surr != 0)
+    return false;
+  return comment_buffer.curr_buflen == 0;
+}
  
-#ifdef YY_USE_PROTOS
-void yyrestart( FILE *input_file )
-#else
-void yyrestart( input_file )
-FILE *input_file;
-#endif
-       {
-       if ( ! yy_current_buffer )
-               yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE );
+/* Append the character C to the comment line being accumulated in
+   comment_buffer.  */
+static inline void
+comment_add (int c)
+{
+  string_buffer_append (&comment_buffer, c);
+}
  
-       yy_init_buffer( yy_current_buffer, input_file );
-       yy_load_buffer_state();
-       }
+/* Finish the comment line accumulated in comment_buffer and pass it to
+   x_java_comment_add.  CHARS_TO_REMOVE is the number of trailing characters
+   to drop first; the callers in phase4_getc pass 1 (the newline), 2 (the
+   closing star and slash) or 0 (C++ style comment).  Spaces and tabs that
+   then end the line are dropped as well.  */
+static inline void
+comment_line_end (size_t chars_to_remove)
+{
+  char *buffer = string_buffer_result (&comment_buffer);
+  size_t buflen = strlen (buffer);
+
+  /* BUFLEN is unsigned.  Should strlen ever report fewer characters than
+     CHARS_TO_REMOVE (for instance if the accumulated text is cut short by a
+     NUL -- presumably possible with hostile input, TODO confirm), a plain
+     subtraction would wrap around and the code below would read and write
+     far outside the buffer.  Clamp to an empty line instead.  */
+  if (chars_to_remove > buflen)
+    buflen = 0;
+  else
+    buflen -= chars_to_remove;
+  while (buflen >= 1
+        && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
+    --buflen;
+  buffer[buflen] = '\0';
+  x_java_comment_add (buffer);
+}
  
  
-#ifdef YY_USE_PROTOS
-void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer )
-#else
-void yy_switch_to_buffer( new_buffer )
-YY_BUFFER_STATE new_buffer;
-#endif
-       {
-       if ( yy_current_buffer == new_buffer )
-               return;
+/* These are for tracking whether comments count as immediately before
+   keyword.  */
+static int last_comment_line;
+static int last_non_comment_line;
  
-       if ( yy_current_buffer )
-               {
-               /* Flush out information for old buffer. */
-               *yy_c_buf_p = yy_hold_char;
-               yy_current_buffer->yy_buf_pos = yy_c_buf_p;
-               yy_current_buffer->yy_n_chars = yy_n_chars;
-               }
  
-       yy_current_buffer = new_buffer;
-       yy_load_buffer_state();
-
-       /* We don't actually know whether we did this switch during
-        * EOF (yywrap()) processing, but the only time this flag
-        * is looked at is after yywrap() is called, so it's safe
-        * to go ahead and always set it.
-        */
-       yy_did_buffer_switch_on_eof = 1;
-       }
+/* Replace each comment that is not inside a character constant or string
+   literal with a space or newline character.  */
  
+/* Return the next phase 3 character with comments elided.  A C style
+   comment is returned as a single space, a C++ style comment as a newline;
+   anything that does not start a comment is passed through unchanged.  The
+   text of each comment is handed, line by line, to comment_line_end.  */
+static int
+phase4_getc ()
+{
+  int c0;
+  int c;
+  bool last_was_star;
  
-#ifdef YY_USE_PROTOS
-void yy_load_buffer_state( void )
-#else
-void yy_load_buffer_state()
-#endif
+  c0 = phase3_getc ();
+  if (RED (c0) != '/')
+    return c0;
+  /* A slash: the next character decides whether a comment starts here.  */
+  c = phase3_getc ();
+  switch (RED (c))
+    {
+    default:
+      /* Not a comment: push the second character back, return the slash.  */
+      phase3_ungetc (c);
+      return c0;
+
+    case '*':
+      /* C style comment.  */
+      comment_start ();
+      last_was_star = false;
+      for (;;)
         {
-       yy_n_chars = yy_current_buffer->yy_n_chars;
-       yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos;
-       yyin = yy_current_buffer->yy_input_file;
-       yy_hold_char = *yy_c_buf_p;
-       }
-
+         c = phase3_getc ();
+         /* At end of input the loop is left without flushing the line that
+            was being accumulated.  */
+         if (c == P2_EOF)
+           break;
+         /* We skip all leading white space, but not EOLs.  */
+         if (!(comment_at_start () && (RED (c) == ' ' || RED (c) == '\t')))
+           comment_add (c);
+         switch (RED (c))
+           {
+           case '\n':
+             /* End of one comment line; the 1 removes the newline that was
+                just added.  */
+             comment_line_end (1);
+             comment_start ();
+             last_was_star = false;
+             continue;
+
+           case '*':
+             last_was_star = true;
+             continue;
+
+           case '/':
+             if (last_was_star)
+               {
+                 /* End of the comment; the 2 removes the closing star and
+                    slash that were just added.  */
+                 comment_line_end (2);
+                 break;
+               }
+             /* FALLTHROUGH */
  
-#ifdef YY_USE_PROTOS
-YY_BUFFER_STATE yy_create_buffer( FILE *file, int size )
-#else
-YY_BUFFER_STATE yy_create_buffer( file, size )
-FILE *file;
-int size;
-#endif
+           default:
+             last_was_star = false;
+             continue;
+           }
+         /* Only reached through the 'break' of the closing slash above.  */
+         break;
+       }
+      last_comment_line = line_number;
+      return ' ';
+
+    case '/':
+      /* C++ style comment.  */
+      last_comment_line = line_number;
+      comment_start ();
+      /* Unlike in the C style case, leading white space is kept here.  */
+      for (;;)
         {
-       YY_BUFFER_STATE b;
-
-       b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) );
-       if ( ! b )
-               YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
-
-       b->yy_buf_size = size;
-
-       /* yy_ch_buf has to be 2 characters longer than the size given because
-        * we need to put in 2 end-of-buffer characters.
-        */
-       b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 );
-       if ( ! b->yy_ch_buf )
-               YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
-
-       b->yy_is_our_buffer = 1;
-
-       yy_init_buffer( b, file );
-
-       return b;
+         c = phase3_getc ();
+         if (RED (c) == '\n' || c == P2_EOF)
+           break;
+         comment_add (c);
         }
+      phase3_ungetc (c); /* push back the newline, to decrement line_number */
+      comment_line_end (0);
+      phase3_getc (); /* read the newline again */
+      return '\n';
+    }
+}
  
+/* Push the character C back; phase 4 keeps no pushback of its own and
+   simply forwards to phase 3.  */
+static void
+phase4_ungetc (int c)
+{
+  phase3_ungetc (c);
+}
  
-#ifdef YY_USE_PROTOS
-void yy_delete_buffer( YY_BUFFER_STATE b )
-#else
-void yy_delete_buffer( b )
-YY_BUFFER_STATE b;
-#endif
-       {
-       if ( ! b )
-               return;
  
-       if ( b == yy_current_buffer )
-               yy_current_buffer = (YY_BUFFER_STATE) 0;
+/* ========================== Reading of tokens.  ========================== */
  
-       if ( b->yy_is_our_buffer )
-               yy_flex_free( (void *) b->yy_ch_buf );
+/* The kinds of token that phase 5 distinguishes.  Everything that is not
+   listed explicitly is reported as token_type_other.  */
+enum token_type_ty
+{
+  token_type_eof,
+  token_type_lparen,           /* ( */
+  token_type_rparen,           /* ) */
+  token_type_lbrace,           /* { */
+  token_type_rbrace,           /* } */
+  token_type_comma,            /* , */
+  token_type_dot,              /* . */
+  token_type_string_literal,   /* "abc" */
+  token_type_number,           /* 1.23 */
+  token_type_symbol,           /* identifier, keyword, null */
+  token_type_plus,             /* + */
+  token_type_other             /* character literal, misc. operator */
+};
+typedef enum token_type_ty token_type_ty;
+
+/* A token.  'string' and 'comment' are meaningful only for the token types
+   named in their comments; free_token releases exactly those.  */
+typedef struct token_ty token_ty;
+struct token_ty
+{
+  token_type_ty type;
+  char *string;                /* for token_type_string_literal, token_type_symbol */
+  refcounted_string_list_ty *comment;  /* for token_type_string_literal */
+  int line_number;
+};
  
-       yy_flex_free( (void *) b );
-       }
  
+/* Release what the token *TP owns: the string of a string literal or
+   symbol, and additionally the comment reference of a string literal.
+   Tokens of any other type own nothing.  */
+static inline void
+free_token (token_ty *tp)
+{
+  switch (tp->type)
+    {
+    case token_type_string_literal:
+      free (tp->string);
+      drop_reference (tp->comment);
+      break;
+
+    case token_type_symbol:
+      free (tp->string);
+      break;
+
+    default:
+      break;
+    }
+}
  
-#ifndef YY_ALWAYS_INTERACTIVE
-#ifndef YY_NEVER_INTERACTIVE
-extern int isatty YY_PROTO(( int ));
-#endif
-#endif
  
-#ifdef YY_USE_PROTOS
-void yy_init_buffer( YY_BUFFER_STATE b, FILE *file )
-#else
-void yy_init_buffer( b, file )
-YY_BUFFER_STATE b;
-FILE *file;
-#endif
+/* Read an escape sequence inside a string literal or character literal.
+   accumulate_escaped calls this after it has read the backslash.  Returns
+   the denoted character wrapped with UNICODE ().  At end of input, or if the
+   character after the backslash does not start a known escape, a backslash
+   is returned (in the latter case the offending character is pushed
+   back).  */
+static inline int
+do_getc_escaped ()
+{
+  int c;
  
+  /* Use phase 3, because phase 4 elides comments.  */
+  c = phase3_getc ();
+  if (c == P2_EOF)
+    return UNICODE ('\\');
+  switch (RED (c))
+    {
+    case 'b':
+      return UNICODE (0x08);
+    case 't':
+      return UNICODE (0x09);
+    case 'n':
+      return UNICODE (0x0a);
+    case 'f':
+      return UNICODE (0x0c);
+    case 'r':
+      return UNICODE (0x0d);
+    case '"':
+      return UNICODE ('"');
+    case '\'':
+      return UNICODE ('\'');
+    case '\\':
+      return UNICODE ('\\');
+    case '0': case '1': case '2': case '3':
+    case '4': case '5': case '6': case '7':
+      {
+       /* Octal escape: up to three digits when the first one is 0..3,
+          otherwise at most two.  */
+       int n = RED (c) - '0';
+       bool maybe3digits = (n < 4);
  
-       {
-       yy_flush_buffer( b );
-
-       b->yy_input_file = file;
-       b->yy_fill_buffer = 1;
-
-#if YY_ALWAYS_INTERACTIVE
-       b->yy_is_interactive = 1;
-#else
-#if YY_NEVER_INTERACTIVE
-       b->yy_is_interactive = 0;
-#else
-       b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0;
-#endif
-#endif
-       }
+       c = phase3_getc ();
+       if (RED (c) >= '0' && RED (c) <= '7')
+         {
+           n = (n << 3) + (RED (c) - '0');
+           if (maybe3digits)
+             {
+               c = phase3_getc ();
+               if (RED (c) >= '0' && RED (c) <= '7')
+                 n = (n << 3) + (RED (c) - '0');
+               else
+                 phase3_ungetc (c);
+             }
+         }
+       else
+         phase3_ungetc (c);
  
+       return UNICODE (n);
+      }
+    default:
+      /* Invalid escape sequence.  */
+      phase3_ungetc (c);
+      return UNICODE ('\\');
+    }
+}
  
-#ifdef YY_USE_PROTOS
-void yy_flush_buffer( YY_BUFFER_STATE b )
-#else
-void yy_flush_buffer( b )
-YY_BUFFER_STATE b;
-#endif
+/* Read the rest of a string literal or character literal, whose opening
+   DELIMITER ('"' or '\'') has already been consumed, and append its
+   characters, with escape sequences decoded, to LITERAL.  Reading stops
+   after the closing DELIMITER, at end of input, or in front of a newline;
+   in the last case a warning is issued and the newline is pushed back.  */
+static void
+accumulate_escaped (struct string_buffer *literal, int delimiter)
+{
+  int c;
  
+  for (;;)
+    {
+      /* Use phase 3, because phase 4 elides comments.  */
+      c = phase3_getc ();
+      if (c == P2_EOF || RED (c) == delimiter)
+       break;
+      if (RED (c) == '\n')
         {
-       if ( ! b )
-               return;
-
-       b->yy_n_chars = 0;
-
-       /* We always need two end-of-buffer characters.  The first causes
-        * a transition to the end-of-buffer state.  The second causes
-        * a jam in that state.
-        */
-       b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
-       b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+         /* Name the kind of literal that is actually unterminated; this
+            function is used for both.  */
+         error_with_progname = false;
+         if (delimiter == '\'')
+           error (0, 0, _("%s:%d: warning: unterminated character constant"),
+                  logical_file_name, line_number - 1);
+         else
+           error (0, 0, _("%s:%d: warning: unterminated string constant"),
+                  logical_file_name, line_number - 1);
+         error_with_progname = true;
+         phase3_ungetc (c);
+         break;
+       }
+      if (RED (c) == '\\')
+       c = do_getc_escaped ();
+      string_buffer_append (literal, c);
+    }
+}
  
-       b->yy_buf_pos = &b->yy_ch_buf[0];
  
-       b->yy_at_bol = 1;
-       b->yy_buffer_status = YY_BUFFER_NEW;
+/* Combine characters into tokens.  Discard whitespace.  */
  
-       if ( b == yy_current_buffer )
-               yy_load_buffer_state();
-       }
+/* Maximum used guaranteed to be < 4.  */
+static token_ty phase5_pushback[4];
+static int phase5_pushback_length;
  
+/* Store the next token in *TP.  Tokens pushed back with phase5_unget are
+   returned first, most recently pushed one first; otherwise characters from
+   phase 4 are combined into a token, skipping white space.  At end of input
+   the type is token_type_eof.  */
+static void
+phase5_get (token_ty *tp)
+{
+  int c;
  
-#ifndef YY_NO_SCAN_BUFFER
-#ifdef YY_USE_PROTOS
-YY_BUFFER_STATE yy_scan_buffer( char *base, yy_size_t size )
-#else
-YY_BUFFER_STATE yy_scan_buffer( base, size )
-char *base;
-yy_size_t size;
-#endif
-       {
-       YY_BUFFER_STATE b;
-
-       if ( size < 2 ||
-            base[size-2] != YY_END_OF_BUFFER_CHAR ||
-            base[size-1] != YY_END_OF_BUFFER_CHAR )
-               /* They forgot to leave room for the EOB's. */
-               return 0;
-
-       b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) );
-       if ( ! b )
-               YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" );
-
-       b->yy_buf_size = size - 2;      /* "- 2" to take care of EOB's */
-       b->yy_buf_pos = b->yy_ch_buf = base;
-       b->yy_is_our_buffer = 0;
-       b->yy_input_file = 0;
-       b->yy_n_chars = b->yy_buf_size;
-       b->yy_is_interactive = 0;
-       b->yy_at_bol = 1;
-       b->yy_fill_buffer = 0;
-       b->yy_buffer_status = YY_BUFFER_NEW;
-
-       yy_switch_to_buffer( b );
-
-       return b;
-       }
-#endif
+  if (phase5_pushback_length)
+    {
+      *tp = phase5_pushback[--phase5_pushback_length];
+      return;
+    }
+  tp->string = NULL;
  
+  for (;;)
+    {
+      /* Record the line on which the token (if one starts here) begins.  */
+      tp->line_number = line_number;
+      c = phase4_getc ();
  
-#ifndef YY_NO_SCAN_STRING
-#ifdef YY_USE_PROTOS
-YY_BUFFER_STATE yy_scan_string( yyconst char *yy_str )
-#else
-YY_BUFFER_STATE yy_scan_string( yy_str )
-yyconst char *yy_str;
-#endif
+      if (c == P2_EOF)
         {
-       int len;
-       for ( len = 0; yy_str[len]; ++len )
-               ;
-
-       return yy_scan_bytes( yy_str, len );
+         tp->type = token_type_eof;
+         return;
         }
-#endif
  
-
-#ifndef YY_NO_SCAN_BYTES
-#ifdef YY_USE_PROTOS
-YY_BUFFER_STATE yy_scan_bytes( yyconst char *bytes, int len )
-#else
-YY_BUFFER_STATE yy_scan_bytes( bytes, len )
-yyconst char *bytes;
-int len;
-#endif
+      switch (RED (c))
         {
-       YY_BUFFER_STATE b;
-       char *buf;
-       yy_size_t n;
-       int i;
-
-       /* Get memory for full buffer, including space for trailing EOB's. */
-       n = len + 2;
-       buf = (char *) yy_flex_alloc( n );
-       if ( ! buf )
-               YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" );
-
-       for ( i = 0; i < len; ++i )
-               buf[i] = bytes[i];
-
-       buf[len] = buf[len+1] = YY_END_OF_BUFFER_CHAR;
-
-       b = yy_scan_buffer( buf, n );
-       if ( ! b )
-               YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" );
-
-       /* It's okay to grow etc. this buffer, and we should throw it
-        * away when we're done.
-        */
-       b->yy_is_our_buffer = 1;
-
-       return b;
+       case '\n':
+         /* At a line end, forget the accumulated comment if the last
+            non-comment token started on a later line than the last
+            comment.  */
+         if (last_non_comment_line > last_comment_line)
+           x_java_comment_reset ();
+         /* FALLTHROUGH */
+       case ' ':
+       case '\t':
+       case '\f':
+         /* Ignore whitespace and comments.  */
+         continue;
         }
-#endif
  
+      /* Anything else starts a token.  */
+      last_non_comment_line = tp->line_number;
  
-#ifndef YY_NO_PUSH_STATE
-#ifdef YY_USE_PROTOS
-static void yy_push_state( int new_state )
-#else
-static void yy_push_state( new_state )
-int new_state;
-#endif
+      switch (RED (c))
         {
-       if ( yy_start_stack_ptr >= yy_start_stack_depth )
-               {
-               yy_size_t new_size;
-
-               yy_start_stack_depth += YY_START_STACK_INCR;
-               new_size = yy_start_stack_depth * sizeof( int );
+       case '(':
+         tp->type = token_type_lparen;
+         return;
  
-               if ( ! yy_start_stack )
-                       yy_start_stack = (int *) yy_flex_alloc( new_size );
+       case ')':
+         tp->type = token_type_rparen;
+         return;
  
-               else
-                       yy_start_stack = (int *) yy_flex_realloc(
-                                       (void *) yy_start_stack, new_size );
+       case '{':
+         tp->type = token_type_lbrace;
+         return;
  
-               if ( ! yy_start_stack )
-                       YY_FATAL_ERROR(
-                       "out of memory expanding start-condition stack" );
-               }
+       case '}':
+         tp->type = token_type_rbrace;
+         return;
  
-       yy_start_stack[yy_start_stack_ptr++] = YY_START;
+       case ',':
+         tp->type = token_type_comma;
+         return;
  
-       BEGIN(new_state);
-       }
-#endif
+       case '.':
+         /* A dot followed by a digit starts a number; any other dot is the
+            dot token.  */
+         c = phase4_getc ();
+         if (!(RED (c) >= '0' && RED (c) <= '9'))
+           {
+             phase4_ungetc (c);
+             tp->type = token_type_dot;
+             return;
+           }
+         /* FALLTHROUGH */
  
+       case '0': case '1': case '2': case '3': case '4':
+       case '5': case '6': case '7': case '8': case '9':
+         {
+           /* Don't need to verify the complicated syntax of integers and
+              floating-point numbers.  We assume a valid Java input.
+              The simplified syntax that we recognize as number is: any
+              sequence of alphanumeric characters, additionally '+' and '-'
+              immediately after 'e' or 'E' except in hexadecimal numbers.  */
+           bool hexadecimal = false;
+
+           for (;;)
+             {
+               c = phase4_getc ();
+               if (RED (c) >= '0' && RED (c) <= '9')
+                 continue;
+               if ((RED (c) >= 'A' && RED (c) <= 'Z')
+                   || (RED (c) >= 'a' && RED (c) <= 'z'))
+                 {
+                   if (RED (c) == 'X' || RED (c) == 'x')
+                     hexadecimal = true;
+                   if ((RED (c) == 'E' || RED (c) == 'e') && !hexadecimal)
+                     {
+                       c = phase4_getc ();
+                       if (!(RED (c) == '+' || RED (c) == '-'))
+                         phase4_ungetc (c);
+                     }
+                   continue;
+                 }
+               if (RED (c) == '.')
+                 continue;
+               break;
+             }
+           /* The number's text is not kept; only its type is reported.  */
+           phase4_ungetc (c);
+           tp->type = token_type_number;
+           return;
+         }
  
-#ifndef YY_NO_POP_STATE
-static void yy_pop_state()
-       {
-       if ( --yy_start_stack_ptr < 0 )
-               YY_FATAL_ERROR( "start-condition stack underflow" );
+       case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+       case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
+       case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+       case 'V': case 'W': case 'X': case 'Y': case 'Z':
+       case '_':
+       case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+       case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+       case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+       case 'v': case 'w': case 'x': case 'y': case 'z':
+         /* Although Java allows identifiers containing many Unicode
+            characters, we recognize only identifiers consisting of ASCII
+            characters.  This avoids conversion hassles w.r.t. the --keyword
+            arguments, and shouldn't be a big problem in practice.  */
+         {
+           /* BUFFER is static: it is reused by every call and only grows.
+              The token receives its own copy below.  */
+           static char *buffer;
+           static int bufmax;
+           int bufpos = 0;
+           for (;;)
+             {
+               if (bufpos >= bufmax)
+                 {
+                   bufmax = 2 * bufmax + 10;
+                   buffer = xrealloc (buffer, bufmax);
+                 }
+               buffer[bufpos++] = RED (c);
+               c = phase4_getc ();
+               if (!((RED (c) >= 'A' && RED (c) <= 'Z')
+                     || (RED (c) >= 'a' && RED (c) <= 'z')
+                     || (RED (c) >= '0' && RED (c) <= '9')
+                     || RED (c) == '_'))
+                 break;
+             }
+           phase4_ungetc (c);
+           /* Make room for the terminating NUL.  */
+           if (bufpos >= bufmax)
+             {
+               bufmax = 2 * bufmax + 10;
+               buffer = xrealloc (buffer, bufmax);
+             }
+           buffer[bufpos] = '\0';
+           tp->string = xstrdup (buffer);
+           tp->type = token_type_symbol;
+           return;
+         }
  
-       BEGIN(yy_start_stack[yy_start_stack_ptr]);
-       }
-#endif
+       case '"':
+         /* String literal.  */
+         {
+           struct string_buffer literal;
+
+           init_string_buffer (&literal);
+           accumulate_escaped (&literal, '"');
+           tp->string = xstrdup (string_buffer_result (&literal));
+           free_string_buffer (&literal);
+           /* The token takes its own reference to the comment accumulated
+              so far; free_token drops it again.  */
+           tp->comment = add_reference (comment);
+           tp->type = token_type_string_literal;
+           return;
+         }
  
+       case '\'':
+         /* Character literal.  */
+         {
+           struct string_buffer literal;
  
-#ifndef YY_NO_TOP_STATE
-static int yy_top_state()
-       {
-       return yy_start_stack[yy_start_stack_ptr - 1];
-       }
-#endif
+           /* The contents are read only to get past the literal.  */
+           init_string_buffer (&literal);
+           accumulate_escaped (&literal, '\'');
+           free_string_buffer (&literal);
+           tp->type = token_type_other;
+           return;
+         }
  
-#ifndef YY_EXIT_FAILURE
-#define YY_EXIT_FAILURE 2
-#endif
+       case '+':
+         c = phase4_getc ();
+         if (RED (c) == '+')
+           /* Operator ++ */
+           tp->type = token_type_other;
+         else if (RED (c) == '=')
+           /* Operator += */
+           tp->type = token_type_other;
+         else
+           {
+             /* Operator + */
+             phase4_ungetc (c);
+             tp->type = token_type_plus;
+           }
+         return;
  
-#ifdef YY_USE_PROTOS
-static void yy_fatal_error( yyconst char msg[] )
-#else
-static void yy_fatal_error( msg )
-char msg[];
-#endif
-       {
-       (void) fprintf( stderr, "%s\n", msg );
-       exit( YY_EXIT_FAILURE );
+       default:
+         /* Misc. operator.  */
+         tp->type = token_type_other;
+         return;
         }
+    }
+}
  
+/* Push the token *TP back so that the next phase5_get returns it again.
+   An end-of-file token is not stored; reading simply hits the end of input
+   once more.  The token is copied by value, so ownership of its string and
+   comment moves to the pushback stack.  */
+static void
+phase5_unget (token_ty *tp)
+{
+  if (tp->type != token_type_eof)
+    {
+      /* The stack has a fixed size.  Running past it would silently
+         overwrite unrelated data, so treat it as a program error.  */
+      if (phase5_pushback_length
+         == (int) (sizeof (phase5_pushback) / sizeof (phase5_pushback[0])))
+       abort ();
+      phase5_pushback[phase5_pushback_length++] = *tp;
+    }
+}
  
  
-/* Redefine yyless() so it works in section 3 code. */
+/* Compile-time optimization of string literal concatenation.
+   Combine "string1" + ... + "stringN" to the concatenated string if
+     - the token before this expression is not ')' (because then the first
+       string could be part of a cast expression),
+     - the token after this expression is not '.' (because then the last
+       string could be part of a method call expression).  */
  
-#undef yyless
-#define yyless(n) \
-       do \
-               { \
-               /* Undo effects of setting up yytext. */ \
-               yytext[yyleng] = yy_hold_char; \
-               yy_c_buf_p = yytext + n; \
-               yy_hold_char = *yy_c_buf_p; \
-               *yy_c_buf_p = '\0'; \
-               yyleng = n; \
-               } \
-       while ( 0 )
+/* Maximum used guaranteed to be < 4.  */
+static token_ty phase6_pushback[4];
+static int phase6_pushback_length;
  
+static token_type_ty phase6_last;
  
-/* Internal utility routines. */
+/* Store the next token in *TP, after merging a run of string literals
+   joined by '+' into a single string literal token.  Merging is attempted
+   only if the previously returned token type (phase6_last) was not ')', and
+   a further literal is merged in only if the token following it is not '.'.
+   The merged token keeps the comment and line number of its first literal.
+   Note that a token taken from the pushback stack is returned as is and
+   does not update phase6_last.  */
+static void
+phase6_get (token_ty *tp)
+{
+  if (phase6_pushback_length)
+    {
+      *tp = phase6_pushback[--phase6_pushback_length];
+      return;
+    }
  
-#ifndef yytext_ptr
-#ifdef YY_USE_PROTOS
-static void yy_flex_strncpy( char *s1, yyconst char *s2, int n )
-#else
-static void yy_flex_strncpy( s1, s2, n )
-char *s1;
-yyconst char *s2;
-int n;
-#endif
-       {
-       register int i;
-       for ( i = 0; i < n; ++i )
-               s1[i] = s2[i];
-       }
-#endif
+  phase5_get (tp);
+  if (tp->type == token_type_string_literal && phase6_last != token_type_rparen)
+    {
+      /* SUM starts out as the first literal's own string and is grown in
+         place; it is stored back into *TP after the loop.  */
+      char *sum = tp->string;
+      size_t sum_len = strlen (sum);
  
-#ifdef YY_NEED_STRLEN
-#ifdef YY_USE_PROTOS
-static int yy_flex_strlen( yyconst char *s )
-#else
-static int yy_flex_strlen( s )
-yyconst char *s;
-#endif
+      for (;;)
         {
-       register int n;
-       for ( n = 0; s[n]; ++n )
-               ;
-
-       return n;
-       }
-#endif
+         token_ty token2;
  
+         /* Look ahead for: '+' (token2), string literal (token3), and one
+            more token (token_after) that must not be a dot.  Tokens that
+            are not consumed are pushed back in reverse order, so that
+            phase5_get returns them in their original order.  */
+         phase5_get (&token2);
+         if (token2.type == token_type_plus)
+           {
+             token_ty token3;
  
-#ifdef YY_USE_PROTOS
-static void *yy_flex_alloc( yy_size_t size )
-#else
-static void *yy_flex_alloc( size )
-yy_size_t size;
-#endif
-       {
-       return (void *) malloc( size );
-       }
+             phase5_get (&token3);
+             if (token3.type == token_type_string_literal)
+               {
+                 token_ty token_after;
  
-#ifdef YY_USE_PROTOS
-static void *yy_flex_realloc( void *ptr, yy_size_t size )
-#else
-static void *yy_flex_realloc( ptr, size )
-void *ptr;
-yy_size_t size;
-#endif
-       {
-       /* The cast to (char *) in the following accommodates both
-        * implementations that use char* generic pointers, and those
-        * that use void* generic pointers.  It works with the latter
-        * because both ANSI C and C++ allow castless assignment from
-        * any pointer type to void*, and deal with argument conversions
-        * as though doing an assignment.
-        */
-       return (void *) realloc( (char *) ptr, size );
-       }
+                 phase5_get (&token_after);
+                 if (token_after.type != token_type_dot)
+                   {
+                     char *addend = token3.string;
+                     size_t addend_len = strlen (addend);
  
-#ifdef YY_USE_PROTOS
-static void yy_flex_free( void *ptr )
-#else
-static void yy_flex_free( ptr )
-void *ptr;
-#endif
-       {
-       free( ptr );
-       }
+                     /* Append, including the terminating NUL.  */
+                     sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
+                     memcpy (sum + sum_len, addend, addend_len + 1);
+                     sum_len += addend_len;
  
-#if YY_MAIN
-int main()
-       {
-       yylex();
-       return 0;
+                     /* Only token_after goes back; the '+' and the merged
+                        literal are consumed and released.  */
+                     phase5_unget (&token_after);
+                     free_token (&token3);
+                     free_token (&token2);
+                     continue;
+                   }
+                 phase5_unget (&token_after);
+               }
+             phase5_unget (&token3);
+           }
+         phase5_unget (&token2);
+         break;
         }
-#endif
-#line 221 "./x-java.l"
-
-
-static char *
-append_strings (char *a, char *b)
-{
-  int total_size = strlen (a) + strlen (b) + 1;
-  char *new_string = (char *) xmalloc (total_size);
-  strcpy (new_string, a);
-  strcat (new_string, b);
-  return new_string;
-}
-
-static inline bool
-isplus (char *s)
-{
-  return *s == '+';
-}
-
-static inline bool
-isdot (char *s)
-{
-  return *s == '.';
-}
-
-
-static char *
-translate_esc (char *s)
-{
-  char *n = (char *) xmalloc (strlen (s) + 1);
-  size_t i;
-  size_t j = 0;
-
-  for (i = 0; i < strlen (s); i++)
-    switch (s[i])
-      {
-      case '\\':
-       if (s[i + 1] == 'n')
-         {
-           n[j++] = '\n';
-           i++;
-         }
-       break;
-      default:
-       n[j++] = s[i];
-      }
-  n[j] = '\0';
-  return n;
-}
-
-static object_list *
-object_list_alloc ()
-{
-  object_list *list = xmalloc (sizeof (object_list));
-  list->max_num_obj = INITIAL_OBJECT_LIST_SIZE;
-  list->num_obj = 0;
-  list->objects = xmalloc (sizeof (void *) * INITIAL_OBJECT_LIST_SIZE);
-  return list;
+      tp->string = sum;
+    }
+  phase6_last = tp->type;
  }
  
  static void
-object_list_destroy (object_list *list)
+phase6_unget (token_ty *tp)
  {
-  free (list->objects);
-  free (list);
+  if (tp->type != token_type_eof)
+    phase6_pushback[phase6_pushback_length++] = *tp;
  }
  
-static int
-get_num_objects (const object_list *list)
-{
-  return list->num_obj;
-}
  
-static void *
-get_object (const object_list *list, int i)
+/* Store the next token in *TP.  This is the lexer entry point used by the
+   extraction code; it simply reads from phase 6.  */
+static void
+x_java_lex (token_ty *tp)
  {
-  return list->objects[i];
+  phase6_get (tp);
  }
  
+/* Give the token *TP back to the lexer, so that the next x_java_lex returns
+   it again; it is pushed back onto phase 6.  */
  static void
-add_object (object_list *list, void *object)
+x_java_unlex (token_ty *tp)
  {
-  if (list->num_obj + 1 >= list->max_num_obj)
-    {
-      list->max_num_obj += OBJECT_LIST_GROWTH;
-      list->objects =
-       xrealloc (list->objects, list->max_num_obj * sizeof (void *));
-    }
-  list->objects[list->num_obj ++] = object;
+  phase6_unget (tp);
  }
  
  
-/* options */
-static bool extract_all_strings = false;
-
-void
-x_java_extract_all ()
-{
-  extract_all_strings = true;
-}
+/* ========================= Extracting strings.  ========================== */
  
+/* The file is broken into tokens.  Scan the token stream, looking for
+   a keyword, followed by a left paren, followed by a string.  When we
+   see this sequence, we have something to remember.  We assume we are
+   looking at a valid Java program, and leave the complaints about
+   the grammar to the compiler.
  
-static java_keyword *
-alloc_keyword (const char *keyword, int arg1, int arg2)
-{
-  java_keyword *jk = xmalloc (sizeof (java_keyword));
-  jk->keyword = xstrdup (keyword);
-  jk->msgid_arg = arg1;
-  jk->msgid_plural_arg = arg2;
-  return jk;
-}
+     Normal handling: Look for
+       keyword ( ... msgid ... )
+     Plural handling: Look for
+       keyword ( ... msgid ... msgid_plural ... )
  
-static object_list *java_keywords = NULL;
+   We use recursion because the arguments before msgid or between msgid
+   and msgid_plural can contain subexpressions of the same form.  */
  
  
-/**
- * Backwards substring match.
- */
+/* Extract messages until the next balanced closing parenthesis or brace,
+   depending on TERMINATOR.
+   Extracted messages are added to MLP.
+   When a specific argument shall be extracted, COMMAS_TO_SKIP >= 0 and,
+   if also a plural argument shall be extracted, PLURAL_COMMAS > 0,
+   otherwise PLURAL_COMMAS = 0.
+   When no specific argument shall be extracted, COMMAS_TO_SKIP < 0.
+   Return true upon eof, false upon closing parenthesis or brace.  */
  static bool
-tailcmp (const char *s1, const char *s2)
+extract_parenthesized (message_list_ty *mlp, token_type_ty terminator,
+                      int commas_to_skip, int plural_commas)
  {
-  int len1 = strlen (s1);
-  int len2 = strlen (s2);
-  int start = len1 - len2;
-  if (start < 0)
-    return false;
-  return (start == 0 || s1[start-1] == '.') && (strcmp (s1 + start, s2) == 0);
-}
+  /* Remember the message containing the msgid, for msgid_plural.  */
+  message_ty *plural_mp = NULL;
  
-/**
- * Try to match a string against the keyword. If substring_match is
- * true substring match is used.
- */
-static bool
-do_compare (const char *s1, const char *s2)
-{
-  if (substring_match)
-    return strstr (s1, s2) != NULL;
-  else
-    return tailcmp (s1, s2);
-}
+  /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
+  int state;
+  /* Parameters of the keyword just seen.  Defined only in state 1.  */
+  int next_commas_to_skip = -1;
+  int next_plural_commas = 0;
  
-/**
- * Check if a string is a keyword or not.
- */
-static java_keyword *
-is_keyword (const char *s)
-{
-  int i;
-  int num_keywords = get_num_objects (java_keywords);
-  java_keyword *kw;
+  /* Start state is 0.  */
+  state = 0;
  
-  for (i = 0; i < num_keywords; i++)
+  for (;;)
      {
-      kw = (java_keyword *) get_object (java_keywords, i);
-
-      if (do_compare (s, kw->keyword))
-       return kw;
-    }
-  return NULL;
-}
-
-/**
- * Add a keyword to the list of possible keywords.
- */
-void
-x_java_keyword (const char *keyword)
-{
-  const char *keyword_end;
-  int arg1;
-  int arg2;
-  size_t len;
-  char *kw;
+      token_ty token;
  
-  if (keyword == NULL)
-    {
-      if (java_keywords != NULL)
+      x_java_lex (&token);
+      switch (token.type)
         {
-         object_list_destroy (java_keywords);
-         java_keywords = NULL;
-       }
-      return;
-    }
+       case token_type_symbol:
+         {
+           /* Combine symbol1 . ... . symbolN into a single string, so that
+              we can recognize static function calls like
+              GettextResource.gettext.  The information present for
+              symbolI.....symbolN has precedence over the information for
+              symbolJ.....symbolN with J > I.  */
+           char *sum = token.string;
+           size_t sum_len = strlen (sum);
+           const char *dottedname;
+
+           for (;;)
+             {
+               token_ty token2;
+
+               x_java_lex (&token2);
+               if (token2.type == token_type_dot)
+                 {
+                   token_ty token3;
+
+                   x_java_lex (&token3);
+                   if (token3.type == token_type_symbol)
+                     {
+                       char *addend = token3.string;
+                       size_t addend_len = strlen (addend);
+
+                       sum =
+                         (char *) xrealloc (sum, sum_len + 1 + addend_len + 1);
+                       sum[sum_len] = '.';
+                       memcpy (sum + sum_len + 1, addend, addend_len + 1);
+                       sum_len += 1 + addend_len;
+
+                       free_token (&token3);
+                       free_token (&token2);
+                       continue;
+                     }
+                   x_java_unlex (&token3);
+                 }
+               x_java_unlex (&token2);
+               break;
+             }
+
+           for (dottedname = sum;;)
+             {
+               void *keyword_value;
+
+               if (find_entry (&keywords, dottedname, strlen (dottedname),
+                               &keyword_value)
+                   == 0)
+                 {
+                   int argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
+                   int argnum2 = (int) (long) keyword_value >> 10;
+
+                   next_commas_to_skip = argnum1 - 1;
+                   next_plural_commas = (argnum2 > argnum1 ? argnum2 - argnum1 : 0);
+                   state = 1;
+                   break;
+                 }
+
+               dottedname = strchr (dottedname, '.');
+               if (dottedname == NULL)
+                 {
+                   state = 0;
+                   break;
+                 }
+               dottedname++;
+             }
+           free (sum);
+           continue;
+         }
  
-  if (java_keywords == NULL)
-    java_keywords = object_list_alloc ();
+       case token_type_lparen:
+         if (extract_parenthesized (mlp, token_type_rparen,
+                                    state ? next_commas_to_skip : -1,
+                                    state ? next_plural_commas : 0))
+           return true;
+         state = 0;
+         continue;
+
+       case token_type_rparen:
+         if (terminator == token_type_rparen)
+           return false;
+         if (terminator == token_type_rbrace)
+           {
+             error_with_progname = false;
+             error (0, 0,
+                    _("%s:%d: warning: ')' found where '}' was expected"),
+                    logical_file_name, token.line_number);
+             error_with_progname = true;
+           }
+         state = 0;
+         continue;
+
+       case token_type_lbrace:
+         if (extract_parenthesized (mlp, token_type_rbrace, -1, 0))
+           return true;
+         state = 0;
+         continue;
+
+       case token_type_rbrace:
+         if (terminator == token_type_rbrace)
+           return false;
+         if (terminator == token_type_rparen)
+           {
+             error_with_progname = false;
+             error (0, 0,
+                    _("%s:%d: warning: '}' found where ')' was expected"),
+                    logical_file_name, token.line_number);
+             error_with_progname = true;
+           }
+         state = 0;
+         continue;
  
-  split_keywordspec (keyword, &keyword_end, &arg1, &arg2);
-  len = keyword_end - keyword;
-  kw = (char *) xmalloc (len + 1);
-  memcpy (kw, keyword, len);
-  kw[len] = '\0';
+       case token_type_comma:
+         if (commas_to_skip >= 0)
+           {
+             if (commas_to_skip > 0)
+               commas_to_skip--;
+             else
+               if (plural_mp != NULL && plural_commas > 0)
+                 {
+                   commas_to_skip = plural_commas - 1;
+                   plural_commas = 0;
+                 }
+               else
+                 commas_to_skip = -1;
+           }
+         state = 0;
+         continue;
  
-  /* kw should be a valid Java identifier sequence with dots.
-     A colon means an invalid parse in split_keywordspec().  */
-  if (strchr (kw, ':') == NULL)
-    {
-      if (arg1 == 0)
-       arg1 = 1;
-      add_object (java_keywords, alloc_keyword (kw, arg1, arg2));
-    }
-}
+       case token_type_string_literal:
+         {
+           lex_pos_ty pos;
+           pos.file_name = logical_file_name;
+           pos.line_number = token.line_number;
+
+           if (extract_all)
+             {
+               xgettext_current_source_encoding = po_charset_utf8;
+               x_java_comment_to_xgettext_comment (token.comment);
+               remember_a_message (mlp, token.string, &pos);
+               x_java_comment_reset ();
+               xgettext_current_source_encoding = xgettext_global_source_encoding;
+             }
+           else
+             {
+               if (commas_to_skip == 0)
+                 {
+                   if (plural_mp == NULL)
+                     {
+                       /* Seen an msgid.  */
+                       message_ty *mp;
+
+                       xgettext_current_source_encoding = po_charset_utf8;
+                       x_java_comment_to_xgettext_comment (token.comment);
+                       mp = remember_a_message (mlp, token.string, &pos);
+                       x_java_comment_reset ();
+                       xgettext_current_source_encoding = xgettext_global_source_encoding;
+                       if (plural_commas > 0)
+                         plural_mp = mp;
+                     }
+                   else
+                     {
+                       /* Seen an msgid_plural.  */
+                       xgettext_current_source_encoding = po_charset_utf8;
+                       remember_a_message_plural (plural_mp, token.string,
+                                                  &pos);
+                       xgettext_current_source_encoding = xgettext_global_source_encoding;
+                       plural_mp = NULL;
+                     }
+                 }
+               else
+                 free (token.string);
+             }
+         }
+         drop_reference (token.comment);
+         state = 0;
+         continue;
  
+       case token_type_eof:
+         return true;
  
-/**
- * Free any memory allocated by the tokenizer.
- */
-static void
-free_global ()
-{
-  /**
-   * free memory used by strings and comments as they are strdup'ed
-   * by the lexer.
-   */
-  if (parser_global->string != NULL)
-    {
-      free (parser_global->string);
-      parser_global->string = NULL;
-    }
-  if (parser_global->comment != NULL)
-    {
-      free (parser_global->comment);
-      parser_global->comment = NULL;
+       case token_type_dot:
+       case token_type_number:
+       case token_type_plus:
+       case token_type_other:
+         state = 0;
+         continue;
+
+       default:
+         abort ();
+       }
      }
  }
  
  
-/**
- * Main java keyword extract function.
- */
  void
  extract_java (FILE *f,
               const char *real_filename, const char *logical_filename,
               msgdomain_list_ty *mdlp)
  {
-  char *logical_file_name = xstrdup (logical_filename);
-  int token;
-  PARSER_STATE state = STATE_NONE;
-  PARSER_STATE last_state = STATE_NONE;
-  char *str = NULL;            /* used only if state == STATE_STRING
-                                               || state == STATE_APPEND */
-  char *key = NULL;            /* used only if state == STATE_WORD
-                                               || state == STATE_INVOCATION */
-  message_ty *plural = NULL;   /* used only after state was STATE_KEYWORD */
    message_list_ty *mlp = mdlp->item[0]->messages;
-  java_keyword *current_keyword = NULL;
-  java_keyword *keyword;
-  int argument_counter = 0;
-
-  if (java_keywords == NULL)
-    {
-      /* ops, no standard keywords */
-      x_java_keyword ("GettextResource.gettext:2");    /* static method */
-      x_java_keyword ("GettextResource.ngettext:2,3"); /* static method */
-      x_java_keyword ("gettext");
-      x_java_keyword ("ngettext:1,2");
-      x_java_keyword ("getString");    /* ResourceBundle.getString */
-    }
-
-  memset (parser_global, 0, sizeof (*parser_global));
-  /* first line is 1 */
-  parser_global->line_no = 1;
-
-  yyin = f;
-  do
-    {
-      token = yylex ();
-      switch (token)
-       {
-
-       case JAVA_WORD:
-         if (state == STATE_KEYWORD)
-           {
-             last_state = STATE_KEYWORD;
-             argument_counter ++;
-           }
-         if (state == STATE_INVOCATION)
-           {
-             char *k2;
-             k2 = append_strings (key, ".");
-             free (key);
-             key = append_strings (k2, parser_global->word);
-             free (k2);
-           }
-         else
-           {
-             if (str != NULL)
-               {
-                 free (str);
-                 str = NULL;
-               }
-             state = STATE_WORD;
-             key = xstrdup (parser_global->word);
-           }
-         /* For java we try to match both things like object.methodCall()
-            and methodCall(). */
-         if ((keyword = is_keyword (key)) != NULL
-             || (keyword = is_keyword (parser_global->word)) != NULL)
-           {
-             current_keyword = keyword;
-             free (key);
-             state = STATE_KEYWORD;
-             argument_counter = 1;
-             plural = NULL;
-           }
-         break;
-
-       case JAVA_STRING:
-         if (state == STATE_KEYWORD)
-           last_state = STATE_KEYWORD;
-         if (state == STATE_APPEND)
-           {
-             char *s2;
-             s2 = append_strings (str, translate_esc (parser_global->string));
-             free (str);
-             str = s2;
-           }
-         else
-           str = translate_esc (parser_global->string);
-         state = STATE_STRING;
-         break;
-
-       case JAVA_OPERATOR:
-         if (state == STATE_STRING && isplus (parser_global->operator))
-           state = STATE_APPEND;
-         else
-           {
-             if (str != NULL)
-               {
-                 free (str);
-                 str = NULL;
-               }
-             state = STATE_NONE;
-           }
-         break;
-
-       case JAVA_FLOW:
-         /* Did we get something? */
-         if (state == STATE_STRING
-             && (last_state == STATE_KEYWORD || extract_all_strings))
-           {
-             lex_pos_ty pos;
-             pos.file_name = logical_file_name;
-             pos.line_number = parser_global->line_no;
-             if (extract_all_strings)
-               {
-                 remember_a_message (mlp, str, &pos);
-               }
-             else if (argument_counter == current_keyword->msgid_arg)
-               {
-                 plural = remember_a_message (mlp, str, &pos);
-                 if (current_keyword->msgid_plural_arg == 0)
-                   {
-                     /**
-                      * we don't expect any plural arg, reset state
-                      */
-                     state = STATE_NONE;
-                     last_state = STATE_NONE;
-                     argument_counter = 0;
-                   }
-                 else
-                   {
-                     argument_counter ++;
-                   }
-
-               }
-             else if (argument_counter == current_keyword->msgid_plural_arg
-                      && str != NULL)
-               {
-                 remember_a_message_plural (plural, str, &pos);
-                 state = STATE_NONE;
-                 last_state = STATE_NONE;
-                 argument_counter = 0;
-               }
-             else
-               {
-                 if (str != NULL)
-                   free (str);
-               }
-             str = NULL;
-           }
  
-         if (extract_all_strings)
-           {
-             if (str != NULL)
-               {
-                 free (str);
-                 str = NULL;
-               }
-             state = STATE_NONE;
-             last_state = STATE_NONE;
-           }
+  fp = f;
+  real_file_name = real_filename;
+  logical_file_name = xstrdup (logical_filename);
+  line_number = 1;
  
-         if (state == STATE_WORD && isdot (parser_global->flow))
-           {
-             state = STATE_INVOCATION;
-           }
+  last_comment_line = -1;
+  last_non_comment_line = -1;
  
-         break;
+  phase6_last = token_type_eof;
  
-       case JAVA_COMMENT:
-         if (str != NULL)
-           {
-             free (str);
-             str = NULL;
-           }
-         state = STATE_NONE;
-         last_state = STATE_NONE;
-         xgettext_comment_add (parser_global->comment);
-         break;
+  init_keywords ();
  
-       default:
-         if (str != NULL)
-           {
-             free (str);
-             str = NULL;
-           }
-         state = STATE_NONE;
-       }
-      free_global ();
-    }
-  while (token != -1);
+  /* Eat tokens until eof is seen.  When extract_parenthesized returns
+     due to an unbalanced closing parenthesis, just restart it.  */
+  while (!extract_parenthesized (mlp, token_type_eof, -1, 0))
+    ;
  
-  if (str != NULL)
-    free (str);
+  fp = NULL;
+  real_file_name = NULL;
+  logical_file_name = NULL;
+  line_number = 0;
  }
diff --git a/gettext-tools/src/x-java.l b/gettext-tools/src/x-java.l

deleted file mode 100644 (file)

index 7d13280..0000000
--- a/gettext-tools/src/x-java.l
+++ /dev/null
@@ -1,645 +0,0 @@
-/* xgettext Java backend.                                      -*- C -*-
-   Copyright (C) 2001-2002 Free Software Foundation, Inc.
-   Written by Tommy Johansson <tommy.johansson@kanalen.org>, 2001.
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software Foundation,
-   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
-
-%{
-#ifdef HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#include <ctype.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "message.h"
-#include "x-java.h"
-#include "xgettext.h"
-#include "xmalloc.h"
-#include "strstr.h"
-
-typedef enum
-{
-  JAVA_WORD,
-  JAVA_STRING,
-  JAVA_OPERATOR,
-  JAVA_FLOW,
-  JAVA_COMMENT
-} TOKEN_TYPE;
-
-typedef struct
-{
-  char *word;
-  char *string;
-  char *operator;
-  char *flow;
-  char *comment;
-
-  int line_no;
-} PARSER_GLOBAL;
-
-static PARSER_GLOBAL pg;
-static PARSER_GLOBAL *parser_global = &pg;
-
-typedef enum
-{
-  STATE_NONE,
-  STATE_STRING,
-  STATE_WORD,
-  STATE_APPEND,
-  STATE_INVOCATION,
-  STATE_KEYWORD
-} PARSER_STATE;
-
-typedef struct
-{
-  char *data;
-  int len;
-  int maxlen;
-} char_buf;
-
-
-typedef struct _object_list
-{
-  int num_obj;
-  int max_num_obj;
-  void **objects;
-} object_list;
-
-#define INITIAL_OBJECT_LIST_SIZE 10
-#define OBJECT_LIST_GROWTH 10
-
-typedef struct _java_keyword
-{
-  char *keyword;
-  int msgid_arg;
-  int msgid_plural_arg;
-} java_keyword;
-
-
-#define INITIAL_CHARBUF_SIZE 500
-#define CHARBUF_GROWTH 100
-static char_buf *
-create_char_buf ()
-{
-  char_buf *b = (char_buf *) xmalloc (sizeof (char_buf));
-  b->data = (char *) xmalloc (INITIAL_CHARBUF_SIZE);
-  b->data[0] = '\0';
-  b->len = 0;
-  b->maxlen = INITIAL_CHARBUF_SIZE;
-  return b;
-}
-
-static void
-append_char_buf (char_buf *b, int c)
-{
-  if (b->len >= b->maxlen - 1)
-    {
-      b->data = (char *) xrealloc (b->data, b->maxlen + CHARBUF_GROWTH);
-      b->maxlen += CHARBUF_GROWTH;
-    }
-  b->data[b->len++] = c;
-  b->data[b->len] = '\0';
-}
-
-static char *
-get_string (char_buf *b)
-{
-  return xstrdup (b->data);
-}
-
-static void
-destroy_charbuf (char_buf *b)
-{
-  free (b->data);
-  free (b);
-}
-
-static void
-update_line_no (int c)
-{
-  if (c == '\n')
-    parser_global->line_no++;
-}
-
-static void
-strip_ending_spaces (char *str)
-{
-  int len = strlen (str);
-
-  while (len > 0 && isspace ((unsigned char) str[len - 1]))
-    len--;
-  str[len] = '\0';
-}
-%}
-
-%option noyywrap
-
-NUM [0-9]
-ID [a-zA-Z_][a-zA-Z0-9_]*
-
-%%
-
-"/*" {
-  int c;
-  int last;
-  char *str;
-
-  char_buf *charbuf = create_char_buf ();
-  for (;;)
-    {
-      c = input ();
-      last = input ();
-      update_line_no (c);
-      if ((c == '*' && last == '/') || c == EOF)
-       break;
-      unput (last);
-      append_char_buf (charbuf, c);
-    }
-  str = get_string (charbuf);
-  destroy_charbuf (charbuf);
-  strip_ending_spaces (str);
-  parser_global->comment = str;
-  return JAVA_COMMENT;
-}
-
-{NUM}| {NUM}+"."{NUM}*
-\" {
-  int c;
-  char *str;
-  char_buf *charbuf = create_char_buf ();
-  while ((c = input ()) != EOF && c != '"')
-    {
-      update_line_no (c);
-      append_char_buf (charbuf, c);
-    }
-  str = get_string (charbuf);
-  destroy_charbuf (charbuf);
-  parser_global->string = str;
-  return JAVA_STRING;
-}
-
-{ID} {
-  parser_global->word = yytext;
-  return JAVA_WORD;
-}
-
-"."|"("|")"|";"|"{"|"}"|"["|"]"|","|":"|"\\"|"?"|"\'" {
-  parser_global->flow = yytext;
-  return JAVA_FLOW;
-}
-
-"="|"<"|">"|"+"|"-"|"*"|"/"|"!"|"&"|"|"|"%"|"^"|"~" {
-  parser_global->operator = yytext;
-  return JAVA_OPERATOR;
-}
-
-"#"|"@"|"\r"|"`" /* ignore whitespace */
-
-"//"[^\n]* {
-  parser_global->comment = xstrdup (yytext + 2);
-  return JAVA_COMMENT;
-}
-"\n"|"\r"|"\r\n" parser_global->line_no++;
-[ \t]+
-.
-<<EOF>> return -1;
-%%
-
-static char *
-append_strings (char *a, const char *b)
-{
-  int total_size = strlen (a) + strlen (b) + 1;
-  char *new_string = (char *) xmalloc (total_size);
-  strcpy (new_string, a);
-  strcat (new_string, b);
-  return new_string;
-}
-
-static inline bool
-isplus (char *s)
-{
-  return *s == '+';
-}
-
-static inline bool
-isdot (char *s)
-{
-  return *s == '.';
-}
-
-
-static char *
-translate_esc (char *s)
-{
-  char *n = (char *) xmalloc (strlen (s) + 1);
-  size_t i;
-  size_t j = 0;
-
-  for (i = 0; i < strlen (s); i++)
-    switch (s[i])
-      {
-      case '\\':
-       if (s[i + 1] == 'n')
-         {
-           n[j++] = '\n';
-           i++;
-         }
-       break;
-      default:
-       n[j++] = s[i];
-      }
-  n[j] = '\0';
-  return n;
-}
-
-static object_list *
-object_list_alloc ()
-{
-  object_list *list = xmalloc (sizeof (object_list));
-  list->max_num_obj = INITIAL_OBJECT_LIST_SIZE;
-  list->num_obj = 0;
-  list->objects = xmalloc (sizeof (void *) * INITIAL_OBJECT_LIST_SIZE);
-  return list;
-}
-
-static void
-object_list_destroy (object_list *list)
-{
-  free (list->objects);
-  free (list);
-}
-
-static int
-get_num_objects (const object_list *list)
-{
-  return list->num_obj;
-}
-
-static void *
-get_object (const object_list *list, int i)
-{
-  return list->objects[i];
-}
-
-static void
-add_object (object_list *list, void *object)
-{
-  if (list->num_obj + 1 >= list->max_num_obj)
-    {
-      list->max_num_obj += OBJECT_LIST_GROWTH;
-      list->objects =
-       xrealloc (list->objects, list->max_num_obj * sizeof (void *));
-    }
-  list->objects[list->num_obj ++] = object;
-}
-
-
-/* options */
-static bool extract_all_strings = false;
-
-void
-x_java_extract_all ()
-{
-  extract_all_strings = true;
-}
-
-
-static java_keyword *
-alloc_keyword (const char *keyword, int arg1, int arg2)
-{
-  java_keyword *jk = xmalloc (sizeof (java_keyword));
-  jk->keyword = xstrdup (keyword);
-  jk->msgid_arg = arg1;
-  jk->msgid_plural_arg = arg2;
-  return jk;
-}
-
-static object_list *java_keywords = NULL;
-
-
-/**
- * Backwards substring match.
- */
-static bool
-tailcmp (const char *s1, const char *s2)
-{
-  int len1 = strlen (s1);
-  int len2 = strlen (s2);
-  int start = len1 - len2;
-  if (start < 0)
-    return false;
-  return (start == 0 || s1[start-1] == '.') && (strcmp (s1 + start, s2) == 0);
-}
-
-/**
- * Try to match a string against the keyword. If substring_match is
- * true substring match is used.
- */
-static bool
-do_compare (const char *s1, const char *s2)
-{
-  if (substring_match)
-    return strstr (s1, s2) != NULL;
-  else
-    return tailcmp (s1, s2);
-}
-
-/**
- * Check if a string is a keyword or not.
- */
-static java_keyword *
-is_keyword (const char *s)
-{
-  int i;
-  int num_keywords = get_num_objects (java_keywords);
-  java_keyword *kw;
-
-  for (i = 0; i < num_keywords; i++)
-    {
-      kw = (java_keyword *) get_object (java_keywords, i);
-
-      if (do_compare (s, kw->keyword))
-       return kw;
-    }
-  return NULL;
-}
-
-/**
- * Add a keyword to the list of possible keywords.
- */
-void
-x_java_keyword (const char *keyword)
-{
-  const char *keyword_end;
-  int arg1;
-  int arg2;
-  size_t len;
-  char *kw;
-
-  if (keyword == NULL)
-    {
-      if (java_keywords != NULL)
-       {
-         object_list_destroy (java_keywords);
-         java_keywords = NULL;
-       }
-      return;
-    }
-
-  if (java_keywords == NULL)
-    java_keywords = object_list_alloc ();
-
-  split_keywordspec (keyword, &keyword_end, &arg1, &arg2);
-  len = keyword_end - keyword;
-  kw = (char *) xmalloc (len + 1);
-  memcpy (kw, keyword, len);
-  kw[len] = '\0';
-
-  /* kw should be a valid Java identifier sequence with dots.
-     A colon means an invalid parse in split_keywordspec().  */
-  if (strchr (kw, ':') == NULL)
-    {
-      if (arg1 == 0)
-       arg1 = 1;
-      add_object (java_keywords, alloc_keyword (kw, arg1, arg2));
-    }
-}
-
-
-/**
- * Free any memory allocated by the tokenizer.
- */
-static void
-free_global ()
-{
-  /**
-   * free memory used by strings and comments as they are strdup'ed
-   * by the lexer.
-   */
-  if (parser_global->string != NULL)
-    {
-      free (parser_global->string);
-      parser_global->string = NULL;
-    }
-  if (parser_global->comment != NULL)
-    {
-      free (parser_global->comment);
-      parser_global->comment = NULL;
-    }
-}
-
-
-/**
- * Main java keyword extract function.
- */
-void
-extract_java (FILE *f,
-             const char *real_filename, const char *logical_filename,
-             msgdomain_list_ty *mdlp)
-{
-  char *logical_file_name = xstrdup (logical_filename);
-  int token;
-  PARSER_STATE state = STATE_NONE;
-  PARSER_STATE last_state = STATE_NONE;
-  char *str = NULL;            /* used only if state == STATE_STRING
-                                               || state == STATE_APPEND */
-  char *key = NULL;            /* used only if state == STATE_WORD
-                                               || state == STATE_INVOCATION */
-  message_ty *plural = NULL;   /* used only after state was STATE_KEYWORD */
-  message_list_ty *mlp = mdlp->item[0]->messages;
-  java_keyword *current_keyword = NULL;
-  java_keyword *keyword;
-  int argument_counter = 0;
-
-  if (java_keywords == NULL)
-    {
-      /* ops, no standard keywords */
-      x_java_keyword ("GettextResource.gettext:2");    /* static method */
-      x_java_keyword ("GettextResource.ngettext:2,3"); /* static method */
-      x_java_keyword ("gettext");
-      x_java_keyword ("ngettext:1,2");
-      x_java_keyword ("getString");    /* ResourceBundle.getString */
-    }
-
-  memset (parser_global, 0, sizeof (*parser_global));
-  /* first line is 1 */
-  parser_global->line_no = 1;
-
-  yyin = f;
-  do
-    {
-      token = yylex ();
-      switch (token)
-       {
-
-       case JAVA_WORD:
-         if (state == STATE_KEYWORD)
-           {
-             last_state = STATE_KEYWORD;
-             argument_counter ++;
-           }
-         if (state == STATE_INVOCATION)
-           {
-             char *k2;
-             k2 = append_strings (key, ".");
-             free (key);
-             key = append_strings (k2, parser_global->word);
-             free (k2);
-           }
-         else
-           {
-             if (str != NULL)
-               {
-                 free (str);
-                 str = NULL;
-               }
-             state = STATE_WORD;
-             key = xstrdup (parser_global->word);
-           }
-         /* For java we try to match both things like object.methodCall()
-            and methodCall(). */
-         if ((keyword = is_keyword (key)) != NULL
-             || (keyword = is_keyword (parser_global->word)) != NULL)
-           {
-             current_keyword = keyword;
-             free (key);
-             state = STATE_KEYWORD;
-             argument_counter = 1;
-             plural = NULL;
-           }
-         break;
-
-       case JAVA_STRING:
-         if (state == STATE_KEYWORD)
-           last_state = STATE_KEYWORD;
-         if (state == STATE_APPEND)
-           {
-             char *s2;
-             s2 = append_strings (str, translate_esc (parser_global->string));
-             free (str);
-             str = s2;
-           }
-         else
-           str = translate_esc (parser_global->string);
-         state = STATE_STRING;
-         break;
-
-       case JAVA_OPERATOR:
-         if (state == STATE_STRING && isplus (parser_global->operator))
-           state = STATE_APPEND;
-         else
-           {
-             if (str != NULL)
-               {
-                 free (str);
-                 str = NULL;
-               }
-             state = STATE_NONE;
-           }
-         break;
-
-       case JAVA_FLOW:
-         /* Did we get something? */
-         if (state == STATE_STRING
-             && (last_state == STATE_KEYWORD || extract_all_strings))
-           {
-             lex_pos_ty pos;
-             pos.file_name = logical_file_name;
-             pos.line_number = parser_global->line_no;
-             if (extract_all_strings)
-               {
-                 remember_a_message (mlp, str, &pos);
-               }
-             else if (argument_counter == current_keyword->msgid_arg)
-               {
-                 plural = remember_a_message (mlp, str, &pos);
-                 if (current_keyword->msgid_plural_arg == 0)
-                   {
-                     /**
-                      * we don't expect any plural arg, reset state
-                      */
-                     state = STATE_NONE;
-                     last_state = STATE_NONE;
-                     argument_counter = 0;
-                   }
-                 else
-                   {
-                     argument_counter ++;
-                   }
-
-               }
-             else if (argument_counter == current_keyword->msgid_plural_arg
-                      && str != NULL)
-               {
-                 remember_a_message_plural (plural, str, &pos);
-                 state = STATE_NONE;
-                 last_state = STATE_NONE;
-                 argument_counter = 0;
-               }
-             else
-               {
-                 if (str != NULL)
-                   free (str);
-               }
-             str = NULL;
-           }
-
-         if (extract_all_strings)
-           {
-             if (str != NULL)
-               {
-                 free (str);
-                 str = NULL;
-               }
-             state = STATE_NONE;
-             last_state = STATE_NONE;
-           }
-
-         if (state == STATE_WORD && isdot (parser_global->flow))
-           {
-             state = STATE_INVOCATION;
-           }
-
-         break;
-
-       case JAVA_COMMENT:
-         if (str != NULL)
-           {
-             free (str);
-             str = NULL;
-           }
-         state = STATE_NONE;
-         last_state = STATE_NONE;
-         xgettext_comment_add (parser_global->comment);
-         break;
-
-       default:
-         if (str != NULL)
-           {
-             free (str);
-             str = NULL;
-           }
-         state = STATE_NONE;
-       }
-      free_global ();
-    }
-  while (token != -1);
-
-  if (str != NULL)
-    free (str);
-}
diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog

index 7cee36e77717a6cd49d12c8423842b3867819937..5f20369e59950965795290afed1175da2777b4e3 100644 (file)
--- a/gettext-tools/tests/ChangeLog
+++ b/gettext-tools/tests/ChangeLog
@@ -1,3 +1,8 @@
+2003-10-04  Bruno Haible  <bruno@clisp.org>
+
+       * xgettext-11: Test details of the new Java backend, instead of the
+       --keyword-substring option.
+
  2003-10-07  Bruno Haible  <bruno@clisp.org>
  
         * lang-perl-1: Set LC_CTYPE to empty in while setting LANG.
diff --git a/gettext-tools/tests/xgettext-11 b/gettext-tools/tests/xgettext-11

index 0d81ede37f8622c710ad0ed3558c021e76c0b361..85646a7be767217f5ca10cff99556e18282711b1 100755 (executable)
--- a/gettext-tools/tests/xgettext-11
+++ b/gettext-tools/tests/xgettext-11
@@ -1,41 +1,176 @@
  #!/bin/sh
  #
-# Some tests for java substring keyword support
+# More tests for java support
  #
  
  tmpfiles=""
  trap 'rm -fr $tmpfiles' 1 2 3 15
  
  tmpfiles="$tmpfiles xg-test11.java"
-cat <<EOF > xg-test11.java
+cat <<\EOF > xg-test11.java
  class TestCase {
-    public TestCase() {
-          // test exception
-          throw new TestException("Test exception");
-          // empty exception text
-          throw new TestException("");
-    }
+  public static void main (String[] args) {
+    // Test recognition of \u escapes: Böse Bübchen
+    gettext ("B\u00f6se B\u00fcbchen");
+    // Test recognition of \u escapes with different number of u
+    gettext ("Japanese: \uu65e5\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu672c\u8A9e");
+    // Test recognition of \u escapes when they form UTF-16 surrogates
+    // Here: U+1D49E MATHEMATICAL SCRIPT CAPITAL C
+    gettext ("script \ud835\udc9e = ...");
+    // Test invalid surrogates.
+    gettext ("invalid surrogate \ud835 first half");
+    gettext ("invalid surrogate \udc9e second half");
+    // Don't let the line numbers be confused by \u newlines.
+    \u000a \u000d \u000d\u000a
+    gettext ("embedded\nnewline");
+    // Spaces from end of comment are removed.    \u000agettext("dummy");
+    // Various ways to write a backslash are equivalent.
+    gettext ("\u005c\u005c");
+    gettext ("\u005c\");
+    gettext ("\\u005c");
+    gettext ("\\");
+    gettext ("\134");
+    // Escape sequences in strings.
+    gettext ("t -> \t, b -> \b, n -> \n, dquote -> \", squote -> \' ...");
+    // Octal escapes have 2 or 3 digits, depending on the initial digit.
+    gettext ("bel: \7\nnewline: \12backslash: \134\ndquote-zero: \420\n");
+    // Hex escapes are not recognized
+    gettext ("no bel: \x07\n");
+    gettext // Recognized despite comments
+       ( /* Even across multiline
+comment! */ "this is a single " /* now comes the concatenation! */ + // after +
+         "long line");
+    // Character literals are not extracted.
+    gettext ('x');
+    // Invalid concatenations are not concatenated.
+    gettext ("fooba"+'r');
+    // Verify that the static function name has priority.
+    GettextResource.gettext("NOT extracted", "this one is extracted");
+    // Verify that a comma inside braces is hidden.
+    GettextResource.gettext(
+        new Object() {
+          public int foo () {
+            return 5, 8;
+          }
+        },
+        "this is the second argument");
+  }
  }
  EOF
  
-tmpfiles="$tmpfiles xg-test11.po"
+tmpfiles="$tmpfiles xg-test11.tmp xg-test11.pot"
  : ${XGETTEXT=xgettext}
-${XGETTEXT} --omit-header --no-location -c --keyword-substring --keyword=Exception -d xg-test11 xg-test11.java
+${XGETTEXT} --from-code=ISO-8859-1 -c -o xg-test11.tmp xg-test11.java 2>/dev/null
  test $? = 0 || { rm -fr $tmpfiles; exit 1; }
+grep -v 'POT-Creation-Date' < xg-test11.tmp > xg-test11.pot
  
  tmpfiles="$tmpfiles xg-test11.ok"
-cat <<EOF > xg-test11.ok
-#. test exception
-msgid "Test exception"
+cat <<\EOF > xg-test11.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#. Test recognition of \u escapes: Böse Bübchen
+#: xg-test11.java:4
+msgid "Böse Bübchen"
+msgstr ""
+
+#. Test recognition of \u escapes with different number of u
+#: xg-test11.java:6
+msgid "Japanese: 日本語"
+msgstr ""
+
+#. Test recognition of \u escapes when they form UTF-16 surrogates
+#. Here: U+1D49E MATHEMATICAL SCRIPT CAPITAL C
+#: xg-test11.java:9
+msgid "script 𝒞 = ..."
+msgstr ""
+
+#. Test invalid surrogates.
+#: xg-test11.java:11
+msgid "invalid surrogate � first half"
+msgstr ""
+
+#: xg-test11.java:12
+msgid "invalid surrogate ��� second half"
+msgstr ""
+
+#. Don't let the line numbers be confused by \u newlines.
+#: xg-test11.java:15
+msgid ""
+"embedded\n"
+"newline"
+msgstr ""
+
+#. Spaces from end of comment are removed.
+#: xg-test11.java:16
+msgid "dummy"
+msgstr ""
+
+#. Various ways to write a backslash are equivalent.
+#: xg-test11.java:18 xg-test11.java:19 xg-test11.java:20 xg-test11.java:21
+#: xg-test11.java:22
+msgid "\\"
+msgstr ""
+
+#. Escape sequences in strings.
+#: xg-test11.java:24
+msgid ""
+"t -> \t, b -> \b, n -> \n"
+", dquote -> \", squote -> ' ..."
  msgstr ""
  
-#. empty exception text
+#. Octal escapes have 2 or 3 digits, depending on the initial digit.
+#: xg-test11.java:26
  msgid ""
+"bel: \a\n"
+"newline: \n"
+"backslash: \\\n"
+"dquote-zero: \"0\n"
+msgstr ""
+
+#. Hex escapes are not recognized
+#: xg-test11.java:28
+msgid "no bel: \\x07\n"
+msgstr ""
+
+#. Recognized despite comments
+#. Even across multiline
+#. comment!
+#: xg-test11.java:31
+msgid "this is a single long line"
+msgstr ""
+
+#. Invalid concatenations are not concatenated.
+#: xg-test11.java:36
+msgid "fooba"
+msgstr ""
+
+#. Verify that the static function name has priority.
+#: xg-test11.java:38
+msgid "this one is extracted"
+msgstr ""
+
+#: xg-test11.java:46
+msgid "this is the second argument"
  msgstr ""
  EOF
  
  : ${DIFF=diff}
-${DIFF} xg-test11.ok xg-test11.po
+${DIFF} xg-test11.ok xg-test11.pot
  result=$?
  
  rm -fr $tmpfiles
author	Bruno Haible <bruno@clisp.org>
	Wed, 8 Oct 2003 10:01:55 +0000 (10:01 +0000)
committer	Bruno Haible <bruno@clisp.org>
	Tue, 23 Jun 2009 10:11:02 +0000 (12:11 +0200)
gettext-tools/ChangeLog		patch \| blob \| blame \| history
gettext-tools/configure.ac		patch \| blob \| blame \| history
gettext-tools/m4/ChangeLog		patch \| blob \| blame \| history
gettext-tools/m4/Makefile.am		patch \| blob \| blame \| history
gettext-tools/m4/flex.m4	[deleted file]	patch \| blob \| blame \| history
gettext-tools/src/ChangeLog		patch \| blob \| blame \| history
gettext-tools/src/FILES		patch \| blob \| blame \| history
gettext-tools/src/Makefile.am		patch \| blob \| blame \| history
gettext-tools/src/x-java.c		patch \| blob \| blame \| history
gettext-tools/src/x-java.l	[deleted file]	patch \| blob \| blame \| history
gettext-tools/tests/ChangeLog		patch \| blob \| blame \| history
gettext-tools/tests/xgettext-11		patch \| blob \| blame \| history