From: Bruno Haible Date: Wed, 8 Oct 2003 10:01:55 +0000 (+0000) Subject: New Java backend x-java.c, replaces the old Java backend x-java.l. X-Git-Tag: v0.13~220 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fc1864327d50166ac171bbf813658e23b554d696;p=thirdparty%2Fgettext.git New Java backend x-java.c, replaces the old Java backend x-java.l. --- diff --git a/gettext-tools/ChangeLog b/gettext-tools/ChangeLog index a45c20f1f..dec1ad95e 100644 --- a/gettext-tools/ChangeLog +++ b/gettext-tools/ChangeLog @@ -1,3 +1,7 @@ +2003-10-04 Bruno Haible + + * configure.ac: Remove gt_PROG_LEX invocation. + 2003-09-18 Bruno Haible * windows/gettextpo.def: Add po_file_domain_header, po_header_field, diff --git a/gettext-tools/configure.ac b/gettext-tools/configure.ac index f7c44a1fc..35d319217 100644 --- a/gettext-tools/configure.ac +++ b/gettext-tools/configure.ac @@ -33,7 +33,6 @@ dnl Checks for programs. AC_PROG_CC AC_PROG_INSTALL AC_PROG_YACC -gt_PROG_LEX gt_GCJ if test -n "$HAVE_GCJ"; then diff --git a/gettext-tools/m4/ChangeLog b/gettext-tools/m4/ChangeLog index 1c1ebea4f..ce8f86090 100644 --- a/gettext-tools/m4/ChangeLog +++ b/gettext-tools/m4/ChangeLog @@ -1,3 +1,8 @@ +2003-10-04 Bruno Haible + + * flex.m4: Remove file. + * Makefile.am (EXTRA_DIST): Remove it. + 2003-09-04 Bruno Haible * locale-fr.m4 (gt_LOCALE_FR): Add support for Solaris 7. diff --git a/gettext-tools/m4/Makefile.am b/gettext-tools/m4/Makefile.am index 823951699..dc9018e51 100644 --- a/gettext-tools/m4/Makefile.am +++ b/gettext-tools/m4/Makefile.am @@ -40,7 +40,6 @@ canonicalize.m4 \ eaccess.m4 \ error.m4 \ extensions.m4 \ -flex.m4 \ fnmatch.m4 \ gcj.m4 \ getline.m4 \ diff --git a/gettext-tools/m4/flex.m4 b/gettext-tools/m4/flex.m4 deleted file mode 100644 index d3409a905..000000000 --- a/gettext-tools/m4/flex.m4 +++ /dev/null @@ -1,16 +0,0 @@ -# flex.m4 serial 2 (gettext-0.12) -dnl Copyright (C) 2001-2003 Free Software Foundation, Inc. 
-dnl This file is free software, distributed under the terms of the GNU -dnl General Public License. As a special exception to the GNU General -dnl Public License, this file may be distributed as part of a program -dnl that contains a configuration script generated by Autoconf, under -dnl the same distribution terms as the rest of that program. - -# Check for flex. - -AC_DEFUN([gt_PROG_LEX], -[ - dnl Don't use AC_PROG_LEX or AM_PROG_LEX; we insist on flex. - dnl Thus we don't need LEXLIB. - AC_CHECK_PROG(LEX, flex, flex, :) -]) diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index eb692dba6..36df08a02 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,12 @@ +2003-10-04 Bruno Haible + + New Java backend. + * x-java.c: New file. + * x-java.l: Remove file. + * Makefile.am (xgettext_SOURCES): Add x-java.c, remove x-java.l. + (x-java.c): Remove rule. + * FILES: Update. + 2003-09-22 Bruno Haible * x-glade.c (start_element_handler): Implement extract_all behaviour. diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES index bb48ecdce..9f4ad08ee 100644 --- a/gettext-tools/src/FILES +++ b/gettext-tools/src/FILES @@ -238,7 +238,7 @@ format.c Table of the language dependent format string handlers. | x-smalltalk.c | String extractor for Smalltalk. | x-java.h -| x-java.l +| x-java.c | String extractor for Java. 
| x-awk.h | x-awk.c diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index 3293c1af7..29c45c310 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -120,7 +120,7 @@ msgmerge_SOURCES = msgmerge.c plural-count.c msgunfmt_SOURCES = msgunfmt.c read-mo.c read-java.c read-tcl.c xgettext_SOURCES = xgettext.c \ x-c.c x-po.c x-sh.c x-python.c x-lisp.c x-elisp.c x-librep.c x-smalltalk.c \ - x-java.l x-awk.c x-ycp.c x-tcl.c x-perl.c x-php.c x-rst.c x-glade.c + x-java.c x-awk.c x-ycp.c x-tcl.c x-perl.c x-php.c x-rst.c x-glade.c msgattrib_SOURCES = msgattrib.c msgcat_SOURCES = msgcat.c msgcomm_SOURCES = msgcomm.c @@ -225,17 +225,6 @@ po-gram-gen2.h: po-gram-gen.h $(SED) -e 's/yy/po_gram_/g' -e 's/extern /extern DLL_VARIABLE /' $(srcdir)/po-gram-gen.h > $@-tmp mv $@-tmp $@ -# We must add the '#include ' here, not inside x-java.l, -# because on VMS, must be included before . -x-java.c: x-java.l - test "$(LEX)" = ":" || { \ - $(LEX) -o$@-tmp -Px_java_yy $(srcdir)/x-java.l && \ - (echo '#ifdef VMS'; echo '#include '; echo '#endif'; \ - cat $@-tmp) > $@-tmq && \ - rm -f $@-tmp && \ - mv $@-tmq $@ ; \ - } - # Special rules for installation of auxiliary programs. 
diff --git a/gettext-tools/src/x-java.c b/gettext-tools/src/x-java.c index ac8bf2858..dd0f9cd27 100644 --- a/gettext-tools/src/x-java.c +++ b/gettext-tools/src/x-java.c @@ -1,420 +1,6 @@ -#ifdef VMS -#include -#endif -#define yy_create_buffer x_java_yy_create_buffer -#define yy_delete_buffer x_java_yy_delete_buffer -#define yy_scan_buffer x_java_yy_scan_buffer -#define yy_scan_string x_java_yy_scan_string -#define yy_scan_bytes x_java_yy_scan_bytes -#define yy_flex_debug x_java_yy_flex_debug -#define yy_init_buffer x_java_yy_init_buffer -#define yy_flush_buffer x_java_yy_flush_buffer -#define yy_load_buffer_state x_java_yy_load_buffer_state -#define yy_switch_to_buffer x_java_yy_switch_to_buffer -#define yyin x_java_yyin -#define yyleng x_java_yyleng -#define yylex x_java_yylex -#define yyout x_java_yyout -#define yyrestart x_java_yyrestart -#define yytext x_java_yytext - -#line 19 "x-java.c-tmp" -/* A lexical scanner generated by flex */ - -/* Scanner skeleton version: - * $Header: /home/daffy/u0/vern/flex/RCS/flex.skl,v 2.91 96/09/10 16:58:48 vern Exp $ - */ - -#define FLEX_SCANNER -#define YY_FLEX_MAJOR_VERSION 2 -#define YY_FLEX_MINOR_VERSION 5 - -#include - - -/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ -#ifdef c_plusplus -#ifndef __cplusplus -#define __cplusplus -#endif -#endif - - -#ifdef __cplusplus - -#include -#include - -/* Use prototypes in function declarations. */ -#define YY_USE_PROTOS - -/* The "const" storage-class-modifier is valid. */ -#define YY_USE_CONST - -#else /* ! __cplusplus */ - -#if __STDC__ - -#define YY_USE_PROTOS -#define YY_USE_CONST - -#endif /* __STDC__ */ -#endif /* ! 
__cplusplus */ - -#ifdef __TURBOC__ - #pragma warn -rch - #pragma warn -use -#include -#include -#define YY_USE_CONST -#define YY_USE_PROTOS -#endif - -#ifdef YY_USE_CONST -#define yyconst const -#else -#define yyconst -#endif - - -#ifdef YY_USE_PROTOS -#define YY_PROTO(proto) proto -#else -#define YY_PROTO(proto) () -#endif - -/* Returned upon end-of-file. */ -#define YY_NULL 0 - -/* Promotes a possibly negative, possibly signed char to an unsigned - * integer for use as an array index. If the signed char is negative, - * we want to instead treat it as an 8-bit unsigned char, hence the - * double cast. - */ -#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) - -/* Enter a start condition. This macro really ought to take a parameter, - * but we do it the disgusting crufty way forced on us by the ()-less - * definition of BEGIN. - */ -#define BEGIN yy_start = 1 + 2 * - -/* Translate the current start state into a value that can be later handed - * to BEGIN to return to the state. The YYSTATE alias is for lex - * compatibility. - */ -#define YY_START ((yy_start - 1) / 2) -#define YYSTATE YY_START - -/* Action number for EOF rule of a given start state. */ -#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) - -/* Special action meaning "start processing a new file". */ -#define YY_NEW_FILE yyrestart( yyin ) - -#define YY_END_OF_BUFFER_CHAR 0 - -/* Size of default input buffer. */ -#define YY_BUF_SIZE 16384 - -typedef struct yy_buffer_state *YY_BUFFER_STATE; - -extern int yyleng; -extern FILE *yyin, *yyout; - -#define EOB_ACT_CONTINUE_SCAN 0 -#define EOB_ACT_END_OF_FILE 1 -#define EOB_ACT_LAST_MATCH 2 - -/* The funky do-while in the following #define is used to turn the definition - * int a single C statement (which needs a semi-colon terminator). 
This - * avoids problems with code like: - * - * if ( condition_holds ) - * yyless( 5 ); - * else - * do_something_else(); - * - * Prior to using the do-while the compiler would get upset at the - * "else" because it interpreted the "if" statement as being all - * done when it reached the ';' after the yyless() call. - */ - -/* Return all but the first 'n' matched characters back to the input stream. */ - -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. */ \ - *yy_cp = yy_hold_char; \ - YY_RESTORE_YY_MORE_OFFSET \ - yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \ - YY_DO_BEFORE_ACTION; /* set up yytext again */ \ - } \ - while ( 0 ) - -#define unput(c) yyunput( c, yytext_ptr ) - -/* The following is because we cannot portably get our hands on size_t - * (without autoconf's help, which isn't available because we want - * flex-generated scanners to compile on their own). - */ -typedef unsigned int yy_size_t; - - -struct yy_buffer_state - { - FILE *yy_input_file; - - char *yy_ch_buf; /* input buffer */ - char *yy_buf_pos; /* current position in input buffer */ - - /* Size of input buffer in bytes, not including room for EOB - * characters. - */ - yy_size_t yy_buf_size; - - /* Number of characters read into yy_ch_buf, not including EOB - * characters. - */ - int yy_n_chars; - - /* Whether we "own" the buffer - i.e., we know we created it, - * and can realloc() it to grow it, and should free() it to - * delete it. - */ - int yy_is_our_buffer; - - /* Whether this is an "interactive" input source; if so, and - * if we're using stdio for input, then we want to use getc() - * instead of fread(), to make sure we stop fetching input after - * each newline. - */ - int yy_is_interactive; - - /* Whether we're considered to be at the beginning of a line. - * If so, '^' rules will be active on the next match, otherwise - * not. - */ - int yy_at_bol; - - /* Whether to try to fill the input buffer when we reach the - * end of it. 
- */ - int yy_fill_buffer; - - int yy_buffer_status; -#define YY_BUFFER_NEW 0 -#define YY_BUFFER_NORMAL 1 - /* When an EOF's been seen but there's still some text to process - * then we mark the buffer as YY_EOF_PENDING, to indicate that we - * shouldn't try reading from the input source any more. We might - * still have a bunch of tokens to match, though, because of - * possible backing-up. - * - * When we actually see the EOF, we change the status to "new" - * (via yyrestart()), so that the user can continue scanning by - * just pointing yyin at a new input file. - */ -#define YY_BUFFER_EOF_PENDING 2 - }; - -static YY_BUFFER_STATE yy_current_buffer = 0; - -/* We provide macros for accessing buffer states in case in the - * future we want to put the buffer states in a more general - * "scanner state". - */ -#define YY_CURRENT_BUFFER yy_current_buffer - - -/* yy_hold_char holds the character lost when yytext is formed. */ -static char yy_hold_char; - -static int yy_n_chars; /* number of characters read into yy_ch_buf */ - - -int yyleng; - -/* Points to current character in buffer. */ -static char *yy_c_buf_p = (char *) 0; -static int yy_init = 1; /* whether we need to initialize */ -static int yy_start = 0; /* start state number */ - -/* Flag which is used to allow yywrap()'s to do buffer switches - * instead of setting up a fresh yyin. A bit of a hack ... 
- */ -static int yy_did_buffer_switch_on_eof; - -void yyrestart YY_PROTO(( FILE *input_file )); - -void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); -void yy_load_buffer_state YY_PROTO(( void )); -YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); -void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); -void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); -void yy_flush_buffer YY_PROTO(( YY_BUFFER_STATE b )); -#define YY_FLUSH_BUFFER yy_flush_buffer( yy_current_buffer ) - -YY_BUFFER_STATE yy_scan_buffer YY_PROTO(( char *base, yy_size_t size )); -YY_BUFFER_STATE yy_scan_string YY_PROTO(( yyconst char *yy_str )); -YY_BUFFER_STATE yy_scan_bytes YY_PROTO(( yyconst char *bytes, int len )); - -static void *yy_flex_alloc YY_PROTO(( yy_size_t )); -static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t )); -static void yy_flex_free YY_PROTO(( void * )); - -#define yy_new_buffer yy_create_buffer - -#define yy_set_interactive(is_interactive) \ - { \ - if ( ! yy_current_buffer ) \ - yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ - yy_current_buffer->yy_is_interactive = is_interactive; \ - } - -#define yy_set_bol(at_bol) \ - { \ - if ( ! yy_current_buffer ) \ - yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ - yy_current_buffer->yy_at_bol = at_bol; \ - } - -#define YY_AT_BOL() (yy_current_buffer->yy_at_bol) - - -#define yywrap() 1 -#define YY_SKIP_YYWRAP -typedef unsigned char YY_CHAR; -FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; -typedef int yy_state_type; -extern char *yytext; -#define yytext_ptr yytext - -static yy_state_type yy_get_previous_state YY_PROTO(( void )); -static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); -static int yy_get_next_buffer YY_PROTO(( void )); -static void yy_fatal_error YY_PROTO(( yyconst char msg[] )); - -/* Done after the current pattern has been matched and before the - * corresponding action - sets up yytext. 
- */ -#define YY_DO_BEFORE_ACTION \ - yytext_ptr = yy_bp; \ - yyleng = (int) (yy_cp - yy_bp); \ - yy_hold_char = *yy_cp; \ - *yy_cp = '\0'; \ - yy_c_buf_p = yy_cp; - -#define YY_NUM_RULES 12 -#define YY_END_OF_BUFFER 13 -static yyconst short int yy_accept[26] = - { 0, - 0, 0, 13, 11, 10, 9, 7, 6, 3, 7, - 5, 6, 2, 4, 11, 10, 9, 1, 8, 4, - 0, 8, 2, 2, 0 - } ; - -static yyconst int yy_ec[256] = - { 0, - 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, - 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 5, 6, 7, 1, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 17, 18, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 28, 29, 30, 31, 27, 32, 27, 27, 27, 27, - - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 33, 34, 35, 36, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1 - } ; - -static yyconst int yy_meta[38] = - { 0, - 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1 - } ; - -static yyconst short int yy_base[27] = - { 0, - 0, 0, 53, 54, 50, 54, 48, 54, 54, 54, - 54, 25, 54, 20, 31, 47, 54, 54, 0, 21, - 25, 0, 27, 26, 54, 40 - } ; - -static yyconst short int yy_def[27] = - { 0, - 25, 1, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 26, 25, - 25, 26, 25, 25, 0, 25 - } ; - -static yyconst short int yy_nxt[92] = - { 0, - 4, 5, 6, 7, 8, 9, 10, 8, 8, 11, - 11, 11, 8, 8, 11, 8, 11, 12, 13, 11, - 11, 8, 8, 8, 11, 10, 14, 11, 11, 11, - 8, 10, 11, 8, 11, 8, 15, 18, 20, 
20, - 22, 23, 19, 21, 24, 24, 20, 20, 16, 21, - 17, 16, 25, 3, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25 - - } ; - -static yyconst short int yy_chk[92] = - { 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 12, 14, 20, - 26, 21, 12, 21, 24, 23, 14, 20, 16, 15, - 7, 5, 3, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25 - - } ; - -static yy_state_type yy_last_accepting_state; -static char *yy_last_accepting_cpos; - -/* The intent behind this definition is that it'll catch - * any uses of REJECT which flex missed. - */ -#define REJECT reject_used_but_not_detected -#define yymore() yymore_used_but_not_detected -#define YY_MORE_ADJ 0 -#define YY_RESTORE_YY_MORE_OFFSET -char *yytext; -#line 1 "./x-java.l" -#define INITIAL 0 -/* xgettext Java backend. -*- C -*- - Copyright (C) 2001-2002 Free Software Foundation, Inc. - Written by Tommy Johansson , 2001. +/* xgettext Java backend. + Copyright (C) 2003 Free Software Foundation, Inc. + Written by Bruno Haible , 2003. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -429,12 +15,13 @@ char *yytext; You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ -#line 20 "./x-java.l" + #ifdef HAVE_CONFIG_H -# include +# include "config.h" #endif -#include +#include +#include #include #include #include @@ -442,1776 +29,1447 @@ char *yytext; #include "message.h" #include "x-java.h" #include "xgettext.h" +#include "error.h" #include "xmalloc.h" -#include "strstr.h" +#include "exit.h" +#include "hash.h" +#include "po-charset.h" +#include "utf16-ucs4.h" +#include "ucs4-utf8.h" +#include "gettext.h" -typedef enum -{ - JAVA_WORD, - JAVA_STRING, - JAVA_OPERATOR, - JAVA_FLOW, - JAVA_COMMENT -} TOKEN_TYPE; - -typedef struct -{ - char *word; - char *string; - char *operator; - char *flow; - char *comment; +#define _(s) gettext(s) - int line_no; -} PARSER_GLOBAL; -static PARSER_GLOBAL pg; -static PARSER_GLOBAL *parser_global = &pg; - -typedef enum -{ - STATE_NONE, - STATE_STRING, - STATE_WORD, - STATE_APPEND, - STATE_INVOCATION, - STATE_KEYWORD -} PARSER_STATE; - -typedef struct -{ - char *data; - int len; - int maxlen; -} char_buf; +/* The Java syntax is defined in the + Java Language Specification, Second Edition, + (available from http://java.sun.com/), + chapter 3 "Lexical Structure". */ -typedef struct _object_list -{ - int num_obj; - int max_num_obj; - void **objects; -} object_list; +/* ====================== Keyword set customization. ====================== */ -#define INITIAL_OBJECT_LIST_SIZE 10 -#define OBJECT_LIST_GROWTH 10 +/* If true extract all strings. 
*/ +static bool extract_all = false; -typedef struct _java_keyword -{ - char *keyword; - int msgid_arg; - int msgid_plural_arg; -} java_keyword; +static hash_table keywords; +static bool default_keywords = true; -#define INITIAL_CHARBUF_SIZE 500 -#define CHARBUF_GROWTH 100 -static char_buf * -create_char_buf () +void +x_java_extract_all () { - char_buf *b = (char_buf *) xmalloc (sizeof (char_buf)); - b->data = (char *) xmalloc (INITIAL_CHARBUF_SIZE); - b->data[0] = '\0'; - b->len = 0; - b->maxlen = INITIAL_CHARBUF_SIZE; - return b; + extract_all = true; } -static void -append_char_buf (char_buf *b, int c) + +void +x_java_keyword (const char *name) { - if (b->len >= b->maxlen - 1) + if (name == NULL) + default_keywords = false; + else { - b->data = (char *) xrealloc (b->data, b->maxlen + CHARBUF_GROWTH); - b->maxlen += CHARBUF_GROWTH; - } - b->data[b->len++] = c; - b->data[b->len] = '\0'; -} + const char *end; + int argnum1; + int argnum2; + const char *colon; -static char * -get_string (char_buf *b) -{ - return xstrdup (b->data); -} + if (keywords.table == NULL) + init_hash (&keywords, 100); -static void -destroy_charbuf (char_buf *b) -{ - free (b->data); - free (b); + split_keywordspec (name, &end, &argnum1, &argnum2); + + /* The characters between name and end should form a valid Java + identifier sequence with dots. + A colon means an invalid parse in split_keywordspec(). */ + colon = strchr (name, ':'); + if (colon == NULL || colon >= end) + { + if (argnum1 == 0) + argnum1 = 1; + insert_entry (&keywords, name, end - name, + (void *) (long) (argnum1 + (argnum2 << 10))); + } + } } +/* Finish initializing the keywords hash table. + Called after argument processing, before each file is processed. 
*/ static void -update_line_no (int c) +init_keywords () { - if (c == '\n') - parser_global->line_no++; + if (default_keywords) + { + x_java_keyword ("GettextResource.gettext:2"); /* static method */ + x_java_keyword ("GettextResource.ngettext:2,3"); /* static method */ + x_java_keyword ("gettext"); + x_java_keyword ("ngettext:1,2"); + x_java_keyword ("getString"); /* ResourceBundle.getString */ + default_keywords = false; + } } -static void -strip_ending_spaces (char *str) -{ - int len = strlen (str); - while (len > 0 && isspace ((unsigned char) str[len - 1])) - len--; - str[len] = '\0'; -} -#line 559 "x-java.c-tmp" +/* ======================== Reading of characters. ======================== */ -/* Macros after this point can all be overridden by user definitions in - * section 1. - */ +/* Real filename, used in error messages about the input file. */ +static const char *real_file_name; -#ifndef YY_SKIP_YYWRAP -#ifdef __cplusplus -extern "C" int yywrap YY_PROTO(( void )); -#else -extern int yywrap YY_PROTO(( void )); -#endif -#endif +/* Logical filename and line number, used to label the extracted messages. */ +static char *logical_file_name; +static int line_number; -#ifndef YY_NO_UNPUT -static void yyunput YY_PROTO(( int c, char *buf_ptr )); -#endif +/* The input file stream. */ +static FILE *fp; -#ifndef yytext_ptr -static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int )); -#endif -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen YY_PROTO(( yyconst char * )); -#endif +/* Fetch the next single-byte character from the input file. + Pushback can consist of an unlimited number of 'u' followed by up to 4 + other characters. */ -#ifndef YY_NO_INPUT -#ifdef __cplusplus -static int yyinput YY_PROTO(( void )); -#else -static int input YY_PROTO(( void )); -#endif -#endif +/* Special coding of multiple 'u's in the pushback buffer. 
*/ +#define MULTIPLE_U(count) (0x1000 + (count)) -#if YY_STACK_USED -static int yy_start_stack_ptr = 0; -static int yy_start_stack_depth = 0; -static int *yy_start_stack = 0; -#ifndef YY_NO_PUSH_STATE -static void yy_push_state YY_PROTO(( int new_state )); -#endif -#ifndef YY_NO_POP_STATE -static void yy_pop_state YY_PROTO(( void )); -#endif -#ifndef YY_NO_TOP_STATE -static int yy_top_state YY_PROTO(( void )); -#endif +static int phase1_pushback[5]; +static unsigned int phase1_pushback_length; -#else -#define YY_NO_PUSH_STATE 1 -#define YY_NO_POP_STATE 1 -#define YY_NO_TOP_STATE 1 -#endif +static int +phase1_getc () +{ + int c; -#ifdef YY_MALLOC_DECL -YY_MALLOC_DECL -#else -#if __STDC__ -#ifndef __cplusplus -#include -#endif -#else -/* Just try to get by without declaring the routines. This will fail - * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int) - * or sizeof(void*) != sizeof(int). - */ -#endif -#endif + if (phase1_pushback_length) + { + c = phase1_pushback[--phase1_pushback_length]; + if (c >= MULTIPLE_U (0)) + { + if (c > MULTIPLE_U (1)) + phase1_pushback[phase1_pushback_length++] = c - 1; + return 'u'; + } + else + return c; + } -/* Amount of stuff to slurp up with each read. */ -#ifndef YY_READ_BUF_SIZE -#define YY_READ_BUF_SIZE 8192 -#endif + c = getc (fp); -/* Copy whatever the last rule matched to the standard output. */ + if (c == EOF) + { + if (ferror (fp)) + error (EXIT_FAILURE, errno, _("\ +error while reading \"%s\""), real_file_name); + } -#ifndef ECHO -/* This used to be an fputs(), but since the string might contain NUL's, - * we now use fwrite(). - */ -#define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) -#endif + return c; +} -/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, - * is returned in "result". 
- */ -#ifndef YY_INPUT -#define YY_INPUT(buf,result,max_size) \ - if ( yy_current_buffer->yy_is_interactive ) \ - { \ - int c = '*', n; \ - for ( n = 0; n < max_size && \ - (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ - buf[n] = (char) c; \ - if ( c == '\n' ) \ - buf[n++] = (char) c; \ - if ( c == EOF && ferror( yyin ) ) \ - YY_FATAL_ERROR( "input in flex scanner failed" ); \ - result = n; \ - } \ - else if ( ((result = fread( buf, 1, max_size, yyin )) == 0) \ - && ferror( yyin ) ) \ - YY_FATAL_ERROR( "input in flex scanner failed" ); -#endif +static void +phase1_ungetc (int c) +{ + if (c != EOF) + { + if (c == 'u') + { + if (phase1_pushback_length > 0 + && phase1_pushback[phase1_pushback_length - 1] >= MULTIPLE_U (0)) + phase1_pushback[phase1_pushback_length - 1]++; + else + phase1_pushback[phase1_pushback_length++] = MULTIPLE_U (1); + } + else + phase1_pushback[phase1_pushback_length++] = c; + } +} -/* No semi-colon after return; correct usage is to write "yyterminate();" - - * we don't want an extra ';' after the "return" because that will cause - * some compilers to complain about unreachable statements. - */ -#ifndef yyterminate -#define yyterminate() return YY_NULL -#endif -/* Number of entries by which start-condition stack grows. */ -#ifndef YY_START_STACK_INCR -#define YY_START_STACK_INCR 25 -#endif +/* Fetch the next single-byte character or Unicode character from the file. + (Here, as in the Java Language Specification, when we say "Unicode + character", we actually mean "UTF-16 encoding unit".) + Cope with potentially 2 pushback characters. */ -/* Report a fatal error. */ -#ifndef YY_FATAL_ERROR -#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) -#endif +/* Return value of phase 2, 3, 4 when EOF is reached. */ +#define P2_EOF 0xffff -/* Default declaration of generated scanner - a define so the user can - * easily add parameters. 
- */ -#ifndef YY_DECL -#define YY_DECL int yylex YY_PROTO(( void )) -#endif +/* Convert an UTF-16 code point to a return value that can be distinguished + from a single-byte return value. */ +#define UNICODE(code) (0x10000 + (code)) -/* Code executed at the beginning of each rule, after yytext and yyleng - * have been set up. - */ -#ifndef YY_USER_ACTION -#define YY_USER_ACTION -#endif +/* Test a return value of phase 2, 3, 4 whether it designates an UTF-16 code + point. */ +#define IS_UNICODE(p2_result) ((p2_result) >= 0x10000) -/* Code executed at the end of each rule. */ -#ifndef YY_BREAK -#define YY_BREAK break; -#endif +/* Extract the UTF-16 code of a return value that satisfies IS_UNICODE. */ +#define UTF16_VALUE(p2_result) ((p2_result) - 0x10000) -#define YY_RULE_SETUP \ - YY_USER_ACTION +/* Reduces a return value of phase 2, 3, 4 by unmasking the UNICODE bit, + so that it can be more easily compared against an ASCII character. + (RED (c) == 'x') is equivalent to (c == 'x' || c == UNICODE ('x')). */ +#define RED(p2_result) ((p2_result) & 0xffff) -YY_DECL - { - register yy_state_type yy_current_state; - register char *yy_cp, *yy_bp; - register int yy_act; +/* Maximum used guaranteed to be < 2. 
*/ +static int phase2_pushback[2]; +static int phase2_pushback_length; + +static int +phase2_getc () +{ + int c; -#line 155 "./x-java.l" + if (phase2_pushback_length) + return phase2_pushback[--phase2_pushback_length]; + c = phase1_getc (); + if (c == EOF) + return P2_EOF; + if (c == '\\') + { + c = phase1_getc (); + if (c == 'u') + { + unsigned int u_count = 1; + unsigned char buf[4]; + unsigned int n; + int i; -#line 713 "x-java.c-tmp" + for (;;) + { + c = phase1_getc (); + if (c != 'u') + break; + u_count++; + } + phase1_ungetc (c); - if ( yy_init ) + n = 0; + for (i = 0; i < 4; i++) + { + c = phase1_getc (); + + if (c >= '0' && c <= '9') + n = (n << 4) + (c - '0'); + else if (c >= 'A' && c <= 'F') + n = (n << 4) + (c - 'A' + 10); + else if (c >= 'a' && c <= 'f') + n = (n << 4) + (c - 'a' + 10); + else { - yy_init = 0; - -#ifdef YY_USER_INIT - YY_USER_INIT; -#endif + phase1_ungetc (c); + while (--i >= 0) + phase1_ungetc (buf[i]); + for (; u_count > 0; u_count--) + phase1_ungetc ('u'); + return '\\'; + } - if ( ! yy_start ) - yy_start = 1; /* first start state */ + buf[i] = c; + } + return UNICODE (n); + } + phase1_ungetc (c); + return '\\'; + } + return c; +} - if ( ! yyin ) - yyin = stdin; +/* Push back the character C, so that the next phase2_getc () call returns it. + Used by phase3_getc () after a lone CR. P2_EOF is never pushed back. */ +static void +phase2_ungetc (int c) +{ + if (c != P2_EOF) + phase2_pushback[phase2_pushback_length++] = c; +} - if ( ! yyout ) - yyout = stdout; - if ( ! yy_current_buffer ) - yy_current_buffer = - yy_create_buffer( yyin, YY_BUF_SIZE ); +/* Fetch the next single-byte character or Unicode character from the file. + With line number handling. + Convert line terminators to '\n' or UNICODE ('\n'). + Cope with potentially 2 pushback characters. */ - yy_load_buffer_state(); - } +/* Maximum used guaranteed to be < 2. */ +static int phase3_pushback[2]; +static int phase3_pushback_length; - while ( 1 ) /* loops until end-of-file is reached */ - { - yy_cp = yy_c_buf_p; - - /* Support of yytext.
*/ - *yy_cp = yy_hold_char; - - /* yy_bp points to the position in yy_ch_buf of the start of - * the current run. - */ - yy_bp = yy_cp; - - yy_current_state = yy_start; -yy_match: - do - { - register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; - if ( yy_accept[yy_current_state] ) - { - yy_last_accepting_state = yy_current_state; - yy_last_accepting_cpos = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 26 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - ++yy_cp; - } - while ( yy_base[yy_current_state] != 54 ); - -yy_find_action: - yy_act = yy_accept[yy_current_state]; - if ( yy_act == 0 ) - { /* have to back up */ - yy_cp = yy_last_accepting_cpos; - yy_current_state = yy_last_accepting_state; - yy_act = yy_accept[yy_current_state]; - } - - YY_DO_BEFORE_ACTION; - - -do_action: /* This label is used only to access EOF actions. */ - - - switch ( yy_act ) - { /* beginning of action switch */ - case 0: /* must back up */ - /* undo the effects of YY_DO_BEFORE_ACTION */ - *yy_cp = yy_hold_char; - yy_cp = yy_last_accepting_cpos; - yy_current_state = yy_last_accepting_state; - goto yy_find_action; - -case 1: -YY_RULE_SETUP -#line 157 "./x-java.l" +static int +phase3_getc () { int c; - int last; - char *str; - char_buf *charbuf = create_char_buf (); - for (;;) + if (phase3_pushback_length) { - c = input (); - last = input (); - update_line_no (c); - if ((c == '*' && last == '/') || c == EOF) - break; - unput (last); - append_char_buf (charbuf, c); + c = phase3_pushback[--phase3_pushback_length]; + if (c == '\n') + ++line_number; + return c; + } + + c = phase2_getc (); + + /* Handle line terminators. */ + if (RED (c) == '\r') + { + int c1 = phase2_getc (); + + if (RED (c1) != '\n') + phase2_ungetc (c1); + + /* Seen line terminator CR or CR/LF.
*/ + if (c == '\r' || c1 == '\n') + { + ++line_number; + return '\n'; + } + else + return UNICODE ('\n'); + } + else if (RED (c) == '\n') + { + /* Seen line terminator LF. */ + if (c == '\n') + { + ++line_number; + return '\n'; + } + else + return UNICODE ('\n'); } - str = get_string (charbuf); - destroy_charbuf (charbuf); - strip_ending_spaces (str); - parser_global->comment = str; - return JAVA_COMMENT; + + return c; } - YY_BREAK -case 2: -YY_RULE_SETUP -#line 180 "./x-java.l" - - YY_BREAK -case 3: -YY_RULE_SETUP -#line 181 "./x-java.l" + +static void +phase3_ungetc (int c) { - int c; - char *str; - char_buf *charbuf = create_char_buf (); - while ((c = input ()) != EOF && c != '"') + if (c != P2_EOF) { - update_line_no (c); - append_char_buf (charbuf, c); + if (c == '\n') + --line_number; + phase3_pushback[phase3_pushback_length++] = c; } - str = get_string (charbuf); - destroy_charbuf (charbuf); - parser_global->string = str; - return JAVA_STRING; } - YY_BREAK -case 4: -YY_RULE_SETUP -#line 196 "./x-java.l" + + +/* ========================= Accumulating strings. ======================== */ + +/* A string buffer type that allows appending bytes (in the + xgettext_current_source_encoding) or Unicode characters. + Returns the entire string in UTF-8 encoding. */ + +struct string_buffer { - parser_global->word = yytext; - return JAVA_WORD; -} - YY_BREAK -case 5: -YY_RULE_SETUP -#line 201 "./x-java.l" + /* The part of the string that has already been converted to UTF-8. */ + char *utf8_buffer; + size_t utf8_buflen; + size_t utf8_allocated; + /* The first half of an UTF-16 surrogate character. */ + unsigned short utf16_surr; + /* The part of the string that is still in the source encoding. */ + char *curr_buffer; + size_t curr_buflen; + size_t curr_allocated; +}; + +/* Initialize a 'struct string_buffer' to empty. 
*/ +static inline void +init_string_buffer (struct string_buffer *bp) { - parser_global->flow = yytext; - return JAVA_FLOW; + bp->utf8_buffer = NULL; + bp->utf8_buflen = 0; + bp->utf8_allocated = 0; + bp->utf16_surr = 0; + bp->curr_buffer = NULL; + bp->curr_buflen = 0; + bp->curr_allocated = 0; } - YY_BREAK -case 6: -YY_RULE_SETUP -#line 206 "./x-java.l" + +/* Auxiliary function: Append a byte to bp->curr. */ +static inline void +string_buffer_append_byte (struct string_buffer *bp, unsigned char c) { - parser_global->operator = yytext; - return JAVA_OPERATOR; + if (bp->curr_buflen == bp->curr_allocated) + { + bp->curr_allocated = 2 * bp->curr_allocated + 10; + bp->curr_buffer = xrealloc (bp->curr_buffer, bp->curr_allocated); + } + bp->curr_buffer[bp->curr_buflen++] = c; } - YY_BREAK -case 7: -YY_RULE_SETUP -#line 211 "./x-java.l" -/* ignore whitespace */ - YY_BREAK -case 8: -YY_RULE_SETUP -#line 213 "./x-java.l" + +/* Auxiliary function: Ensure count more bytes are available in bp->utf8. */ +static inline void +string_buffer_append_unicode_grow (struct string_buffer *bp, size_t count) { - parser_global->comment = xstrdup (yytext + 2); - return JAVA_COMMENT; + if (bp->utf8_buflen + count > bp->utf8_allocated) + { + size_t new_allocated = 2 * bp->utf8_allocated + 10; + if (new_allocated < bp->utf8_buflen + count) + new_allocated = bp->utf8_buflen + count; + bp->utf8_allocated = new_allocated; + bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated); + } } - YY_BREAK -case 9: -YY_RULE_SETUP -#line 217 "./x-java.l" -parser_global->line_no++; - YY_BREAK -case 10: -YY_RULE_SETUP -#line 218 "./x-java.l" - - YY_BREAK -case 11: -YY_RULE_SETUP -#line 219 "./x-java.l" - - YY_BREAK -case YY_STATE_EOF(INITIAL): -#line 220 "./x-java.l" -return -1; - YY_BREAK -case 12: -YY_RULE_SETUP -#line 221 "./x-java.l" -ECHO; - YY_BREAK -#line 906 "x-java.c-tmp" - - case YY_END_OF_BUFFER: - { - /* Amount of text matched not including the EOB char. 
*/ - int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1; - - /* Undo the effects of YY_DO_BEFORE_ACTION. */ - *yy_cp = yy_hold_char; - YY_RESTORE_YY_MORE_OFFSET - - if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW ) - { - /* We're scanning a new file or input source. It's - * possible that this happened because the user - * just pointed yyin at a new source and called - * yylex(). If so, then we have to assure - * consistency between yy_current_buffer and our - * globals. Here is the right place to do so, because - * this is the first action (other than possibly a - * back-up) that will match for the new input source. - */ - yy_n_chars = yy_current_buffer->yy_n_chars; - yy_current_buffer->yy_input_file = yyin; - yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL; - } - - /* Note that here we test for yy_c_buf_p "<=" to the position - * of the first EOB in the buffer, since yy_c_buf_p will - * already have been incremented past the NUL character - * (since all states make transitions on EOB to the - * end-of-buffer state). Contrast this with the test - * in input(). - */ - if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) - { /* This was really a NUL. */ - yy_state_type yy_next_state; - - yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state(); - - /* Okay, we're now positioned to make the NUL - * transition. We couldn't have - * yy_get_previous_state() go ahead and do it - * for us because it doesn't know how to deal - * with the possibility of jamming (and we don't - * want to build jamming into it because then it - * will run more slowly). - */ - - yy_next_state = yy_try_NUL_trans( yy_current_state ); - - yy_bp = yytext_ptr + YY_MORE_ADJ; - - if ( yy_next_state ) - { - /* Consume the NUL. 
*/ - yy_cp = ++yy_c_buf_p; - yy_current_state = yy_next_state; - goto yy_match; - } - - else - { - yy_cp = yy_c_buf_p; - goto yy_find_action; - } - } - - else switch ( yy_get_next_buffer() ) - { - case EOB_ACT_END_OF_FILE: - { - yy_did_buffer_switch_on_eof = 0; - - if ( yywrap() ) - { - /* Note: because we've taken care in - * yy_get_next_buffer() to have set up - * yytext, we can now set up - * yy_c_buf_p so that if some total - * hoser (like flex itself) wants to - * call the scanner after we return the - * YY_NULL, it'll still work - another - * YY_NULL will get returned. - */ - yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; - - yy_act = YY_STATE_EOF(YY_START); - goto do_action; - } - - else - { - if ( ! yy_did_buffer_switch_on_eof ) - YY_NEW_FILE; - } - break; - } - - case EOB_ACT_CONTINUE_SCAN: - yy_c_buf_p = - yytext_ptr + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state(); - - yy_cp = yy_c_buf_p; - yy_bp = yytext_ptr + YY_MORE_ADJ; - goto yy_match; - - case EOB_ACT_LAST_MATCH: - yy_c_buf_p = - &yy_current_buffer->yy_ch_buf[yy_n_chars]; - - yy_current_state = yy_get_previous_state(); - - yy_cp = yy_c_buf_p; - yy_bp = yytext_ptr + YY_MORE_ADJ; - goto yy_find_action; - } - break; - } - - default: - YY_FATAL_ERROR( - "fatal flex scanner internal error--no action found" ); - } /* end of action switch */ - } /* end of scanning one token */ - } /* end of yylex */ - - -/* yy_get_next_buffer - try to read in a new buffer - * - * Returns a code representing an action: - * EOB_ACT_LAST_MATCH - - * EOB_ACT_CONTINUE_SCAN - continue scanning from current position - * EOB_ACT_END_OF_FILE - end of file - */ - -static int yy_get_next_buffer() - { - register char *dest = yy_current_buffer->yy_ch_buf; - register char *source = yytext_ptr; - register int number_to_move, i; - int ret_val; - - if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) - YY_FATAL_ERROR( - "fatal flex scanner internal error--end of buffer missed" ); - - if ( 
yy_current_buffer->yy_fill_buffer == 0 ) - { /* Don't try to fill the buffer, so this is an EOF. */ - if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 ) - { - /* We matched a single character, the EOB, so - * treat this as a final EOF. - */ - return EOB_ACT_END_OF_FILE; - } - else - { - /* We matched some text prior to the EOB, first - * process it. - */ - return EOB_ACT_LAST_MATCH; - } - } +/* Auxiliary function: Append a Unicode character to bp->utf8. + uc must be < 0x110000. */ +static inline void +string_buffer_append_unicode (struct string_buffer *bp, unsigned int uc) +{ + unsigned char utf8buf[6]; + int count = u8_uctomb (utf8buf, uc, 6); - /* Try to read more data. */ + if (count < 0) + /* The caller should have ensured that uc is not out-of-range. */ + abort (); - /* First move last chars to start of buffer. */ - number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1; + string_buffer_append_unicode_grow (bp, count); + memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count); + bp->utf8_buflen += count; +} - for ( i = 0; i < number_to_move; ++i ) - *(dest++) = *(source++); +/* Auxiliary function: Flush bp->utf16_surr into bp->utf8_buffer. */ +static inline void +string_buffer_flush_utf16_surr (struct string_buffer *bp) +{ + if (bp->utf16_surr != 0) + { + /* A half surrogate is invalid, therefore use U+FFFD instead. */ + string_buffer_append_unicode (bp, 0xfffd); + bp->utf16_surr = 0; + } +} - if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING ) - /* don't do the read, it's not guaranteed to return an EOF, - * just force an EOF - */ - yy_current_buffer->yy_n_chars = yy_n_chars = 0; +/* Auxiliary function: Flush bp->curr_buffer into bp->utf8_buffer. 
*/ +static inline void +string_buffer_flush_curr_buffer (struct string_buffer *bp, int lineno) +{ + if (bp->curr_buflen > 0) + { + char *curr; + size_t count; - else - { - int num_to_read = - yy_current_buffer->yy_buf_size - number_to_move - 1; - - while ( num_to_read <= 0 ) - { /* Not enough room in the buffer - grow it. */ -#ifdef YY_USES_REJECT - YY_FATAL_ERROR( -"input buffer overflow, can't enlarge buffer because scanner uses REJECT" ); -#else - - /* just a shorter name for the current buffer */ - YY_BUFFER_STATE b = yy_current_buffer; - - int yy_c_buf_p_offset = - (int) (yy_c_buf_p - b->yy_ch_buf); - - if ( b->yy_is_our_buffer ) - { - int new_size = b->yy_buf_size * 2; - - if ( new_size <= 0 ) - b->yy_buf_size += b->yy_buf_size / 8; - else - b->yy_buf_size *= 2; - - b->yy_ch_buf = (char *) - /* Include room in for 2 EOB chars. */ - yy_flex_realloc( (void *) b->yy_ch_buf, - b->yy_buf_size + 2 ); - } - else - /* Can't grow it, we don't own it. */ - b->yy_ch_buf = 0; - - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( - "fatal error - scanner input buffer overflow" ); - - yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; - - num_to_read = yy_current_buffer->yy_buf_size - - number_to_move - 1; -#endif - } + string_buffer_append_byte (bp, '\0'); - if ( num_to_read > YY_READ_BUF_SIZE ) - num_to_read = YY_READ_BUF_SIZE; + /* Convert from the source encoding to UTF-8. */ + curr = from_current_source_encoding (bp->curr_buffer, + logical_file_name, lineno); - /* Read in more data. */ - YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), - yy_n_chars, num_to_read ); + /* Append it to bp->utf8_buffer. 
*/ + count = strlen (curr); + string_buffer_append_unicode_grow (bp, count); + memcpy (bp->utf8_buffer + bp->utf8_buflen, curr, count); + bp->utf8_buflen += count; - yy_current_buffer->yy_n_chars = yy_n_chars; - } + if (curr != bp->curr_buffer) + free (curr); + bp->curr_buflen = 0; + } +} - if ( yy_n_chars == 0 ) - { - if ( number_to_move == YY_MORE_ADJ ) - { - ret_val = EOB_ACT_END_OF_FILE; - yyrestart( yyin ); - } +/* Append a character or Unicode character to a 'struct string_buffer'. */ +static void +string_buffer_append (struct string_buffer *bp, int c) +{ + if (IS_UNICODE (c)) + { + /* Append a Unicode character. */ - else - { - ret_val = EOB_ACT_LAST_MATCH; - yy_current_buffer->yy_buffer_status = - YY_BUFFER_EOF_PENDING; - } - } + /* Switch from multibyte character mode to Unicode character mode. */ + string_buffer_flush_curr_buffer (bp, line_number); - else - ret_val = EOB_ACT_CONTINUE_SCAN; + /* Test whether this character and the previous one form a Unicode + surrogate character pair. */ + if (bp->utf16_surr != 0 + && (c >= UNICODE (0xdc00) && c < UNICODE (0xe000))) + { + unsigned short utf16buf[2] = { bp->utf16_surr, UTF16_VALUE (c) }; + unsigned int uc; - yy_n_chars += number_to_move; - yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; - yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + if (u16_mbtouc_aux (&uc, utf16buf, 2) != 2) + abort (); - yytext_ptr = &yy_current_buffer->yy_ch_buf[0]; + string_buffer_append_unicode (bp, uc); + bp->utf16_surr = 0; + } + else + { + string_buffer_flush_utf16_surr (bp); - return ret_val; + if (c >= UNICODE (0xd800) && c < UNICODE (0xdc00)) + bp->utf16_surr = UTF16_VALUE (c); + else + string_buffer_append_unicode (bp, UTF16_VALUE (c)); } + } + else + { + /* Append a single byte. */ + /* Switch from Unicode character mode to multibyte character mode. 
*/ + string_buffer_flush_utf16_surr (bp); -/* yy_get_previous_state - get the state just before the EOB char was reached */ + /* When a newline is seen, convert the accumulated multibyte sequence. + This ensures a correct line number in the error message in case of + a conversion error. The "- 1" is to account for the newline. */ + if (c == '\n') + string_buffer_flush_curr_buffer (bp, line_number - 1); -static yy_state_type yy_get_previous_state() - { - register yy_state_type yy_current_state; - register char *yy_cp; + string_buffer_append_byte (bp, (unsigned char) c); + } +} - yy_current_state = yy_start; +/* Return the string buffer's contents. */ +static char * +string_buffer_result (struct string_buffer *bp) +{ + /* Flush all into bp->utf8_buffer. */ + string_buffer_flush_utf16_surr (bp); + string_buffer_flush_curr_buffer (bp, line_number); + /* NUL-terminate it. */ + string_buffer_append_unicode_grow (bp, 1); + bp->utf8_buffer[bp->utf8_buflen] = '\0'; + /* Return it. */ + return bp->utf8_buffer; +} - for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) - { - register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); - if ( yy_accept[yy_current_state] ) - { - yy_last_accepting_state = yy_current_state; - yy_last_accepting_cpos = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 26 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - } +/* Free the memory pointed to by a 'struct string_buffer'. */ +static inline void +free_string_buffer (struct string_buffer *bp) +{ + free (bp->utf8_buffer); + free (bp->curr_buffer); +} - return yy_current_state; - } +/* ======================== Accumulating comments. 
======================== */ -/* yy_try_NUL_trans - try to make a transition on the NUL character - * - * synopsis - * next_state = yy_try_NUL_trans( current_state ); - */ -#ifdef YY_USE_PROTOS -static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state ) -#else -static yy_state_type yy_try_NUL_trans( yy_current_state ) -yy_state_type yy_current_state; -#endif - { - register int yy_is_jam; - register char *yy_cp = yy_c_buf_p; +/* In this backend we cannot use the xgettext_comment* functions directly, + because in multiline string expressions like + "string1" + + "string2" + the newline between "string1" and "string2" would cause a call to + xgettext_comment_reset(), thus destroying the accumulated comments + that we need a little later, when we have concatenated the two strings + and pass them to remember_a_message(). + Instead, we do the bookkeeping of the accumulated comments directly, + and save a pointer to the accumulated comments when we read "string1". + In order to avoid excessive copying of strings, we use reference + counting. */ - register YY_CHAR yy_c = 1; - if ( yy_accept[yy_current_state] ) - { - yy_last_accepting_state = yy_current_state; - yy_last_accepting_cpos = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 26 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - yy_is_jam = (yy_current_state == 25); +typedef struct refcounted_string_list_ty refcounted_string_list_ty; +struct refcounted_string_list_ty +{ + unsigned int refcount; + struct string_list_ty contents; +}; - return yy_is_jam ? 
0 : yy_current_state; - } +static refcounted_string_list_ty *comment; +static inline refcounted_string_list_ty * +add_reference (refcounted_string_list_ty *rslp) +{ + if (rslp != NULL) + rslp->refcount++; + return rslp; +} -#ifndef YY_NO_UNPUT -#ifdef YY_USE_PROTOS -static void yyunput( int c, register char *yy_bp ) -#else -static void yyunput( c, yy_bp ) -int c; -register char *yy_bp; -#endif +static inline void +drop_reference (refcounted_string_list_ty *rslp) +{ + if (rslp != NULL) + { + if (rslp->refcount > 1) + rslp->refcount--; + else { - register char *yy_cp = yy_c_buf_p; - - /* undo effects of setting up yytext */ - *yy_cp = yy_hold_char; - - if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) - { /* need to shift things up to make room */ - /* +2 for EOB chars. */ - register int number_to_move = yy_n_chars + 2; - register char *dest = &yy_current_buffer->yy_ch_buf[ - yy_current_buffer->yy_buf_size + 2]; - register char *source = - &yy_current_buffer->yy_ch_buf[number_to_move]; - - while ( source > yy_current_buffer->yy_ch_buf ) - *--dest = *--source; - - yy_cp += (int) (dest - source); - yy_bp += (int) (dest - source); - yy_current_buffer->yy_n_chars = - yy_n_chars = yy_current_buffer->yy_buf_size; - - if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) - YY_FATAL_ERROR( "flex scanner push-back overflow" ); - } - - *--yy_cp = (char) c; - - - yytext_ptr = yy_bp; - yy_hold_char = *yy_cp; - yy_c_buf_p = yy_cp; + string_list_destroy (&rslp->contents); + free (rslp); } -#endif /* ifndef YY_NO_UNPUT */ - + } +} -#ifdef __cplusplus -static int yyinput() -#else -static int input() -#endif - { - int c; +static void +x_java_comment_add (const char *str) +{ + if (comment == NULL) + { + comment = (refcounted_string_list_ty *) xmalloc (sizeof (*comment)); + comment->refcount = 1; + string_list_init (&comment->contents); + } + else if (comment->refcount > 1) + { + /* Unshare the list by making copies. 
*/ + struct string_list_ty *oldcontents; + size_t i; + + comment->refcount--; + oldcontents = &comment->contents; + + comment = (refcounted_string_list_ty *) xmalloc (sizeof (*comment)); + comment->refcount = 1; + string_list_init (&comment->contents); + for (i = 0; i < oldcontents->nitems; i++) + string_list_append (&comment->contents, oldcontents->item[i]); + } + string_list_append (&comment->contents, str); +} - *yy_c_buf_p = yy_hold_char; +static void +x_java_comment_reset () +{ + drop_reference (comment); + comment = NULL; +} - if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) - { - /* yy_c_buf_p now points to the character we want to return. - * If this occurs *before* the EOB characters, then it's a - * valid NUL; if not, then we've hit the end of the buffer. - */ - if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) - /* This was really a NUL. */ - *yy_c_buf_p = '\0'; +static void +x_java_comment_to_xgettext_comment (refcounted_string_list_ty *rslp) +{ + xgettext_comment_reset (); + if (rslp != NULL) + { + size_t i; - else - { /* need more input */ - int offset = yy_c_buf_p - yytext_ptr; - ++yy_c_buf_p; - - switch ( yy_get_next_buffer() ) - { - case EOB_ACT_LAST_MATCH: - /* This happens because yy_g_n_b() - * sees that we've accumulated a - * token and flags that we need to - * try matching the token before - * proceeding. But for input(), - * there's no matching to consider. - * So convert the EOB_ACT_LAST_MATCH - * to EOB_ACT_END_OF_FILE. - */ - - /* Reset buffer status. */ - yyrestart( yyin ); - - /* fall through */ - - case EOB_ACT_END_OF_FILE: - { - if ( yywrap() ) - return EOF; - - if ( ! 
yy_did_buffer_switch_on_eof ) - YY_NEW_FILE; -#ifdef __cplusplus - return yyinput(); -#else - return input(); -#endif - } + for (i = 0; i < rslp->contents.nitems; i++) + xgettext_comment_add (rslp->contents.item[i]); + } +} - case EOB_ACT_CONTINUE_SCAN: - yy_c_buf_p = yytext_ptr + offset; - break; - } - } - } - c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */ - *yy_c_buf_p = '\0'; /* preserve yytext */ - yy_hold_char = *++yy_c_buf_p; +/* Accumulating a single comment line. */ +static struct string_buffer comment_buffer; - return c; - } +static inline void +comment_start () +{ + comment_buffer.utf8_buflen = 0; + comment_buffer.utf16_surr = 0; + comment_buffer.curr_buflen = 0; +} +static inline bool +comment_at_start () +{ + return (comment_buffer.utf8_buflen == 0 && comment_buffer.utf16_surr == 0 + && comment_buffer.curr_buflen == 0); +} -#ifdef YY_USE_PROTOS -void yyrestart( FILE *input_file ) -#else -void yyrestart( input_file ) -FILE *input_file; -#endif - { - if ( ! yy_current_buffer ) - yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); +static inline void +comment_add (int c) +{ + string_buffer_append (&comment_buffer, c); +} - yy_init_buffer( yy_current_buffer, input_file ); - yy_load_buffer_state(); - } +static inline void +comment_line_end (size_t chars_to_remove) +{ + char *buffer = string_buffer_result (&comment_buffer); + size_t buflen = strlen (buffer); + + buflen -= chars_to_remove; + while (buflen >= 1 + && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) + --buflen; + buffer[buflen] = '\0'; + x_java_comment_add (buffer); +} -#ifdef YY_USE_PROTOS -void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) -#else -void yy_switch_to_buffer( new_buffer ) -YY_BUFFER_STATE new_buffer; -#endif - { - if ( yy_current_buffer == new_buffer ) - return; +/* These are for tracking whether comments count as immediately before + keyword. 
*/ +static int last_comment_line; +static int last_non_comment_line; - if ( yy_current_buffer ) - { - /* Flush out information for old buffer. */ - *yy_c_buf_p = yy_hold_char; - yy_current_buffer->yy_buf_pos = yy_c_buf_p; - yy_current_buffer->yy_n_chars = yy_n_chars; - } - yy_current_buffer = new_buffer; - yy_load_buffer_state(); - - /* We don't actually know whether we did this switch during - * EOF (yywrap()) processing, but the only time this flag - * is looked at is after yywrap() is called, so it's safe - * to go ahead and always set it. - */ - yy_did_buffer_switch_on_eof = 1; - } +/* Replace each comment that is not inside a character constant or string + literal with a space or newline character. */ +static int +phase4_getc () +{ + int c0; + int c; + bool last_was_star; -#ifdef YY_USE_PROTOS -void yy_load_buffer_state( void ) -#else -void yy_load_buffer_state() -#endif + c0 = phase3_getc (); + if (RED (c0) != '/') + return c0; + c = phase3_getc (); + switch (RED (c)) + { + default: + phase3_ungetc (c); + return c0; + + case '*': + /* C style comment. */ + comment_start (); + last_was_star = false; + for (;;) { - yy_n_chars = yy_current_buffer->yy_n_chars; - yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos; - yyin = yy_current_buffer->yy_input_file; - yy_hold_char = *yy_c_buf_p; - } - + c = phase3_getc (); + if (c == P2_EOF) + break; + /* We skip all leading white space, but not EOLs. 
*/ + if (!(comment_at_start () && (RED (c) == ' ' || RED (c) == '\t'))) + comment_add (c); + switch (RED (c)) + { + case '\n': + comment_line_end (1); + comment_start (); + last_was_star = false; + continue; + + case '*': + last_was_star = true; + continue; + + case '/': + if (last_was_star) + { + comment_line_end (2); + break; + } + /* FALLTHROUGH */ -#ifdef YY_USE_PROTOS -YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) -#else -YY_BUFFER_STATE yy_create_buffer( file, size ) -FILE *file; -int size; -#endif + default: + last_was_star = false; + continue; + } + break; + } + last_comment_line = line_number; + return ' '; + + case '/': + /* C++ style comment. */ + last_comment_line = line_number; + comment_start (); + for (;;) { - YY_BUFFER_STATE b; - - b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); - if ( ! b ) - YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); - - b->yy_buf_size = size; - - /* yy_ch_buf has to be 2 characters longer than the size given because - * we need to put in 2 end-of-buffer characters. - */ - b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 ); - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); - - b->yy_is_our_buffer = 1; - - yy_init_buffer( b, file ); - - return b; + c = phase3_getc (); + if (RED (c) == '\n' || c == P2_EOF) + break; + comment_add (c); } + phase3_ungetc (c); /* push back the newline, to decrement line_number */ + comment_line_end (0); + phase3_getc (); /* read the newline again */ + return '\n'; + } +} +static void +phase4_ungetc (int c) +{ + phase3_ungetc (c); +} -#ifdef YY_USE_PROTOS -void yy_delete_buffer( YY_BUFFER_STATE b ) -#else -void yy_delete_buffer( b ) -YY_BUFFER_STATE b; -#endif - { - if ( ! b ) - return; - if ( b == yy_current_buffer ) - yy_current_buffer = (YY_BUFFER_STATE) 0; +/* ========================== Reading of tokens. 
========================== */ - if ( b->yy_is_our_buffer ) - yy_flex_free( (void *) b->yy_ch_buf ); +enum token_type_ty +{ + token_type_eof, + token_type_lparen, /* ( */ + token_type_rparen, /* ) */ + token_type_lbrace, /* { */ + token_type_rbrace, /* } */ + token_type_comma, /* , */ + token_type_dot, /* . */ + token_type_string_literal, /* "abc" */ + token_type_number, /* 1.23 */ + token_type_symbol, /* identifier, keyword, null */ + token_type_plus, /* + */ + token_type_other /* character literal, misc. operator */ +}; +typedef enum token_type_ty token_type_ty; + +typedef struct token_ty token_ty; +struct token_ty +{ + token_type_ty type; + char *string; /* for token_type_string_literal, token_type_symbol */ + refcounted_string_list_ty *comment; /* for token_type_string_literal */ + int line_number; +}; - yy_flex_free( (void *) b ); - } +/* Free the memory pointed to by a 'struct token_ty'. */ +static inline void +free_token (token_ty *tp) +{ + if (tp->type == token_type_string_literal || tp->type == token_type_symbol) + free (tp->string); + if (tp->type == token_type_string_literal) + drop_reference (tp->comment); +} -#ifndef YY_ALWAYS_INTERACTIVE -#ifndef YY_NEVER_INTERACTIVE -extern int isatty YY_PROTO(( int )); -#endif -#endif -#ifdef YY_USE_PROTOS -void yy_init_buffer( YY_BUFFER_STATE b, FILE *file ) -#else -void yy_init_buffer( b, file ) -YY_BUFFER_STATE b; -FILE *file; -#endif +/* Read an escape sequence inside a string literal or character literal. */ +static inline int +do_getc_escaped () +{ + int c; + /* Use phase 3, because phase 4 elides comments. 
*/ + c = phase3_getc (); + if (c == P2_EOF) + return UNICODE ('\\'); + switch (RED (c)) + { + case 'b': + return UNICODE (0x08); + case 't': + return UNICODE (0x09); + case 'n': + return UNICODE (0x0a); + case 'f': + return UNICODE (0x0c); + case 'r': + return UNICODE (0x0d); + case '"': + return UNICODE ('"'); + case '\'': + return UNICODE ('\''); + case '\\': + return UNICODE ('\\'); + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + { + int n = RED (c) - '0'; + bool maybe3digits = (n < 4); - { - yy_flush_buffer( b ); - - b->yy_input_file = file; - b->yy_fill_buffer = 1; - -#if YY_ALWAYS_INTERACTIVE - b->yy_is_interactive = 1; -#else -#if YY_NEVER_INTERACTIVE - b->yy_is_interactive = 0; -#else - b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0; -#endif -#endif - } + c = phase3_getc (); + if (RED (c) >= '0' && RED (c) <= '7') + { + n = (n << 3) + (RED (c) - '0'); + if (maybe3digits) + { + c = phase3_getc (); + if (RED (c) >= '0' && RED (c) <= '7') + n = (n << 3) + (RED (c) - '0'); + else + phase3_ungetc (c); + } + } + else + phase3_ungetc (c); + return UNICODE (n); + } + default: + /* Invalid escape sequence. */ + phase3_ungetc (c); + return UNICODE ('\\'); + } +} -#ifdef YY_USE_PROTOS -void yy_flush_buffer( YY_BUFFER_STATE b ) -#else -void yy_flush_buffer( b ) -YY_BUFFER_STATE b; -#endif +/* Read a string literal or character literal. */ +static void +accumulate_escaped (struct string_buffer *literal, int delimiter) +{ + int c; + for (;;) + { + /* Use phase 3, because phase 4 elides comments. */ + c = phase3_getc (); + if (c == P2_EOF || RED (c) == delimiter) + break; + if (RED (c) == '\n') { - if ( ! b ) - return; - - b->yy_n_chars = 0; - - /* We always need two end-of-buffer characters. The first causes - * a transition to the end-of-buffer state. The second causes - * a jam in that state. 
- */ - b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; - b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + error_with_progname = false; + error (0, 0, _("%s:%d: warning: unterminated character constant"), + logical_file_name, line_number - 1); + error_with_progname = true; + phase3_ungetc (c); + break; + } + if (RED (c) == '\\') + c = do_getc_escaped (); + string_buffer_append (literal, c); + } +} - b->yy_buf_pos = &b->yy_ch_buf[0]; - b->yy_at_bol = 1; - b->yy_buffer_status = YY_BUFFER_NEW; +/* Combine characters into tokens. Discard whitespace. */ - if ( b == yy_current_buffer ) - yy_load_buffer_state(); - } +/* Maximum used guaranteed to be < 4. */ +static token_ty phase5_pushback[4]; +static int phase5_pushback_length; +static void +phase5_get (token_ty *tp) +{ + int c; -#ifndef YY_NO_SCAN_BUFFER -#ifdef YY_USE_PROTOS -YY_BUFFER_STATE yy_scan_buffer( char *base, yy_size_t size ) -#else -YY_BUFFER_STATE yy_scan_buffer( base, size ) -char *base; -yy_size_t size; -#endif - { - YY_BUFFER_STATE b; - - if ( size < 2 || - base[size-2] != YY_END_OF_BUFFER_CHAR || - base[size-1] != YY_END_OF_BUFFER_CHAR ) - /* They forgot to leave room for the EOB's. */ - return 0; - - b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); - if ( ! 
b ) - YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); - - b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ - b->yy_buf_pos = b->yy_ch_buf = base; - b->yy_is_our_buffer = 0; - b->yy_input_file = 0; - b->yy_n_chars = b->yy_buf_size; - b->yy_is_interactive = 0; - b->yy_at_bol = 1; - b->yy_fill_buffer = 0; - b->yy_buffer_status = YY_BUFFER_NEW; - - yy_switch_to_buffer( b ); - - return b; - } -#endif + if (phase5_pushback_length) + { + *tp = phase5_pushback[--phase5_pushback_length]; + return; + } + tp->string = NULL; + for (;;) + { + tp->line_number = line_number; + c = phase4_getc (); -#ifndef YY_NO_SCAN_STRING -#ifdef YY_USE_PROTOS -YY_BUFFER_STATE yy_scan_string( yyconst char *yy_str ) -#else -YY_BUFFER_STATE yy_scan_string( yy_str ) -yyconst char *yy_str; -#endif + if (c == P2_EOF) { - int len; - for ( len = 0; yy_str[len]; ++len ) - ; - - return yy_scan_bytes( yy_str, len ); + tp->type = token_type_eof; + return; } -#endif - -#ifndef YY_NO_SCAN_BYTES -#ifdef YY_USE_PROTOS -YY_BUFFER_STATE yy_scan_bytes( yyconst char *bytes, int len ) -#else -YY_BUFFER_STATE yy_scan_bytes( bytes, len ) -yyconst char *bytes; -int len; -#endif + switch (RED (c)) { - YY_BUFFER_STATE b; - char *buf; - yy_size_t n; - int i; - - /* Get memory for full buffer, including space for trailing EOB's. */ - n = len + 2; - buf = (char *) yy_flex_alloc( n ); - if ( ! buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); - - for ( i = 0; i < len; ++i ) - buf[i] = bytes[i]; - - buf[len] = buf[len+1] = YY_END_OF_BUFFER_CHAR; - - b = yy_scan_buffer( buf, n ); - if ( ! b ) - YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); - - /* It's okay to grow etc. this buffer, and we should throw it - * away when we're done. - */ - b->yy_is_our_buffer = 1; - - return b; + case '\n': + if (last_non_comment_line > last_comment_line) + x_java_comment_reset (); + /* FALLTHROUGH */ + case ' ': + case '\t': + case '\f': + /* Ignore whitespace and comments. 
*/ + continue; } -#endif + last_non_comment_line = tp->line_number; -#ifndef YY_NO_PUSH_STATE -#ifdef YY_USE_PROTOS -static void yy_push_state( int new_state ) -#else -static void yy_push_state( new_state ) -int new_state; -#endif + switch (RED (c)) { - if ( yy_start_stack_ptr >= yy_start_stack_depth ) - { - yy_size_t new_size; - - yy_start_stack_depth += YY_START_STACK_INCR; - new_size = yy_start_stack_depth * sizeof( int ); + case '(': + tp->type = token_type_lparen; + return; - if ( ! yy_start_stack ) - yy_start_stack = (int *) yy_flex_alloc( new_size ); + case ')': + tp->type = token_type_rparen; + return; - else - yy_start_stack = (int *) yy_flex_realloc( - (void *) yy_start_stack, new_size ); + case '{': + tp->type = token_type_lbrace; + return; - if ( ! yy_start_stack ) - YY_FATAL_ERROR( - "out of memory expanding start-condition stack" ); - } + case '}': + tp->type = token_type_rbrace; + return; - yy_start_stack[yy_start_stack_ptr++] = YY_START; + case ',': + tp->type = token_type_comma; + return; - BEGIN(new_state); - } -#endif + case '.': + c = phase4_getc (); + if (!(RED (c) >= '0' && RED (c) <= '9')) + { + phase4_ungetc (c); + tp->type = token_type_dot; + return; + } + /* FALLTHROUGH */ + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + { + /* Don't need to verify the complicated syntax of integers and + floating-point numbers. We assume a valid Java input. + The simplified syntax that we recognize as number is: any + sequence of alphanumeric characters, additionally '+' and '-' + immediately after 'e' or 'E' except in hexadecimal numbers. 
*/ + bool hexadecimal = false; + + for (;;) + { + c = phase4_getc (); + if (RED (c) >= '0' && RED (c) <= '9') + continue; + if ((RED (c) >= 'A' && RED (c) <= 'Z') + || (RED (c) >= 'a' && RED (c) <= 'z')) + { + if (RED (c) == 'X' || RED (c) == 'x') + hexadecimal = true; + if ((RED (c) == 'E' || RED (c) == 'e') && !hexadecimal) + { + c = phase4_getc (); + if (!(RED (c) == '+' || RED (c) == '-')) + phase4_ungetc (c); + } + continue; + } + if (RED (c) == '.') + continue; + break; + } + phase4_ungetc (c); + tp->type = token_type_number; + return; + } -#ifndef YY_NO_POP_STATE -static void yy_pop_state() - { - if ( --yy_start_stack_ptr < 0 ) - YY_FATAL_ERROR( "start-condition stack underflow" ); + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case '_': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + /* Although Java allows identifiers containing many Unicode + characters, we recognize only identifiers consisting of ASCII + characters. This avoids conversion hassles w.r.t. the --keyword + arguments, and shouldn't be a big problem in practice. 
*/ + { + static char *buffer; + static int bufmax; + int bufpos = 0; + for (;;) + { + if (bufpos >= bufmax) + { + bufmax = 2 * bufmax + 10; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos++] = RED (c); + c = phase4_getc (); + if (!((RED (c) >= 'A' && RED (c) <= 'Z') + || (RED (c) >= 'a' && RED (c) <= 'z') + || (RED (c) >= '0' && RED (c) <= '9') + || RED (c) == '_')) + break; + } + phase4_ungetc (c); + if (bufpos >= bufmax) + { + bufmax = 2 * bufmax + 10; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos] = '\0'; + tp->string = xstrdup (buffer); + tp->type = token_type_symbol; + return; + } - BEGIN(yy_start_stack[yy_start_stack_ptr]); - } -#endif + case '"': + /* String literal. */ + { + struct string_buffer literal; + + init_string_buffer (&literal); + accumulate_escaped (&literal, '"'); + tp->string = xstrdup (string_buffer_result (&literal)); + free_string_buffer (&literal); + tp->comment = add_reference (comment); + tp->type = token_type_string_literal; + return; + } + case '\'': + /* Character literal. */ + { + struct string_buffer literal; -#ifndef YY_NO_TOP_STATE -static int yy_top_state() - { - return yy_start_stack[yy_start_stack_ptr - 1]; - } -#endif + init_string_buffer (&literal); + accumulate_escaped (&literal, '\''); + free_string_buffer (&literal); + tp->type = token_type_other; + return; + } -#ifndef YY_EXIT_FAILURE -#define YY_EXIT_FAILURE 2 -#endif + case '+': + c = phase4_getc (); + if (RED (c) == '+') + /* Operator ++ */ + tp->type = token_type_other; + else if (RED (c) == '=') + /* Operator += */ + tp->type = token_type_other; + else + { + /* Operator + */ + phase4_ungetc (c); + tp->type = token_type_plus; + } + return; -#ifdef YY_USE_PROTOS -static void yy_fatal_error( yyconst char msg[] ) -#else -static void yy_fatal_error( msg ) -char msg[]; -#endif - { - (void) fprintf( stderr, "%s\n", msg ); - exit( YY_EXIT_FAILURE ); + default: + /* Misc. operator. 
*/ + tp->type = token_type_other; + return; } + } +} +static void +phase5_unget (token_ty *tp) +{ + if (tp->type != token_type_eof) + phase5_pushback[phase5_pushback_length++] = *tp; +} -/* Redefine yyless() so it works in section 3 code. */ +/* Compile-time optimization of string literal concatenation. + Combine "string1" + ... + "stringN" to the concatenated string if + - the token before this expression is not ')' (because then the first + string could be part of a cast expression), + - the token after this expression is not '.' (because then the last + string could be part of a method call expression). */ -#undef yyless -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. */ \ - yytext[yyleng] = yy_hold_char; \ - yy_c_buf_p = yytext + n; \ - yy_hold_char = *yy_c_buf_p; \ - *yy_c_buf_p = '\0'; \ - yyleng = n; \ - } \ - while ( 0 ) +/* Maximum used guaranteed to be < 4. */ +static token_ty phase6_pushback[4]; +static int phase6_pushback_length; +static token_type_ty phase6_last; -/* Internal utility routines. 
*/ +static void +phase6_get (token_ty *tp) +{ + if (phase6_pushback_length) + { + *tp = phase6_pushback[--phase6_pushback_length]; + return; + } -#ifndef yytext_ptr -#ifdef YY_USE_PROTOS -static void yy_flex_strncpy( char *s1, yyconst char *s2, int n ) -#else -static void yy_flex_strncpy( s1, s2, n ) -char *s1; -yyconst char *s2; -int n; -#endif - { - register int i; - for ( i = 0; i < n; ++i ) - s1[i] = s2[i]; - } -#endif + phase5_get (tp); + if (tp->type == token_type_string_literal && phase6_last != token_type_rparen) + { + char *sum = tp->string; + size_t sum_len = strlen (sum); -#ifdef YY_NEED_STRLEN -#ifdef YY_USE_PROTOS -static int yy_flex_strlen( yyconst char *s ) -#else -static int yy_flex_strlen( s ) -yyconst char *s; -#endif + for (;;) { - register int n; - for ( n = 0; s[n]; ++n ) - ; - - return n; - } -#endif + token_ty token2; + phase5_get (&token2); + if (token2.type == token_type_plus) + { + token_ty token3; -#ifdef YY_USE_PROTOS -static void *yy_flex_alloc( yy_size_t size ) -#else -static void *yy_flex_alloc( size ) -yy_size_t size; -#endif - { - return (void *) malloc( size ); - } + phase5_get (&token3); + if (token3.type == token_type_string_literal) + { + token_ty token_after; -#ifdef YY_USE_PROTOS -static void *yy_flex_realloc( void *ptr, yy_size_t size ) -#else -static void *yy_flex_realloc( ptr, size ) -void *ptr; -yy_size_t size; -#endif - { - /* The cast to (char *) in the following accommodates both - * implementations that use char* generic pointers, and those - * that use void* generic pointers. It works with the latter - * because both ANSI C and C++ allow castless assignment from - * any pointer type to void*, and deal with argument conversions - * as though doing an assignment. 
- */ - return (void *) realloc( (char *) ptr, size ); - } + phase5_get (&token_after); + if (token_after.type != token_type_dot) + { + char *addend = token3.string; + size_t addend_len = strlen (addend); -#ifdef YY_USE_PROTOS -static void yy_flex_free( void *ptr ) -#else -static void yy_flex_free( ptr ) -void *ptr; -#endif - { - free( ptr ); - } + sum = (char *) xrealloc (sum, sum_len + addend_len + 1); + memcpy (sum + sum_len, addend, addend_len + 1); + sum_len += addend_len; -#if YY_MAIN -int main() - { - yylex(); - return 0; + phase5_unget (&token_after); + free_token (&token3); + free_token (&token2); + continue; + } + phase5_unget (&token_after); + } + phase5_unget (&token3); + } + phase5_unget (&token2); + break; } -#endif -#line 221 "./x-java.l" - - -static char * -append_strings (char *a, char *b) -{ - int total_size = strlen (a) + strlen (b) + 1; - char *new_string = (char *) xmalloc (total_size); - strcpy (new_string, a); - strcat (new_string, b); - return new_string; -} - -static inline bool -isplus (char *s) -{ - return *s == '+'; -} - -static inline bool -isdot (char *s) -{ - return *s == '.'; -} - - -static char * -translate_esc (char *s) -{ - char *n = (char *) xmalloc (strlen (s) + 1); - size_t i; - size_t j = 0; - - for (i = 0; i < strlen (s); i++) - switch (s[i]) - { - case '\\': - if (s[i + 1] == 'n') - { - n[j++] = '\n'; - i++; - } - break; - default: - n[j++] = s[i]; - } - n[j] = '\0'; - return n; -} - -static object_list * -object_list_alloc () -{ - object_list *list = xmalloc (sizeof (object_list)); - list->max_num_obj = INITIAL_OBJECT_LIST_SIZE; - list->num_obj = 0; - list->objects = xmalloc (sizeof (void *) * INITIAL_OBJECT_LIST_SIZE); - return list; + tp->string = sum; + } + phase6_last = tp->type; } static void -object_list_destroy (object_list *list) +phase6_unget (token_ty *tp) { - free (list->objects); - free (list); + if (tp->type != token_type_eof) + phase6_pushback[phase6_pushback_length++] = *tp; } -static int -get_num_objects 
(const object_list *list) -{ - return list->num_obj; -} -static void * -get_object (const object_list *list, int i) +static void +x_java_lex (token_ty *tp) { - return list->objects[i]; + phase6_get (tp); } static void -add_object (object_list *list, void *object) +x_java_unlex (token_ty *tp) { - if (list->num_obj + 1 >= list->max_num_obj) - { - list->max_num_obj += OBJECT_LIST_GROWTH; - list->objects = - xrealloc (list->objects, list->max_num_obj * sizeof (void *)); - } - list->objects[list->num_obj ++] = object; + phase6_unget (tp); } -/* options */ -static bool extract_all_strings = false; - -void -x_java_extract_all () -{ - extract_all_strings = true; -} +/* ========================= Extracting strings. ========================== */ +/* The file is broken into tokens. Scan the token stream, looking for + a keyword, followed by a left paren, followed by a string. When we + see this sequence, we have something to remember. We assume we are + looking at a valid Java program, and leave the complaints about + the grammar to the compiler. -static java_keyword * -alloc_keyword (const char *keyword, int arg1, int arg2) -{ - java_keyword *jk = xmalloc (sizeof (java_keyword)); - jk->keyword = xstrdup (keyword); - jk->msgid_arg = arg1; - jk->msgid_plural_arg = arg2; - return jk; -} + Normal handling: Look for + keyword ( ... msgid ... ) + Plural handling: Look for + keyword ( ... msgid ... msgid_plural ... ) -static object_list *java_keywords = NULL; + We use recursion because the arguments before msgid or between msgid + and msgid_plural can contain subexpressions of the same form. */ -/** - * Backwards substring match. - */ +/* Extract messages until the next balanced closing parenthesis or brace, + depending on TERMINATOR. + Extracted messages are added to MLP. + When a specific argument shall be extracted, COMMAS_TO_SKIP >= 0 and, + if also a plural argument shall be extracted, PLURAL_COMMAS > 0, + otherwise PLURAL_COMMAS = 0. 
+ When no specific argument shall be extracted, COMMAS_TO_SKIP < 0. + Return true upon eof, false upon closing parenthesis or brace. */ static bool -tailcmp (const char *s1, const char *s2) +extract_parenthesized (message_list_ty *mlp, token_type_ty terminator, + int commas_to_skip, int plural_commas) { - int len1 = strlen (s1); - int len2 = strlen (s2); - int start = len1 - len2; - if (start < 0) - return false; - return (start == 0 || s1[start-1] == '.') && (strcmp (s1 + start, s2) == 0); -} + /* Remember the message containing the msgid, for msgid_plural. */ + message_ty *plural_mp = NULL; -/** - * Try to match a string against the keyword. If substring_match is - * true substring match is used. - */ -static bool -do_compare (const char *s1, const char *s2) -{ - if (substring_match) - return strstr (s1, s2) != NULL; - else - return tailcmp (s1, s2); -} + /* 0 when no keyword has been seen. 1 right after a keyword is seen. */ + int state; + /* Parameters of the keyword just seen. Defined only in state 1. */ + int next_commas_to_skip = -1; + int next_plural_commas = 0; -/** - * Check if a string is a keyword or not. - */ -static java_keyword * -is_keyword (const char *s) -{ - int i; - int num_keywords = get_num_objects (java_keywords); - java_keyword *kw; + /* Start state is 0. */ + state = 0; - for (i = 0; i < num_keywords; i++) + for (;;) { - kw = (java_keyword *) get_object (java_keywords, i); - - if (do_compare (s, kw->keyword)) - return kw; - } - return NULL; -} - -/** - * Add a keyword to the list of possible keywords. - */ -void -x_java_keyword (const char *keyword) -{ - const char *keyword_end; - int arg1; - int arg2; - size_t len; - char *kw; + token_ty token; - if (keyword == NULL) - { - if (java_keywords != NULL) + x_java_lex (&token); + switch (token.type) { - object_list_destroy (java_keywords); - java_keywords = NULL; - } - return; - } + case token_type_symbol: + { + /* Combine symbol1 . ... . 
symbolN to a single string, so that + we can recognize static function calls like + GettextResource.gettext. The information present for + symbolI.....symbolN has precedence over the information for + symbolJ.....symbolN with J > I. */ + char *sum = token.string; + size_t sum_len = strlen (sum); + const char *dottedname; + + for (;;) + { + token_ty token2; + + x_java_lex (&token2); + if (token2.type == token_type_dot) + { + token_ty token3; + + x_java_lex (&token3); + if (token3.type == token_type_symbol) + { + char *addend = token3.string; + size_t addend_len = strlen (addend); + + sum = + (char *) xrealloc (sum, sum_len + 1 + addend_len + 1); + sum[sum_len] = '.'; + memcpy (sum + sum_len + 1, addend, addend_len + 1); + sum_len += 1 + addend_len; + + free_token (&token3); + free_token (&token2); + continue; + } + x_java_unlex (&token3); + } + x_java_unlex (&token2); + break; + } + + for (dottedname = sum;;) + { + void *keyword_value; + + if (find_entry (&keywords, dottedname, strlen (dottedname), + &keyword_value) + == 0) + { + int argnum1 = (int) (long) keyword_value & ((1 << 10) - 1); + int argnum2 = (int) (long) keyword_value >> 10; + + next_commas_to_skip = argnum1 - 1; + next_plural_commas = (argnum2 > argnum1 ? argnum2 - argnum1 : 0); + state = 1; + break; + } + + dottedname = strchr (dottedname, '.'); + if (dottedname == NULL) + { + state = 0; + break; + } + dottedname++; + } + free (sum); + continue; + } - if (java_keywords == NULL) - java_keywords = object_list_alloc (); + case token_type_lparen: + if (extract_parenthesized (mlp, token_type_rparen, + state ? next_commas_to_skip : -1, + state ? 
next_plural_commas : 0)) + return true; + state = 0; + continue; + + case token_type_rparen: + if (terminator == token_type_rparen) + return false; + if (terminator == token_type_rbrace) + { + error_with_progname = false; + error (0, 0, + _("%s:%d: warning: ')' found where '}' was expected"), + logical_file_name, token.line_number); + error_with_progname = true; + } + state = 0; + continue; + + case token_type_lbrace: + if (extract_parenthesized (mlp, token_type_rbrace, -1, 0)) + return true; + state = 0; + continue; + + case token_type_rbrace: + if (terminator == token_type_rbrace) + return false; + if (terminator == token_type_rparen) + { + error_with_progname = false; + error (0, 0, + _("%s:%d: warning: '}' found where ')' was expected"), + logical_file_name, token.line_number); + error_with_progname = true; + } + state = 0; + continue; - split_keywordspec (keyword, &keyword_end, &arg1, &arg2); - len = keyword_end - keyword; - kw = (char *) xmalloc (len + 1); - memcpy (kw, keyword, len); - kw[len] = '\0'; + case token_type_comma: + if (commas_to_skip >= 0) + { + if (commas_to_skip > 0) + commas_to_skip--; + else + if (plural_mp != NULL && plural_commas > 0) + { + commas_to_skip = plural_commas - 1; + plural_commas = 0; + } + else + commas_to_skip = -1; + } + state = 0; + continue; - /* kw should be a valid Java identifier sequence with dots. - A colon means an invalid parse in split_keywordspec(). 
*/ - if (strchr (kw, ':') == NULL) - { - if (arg1 == 0) - arg1 = 1; - add_object (java_keywords, alloc_keyword (kw, arg1, arg2)); - } -} + case token_type_string_literal: + { + lex_pos_ty pos; + pos.file_name = logical_file_name; + pos.line_number = token.line_number; + + if (extract_all) + { + xgettext_current_source_encoding = po_charset_utf8; + x_java_comment_to_xgettext_comment (token.comment); + remember_a_message (mlp, token.string, &pos); + x_java_comment_reset (); + xgettext_current_source_encoding = xgettext_global_source_encoding; + } + else + { + if (commas_to_skip == 0) + { + if (plural_mp == NULL) + { + /* Seen an msgid. */ + message_ty *mp; + + xgettext_current_source_encoding = po_charset_utf8; + x_java_comment_to_xgettext_comment (token.comment); + mp = remember_a_message (mlp, token.string, &pos); + x_java_comment_reset (); + xgettext_current_source_encoding = xgettext_global_source_encoding; + if (plural_commas > 0) + plural_mp = mp; + } + else + { + /* Seen an msgid_plural. */ + xgettext_current_source_encoding = po_charset_utf8; + remember_a_message_plural (plural_mp, token.string, + &pos); + xgettext_current_source_encoding = xgettext_global_source_encoding; + plural_mp = NULL; + } + } + else + free (token.string); + } + } + drop_reference (token.comment); + state = 0; + continue; + case token_type_eof: + return true; -/** - * Free any memory allocated by the tokenizer. - */ -static void -free_global () -{ - /** - * free memory used by strings and comments as they are strdup'ed - * by the lexer. - */ - if (parser_global->string != NULL) - { - free (parser_global->string); - parser_global->string = NULL; - } - if (parser_global->comment != NULL) - { - free (parser_global->comment); - parser_global->comment = NULL; + case token_type_dot: + case token_type_number: + case token_type_plus: + case token_type_other: + state = 0; + continue; + + default: + abort (); + } } } -/** - * Main java keyword extract function. 
- */ void extract_java (FILE *f, const char *real_filename, const char *logical_filename, msgdomain_list_ty *mdlp) { - char *logical_file_name = xstrdup (logical_filename); - int token; - PARSER_STATE state = STATE_NONE; - PARSER_STATE last_state = STATE_NONE; - char *str = NULL; /* used only if state == STATE_STRING - || state == STATE_APPEND */ - char *key = NULL; /* used only if state == STATE_WORD - || state == STATE_INVOCATION */ - message_ty *plural = NULL; /* used only after state was STATE_KEYWORD */ message_list_ty *mlp = mdlp->item[0]->messages; - java_keyword *current_keyword = NULL; - java_keyword *keyword; - int argument_counter = 0; - - if (java_keywords == NULL) - { - /* ops, no standard keywords */ - x_java_keyword ("GettextResource.gettext:2"); /* static method */ - x_java_keyword ("GettextResource.ngettext:2,3"); /* static method */ - x_java_keyword ("gettext"); - x_java_keyword ("ngettext:1,2"); - x_java_keyword ("getString"); /* ResourceBundle.getString */ - } - - memset (parser_global, 0, sizeof (*parser_global)); - /* first line is 1 */ - parser_global->line_no = 1; - - yyin = f; - do - { - token = yylex (); - switch (token) - { - - case JAVA_WORD: - if (state == STATE_KEYWORD) - { - last_state = STATE_KEYWORD; - argument_counter ++; - } - if (state == STATE_INVOCATION) - { - char *k2; - k2 = append_strings (key, "."); - free (key); - key = append_strings (k2, parser_global->word); - free (k2); - } - else - { - if (str != NULL) - { - free (str); - str = NULL; - } - state = STATE_WORD; - key = xstrdup (parser_global->word); - } - /* For java we try to match both things like object.methodCall() - and methodCall(). 
*/ - if ((keyword = is_keyword (key)) != NULL - || (keyword = is_keyword (parser_global->word)) != NULL) - { - current_keyword = keyword; - free (key); - state = STATE_KEYWORD; - argument_counter = 1; - plural = NULL; - } - break; - - case JAVA_STRING: - if (state == STATE_KEYWORD) - last_state = STATE_KEYWORD; - if (state == STATE_APPEND) - { - char *s2; - s2 = append_strings (str, translate_esc (parser_global->string)); - free (str); - str = s2; - } - else - str = translate_esc (parser_global->string); - state = STATE_STRING; - break; - - case JAVA_OPERATOR: - if (state == STATE_STRING && isplus (parser_global->operator)) - state = STATE_APPEND; - else - { - if (str != NULL) - { - free (str); - str = NULL; - } - state = STATE_NONE; - } - break; - - case JAVA_FLOW: - /* Did we get something? */ - if (state == STATE_STRING - && (last_state == STATE_KEYWORD || extract_all_strings)) - { - lex_pos_ty pos; - pos.file_name = logical_file_name; - pos.line_number = parser_global->line_no; - if (extract_all_strings) - { - remember_a_message (mlp, str, &pos); - } - else if (argument_counter == current_keyword->msgid_arg) - { - plural = remember_a_message (mlp, str, &pos); - if (current_keyword->msgid_plural_arg == 0) - { - /** - * we don't expect any plural arg, reset state - */ - state = STATE_NONE; - last_state = STATE_NONE; - argument_counter = 0; - } - else - { - argument_counter ++; - } - - } - else if (argument_counter == current_keyword->msgid_plural_arg - && str != NULL) - { - remember_a_message_plural (plural, str, &pos); - state = STATE_NONE; - last_state = STATE_NONE; - argument_counter = 0; - } - else - { - if (str != NULL) - free (str); - } - str = NULL; - } - if (extract_all_strings) - { - if (str != NULL) - { - free (str); - str = NULL; - } - state = STATE_NONE; - last_state = STATE_NONE; - } + fp = f; + real_file_name = real_filename; + logical_file_name = xstrdup (logical_filename); + line_number = 1; - if (state == STATE_WORD && isdot 
(parser_global->flow)) - { - state = STATE_INVOCATION; - } + last_comment_line = -1; + last_non_comment_line = -1; - break; + phase6_last = token_type_eof; - case JAVA_COMMENT: - if (str != NULL) - { - free (str); - str = NULL; - } - state = STATE_NONE; - last_state = STATE_NONE; - xgettext_comment_add (parser_global->comment); - break; + init_keywords (); - default: - if (str != NULL) - { - free (str); - str = NULL; - } - state = STATE_NONE; - } - free_global (); - } - while (token != -1); + /* Eat tokens until eof is seen. When extract_parenthesized returns + due to an unbalanced closing parenthesis, just restart it. */ + while (!extract_parenthesized (mlp, token_type_eof, -1, 0)) + ; - if (str != NULL) - free (str); + fp = NULL; + real_file_name = NULL; + logical_file_name = NULL; + line_number = 0; } diff --git a/gettext-tools/src/x-java.l b/gettext-tools/src/x-java.l deleted file mode 100644 index 7d13280f9..000000000 --- a/gettext-tools/src/x-java.l +++ /dev/null @@ -1,645 +0,0 @@ -/* xgettext Java backend. -*- C -*- - Copyright (C) 2001-2002 Free Software Foundation, Inc. - Written by Tommy Johansson , 2001. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ - -%{ -#ifdef HAVE_CONFIG_H -# include -#endif - -#include -#include -#include -#include - -#include "message.h" -#include "x-java.h" -#include "xgettext.h" -#include "xmalloc.h" -#include "strstr.h" - -typedef enum -{ - JAVA_WORD, - JAVA_STRING, - JAVA_OPERATOR, - JAVA_FLOW, - JAVA_COMMENT -} TOKEN_TYPE; - -typedef struct -{ - char *word; - char *string; - char *operator; - char *flow; - char *comment; - - int line_no; -} PARSER_GLOBAL; - -static PARSER_GLOBAL pg; -static PARSER_GLOBAL *parser_global = &pg; - -typedef enum -{ - STATE_NONE, - STATE_STRING, - STATE_WORD, - STATE_APPEND, - STATE_INVOCATION, - STATE_KEYWORD -} PARSER_STATE; - -typedef struct -{ - char *data; - int len; - int maxlen; -} char_buf; - - -typedef struct _object_list -{ - int num_obj; - int max_num_obj; - void **objects; -} object_list; - -#define INITIAL_OBJECT_LIST_SIZE 10 -#define OBJECT_LIST_GROWTH 10 - -typedef struct _java_keyword -{ - char *keyword; - int msgid_arg; - int msgid_plural_arg; -} java_keyword; - - -#define INITIAL_CHARBUF_SIZE 500 -#define CHARBUF_GROWTH 100 -static char_buf * -create_char_buf () -{ - char_buf *b = (char_buf *) xmalloc (sizeof (char_buf)); - b->data = (char *) xmalloc (INITIAL_CHARBUF_SIZE); - b->data[0] = '\0'; - b->len = 0; - b->maxlen = INITIAL_CHARBUF_SIZE; - return b; -} - -static void -append_char_buf (char_buf *b, int c) -{ - if (b->len >= b->maxlen - 1) - { - b->data = (char *) xrealloc (b->data, b->maxlen + CHARBUF_GROWTH); - b->maxlen += CHARBUF_GROWTH; - } - b->data[b->len++] = c; - b->data[b->len] = '\0'; -} - -static char * -get_string (char_buf *b) -{ - return xstrdup (b->data); -} - -static void -destroy_charbuf (char_buf *b) -{ - free (b->data); - free (b); -} - -static void -update_line_no (int c) -{ - if (c == '\n') - parser_global->line_no++; -} - -static void -strip_ending_spaces (char *str) -{ - int len = strlen (str); - - while (len > 0 && isspace ((unsigned char) str[len - 1])) - len--; - str[len] = '\0'; -} -%} - -%option 
noyywrap - -NUM [0-9] -ID [a-zA-Z_][a-zA-Z0-9_]* - -%% - -"/*" { - int c; - int last; - char *str; - - char_buf *charbuf = create_char_buf (); - for (;;) - { - c = input (); - last = input (); - update_line_no (c); - if ((c == '*' && last == '/') || c == EOF) - break; - unput (last); - append_char_buf (charbuf, c); - } - str = get_string (charbuf); - destroy_charbuf (charbuf); - strip_ending_spaces (str); - parser_global->comment = str; - return JAVA_COMMENT; -} - -{NUM}| {NUM}+"."{NUM}* -\" { - int c; - char *str; - char_buf *charbuf = create_char_buf (); - while ((c = input ()) != EOF && c != '"') - { - update_line_no (c); - append_char_buf (charbuf, c); - } - str = get_string (charbuf); - destroy_charbuf (charbuf); - parser_global->string = str; - return JAVA_STRING; -} - -{ID} { - parser_global->word = yytext; - return JAVA_WORD; -} - -"."|"("|")"|";"|"{"|"}"|"["|"]"|","|":"|"\\"|"?"|"\'" { - parser_global->flow = yytext; - return JAVA_FLOW; -} - -"="|"<"|">"|"+"|"-"|"*"|"/"|"!"|"&"|"|"|"%"|"^"|"~" { - parser_global->operator = yytext; - return JAVA_OPERATOR; -} - -"#"|"@"|"\r"|"`" /* ignore whitespace */ - -"//"[^\n]* { - parser_global->comment = xstrdup (yytext + 2); - return JAVA_COMMENT; -} -"\n"|"\r"|"\r\n" parser_global->line_no++; -[ \t]+ -. 
-<> return -1; -%% - -static char * -append_strings (char *a, const char *b) -{ - int total_size = strlen (a) + strlen (b) + 1; - char *new_string = (char *) xmalloc (total_size); - strcpy (new_string, a); - strcat (new_string, b); - return new_string; -} - -static inline bool -isplus (char *s) -{ - return *s == '+'; -} - -static inline bool -isdot (char *s) -{ - return *s == '.'; -} - - -static char * -translate_esc (char *s) -{ - char *n = (char *) xmalloc (strlen (s) + 1); - size_t i; - size_t j = 0; - - for (i = 0; i < strlen (s); i++) - switch (s[i]) - { - case '\\': - if (s[i + 1] == 'n') - { - n[j++] = '\n'; - i++; - } - break; - default: - n[j++] = s[i]; - } - n[j] = '\0'; - return n; -} - -static object_list * -object_list_alloc () -{ - object_list *list = xmalloc (sizeof (object_list)); - list->max_num_obj = INITIAL_OBJECT_LIST_SIZE; - list->num_obj = 0; - list->objects = xmalloc (sizeof (void *) * INITIAL_OBJECT_LIST_SIZE); - return list; -} - -static void -object_list_destroy (object_list *list) -{ - free (list->objects); - free (list); -} - -static int -get_num_objects (const object_list *list) -{ - return list->num_obj; -} - -static void * -get_object (const object_list *list, int i) -{ - return list->objects[i]; -} - -static void -add_object (object_list *list, void *object) -{ - if (list->num_obj + 1 >= list->max_num_obj) - { - list->max_num_obj += OBJECT_LIST_GROWTH; - list->objects = - xrealloc (list->objects, list->max_num_obj * sizeof (void *)); - } - list->objects[list->num_obj ++] = object; -} - - -/* options */ -static bool extract_all_strings = false; - -void -x_java_extract_all () -{ - extract_all_strings = true; -} - - -static java_keyword * -alloc_keyword (const char *keyword, int arg1, int arg2) -{ - java_keyword *jk = xmalloc (sizeof (java_keyword)); - jk->keyword = xstrdup (keyword); - jk->msgid_arg = arg1; - jk->msgid_plural_arg = arg2; - return jk; -} - -static object_list *java_keywords = NULL; - - -/** - * Backwards substring 
match. - */ -static bool -tailcmp (const char *s1, const char *s2) -{ - int len1 = strlen (s1); - int len2 = strlen (s2); - int start = len1 - len2; - if (start < 0) - return false; - return (start == 0 || s1[start-1] == '.') && (strcmp (s1 + start, s2) == 0); -} - -/** - * Try to match a string against the keyword. If substring_match is - * true substring match is used. - */ -static bool -do_compare (const char *s1, const char *s2) -{ - if (substring_match) - return strstr (s1, s2) != NULL; - else - return tailcmp (s1, s2); -} - -/** - * Check if a string is a keyword or not. - */ -static java_keyword * -is_keyword (const char *s) -{ - int i; - int num_keywords = get_num_objects (java_keywords); - java_keyword *kw; - - for (i = 0; i < num_keywords; i++) - { - kw = (java_keyword *) get_object (java_keywords, i); - - if (do_compare (s, kw->keyword)) - return kw; - } - return NULL; -} - -/** - * Add a keyword to the list of possible keywords. - */ -void -x_java_keyword (const char *keyword) -{ - const char *keyword_end; - int arg1; - int arg2; - size_t len; - char *kw; - - if (keyword == NULL) - { - if (java_keywords != NULL) - { - object_list_destroy (java_keywords); - java_keywords = NULL; - } - return; - } - - if (java_keywords == NULL) - java_keywords = object_list_alloc (); - - split_keywordspec (keyword, &keyword_end, &arg1, &arg2); - len = keyword_end - keyword; - kw = (char *) xmalloc (len + 1); - memcpy (kw, keyword, len); - kw[len] = '\0'; - - /* kw should be a valid Java identifier sequence with dots. - A colon means an invalid parse in split_keywordspec(). */ - if (strchr (kw, ':') == NULL) - { - if (arg1 == 0) - arg1 = 1; - add_object (java_keywords, alloc_keyword (kw, arg1, arg2)); - } -} - - -/** - * Free any memory allocated by the tokenizer. - */ -static void -free_global () -{ - /** - * free memory used by strings and comments as they are strdup'ed - * by the lexer. 
- */ - if (parser_global->string != NULL) - { - free (parser_global->string); - parser_global->string = NULL; - } - if (parser_global->comment != NULL) - { - free (parser_global->comment); - parser_global->comment = NULL; - } -} - - -/** - * Main java keyword extract function. - */ -void -extract_java (FILE *f, - const char *real_filename, const char *logical_filename, - msgdomain_list_ty *mdlp) -{ - char *logical_file_name = xstrdup (logical_filename); - int token; - PARSER_STATE state = STATE_NONE; - PARSER_STATE last_state = STATE_NONE; - char *str = NULL; /* used only if state == STATE_STRING - || state == STATE_APPEND */ - char *key = NULL; /* used only if state == STATE_WORD - || state == STATE_INVOCATION */ - message_ty *plural = NULL; /* used only after state was STATE_KEYWORD */ - message_list_ty *mlp = mdlp->item[0]->messages; - java_keyword *current_keyword = NULL; - java_keyword *keyword; - int argument_counter = 0; - - if (java_keywords == NULL) - { - /* ops, no standard keywords */ - x_java_keyword ("GettextResource.gettext:2"); /* static method */ - x_java_keyword ("GettextResource.ngettext:2,3"); /* static method */ - x_java_keyword ("gettext"); - x_java_keyword ("ngettext:1,2"); - x_java_keyword ("getString"); /* ResourceBundle.getString */ - } - - memset (parser_global, 0, sizeof (*parser_global)); - /* first line is 1 */ - parser_global->line_no = 1; - - yyin = f; - do - { - token = yylex (); - switch (token) - { - - case JAVA_WORD: - if (state == STATE_KEYWORD) - { - last_state = STATE_KEYWORD; - argument_counter ++; - } - if (state == STATE_INVOCATION) - { - char *k2; - k2 = append_strings (key, "."); - free (key); - key = append_strings (k2, parser_global->word); - free (k2); - } - else - { - if (str != NULL) - { - free (str); - str = NULL; - } - state = STATE_WORD; - key = xstrdup (parser_global->word); - } - /* For java we try to match both things like object.methodCall() - and methodCall(). 
*/ - if ((keyword = is_keyword (key)) != NULL - || (keyword = is_keyword (parser_global->word)) != NULL) - { - current_keyword = keyword; - free (key); - state = STATE_KEYWORD; - argument_counter = 1; - plural = NULL; - } - break; - - case JAVA_STRING: - if (state == STATE_KEYWORD) - last_state = STATE_KEYWORD; - if (state == STATE_APPEND) - { - char *s2; - s2 = append_strings (str, translate_esc (parser_global->string)); - free (str); - str = s2; - } - else - str = translate_esc (parser_global->string); - state = STATE_STRING; - break; - - case JAVA_OPERATOR: - if (state == STATE_STRING && isplus (parser_global->operator)) - state = STATE_APPEND; - else - { - if (str != NULL) - { - free (str); - str = NULL; - } - state = STATE_NONE; - } - break; - - case JAVA_FLOW: - /* Did we get something? */ - if (state == STATE_STRING - && (last_state == STATE_KEYWORD || extract_all_strings)) - { - lex_pos_ty pos; - pos.file_name = logical_file_name; - pos.line_number = parser_global->line_no; - if (extract_all_strings) - { - remember_a_message (mlp, str, &pos); - } - else if (argument_counter == current_keyword->msgid_arg) - { - plural = remember_a_message (mlp, str, &pos); - if (current_keyword->msgid_plural_arg == 0) - { - /** - * we don't expect any plural arg, reset state - */ - state = STATE_NONE; - last_state = STATE_NONE; - argument_counter = 0; - } - else - { - argument_counter ++; - } - - } - else if (argument_counter == current_keyword->msgid_plural_arg - && str != NULL) - { - remember_a_message_plural (plural, str, &pos); - state = STATE_NONE; - last_state = STATE_NONE; - argument_counter = 0; - } - else - { - if (str != NULL) - free (str); - } - str = NULL; - } - - if (extract_all_strings) - { - if (str != NULL) - { - free (str); - str = NULL; - } - state = STATE_NONE; - last_state = STATE_NONE; - } - - if (state == STATE_WORD && isdot (parser_global->flow)) - { - state = STATE_INVOCATION; - } - - break; - - case JAVA_COMMENT: - if (str != NULL) - { - free (str); 
- str = NULL; - } - state = STATE_NONE; - last_state = STATE_NONE; - xgettext_comment_add (parser_global->comment); - break; - - default: - if (str != NULL) - { - free (str); - str = NULL; - } - state = STATE_NONE; - } - free_global (); - } - while (token != -1); - - if (str != NULL) - free (str); -} diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog index 7cee36e77..5f20369e5 100644 --- a/gettext-tools/tests/ChangeLog +++ b/gettext-tools/tests/ChangeLog @@ -1,3 +1,8 @@ +2003-10-04 Bruno Haible + + * xgettext-11: Test details of the new Java backend, instead of the + --keyword-substring option. + 2003-10-07 Bruno Haible * lang-perl-1: Set LC_CTYPE to empty in while setting LANG. diff --git a/gettext-tools/tests/xgettext-11 b/gettext-tools/tests/xgettext-11 index 0d81ede37..85646a7be 100755 --- a/gettext-tools/tests/xgettext-11 +++ b/gettext-tools/tests/xgettext-11 @@ -1,41 +1,176 @@ #!/bin/sh # -# Some tests for java substring keyword support +# More tests for java support # tmpfiles="" trap 'rm -fr $tmpfiles' 1 2 3 15 tmpfiles="$tmpfiles xg-test11.java" -cat <<EOF > xg-test11.java +cat <<\EOF > xg-test11.java class TestCase { - public TestCase() { - // test exception - throw new TestException("Test exception"); - // empty exception text - throw new TestException(""); - } + public static void main (String[] args) { + // Test recognition of \u escapes: Böse Bübchen + gettext ("B\u00f6se B\u00fcbchen"); + // Test recognition of \u escapes with different number of u + gettext ("Japanese: \uu65e5\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu672c\u8A9e"); + // Test recognition of \u escapes when they form UTF-16 surrogates + // Here: U+1D49E MATHEMATICAL SCRIPT CAPITAL C + gettext ("script \ud835\udc9e = ..."); + // Test invalid surrogates. + gettext ("invalid surrogate \ud835 first half"); + gettext ("invalid surrogate \udc9e second half"); + // Don't let the line numbers be confused by \u newlines. 
+ \u000a \u000d \u000d\u000a + gettext ("embedded\nnewline"); + // Spaces from end of comment are removed. \u000agettext("dummy"); + // Various ways to write a backslash are equivalent. + gettext ("\u005c\u005c"); + gettext ("\u005c\"); + gettext ("\\u005c"); + gettext ("\\"); + gettext ("\134"); + // Escape sequences in strings. + gettext ("t -> \t, b -> \b, n -> \n, dquote -> \", squote -> \' ..."); + // Octal escapes have 2 or 3 digits, depending on the initial digit. + gettext ("bel: \7\nnewline: \12backslash: \134\ndquote-zero: \420\n"); + // Hex escapes are not recognized + gettext ("no bel: \x07\n"); + gettext // Recognized despite comments + ( /* Even across multiline +comment! */ "this is a single " /* now comes the concatenation! */ + // after + + "long line"); + // Character literals are not extracted. + gettext ('x'); + // Invalid concatenations are not concatenated. + gettext ("fooba"+'r'); + // Verify that the static function name has priority. + GettextResource.gettext("NOT extracted", "this one is extracted"); + // Verify that a comma inside braces is hidden. + GettextResource.gettext( + new Object() { + public int foo () { + return 5, 8; + } + }, + "this is the second argument"); + } } EOF -tmpfiles="$tmpfiles xg-test11.po" +tmpfiles="$tmpfiles xg-test11.tmp xg-test11.pot" : ${XGETTEXT=xgettext} -${XGETTEXT} --omit-header --no-location -c --keyword-substring --keyword=Exception -d xg-test11 xg-test11.java +${XGETTEXT} --from-code=ISO-8859-1 -c -o xg-test11.tmp xg-test11.java 2>/dev/null test $? = 0 || { rm -fr $tmpfiles; exit 1; } +grep -v 'POT-Creation-Date' < xg-test11.tmp > xg-test11.pot tmpfiles="$tmpfiles xg-test11.ok" -cat <<EOF > xg-test11.ok -#. test exception -msgid "Test exception" +cat <<\EOF > xg-test11.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. 
+# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#. Test recognition of \u escapes: Böse Bübchen +#: xg-test11.java:4 +msgid "Böse Bübchen" +msgstr "" + +#. Test recognition of \u escapes with different number of u +#: xg-test11.java:6 +msgid "Japanese: 日本語" +msgstr "" + +#. Test recognition of \u escapes when they form UTF-16 surrogates +#. Here: U+1D49E MATHEMATICAL SCRIPT CAPITAL C +#: xg-test11.java:9 +msgid "script 𝒞 = ..." +msgstr "" + +#. Test invalid surrogates. +#: xg-test11.java:11 +msgid "invalid surrogate � first half" +msgstr "" + +#: xg-test11.java:12 +msgid "invalid surrogate í²ž second half" +msgstr "" + +#. Don't let the line numbers be confused by \u newlines. +#: xg-test11.java:15 +msgid "" +"embedded\n" +"newline" +msgstr "" + +#. Spaces from end of comment are removed. +#: xg-test11.java:16 +msgid "dummy" +msgstr "" + +#. Various ways to write a backslash are equivalent. +#: xg-test11.java:18 xg-test11.java:19 xg-test11.java:20 xg-test11.java:21 +#: xg-test11.java:22 +msgid "\\" +msgstr "" + +#. Escape sequences in strings. +#: xg-test11.java:24 +msgid "" +"t -> \t, b -> \b, n -> \n" +", dquote -> \", squote -> ' ..." msgstr "" -#. empty exception text +#. Octal escapes have 2 or 3 digits, depending on the initial digit. +#: xg-test11.java:26 msgid "" +"bel: \n" +"newline: \n" +"backslash: \\\n" +"dquote-zero: \"0\n" +msgstr "" + +#. Hex escapes are not recognized +#: xg-test11.java:28 +msgid "no bel: \\x07\n" +msgstr "" + +#. Recognized despite comments +#. Even across multiline +#. comment! +#: xg-test11.java:31 +msgid "this is a single long line" +msgstr "" + +#. Invalid concatenations are not concatenated. +#: xg-test11.java:36 +msgid "fooba" +msgstr "" + +#. 
Verify that the static function name has priority. +#: xg-test11.java:38 +msgid "this one is extracted" +msgstr "" + +#: xg-test11.java:46 +msgid "this is the second argument" msgstr "" EOF : ${DIFF=diff} -${DIFF} xg-test11.ok xg-test11.po +${DIFF} xg-test11.ok xg-test11.pot result=$? rm -fr $tmpfiles