From: Bruno Haible Date: Wed, 8 Aug 2001 12:08:55 +0000 (+0000) Subject: Split xgettext into a common frontend and a C specific backend. X-Git-Tag: v0.11~557 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c3bda71cf96bb96bbfee19e725ce1dbaf2d96af3;p=thirdparty%2Fgettext.git Split xgettext into a common frontend and a C specific backend. --- diff --git a/src/ChangeLog b/src/ChangeLog index 1c3d5480b..2f81d774d 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,57 @@ +2001-07-27 Bruno Haible + + * x-c.h: New file. + * x-c.c: New file. + (xgettext_token_type_ty, xgettext_token_ty): Moved here from + xget-lex.h. In xgettext_token_ty, unify line_number and file_name into + lex_pos_ty. + (token_type_ty, token_ty): Moved here from xget-lex.c. + (extract_all): Moved here from xgettext.c. + (keywords, default_keywords, trigraphs, file_name, logical_file_name, + line_number, fp, last_comment_line, last_non_comment_line, + newline_count): Moved here from xget-lex.c. + (phase1_pushback, phase1_pushback_length, phase1_getc, phase1_ungetc, + phase2_pushback, phase2_pushback_length, phase2_getc, phase2_ungetc, + phase3_pushback, phase3_pushback_length, phase3_getc, phase3_ungetc, + phase4_getc, phase4_ungetc, phase7_getc, phase7_ungetc, + phase5_pushback, phase5_pushback_length, phase5_get, phase5_unget, + phaseX_get, phase6_pushback, phase6_pushback_length, phase6_get, + phase6_unget, phase8_get): Moved here from xget-lex.c. Use + xgettext_comment_add instead of accessing 'comment'. Don't free + logical_file_name; it is still used as file_name of messages in mdlp. + (x_c_lex): Moved here from xget-lex.c, renamed from xgettext_lex. + (extract_c): Moved here from xgettext.c, renamed from scan_c_file. + Change arguments from filename to FILE * and two filenames. Don't call + xgettext_lex_open and xgettext_lex_close. + (x_c_extract_all): New function. + (x_c_keyword): Moved here from xget-lex.c, renamed from + xgettext_lex_keyword. 
+ (x_c_any_keywords): Moved here from xget-lex.c, renamed from + xgettext_any_keywords. + (x_c_trigraphs): Moved here from xget-lex.c, renamed from + xgettext_lex_trigraphs. + * xgettext.h: New file. + * xgettext.c (extract_all): Move to x-c.c. + (comment): Moved here from xget-lex.c. + (xgettext_comment_add): New function. + (xgettext_comment): Moved here from xget-lex.c, renamed from + xgettext_lex_comment. + (xgettext_comment_reset): Moved here from xget-lex.c, renamed from + xgettext_lex_comment_reset. + (xgettext_open): New function, mostly taken from xgettext_lex_open + in xget-lex.c. + (remember_a_message): Make non-static. Replace xgettext_token_ty arg + with lex_pos_ty argument. + (remember_a_message_plural): Likewise. + (scan_c_file): Moved the body to x-c.c:extract_c. + (language_to_scanner): Use SCANNERS_C. + (extension_to_language): Use EXTENSIONS_C. + * xget-lex.h: Remove file. + * xget-lex.c: Remove file. + * Makefile.am (noinst_HEADERS): Remove xget-lex.h. Add xgettext.h and + x-c.h. + (xgettext_SOURCES): Remove xget-lex.c. Add x-c.c. + 2001-07-26 Bruno Haible * file-list.h: New file. 
diff --git a/src/Makefile.am b/src/Makefile.am index 8082cc071..d69db8330 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -24,9 +24,9 @@ msgcmp msgfmt msgmerge msgunfmt xgettext \ msgcat msgcomm msgconv msgen msgexec msggrep msguniq noinst_HEADERS = pos.h message.h po-gram.h po-hash.h po-charset.h po-lex.h \ -po.h open-po.h read-po.h str-list.h write-po.h xget-lex.h dir-list.h \ -file-list.h po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h \ -msgl-ascii.h msgl-cat.h +po.h open-po.h read-po.h str-list.h write-po.h dir-list.h file-list.h \ +po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h msgl-ascii.h \ +msgl-cat.h xgettext.h x-c.h EXTRA_DIST = FILES @@ -52,8 +52,8 @@ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ msgl-ascii.c msgunfmt_SOURCES = message.c msgunfmt.c str-list.c write-po.c msgl-ascii.c xgettext_SOURCES = message.c open-po.c po-gram-gen.y po-hash-gen.y \ -po-charset.c po-lex.c po.c str-list.c xget-lex.c xgettext.c dir-list.c \ -write-po.c msgl-ascii.c file-list.c +po-charset.c po-lex.c po.c str-list.c xgettext.c dir-list.c write-po.c \ +msgl-ascii.c file-list.c x-c.c msgcat_SOURCES = msgcat.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ msgl-ascii.c msgl-iconv.c msgl-cat.c file-list.c diff --git a/src/xget-lex.c b/src/x-c.c similarity index 82% rename from src/xget-lex.c rename to src/x-c.c index 516b9f48d..395ebee67 100644 --- a/src/xget-lex.c +++ b/src/x-c.c @@ -23,21 +23,19 @@ #include #include +#include #include #include +#include -#include "dir-list.h" +#include "message.h" +#include "x-c.h" +#include "xgettext.h" #include "error.h" #include "progname.h" #include "system.h" -#include "libgettext.h" #include "hash.h" -#include "str-list.h" -#include "xget-lex.h" - -#ifndef errno -extern int errno; -#endif +#include "libgettext.h" #define _(s) gettext(s) @@ -75,6 +73,37 @@ extern int errno; xgettext.c) with a stream of C tokens. 
The comments are accumulated in a buffer, and given to xgettext when asked for. */ +enum xgettext_token_type_ty +{ + xgettext_token_type_eof, + xgettext_token_type_keyword, + xgettext_token_type_lparen, + xgettext_token_type_rparen, + xgettext_token_type_comma, + xgettext_token_type_string_literal, + xgettext_token_type_symbol +}; +typedef enum xgettext_token_type_ty xgettext_token_type_ty; + +typedef struct xgettext_token_ty xgettext_token_ty; +struct xgettext_token_ty +{ + xgettext_token_type_ty type; + + /* These fields are used only for xgettext_token_type_keyword. */ + int argnum1; + int argnum2; + + /* This field is used only for xgettext_token_type_string_literal. */ + char *string; + + /* These fields are only for + xgettext_token_type_keyword, + xgettext_token_type_string_literal. */ + lex_pos_ty pos; +}; + + enum token_type_ty { token_type_character_constant, @@ -102,14 +131,18 @@ struct token_ty }; +/* If true extract all strings. */ +static bool extract_all = false; + +static hash_table keywords; +static bool default_keywords = true; + +static bool trigraphs = false; + static const char *file_name; static char *logical_file_name; static int line_number; static FILE *fp; -static bool trigraphs = false; -static string_list_ty *comment; -static hash_table keywords; -static bool default_keywords = true; /* These are for tracking whether comments count as immediately before keyword. 
*/ @@ -136,79 +169,7 @@ static void phaseX_get PARAMS ((token_ty *tp)); static void phase6_get PARAMS ((token_ty *tp)); static void phase6_unget PARAMS ((token_ty *tp)); static void phase8_get PARAMS ((token_ty *tp)); - - - -void -xgettext_lex_open (fn) - const char *fn; -{ - char *new_name; - - if (strcmp (fn, "-") == 0) - { - new_name = xstrdup (_("standard input")); - logical_file_name = xstrdup (new_name); - fp = stdin; - } - else if (IS_ABSOLUTE_PATH (fn)) - { - new_name = xstrdup (fn); - fp = fopen (fn, "r"); - if (fp == NULL) - error (EXIT_FAILURE, errno, _("\ -error while opening \"%s\" for reading"), fn); - logical_file_name = xstrdup (new_name); - } - else - { - int j; - - for (j = 0; ; ++j) - { - const char *dir = dir_list_nth (j); - - if (dir == NULL) - error (EXIT_FAILURE, ENOENT, _("\ -error while opening \"%s\" for reading"), fn); - - new_name = concatenated_pathname (dir, fn, NULL); - - fp = fopen (new_name, "r"); - if (fp != NULL) - break; - - if (errno != ENOENT) - error (EXIT_FAILURE, errno, _("\ -error while opening \"%s\" for reading"), new_name); - free (new_name); - } - - /* Note that the NEW_NAME variable contains the actual file name - and the logical file name is what is reported by xgettext. In - this case NEW_NAME is set to the file which was found along the - directory search path, and LOGICAL_FILE_NAME is is set to the - file name which was searched for. */ - logical_file_name = xstrdup (fn); - } - - file_name = new_name; - line_number = 1; -} - - -void -xgettext_lex_close () -{ - if (fp != stdin) - fclose (fp); - free ((char *) file_name); - free (logical_file_name); - fp = NULL; - file_name = NULL; - logical_file_name = NULL; - line_number = 0; -} +static void x_c_lex PARAMS ((xgettext_token_ty *tp)); /* 1. Terminate line by \n, regardless of the external representation of @@ -425,8 +386,6 @@ phase4_getc () /* C comment. 
*/ buflen = 0; state = 0; - if (comment == NULL) - comment = string_list_alloc (); while (1) { c = phase3_getc (); @@ -449,7 +408,7 @@ phase4_getc () || buffer[buflen - 1] == '\t')) --buflen; buffer[buflen] = 0; - string_list_append (comment, buffer); + xgettext_comment_add (buffer); buflen = 0; state = 0; continue; @@ -466,7 +425,7 @@ phase4_getc () || buffer[buflen - 1] == '\t')) --buflen; buffer[buflen] = 0; - string_list_append (comment, buffer); + xgettext_comment_add (buffer); break; } /* FALLTHROUGH */ @@ -501,9 +460,7 @@ phase4_getc () buffer = xrealloc (buffer, bufmax); } buffer[buflen] = 0; - if (comment == NULL) - comment = string_list_alloc (); - string_list_append (comment, buffer); + xgettext_comment_add (buffer); last_comment_line = newline_count; return '\n'; } @@ -1075,14 +1032,12 @@ phase6_get (tp) && buf[1].type == token_type_number && buf[2].type == token_type_string_literal) { - free (logical_file_name); logical_file_name = xstrdup (buf[2].string); line_number = buf[1].number; } if (bufpos >= 2 && buf[0].type == token_type_number && buf[1].type == token_type_string_literal) { - free (logical_file_name); logical_file_name = xstrdup (buf[1].string); line_number = buf[0].number; } @@ -1103,7 +1058,7 @@ phase6_get (tp) } /* We must reset the selected comments. */ - xgettext_lex_comment_reset (); + xgettext_comment_reset (); } } @@ -1154,8 +1109,8 @@ phase8_get (tp) /* 9. Convert the remaining preprocessing tokens to C tokens and discards any white space from the translation unit. */ -void -xgettext_lex (tp) +static void +x_c_lex (tp) xgettext_token_ty *tp; { while (1) @@ -1187,7 +1142,7 @@ xgettext_lex (tp) with non-white space tokens. 
*/ ++newline_count; if (last_non_comment_line > last_comment_line) - xgettext_lex_comment_reset (); + xgettext_comment_reset (); break; case token_type_name: @@ -1195,13 +1150,13 @@ xgettext_lex (tp) if (default_keywords) { - xgettext_lex_keyword ("gettext"); - xgettext_lex_keyword ("dgettext:2"); - xgettext_lex_keyword ("dcgettext:2"); - xgettext_lex_keyword ("ngettext:1,2"); - xgettext_lex_keyword ("dngettext:2,3"); - xgettext_lex_keyword ("dcngettext:2,3"); - xgettext_lex_keyword ("gettext_noop"); + x_c_keyword ("gettext"); + x_c_keyword ("dgettext:2"); + x_c_keyword ("dcgettext:2"); + x_c_keyword ("ngettext:1,2"); + x_c_keyword ("dngettext:2,3"); + x_c_keyword ("dcngettext:2,3"); + x_c_keyword ("gettext_noop"); default_keywords = false; } @@ -1212,8 +1167,8 @@ xgettext_lex (tp) tp->type = xgettext_token_type_keyword; tp->argnum1 = (int) (long) keyword_value & ((1 << 10) - 1); tp->argnum2 = (int) (long) keyword_value >> 10; - tp->line_number = token.line_number; - tp->file_name = logical_file_name; + tp->pos.file_name = logical_file_name; + tp->pos.line_number = token.line_number; } else tp->type = xgettext_token_type_symbol; @@ -1243,8 +1198,8 @@ xgettext_lex (tp) tp->type = xgettext_token_type_string_literal; tp->string = token.string; - tp->line_number = token.line_number; - tp->file_name = logical_file_name; + tp->pos.file_name = logical_file_name; + tp->pos.line_number = token.line_number; return; default: @@ -1258,7 +1213,179 @@ xgettext_lex (tp) void -xgettext_lex_keyword (name) +extract_c (f, real_filename, logical_filename, mdlp) + FILE *f; + const char *real_filename; + const char *logical_filename; + msgdomain_list_ty *mdlp; +{ + message_list_ty *mlp = mdlp->item[0]->messages; + int state; + int commas_to_skip = 0; /* defined only when in states 1 and 2 */ + int plural_commas = 0; /* defined only when in states 1 and 2 */ + message_ty *plural_mp = NULL; /* defined only when in states 1 and 2 */ + int paren_nesting = 0; /* defined only when in state 2 
*/ + + /* The file is broken into tokens. Scan the token stream, looking for + a keyword, followed by a left paren, followed by a string. When we + see this sequence, we have something to remember. We assume we are + looking at a valid C or C++ program, and leave the complaints about + the grammar to the compiler. + + Normal handling: Look for + [A] keyword [B] ( ... [C] ... msgid ... ) [E] + Plural handling: Look for + [A] keyword [B] ( ... [C] ... msgid ... [D] ... msgid_plural ... ) [E] + At point [A]: state == 0. + At point [B]: state == 1, commas_to_skip set, plural_mp == NULL. + At point [C]: state == 2, commas_to_skip set, plural_mp == NULL. + At point [D]: state == 2, commas_to_skip set again, plural_mp != NULL. + At point [E]: state == 0. */ + + fp = f; + file_name = real_filename; + logical_file_name = xstrdup (logical_filename); + line_number = 1; + + /* Start state is 0. */ + state = 0; + + while (1) + { + xgettext_token_ty token; + + /* A state machine is used to do the recognising: + State 0 = waiting for something to happen + State 1 = seen one of our keywords + State 2 = waiting for part of an argument */ + x_c_lex (&token); + switch (token.type) + { + case xgettext_token_type_keyword: + if (!extract_all && state == 2) + { + if (commas_to_skip == 0) + { + error_with_progname = false; + error (0, 0, + _("%s:%d: warning: keyword nested in keyword arg"), + token.pos.file_name, token.pos.line_number); + error_with_progname = true; + continue; + } + + /* Here we should nest properly, but this would require a + potentially unbounded stack. We haven't run across an + example that needs this functionality yet. For now, + we punt and forget the outer keyword. */ + error_with_progname = false; + error (0, 0, + _("%s:%d: warning: keyword between outer keyword and its arg"), + token.pos.file_name, token.pos.line_number); + error_with_progname = true; + } + commas_to_skip = token.argnum1 - 1; + plural_commas = (token.argnum2 > token.argnum1 + ? 
token.argnum2 - token.argnum1 : 0); + plural_mp = NULL; + state = 1; + continue; + + case xgettext_token_type_lparen: + switch (state) + { + case 1: + paren_nesting = 0; + state = 2; + break; + case 2: + paren_nesting++; + break; + } + continue; + + case xgettext_token_type_rparen: + if (state == 2 && paren_nesting != 0) + paren_nesting--; + else + state = 0; + continue; + + case xgettext_token_type_comma: + if (state == 2 && commas_to_skip != 0) + { + if (paren_nesting == 0) + commas_to_skip--; + } + else + state = 0; + continue; + + case xgettext_token_type_string_literal: + if (extract_all) + remember_a_message (mlp, token.string, &token.pos); + else if (state == 2 && commas_to_skip == 0) + { + if (plural_mp == NULL) + { + /* Seen an msgid. */ + if (plural_commas == 0) + remember_a_message (mlp, token.string, &token.pos); + else + { + plural_mp = remember_a_message (mlp, token.string, + &token.pos); + commas_to_skip = plural_commas; + plural_commas = 0; + } + } + else + { + /* Seen an msgid_plural. */ + remember_a_message_plural (plural_mp, token.string, + &token.pos); + plural_mp = NULL; + } + } + else + { + free (token.string); + if (state == 1) + state = 0; + } + continue; + + case xgettext_token_type_symbol: + if (state == 1) + state = 0; + continue; + + case xgettext_token_type_eof: + break; + + default: + abort (); + } + break; + } + + /* Close scanner. 
*/ + fp = NULL; + file_name = NULL; + logical_file_name = NULL; + line_number = 0; +} + + +void +x_c_extract_all () +{ + extract_all = true; +} + + +void +x_c_keyword (name) const char *name; { if (name == NULL) @@ -1309,37 +1436,15 @@ xgettext_lex_keyword (name) } } - bool -xgettext_any_keywords () +x_c_any_keywords () { return (keywords.filled > 0) || default_keywords; } -const char * -xgettext_lex_comment (n) - size_t n; -{ - if (comment == NULL || n >= comment->nitems) - return NULL; - return comment->item[n]; -} - - -void -xgettext_lex_comment_reset () -{ - if (comment != NULL) - { - string_list_free (comment); - comment = NULL; - } -} - - void -xgettext_lex_trigraphs () +x_c_trigraphs () { trigraphs = true; } diff --git a/src/x-c.h b/src/x-c.h new file mode 100644 index 000000000..2526b1e33 --- /dev/null +++ b/src/x-c.h @@ -0,0 +1,50 @@ +/* xgettext C/C++/ObjectiveC backend. + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ + + +#define EXTENSIONS_C \ + { "c", "C", }, \ + { "h", "C", }, \ + { "C", "C++", }, \ + { "c++", "C++", }, \ + { "cc", "C++", }, \ + { "cxx", "C++", }, \ + { "cpp", "C++", }, \ + { "hh", "C++", }, \ + { "hpp", "C++", }, \ + { "m", "ObjectiveC" }, \ + +#define SCANNERS_C \ + { "C", scan_c_file, }, \ + { "C++", scan_c_file, }, \ + { "ObjectiveC", scan_c_file, }, \ + +/* Scan a C/C++/ObjectiveC file and add its translatable strings to mdlp. */ +extern void extract_c PARAMS ((FILE *fp, const char *real_filename, + const char *logical_filename, + msgdomain_list_ty *mdlp)); + + +/* Handling of options specific to this language. */ + +extern void x_c_extract_all PARAMS ((void)); + +extern void x_c_keyword PARAMS ((const char *name)); +extern bool x_c_any_keywords PARAMS ((void)); + +extern void x_c_trigraphs PARAMS ((void)); diff --git a/src/xget-lex.h b/src/xget-lex.h deleted file mode 100644 index b5ea05854..000000000 --- a/src/xget-lex.h +++ /dev/null @@ -1,67 +0,0 @@ -/* GNU gettext - internationalization aids - Copyright (C) 1995, 1996, 1998, 2000, 2001 Free Software Foundation, Inc. - - This file was written by Peter Miller - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ - -#ifndef _XGET_LEX_H -#define _XGET_LEX_H - -#include - -enum xgettext_token_type_ty -{ - xgettext_token_type_eof, - xgettext_token_type_keyword, - xgettext_token_type_lparen, - xgettext_token_type_rparen, - xgettext_token_type_comma, - xgettext_token_type_string_literal, - xgettext_token_type_symbol -}; -typedef enum xgettext_token_type_ty xgettext_token_type_ty; - -typedef struct xgettext_token_ty xgettext_token_ty; -struct xgettext_token_ty -{ - xgettext_token_type_ty type; - - /* These fields are used only for xgettext_token_type_keyword. */ - int argnum1; - int argnum2; - - /* This field is used only for xgettext_token_type_string_literal. */ - char *string; - - /* These fields are only for - xgettext_token_type_keyword, - xgettext_token_type_string_literal. */ - int line_number; - char *file_name; -}; - - -void xgettext_lex_open PARAMS ((const char *file_name)); -void xgettext_lex_close PARAMS ((void)); -void xgettext_lex PARAMS ((xgettext_token_ty *tp)); -const char *xgettext_lex_comment PARAMS ((size_t n)); -void xgettext_lex_comment_reset PARAMS ((void)); -/* void xgettext_lex_filepos PARAMS ((char **, int *)); FIXME needed? */ -void xgettext_lex_keyword PARAMS ((const char *name)); -bool xgettext_any_keywords PARAMS ((void)); -void xgettext_lex_trigraphs PARAMS ((void)); - -#endif /* _XGET_LEX_H */ diff --git a/src/xgettext.c b/src/xgettext.c index a2c29add3..4d90f7d4c 100644 --- a/src/xgettext.c +++ b/src/xgettext.c @@ -29,40 +29,37 @@ #include #include #include +#include #include #ifdef HAVE_UNISTD_H # include #endif -#ifndef errno -extern int errno; -#endif - +#include "xgettext.h" #include "dir-list.h" #include "file-list.h" #include "error.h" #include "progname.h" -#include "hash.h" #include "getline.h" #include "system.h" #include "po.h" #include "message.h" #include "write-po.h" -#include "xget-lex.h" #include "printf-parse.h" - #include "libgettext.h" #ifndef _POSIX_VERSION struct passwd *getpwuid (); #endif - /* A convenience macro. 
I don't like writing gettext() every time. */ #define _(str) gettext (str) +#include "x-c.h" + + /* If nonzero add all comments immediately preceding one of the keywords. */ static bool add_all_comments = false; @@ -82,9 +79,6 @@ static int do_debug; /* Content of .po files with symbols to be excluded. */ static message_list_ty *exclude; -/* If true extract all strings. */ -static bool extract_all = false; - /* Force output of PO file even if empty. */ static int force_po; @@ -157,10 +151,8 @@ static void exclude_directive_message PARAMS ((po_ty *pop, char *msgid, lex_pos_ty *msgstr_pos, bool obsolete)); static void read_exclusion_file PARAMS ((char *file_name)); -static message_ty *remember_a_message PARAMS ((message_list_ty *mlp, - xgettext_token_ty *tp)); -static void remember_a_message_plural PARAMS ((message_ty *mp, - xgettext_token_ty *tp)); +static FILE *xgettext_open PARAMS ((const char *fn, char **logical_file_name_p, + char **real_file_name_p)); static void scan_c_file PARAMS ((const char *file_name, msgdomain_list_ty *mdlp)); static void extract_constructor PARAMS ((po_ty *that)); @@ -188,8 +180,8 @@ static enum is_c_format test_whether_c_format PARAMS ((const char *s)); string argument and a message list argument. */ typedef void (*scanner_fp) PARAMS ((const char *, msgdomain_list_ty *)); -static const char *extension_to_language PARAMS ((const char *)); static scanner_fp language_to_scanner PARAMS ((const char *)); +static const char *extension_to_language PARAMS ((const char *)); int @@ -236,7 +228,7 @@ main (argc, argv) case '\0': /* Long option. */ break; case 'a': - extract_all = true; + x_c_extract_all (); break; case 'c': if (optarg == NULL) @@ -285,7 +277,7 @@ main (argc, argv) break; case 'k': if (optarg == NULL || *optarg != '\0') - xgettext_lex_keyword (optarg); + x_c_keyword (optarg); break; case 'l': /* Accepted for backward compatibility with 0.10.35. 
*/ @@ -333,7 +325,7 @@ main (argc, argv) message_print_style_uniforum (); break; case 'T': - xgettext_lex_trigraphs (); + x_c_trigraphs (); break; case 'V': do_version = true; @@ -371,7 +363,7 @@ main (argc, argv) error (EXIT_FAILURE, 0, _("\ --join-existing cannot be used when output is written to stdout")); - if (!xgettext_any_keywords ()) + if (!x_c_any_keywords ()) { error (0, 0, _("\ xgettext cannot work without keywords to look for")); @@ -686,10 +678,107 @@ read_exclusion_file (file_name) } -static message_ty * -remember_a_message (mlp, tp) +static string_list_ty *comment; + +void +xgettext_comment_add (str) + const char *str; +{ + if (comment == NULL) + comment = string_list_alloc (); + string_list_append (comment, str); +} + +const char * +xgettext_comment (n) + size_t n; +{ + if (comment == NULL || n >= comment->nitems) + return NULL; + return comment->item[n]; +} + +void +xgettext_comment_reset () +{ + if (comment != NULL) + { + string_list_free (comment); + comment = NULL; + } +} + + + +static FILE * +xgettext_open (fn, logical_file_name_p, real_file_name_p) + const char *fn; + char **logical_file_name_p; + char **real_file_name_p; +{ + FILE *fp; + char *new_name; + char *logical_file_name; + + if (strcmp (fn, "-") == 0) + { + new_name = xstrdup (_("standard input")); + logical_file_name = xstrdup (new_name); + fp = stdin; + } + else if (IS_ABSOLUTE_PATH (fn)) + { + new_name = xstrdup (fn); + fp = fopen (fn, "r"); + if (fp == NULL) + error (EXIT_FAILURE, errno, _("\ +error while opening \"%s\" for reading"), fn); + logical_file_name = xstrdup (new_name); + } + else + { + int j; + + for (j = 0; ; ++j) + { + const char *dir = dir_list_nth (j); + + if (dir == NULL) + error (EXIT_FAILURE, ENOENT, _("\ +error while opening \"%s\" for reading"), fn); + + new_name = concatenated_pathname (dir, fn, NULL); + + fp = fopen (new_name, "r"); + if (fp != NULL) + break; + + if (errno != ENOENT) + error (EXIT_FAILURE, errno, _("\ +error while opening \"%s\" for 
reading"), new_name); + free (new_name); + } + + /* Note that the NEW_NAME variable contains the actual file name + and the logical file name is what is reported by xgettext. In + this case NEW_NAME is set to the file which was found along the + directory search path, and LOGICAL_FILE_NAME is set to the + file name which was searched for. */ + logical_file_name = xstrdup (fn); + } + + *logical_file_name_p = logical_file_name; + *real_file_name_p = new_name; + return fp; +} + + + +message_ty * +remember_a_message (mlp, string, pos) message_list_ty *mlp; - xgettext_token_ty *tp; + char *string; + lex_pos_ty *pos; { enum is_c_format is_c_format = undecided; enum is_wrap do_wrap = undecided; @@ -697,14 +786,14 @@ remember_a_message (mlp, tp) message_ty *mp; char *msgstr; - msgid = tp->string; + msgid = string; /* See whether we shall exclude this message. */ if (exclude != NULL && message_list_search (exclude, msgid) != NULL) { /* Tell the lexer to reset its comment buffer, so that the next message gets the correct comments. */ - xgettext_lex_comment_reset (); + xgettext_comment_reset (); return NULL; } @@ -749,7 +838,7 @@ remember_a_message (mlp, tp) for (j = 0; ; ++j) { - const char *s = xgettext_lex_comment (j); + const char *s = xgettext_comment (j); if (s == NULL) break; @@ -781,27 +870,28 @@ remember_a_message (mlp, tp) /* Remember where we saw this msgid. */ if (line_comment) - message_comment_filepos (mp, tp->file_name, tp->line_number); + message_comment_filepos (mp, pos->file_name, pos->line_number); /* Tell the lexer to reset its comment buffer, so that the next message gets the correct comments. 
*/ - xgettext_lex_comment_reset (); + xgettext_comment_reset (); return mp; } -static void -remember_a_message_plural (mp, tp) +void +remember_a_message_plural (mp, string, pos) message_ty *mp; - xgettext_token_ty *tp; + char *string; + lex_pos_ty *pos; { char *msgid_plural; char *msgstr1; size_t msgstr1_len; char *msgstr; - msgid_plural = tp->string; + msgid_plural = string; /* See if the message is already a plural message. */ if (mp->msgid_plural == NULL) @@ -834,157 +924,20 @@ remember_a_message_plural (mp, tp) static void -scan_c_file (filename, mdlp) - const char *filename; +scan_c_file (file_name, mdlp) + const char *file_name; msgdomain_list_ty *mdlp; { - message_list_ty *mlp = mdlp->item[0]->messages; - int state; - int commas_to_skip = 0; /* defined only when in states 1 and 2 */ - int plural_commas = 0; /* defined only when in states 1 and 2 */ - message_ty *plural_mp = NULL; /* defined only when in states 1 and 2 */ - int paren_nesting = 0; /* defined only when in state 2 */ - - /* The file is broken into tokens. Scan the token stream, looking for - a keyword, followed by a left paren, followed by a string. When we - see this sequence, we have something to remember. We assume we are - looking at a valid C or C++ program, and leave the complaints about - the grammar to the compiler. - - Normal handling: Look for - [A] keyword [B] ( ... [C] ... msgid ... ) [E] - Plural handling: Look for - [A] keyword [B] ( ... [C] ... msgid ... [D] ... msgid_plural ... ) [E] - At point [A]: state == 0. - At point [B]: state == 1, commas_to_skip set, plural_mp == NULL. - At point [C]: state == 2, commas_to_skip set, plural_mp == NULL. - At point [D]: state == 2, commas_to_skip set again, plural_mp != NULL. - At point [E]: state == 0. */ - - xgettext_lex_open (filename); - - /* Start state is 0. 
*/ - state = 0; - - while (1) - { - xgettext_token_ty token; - - /* A state machine is used to do the recognising: - State 0 = waiting for something to happen - State 1 = seen one of our keywords - State 2 = waiting for part of an argument */ - xgettext_lex (&token); - switch (token.type) - { - case xgettext_token_type_keyword: - if (!extract_all && state == 2) - { - if (commas_to_skip == 0) - { - error_with_progname = false; - error (0, 0, - _("%s:%d: warning: keyword nested in keyword arg"), - token.file_name, token.line_number); - error_with_progname = true; - continue; - } - - /* Here we should nest properly, but this would require a - potentially unbounded stack. We haven't run across an - example that needs this functionality yet. For now, - we punt and forget the outer keyword. */ - error_with_progname = false; - error (0, 0, - _("%s:%d: warning: keyword between outer keyword and its arg"), - token.file_name, token.line_number); - error_with_progname = true; - } - commas_to_skip = token.argnum1 - 1; - plural_commas = (token.argnum2 > token.argnum1 - ? token.argnum2 - token.argnum1 : 0); - plural_mp = NULL; - state = 1; - continue; - - case xgettext_token_type_lparen: - switch (state) - { - case 1: - paren_nesting = 0; - state = 2; - break; - case 2: - paren_nesting++; - break; - } - continue; - - case xgettext_token_type_rparen: - if (state == 2 && paren_nesting != 0) - paren_nesting--; - else - state = 0; - continue; - - case xgettext_token_type_comma: - if (state == 2 && commas_to_skip != 0) - { - if (paren_nesting == 0) - commas_to_skip--; - } - else - state = 0; - continue; - - case xgettext_token_type_string_literal: - if (extract_all) - remember_a_message (mlp, &token); - else if (state == 2 && commas_to_skip == 0) - { - if (plural_mp == NULL) - { - /* Seen an msgid. 
*/ - if (plural_commas == 0) - remember_a_message (mlp, &token); - else - { - plural_mp = remember_a_message (mlp, &token); - commas_to_skip = plural_commas; - plural_commas = 0; - } - } - else - { - /* Seen an msgid_plural. */ - remember_a_message_plural (plural_mp, &token); - plural_mp = NULL; - } - } - else - { - free (token.string); - if (state == 1) - state = 0; - } - continue; - - case xgettext_token_type_symbol: - if (state == 1) - state = 0; - continue; - - case xgettext_token_type_eof: - break; - - default: - abort (); - } - break; - } - - /* Close scanner. */ - xgettext_lex_close (); + char *logical_file_name; + char *real_file_name; + FILE *fp = xgettext_open (file_name, &logical_file_name, &real_file_name); + + extract_c (fp, real_file_name, logical_file_name, mdlp); + + if (fp != stdin) + fclose (fp); + free (logical_file_name); + free (real_file_name); } @@ -1375,9 +1328,7 @@ language_to_scanner (name) static table_ty table[] = { - { "C", scan_c_file, }, - { "C++", scan_c_file, }, - { "ObjectiveC", scan_c_file, }, + SCANNERS_C { "PO", read_po_file, }, /* Here will follow more languages and their scanners: awk, perl, etc... Make sure new scanners honor the --exlude-file option. */ @@ -1407,16 +1358,7 @@ extension_to_language (extension) static table_ty table[] = { - { "c", "C", }, - { "C", "C++", }, - { "c++", "C++", }, - { "cc", "C++", }, - { "cxx", "C++", }, - { "cpp", "C++", }, - { "h", "C", }, - { "hh", "C++", }, - { "hpp", "C++", }, - { "m", "ObjectiveC" }, + EXTENSIONS_C { "po", "PO", }, { "pot", "PO", }, { "pox", "PO", }, diff --git a/src/xgettext.h b/src/xgettext.h new file mode 100644 index 000000000..5437d7d1c --- /dev/null +++ b/src/xgettext.h @@ -0,0 +1,37 @@ +/* xgettext common functions. + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Peter Miller + and Bruno Haible , 2001. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifndef _XGETTEXT_H +#define _XGETTEXT_H + +#include +#include "message.h" +#include "pos.h" + +extern void xgettext_comment_add PARAMS ((const char *str)); +extern const char *xgettext_comment PARAMS ((size_t n)); +extern void xgettext_comment_reset PARAMS ((void)); + +extern message_ty *remember_a_message PARAMS ((message_list_ty *mlp, + char *string, lex_pos_ty *pos)); +extern void remember_a_message_plural PARAMS ((message_ty *mp, + char *string, lex_pos_ty *pos)); + + +#endif /* _XGETTEXT_H */