From: Bruno Haible Date: Wed, 13 Sep 2000 16:55:08 +0000 (+0000) Subject: Accept // comments in C. X-Git-Tag: v0.10.36~224 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=723b1a33d014845c77d9ad1ebd492c743512976d;p=thirdparty%2Fgettext.git Accept // comments in C. Look into #define bodies. --- diff --git a/src/ChangeLog b/src/ChangeLog index a956ef4d3..a1288ca0a 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,19 @@ +2000-09-13 Bruno Haible + + Look into #define bodies. + * xget-lex.c (phaseX_get): Simplify. + (phase6_get): Stop the loop when recognizing a #define directive. + Based on a patch by Martin v. Löwis. + + Accept ISO C 99 comment syntax. + * xget-lex.c (cplusplus_comments): Remove variable. + (phase4_getc): Always recognize // comments. + (xgettext_lex_cplusplus): Remove function. + * xget-lex.h (xgettext_lex_cplusplus): Remove declaration. + * xgettext.c (scan_c_file): Remove is_cpp_file argument. + (scanner_c, scanner_cxx): Remove functions. + (language_to_scanner): Call scan_c_file directly. + 2000-08-23 Bruno Haible * po-lex.c (ALERT_CHAR): New constant macro. diff --git a/src/xget-lex.c b/src/xget-lex.c index 3fa5cf6d2..1ad1d6f92 100644 --- a/src/xget-lex.c +++ b/src/xget-lex.c @@ -109,7 +109,6 @@ static char *logical_file_name; static int line_number; static FILE *fp; static int trigraphs; -static int cplusplus_comments; static string_list_ty *comment; static hash_table keywords; static int default_keywords = 1; @@ -307,7 +306,7 @@ phase1_ungetc (c) /* 2. Convert trigraphs to their single character equivalents. Most sane human beings vomit copiously at the mention of trigraphs, which - is why they are on option. */ + is why they are an option. */ /* Maximum used guaranteed to be < 4. */ static unsigned char phase2_pushback[4]; @@ -492,12 +491,7 @@ phase4_getc () return ' '; case '/': - /* C++ comment. */ - if (!cplusplus_comments) - { - phase3_ungetc ('/'); - return '/'; - } + /* C++ or ISO C 99 comment. 
*/ buflen = 0; while (1) { @@ -982,55 +976,36 @@ static void phaseX_get (tp) token_ty *tp; { - static int middle; - token_ty tmp; + static int middle; /* 0 at the beginning of a line, 1 otherwise. */ phase5_get (tp); - if (middle) - { - switch (tp->type) - { - case token_type_eoln: - case token_type_eof: - middle = 0; - break; - - case token_type_hash: - tp->type = token_type_symbol; - break; - default: - break; - } - } + if (tp->type == token_type_eoln || tp->type == token_type_eof) + middle = 0; else { - switch (tp->type) + if (middle) { - case token_type_eoln: - case token_type_eof: - break; - - case token_type_white_space: - tmp = *tp; - phase5_get (tp); - if (tp->type != token_type_hash) - { - phase5_unget (tp); - *tp = tmp; - middle = 1; - return; - } - - /* Discard the leading white space token, the hash is all + /* Turn hash in the middle of a line into a plain symbol token. */ + if (tp->type == token_type_hash) + tp->type = token_type_symbol; + } + else + { + /* When we see leading whitespace followed by a hash sign, + discard the leading white space token. The hash is all phase 6 is interested in. */ - if (tp->type != token_type_eof && tp->type != token_type_eoln) - middle = 1; - break; + if (tp->type == token_type_white_space) + { + token_ty next; - default: + phase5_get (&next); + if (next.type == token_type_hash) + *tp = next; + else + phase5_unget (&next); + } middle = 1; - break; } } } @@ -1038,8 +1013,8 @@ phaseX_get (tp) /* 6. Recognize and carry out directives (it also expands macros on non-directive lines, which we do not do here). The only directive - we care about is the #line directive. We throw all the others - away. */ + we care about are the #line and #define directive. We throw all the + others away. */ /* Maximum used guaranteed to be < 4. */ static token_ty phase6_pushback[4]; @@ -1063,13 +1038,13 @@ phase6_get (tp) while (1) { /* Get the next token. If it is not a '#' at the beginning of a - line, return immediately. 
Be careful of white space. */ + line (ignoring whitespace), return immediately. */ phaseX_get (tp); if (tp->type != token_type_hash) return; - /* Accumulate the rest of the directive in a buffer. Work out - what it is later. */ + /* Accumulate the rest of the directive in a buffer, until the + "define" keyword is seen or until end of line. */ bufpos = 0; while (1) { @@ -1077,19 +1052,26 @@ phase6_get (tp) if (tp->type == token_type_eoln || tp->type == token_type_eof) break; - /* White space would be important in the directive, if we - were interested in the #define directive. But we are - going to ignore the #define directive, so just throw - white space away. */ - if (tp->type == token_type_white_space) - continue; - - if (bufpos >= bufmax) + /* Before the "define" keyword and inside other directives + white space is irrelevant. So just throw it away. */ + if (tp->type != token_type_white_space) { - bufmax += 100; - buf = xrealloc (buf, bufmax * sizeof (buf[0])); + /* If it is a #define directive, return immediately, + thus treating the body of the #define directive like + normal input. */ + if (bufpos == 0 + && tp->type == token_type_name + && strcmp (tp->string, "define") == 0) + return; + + /* Accumulate. */ + if (bufpos >= bufmax) + { + bufmax += 100; + buf = xrealloc (buf, bufmax * sizeof (buf[0])); + } + buf[bufpos++] = *tp; } - buf[bufpos++] = *tp; } /* If it is a #line directive, with no macros to expand, act on @@ -1337,13 +1319,6 @@ xgettext_lex_comment_reset () } -void -xgettext_lex_cplusplus () -{ - cplusplus_comments = 1; -} - - void xgettext_lex_trigraphs () { diff --git a/src/xget-lex.h b/src/xget-lex.h index ccdaf3425..086c41442 100644 --- a/src/xget-lex.h +++ b/src/xget-lex.h @@ -59,7 +59,6 @@ void xgettext_lex_comment_reset PARAMS ((void)); /* void xgettext_lex_filepos PARAMS ((char **, int *)); FIXME needed? 
*/ void xgettext_lex_keyword PARAMS ((const char *__name)); int xgettext_any_keywords PARAMS ((void)); -void xgettext_lex_cplusplus PARAMS ((void)); void xgettext_lex_trigraphs PARAMS ((void)); #endif diff --git a/src/xgettext.c b/src/xgettext.c index db8113f40..5000551ab 100644 --- a/src/xgettext.c +++ b/src/xgettext.c @@ -171,7 +171,7 @@ static void read_exclusion_file PARAMS ((char *__file_name)); static void remember_a_message PARAMS ((message_list_ty *__mlp, xgettext_token_ty *__tp)); static void scan_c_file PARAMS ((const char *__file_name, - message_list_ty *__mlp, int __is_cpp_file)); + message_list_ty *__mlp)); static void extract_constructor PARAMS ((po_ty *__that)); static void extract_directive_domain PARAMS ((po_ty *__that, char *__name)); static void extract_directive_message PARAMS ((po_ty *__that, char *__msgid, @@ -836,19 +836,14 @@ remember_a_message (mlp, tp) static void -scan_c_file(filename, mlp, is_cpp_file) +scan_c_file(filename, mlp) const char *filename; message_list_ty *mlp; - int is_cpp_file; { int state; int commas_to_skip = 0; /* defined only when in states 1 and 2 */ int paren_nesting = 0; /* defined only when in state 2 */ - /* Inform scanner whether we have C++ files or not. */ - if (is_cpp_file) - xgettext_lex_cplusplus (); - /* The file is broken into tokens. Scan the token stream, looking for a keyword, followed by a left paren, followed by a string. When we see this sequence, we have something to remember. 
We assume we are @@ -1322,24 +1317,6 @@ test_whether_c_format (s) } -static void - scanner_c (filename, mlp) - const char *filename; - message_list_ty *mlp; -{ - scan_c_file (filename, mlp, 0); -} - - -static void -scanner_cxx (filename, mlp) - const char *filename; - message_list_ty *mlp; -{ - scan_c_file (filename, mlp, 1); -} - - #define SIZEOF(a) (sizeof(a) / sizeof(a[0])) #define ENDOF(a) ((a) + SIZEOF(a)) @@ -1357,8 +1334,8 @@ language_to_scanner (name) static table_ty table[] = { - { "C", scanner_c, }, - { "C++", scanner_cxx, }, + { "C", scan_c_file, }, + { "C++", scan_c_file, }, { "PO", read_po_file, }, /* Here will follow more languages and their scanners: awk, perl, etc... Make sure new scanners honor the --exlude-file option. */