+2000-09-13 Bruno Haible <haible@clisp.cons.org>
+
+ Look into #define bodies.
+ * xget-lex.c (phaseX_get): Simplify.
+ (phase6_get): Stop the loop when recognizing a #define directive.
+ Based on a patch by Martin v. Löwis.
+
+ Accept ISO C 99 comment syntax.
+ * xget-lex.c (cplusplus_comments): Remove variable.
+ (phase4_getc): Always recognize // comments.
+ (xgettext_lex_cplusplus): Remove function.
+ * xget-lex.h (xgettext_lex_cplusplus): Remove declaration.
+ * xgettext.c (scan_c_file): Remove is_cpp_file argument.
+ (scanner_c, scanner_cxx): Remove functions.
+ (language_to_scanner): Call scan_c_file directly.
+
2000-08-23 Bruno Haible <haible@clisp.cons.org>
* po-lex.c (ALERT_CHAR): New constant macro.
static int line_number;
static FILE *fp;
static int trigraphs;
-static int cplusplus_comments;
static string_list_ty *comment;
static hash_table keywords;
static int default_keywords = 1;
/* 2. Convert trigraphs to their single character equivalents. Most
sane human beings vomit copiously at the mention of trigraphs, which
- is why they are on option. */
+ is why they are an option. */
/* Maximum used guaranteed to be < 4. */
static unsigned char phase2_pushback[4];
return ' ';
case '/':
- /* C++ comment. */
- if (!cplusplus_comments)
- {
- phase3_ungetc ('/');
- return '/';
- }
+ /* C++ or ISO C 99 comment. */
buflen = 0;
while (1)
{
phaseX_get (tp)
token_ty *tp;
{
- static int middle;
- token_ty tmp;
+ static int middle; /* 0 at the beginning of a line, 1 otherwise. */
phase5_get (tp);
- if (middle)
- {
- switch (tp->type)
- {
- case token_type_eoln:
- case token_type_eof:
- middle = 0;
- break;
-
- case token_type_hash:
- tp->type = token_type_symbol;
- break;
- default:
- break;
- }
- }
+ if (tp->type == token_type_eoln || tp->type == token_type_eof)
+ middle = 0;
else
{
- switch (tp->type)
+ if (middle)
{
- case token_type_eoln:
- case token_type_eof:
- break;
-
- case token_type_white_space:
- tmp = *tp;
- phase5_get (tp);
- if (tp->type != token_type_hash)
- {
- phase5_unget (tp);
- *tp = tmp;
- middle = 1;
- return;
- }
-
- /* Discard the leading white space token, the hash is all
+ /* Turn hash in the middle of a line into a plain symbol token. */
+ if (tp->type == token_type_hash)
+ tp->type = token_type_symbol;
+ }
+ else
+ {
+ /* When we see leading whitespace followed by a hash sign,
+ discard the leading white space token. The hash is all
phase 6 is interested in. */
- if (tp->type != token_type_eof && tp->type != token_type_eoln)
- middle = 1;
- break;
+ if (tp->type == token_type_white_space)
+ {
+ token_ty next;
- default:
+ phase5_get (&next);
+ if (next.type == token_type_hash)
+ *tp = next;
+ else
+ phase5_unget (&next);
+ }
middle = 1;
- break;
}
}
}
/* 6. Recognize and carry out directives (it also expands macros on
non-directive lines, which we do not do here). The only directive
- we care about is the #line directive. We throw all the others
- away. */
+ we care about is the #line or the #define directive. We throw all the
+ others away. */
/* Maximum used guaranteed to be < 4. */
static token_ty phase6_pushback[4];
while (1)
{
/* Get the next token. If it is not a '#' at the beginning of a
- line, return immediately. Be careful of white space. */
+ line (ignoring whitespace), return immediately. */
phaseX_get (tp);
if (tp->type != token_type_hash)
return;
- /* Accumulate the rest of the directive in a buffer. Work out
- what it is later. */
+ /* Accumulate the rest of the directive in a buffer, until the
+ "define" keyword is seen or until end of line. */
bufpos = 0;
while (1)
{
if (tp->type == token_type_eoln || tp->type == token_type_eof)
break;
- /* White space would be important in the directive, if we
- were interested in the #define directive. But we are
- going to ignore the #define directive, so just throw
- white space away. */
- if (tp->type == token_type_white_space)
- continue;
-
- if (bufpos >= bufmax)
+ /* Before the "define" keyword and inside other directives
+ white space is irrelevant. So just throw it away. */
+ if (tp->type != token_type_white_space)
{
- bufmax += 100;
- buf = xrealloc (buf, bufmax * sizeof (buf[0]));
+ /* If it is a #define directive, return immediately,
+ thus treating the body of the #define directive like
+ normal input. */
+ if (bufpos == 0
+ && tp->type == token_type_name
+ && strcmp (tp->string, "define") == 0)
+ return;
+
+ /* Accumulate. */
+ if (bufpos >= bufmax)
+ {
+ bufmax += 100;
+ buf = xrealloc (buf, bufmax * sizeof (buf[0]));
+ }
+ buf[bufpos++] = *tp;
}
- buf[bufpos++] = *tp;
}
/* If it is a #line directive, with no macros to expand, act on
}
-void
-xgettext_lex_cplusplus ()
-{
- cplusplus_comments = 1;
-}
-
-
void
xgettext_lex_trigraphs ()
{
static void remember_a_message PARAMS ((message_list_ty *__mlp,
xgettext_token_ty *__tp));
static void scan_c_file PARAMS ((const char *__file_name,
- message_list_ty *__mlp, int __is_cpp_file));
+ message_list_ty *__mlp));
static void extract_constructor PARAMS ((po_ty *__that));
static void extract_directive_domain PARAMS ((po_ty *__that, char *__name));
static void extract_directive_message PARAMS ((po_ty *__that, char *__msgid,
static void
-scan_c_file(filename, mlp, is_cpp_file)
+scan_c_file(filename, mlp)
const char *filename;
message_list_ty *mlp;
- int is_cpp_file;
{
int state;
int commas_to_skip = 0; /* defined only when in states 1 and 2 */
int paren_nesting = 0; /* defined only when in state 2 */
- /* Inform scanner whether we have C++ files or not. */
- if (is_cpp_file)
- xgettext_lex_cplusplus ();
-
/* The file is broken into tokens. Scan the token stream, looking for
a keyword, followed by a left paren, followed by a string. When we
see this sequence, we have something to remember. We assume we are
}
-static void
- scanner_c (filename, mlp)
- const char *filename;
- message_list_ty *mlp;
-{
- scan_c_file (filename, mlp, 0);
-}
-
-
-static void
-scanner_cxx (filename, mlp)
- const char *filename;
- message_list_ty *mlp;
-{
- scan_c_file (filename, mlp, 1);
-}
-
-
#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
#define ENDOF(a) ((a) + SIZEOF(a))
static table_ty table[] =
{
- { "C", scanner_c, },
- { "C++", scanner_cxx, },
+ { "C", scan_c_file, },
+ { "C++", scan_c_file, },
{ "PO", read_po_file, },
/* Here will follow more languages and their scanners: awk, perl,
etc... Make sure new scanners honor the --exlude-file option. */