Accept // comments in C.

author Bruno Haible <bruno@clisp.org>

Wed, 13 Sep 2000 16:55:08 +0000 (16:55 +0000)

committer Bruno Haible <bruno@clisp.org>

Wed, 13 Sep 2000 16:55:08 +0000 (16:55 +0000)
author Bruno Haible <bruno@clisp.org>
Wed, 13 Sep 2000 16:55:08 +0000 (16:55 +0000)
committer Bruno Haible <bruno@clisp.org>
Wed, 13 Sep 2000 16:55:08 +0000 (16:55 +0000)
diff --git a/src/ChangeLog b/src/ChangeLog

index a956ef4d3a4236678761b0454ce8f1da88222743..a1288ca0ade17c47b283cfa4bce76401e60923d2 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,19 @@
+2000-09-13  Bruno Haible  <haible@clisp.cons.org>
+
+       Look into #define bodies.
+       * xget-lex.c (phaseX_get): Simplify.
+       (phase6_get): Stop the loop when recognizing a #define directive.
+       Based on a patch by Martin v. Löwis.
+
+       Accept ISO C 99 comment syntax.
+       * xget-lex.c (cplusplus_comments): Remove variable.
+       (phase4_getc): Always recognize // comments.
+       (xgettext_lex_cplusplus): Remove function.
+       * xget-lex.h (xgettext_lex_cplusplus): Remove declaration.
+       * xgettext.c (scan_c_file): Remove is_cpp_file argument.
+       (scanner_c, scanner_cxx): Remove functions.
+       (language_to_scanner): Call scan_c_file directly.
+
  2000-08-23  Bruno Haible  <haible@clisp.cons.org>
  
         * po-lex.c (ALERT_CHAR): New constant macro.
diff --git a/src/xget-lex.c b/src/xget-lex.c

index 3fa5cf6d22c6d74be3cecef36e6280af04ba8e18..1ad1d6f92ce679a87351359dfaacbf9eb03ef320 100644 (file)
--- a/src/xget-lex.c
+++ b/src/xget-lex.c
@@ -109,7 +109,6 @@ static char *logical_file_name;
  static int line_number;
  static FILE *fp;
  static int trigraphs;
-static int cplusplus_comments;
  static string_list_ty *comment;
  static hash_table keywords;
  static int default_keywords = 1;
@@ -307,7 +306,7 @@ phase1_ungetc (c)
  
  /* 2. Convert trigraphs to their single character equivalents.  Most
     sane human beings vomit copiously at the mention of trigraphs, which
-   is why they are on option.  */
+   is why they are an option.  */
  
  /* Maximum used guaranteed to be < 4.  */
  static unsigned char phase2_pushback[4];
@@ -492,12 +491,7 @@ phase4_getc ()
        return ' ';
  
      case '/':
-      /* C++ comment.  */
-      if (!cplusplus_comments)
-       {
-         phase3_ungetc ('/');
-         return '/';
-       }
+      /* C++ or ISO C 99 comment.  */
        buflen = 0;
        while (1)
         {
@@ -982,55 +976,36 @@ static void
  phaseX_get (tp)
       token_ty *tp;
  {
-  static int middle;
-  token_ty tmp;
+  static int middle;   /* 0 at the beginning of a line, 1 otherwise.  */
  
    phase5_get (tp);
-  if (middle)
-    {
-      switch (tp->type)
-       {
-       case token_type_eoln:
-       case token_type_eof:
-         middle = 0;
-         break;
-
-       case token_type_hash:
-         tp->type = token_type_symbol;
-         break;
  
-       default:
-         break;
-       }
-    }
+  if (tp->type == token_type_eoln || tp->type == token_type_eof)
+    middle = 0;
    else
      {
-      switch (tp->type)
+      if (middle)
         {
-       case token_type_eoln:
-       case token_type_eof:
-         break;
-
-       case token_type_white_space:
-         tmp = *tp;
-         phase5_get (tp);
-         if (tp->type != token_type_hash)
-           {
-             phase5_unget (tp);
-             *tp = tmp;
-             middle = 1;
-             return;
-           }
-
-         /* Discard the leading white space token, the hash is all
+         /* Turn hash in the middle of a line into a plain symbol token.  */
+         if (tp->type == token_type_hash)
+           tp->type = token_type_symbol;
+       }
+      else
+       {
+         /* When we see leading whitespace followed by a hash sign,
+            discard the leading white space token.  The hash is all
              phase 6 is interested in.  */
-         if (tp->type != token_type_eof && tp->type != token_type_eoln)
-           middle = 1;
-         break;
+         if (tp->type == token_type_white_space)
+           {
+             token_ty next;
  
-       default:
+             phase5_get (&next);
+             if (next.type == token_type_hash)
+               *tp = next;
+             else
+               phase5_unget (&next);
+           }
           middle = 1;
-         break;
         }
      }
  }
@@ -1038,8 +1013,8 @@ phaseX_get (tp)
  
  /* 6. Recognize and carry out directives (it also expands macros on
     non-directive lines, which we do not do here).  The only directive
-   we care about is the #line directive.  We throw all the others
-   away.  */
+   we care about are the #line and #define directives.  We throw all the
+   others away.  */
  
  /* Maximum used guaranteed to be < 4.  */
  static token_ty phase6_pushback[4];
@@ -1063,13 +1038,13 @@ phase6_get (tp)
    while (1)
      {
        /* Get the next token.  If it is not a '#' at the beginning of a
-        line, return immediately.  Be careful of white space.  */
+        line (ignoring whitespace), return immediately.  */
        phaseX_get (tp);
        if (tp->type != token_type_hash)
         return;
  
-      /* Accumulate the rest of the directive in a buffer.  Work out
-        what it is later.  */
+      /* Accumulate the rest of the directive in a buffer, until the
+        "define" keyword is seen or until end of line.  */
        bufpos = 0;
        while (1)
         {
@@ -1077,19 +1052,26 @@ phase6_get (tp)
           if (tp->type == token_type_eoln || tp->type == token_type_eof)
             break;
  
-         /* White space would be important in the directive, if we
-            were interested in the #define directive.  But we are
-            going to ignore the #define directive, so just throw
-            white space away.  */
-         if (tp->type == token_type_white_space)
-           continue;
-
-         if (bufpos >= bufmax)
+         /* Before the "define" keyword and inside other directives
+            white space is irrelevant.  So just throw it away.  */
+         if (tp->type != token_type_white_space)
             {
-             bufmax += 100;
-             buf = xrealloc (buf, bufmax * sizeof (buf[0]));
+             /* If it is a #define directive, return immediately,
+                thus treating the body of the #define directive like
+                normal input.  */
+             if (bufpos == 0
+                 && tp->type == token_type_name
+                 && strcmp (tp->string, "define") == 0)
+               return;
+
+             /* Accumulate.  */
+             if (bufpos >= bufmax)
+               {
+                 bufmax += 100;
+                 buf = xrealloc (buf, bufmax * sizeof (buf[0]));
+               }
+             buf[bufpos++] = *tp;
             }
-         buf[bufpos++] = *tp;
         }
  
        /* If it is a #line directive, with no macros to expand, act on
@@ -1337,13 +1319,6 @@ xgettext_lex_comment_reset ()
  }
  
  
-void
-xgettext_lex_cplusplus ()
-{
-  cplusplus_comments = 1;
-}
-
-
  void
  xgettext_lex_trigraphs ()
  {
diff --git a/src/xget-lex.h b/src/xget-lex.h

index ccdaf342575cde33a0a82dc4ca14f2202eb7522d..086c4144289751ecc7761ddd68f0134f6471331b 100644 (file)
--- a/src/xget-lex.h
+++ b/src/xget-lex.h
@@ -59,7 +59,6 @@ void xgettext_lex_comment_reset PARAMS ((void));
  /* void xgettext_lex_filepos PARAMS ((char **, int *)); FIXME needed?  */
  void xgettext_lex_keyword PARAMS ((const char *__name));
  int xgettext_any_keywords PARAMS ((void));
-void xgettext_lex_cplusplus PARAMS ((void));
  void xgettext_lex_trigraphs PARAMS ((void));
  
  #endif
diff --git a/src/xgettext.c b/src/xgettext.c

index db8113f4022fa9c7da83c0ddaea42a78d54563c9..5000551abbdcd34eda68d27ba875695c6f2ad8a4 100644 (file)
--- a/src/xgettext.c
+++ b/src/xgettext.c
@@ -171,7 +171,7 @@ static void read_exclusion_file PARAMS ((char *__file_name));
  static void remember_a_message PARAMS ((message_list_ty *__mlp,
                                         xgettext_token_ty *__tp));
  static void scan_c_file PARAMS ((const char *__file_name,
-                                message_list_ty *__mlp,  int __is_cpp_file));
+                                message_list_ty *__mlp));
  static void extract_constructor PARAMS ((po_ty *__that));
  static void extract_directive_domain PARAMS ((po_ty *__that, char *__name));
  static void extract_directive_message PARAMS ((po_ty *__that, char *__msgid,
@@ -836,19 +836,14 @@ remember_a_message (mlp, tp)
  
  
  static void
-scan_c_file(filename, mlp, is_cpp_file)
+scan_c_file(filename, mlp)
       const char *filename;
       message_list_ty *mlp;
-     int is_cpp_file;
  {
    int state;
    int commas_to_skip = 0;      /* defined only when in states 1 and 2 */
    int paren_nesting = 0;       /* defined only when in state 2 */
  
-  /* Inform scanner whether we have C++ files or not.  */
-  if (is_cpp_file)
-    xgettext_lex_cplusplus ();
-
    /* The file is broken into tokens.  Scan the token stream, looking for
       a keyword, followed by a left paren, followed by a string.  When we
       see this sequence, we have something to remember.  We assume we are
@@ -1322,24 +1317,6 @@ test_whether_c_format (s)
  }
  
  
-static void
-  scanner_c (filename, mlp)
-  const char *filename;
-  message_list_ty *mlp;
-{
-  scan_c_file (filename, mlp, 0);
-}
-
-
-static void
-scanner_cxx (filename, mlp)
-  const char *filename;
-  message_list_ty *mlp;
-{
-  scan_c_file (filename, mlp, 1);
-}
-
-
  #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
  #define ENDOF(a) ((a) + SIZEOF(a))
  
@@ -1357,8 +1334,8 @@ language_to_scanner (name)
  
    static table_ty table[] =
    {
-    { "C", scanner_c, },
-    { "C++", scanner_cxx, },
+    { "C", scan_c_file, },
+    { "C++", scan_c_file, },
      { "PO", read_po_file, },
      /* Here will follow more languages and their scanners: awk, perl,
         etc...  Make sure new scanners honor the --exlude-file option.  */
author	Bruno Haible <bruno@clisp.org>
	Wed, 13 Sep 2000 16:55:08 +0000 (16:55 +0000)
committer	Bruno Haible <bruno@clisp.org>
	Wed, 13 Sep 2000 16:55:08 +0000 (16:55 +0000)
src/ChangeLog		patch \| blob \| blame \| history
src/xget-lex.c		patch \| blob \| blame \| history
src/xget-lex.h		patch \| blob \| blame \| history
src/xgettext.c		patch \| blob \| blame \| history