From: Bruno Haible <bruno@clisp.org>
Date: Wed, 13 Sep 2000 16:55:08 +0000 (+0000)
Subject: Accept // comments in C.
X-Git-Tag: v0.10.36~224
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=723b1a33d014845c77d9ad1ebd492c743512976d;p=thirdparty%2Fgettext.git

Accept // comments in C.
Look into #define bodies.
---

diff --git a/src/ChangeLog b/src/ChangeLog
index a956ef4d3..a1288ca0a 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,19 @@
+2000-09-13  Bruno Haible  <haible@clisp.cons.org>
+
+	Look into #define bodies.
+	* xget-lex.c (phaseX_get): Simplify.
+	(phase6_get): Stop the loop when recognizing a #define directive.
+	Based on a patch by Martin v. Löwis.
+
+	Accept ISO C 99 comment syntax.
+	* xget-lex.c (cplusplus_comments): Remove variable.
+	(phase4_getc): Always recognize // comments.
+	(xgettext_lex_cplusplus): Remove function.
+	* xget-lex.h (xgettext_lex_cplusplus): Remove declaration.
+	* xgettext.c (scan_c_file): Remove is_cpp_file argument.
+	(scanner_c, scanner_cxx): Remove functions.
+	(language_to_scanner): Call scan_c_file directly.
+
 2000-08-23  Bruno Haible  <haible@clisp.cons.org>
 
 	* po-lex.c (ALERT_CHAR): New constant macro.
diff --git a/src/xget-lex.c b/src/xget-lex.c
index 3fa5cf6d2..1ad1d6f92 100644
--- a/src/xget-lex.c
+++ b/src/xget-lex.c
@@ -109,7 +109,6 @@ static char *logical_file_name;
 static int line_number;
 static FILE *fp;
 static int trigraphs;
-static int cplusplus_comments;
 static string_list_ty *comment;
 static hash_table keywords;
 static int default_keywords = 1;
@@ -307,7 +306,7 @@ phase1_ungetc (c)
 
 /* 2. Convert trigraphs to their single character equivalents.  Most
    sane human beings vomit copiously at the mention of trigraphs, which
-   is why they are on option.  */
+   is why they are an option.  */
 
 /* Maximum used guaranteed to be < 4.  */
 static unsigned char phase2_pushback[4];
@@ -492,12 +491,7 @@ phase4_getc ()
       return ' ';
 
     case '/':
-      /* C++ comment.  */
-      if (!cplusplus_comments)
-	{
-	  phase3_ungetc ('/');
-	  return '/';
-	}
+      /* C++ or ISO C 99 comment.  */
       buflen = 0;
       while (1)
 	{
@@ -982,55 +976,36 @@ static void
 phaseX_get (tp)
      token_ty *tp;
 {
-  static int middle;
-  token_ty tmp;
+  static int middle;	/* 0 at the beginning of a line, 1 otherwise.  */
 
   phase5_get (tp);
-  if (middle)
-    {
-      switch (tp->type)
-	{
-	case token_type_eoln:
-	case token_type_eof:
-	  middle = 0;
-	  break;
-
-	case token_type_hash:
-	  tp->type = token_type_symbol;
-	  break;
 
-	default:
-	  break;
-	}
-    }
+  if (tp->type == token_type_eoln || tp->type == token_type_eof)
+    middle = 0;
   else
     {
-      switch (tp->type)
+      if (middle)
 	{
-	case token_type_eoln:
-	case token_type_eof:
-	  break;
-
-	case token_type_white_space:
-	  tmp = *tp;
-	  phase5_get (tp);
-	  if (tp->type != token_type_hash)
-	    {
-	      phase5_unget (tp);
-	      *tp = tmp;
-	      middle = 1;
-	      return;
-	    }
-
-	  /* Discard the leading white space token, the hash is all
+	  /* Turn hash in the middle of a line into a plain symbol token.  */
+	  if (tp->type == token_type_hash)
+	    tp->type = token_type_symbol;
+	}
+      else
+	{
+	  /* When we see leading whitespace followed by a hash sign,
+	     discard the leading white space token.  The hash is all
 	     phase 6 is interested in.  */
-	  if (tp->type != token_type_eof && tp->type != token_type_eoln)
-	    middle = 1;
-	  break;
+	  if (tp->type == token_type_white_space)
+	    {
+	      token_ty next;
 
-	default:
+	      phase5_get (&next);
+	      if (next.type == token_type_hash)
+		*tp = next;
+	      else
+		phase5_unget (&next);
+	    }
 	  middle = 1;
-	  break;
 	}
     }
 }
@@ -1038,8 +1013,8 @@ phaseX_get (tp)
 
 /* 6. Recognize and carry out directives (it also expands macros on
    non-directive lines, which we do not do here).  The only directive
-   we care about is the #line directive.  We throw all the others
-   away.  */
+   we care about are the #line and #define directives.  We throw all the
+   others away.  */
 
 /* Maximum used guaranteed to be < 4.  */
 static token_ty phase6_pushback[4];
@@ -1063,13 +1038,13 @@ phase6_get (tp)
   while (1)
     {
       /* Get the next token.  If it is not a '#' at the beginning of a
-	 line, return immediately.  Be careful of white space.  */
+	 line (ignoring whitespace), return immediately.  */
       phaseX_get (tp);
       if (tp->type != token_type_hash)
 	return;
 
-      /* Accumulate the rest of the directive in a buffer.  Work out
-	 what it is later.  */
+      /* Accumulate the rest of the directive in a buffer, until the
+	 "define" keyword is seen or until end of line.  */
       bufpos = 0;
       while (1)
 	{
@@ -1077,19 +1052,26 @@ phase6_get (tp)
 	  if (tp->type == token_type_eoln || tp->type == token_type_eof)
 	    break;
 
-	  /* White space would be important in the directive, if we
-	     were interested in the #define directive.  But we are
-	     going to ignore the #define directive, so just throw
-	     white space away.  */
-	  if (tp->type == token_type_white_space)
-	    continue;
-
-	  if (bufpos >= bufmax)
+	  /* Before the "define" keyword and inside other directives
+	     white space is irrelevant.  So just throw it away.  */
+	  if (tp->type != token_type_white_space)
 	    {
-	      bufmax += 100;
-	      buf = xrealloc (buf, bufmax * sizeof (buf[0]));
+	      /* If it is a #define directive, return immediately,
+		 thus treating the body of the #define directive like
+		 normal input.  */
+	      if (bufpos == 0
+		  && tp->type == token_type_name
+		  && strcmp (tp->string, "define") == 0)
+		return;
+
+	      /* Accumulate.  */
+	      if (bufpos >= bufmax)
+		{
+		  bufmax += 100;
+		  buf = xrealloc (buf, bufmax * sizeof (buf[0]));
+		}
+	      buf[bufpos++] = *tp;
 	    }
-	  buf[bufpos++] = *tp;
 	}
 
       /* If it is a #line directive, with no macros to expand, act on
@@ -1337,13 +1319,6 @@ xgettext_lex_comment_reset ()
 }
 
 
-void
-xgettext_lex_cplusplus ()
-{
-  cplusplus_comments = 1;
-}
-
-
 void
 xgettext_lex_trigraphs ()
 {
diff --git a/src/xget-lex.h b/src/xget-lex.h
index ccdaf3425..086c41442 100644
--- a/src/xget-lex.h
+++ b/src/xget-lex.h
@@ -59,7 +59,6 @@ void xgettext_lex_comment_reset PARAMS ((void));
 /* void xgettext_lex_filepos PARAMS ((char **, int *)); FIXME needed?  */
 void xgettext_lex_keyword PARAMS ((const char *__name));
 int xgettext_any_keywords PARAMS ((void));
-void xgettext_lex_cplusplus PARAMS ((void));
 void xgettext_lex_trigraphs PARAMS ((void));
 
 #endif
diff --git a/src/xgettext.c b/src/xgettext.c
index db8113f40..5000551ab 100644
--- a/src/xgettext.c
+++ b/src/xgettext.c
@@ -171,7 +171,7 @@ static void read_exclusion_file PARAMS ((char *__file_name));
 static void remember_a_message PARAMS ((message_list_ty *__mlp,
 					xgettext_token_ty *__tp));
 static void scan_c_file PARAMS ((const char *__file_name,
-				 message_list_ty *__mlp,  int __is_cpp_file));
+				 message_list_ty *__mlp));
 static void extract_constructor PARAMS ((po_ty *__that));
 static void extract_directive_domain PARAMS ((po_ty *__that, char *__name));
 static void extract_directive_message PARAMS ((po_ty *__that, char *__msgid,
@@ -836,19 +836,14 @@ remember_a_message (mlp, tp)
 
 
 static void
-scan_c_file(filename, mlp, is_cpp_file)
+scan_c_file(filename, mlp)
      const char *filename;
      message_list_ty *mlp;
-     int is_cpp_file;
 {
   int state;
   int commas_to_skip = 0;	/* defined only when in states 1 and 2 */
   int paren_nesting = 0;	/* defined only when in state 2 */
 
-  /* Inform scanner whether we have C++ files or not.  */
-  if (is_cpp_file)
-    xgettext_lex_cplusplus ();
-
   /* The file is broken into tokens.  Scan the token stream, looking for
      a keyword, followed by a left paren, followed by a string.  When we
      see this sequence, we have something to remember.  We assume we are
@@ -1322,24 +1317,6 @@ test_whether_c_format (s)
 }
 
 
-static void
-  scanner_c (filename, mlp)
-  const char *filename;
-  message_list_ty *mlp;
-{
-  scan_c_file (filename, mlp, 0);
-}
-
-
-static void
-scanner_cxx (filename, mlp)
-  const char *filename;
-  message_list_ty *mlp;
-{
-  scan_c_file (filename, mlp, 1);
-}
-
-
 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
 #define ENDOF(a) ((a) + SIZEOF(a))
 
@@ -1357,8 +1334,8 @@ language_to_scanner (name)
 
   static table_ty table[] =
   {
-    { "C", scanner_c, },
-    { "C++", scanner_cxx, },
+    { "C", scan_c_file, },
+    { "C++", scan_c_file, },
     { "PO", read_po_file, },
     /* Here will follow more languages and their scanners: awk, perl,
        etc...  Make sure new scanners honor the --exlude-file option.  */