From: Bruno Haible <bruno@clisp.org>
Date: Wed, 8 Aug 2001 12:08:55 +0000 (+0000)
Subject: Split xgettext into a common frontend and a C specific backend.
X-Git-Tag: v0.11~557
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c3bda71cf96bb96bbfee19e725ce1dbaf2d96af3;p=thirdparty%2Fgettext.git

Split xgettext into a common frontend and a C specific backend.
---

diff --git a/src/ChangeLog b/src/ChangeLog
index 1c3d5480b..2f81d774d 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,57 @@
+2001-07-27  Bruno Haible  <haible@clisp.cons.org>
+
+	* x-c.h: New file.
+	* x-c.c: New file.
+	(xgettext_token_type_ty, xgettext_token_ty): Moved here from
+	xget-lex.h. In xgettext_token_ty, unify line_number and file_name into
+	lex_pos_ty.
+	(token_type_ty, token_ty): Moved here from xget-lex.c.
+	(extract_all): Moved here from xgettext.c.
+	(keywords, default_keywords, trigraphs, file_name, logical_file_name,
+	line_number, fp, last_comment_line, last_non_comment_line,
+	newline_count): Moved here from xget-lex.c.
+	(phase1_pushback, phase1_pushback_length, phase1_getc, phase1_ungetc,
+	phase2_pushback, phase2_pushback_length, phase2_getc, phase2_ungetc,
+	phase3_pushback, phase3_pushback_length, phase3_getc, phase3_ungetc,
+	phase4_getc, phase4_ungetc, phase7_getc, phase7_ungetc,
+	phase5_pushback, phase5_pushback_length, phase5_get, phase5_unget,
+	phaseX_get, phase6_pushback, phase6_pushback_length, phase6_get,
+	phase6_unget, phase8_get): Moved here from xget-lex.c. Use
+	xgettext_comment_add instead of accessing 'comment'. Don't free
+	logical_file_name; it is still used as file_name of messages in mdlp.
+	(x_c_lex): Moved here from xget-lex.c, renamed from xgettext_lex.
+	(extract_c): Moved here from xgettext.c, renamed from scan_c_file.
+	Change arguments from filename to FILE * and two filenames. Don't call
+	xgettext_lex_open and xgettext_lex_close.
+	(x_c_extract_all): New function.
+	(x_c_keyword): Moved here from xget-lex.c, renamed from
+	xgettext_lex_keyword.
+	(x_c_any_keywords): Moved here from xget-lex.c, renamed from
+	xgettext_any_keywords.
+	(x_c_trigraphs): Moved here from xget-lex.c, renamed from
+	xgettext_lex_trigraphs.
+	* xgettext.h: New file.
+	* xgettext.c (extract_all): Move to x-c.c.
+	(comment): Moved here from xget-lex.c.
+	(xgettext_comment_add): New function.
+	(xgettext_comment): Moved here from xget-lex.c, renamed from
+	xgettext_lex_comment.
+	(xgettext_comment_reset): Moved here from xget-lex.c, renamed from
+	xgettext_lex_comment_reset.
+	(xgettext_open): New function, mostly taken from xgettext_lex_open
+	in xget-lex.c.
+	(remember_a_message): Make non-static. Replace xgettext_token_ty arg
+	with lex_pos_ty argument.
+	(remember_a_message_plural): Likewise.
+	(scan_c_file): Moved the body to x-c.c:extract_c.
+	(language_to_scanner): Use SCANNERS_C.
+	(extension_to_language): Use EXTENSIONS_C.
+	* xget-lex.h: Remove file.
+	* xget-lex.c: Remove file.
+	* Makefile.am (noinst_HEADERS): Remove xget-lex.h. Add xgettext.h and
+	x-c.h.
+	(xgettext_SOURCES): Remove xget-lex.c. Add x-c.c.
+
 2001-07-26  Bruno Haible  <haible@clisp.cons.org>
 
 	* file-list.h: New file.
diff --git a/src/Makefile.am b/src/Makefile.am
index 8082cc071..d69db8330 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -24,9 +24,9 @@ msgcmp msgfmt msgmerge msgunfmt xgettext \
 msgcat msgcomm msgconv msgen msgexec msggrep msguniq
 
 noinst_HEADERS = pos.h message.h po-gram.h po-hash.h po-charset.h po-lex.h \
-po.h open-po.h read-po.h str-list.h write-po.h xget-lex.h dir-list.h \
-file-list.h po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h \
-msgl-ascii.h msgl-cat.h
+po.h open-po.h read-po.h str-list.h write-po.h dir-list.h file-list.h \
+po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h msgl-ascii.h \
+msgl-cat.h xgettext.h x-c.h
 
 EXTRA_DIST = FILES
 
@@ -52,8 +52,8 @@ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
 msgl-ascii.c
 msgunfmt_SOURCES = message.c msgunfmt.c str-list.c write-po.c msgl-ascii.c
 xgettext_SOURCES = message.c open-po.c po-gram-gen.y po-hash-gen.y \
-po-charset.c po-lex.c po.c str-list.c xget-lex.c xgettext.c dir-list.c \
-write-po.c msgl-ascii.c file-list.c
+po-charset.c po-lex.c po.c str-list.c xgettext.c dir-list.c write-po.c \
+msgl-ascii.c file-list.c x-c.c
 msgcat_SOURCES = msgcat.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
 po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
 msgl-ascii.c msgl-iconv.c msgl-cat.c file-list.c
diff --git a/src/xget-lex.c b/src/x-c.c
similarity index 82%
rename from src/xget-lex.c
rename to src/x-c.c
index 516b9f48d..395ebee67 100644
--- a/src/xget-lex.c
+++ b/src/x-c.c
@@ -23,21 +23,19 @@
 
 #include <ctype.h>
 #include <errno.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 
-#include "dir-list.h"
+#include "message.h"
+#include "x-c.h"
+#include "xgettext.h"
 #include "error.h"
 #include "progname.h"
 #include "system.h"
-#include "libgettext.h"
 #include "hash.h"
-#include "str-list.h"
-#include "xget-lex.h"
-
-#ifndef errno
-extern int errno;
-#endif
+#include "libgettext.h"
 
 #define _(s) gettext(s)
 
@@ -75,6 +73,37 @@ extern int errno;
    xgettext.c) with a stream of C tokens.  The comments are
    accumulated in a buffer, and given to xgettext when asked for.  */
 
+enum xgettext_token_type_ty
+{
+  xgettext_token_type_eof,
+  xgettext_token_type_keyword,
+  xgettext_token_type_lparen,
+  xgettext_token_type_rparen,
+  xgettext_token_type_comma,
+  xgettext_token_type_string_literal,
+  xgettext_token_type_symbol
+};
+typedef enum xgettext_token_type_ty xgettext_token_type_ty;
+
+typedef struct xgettext_token_ty xgettext_token_ty;
+struct xgettext_token_ty
+{
+  xgettext_token_type_ty type;
+
+  /* These fields are used only for xgettext_token_type_keyword.  */
+  int argnum1;
+  int argnum2;
+
+  /* This field is used only for xgettext_token_type_string_literal.  */
+  char *string;
+
+  /* This field is used only for
+       xgettext_token_type_keyword,
+       xgettext_token_type_string_literal.  */
+  lex_pos_ty pos;
+};
+
+
 enum token_type_ty
 {
   token_type_character_constant,
@@ -102,14 +131,18 @@ struct token_ty
 };
 
 
+/* If true, extract all strings.  */
+static bool extract_all = false;
+
+static hash_table keywords;
+static bool default_keywords = true;
+
+static bool trigraphs = false;
+
 static const char *file_name;
 static char *logical_file_name;
 static int line_number;
 static FILE *fp;
-static bool trigraphs = false;
-static string_list_ty *comment;
-static hash_table keywords;
-static bool default_keywords = true;
 
 /* These are for tracking whether comments count as immediately before
    keyword.  */
@@ -136,79 +169,7 @@ static void phaseX_get PARAMS ((token_ty *tp));
 static void phase6_get PARAMS ((token_ty *tp));
 static void phase6_unget PARAMS ((token_ty *tp));
 static void phase8_get PARAMS ((token_ty *tp));
-
-
-
-void
-xgettext_lex_open (fn)
-     const char *fn;
-{
-  char *new_name;
-
-  if (strcmp (fn, "-") == 0)
-    {
-      new_name = xstrdup (_("standard input"));
-      logical_file_name = xstrdup (new_name);
-      fp = stdin;
-    }
-  else if (IS_ABSOLUTE_PATH (fn))
-    {
-      new_name = xstrdup (fn);
-      fp = fopen (fn, "r");
-      if (fp == NULL)
-	error (EXIT_FAILURE, errno, _("\
-error while opening \"%s\" for reading"), fn);
-      logical_file_name = xstrdup (new_name);
-    }
-  else
-    {
-      int j;
-
-      for (j = 0; ; ++j)
-	{
-	  const char *dir = dir_list_nth (j);
-
-	  if (dir == NULL)
-	    error (EXIT_FAILURE, ENOENT, _("\
-error while opening \"%s\" for reading"), fn);
-
-	  new_name = concatenated_pathname (dir, fn, NULL);
-
-	  fp = fopen (new_name, "r");
-	  if (fp != NULL)
-	    break;
-
-	  if (errno != ENOENT)
-	    error (EXIT_FAILURE, errno, _("\
-error while opening \"%s\" for reading"), new_name);
-	  free (new_name);
-	}
-
-      /* Note that the NEW_NAME variable contains the actual file name
-	 and the logical file name is what is reported by xgettext.  In
-	 this case NEW_NAME is set to the file which was found along the
-	 directory search path, and LOGICAL_FILE_NAME is is set to the
-	 file name which was searched for.  */
-      logical_file_name = xstrdup (fn);
-    }
-
-  file_name = new_name;
-  line_number = 1;
-}
-
-
-void
-xgettext_lex_close ()
-{
-  if (fp != stdin)
-    fclose (fp);
-  free ((char *) file_name);
-  free (logical_file_name);
-  fp = NULL;
-  file_name = NULL;
-  logical_file_name = NULL;
-  line_number = 0;
-}
+static void x_c_lex PARAMS ((xgettext_token_ty *tp));
 
 
 /* 1. Terminate line by \n, regardless of the external representation of
@@ -425,8 +386,6 @@ phase4_getc ()
       /* C comment.  */
       buflen = 0;
       state = 0;
-      if (comment == NULL)
-	comment = string_list_alloc ();
       while (1)
 	{
 	  c = phase3_getc ();
@@ -449,7 +408,7 @@ phase4_getc ()
 				     || buffer[buflen - 1] == '\t'))
 		--buflen;
 	      buffer[buflen] = 0;
-	      string_list_append (comment, buffer);
+	      xgettext_comment_add (buffer);
 	      buflen = 0;
 	      state = 0;
 	      continue;
@@ -466,7 +425,7 @@ phase4_getc ()
 					 || buffer[buflen - 1] == '\t'))
 		    --buflen;
 		  buffer[buflen] = 0;
-		  string_list_append (comment, buffer);
+		  xgettext_comment_add (buffer);
 		  break;
 		}
 	      /* FALLTHROUGH */
@@ -501,9 +460,7 @@ phase4_getc ()
 	  buffer = xrealloc (buffer, bufmax);
 	}
       buffer[buflen] = 0;
-      if (comment == NULL)
-	comment = string_list_alloc ();
-      string_list_append (comment, buffer);
+      xgettext_comment_add (buffer);
       last_comment_line = newline_count;
       return '\n';
     }
@@ -1075,14 +1032,12 @@ phase6_get (tp)
 	  && buf[1].type == token_type_number
 	  && buf[2].type == token_type_string_literal)
 	{
-	  free (logical_file_name);
 	  logical_file_name = xstrdup (buf[2].string);
 	  line_number = buf[1].number;
 	}
       if (bufpos >= 2 && buf[0].type == token_type_number
 	  && buf[1].type == token_type_string_literal)
 	{
-	  free (logical_file_name);
 	  logical_file_name = xstrdup (buf[1].string);
 	  line_number = buf[0].number;
 	}
@@ -1103,7 +1058,7 @@ phase6_get (tp)
 	}
 
       /* We must reset the selected comments.  */
-      xgettext_lex_comment_reset ();
+      xgettext_comment_reset ();
     }
 }
 
@@ -1154,8 +1109,8 @@ phase8_get (tp)
 /* 9. Convert the remaining preprocessing tokens to C tokens and
    discards any white space from the translation unit.  */
 
-void
-xgettext_lex (tp)
+static void
+x_c_lex (tp)
      xgettext_token_ty *tp;
 {
   while (1)
@@ -1187,7 +1142,7 @@ xgettext_lex (tp)
 	     with non-white space tokens.  */
 	  ++newline_count;
 	  if (last_non_comment_line > last_comment_line)
-	    xgettext_lex_comment_reset ();
+	    xgettext_comment_reset ();
 	  break;
 
 	case token_type_name:
@@ -1195,13 +1150,13 @@ xgettext_lex (tp)
 
 	  if (default_keywords)
 	    {
-	      xgettext_lex_keyword ("gettext");
-	      xgettext_lex_keyword ("dgettext:2");
-	      xgettext_lex_keyword ("dcgettext:2");
-	      xgettext_lex_keyword ("ngettext:1,2");
-	      xgettext_lex_keyword ("dngettext:2,3");
-	      xgettext_lex_keyword ("dcngettext:2,3");
-	      xgettext_lex_keyword ("gettext_noop");
+	      x_c_keyword ("gettext");
+	      x_c_keyword ("dgettext:2");
+	      x_c_keyword ("dcgettext:2");
+	      x_c_keyword ("ngettext:1,2");
+	      x_c_keyword ("dngettext:2,3");
+	      x_c_keyword ("dcngettext:2,3");
+	      x_c_keyword ("gettext_noop");
 	      default_keywords = false;
 	    }
 
@@ -1212,8 +1167,8 @@ xgettext_lex (tp)
 	      tp->type = xgettext_token_type_keyword;
 	      tp->argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
 	      tp->argnum2 = (int) (long) keyword_value >> 10;
-	      tp->line_number = token.line_number;
-	      tp->file_name = logical_file_name;
+	      tp->pos.file_name = logical_file_name;
+	      tp->pos.line_number = token.line_number;
 	    }
 	  else
 	    tp->type = xgettext_token_type_symbol;
@@ -1243,8 +1198,8 @@ xgettext_lex (tp)
 
 	  tp->type = xgettext_token_type_string_literal;
 	  tp->string = token.string;
-	  tp->line_number = token.line_number;
-	  tp->file_name = logical_file_name;
+	  tp->pos.file_name = logical_file_name;
+	  tp->pos.line_number = token.line_number;
 	  return;
 
 	default:
@@ -1258,7 +1213,179 @@ xgettext_lex (tp)
 
 
 void
-xgettext_lex_keyword (name)
+extract_c (f, real_filename, logical_filename, mdlp)
+     FILE *f;
+     const char *real_filename;
+     const char *logical_filename;
+     msgdomain_list_ty *mdlp;
+{
+  message_list_ty *mlp = mdlp->item[0]->messages;
+  int state;
+  int commas_to_skip = 0;	/* defined only when in states 1 and 2 */
+  int plural_commas = 0;	/* defined only when in states 1 and 2 */
+  message_ty *plural_mp = NULL;	/* defined only when in states 1 and 2 */
+  int paren_nesting = 0;	/* defined only when in state 2 */
+
+  /* The file is broken into tokens.  Scan the token stream, looking for
+     a keyword, followed by a left paren, followed by a string.  When we
+     see this sequence, we have something to remember.  We assume we are
+     looking at a valid C or C++ program, and leave the complaints about
+     the grammar to the compiler.
+
+     Normal handling: Look for
+       [A] keyword [B] ( ... [C] ... msgid ... ) [E]
+     Plural handling: Look for
+       [A] keyword [B] ( ... [C] ... msgid ... [D] ... msgid_plural ... ) [E]
+     At point [A]: state == 0.
+     At point [B]: state == 1, commas_to_skip set, plural_mp == NULL.
+     At point [C]: state == 2, commas_to_skip set, plural_mp == NULL.
+     At point [D]: state == 2, commas_to_skip set again, plural_mp != NULL.
+     At point [E]: state == 0.  */
+
+  fp = f;
+  file_name = real_filename;
+  logical_file_name = xstrdup (logical_filename);
+  line_number = 1;
+
+  /* Start state is 0.  */
+  state = 0;
+
+  while (1)
+   {
+     xgettext_token_ty token;
+
+     /* A state machine is used to do the recognising:
+        State 0 = waiting for something to happen
+        State 1 = seen one of our keywords
+        State 2 = waiting for part of an argument */
+     x_c_lex (&token);
+     switch (token.type)
+       {
+       case xgettext_token_type_keyword:
+	 if (!extract_all && state == 2)
+	   {
+	     if (commas_to_skip == 0)
+	       {
+		 error_with_progname = false;
+		 error (0, 0,
+			_("%s:%d: warning: keyword nested in keyword arg"),
+			token.pos.file_name, token.pos.line_number);
+		 error_with_progname = true;
+		 continue;
+	       }
+
+	     /* Here we should nest properly, but this would require a
+		potentially unbounded stack.  We haven't run across an
+		example that needs this functionality yet.  For now,
+		we punt and forget the outer keyword.  */
+	     error_with_progname = false;
+	     error (0, 0,
+		    _("%s:%d: warning: keyword between outer keyword and its arg"),
+		    token.pos.file_name, token.pos.line_number);
+	     error_with_progname = true;
+	   }
+	 commas_to_skip = token.argnum1 - 1;
+	 plural_commas = (token.argnum2 > token.argnum1
+			  ? token.argnum2 - token.argnum1 : 0);
+	 plural_mp = NULL;
+	 state = 1;
+	 continue;
+
+       case xgettext_token_type_lparen:
+	 switch (state)
+	   {
+	   case 1:
+	     paren_nesting = 0;
+	     state = 2;
+	     break;
+	   case 2:
+	     paren_nesting++;
+	     break;
+	   }
+	 continue;
+
+       case xgettext_token_type_rparen:
+	 if (state == 2 && paren_nesting != 0)
+	   paren_nesting--;
+	 else
+	   state = 0;
+	 continue;
+
+       case xgettext_token_type_comma:
+	 if (state == 2 && commas_to_skip != 0)
+	   {
+	     if (paren_nesting == 0)
+	       commas_to_skip--;
+	   }
+	 else
+	   state = 0;
+	 continue;
+
+       case xgettext_token_type_string_literal:
+	 if (extract_all)
+	   remember_a_message (mlp, token.string, &token.pos);
+	 else if (state == 2 && commas_to_skip == 0)
+	   {
+	     if (plural_mp == NULL)
+	       {
+		 /* Seen an msgid.  */
+		 if (plural_commas == 0)
+		   remember_a_message (mlp, token.string, &token.pos);
+		 else
+		   {
+		     plural_mp = remember_a_message (mlp, token.string,
+						     &token.pos);
+		     commas_to_skip = plural_commas;
+		     plural_commas = 0;
+		   }
+	       }
+	     else
+	       {
+		 /* Seen an msgid_plural.  */
+		 remember_a_message_plural (plural_mp, token.string,
+					    &token.pos);
+		 plural_mp = NULL;
+	       }
+	   }
+	 else
+	   {
+	     free (token.string);
+	     if (state == 1)
+	       state = 0;
+	   }
+	 continue;
+
+       case xgettext_token_type_symbol:
+	 if (state == 1)
+	   state = 0;
+	 continue;
+
+       case xgettext_token_type_eof:
+	 break;
+
+       default:
+	 abort ();
+       }
+     break;
+   }
+
+  /* Reset scanner state; neither closes F nor frees logical_file_name.  */
+  fp = NULL;
+  file_name = NULL;
+  logical_file_name = NULL;
+  line_number = 0;
+}
+
+
+void
+x_c_extract_all ()
+{
+  extract_all = true;
+}
+
+
+void
+x_c_keyword (name)
      const char *name;
 {
   if (name == NULL)
@@ -1309,37 +1436,15 @@ xgettext_lex_keyword (name)
     }
 }
 
-
 bool
-xgettext_any_keywords ()
+x_c_any_keywords ()
 {
   return (keywords.filled > 0) || default_keywords;
 }
 
 
-const char *
-xgettext_lex_comment (n)
-     size_t n;
-{
-  if (comment == NULL || n >= comment->nitems)
-    return NULL;
-  return comment->item[n];
-}
-
-
-void
-xgettext_lex_comment_reset ()
-{
-  if (comment != NULL)
-    {
-      string_list_free (comment);
-      comment = NULL;
-    }
-}
-
-
 void
-xgettext_lex_trigraphs ()
+x_c_trigraphs ()
 {
   trigraphs = true;
 }
diff --git a/src/x-c.h b/src/x-c.h
new file mode 100644
index 000000000..2526b1e33
--- /dev/null
+++ b/src/x-c.h
@@ -0,0 +1,50 @@
+/* xgettext C/C++/ObjectiveC backend.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+
+#define EXTENSIONS_C \
+  { "c",      "C",    },						\
+  { "h",      "C",    },						\
+  { "C",      "C++",  },						\
+  { "c++",    "C++",  },						\
+  { "cc",     "C++",  },						\
+  { "cxx",    "C++",  },						\
+  { "cpp",    "C++",  },						\
+  { "hh",     "C++",  },						\
+  { "hpp",    "C++",  },						\
+  { "m",      "ObjectiveC" },						\
+
+#define SCANNERS_C \
+  { "C",          scan_c_file, },					\
+  { "C++",        scan_c_file, },					\
+  { "ObjectiveC", scan_c_file, },					\
+
+/* Scan a C/C++/ObjectiveC file and add its translatable strings to mdlp.  */
+extern void extract_c PARAMS ((FILE *fp, const char *real_filename,
+			       const char *logical_filename,
+			       msgdomain_list_ty *mdlp));
+
+
+/* Handling of options specific to this language.  */
+
+extern void x_c_extract_all PARAMS ((void));
+
+extern void x_c_keyword PARAMS ((const char *name));
+extern bool x_c_any_keywords PARAMS ((void));
+
+extern void x_c_trigraphs PARAMS ((void));
diff --git a/src/xget-lex.h b/src/xget-lex.h
deleted file mode 100644
index b5ea05854..000000000
--- a/src/xget-lex.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* GNU gettext - internationalization aids
-   Copyright (C) 1995, 1996, 1998, 2000, 2001 Free Software Foundation, Inc.
-
-   This file was written by Peter Miller <millerp@canb.auug.org.au>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
-
-#ifndef _XGET_LEX_H
-#define _XGET_LEX_H
-
-#include <stdbool.h>
-
-enum xgettext_token_type_ty
-{
-  xgettext_token_type_eof,
-  xgettext_token_type_keyword,
-  xgettext_token_type_lparen,
-  xgettext_token_type_rparen,
-  xgettext_token_type_comma,
-  xgettext_token_type_string_literal,
-  xgettext_token_type_symbol
-};
-typedef enum xgettext_token_type_ty xgettext_token_type_ty;
-
-typedef struct xgettext_token_ty xgettext_token_ty;
-struct xgettext_token_ty
-{
-  xgettext_token_type_ty type;
-
-  /* These fields are used only for xgettext_token_type_keyword.  */
-  int argnum1;
-  int argnum2;
-
-  /* This field is used only for xgettext_token_type_string_literal.  */
-  char *string;
-
-  /* These fields are only for
-       xgettext_token_type_keyword,
-       xgettext_token_type_string_literal.  */
-  int line_number;
-  char *file_name;
-};
-
-
-void xgettext_lex_open PARAMS ((const char *file_name));
-void xgettext_lex_close PARAMS ((void));
-void xgettext_lex PARAMS ((xgettext_token_ty *tp));
-const char *xgettext_lex_comment PARAMS ((size_t n));
-void xgettext_lex_comment_reset PARAMS ((void));
-/* void xgettext_lex_filepos PARAMS ((char **, int *)); FIXME needed?  */
-void xgettext_lex_keyword PARAMS ((const char *name));
-bool xgettext_any_keywords PARAMS ((void));
-void xgettext_lex_trigraphs PARAMS ((void));
-
-#endif /* _XGET_LEX_H */
diff --git a/src/xgettext.c b/src/xgettext.c
index a2c29add3..4d90f7d4c 100644
--- a/src/xgettext.c
+++ b/src/xgettext.c
@@ -29,40 +29,37 @@
 #include <time.h>
 #include <sys/types.h>
 #include <stdlib.h>
+#include <stdbool.h>
 #include <locale.h>
 
 #ifdef HAVE_UNISTD_H
 # include <unistd.h>
 #endif
 
-#ifndef errno
-extern int errno;
-#endif
-
+#include "xgettext.h"
 #include "dir-list.h"
 #include "file-list.h"
 #include "error.h"
 #include "progname.h"
-#include "hash.h"
 #include "getline.h"
 #include "system.h"
 #include "po.h"
 #include "message.h"
 #include "write-po.h"
-#include "xget-lex.h"
 #include "printf-parse.h"
-
 #include "libgettext.h"
 
 #ifndef _POSIX_VERSION
 struct passwd *getpwuid ();
 #endif
 
-
 /* A convenience macro.  I don't like writing gettext() every time.  */
 #define _(str) gettext (str)
 
 
+#include "x-c.h"
+
+
 /* If nonzero add all comments immediately preceding one of the keywords. */
 static bool add_all_comments = false;
 
@@ -82,9 +79,6 @@ static int do_debug;
 /* Content of .po files with symbols to be excluded.  */
 static message_list_ty *exclude;
 
-/* If true extract all strings.  */
-static bool extract_all = false;
-
 /* Force output of PO file even if empty.  */
 static int force_po;
 
@@ -157,10 +151,8 @@ static void exclude_directive_message PARAMS ((po_ty *pop, char *msgid,
 					       lex_pos_ty *msgstr_pos,
 					       bool obsolete));
 static void read_exclusion_file PARAMS ((char *file_name));
-static message_ty *remember_a_message PARAMS ((message_list_ty *mlp,
-					       xgettext_token_ty *tp));
-static void remember_a_message_plural PARAMS ((message_ty *mp,
-					       xgettext_token_ty *tp));
+static FILE *xgettext_open PARAMS ((const char *fn, char **logical_file_name_p,
+				    char **real_file_name_p));
 static void scan_c_file PARAMS ((const char *file_name,
 				 msgdomain_list_ty *mdlp));
 static void extract_constructor PARAMS ((po_ty *that));
@@ -188,8 +180,8 @@ static enum is_c_format test_whether_c_format PARAMS ((const char *s));
    string argument and a message list argument.  */
 typedef void (*scanner_fp) PARAMS ((const char *, msgdomain_list_ty *));
 
-static const char *extension_to_language PARAMS ((const char *));
 static scanner_fp language_to_scanner PARAMS ((const char *));
+static const char *extension_to_language PARAMS ((const char *));
 
 
 int
@@ -236,7 +228,7 @@ main (argc, argv)
       case '\0':		/* Long option.  */
 	break;
       case 'a':
-	extract_all = true;
+	x_c_extract_all ();
 	break;
       case 'c':
 	if (optarg == NULL)
@@ -285,7 +277,7 @@ main (argc, argv)
 	break;
       case 'k':
 	if (optarg == NULL || *optarg != '\0')
-	  xgettext_lex_keyword (optarg);
+	  x_c_keyword (optarg);
 	break;
       case 'l':
 	/* Accepted for backward compatibility with 0.10.35.  */
@@ -333,7 +325,7 @@ main (argc, argv)
 	message_print_style_uniforum ();
 	break;
       case 'T':
-	xgettext_lex_trigraphs ();
+	x_c_trigraphs ();
 	break;
       case 'V':
 	do_version = true;
@@ -371,7 +363,7 @@ main (argc, argv)
     error (EXIT_FAILURE, 0, _("\
 --join-existing cannot be used when output is written to stdout"));
 
-  if (!xgettext_any_keywords ())
+  if (!x_c_any_keywords ())
     {
       error (0, 0, _("\
 xgettext cannot work without keywords to look for"));
@@ -686,10 +678,107 @@ read_exclusion_file (file_name)
 }
 
 
-static message_ty *
-remember_a_message (mlp, tp)
+static string_list_ty *comment;
+
+void
+xgettext_comment_add (str)
+     const char *str;
+{
+  if (comment == NULL)
+    comment = string_list_alloc ();
+  string_list_append (comment, str);
+}
+
+const char *
+xgettext_comment (n)
+     size_t n;
+{
+  if (comment == NULL || n >= comment->nitems)
+    return NULL;
+  return comment->item[n];
+}
+
+void
+xgettext_comment_reset ()
+{
+  if (comment != NULL)
+    {
+      string_list_free (comment);
+      comment = NULL;
+    }
+}
+
+
+
+static FILE *
+xgettext_open (fn, logical_file_name_p, real_file_name_p)
+     const char *fn;
+     char **logical_file_name_p;
+     char **real_file_name_p;
+{
+  FILE *fp;
+  char *new_name;
+  char *logical_file_name;
+
+  if (strcmp (fn, "-") == 0)
+    {
+      new_name = xstrdup (_("standard input"));
+      logical_file_name = xstrdup (new_name);
+      fp = stdin;
+    }
+  else if (IS_ABSOLUTE_PATH (fn))
+    {
+      new_name = xstrdup (fn);
+      fp = fopen (fn, "r");
+      if (fp == NULL)
+	error (EXIT_FAILURE, errno, _("\
+error while opening \"%s\" for reading"), fn);
+      logical_file_name = xstrdup (new_name);
+    }
+  else
+    {
+      int j;
+
+      for (j = 0; ; ++j)
+	{
+	  const char *dir = dir_list_nth (j);
+
+	  if (dir == NULL)
+	    error (EXIT_FAILURE, ENOENT, _("\
+error while opening \"%s\" for reading"), fn);
+
+	  new_name = concatenated_pathname (dir, fn, NULL);
+
+	  fp = fopen (new_name, "r");
+	  if (fp != NULL)
+	    break;
+
+	  if (errno != ENOENT)
+	    error (EXIT_FAILURE, errno, _("\
+error while opening \"%s\" for reading"), new_name);
+	  free (new_name);
+	}
+
+      /* Note that the NEW_NAME variable contains the actual file name
+	 and the logical file name is what is reported by xgettext.  In
+	 this case NEW_NAME is set to the file which was found along the
+	 directory search path, and LOGICAL_FILE_NAME is set to the
+	 file name which was searched for.  */
+      logical_file_name = xstrdup (fn);
+    }
+
+  *logical_file_name_p = logical_file_name;
+  *real_file_name_p = new_name;
+  return fp;
+}
+
+
+
+message_ty *
+remember_a_message (mlp, string, pos)
      message_list_ty *mlp;
-     xgettext_token_ty *tp;
+     char *string;
+     lex_pos_ty *pos;
 {
   enum is_c_format is_c_format = undecided;
   enum is_wrap do_wrap = undecided;
@@ -697,14 +786,14 @@ remember_a_message (mlp, tp)
   message_ty *mp;
   char *msgstr;
 
-  msgid = tp->string;
+  msgid = string;
 
   /* See whether we shall exclude this message.  */
   if (exclude != NULL && message_list_search (exclude, msgid) != NULL)
     {
       /* Tell the lexer to reset its comment buffer, so that the next
 	 message gets the correct comments.  */
-      xgettext_lex_comment_reset ();
+      xgettext_comment_reset ();
 
       return NULL;
     }
@@ -749,7 +838,7 @@ remember_a_message (mlp, tp)
 
       for (j = 0; ; ++j)
 	{
-	  const char *s = xgettext_lex_comment (j);
+	  const char *s = xgettext_comment (j);
 	  if (s == NULL)
 	    break;
 
@@ -781,27 +870,28 @@ remember_a_message (mlp, tp)
 
   /* Remember where we saw this msgid.  */
   if (line_comment)
-    message_comment_filepos (mp, tp->file_name, tp->line_number);
+    message_comment_filepos (mp, pos->file_name, pos->line_number);
 
   /* Tell the lexer to reset its comment buffer, so that the next
      message gets the correct comments.  */
-  xgettext_lex_comment_reset ();
+  xgettext_comment_reset ();
 
   return mp;
 }
 
 
-static void
-remember_a_message_plural (mp, tp)
+void
+remember_a_message_plural (mp, string, pos)
      message_ty *mp;
-     xgettext_token_ty *tp;
+     char *string;
+     lex_pos_ty *pos;
 {
   char *msgid_plural;
   char *msgstr1;
   size_t msgstr1_len;
   char *msgstr;
 
-  msgid_plural = tp->string;
+  msgid_plural = string;
 
   /* See if the message is already a plural message.  */
   if (mp->msgid_plural == NULL)
@@ -834,157 +924,20 @@ remember_a_message_plural (mp, tp)
 
 
 static void
-scan_c_file (filename, mdlp)
-     const char *filename;
+scan_c_file (file_name, mdlp)
+     const char *file_name;
      msgdomain_list_ty *mdlp;
 {
-  message_list_ty *mlp = mdlp->item[0]->messages;
-  int state;
-  int commas_to_skip = 0;	/* defined only when in states 1 and 2 */
-  int plural_commas = 0;	/* defined only when in states 1 and 2 */
-  message_ty *plural_mp = NULL;	/* defined only when in states 1 and 2 */
-  int paren_nesting = 0;	/* defined only when in state 2 */
-
-  /* The file is broken into tokens.  Scan the token stream, looking for
-     a keyword, followed by a left paren, followed by a string.  When we
-     see this sequence, we have something to remember.  We assume we are
-     looking at a valid C or C++ program, and leave the complaints about
-     the grammar to the compiler.
-
-     Normal handling: Look for
-       [A] keyword [B] ( ... [C] ... msgid ... ) [E]
-     Plural handling: Look for
-       [A] keyword [B] ( ... [C] ... msgid ... [D] ... msgid_plural ... ) [E]
-     At point [A]: state == 0.
-     At point [B]: state == 1, commas_to_skip set, plural_mp == NULL.
-     At point [C]: state == 2, commas_to_skip set, plural_mp == NULL.
-     At point [D]: state == 2, commas_to_skip set again, plural_mp != NULL.
-     At point [E]: state == 0.  */
-
-  xgettext_lex_open (filename);
-
-  /* Start state is 0.  */
-  state = 0;
-
-  while (1)
-   {
-     xgettext_token_ty token;
-
-     /* A state machine is used to do the recognising:
-        State 0 = waiting for something to happen
-        State 1 = seen one of our keywords
-        State 2 = waiting for part of an argument */
-     xgettext_lex (&token);
-     switch (token.type)
-       {
-       case xgettext_token_type_keyword:
-	 if (!extract_all && state == 2)
-	   {
-	     if (commas_to_skip == 0)
-	       {
-		 error_with_progname = false;
-		 error (0, 0,
-			_("%s:%d: warning: keyword nested in keyword arg"),
-			token.file_name, token.line_number);
-		 error_with_progname = true;
-		 continue;
-	       }
-
-	     /* Here we should nest properly, but this would require a
-		potentially unbounded stack.  We haven't run across an
-		example that needs this functionality yet.  For now,
-		we punt and forget the outer keyword.  */
-	     error_with_progname = false;
-	     error (0, 0,
-		    _("%s:%d: warning: keyword between outer keyword and its arg"),
-		    token.file_name, token.line_number);
-	     error_with_progname = true;
-	   }
-	 commas_to_skip = token.argnum1 - 1;
-	 plural_commas = (token.argnum2 > token.argnum1
-			  ? token.argnum2 - token.argnum1 : 0);
-	 plural_mp = NULL;
-	 state = 1;
-	 continue;
-
-       case xgettext_token_type_lparen:
-	 switch (state)
-	   {
-	   case 1:
-	     paren_nesting = 0;
-	     state = 2;
-	     break;
-	   case 2:
-	     paren_nesting++;
-	     break;
-	   }
-	 continue;
-
-       case xgettext_token_type_rparen:
-	 if (state == 2 && paren_nesting != 0)
-	   paren_nesting--;
-	 else
-	   state = 0;
-	 continue;
-
-       case xgettext_token_type_comma:
-	 if (state == 2 && commas_to_skip != 0)
-	   {
-	     if (paren_nesting == 0)
-	       commas_to_skip--;
-	   }
-	 else
-	   state = 0;
-	 continue;
-
-       case xgettext_token_type_string_literal:
-	 if (extract_all)
-	   remember_a_message (mlp, &token);
-	 else if (state == 2 && commas_to_skip == 0)
-	   {
-	     if (plural_mp == NULL)
-	       {
-		 /* Seen an msgid.  */
-		 if (plural_commas == 0)
-		   remember_a_message (mlp, &token);
-		 else
-		   {
-		     plural_mp = remember_a_message (mlp, &token);
-		     commas_to_skip = plural_commas;
-		     plural_commas = 0;
-		   }
-	       }
-	     else
-	       {
-		 /* Seen an msgid_plural.  */
-		 remember_a_message_plural (plural_mp, &token);
-		 plural_mp = NULL;
-	       }
-	   }
-	 else
-	   {
-	     free (token.string);
-	     if (state == 1)
-	       state = 0;
-	   }
-	 continue;
-
-       case xgettext_token_type_symbol:
-	 if (state == 1)
-	   state = 0;
-	 continue;
-
-       case xgettext_token_type_eof:
-	 break;
-
-       default:
-	 abort ();
-       }
-     break;
-   }
-
-  /* Close scanner.  */
-  xgettext_lex_close ();
+  char *logical_file_name;
+  char *real_file_name;
+  FILE *fp = xgettext_open (file_name, &logical_file_name, &real_file_name);
+
+  extract_c (fp, real_file_name, logical_file_name, mdlp);
+
+  if (fp != stdin)
+    fclose (fp);
+  free (logical_file_name);
+  free (real_file_name);
 }
 
 
@@ -1375,9 +1328,7 @@ language_to_scanner (name)
 
   static table_ty table[] =
   {
-    { "C", scan_c_file, },
-    { "C++", scan_c_file, },
-    { "ObjectiveC", scan_c_file, },
+    SCANNERS_C
     { "PO", read_po_file, },
     /* Here will follow more languages and their scanners: awk, perl,
        etc...  Make sure new scanners honor the --exlude-file option.  */
@@ -1407,16 +1358,7 @@ extension_to_language (extension)
 
   static table_ty table[] =
   {
-    { "c",      "C",    },
-    { "C",      "C++",  },
-    { "c++",    "C++",  },
-    { "cc",     "C++",  },
-    { "cxx",    "C++",  },
-    { "cpp",    "C++",  },
-    { "h",      "C",    },
-    { "hh",     "C++",  },
-    { "hpp",    "C++",  },
-    { "m",      "ObjectiveC" },
+    EXTENSIONS_C
     { "po",     "PO",   },
     { "pot",    "PO",   },
     { "pox",    "PO",   },
diff --git a/src/xgettext.h b/src/xgettext.h
new file mode 100644
index 000000000..5437d7d1c
--- /dev/null
+++ b/src/xgettext.h
@@ -0,0 +1,37 @@
+/* xgettext common functions.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Peter Miller <millerp@canb.auug.org.au>
+   and Bruno Haible <haible@clisp.cons.org>, 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#ifndef _XGETTEXT_H
+#define _XGETTEXT_H
+
+#include <stdlib.h>
+#include "message.h"
+#include "pos.h"
+
+extern void xgettext_comment_add PARAMS ((const char *str));
+extern const char *xgettext_comment PARAMS ((size_t n));
+extern void xgettext_comment_reset PARAMS ((void));
+
+extern message_ty *remember_a_message PARAMS ((message_list_ty *mlp,
+					       char *string, lex_pos_ty *pos));
+extern void remember_a_message_plural PARAMS ((message_ty *mp,
+					       char *string, lex_pos_ty *pos));
+
+
+#endif /* _XGETTEXT_H */