Split xgettext into a common frontend and a C specific backend.

author Bruno Haible <bruno@clisp.org>

Wed, 8 Aug 2001 12:08:55 +0000 (12:08 +0000)

committer Bruno Haible <bruno@clisp.org>

Wed, 8 Aug 2001 12:08:55 +0000 (12:08 +0000)
author Bruno Haible <bruno@clisp.org>
Wed, 8 Aug 2001 12:08:55 +0000 (12:08 +0000)
committer Bruno Haible <bruno@clisp.org>
Wed, 8 Aug 2001 12:08:55 +0000 (12:08 +0000)
diff --git a/src/ChangeLog b/src/ChangeLog

index 1c3d5480b451a36186abb0037277e758ecd41e72..2f81d774d9565e4f27e8c852f7b7aa4b5aeea6c7 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,57 @@
+2001-07-27  Bruno Haible  <haible@clisp.cons.org>
+
+       * x-c.h: New file.
+       * x-c.c: New file.
+       (xgettext_token_type_ty, xgettext_token_ty): Moved here from
+       xget-lex.h. In xgettext_token_ty, unify line_number and file_name into
+       lex_pos_ty.
+       (token_type_ty, token_ty): Moved here from xget-lex.c.
+       (extract_all): Moved here from xgettext.c.
+       (keywords, default_keywords, trigraphs, file_name, logical_file_name,
+       line_number, fp, last_comment_line, last_non_comment_line,
+       newline_count): Moved here from xget-lex.c.
+       (phase1_pushback, phase1_pushback_length, phase1_getc, phase1_ungetc,
+       phase2_pushback, phase2_pushback_length, phase2_getc, phase2_ungetc,
+       phase3_pushback, phase3_pushback_length, phase3_getc, phase3_ungetc,
+       phase4_getc, phase4_ungetc, phase7_getc, phase7_ungetc,
+       phase5_pushback, phase5_pushback_length, phase5_get, phase5_unget,
+       phaseX_get, phase6_pushback, phase6_pushback_length, phase6_get,
+       phase6_unget, phase8_get): Moved here from xget-lex.c. Use
+       xgettext_comment_add instead of accessing 'comment'. Don't free
+       logical_file_name; it is still used as file_name of messages in mdlp.
+       (x_c_lex): Moved here from xget-lex.c, renamed from xgettext_lex.
+       (extract_c): Moved here from xgettext.c, renamed from scan_c_file.
+       Change arguments from filename to FILE * and two filenames. Don't call
+       xgettext_lex_open and xgettext_lex_close.
+       (x_c_extract_all): New function.
+       (x_c_keyword): Moved here from xget-lex.c, renamed from
+       xgettext_lex_keyword.
+       (x_c_any_keywords): Moved here from xget-lex.c, renamed from
+       xgettext_any_keywords.
+       (x_c_trigraphs): Moved here from xget-lex.c, renamed from
+       xgettext_lex_trigraphs.
+       * xgettext.h: New file.
+       * xgettext.c (extract_all): Move to x-c.c.
+       (comment): Moved here from xget-lex.c.
+       (xgettext_comment_add): New function.
+       (xgettext_comment): Moved here from xget-lex.c, renamed from
+       xgettext_lex_comment.
+       (xgettext_comment_reset): Moved here from xget-lex.c, renamed from
+       xgettext_lex_comment_reset.
+       (xgettext_open): New function, mostly taken from xgettext_lex_open
+       in xget-lex.c.
+       (remember_a_message): Make non-static. Replace xgettext_token_ty arg
+       with lex_pos_ty argument.
+       (remember_a_message_plural): Likewise.
+       (scan_c_file): Moved the body to x-c.c:extract_c.
+       (language_to_scanner): Use SCANNERS_C.
+       (extension_to_language): Use EXTENSIONS_C.
+       * xget-lex.h: Remove file.
+       * xget-lex.c: Remove file.
+       * Makefile.am (noinst_HEADERS): Remove xget-lex.h. Add xgettext.h and
+       x-c.h.
+       (xgettext_SOURCES): Remove xget-lex.c. Add x-c.c.
+
  2001-07-26  Bruno Haible  <haible@clisp.cons.org>
  
         * file-list.h: New file.
diff --git a/src/Makefile.am b/src/Makefile.am

index 8082cc0715d077385c1a0e992ecc8a6b1338686c..d69db8330da017dc1374667cab524adf618e7ff2 100644 (file)
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -24,9 +24,9 @@ msgcmp msgfmt msgmerge msgunfmt xgettext \
  msgcat msgcomm msgconv msgen msgexec msggrep msguniq
  
  noinst_HEADERS = pos.h message.h po-gram.h po-hash.h po-charset.h po-lex.h \
-po.h open-po.h read-po.h str-list.h write-po.h xget-lex.h dir-list.h \
-file-list.h po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h \
-msgl-ascii.h msgl-cat.h
+po.h open-po.h read-po.h str-list.h write-po.h dir-list.h file-list.h \
+po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h msgl-ascii.h \
+msgl-cat.h xgettext.h x-c.h
  
  EXTRA_DIST = FILES
  
@@ -52,8 +52,8 @@ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
  msgl-ascii.c
  msgunfmt_SOURCES = message.c msgunfmt.c str-list.c write-po.c msgl-ascii.c
  xgettext_SOURCES = message.c open-po.c po-gram-gen.y po-hash-gen.y \
-po-charset.c po-lex.c po.c str-list.c xget-lex.c xgettext.c dir-list.c \
-write-po.c msgl-ascii.c file-list.c
+po-charset.c po-lex.c po.c str-list.c xgettext.c dir-list.c write-po.c \
+msgl-ascii.c file-list.c x-c.c
  msgcat_SOURCES = msgcat.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
  po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
  msgl-ascii.c msgl-iconv.c msgl-cat.c file-list.c
diff --git a/src/xget-lex.c b/src/x-c.c

similarity index 82%

rename from src/xget-lex.c

rename to src/x-c.c

index 516b9f48d165a28f4c0491dd9bb57c347a1347fb..395ebee679a3ba20f44aea7d55cc9ebcb3c0f000 100644 (file)
--- a/src/xget-lex.c
+++ b/src/x-c.c
@@ -23,21 +23,19 @@
  
  #include <ctype.h>
  #include <errno.h>
+#include <stdbool.h>
  #include <stdio.h>
  #include <stdlib.h>
+#include <string.h>
  
-#include "dir-list.h"
+#include "message.h"
+#include "x-c.h"
+#include "xgettext.h"
  #include "error.h"
  #include "progname.h"
  #include "system.h"
-#include "libgettext.h"
  #include "hash.h"
-#include "str-list.h"
-#include "xget-lex.h"
-
-#ifndef errno
-extern int errno;
-#endif
+#include "libgettext.h"
  
  #define _(s) gettext(s)
  
@@ -75,6 +73,37 @@ extern int errno;
     xgettext.c) with a stream of C tokens.  The comments are
     accumulated in a buffer, and given to xgettext when asked for.  */
  
+enum xgettext_token_type_ty
+{
+  xgettext_token_type_eof,
+  xgettext_token_type_keyword,
+  xgettext_token_type_lparen,
+  xgettext_token_type_rparen,
+  xgettext_token_type_comma,
+  xgettext_token_type_string_literal,
+  xgettext_token_type_symbol
+};
+typedef enum xgettext_token_type_ty xgettext_token_type_ty;
+
+typedef struct xgettext_token_ty xgettext_token_ty;
+struct xgettext_token_ty
+{
+  xgettext_token_type_ty type;
+
+  /* These fields are used only for xgettext_token_type_keyword.  */
+  int argnum1;
+  int argnum2;
+
+  /* This field is used only for xgettext_token_type_string_literal.  */
+  char *string;
+
+  /* These fields are only for
+       xgettext_token_type_keyword,
+       xgettext_token_type_string_literal.  */
+  lex_pos_ty pos;
+};
+
+
  enum token_type_ty
  {
    token_type_character_constant,
@@ -102,14 +131,18 @@ struct token_ty
  };
  
  
+/* If true extract all strings.  */
+static bool extract_all = false;
+
+static hash_table keywords;
+static bool default_keywords = true;
+
+static bool trigraphs = false;
+
  static const char *file_name;
  static char *logical_file_name;
  static int line_number;
  static FILE *fp;
-static bool trigraphs = false;
-static string_list_ty *comment;
-static hash_table keywords;
-static bool default_keywords = true;
  
  /* These are for tracking whether comments count as immediately before
     keyword.  */
@@ -136,79 +169,7 @@ static void phaseX_get PARAMS ((token_ty *tp));
  static void phase6_get PARAMS ((token_ty *tp));
  static void phase6_unget PARAMS ((token_ty *tp));
  static void phase8_get PARAMS ((token_ty *tp));
-
-
-
-void
-xgettext_lex_open (fn)
-     const char *fn;
-{
-  char *new_name;
-
-  if (strcmp (fn, "-") == 0)
-    {
-      new_name = xstrdup (_("standard input"));
-      logical_file_name = xstrdup (new_name);
-      fp = stdin;
-    }
-  else if (IS_ABSOLUTE_PATH (fn))
-    {
-      new_name = xstrdup (fn);
-      fp = fopen (fn, "r");
-      if (fp == NULL)
-       error (EXIT_FAILURE, errno, _("\
-error while opening \"%s\" for reading"), fn);
-      logical_file_name = xstrdup (new_name);
-    }
-  else
-    {
-      int j;
-
-      for (j = 0; ; ++j)
-       {
-         const char *dir = dir_list_nth (j);
-
-         if (dir == NULL)
-           error (EXIT_FAILURE, ENOENT, _("\
-error while opening \"%s\" for reading"), fn);
-
-         new_name = concatenated_pathname (dir, fn, NULL);
-
-         fp = fopen (new_name, "r");
-         if (fp != NULL)
-           break;
-
-         if (errno != ENOENT)
-           error (EXIT_FAILURE, errno, _("\
-error while opening \"%s\" for reading"), new_name);
-         free (new_name);
-       }
-
-      /* Note that the NEW_NAME variable contains the actual file name
-        and the logical file name is what is reported by xgettext.  In
-        this case NEW_NAME is set to the file which was found along the
-        directory search path, and LOGICAL_FILE_NAME is is set to the
-        file name which was searched for.  */
-      logical_file_name = xstrdup (fn);
-    }
-
-  file_name = new_name;
-  line_number = 1;
-}
-
-
-void
-xgettext_lex_close ()
-{
-  if (fp != stdin)
-    fclose (fp);
-  free ((char *) file_name);
-  free (logical_file_name);
-  fp = NULL;
-  file_name = NULL;
-  logical_file_name = NULL;
-  line_number = 0;
-}
+static void x_c_lex PARAMS ((xgettext_token_ty *tp));
  
  
  /* 1. Terminate line by \n, regardless of the external representation of
@@ -425,8 +386,6 @@ phase4_getc ()
        /* C comment.  */
        buflen = 0;
        state = 0;
-      if (comment == NULL)
-       comment = string_list_alloc ();
        while (1)
         {
           c = phase3_getc ();
@@ -449,7 +408,7 @@ phase4_getc ()
                                      || buffer[buflen - 1] == '\t'))
                 --buflen;
               buffer[buflen] = 0;
-             string_list_append (comment, buffer);
+             xgettext_comment_add (buffer);
               buflen = 0;
               state = 0;
               continue;
@@ -466,7 +425,7 @@ phase4_getc ()
                                          || buffer[buflen - 1] == '\t'))
                     --buflen;
                   buffer[buflen] = 0;
-                 string_list_append (comment, buffer);
+                 xgettext_comment_add (buffer);
                   break;
                 }
               /* FALLTHROUGH */
@@ -501,9 +460,7 @@ phase4_getc ()
           buffer = xrealloc (buffer, bufmax);
         }
        buffer[buflen] = 0;
-      if (comment == NULL)
-       comment = string_list_alloc ();
-      string_list_append (comment, buffer);
+      xgettext_comment_add (buffer);
        last_comment_line = newline_count;
        return '\n';
      }
@@ -1075,14 +1032,12 @@ phase6_get (tp)
           && buf[1].type == token_type_number
           && buf[2].type == token_type_string_literal)
         {
-         free (logical_file_name);
           logical_file_name = xstrdup (buf[2].string);
           line_number = buf[1].number;
         }
        if (bufpos >= 2 && buf[0].type == token_type_number
           && buf[1].type == token_type_string_literal)
         {
-         free (logical_file_name);
           logical_file_name = xstrdup (buf[1].string);
           line_number = buf[0].number;
         }
@@ -1103,7 +1058,7 @@ phase6_get (tp)
         }
  
        /* We must reset the selected comments.  */
-      xgettext_lex_comment_reset ();
+      xgettext_comment_reset ();
      }
  }
  
@@ -1154,8 +1109,8 @@ phase8_get (tp)
  /* 9. Convert the remaining preprocessing tokens to C tokens and
     discards any white space from the translation unit.  */
  
-void
-xgettext_lex (tp)
+static void
+x_c_lex (tp)
       xgettext_token_ty *tp;
  {
    while (1)
@@ -1187,7 +1142,7 @@ xgettext_lex (tp)
              with non-white space tokens.  */
           ++newline_count;
           if (last_non_comment_line > last_comment_line)
-           xgettext_lex_comment_reset ();
+           xgettext_comment_reset ();
           break;
  
         case token_type_name:
@@ -1195,13 +1150,13 @@ xgettext_lex (tp)
  
           if (default_keywords)
             {
-             xgettext_lex_keyword ("gettext");
-             xgettext_lex_keyword ("dgettext:2");
-             xgettext_lex_keyword ("dcgettext:2");
-             xgettext_lex_keyword ("ngettext:1,2");
-             xgettext_lex_keyword ("dngettext:2,3");
-             xgettext_lex_keyword ("dcngettext:2,3");
-             xgettext_lex_keyword ("gettext_noop");
+             x_c_keyword ("gettext");
+             x_c_keyword ("dgettext:2");
+             x_c_keyword ("dcgettext:2");
+             x_c_keyword ("ngettext:1,2");
+             x_c_keyword ("dngettext:2,3");
+             x_c_keyword ("dcngettext:2,3");
+             x_c_keyword ("gettext_noop");
               default_keywords = false;
             }
  
@@ -1212,8 +1167,8 @@ xgettext_lex (tp)
               tp->type = xgettext_token_type_keyword;
               tp->argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
               tp->argnum2 = (int) (long) keyword_value >> 10;
-             tp->line_number = token.line_number;
-             tp->file_name = logical_file_name;
+             tp->pos.file_name = logical_file_name;
+             tp->pos.line_number = token.line_number;
             }
           else
             tp->type = xgettext_token_type_symbol;
@@ -1243,8 +1198,8 @@ xgettext_lex (tp)
  
           tp->type = xgettext_token_type_string_literal;
           tp->string = token.string;
-         tp->line_number = token.line_number;
-         tp->file_name = logical_file_name;
+         tp->pos.file_name = logical_file_name;
+         tp->pos.line_number = token.line_number;
           return;
  
         default:
@@ -1258,7 +1213,179 @@ xgettext_lex (tp)
  
  
  void
-xgettext_lex_keyword (name)
+extract_c (f, real_filename, logical_filename, mdlp)
+     FILE *f;
+     const char *real_filename;
+     const char *logical_filename;
+     msgdomain_list_ty *mdlp;
+{
+  message_list_ty *mlp = mdlp->item[0]->messages;
+  int state;
+  int commas_to_skip = 0;      /* defined only when in states 1 and 2 */
+  int plural_commas = 0;       /* defined only when in states 1 and 2 */
+  message_ty *plural_mp = NULL;        /* defined only when in states 1 and 2 */
+  int paren_nesting = 0;       /* defined only when in state 2 */
+
+  /* The file is broken into tokens.  Scan the token stream, looking for
+     a keyword, followed by a left paren, followed by a string.  When we
+     see this sequence, we have something to remember.  We assume we are
+     looking at a valid C or C++ program, and leave the complaints about
+     the grammar to the compiler.
+
+     Normal handling: Look for
+       [A] keyword [B] ( ... [C] ... msgid ... ) [E]
+     Plural handling: Look for
+       [A] keyword [B] ( ... [C] ... msgid ... [D] ... msgid_plural ... ) [E]
+     At point [A]: state == 0.
+     At point [B]: state == 1, commas_to_skip set, plural_mp == NULL.
+     At point [C]: state == 2, commas_to_skip set, plural_mp == NULL.
+     At point [D]: state == 2, commas_to_skip set again, plural_mp != NULL.
+     At point [E]: state == 0.  */
+
+  fp = f;
+  file_name = real_filename;
+  logical_file_name = xstrdup (logical_filename);
+  line_number = 1;
+
+  /* Start state is 0.  */
+  state = 0;
+
+  while (1)
+   {
+     xgettext_token_ty token;
+
+     /* A state machine is used to do the recognising:
+        State 0 = waiting for something to happen
+        State 1 = seen one of our keywords
+        State 2 = waiting for part of an argument */
+     x_c_lex (&token);
+     switch (token.type)
+       {
+       case xgettext_token_type_keyword:
+        if (!extract_all && state == 2)
+          {
+            if (commas_to_skip == 0)
+              {
+                error_with_progname = false;
+                error (0, 0,
+                       _("%s:%d: warning: keyword nested in keyword arg"),
+                       token.pos.file_name, token.pos.line_number);
+                error_with_progname = true;
+                continue;
+              }
+
+            /* Here we should nest properly, but this would require a
+               potentially unbounded stack.  We haven't run across an
+               example that needs this functionality yet.  For now,
+               we punt and forget the outer keyword.  */
+            error_with_progname = false;
+            error (0, 0,
+                   _("%s:%d: warning: keyword between outer keyword and its arg"),
+                   token.pos.file_name, token.pos.line_number);
+            error_with_progname = true;
+          }
+        commas_to_skip = token.argnum1 - 1;
+        plural_commas = (token.argnum2 > token.argnum1
+                         ? token.argnum2 - token.argnum1 : 0);
+        plural_mp = NULL;
+        state = 1;
+        continue;
+
+       case xgettext_token_type_lparen:
+        switch (state)
+          {
+          case 1:
+            paren_nesting = 0;
+            state = 2;
+            break;
+          case 2:
+            paren_nesting++;
+            break;
+          }
+        continue;
+
+       case xgettext_token_type_rparen:
+        if (state == 2 && paren_nesting != 0)
+          paren_nesting--;
+        else
+          state = 0;
+        continue;
+
+       case xgettext_token_type_comma:
+        if (state == 2 && commas_to_skip != 0)
+          {
+            if (paren_nesting == 0)
+              commas_to_skip--;
+          }
+        else
+          state = 0;
+        continue;
+
+       case xgettext_token_type_string_literal:
+        if (extract_all)
+          remember_a_message (mlp, token.string, &token.pos);
+        else if (state == 2 && commas_to_skip == 0)
+          {
+            if (plural_mp == NULL)
+              {
+                /* Seen an msgid.  */
+                if (plural_commas == 0)
+                  remember_a_message (mlp, token.string, &token.pos);
+                else
+                  {
+                    plural_mp = remember_a_message (mlp, token.string,
+                                                    &token.pos);
+                    commas_to_skip = plural_commas;
+                    plural_commas = 0;
+                  }
+              }
+            else
+              {
+                /* Seen an msgid_plural.  */
+                remember_a_message_plural (plural_mp, token.string,
+                                           &token.pos);
+                plural_mp = NULL;
+              }
+          }
+        else
+          {
+            free (token.string);
+            if (state == 1)
+              state = 0;
+          }
+        continue;
+
+       case xgettext_token_type_symbol:
+        if (state == 1)
+          state = 0;
+        continue;
+
+       case xgettext_token_type_eof:
+        break;
+
+       default:
+        abort ();
+       }
+     break;
+   }
+
+  /* Close scanner.  */
+  fp = NULL;
+  file_name = NULL;
+  logical_file_name = NULL;
+  line_number = 0;
+}
+
+
+void
+x_c_extract_all ()
+{
+  extract_all = true;
+}
+
+
+void
+x_c_keyword (name)
       const char *name;
  {
    if (name == NULL)
@@ -1309,37 +1436,15 @@ xgettext_lex_keyword (name)
      }
  }
  
-
  bool
-xgettext_any_keywords ()
+x_c_any_keywords ()
  {
    return (keywords.filled > 0) || default_keywords;
  }
  
  
-const char *
-xgettext_lex_comment (n)
-     size_t n;
-{
-  if (comment == NULL || n >= comment->nitems)
-    return NULL;
-  return comment->item[n];
-}
-
-
-void
-xgettext_lex_comment_reset ()
-{
-  if (comment != NULL)
-    {
-      string_list_free (comment);
-      comment = NULL;
-    }
-}
-
-
  void
-xgettext_lex_trigraphs ()
+x_c_trigraphs ()
  {
    trigraphs = true;
  }
diff --git a/src/x-c.h b/src/x-c.h

new file mode 100644 (file)

index 0000000..2526b1e
--- /dev/null
+++ b/src/x-c.h
@@ -0,0 +1,50 @@
+/* xgettext C/C++/ObjectiveC backend.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+
+#define EXTENSIONS_C \
+  { "c",      "C",    },                                               \
+  { "h",      "C",    },                                               \
+  { "C",      "C++",  },                                               \
+  { "c++",    "C++",  },                                               \
+  { "cc",     "C++",  },                                               \
+  { "cxx",    "C++",  },                                               \
+  { "cpp",    "C++",  },                                               \
+  { "hh",     "C++",  },                                               \
+  { "hpp",    "C++",  },                                               \
+  { "m",      "ObjectiveC" },                                          \
+
+#define SCANNERS_C \
+  { "C",          scan_c_file, },                                      \
+  { "C++",        scan_c_file, },                                      \
+  { "ObjectiveC", scan_c_file, },                                      \
+
+/* Scan a C/C++/ObjectiveC file and add its translatable strings to mdlp.  */
+extern void extract_c PARAMS ((FILE *fp, const char *real_filename,
+                              const char *logical_filename,
+                              msgdomain_list_ty *mdlp));
+
+
+/* Handling of options specific to this language.  */
+
+extern void x_c_extract_all PARAMS ((void));
+
+extern void x_c_keyword PARAMS ((const char *name));
+extern bool x_c_any_keywords PARAMS ((void));
+
+extern void x_c_trigraphs PARAMS ((void));
diff --git a/src/xget-lex.h b/src/xget-lex.h

deleted file mode 100644 (file)

index b5ea058..0000000
--- a/src/xget-lex.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* GNU gettext - internationalization aids
-   Copyright (C) 1995, 1996, 1998, 2000, 2001 Free Software Foundation, Inc.
-
-   This file was written by Peter Miller <millerp@canb.auug.org.au>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
-
-#ifndef _XGET_LEX_H
-#define _XGET_LEX_H
-
-#include <stdbool.h>
-
-enum xgettext_token_type_ty
-{
-  xgettext_token_type_eof,
-  xgettext_token_type_keyword,
-  xgettext_token_type_lparen,
-  xgettext_token_type_rparen,
-  xgettext_token_type_comma,
-  xgettext_token_type_string_literal,
-  xgettext_token_type_symbol
-};
-typedef enum xgettext_token_type_ty xgettext_token_type_ty;
-
-typedef struct xgettext_token_ty xgettext_token_ty;
-struct xgettext_token_ty
-{
-  xgettext_token_type_ty type;
-
-  /* These fields are used only for xgettext_token_type_keyword.  */
-  int argnum1;
-  int argnum2;
-
-  /* This field is used only for xgettext_token_type_string_literal.  */
-  char *string;
-
-  /* These fields are only for
-       xgettext_token_type_keyword,
-       xgettext_token_type_string_literal.  */
-  int line_number;
-  char *file_name;
-};
-
-
-void xgettext_lex_open PARAMS ((const char *file_name));
-void xgettext_lex_close PARAMS ((void));
-void xgettext_lex PARAMS ((xgettext_token_ty *tp));
-const char *xgettext_lex_comment PARAMS ((size_t n));
-void xgettext_lex_comment_reset PARAMS ((void));
-/* void xgettext_lex_filepos PARAMS ((char **, int *)); FIXME needed?  */
-void xgettext_lex_keyword PARAMS ((const char *name));
-bool xgettext_any_keywords PARAMS ((void));
-void xgettext_lex_trigraphs PARAMS ((void));
-
-#endif /* _XGET_LEX_H */
diff --git a/src/xgettext.c b/src/xgettext.c

index a2c29add3fa8fd3fa1c8dfd505ca413a7781cb16..4d90f7d4cef4587ba6c5f9622b1267f83b2ff2c3 100644 (file)
--- a/src/xgettext.c
+++ b/src/xgettext.c
@@ -29,40 +29,37 @@
  #include <time.h>
  #include <sys/types.h>
  #include <stdlib.h>
+#include <stdbool.h>
  #include <locale.h>
  
  #ifdef HAVE_UNISTD_H
  # include <unistd.h>
  #endif
  
-#ifndef errno
-extern int errno;
-#endif
-
+#include "xgettext.h"
  #include "dir-list.h"
  #include "file-list.h"
  #include "error.h"
  #include "progname.h"
-#include "hash.h"
  #include "getline.h"
  #include "system.h"
  #include "po.h"
  #include "message.h"
  #include "write-po.h"
-#include "xget-lex.h"
  #include "printf-parse.h"
-
  #include "libgettext.h"
  
  #ifndef _POSIX_VERSION
  struct passwd *getpwuid ();
  #endif
  
-
  /* A convenience macro.  I don't like writing gettext() every time.  */
  #define _(str) gettext (str)
  
  
+#include "x-c.h"
+
+
  /* If nonzero add all comments immediately preceding one of the keywords. */
  static bool add_all_comments = false;
  
@@ -82,9 +79,6 @@ static int do_debug;
  /* Content of .po files with symbols to be excluded.  */
  static message_list_ty *exclude;
  
-/* If true extract all strings.  */
-static bool extract_all = false;
-
  /* Force output of PO file even if empty.  */
  static int force_po;
  
@@ -157,10 +151,8 @@ static void exclude_directive_message PARAMS ((po_ty *pop, char *msgid,
                                                lex_pos_ty *msgstr_pos,
                                                bool obsolete));
  static void read_exclusion_file PARAMS ((char *file_name));
-static message_ty *remember_a_message PARAMS ((message_list_ty *mlp,
-                                              xgettext_token_ty *tp));
-static void remember_a_message_plural PARAMS ((message_ty *mp,
-                                              xgettext_token_ty *tp));
+static FILE *xgettext_open PARAMS ((const char *fn, char **logical_file_name_p,
+                                   char **real_file_name_p));
  static void scan_c_file PARAMS ((const char *file_name,
                                  msgdomain_list_ty *mdlp));
  static void extract_constructor PARAMS ((po_ty *that));
@@ -188,8 +180,8 @@ static enum is_c_format test_whether_c_format PARAMS ((const char *s));
     string argument and a message list argument.  */
  typedef void (*scanner_fp) PARAMS ((const char *, msgdomain_list_ty *));
  
-static const char *extension_to_language PARAMS ((const char *));
  static scanner_fp language_to_scanner PARAMS ((const char *));
+static const char *extension_to_language PARAMS ((const char *));
  
  
  int
@@ -236,7 +228,7 @@ main (argc, argv)
        case '\0':               /* Long option.  */
         break;
        case 'a':
-       extract_all = true;
+       x_c_extract_all ();
         break;
        case 'c':
         if (optarg == NULL)
@@ -285,7 +277,7 @@ main (argc, argv)
         break;
        case 'k':
         if (optarg == NULL || *optarg != '\0')
-         xgettext_lex_keyword (optarg);
+         x_c_keyword (optarg);
         break;
        case 'l':
         /* Accepted for backward compatibility with 0.10.35.  */
@@ -333,7 +325,7 @@ main (argc, argv)
         message_print_style_uniforum ();
         break;
        case 'T':
-       xgettext_lex_trigraphs ();
+       x_c_trigraphs ();
         break;
        case 'V':
         do_version = true;
@@ -371,7 +363,7 @@ main (argc, argv)
      error (EXIT_FAILURE, 0, _("\
  --join-existing cannot be used when output is written to stdout"));
  
-  if (!xgettext_any_keywords ())
+  if (!x_c_any_keywords ())
      {
        error (0, 0, _("\
  xgettext cannot work without keywords to look for"));
@@ -686,10 +678,107 @@ read_exclusion_file (file_name)
  }
  
  
-static message_ty *
-remember_a_message (mlp, tp)
+static string_list_ty *comment;
+
+void
+xgettext_comment_add (str)
+     const char *str;
+{
+  if (comment == NULL)
+    comment = string_list_alloc ();
+  string_list_append (comment, str);
+}
+
+const char *
+xgettext_comment (n)
+     size_t n;
+{
+  if (comment == NULL || n >= comment->nitems)
+    return NULL;
+  return comment->item[n];
+}
+
+void
+xgettext_comment_reset ()
+{
+  if (comment != NULL)
+    {
+      string_list_free (comment);
+      comment = NULL;
+    }
+}
+
+
+
+static FILE *
+xgettext_open (fn, logical_file_name_p, real_file_name_p)
+     const char *fn;
+     char **logical_file_name_p;
+     char **real_file_name_p;
+{
+  FILE *fp;
+  char *new_name;
+  char *logical_file_name;
+
+  if (strcmp (fn, "-") == 0)
+    {
+      new_name = xstrdup (_("standard input"));
+      logical_file_name = xstrdup (new_name);
+      fp = stdin;
+    }
+  else if (IS_ABSOLUTE_PATH (fn))
+    {
+      new_name = xstrdup (fn);
+      fp = fopen (fn, "r");
+      if (fp == NULL)
+       error (EXIT_FAILURE, errno, _("\
+error while opening \"%s\" for reading"), fn);
+      logical_file_name = xstrdup (new_name);
+    }
+  else
+    {
+      int j;
+
+      for (j = 0; ; ++j)
+       {
+         const char *dir = dir_list_nth (j);
+
+         if (dir == NULL)
+           error (EXIT_FAILURE, ENOENT, _("\
+error while opening \"%s\" for reading"), fn);
+
+         new_name = concatenated_pathname (dir, fn, NULL);
+
+         fp = fopen (new_name, "r");
+         if (fp != NULL)
+           break;
+
+         if (errno != ENOENT)
+           error (EXIT_FAILURE, errno, _("\
+error while opening \"%s\" for reading"), new_name);
+         free (new_name);
+       }
+
+      /* Note that the NEW_NAME variable contains the actual file name
+        and the logical file name is what is reported by xgettext.  In
+        this case NEW_NAME is set to the file which was found along the
+        directory search path, and LOGICAL_FILE_NAME is is set to the
+        file name which was searched for.  */
+      logical_file_name = xstrdup (fn);
+    }
+
+  *logical_file_name_p = logical_file_name;
+  *real_file_name_p = new_name;
+  return fp;
+}
+
+
+
+message_ty *
+remember_a_message (mlp, string, pos)
       message_list_ty *mlp;
-     xgettext_token_ty *tp;
+     char *string;
+     lex_pos_ty *pos;
  {
    enum is_c_format is_c_format = undecided;
    enum is_wrap do_wrap = undecided;
@@ -697,14 +786,14 @@ remember_a_message (mlp, tp)
    message_ty *mp;
    char *msgstr;
  
-  msgid = tp->string;
+  msgid = string;
  
    /* See whether we shall exclude this message.  */
    if (exclude != NULL && message_list_search (exclude, msgid) != NULL)
      {
        /* Tell the lexer to reset its comment buffer, so that the next
          message gets the correct comments.  */
-      xgettext_lex_comment_reset ();
+      xgettext_comment_reset ();
  
        return NULL;
      }
@@ -749,7 +838,7 @@ remember_a_message (mlp, tp)
  
        for (j = 0; ; ++j)
         {
-         const char *s = xgettext_lex_comment (j);
+         const char *s = xgettext_comment (j);
           if (s == NULL)
             break;
  
@@ -781,27 +870,28 @@ remember_a_message (mlp, tp)
  
    /* Remember where we saw this msgid.  */
    if (line_comment)
-    message_comment_filepos (mp, tp->file_name, tp->line_number);
+    message_comment_filepos (mp, pos->file_name, pos->line_number);
  
    /* Tell the lexer to reset its comment buffer, so that the next
       message gets the correct comments.  */
-  xgettext_lex_comment_reset ();
+  xgettext_comment_reset ();
  
    return mp;
  }
  
  
-static void
-remember_a_message_plural (mp, tp)
+void
+remember_a_message_plural (mp, string, pos)
       message_ty *mp;
-     xgettext_token_ty *tp;
+     char *string;
+     lex_pos_ty *pos;
  {
    char *msgid_plural;
    char *msgstr1;
    size_t msgstr1_len;
    char *msgstr;
  
-  msgid_plural = tp->string;
+  msgid_plural = string;
  
    /* See if the message is already a plural message.  */
    if (mp->msgid_plural == NULL)
@@ -834,157 +924,20 @@ remember_a_message_plural (mp, tp)
  
  
  static void
-scan_c_file (filename, mdlp)
-     const char *filename;
+scan_c_file (file_name, mdlp)
+     const char *file_name;
       msgdomain_list_ty *mdlp;
  {
-  message_list_ty *mlp = mdlp->item[0]->messages;
-  int state;
-  int commas_to_skip = 0;      /* defined only when in states 1 and 2 */
-  int plural_commas = 0;       /* defined only when in states 1 and 2 */
-  message_ty *plural_mp = NULL;        /* defined only when in states 1 and 2 */
-  int paren_nesting = 0;       /* defined only when in state 2 */
-
-  /* The file is broken into tokens.  Scan the token stream, looking for
-     a keyword, followed by a left paren, followed by a string.  When we
-     see this sequence, we have something to remember.  We assume we are
-     looking at a valid C or C++ program, and leave the complaints about
-     the grammar to the compiler.
-
-     Normal handling: Look for
-       [A] keyword [B] ( ... [C] ... msgid ... ) [E]
-     Plural handling: Look for
-       [A] keyword [B] ( ... [C] ... msgid ... [D] ... msgid_plural ... ) [E]
-     At point [A]: state == 0.
-     At point [B]: state == 1, commas_to_skip set, plural_mp == NULL.
-     At point [C]: state == 2, commas_to_skip set, plural_mp == NULL.
-     At point [D]: state == 2, commas_to_skip set again, plural_mp != NULL.
-     At point [E]: state == 0.  */
-
-  xgettext_lex_open (filename);
-
-  /* Start state is 0.  */
-  state = 0;
-
-  while (1)
-   {
-     xgettext_token_ty token;
-
-     /* A state machine is used to do the recognising:
-        State 0 = waiting for something to happen
-        State 1 = seen one of our keywords
-        State 2 = waiting for part of an argument */
-     xgettext_lex (&token);
-     switch (token.type)
-       {
-       case xgettext_token_type_keyword:
-        if (!extract_all && state == 2)
-          {
-            if (commas_to_skip == 0)
-              {
-                error_with_progname = false;
-                error (0, 0,
-                       _("%s:%d: warning: keyword nested in keyword arg"),
-                       token.file_name, token.line_number);
-                error_with_progname = true;
-                continue;
-              }
-
-            /* Here we should nest properly, but this would require a
-               potentially unbounded stack.  We haven't run across an
-               example that needs this functionality yet.  For now,
-               we punt and forget the outer keyword.  */
-            error_with_progname = false;
-            error (0, 0,
-                   _("%s:%d: warning: keyword between outer keyword and its arg"),
-                   token.file_name, token.line_number);
-            error_with_progname = true;
-          }
-        commas_to_skip = token.argnum1 - 1;
-        plural_commas = (token.argnum2 > token.argnum1
-                         ? token.argnum2 - token.argnum1 : 0);
-        plural_mp = NULL;
-        state = 1;
-        continue;
-
-       case xgettext_token_type_lparen:
-        switch (state)
-          {
-          case 1:
-            paren_nesting = 0;
-            state = 2;
-            break;
-          case 2:
-            paren_nesting++;
-            break;
-          }
-        continue;
-
-       case xgettext_token_type_rparen:
-        if (state == 2 && paren_nesting != 0)
-          paren_nesting--;
-        else
-          state = 0;
-        continue;
-
-       case xgettext_token_type_comma:
-        if (state == 2 && commas_to_skip != 0)
-          {
-            if (paren_nesting == 0)
-              commas_to_skip--;
-          }
-        else
-          state = 0;
-        continue;
-
-       case xgettext_token_type_string_literal:
-        if (extract_all)
-          remember_a_message (mlp, &token);
-        else if (state == 2 && commas_to_skip == 0)
-          {
-            if (plural_mp == NULL)
-              {
-                /* Seen an msgid.  */
-                if (plural_commas == 0)
-                  remember_a_message (mlp, &token);
-                else
-                  {
-                    plural_mp = remember_a_message (mlp, &token);
-                    commas_to_skip = plural_commas;
-                    plural_commas = 0;
-                  }
-              }
-            else
-              {
-                /* Seen an msgid_plural.  */
-                remember_a_message_plural (plural_mp, &token);
-                plural_mp = NULL;
-              }
-          }
-        else
-          {
-            free (token.string);
-            if (state == 1)
-              state = 0;
-          }
-        continue;
-
-       case xgettext_token_type_symbol:
-        if (state == 1)
-          state = 0;
-        continue;
-
-       case xgettext_token_type_eof:
-        break;
-
-       default:
-        abort ();
-       }
-     break;
-   }
-
-  /* Close scanner.  */
-  xgettext_lex_close ();
+  char *logical_file_name;
+  char *real_file_name;
+  FILE *fp = xgettext_open (file_name, &logical_file_name, &real_file_name);
+
+  extract_c (fp, real_file_name, logical_file_name, mdlp);
+
+  if (fp != stdin)
+    fclose (fp);
+  free (logical_file_name);
+  free (real_file_name);
  }
  
  
@@ -1375,9 +1328,7 @@ language_to_scanner (name)
  
    static table_ty table[] =
    {
-    { "C", scan_c_file, },
-    { "C++", scan_c_file, },
-    { "ObjectiveC", scan_c_file, },
+    SCANNERS_C
      { "PO", read_po_file, },
      /* Here will follow more languages and their scanners: awk, perl,
         etc...  Make sure new scanners honor the --exclude-file option.  */
@@ -1407,16 +1358,7 @@ extension_to_language (extension)
  
    static table_ty table[] =
    {
-    { "c",      "C",    },
-    { "C",      "C++",  },
-    { "c++",    "C++",  },
-    { "cc",     "C++",  },
-    { "cxx",    "C++",  },
-    { "cpp",    "C++",  },
-    { "h",      "C",    },
-    { "hh",     "C++",  },
-    { "hpp",    "C++",  },
-    { "m",      "ObjectiveC" },
+    EXTENSIONS_C
      { "po",     "PO",   },
      { "pot",    "PO",   },
      { "pox",    "PO",   },
diff --git a/src/xgettext.h b/src/xgettext.h

new file mode 100644 (file)

index 0000000..5437d7d
--- /dev/null
+++ b/src/xgettext.h
@@ -0,0 +1,37 @@
+/* xgettext common functions.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Peter Miller <millerp@canb.auug.org.au>
+   and Bruno Haible <haible@clisp.cons.org>, 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#ifndef _XGETTEXT_H
+#define _XGETTEXT_H
+
+#include <stdlib.h>
+#include "message.h"
+#include "pos.h"
+
+extern void xgettext_comment_add PARAMS ((const char *str));
+extern const char *xgettext_comment PARAMS ((size_t n));
+extern void xgettext_comment_reset PARAMS ((void));
+
+extern message_ty *remember_a_message PARAMS ((message_list_ty *mlp,
+                                              char *string, lex_pos_ty *pos));
+extern void remember_a_message_plural PARAMS ((message_ty *mp,
+                                              char *string, lex_pos_ty *pos));
+
+
+#endif /* _XGETTEXT_H */
author	Bruno Haible <bruno@clisp.org>
	Wed, 8 Aug 2001 12:08:55 +0000 (12:08 +0000)
committer	Bruno Haible <bruno@clisp.org>
	Wed, 8 Aug 2001 12:08:55 +0000 (12:08 +0000)
src/ChangeLog		patch \| blob \| blame \| history
src/Makefile.am		patch \| blob \| blame \| history
src/x-c.c	[moved from src/xget-lex.c with 82% similarity]	patch \| blob \| blame \| history
src/x-c.h	[new file with mode: 0644]	patch \| blob
src/xget-lex.h	[deleted file]	patch \| blob \| blame \| history
src/xgettext.c		patch \| blob \| blame \| history
src/xgettext.h	[new file with mode: 0644]	patch \| blob