From: Bruno Haible <bruno@clisp.org>
Date: Tue, 18 Jan 2005 11:42:52 +0000 (+0000)
Subject: xgettext Scheme backend.
X-Git-Tag: v0.14.2~125
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8a148160a35afe1e13a12e3c94f286e43f8260a2;p=thirdparty%2Fgettext.git

xgettext Scheme backend.
---

diff --git a/gettext-tools/src/x-scheme.c b/gettext-tools/src/x-scheme.c
new file mode 100644
index 000000000..297dda8a2
--- /dev/null
+++ b/gettext-tools/src/x-scheme.c
@@ -0,0 +1,1244 @@
+/* xgettext Scheme backend.
+   Copyright (C) 2004-2005 Free Software Foundation, Inc.
+
+   This file was written by Bruno Haible <bruno@clisp.org>, 2004-2005.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "message.h"
+#include "xgettext.h"
+#include "x-scheme.h"
+#include "error.h"
+#include "xalloc.h"
+#include "exit.h"
+#include "hash.h"
+#include "gettext.h"
+
+#define _(s) gettext(s)
+
+
+/* The Scheme syntax is described in R5RS.  It is implemented in
+   guile-1.6.4/libguile/read.c.
+   Since we are interested only in strings and in forms similar to
+        (gettext msgid ...)
+   or   (ngettext msgid msgid_plural ...)
+   we make the following simplifications:
+
+   - Assume the keywords and strings are in an ASCII compatible encoding.
+     This means we can read the input file one byte at a time, instead of
+     one character at a time.  No need to worry about multibyte characters:
+     If they occur as part of identifiers, they most probably act as
+     constituent characters, and the byte based approach will do the same.
+
+   - Assume the read-hash-procedures is in the default state.
+     Non-standard reader extensions are mostly used to read data, not programs.
+
+   The remaining syntax rules are:
+
+   - The syntax code assigned to each character, and how tokens are built
+     up from characters (single escape, multiple escape etc.).
+
+   - Comment syntax: ';' and '#! ... \n!#\n'.
+
+   - String syntax: "..." with single escapes.
+
+   - Read macros and dispatch macro character '#'.  Needed to be able to
+     tell which is the n-th argument of a function call.
+
+ */
+
+
+/* ====================== Keyword set customization.  ====================== */
+
+/* If true extract all strings.  */
+static bool extract_all = false;
+
+static hash_table keywords;
+static bool default_keywords = true;
+
+
+void
+x_scheme_extract_all ()
+{
+  extract_all = true;
+}
+
+
+void
+x_scheme_keyword (const char *name)
+{
+  if (name == NULL)
+    default_keywords = false;
+  else
+    {
+      const char *end;
+      int argnum1;
+      int argnum2;
+      const char *colon;
+
+      if (keywords.table == NULL)
+	init_hash (&keywords, 100);
+
+      split_keywordspec (name, &end, &argnum1, &argnum2);
+
+      /* The characters between name and end should form a valid Lisp symbol.
+	 Extract the symbol name part.  */
+      colon = strchr (name, ':');
+      if (colon != NULL && colon < end)
+	{
+	  name = colon + 1;
+	  if (name < end && *name == ':')
+	    name++;
+	  colon = strchr (name, ':');
+	  if (colon != NULL && colon < end)
+	    return;
+	}
+
+      if (argnum1 == 0)
+	argnum1 = 1;
+      insert_entry (&keywords, name, end - name,
+		    (void *) (long) (argnum1 + (argnum2 << 10)));
+    }
+}
+
+/* Finish initializing the keywords hash table.
+   Called after argument processing, before each file is processed.  */
+static void
+init_keywords ()
+{
+  if (default_keywords)
+    {
+      x_scheme_keyword ("gettext");		/* libguile/i18n.c */
+      x_scheme_keyword ("ngettext:1,2");	/* libguile/i18n.c */
+      x_scheme_keyword ("gettext-noop");
+      default_keywords = false;
+    }
+}
+
+void
+init_flag_table_scheme ()
+{
+  xgettext_record_flag ("gettext:1:pass-scheme-format");
+  xgettext_record_flag ("ngettext:1:pass-scheme-format");
+  xgettext_record_flag ("ngettext:2:pass-scheme-format");
+  xgettext_record_flag ("gettext-noop:1:pass-scheme-format");
+  xgettext_record_flag ("format:2:scheme-format");
+}
+
+
+/* ======================== Reading of characters.  ======================== */
+
+/* Real filename, used in error messages about the input file.  */
+static const char *real_file_name;
+
+/* Logical filename and line number, used to label the extracted messages.  */
+static char *logical_file_name;
+static int line_number;
+
+/* The input file stream.  */
+static FILE *fp;
+
+
+/* Fetch the next character from the input file.  */
+static int
+do_getc ()
+{
+  int c = getc (fp);
+
+  if (c == EOF)
+    {
+      if (ferror (fp))
+	error (EXIT_FAILURE, errno, _("\
+error while reading \"%s\""), real_file_name);
+    }
+  else if (c == '\n')
+   line_number++;
+
+  return c;
+}
+
+/* Put back the last fetched character, not EOF.  */
+static void
+do_ungetc (int c)
+{
+  if (c == '\n')
+    line_number--;
+  ungetc (c, fp);
+}
+
+
+/* ========================== Reading of tokens.  ========================== */
+
+
+/* A token consists of a sequence of characters.  */
+struct token
+{
+  int allocated;		/* number of allocated 'token_char's */
+  int charcount;		/* number of used 'token_char's */
+  char *chars;			/* the token's constituents */
+};
+
+/* Initialize a 'struct token'.  */
+static inline void
+init_token (struct token *tp)
+{
+  tp->allocated = 10;
+  tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
+  tp->charcount = 0;
+}
+
+/* Free the memory pointed to by a 'struct token'.  */
+static inline void
+free_token (struct token *tp)
+{
+  free (tp->chars);
+}
+
+/* Ensure there is enough room in the token for one more character.  */
+static inline void
+grow_token (struct token *tp)
+{
+  if (tp->charcount == tp->allocated)
+    {
+      tp->allocated *= 2;
+      tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
+    }
+}
+
+/* Read the next token.  'first' is the first character, which has already
+   been read.  */
+static void
+read_token (struct token *tp, int first)
+{
+  init_token (tp);
+
+  grow_token (tp);
+  tp->chars[tp->charcount++] = first;
+
+  for (;;)
+    {
+      int c = do_getc ();
+
+      if (c == EOF)
+	break;
+      if (c == ' ' || c == '\r' || c == '\f' || c == '\t' || c == '\n'
+	  || c == '"' || c == '(' || c == ')' || c == ';')
+	{
+	  do_ungetc (c);
+	  break;
+	}
+      grow_token (tp);
+      tp->chars[tp->charcount++] = c;
+    }
+}
+
+/* Tests if a token represents an integer.
+   Taken from guile-1.6.4/libguile/numbers.c:scm_istr2int().  */
+static inline bool
+is_integer_syntax (const char *str, int len, int radix)
+{
+  const char *p = str;
+  const char *p_end = str + len;
+
+  /* The accepted syntax is
+       ['+'|'-'] DIGIT+
+     where DIGIT is a hexadecimal digit whose value is below radix.  */
+
+  if (p == p_end)
+    return false;
+  if (*p == '+' || *p == '-')
+    {
+      p++;
+      if (p == p_end)
+	return false;
+    }
+  do
+    {
+      int c = *p++;
+
+      if (c >= '0' && c <= '9')
+	c = c - '0';
+      else if (c >= 'A' && c <= 'F')
+	c = c - 'A' + 10;
+      else if (c >= 'a' && c <= 'f')
+	c = c - 'a' + 10;
+      else
+	return false;
+      if (c >= radix)
+	return false;
+    }
+  while (p < p_end);
+  return true;
+}
+
+/* Tests if a token represents a rational, floating-point or complex number.
+   If unconstrained is false, only real numbers are accepted; otherwise,
+   complex numbers are accepted as well.
+   Taken from guile-1.6.4/libguile/numbers.c:scm_istr2flo().  */
+static inline bool
+is_other_number_syntax (const char *str, int len, int radix, bool unconstrained)
+{
+  const char *p = str;
+  const char *p_end = str + len;
+  bool seen_sign;
+  bool seen_digits;
+
+  /* The accepted syntaxes are:
+     for a floating-point number:
+       ['+'|'-'] DIGIT+ [EXPONENT]
+       ['+'|'-'] DIGIT* '.' DIGIT+ [EXPONENT]
+       where EXPONENT ::= ['d'|'e'|'f'|'l'|'s'] DIGIT+
+       (Dot and exponent are allowed only if radix is 10.)
+     for a rational number:
+       ['+'|'-'] DIGIT+ '/' DIGIT+
+     for a complex number:
+       REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
+       REAL-NUMBER {'+'|'-'} 'i'
+       {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
+       {'+'|'-'} 'i'
+       REAL-NUMBER '@' REAL-NUMBER
+   */
+  if (p == p_end)
+    return false;
+  /* Parse leading sign.  */
+  seen_sign = false;
+  if (*p == '+' || *p == '-')
+    {
+      p++;
+      if (p == p_end)
+	return false;
+      seen_sign = true;
+      /* Recognize complex number syntax: {'+'|'-'} 'i'  */
+      if (unconstrained && (*p == 'I' || *p == 'i') && p + 1 == p_end)
+	return true;
+    }
+  /* Parse digits before dot or exponent or slash.  */
+  seen_digits = false;
+  do
+    {
+      int c = *p;
+
+      if (c >= '0' && c <= '9')
+	c = c - '0';
+      else if (c >= 'A' && c <= 'F')
+	{
+	  if (c >= 'D' && radix == 10) /* exponent? */
+	    break;
+	  c = c - 'A' + 10;
+	}
+      else if (c >= 'a' && c <= 'f')
+	{
+	  if (c >= 'd' && radix == 10) /* exponent? */
+	    break;
+	  c = c - 'a' + 10;
+	}
+      else
+	break;
+      if (c >= radix)
+	return false;
+      seen_digits = true;
+      p++;
+    }
+  while (p < p_end);
+  /* If p == p_end, we know that seen_digits = true, and the number is an
+     integer without exponent.  */
+  if (p < p_end)
+    {
+      /* If we have no digits so far, we need a decimal point later.  */
+      if (!seen_digits && !(*p == '.' && radix == 10))
+	return false;
+      /* Trailing '#' signs are equivalent to zeroes.  */
+      while (p < p_end && *p == '#')
+	p++;
+      if (p < p_end)
+	{
+	  if (*p == '/')
+	    {
+	      /* Parse digits after the slash.  */
+	      bool all_zeroes = true;
+	      p++;
+	      for (; p < p_end; p++)
+		{
+		  int c = *p;
+
+		  if (c >= '0' && c <= '9')
+		    c = c - '0';
+		  else if (c >= 'A' && c <= 'F')
+		    c = c - 'A' + 10;
+		  else if (c >= 'a' && c <= 'f')
+		    c = c - 'a' + 10;
+		  else
+		    break;
+		  if (c >= radix)
+		    return false;
+		  if (c != 0)
+		    all_zeroes = false;
+		}
+	      /* A zero denominator is not allowed.  */
+	      if (all_zeroes)
+		return false;
+	      /* Trailing '#' signs are equivalent to zeroes.  */
+	      while (p < p_end && *p == '#')
+		p++;
+	    }
+	  else
+	    {
+	      if (*p == '.')
+		{
+		  /* Decimal point notation.  */
+		  if (radix != 10)
+		    return false;
+		  /* Parse digits after the decimal point.  */
+		  p++;
+		  for (; p < p_end; p++)
+		    {
+		      int c = *p;
+
+		      if (c >= '0' && c <= '9')
+			seen_digits = true;
+		      else
+			break;
+		    }
+		  /* Digits are required before or after the decimal point.  */
+		  if (!seen_digits)
+		    return false;
+		  /* Trailing '#' signs are equivalent to zeroes.  */
+		  while (p < p_end && *p == '#')
+		    p++;
+		}
+	      if (p < p_end)
+		{
+		  /* Parse exponent.  */
+		  switch (*p)
+		    {
+		    case 'D': case 'd':
+		    case 'E': case 'e':
+		    case 'F': case 'f':
+		    case 'L': case 'l':
+		    case 'S': case 's':
+		      if (radix != 10)
+			return false;
+		      p++;
+		      if (p == p_end)
+			return false;
+		      if (*p == '+' || *p == '-')
+			{
+			  p++;
+			  if (p == p_end)
+			    return false;
+			}
+		      if (!(*p >= '0' && *p <= '9'))
+			return false;
+		      for (;;)
+			{
+			  p++;
+			  if (p == p_end)
+			    break;
+			  if (!(*p >= '0' && *p <= '9'))
+			    break;
+			}
+		      break;
+		    default:
+		      break;
+		    }
+		}
+	    }
+	}
+    }
+  if (p == p_end)
+    return true;
+  /* Recognize complex number syntax.  */
+  if (unconstrained)
+    {
+      /* Recognize the syntax  {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'  */
+      if (seen_sign && (*p == 'I' || *p == 'i') && p + 1 == p_end)
+	return true;
+      /* Recognize the syntaxes
+	   REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
+	   REAL-NUMBER {'+'|'-'} 'i'
+       */
+      if (*p == '+' || *p == '-')
+	return (p_end[-1] == 'I' || p_end[-1] == 'i')
+		&& (p + 1 == p_end - 1
+		    || is_other_number_syntax (p, p_end - 1 - p, radix, false));
+      /* Recognize the syntax  REAL-NUMBER '@' REAL-NUMBER  */
+      if (*p == '@')
+	{
+	  p++;
+	  return is_other_number_syntax (p, p_end - p, radix, false);
+	}
+    }
+  return false;
+}
+
+/* Tests if a token represents a number.
+   Taken from guile-1.6.4/libguile/numbers.c:scm_istring2number().  */
+static bool
+is_number (const struct token *tp)
+{
+  const char *str = tp->chars;
+  int len = tp->charcount;
+  int radix = 10;
+  enum { unknown, exact, inexact } exactness = unknown;
+  bool seen_radix_prefix = false;
+  bool seen_exactness_prefix = false;
+
+  if (len == 1)
+    if (*str == '+' || *str == '-')
+      return false;
+  while (len >= 2 && *str == '#')
+    {
+      switch (str[1])
+	{
+	case 'B': case 'b':
+	  if (seen_radix_prefix)
+	    return false;
+	  radix = 2;
+	  seen_radix_prefix = true;
+	  break;
+	case 'O': case 'o':
+	  if (seen_radix_prefix)
+	    return false;
+	  radix = 8;
+	  seen_radix_prefix = true;
+	  break;
+	case 'D': case 'd':
+	  if (seen_radix_prefix)
+	    return false;
+	  radix = 10;
+	  seen_radix_prefix = true;
+	  break;
+	case 'X': case 'x':
+	  if (seen_radix_prefix)
+	    return false;
+	  radix = 16;
+	  seen_radix_prefix = true;
+	  break;
+	case 'E': case 'e':
+	  if (seen_exactness_prefix)
+	    return false;
+	  exactness = exact;
+	  seen_exactness_prefix = true;
+	  break;
+	case 'I': case 'i':
+	  if (seen_exactness_prefix)
+	    return false;
+	  exactness = inexact;
+	  seen_exactness_prefix = true;
+	  break;
+	default:
+	  return false;
+	}
+      str += 2;
+      len -= 2;
+    }
+  if (exactness != inexact)
+    {
+      /* Try to parse an integer.  */
+      if (is_integer_syntax (str, len, 10))
+	return true;
+      /* FIXME: Other Scheme implementations support exact rational numbers
+	 or exact complex numbers.  */
+    }
+  if (exactness != exact)
+    {
+      /* Try to parse a rational, floating-point or complex number.  */
+      if (is_other_number_syntax (str, len, 10, true))
+	return true;
+    }
+  return false;
+}
+
+
+/* ========================= Accumulating comments ========================= */
+
+
+static char *buffer;
+static size_t bufmax;
+static size_t buflen;
+
+static inline void
+comment_start ()
+{
+  buflen = 0;
+}
+
+static inline void
+comment_add (int c)
+{
+  if (buflen >= bufmax)
+    {
+      bufmax = 2 * bufmax + 10;
+      buffer = xrealloc (buffer, bufmax);
+    }
+  buffer[buflen++] = c;
+}
+
+static inline void
+comment_line_end (size_t chars_to_remove)
+{
+  buflen -= chars_to_remove;
+  while (buflen >= 1
+	 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
+    --buflen;
+  if (chars_to_remove == 0 && buflen >= bufmax)
+    {
+      bufmax = 2 * bufmax + 10;
+      buffer = xrealloc (buffer, bufmax);
+    }
+  buffer[buflen] = '\0';
+  xgettext_comment_add (buffer);
+}
+
+
+/* These are for tracking whether comments count as immediately before
+   keyword.  */
+static int last_comment_line;
+static int last_non_comment_line;
+
+
+/* ========================= Accumulating messages ========================= */
+
+
+static message_list_ty *mlp;
+
+
+/* ========================== Reading of objects.  ========================= */
+
+
+/* We are only interested in symbols (e.g. gettext or ngettext) and strings.
+   Other objects need not to be represented precisely.  */
+enum object_type
+{
+  t_symbol,	/* symbol */
+  t_string,	/* string */
+  t_other,	/* other kind of real object */
+  t_dot,	/* '.' pseudo object */
+  t_close,	/* ')' pseudo object */
+  t_eof		/* EOF marker */
+};
+
+struct object
+{
+  enum object_type type;
+  struct token *token;		/* for t_symbol and t_string */
+  int line_number_at_start;	/* for t_string */
+};
+
+/* Free the memory pointed to by a 'struct object'.  */
+static inline void
+free_object (struct object *op)
+{
+  if (op->type == t_symbol || op->type == t_string)
+    {
+      free_token (op->token);
+      free (op->token);
+    }
+}
+
+/* Convert a t_symbol/t_string token to a char*.  */
+static char *
+string_of_object (const struct object *op)
+{
+  char *str;
+  int n;
+
+  if (!(op->type == t_symbol || op->type == t_string))
+    abort ();
+  n = op->token->charcount;
+  str = (char *) xmalloc (n + 1);
+  memcpy (str, op->token->chars, n);
+  str[n] = '\0';
+  return str;
+}
+
+/* Context lookup table.  */
+static flag_context_list_table_ty *flag_context_list_table;
+
+/* Read the next object.  */
+static void
+read_object (struct object *op, flag_context_ty outer_context)
+{
+  for (;;)
+    {
+      int c = do_getc ();
+
+      switch (c)
+	{
+	case EOF:
+	  op->type = t_eof;
+	  return;
+
+	case ' ': case '\r': case '\f': case '\t':
+	  continue;
+
+	case '\n':
+	  /* Comments assumed to be grouped with a message must immediately
+	     precede it, with no non-whitespace token on a line between
+	     both.  */
+	  if (last_non_comment_line > last_comment_line)
+	    xgettext_comment_reset ();
+	  continue;
+
+	case ';':
+	  {
+	    bool all_semicolons = true;
+
+	    last_comment_line = line_number;
+	    comment_start ();
+	    for (;;)
+	      {
+		c = do_getc ();
+		if (c == EOF || c == '\n')
+		  break;
+		if (c != ';')
+		  all_semicolons = false;
+		if (!all_semicolons)
+		  {
+		    /* We skip all leading white space, but not EOLs.  */
+		    if (!(buflen == 0 && (c == ' ' || c == '\t')))
+		      comment_add (c);
+		  }
+	      }
+	    comment_line_end (0);
+	    continue;
+	  }
+
+	case '(':
+	  {
+	     int arg = 0;		/* Current argument number.  */
+	     flag_context_list_iterator_ty context_iter;
+	     int argnum1 = 0;	/* First string position.  */
+	     int argnum2 = 0;	/* Plural string position.  */
+	     message_ty *plural_mp = NULL;	/* Remember the msgid.  */
+
+	     for (;; arg++)
+	       {
+		struct object inner;
+		flag_context_ty inner_context;
+
+		if (arg == 0)
+		  inner_context = null_context;
+		else
+		  inner_context =
+		    inherited_context (outer_context,
+				       flag_context_list_iterator_advance (
+					 &context_iter));
+
+		read_object (&inner, inner_context);
+
+		/* Recognize end of list.  */
+		if (inner.type == t_close)
+		  {
+		    op->type = t_other;
+		    last_non_comment_line = line_number;
+		    return;
+		  }
+
+		/* Dots are not allowed in every position.
+		   But be tolerant.  */
+
+		/* EOF inside list is illegal.
+		   But be tolerant.  */
+		if (inner.type == t_eof)
+		  break;
+
+		if (arg == 0)
+		  {
+		    /* This is the function position.  */
+		    if (inner.type == t_symbol)
+		      {
+			char *symbol_name = string_of_object (&inner);
+			void *keyword_value;
+
+			if (find_entry (&keywords,
+					symbol_name, strlen (symbol_name),
+					&keyword_value)
+			    == 0)
+			  {
+			    argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
+			    argnum2 = (int) (long) keyword_value >> 10;
+			  }
+
+			context_iter =
+			  flag_context_list_iterator (
+			    flag_context_list_table_lookup (
+			      flag_context_list_table,
+			      symbol_name, strlen (symbol_name)));
+
+			free (symbol_name);
+		      }
+		    else
+		      context_iter = null_context_list_iterator;
+		  }
+		else
+		  {
+		    /* These are the argument positions.
+		       Extract a string if we have reached the right
+		       argument position.  */
+		    if (arg == argnum1)
+		      {
+			if (inner.type == t_string)
+			  {
+			    lex_pos_ty pos;
+			    message_ty *mp;
+
+			    pos.file_name = logical_file_name;
+			    pos.line_number = inner.line_number_at_start;
+			    mp = remember_a_message (mlp, string_of_object (&inner),
+						     inner_context, &pos);
+			    if (argnum2 > 0)
+			      plural_mp = mp;
+			  }
+		      }
+		    else if (arg == argnum2)
+		      {
+			if (inner.type == t_string && plural_mp != NULL)
+			  {
+			    lex_pos_ty pos;
+
+			    pos.file_name = logical_file_name;
+			    pos.line_number = inner.line_number_at_start;
+			    remember_a_message_plural (plural_mp, string_of_object (&inner),
+						       inner_context, &pos);
+			  }
+		      }
+		  }
+
+		free_object (&inner);
+	      }
+	  }
+	  op->type = t_other;
+	  last_non_comment_line = line_number;
+	  return;
+
+	case ')':
+	  /* Tell the caller about the end of list.
+	     Unmatched closing parenthesis is illegal.
+	     But be tolerant.  */
+	  op->type = t_close;
+	  last_non_comment_line = line_number;
+	  return;
+
+	case ',':
+	  {
+	    int c = do_getc ();
+	    /* The ,@ handling inside lists is wrong anyway, because
+	       ,@form expands to an unknown number of elements.  */
+	    if (c != EOF && c != '@')
+	      do_ungetc (c);
+	  }
+	  /*FALLTHROUGH*/
+	case '\'':
+	case '`':
+	  {
+	    struct object inner;
+
+	    read_object (&inner, null_context);
+
+	    /* Dots and EOF are not allowed here.  But be tolerant.  */
+
+	    free_object (&inner);
+
+	    op->type = t_other;
+	    last_non_comment_line = line_number;
+	    return;
+	  }
+
+	case '#':
+	  /* Dispatch macro handling.  */
+	  {
+	    c = do_getc ();
+	    if (c == EOF)
+	      /* Invalid input.  Be tolerant, no error message.  */
+	      {
+		op->type = t_other;
+		return;
+	      }
+
+	    switch (c)
+	      {
+	      case '(': /* Vector */
+		do_ungetc (c);
+		{
+		  struct object inner;
+		  read_object (&inner, null_context);
+		  /* Dots and EOF are not allowed here.
+		     But be tolerant.  */
+		  free_object (&inner);
+		  op->type = t_other;
+		  last_non_comment_line = line_number;
+		  return;
+		}
+
+	      case 'T': case 't': /* Boolean true */
+	      case 'F': case 'f': /* Boolean false */
+		op->type = t_other;
+		last_non_comment_line = line_number;
+		return;
+
+	      case 'B': case 'b':
+	      case 'O': case 'o':
+	      case 'D': case 'd':
+	      case 'X': case 'x':
+	      case 'E': case 'e':
+	      case 'I': case 'i':
+		{
+		  struct token token;
+		  do_ungetc (c);
+		  read_token (&token, '#');
+		  if (is_number (&token))
+		    {
+		      /* A number.  */
+		      free_token (&token);
+		      op->type = t_other;
+		      last_non_comment_line = line_number;
+		      return;
+		    }
+		  else
+		    {
+		      if (token.charcount == 2
+			  && (token.chars[1] == 'e' || token.chars[1] == 'i'))
+			{
+			  c = do_getc ();
+			  if (c != EOF)
+			    do_ungetc (c);
+			  if (c == '(')
+			    /* Homogenous vector syntax, see arrays.scm.  */
+			    case 'a':	/* Vectors of char */
+			    case 'c':	/* Vectors of complex */
+			  /*case 'e':*/	/* Vectors of long */
+			    case 'h':	/* Vectors of short */
+			  /*case 'i':*/	/* Vectors of double-float */
+			    case 'l':	/* Vectors of long long */
+			    case 's':	/* Vectors of single-float */
+			    case 'u':	/* Vectors of unsigned long */
+			    case 'y':	/* Vectors of byte */
+			      {
+				struct object inner;
+				read_object (&inner, null_context);
+				/* Dots and EOF are not allowed here.
+				   But be tolerant.  */
+				free_token (&token);
+				free_object (&inner);
+				op->type = t_other;
+				last_non_comment_line = line_number;
+				return;
+			      }
+			}
+		      /* Unknown # object.  But be tolerant.  */
+		      free_token (&token);
+		      op->type = t_other;
+		      last_non_comment_line = line_number;
+		      return;
+		    }
+		}
+
+	      case '!':
+		/* Block comment '#! ... \n!#\n'.  We don't extract it
+		   because it's only used to introduce scripts on Unix.  */
+		{
+		  int last1 = 0;
+		  int last2 = 0;
+		  int last3 = 0;
+
+		  for (;;)
+		    {
+		      c = do_getc ();
+		      if (c == EOF)
+			/* EOF is not allowed here.  But be tolerant.  */
+			break;
+		      if (last3 == '\n' && last2 == '!' && last1 == '#'
+			  && c == '\n')
+			break;
+		      last3 = last2;
+		      last2 = last1;
+		      last1 = c;
+		    }
+		  continue;
+		}
+
+	      case '*':
+		/* Bit vector.  */
+		{
+		  struct token token;
+		  read_token (&token, c);
+		  /* The token should consists only of '0' and '1', except
+		     for the initial '*'.  But be tolerant.  */
+		  free_token (&token);
+		  op->type = t_other;
+		  last_non_comment_line = line_number;
+		  return;
+		}
+
+	      case '{':
+		/* Symbol with multiple escapes: #{...}#  */
+		{
+		  op->token = (struct token *) xmalloc (sizeof (struct token));
+
+		  init_token (op->token);
+
+		  for (;;)
+		    {
+		      c = do_getc ();
+
+		      if (c == EOF)
+			break;
+		      if (c == '\\')
+			{
+			  c = do_getc ();
+			  if (c == EOF)
+			    break;
+			}
+		      else if (c == '}')
+			{
+			  c = do_getc ();
+			  if (c == '#')
+			    break;
+			  if (c != EOF)
+			    do_ungetc (c);
+			  c = '}';
+			}
+		      grow_token (op->token);
+		      op->token->chars[op->token->charcount++] = c;
+		    }
+
+		  op->type = t_symbol;
+		  last_non_comment_line = line_number;
+		  return;
+		}
+
+	      case '\\':
+		/* Character.  */
+		{
+		  struct token token;
+		  c = do_getc ();
+		  if (c != EOF)
+		    {
+		      read_token (&token, c);
+		      free_token (&token);
+		    }
+		  op->type = t_other;
+		  last_non_comment_line = line_number;
+		  return;
+		}
+
+	      case ':': /* Keyword.  */
+	      case '&': /* Deprecated keyword, installed in optargs.scm.  */
+		{
+		  struct token token;
+		  read_token (&token, '-');
+		  free_token (&token);
+		  op->type = t_other;
+		  last_non_comment_line = line_number;
+		  return;
+		}
+
+	      /* The following are installed through read-hash-extend.  */
+
+	      /* arrays.scm */
+	      case '0': case '1': case '2': case '3': case '4':
+	      case '5': case '6': case '7': case '8': case '9':
+		/* Multidimensional array syntax: #nx(...) where
+		     n ::= DIGIT+
+		     x ::= {'a'|'b'|'c'|'e'|'i'|'s'|'u'}
+		 */
+		do
+		  c = do_getc ();
+		while (c >= '0' && c <= '9');
+		/* c should be one of {'a'|'b'|'c'|'e'|'i'|'s'|'u'}.
+		   But be tolerant.  */
+		/*FALLTHROUGH*/
+	      case '\'': /* boot-9.scm */
+	      case '.': /* boot-9.scm */
+	      case ',': /* srfi-10.scm */
+		{
+		  struct object inner;
+		  read_object (&inner, null_context);
+		  /* Dots and EOF are not allowed here.
+		     But be tolerant.  */
+		  free_object (&inner);
+		  op->type = t_other;
+		  last_non_comment_line = line_number;
+		  return;
+		}
+
+	      default:
+		/* Unknown.  */
+		op->type = t_other;
+		last_non_comment_line = line_number;
+		return;
+	      }
+	    /*NOTREACHED*/
+	    abort ();
+	  }
+
+	case '"':
+	  {
+	    op->token = (struct token *) xmalloc (sizeof (struct token));
+	    init_token (op->token);
+	    op->line_number_at_start = line_number;
+	    for (;;)
+	      {
+		int c = do_getc ();
+		if (c == EOF)
+		  /* Invalid input.  Be tolerant, no error message.  */
+		  break;
+		if (c == '"')
+		  break;
+		if (c == '\\')
+		  {
+		    c = do_getc ();
+		    if (c == EOF)
+		      /* Invalid input.  Be tolerant, no error message.  */
+		      break;
+		    switch (c)
+		      {
+		      case '\n':
+			continue;
+		      case '0':
+			c = '\0';
+			break;
+		      case 'a':
+			c = '\a';
+			break;
+		      case 'f':
+			c = '\f';
+			break;
+		      case 'n':
+			c = '\n';
+			break;
+		      case 'r':
+			c = '\r';
+			break;
+		      case 't':
+			c = '\t';
+			break;
+		      case 'v':
+			c = '\v';
+			break;
+		      default:
+			break;
+		      }
+		  }
+		grow_token (op->token);
+		op->token->chars[op->token->charcount++] = c;
+	      }
+	    op->type = t_string;
+
+	    if (extract_all)
+	      {
+		lex_pos_ty pos;
+
+		pos.file_name = logical_file_name;
+		pos.line_number = op->line_number_at_start;
+		remember_a_message (mlp, string_of_object (op),
+				    null_context, &pos);
+	      }
+	    last_non_comment_line = line_number;
+	    return;
+	  }
+
+	case '0': case '1': case '2': case '3': case '4':
+	case '5': case '6': case '7': case '8': case '9':
+	case '+': case '-': case '.':
+	  /* Read a number or symbol token.  */
+	  op->token = (struct token *) xmalloc (sizeof (struct token));
+	  read_token (op->token, c);
+	  if (op->token->charcount == 1 && op->token->chars[0] == '.')
+	    {
+	      free_token (op->token);
+	      free (op->token);
+	      op->type = t_dot;
+	    }
+	  else if (is_number (op->token))
+	    {
+	      /* A number.  */
+	      free_token (op->token);
+	      free (op->token);
+	      op->type = t_other;
+	    }
+	  else
+	    {
+	      /* A symbol.  */
+	      op->type = t_symbol;
+	    }
+	  last_non_comment_line = line_number;
+	  return;
+
+	case ':':
+	default:
+	  /* Read a symbol token.  */
+	  op->token = (struct token *) xmalloc (sizeof (struct token));
+	  read_token (op->token, c);
+	  op->type = t_symbol;
+	  last_non_comment_line = line_number;
+	  return;
+	}
+    }
+}
+
+
+void
+extract_scheme (FILE *f,
+		const char *real_filename, const char *logical_filename,
+		flag_context_list_table_ty *flag_table,
+		msgdomain_list_ty *mdlp)
+{
+  mlp = mdlp->item[0]->messages;
+
+  fp = f;
+  real_file_name = real_filename;
+  logical_file_name = xstrdup (logical_filename);
+  line_number = 1;
+
+  last_comment_line = -1;
+  last_non_comment_line = -1;
+
+  flag_context_list_table = flag_table;
+
+  init_keywords ();
+
+  /* Eat tokens until eof is seen.  When read_object returns
+     due to an unbalanced closing parenthesis, just restart it.  */
+  do
+    {
+      struct object toplevel_object;
+
+      read_object (&toplevel_object, null_context);
+
+      if (toplevel_object.type == t_eof)
+	break;
+
+      free_object (&toplevel_object);
+    }
+  while (!feof (fp));
+
+  /* Close scanner.  */
+  fp = NULL;
+  real_file_name = NULL;
+  logical_file_name = NULL;
+  line_number = 0;
+}
diff --git a/gettext-tools/src/x-scheme.h b/gettext-tools/src/x-scheme.h
new file mode 100644
index 000000000..12b093a6a
--- /dev/null
+++ b/gettext-tools/src/x-scheme.h
@@ -0,0 +1,39 @@
+/* xgettext Scheme backend.
+   Copyright (C) 2004 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2004.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+
+#define EXTENSIONS_SCHEME \
+  { "scm",       "Scheme"     },					\
+
+#define SCANNERS_SCHEME \
+  { "Scheme",		extract_scheme,					  \
+			&flag_table_scheme, &formatstring_scheme, NULL }, \
+
+/* Scan a Scheme file and add its translatable strings to mdlp.  */
+extern void extract_scheme (FILE *fp, const char *real_filename,
+			    const char *logical_filename,
+			    flag_context_list_table_ty *flag_table,
+			    msgdomain_list_ty *mdlp);
+
+
+/* Handling of options specific to this language.  */
+
+extern void x_scheme_extract_all (void);
+extern void x_scheme_keyword (const char *name);
+
+extern void init_flag_table_scheme (void);