--- /dev/null
+/* xgettext Scheme backend.
+ Copyright (C) 2004-2005 Free Software Foundation, Inc.
+
+ This file was written by Bruno Haible <bruno@clisp.org>, 2004-2005.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "message.h"
+#include "xgettext.h"
+#include "x-scheme.h"
+#include "error.h"
+#include "xalloc.h"
+#include "exit.h"
+#include "hash.h"
+#include "gettext.h"
+
+#define _(s) gettext(s)
+
+
+/* The Scheme syntax is described in R5RS. It is implemented in
+ guile-1.6.4/libguile/read.c.
+ Since we are interested only in strings and in forms similar to
+ (gettext msgid ...)
+ or (ngettext msgid msgid_plural ...)
+ we make the following simplifications:
+
+ - Assume the keywords and strings are in an ASCII compatible encoding.
+ This means we can read the input file one byte at a time, instead of
+ one character at a time. No need to worry about multibyte characters:
+ If they occur as part of identifiers, they most probably act as
+ constituent characters, and the byte based approach will do the same.
+
+ - Assume the read-hash-procedures is in the default state.
+ Non-standard reader extensions are mostly used to read data, not programs.
+
+ The remaining syntax rules are:
+
+ - The syntax code assigned to each character, and how tokens are built
+ up from characters (single escape, multiple escape etc.).
+
+ - Comment syntax: ';' and '#! ... \n!#\n'.
+
+ - String syntax: "..." with single escapes.
+
+ - Read macros and dispatch macro character '#'. Needed to be able to
+ tell which is the n-th argument of a function call.
+
+ */
+
+
+/* ====================== Keyword set customization. ====================== */
+
+/* If true extract all strings. */
+static bool extract_all = false;
+
+static hash_table keywords;
+static bool default_keywords = true;
+
+
+void
+x_scheme_extract_all ()
+{
+ extract_all = true;
+}
+
+
+void
+x_scheme_keyword (const char *name)
+{
+ if (name == NULL)
+ default_keywords = false;
+ else
+ {
+ const char *end;
+ int argnum1;
+ int argnum2;
+ const char *colon;
+
+ if (keywords.table == NULL)
+ init_hash (&keywords, 100);
+
+ split_keywordspec (name, &end, &argnum1, &argnum2);
+
+ /* The characters between name and end should form a valid Lisp symbol.
+ Extract the symbol name part. */
+ colon = strchr (name, ':');
+ if (colon != NULL && colon < end)
+ {
+ name = colon + 1;
+ if (name < end && *name == ':')
+ name++;
+ colon = strchr (name, ':');
+ if (colon != NULL && colon < end)
+ return;
+ }
+
+ if (argnum1 == 0)
+ argnum1 = 1;
+ insert_entry (&keywords, name, end - name,
+ (void *) (long) (argnum1 + (argnum2 << 10)));
+ }
+}
+
+/* Finish initializing the keywords hash table.
+ Called after argument processing, before each file is processed. */
+static void
+init_keywords ()
+{
+ if (default_keywords)
+ {
+ x_scheme_keyword ("gettext"); /* libguile/i18n.c */
+ x_scheme_keyword ("ngettext:1,2"); /* libguile/i18n.c */
+ x_scheme_keyword ("gettext-noop");
+ default_keywords = false;
+ }
+}
+
+void
+init_flag_table_scheme ()
+{
+ xgettext_record_flag ("gettext:1:pass-scheme-format");
+ xgettext_record_flag ("ngettext:1:pass-scheme-format");
+ xgettext_record_flag ("ngettext:2:pass-scheme-format");
+ xgettext_record_flag ("gettext-noop:1:pass-scheme-format");
+ xgettext_record_flag ("format:2:scheme-format");
+}
+
+
+/* ======================== Reading of characters. ======================== */
+
+/* Real filename, used in error messages about the input file. */
+static const char *real_file_name;
+
+/* Logical filename and line number, used to label the extracted messages. */
+static char *logical_file_name;
+static int line_number;
+
+/* The input file stream. */
+static FILE *fp;
+
+
+/* Fetch the next character from the input file. */
+static int
+do_getc ()
+{
+ int c = getc (fp);
+
+ if (c == EOF)
+ {
+ if (ferror (fp))
+ error (EXIT_FAILURE, errno, _("\
+error while reading \"%s\""), real_file_name);
+ }
+ else if (c == '\n')
+ line_number++;
+
+ return c;
+}
+
+/* Put back the last fetched character, not EOF. */
+static void
+do_ungetc (int c)
+{
+ if (c == '\n')
+ line_number--;
+ ungetc (c, fp);
+}
+
+
+/* ========================== Reading of tokens. ========================== */
+
+
+/* A token consists of a sequence of characters. */
+struct token
+{
+ int allocated; /* number of allocated 'token_char's */
+ int charcount; /* number of used 'token_char's */
+ char *chars; /* the token's constituents */
+};
+
+/* Initialize a 'struct token'. */
+static inline void
+init_token (struct token *tp)
+{
+ tp->allocated = 10;
+ tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
+ tp->charcount = 0;
+}
+
+/* Free the memory pointed to by a 'struct token'. */
+static inline void
+free_token (struct token *tp)
+{
+ free (tp->chars);
+}
+
+/* Ensure there is enough room in the token for one more character. */
+static inline void
+grow_token (struct token *tp)
+{
+ if (tp->charcount == tp->allocated)
+ {
+ tp->allocated *= 2;
+ tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
+ }
+}
+
+/* Read the next token. 'first' is the first character, which has already
+ been read. */
+static void
+read_token (struct token *tp, int first)
+{
+ init_token (tp);
+
+ grow_token (tp);
+ tp->chars[tp->charcount++] = first;
+
+ for (;;)
+ {
+ int c = do_getc ();
+
+ if (c == EOF)
+ break;
+ if (c == ' ' || c == '\r' || c == '\f' || c == '\t' || c == '\n'
+ || c == '"' || c == '(' || c == ')' || c == ';')
+ {
+ do_ungetc (c);
+ break;
+ }
+ grow_token (tp);
+ tp->chars[tp->charcount++] = c;
+ }
+}
+
+/* Tests if a token represents an integer.
+ Taken from guile-1.6.4/libguile/numbers.c:scm_istr2int(). */
+static inline bool
+is_integer_syntax (const char *str, int len, int radix)
+{
+ const char *p = str;
+ const char *p_end = str + len;
+
+ /* The accepted syntax is
+ ['+'|'-'] DIGIT+
+ where DIGIT is a hexadecimal digit whose value is below radix. */
+
+ if (p == p_end)
+ return false;
+ if (*p == '+' || *p == '-')
+ {
+ p++;
+ if (p == p_end)
+ return false;
+ }
+ do
+ {
+ int c = *p++;
+
+ if (c >= '0' && c <= '9')
+ c = c - '0';
+ else if (c >= 'A' && c <= 'F')
+ c = c - 'A' + 10;
+ else if (c >= 'a' && c <= 'f')
+ c = c - 'a' + 10;
+ else
+ return false;
+ if (c >= radix)
+ return false;
+ }
+ while (p < p_end);
+ return true;
+}
+
+/* Tests if a token represents a rational, floating-point or complex number.
+ If unconstrained is false, only real numbers are accepted; otherwise,
+ complex numbers are accepted as well.
+ Taken from guile-1.6.4/libguile/numbers.c:scm_istr2flo(). */
+static inline bool
+is_other_number_syntax (const char *str, int len, int radix, bool unconstrained)
+{
+ const char *p = str;
+ const char *p_end = str + len;
+ bool seen_sign;
+ bool seen_digits;
+
+ /* The accepted syntaxes are:
+ for a floating-point number:
+ ['+'|'-'] DIGIT+ [EXPONENT]
+ ['+'|'-'] DIGIT* '.' DIGIT+ [EXPONENT]
+ where EXPONENT ::= ['d'|'e'|'f'|'l'|'s'] DIGIT+
+ (Dot and exponent are allowed only if radix is 10.)
+ for a rational number:
+ ['+'|'-'] DIGIT+ '/' DIGIT+
+ for a complex number:
+ REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
+ REAL-NUMBER {'+'|'-'} 'i'
+ {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
+ {'+'|'-'} 'i'
+ REAL-NUMBER '@' REAL-NUMBER
+ */
+ if (p == p_end)
+ return false;
+ /* Parse leading sign. */
+ seen_sign = false;
+ if (*p == '+' || *p == '-')
+ {
+ p++;
+ if (p == p_end)
+ return false;
+ seen_sign = true;
+ /* Recognize complex number syntax: {'+'|'-'} 'i' */
+ if (unconstrained && (*p == 'I' || *p == 'i') && p + 1 == p_end)
+ return true;
+ }
+ /* Parse digits before dot or exponent or slash. */
+ seen_digits = false;
+ do
+ {
+ int c = *p;
+
+ if (c >= '0' && c <= '9')
+ c = c - '0';
+ else if (c >= 'A' && c <= 'F')
+ {
+ if (c >= 'D' && radix == 10) /* exponent? */
+ break;
+ c = c - 'A' + 10;
+ }
+ else if (c >= 'a' && c <= 'f')
+ {
+ if (c >= 'd' && radix == 10) /* exponent? */
+ break;
+ c = c - 'a' + 10;
+ }
+ else
+ break;
+ if (c >= radix)
+ return false;
+ seen_digits = true;
+ p++;
+ }
+ while (p < p_end);
+ /* If p == p_end, we know that seen_digits = true, and the number is an
+ integer without exponent. */
+ if (p < p_end)
+ {
+ /* If we have no digits so far, we need a decimal point later. */
+ if (!seen_digits && !(*p == '.' && radix == 10))
+ return false;
+ /* Trailing '#' signs are equivalent to zeroes. */
+ while (p < p_end && *p == '#')
+ p++;
+ if (p < p_end)
+ {
+ if (*p == '/')
+ {
+ /* Parse digits after the slash. */
+ bool all_zeroes = true;
+ p++;
+ for (; p < p_end; p++)
+ {
+ int c = *p;
+
+ if (c >= '0' && c <= '9')
+ c = c - '0';
+ else if (c >= 'A' && c <= 'F')
+ c = c - 'A' + 10;
+ else if (c >= 'a' && c <= 'f')
+ c = c - 'a' + 10;
+ else
+ break;
+ if (c >= radix)
+ return false;
+ if (c != 0)
+ all_zeroes = false;
+ }
+ /* A zero denominator is not allowed. */
+ if (all_zeroes)
+ return false;
+ /* Trailing '#' signs are equivalent to zeroes. */
+ while (p < p_end && *p == '#')
+ p++;
+ }
+ else
+ {
+ if (*p == '.')
+ {
+ /* Decimal point notation. */
+ if (radix != 10)
+ return false;
+ /* Parse digits after the decimal point. */
+ p++;
+ for (; p < p_end; p++)
+ {
+ int c = *p;
+
+ if (c >= '0' && c <= '9')
+ seen_digits = true;
+ else
+ break;
+ }
+ /* Digits are required before or after the decimal point. */
+ if (!seen_digits)
+ return false;
+ /* Trailing '#' signs are equivalent to zeroes. */
+ while (p < p_end && *p == '#')
+ p++;
+ }
+ if (p < p_end)
+ {
+ /* Parse exponent. */
+ switch (*p)
+ {
+ case 'D': case 'd':
+ case 'E': case 'e':
+ case 'F': case 'f':
+ case 'L': case 'l':
+ case 'S': case 's':
+ if (radix != 10)
+ return false;
+ p++;
+ if (p == p_end)
+ return false;
+ if (*p == '+' || *p == '-')
+ {
+ p++;
+ if (p == p_end)
+ return false;
+ }
+ if (!(*p >= '0' && *p <= '9'))
+ return false;
+ for (;;)
+ {
+ p++;
+ if (p == p_end)
+ break;
+ if (!(*p >= '0' && *p <= '9'))
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+ }
+ if (p == p_end)
+ return true;
+ /* Recognize complex number syntax. */
+ if (unconstrained)
+ {
+ /* Recognize the syntax {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' */
+ if (seen_sign && (*p == 'I' || *p == 'i') && p + 1 == p_end)
+ return true;
+ /* Recognize the syntaxes
+ REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i'
+ REAL-NUMBER {'+'|'-'} 'i'
+ */
+ if (*p == '+' || *p == '-')
+ return (p_end[-1] == 'I' || p_end[-1] == 'i')
+ && (p + 1 == p_end - 1
+ || is_other_number_syntax (p, p_end - 1 - p, radix, false));
+ /* Recognize the syntax REAL-NUMBER '@' REAL-NUMBER */
+ if (*p == '@')
+ {
+ p++;
+ return is_other_number_syntax (p, p_end - p, radix, false);
+ }
+ }
+ return false;
+}
+
+/* Tests if a token represents a number.
+ Taken from guile-1.6.4/libguile/numbers.c:scm_istring2number(). */
+static bool
+is_number (const struct token *tp)
+{
+ const char *str = tp->chars;
+ int len = tp->charcount;
+ int radix = 10;
+ enum { unknown, exact, inexact } exactness = unknown;
+ bool seen_radix_prefix = false;
+ bool seen_exactness_prefix = false;
+
+ if (len == 1)
+ if (*str == '+' || *str == '-')
+ return false;
+ while (len >= 2 && *str == '#')
+ {
+ switch (str[1])
+ {
+ case 'B': case 'b':
+ if (seen_radix_prefix)
+ return false;
+ radix = 2;
+ seen_radix_prefix = true;
+ break;
+ case 'O': case 'o':
+ if (seen_radix_prefix)
+ return false;
+ radix = 8;
+ seen_radix_prefix = true;
+ break;
+ case 'D': case 'd':
+ if (seen_radix_prefix)
+ return false;
+ radix = 10;
+ seen_radix_prefix = true;
+ break;
+ case 'X': case 'x':
+ if (seen_radix_prefix)
+ return false;
+ radix = 16;
+ seen_radix_prefix = true;
+ break;
+ case 'E': case 'e':
+ if (seen_exactness_prefix)
+ return false;
+ exactness = exact;
+ seen_exactness_prefix = true;
+ break;
+ case 'I': case 'i':
+ if (seen_exactness_prefix)
+ return false;
+ exactness = inexact;
+ seen_exactness_prefix = true;
+ break;
+ default:
+ return false;
+ }
+ str += 2;
+ len -= 2;
+ }
+ if (exactness != inexact)
+ {
+ /* Try to parse an integer. */
+ if (is_integer_syntax (str, len, 10))
+ return true;
+ /* FIXME: Other Scheme implementations support exact rational numbers
+ or exact complex numbers. */
+ }
+ if (exactness != exact)
+ {
+ /* Try to parse a rational, floating-point or complex number. */
+ if (is_other_number_syntax (str, len, 10, true))
+ return true;
+ }
+ return false;
+}
+
+
+/* ========================= Accumulating comments ========================= */
+
+
+static char *buffer;
+static size_t bufmax;
+static size_t buflen;
+
+static inline void
+comment_start ()
+{
+ buflen = 0;
+}
+
+static inline void
+comment_add (int c)
+{
+ if (buflen >= bufmax)
+ {
+ bufmax = 2 * bufmax + 10;
+ buffer = xrealloc (buffer, bufmax);
+ }
+ buffer[buflen++] = c;
+}
+
+static inline void
+comment_line_end (size_t chars_to_remove)
+{
+ buflen -= chars_to_remove;
+ while (buflen >= 1
+ && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
+ --buflen;
+ if (chars_to_remove == 0 && buflen >= bufmax)
+ {
+ bufmax = 2 * bufmax + 10;
+ buffer = xrealloc (buffer, bufmax);
+ }
+ buffer[buflen] = '\0';
+ xgettext_comment_add (buffer);
+}
+
+
+/* These are for tracking whether comments count as immediately before
+ keyword. */
+static int last_comment_line;
+static int last_non_comment_line;
+
+
+/* ========================= Accumulating messages ========================= */
+
+
+static message_list_ty *mlp;
+
+
+/* ========================== Reading of objects. ========================= */
+
+
+/* We are only interested in symbols (e.g. gettext or ngettext) and strings.
+ Other objects need not to be represented precisely. */
+enum object_type
+{
+ t_symbol, /* symbol */
+ t_string, /* string */
+ t_other, /* other kind of real object */
+ t_dot, /* '.' pseudo object */
+ t_close, /* ')' pseudo object */
+ t_eof /* EOF marker */
+};
+
+struct object
+{
+ enum object_type type;
+ struct token *token; /* for t_symbol and t_string */
+ int line_number_at_start; /* for t_string */
+};
+
+/* Free the memory pointed to by a 'struct object'. */
+static inline void
+free_object (struct object *op)
+{
+ if (op->type == t_symbol || op->type == t_string)
+ {
+ free_token (op->token);
+ free (op->token);
+ }
+}
+
+/* Convert a t_symbol/t_string token to a char*. */
+static char *
+string_of_object (const struct object *op)
+{
+ char *str;
+ int n;
+
+ if (!(op->type == t_symbol || op->type == t_string))
+ abort ();
+ n = op->token->charcount;
+ str = (char *) xmalloc (n + 1);
+ memcpy (str, op->token->chars, n);
+ str[n] = '\0';
+ return str;
+}
+
+/* Context lookup table. */
+static flag_context_list_table_ty *flag_context_list_table;
+
+/* Read the next object. */
+static void
+read_object (struct object *op, flag_context_ty outer_context)
+{
+ for (;;)
+ {
+ int c = do_getc ();
+
+ switch (c)
+ {
+ case EOF:
+ op->type = t_eof;
+ return;
+
+ case ' ': case '\r': case '\f': case '\t':
+ continue;
+
+ case '\n':
+ /* Comments assumed to be grouped with a message must immediately
+ precede it, with no non-whitespace token on a line between
+ both. */
+ if (last_non_comment_line > last_comment_line)
+ xgettext_comment_reset ();
+ continue;
+
+ case ';':
+ {
+ bool all_semicolons = true;
+
+ last_comment_line = line_number;
+ comment_start ();
+ for (;;)
+ {
+ c = do_getc ();
+ if (c == EOF || c == '\n')
+ break;
+ if (c != ';')
+ all_semicolons = false;
+ if (!all_semicolons)
+ {
+ /* We skip all leading white space, but not EOLs. */
+ if (!(buflen == 0 && (c == ' ' || c == '\t')))
+ comment_add (c);
+ }
+ }
+ comment_line_end (0);
+ continue;
+ }
+
+ case '(':
+ {
+ int arg = 0; /* Current argument number. */
+ flag_context_list_iterator_ty context_iter;
+ int argnum1 = 0; /* First string position. */
+ int argnum2 = 0; /* Plural string position. */
+ message_ty *plural_mp = NULL; /* Remember the msgid. */
+
+ for (;; arg++)
+ {
+ struct object inner;
+ flag_context_ty inner_context;
+
+ if (arg == 0)
+ inner_context = null_context;
+ else
+ inner_context =
+ inherited_context (outer_context,
+ flag_context_list_iterator_advance (
+ &context_iter));
+
+ read_object (&inner, inner_context);
+
+ /* Recognize end of list. */
+ if (inner.type == t_close)
+ {
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ /* Dots are not allowed in every position.
+ But be tolerant. */
+
+ /* EOF inside list is illegal.
+ But be tolerant. */
+ if (inner.type == t_eof)
+ break;
+
+ if (arg == 0)
+ {
+ /* This is the function position. */
+ if (inner.type == t_symbol)
+ {
+ char *symbol_name = string_of_object (&inner);
+ void *keyword_value;
+
+ if (find_entry (&keywords,
+ symbol_name, strlen (symbol_name),
+ &keyword_value)
+ == 0)
+ {
+ argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
+ argnum2 = (int) (long) keyword_value >> 10;
+ }
+
+ context_iter =
+ flag_context_list_iterator (
+ flag_context_list_table_lookup (
+ flag_context_list_table,
+ symbol_name, strlen (symbol_name)));
+
+ free (symbol_name);
+ }
+ else
+ context_iter = null_context_list_iterator;
+ }
+ else
+ {
+ /* These are the argument positions.
+ Extract a string if we have reached the right
+ argument position. */
+ if (arg == argnum1)
+ {
+ if (inner.type == t_string)
+ {
+ lex_pos_ty pos;
+ message_ty *mp;
+
+ pos.file_name = logical_file_name;
+ pos.line_number = inner.line_number_at_start;
+ mp = remember_a_message (mlp, string_of_object (&inner),
+ inner_context, &pos);
+ if (argnum2 > 0)
+ plural_mp = mp;
+ }
+ }
+ else if (arg == argnum2)
+ {
+ if (inner.type == t_string && plural_mp != NULL)
+ {
+ lex_pos_ty pos;
+
+ pos.file_name = logical_file_name;
+ pos.line_number = inner.line_number_at_start;
+ remember_a_message_plural (plural_mp, string_of_object (&inner),
+ inner_context, &pos);
+ }
+ }
+ }
+
+ free_object (&inner);
+ }
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case ')':
+ /* Tell the caller about the end of list.
+ Unmatched closing parenthesis is illegal.
+ But be tolerant. */
+ op->type = t_close;
+ last_non_comment_line = line_number;
+ return;
+
+ case ',':
+ {
+ int c = do_getc ();
+ /* The ,@ handling inside lists is wrong anyway, because
+ ,@form expands to an unknown number of elements. */
+ if (c != EOF && c != '@')
+ do_ungetc (c);
+ }
+ /*FALLTHROUGH*/
+ case '\'':
+ case '`':
+ {
+ struct object inner;
+
+ read_object (&inner, null_context);
+
+ /* Dots and EOF are not allowed here. But be tolerant. */
+
+ free_object (&inner);
+
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '#':
+ /* Dispatch macro handling. */
+ {
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ {
+ op->type = t_other;
+ return;
+ }
+
+ switch (c)
+ {
+ case '(': /* Vector */
+ do_ungetc (c);
+ {
+ struct object inner;
+ read_object (&inner, null_context);
+ /* Dots and EOF are not allowed here.
+ But be tolerant. */
+ free_object (&inner);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case 'T': case 't': /* Boolean true */
+ case 'F': case 'f': /* Boolean false */
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case 'B': case 'b':
+ case 'O': case 'o':
+ case 'D': case 'd':
+ case 'X': case 'x':
+ case 'E': case 'e':
+ case 'I': case 'i':
+ {
+ struct token token;
+ do_ungetc (c);
+ read_token (&token, '#');
+ if (is_number (&token))
+ {
+ /* A number. */
+ free_token (&token);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+ else
+ {
+ if (token.charcount == 2
+ && (token.chars[1] == 'e' || token.chars[1] == 'i'))
+ {
+ c = do_getc ();
+ if (c != EOF)
+ do_ungetc (c);
+ if (c == '(')
+ /* Homogenous vector syntax, see arrays.scm. */
+ case 'a': /* Vectors of char */
+ case 'c': /* Vectors of complex */
+ /*case 'e':*/ /* Vectors of long */
+ case 'h': /* Vectors of short */
+ /*case 'i':*/ /* Vectors of double-float */
+ case 'l': /* Vectors of long long */
+ case 's': /* Vectors of single-float */
+ case 'u': /* Vectors of unsigned long */
+ case 'y': /* Vectors of byte */
+ {
+ struct object inner;
+ read_object (&inner, null_context);
+ /* Dots and EOF are not allowed here.
+ But be tolerant. */
+ free_token (&token);
+ free_object (&inner);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+ }
+ /* Unknown # object. But be tolerant. */
+ free_token (&token);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+ }
+
+ case '!':
+ /* Block comment '#! ... \n!#\n'. We don't extract it
+ because it's only used to introduce scripts on Unix. */
+ {
+ int last1 = 0;
+ int last2 = 0;
+ int last3 = 0;
+
+ for (;;)
+ {
+ c = do_getc ();
+ if (c == EOF)
+ /* EOF is not allowed here. But be tolerant. */
+ break;
+ if (last3 == '\n' && last2 == '!' && last1 == '#'
+ && c == '\n')
+ break;
+ last3 = last2;
+ last2 = last1;
+ last1 = c;
+ }
+ continue;
+ }
+
+ case '*':
+ /* Bit vector. */
+ {
+ struct token token;
+ read_token (&token, c);
+ /* The token should consists only of '0' and '1', except
+ for the initial '*'. But be tolerant. */
+ free_token (&token);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '{':
+ /* Symbol with multiple escapes: #{...}# */
+ {
+ op->token = (struct token *) xmalloc (sizeof (struct token));
+
+ init_token (op->token);
+
+ for (;;)
+ {
+ c = do_getc ();
+
+ if (c == EOF)
+ break;
+ if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ break;
+ }
+ else if (c == '}')
+ {
+ c = do_getc ();
+ if (c == '#')
+ break;
+ if (c != EOF)
+ do_ungetc (c);
+ c = '}';
+ }
+ grow_token (op->token);
+ op->token->chars[op->token->charcount++] = c;
+ }
+
+ op->type = t_symbol;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '\\':
+ /* Character. */
+ {
+ struct token token;
+ c = do_getc ();
+ if (c != EOF)
+ {
+ read_token (&token, c);
+ free_token (&token);
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case ':': /* Keyword. */
+ case '&': /* Deprecated keyword, installed in optargs.scm. */
+ {
+ struct token token;
+ read_token (&token, '-');
+ free_token (&token);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ /* The following are installed through read-hash-extend. */
+
+ /* arrays.scm */
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ /* Multidimensional array syntax: #nx(...) where
+ n ::= DIGIT+
+ x ::= {'a'|'b'|'c'|'e'|'i'|'s'|'u'}
+ */
+ do
+ c = do_getc ();
+ while (c >= '0' && c <= '9');
+ /* c should be one of {'a'|'b'|'c'|'e'|'i'|'s'|'u'}.
+ But be tolerant. */
+ /*FALLTHROUGH*/
+ case '\'': /* boot-9.scm */
+ case '.': /* boot-9.scm */
+ case ',': /* srfi-10.scm */
+ {
+ struct object inner;
+ read_object (&inner, null_context);
+ /* Dots and EOF are not allowed here.
+ But be tolerant. */
+ free_object (&inner);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ default:
+ /* Unknown. */
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+ /*NOTREACHED*/
+ abort ();
+ }
+
+ case '"':
+ {
+ op->token = (struct token *) xmalloc (sizeof (struct token));
+ init_token (op->token);
+ op->line_number_at_start = line_number;
+ for (;;)
+ {
+ int c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ break;
+ if (c == '"')
+ break;
+ if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ break;
+ switch (c)
+ {
+ case '\n':
+ continue;
+ case '0':
+ c = '\0';
+ break;
+ case 'a':
+ c = '\a';
+ break;
+ case 'f':
+ c = '\f';
+ break;
+ case 'n':
+ c = '\n';
+ break;
+ case 'r':
+ c = '\r';
+ break;
+ case 't':
+ c = '\t';
+ break;
+ case 'v':
+ c = '\v';
+ break;
+ default:
+ break;
+ }
+ }
+ grow_token (op->token);
+ op->token->chars[op->token->charcount++] = c;
+ }
+ op->type = t_string;
+
+ if (extract_all)
+ {
+ lex_pos_ty pos;
+
+ pos.file_name = logical_file_name;
+ pos.line_number = op->line_number_at_start;
+ remember_a_message (mlp, string_of_object (op),
+ null_context, &pos);
+ }
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '+': case '-': case '.':
+ /* Read a number or symbol token. */
+ op->token = (struct token *) xmalloc (sizeof (struct token));
+ read_token (op->token, c);
+ if (op->token->charcount == 1 && op->token->chars[0] == '.')
+ {
+ free_token (op->token);
+ free (op->token);
+ op->type = t_dot;
+ }
+ else if (is_number (op->token))
+ {
+ /* A number. */
+ free_token (op->token);
+ free (op->token);
+ op->type = t_other;
+ }
+ else
+ {
+ /* A symbol. */
+ op->type = t_symbol;
+ }
+ last_non_comment_line = line_number;
+ return;
+
+ case ':':
+ default:
+ /* Read a symbol token. */
+ op->token = (struct token *) xmalloc (sizeof (struct token));
+ read_token (op->token, c);
+ op->type = t_symbol;
+ last_non_comment_line = line_number;
+ return;
+ }
+ }
+}
+
+
+void
+extract_scheme (FILE *f,
+ const char *real_filename, const char *logical_filename,
+ flag_context_list_table_ty *flag_table,
+ msgdomain_list_ty *mdlp)
+{
+ mlp = mdlp->item[0]->messages;
+
+ fp = f;
+ real_file_name = real_filename;
+ logical_file_name = xstrdup (logical_filename);
+ line_number = 1;
+
+ last_comment_line = -1;
+ last_non_comment_line = -1;
+
+ flag_context_list_table = flag_table;
+
+ init_keywords ();
+
+ /* Eat tokens until eof is seen. When read_object returns
+ due to an unbalanced closing parenthesis, just restart it. */
+ do
+ {
+ struct object toplevel_object;
+
+ read_object (&toplevel_object, null_context);
+
+ if (toplevel_object.type == t_eof)
+ break;
+
+ free_object (&toplevel_object);
+ }
+ while (!feof (fp));
+
+ /* Close scanner. */
+ fp = NULL;
+ real_file_name = NULL;
+ logical_file_name = NULL;
+ line_number = 0;
+}