From: Bruno Haible Date: Tue, 18 Jan 2005 11:42:52 +0000 (+0000) Subject: xgettext Scheme backend. X-Git-Tag: v0.14.2~125 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8a148160a35afe1e13a12e3c94f286e43f8260a2;p=thirdparty%2Fgettext.git xgettext Scheme backend. --- diff --git a/gettext-tools/src/x-scheme.c b/gettext-tools/src/x-scheme.c new file mode 100644 index 000000000..297dda8a2 --- /dev/null +++ b/gettext-tools/src/x-scheme.c @@ -0,0 +1,1244 @@ +/* xgettext Scheme backend. + Copyright (C) 2004-2005 Free Software Foundation, Inc. + + This file was written by Bruno Haible , 2004-2005. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include + +#include "message.h" +#include "xgettext.h" +#include "x-scheme.h" +#include "error.h" +#include "xalloc.h" +#include "exit.h" +#include "hash.h" +#include "gettext.h" + +#define _(s) gettext(s) + + +/* The Scheme syntax is described in R5RS. It is implemented in + guile-1.6.4/libguile/read.c. + Since we are interested only in strings and in forms similar to + (gettext msgid ...) + or (ngettext msgid msgid_plural ...) + we make the following simplifications: + + - Assume the keywords and strings are in an ASCII compatible encoding. + This means we can read the input file one byte at a time, instead of + one character at a time. No need to worry about multibyte characters: + If they occur as part of identifiers, they most probably act as + constituent characters, and the byte based approach will do the same. + + - Assume the read-hash-procedures is in the default state. + Non-standard reader extensions are mostly used to read data, not programs. + + The remaining syntax rules are: + + - The syntax code assigned to each character, and how tokens are built + up from characters (single escape, multiple escape etc.). + + - Comment syntax: ';' and '#! ... \n!#\n'. + + - String syntax: "..." with single escapes. + + - Read macros and dispatch macro character '#'. Needed to be able to + tell which is the n-th argument of a function call. + + */ + + +/* ====================== Keyword set customization. ====================== */ + +/* If true extract all strings. */ +static bool extract_all = false; + +static hash_table keywords; +static bool default_keywords = true; + + +void +x_scheme_extract_all () +{ + extract_all = true; +} + + +void +x_scheme_keyword (const char *name) +{ + if (name == NULL) + default_keywords = false; + else + { + const char *end; + int argnum1; + int argnum2; + const char *colon; + + if (keywords.table == NULL) + init_hash (&keywords, 100); + + split_keywordspec (name, &end, &argnum1, &argnum2); + + /* The characters between name and end should form a valid Lisp symbol. + Extract the symbol name part. */ + colon = strchr (name, ':'); + if (colon != NULL && colon < end) + { + name = colon + 1; + if (name < end && *name == ':') + name++; + colon = strchr (name, ':'); + if (colon != NULL && colon < end) + return; + } + + if (argnum1 == 0) + argnum1 = 1; + insert_entry (&keywords, name, end - name, + (void *) (long) (argnum1 + (argnum2 << 10))); + } +} + +/* Finish initializing the keywords hash table. + Called after argument processing, before each file is processed. */ +static void +init_keywords () +{ + if (default_keywords) + { + x_scheme_keyword ("gettext"); /* libguile/i18n.c */ + x_scheme_keyword ("ngettext:1,2"); /* libguile/i18n.c */ + x_scheme_keyword ("gettext-noop"); + default_keywords = false; + } +} + +void +init_flag_table_scheme () +{ + xgettext_record_flag ("gettext:1:pass-scheme-format"); + xgettext_record_flag ("ngettext:1:pass-scheme-format"); + xgettext_record_flag ("ngettext:2:pass-scheme-format"); + xgettext_record_flag ("gettext-noop:1:pass-scheme-format"); + xgettext_record_flag ("format:2:scheme-format"); +} + + +/* ======================== Reading of characters. ======================== */ + +/* Real filename, used in error messages about the input file. */ +static const char *real_file_name; + +/* Logical filename and line number, used to label the extracted messages. */ +static char *logical_file_name; +static int line_number; + +/* The input file stream. */ +static FILE *fp; + + +/* Fetch the next character from the input file. */ +static int +do_getc () +{ + int c = getc (fp); + + if (c == EOF) + { + if (ferror (fp)) + error (EXIT_FAILURE, errno, _("\ +error while reading \"%s\""), real_file_name); + } + else if (c == '\n') + line_number++; + + return c; +} + +/* Put back the last fetched character, not EOF. */ +static void +do_ungetc (int c) +{ + if (c == '\n') + line_number--; + ungetc (c, fp); +} + + +/* ========================== Reading of tokens. ========================== */ + + +/* A token consists of a sequence of characters. */ +struct token +{ + int allocated; /* number of allocated 'token_char's */ + int charcount; /* number of used 'token_char's */ + char *chars; /* the token's constituents */ +}; + +/* Initialize a 'struct token'. */ +static inline void +init_token (struct token *tp) +{ + tp->allocated = 10; + tp->chars = (char *) xmalloc (tp->allocated * sizeof (char)); + tp->charcount = 0; +} + +/* Free the memory pointed to by a 'struct token'. */ +static inline void +free_token (struct token *tp) +{ + free (tp->chars); +} + +/* Ensure there is enough room in the token for one more character. */ +static inline void +grow_token (struct token *tp) +{ + if (tp->charcount == tp->allocated) + { + tp->allocated *= 2; + tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char)); + } +} + +/* Read the next token. 'first' is the first character, which has already + been read. */ +static void +read_token (struct token *tp, int first) +{ + init_token (tp); + + grow_token (tp); + tp->chars[tp->charcount++] = first; + + for (;;) + { + int c = do_getc (); + + if (c == EOF) + break; + if (c == ' ' || c == '\r' || c == '\f' || c == '\t' || c == '\n' + || c == '"' || c == '(' || c == ')' || c == ';') + { + do_ungetc (c); + break; + } + grow_token (tp); + tp->chars[tp->charcount++] = c; + } +} + +/* Tests if a token represents an integer. + Taken from guile-1.6.4/libguile/numbers.c:scm_istr2int(). */ +static inline bool +is_integer_syntax (const char *str, int len, int radix) +{ + const char *p = str; + const char *p_end = str + len; + + /* The accepted syntax is + ['+'|'-'] DIGIT+ + where DIGIT is a hexadecimal digit whose value is below radix. */ + + if (p == p_end) + return false; + if (*p == '+' || *p == '-') + { + p++; + if (p == p_end) + return false; + } + do + { + int c = *p++; + + if (c >= '0' && c <= '9') + c = c - '0'; + else if (c >= 'A' && c <= 'F') + c = c - 'A' + 10; + else if (c >= 'a' && c <= 'f') + c = c - 'a' + 10; + else + return false; + if (c >= radix) + return false; + } + while (p < p_end); + return true; +} + +/* Tests if a token represents a rational, floating-point or complex number. + If unconstrained is false, only real numbers are accepted; otherwise, + complex numbers are accepted as well. + Taken from guile-1.6.4/libguile/numbers.c:scm_istr2flo(). */ +static inline bool +is_other_number_syntax (const char *str, int len, int radix, bool unconstrained) +{ + const char *p = str; + const char *p_end = str + len; + bool seen_sign; + bool seen_digits; + + /* The accepted syntaxes are: + for a floating-point number: + ['+'|'-'] DIGIT+ [EXPONENT] + ['+'|'-'] DIGIT* '.' DIGIT+ [EXPONENT] + where EXPONENT ::= ['d'|'e'|'f'|'l'|'s'] DIGIT+ + (Dot and exponent are allowed only if radix is 10.) + for a rational number: + ['+'|'-'] DIGIT+ '/' DIGIT+ + for a complex number: + REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' + REAL-NUMBER {'+'|'-'} 'i' + {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' + {'+'|'-'} 'i' + REAL-NUMBER '@' REAL-NUMBER + */ + if (p == p_end) + return false; + /* Parse leading sign. */ + seen_sign = false; + if (*p == '+' || *p == '-') + { + p++; + if (p == p_end) + return false; + seen_sign = true; + /* Recognize complex number syntax: {'+'|'-'} 'i' */ + if (unconstrained && (*p == 'I' || *p == 'i') && p + 1 == p_end) + return true; + } + /* Parse digits before dot or exponent or slash. */ + seen_digits = false; + do + { + int c = *p; + + if (c >= '0' && c <= '9') + c = c - '0'; + else if (c >= 'A' && c <= 'F') + { + if (c >= 'D' && radix == 10) /* exponent? */ + break; + c = c - 'A' + 10; + } + else if (c >= 'a' && c <= 'f') + { + if (c >= 'd' && radix == 10) /* exponent? */ + break; + c = c - 'a' + 10; + } + else + break; + if (c >= radix) + return false; + seen_digits = true; + p++; + } + while (p < p_end); + /* If p == p_end, we know that seen_digits = true, and the number is an + integer without exponent. */ + if (p < p_end) + { + /* If we have no digits so far, we need a decimal point later. */ + if (!seen_digits && !(*p == '.' && radix == 10)) + return false; + /* Trailing '#' signs are equivalent to zeroes. */ + while (p < p_end && *p == '#') + p++; + if (p < p_end) + { + if (*p == '/') + { + /* Parse digits after the slash. */ + bool all_zeroes = true; + p++; + for (; p < p_end; p++) + { + int c = *p; + + if (c >= '0' && c <= '9') + c = c - '0'; + else if (c >= 'A' && c <= 'F') + c = c - 'A' + 10; + else if (c >= 'a' && c <= 'f') + c = c - 'a' + 10; + else + break; + if (c >= radix) + return false; + if (c != 0) + all_zeroes = false; + } + /* A zero denominator is not allowed. */ + if (all_zeroes) + return false; + /* Trailing '#' signs are equivalent to zeroes. */ + while (p < p_end && *p == '#') + p++; + } + else + { + if (*p == '.') + { + /* Decimal point notation. */ + if (radix != 10) + return false; + /* Parse digits after the decimal point. */ + p++; + for (; p < p_end; p++) + { + int c = *p; + + if (c >= '0' && c <= '9') + seen_digits = true; + else + break; + } + /* Digits are required before or after the decimal point. */ + if (!seen_digits) + return false; + /* Trailing '#' signs are equivalent to zeroes. */ + while (p < p_end && *p == '#') + p++; + } + if (p < p_end) + { + /* Parse exponent. */ + switch (*p) + { + case 'D': case 'd': + case 'E': case 'e': + case 'F': case 'f': + case 'L': case 'l': + case 'S': case 's': + if (radix != 10) + return false; + p++; + if (p == p_end) + return false; + if (*p == '+' || *p == '-') + { + p++; + if (p == p_end) + return false; + } + if (!(*p >= '0' && *p <= '9')) + return false; + for (;;) + { + p++; + if (p == p_end) + break; + if (!(*p >= '0' && *p <= '9')) + break; + } + break; + default: + break; + } + } + } + } + } + if (p == p_end) + return true; + /* Recognize complex number syntax. */ + if (unconstrained) + { + /* Recognize the syntax {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' */ + if (seen_sign && (*p == 'I' || *p == 'i') && p + 1 == p_end) + return true; + /* Recognize the syntaxes + REAL-NUMBER {'+'|'-'} REAL-NUMBER-WITHOUT-SIGN 'i' + REAL-NUMBER {'+'|'-'} 'i' + */ + if (*p == '+' || *p == '-') + return (p_end[-1] == 'I' || p_end[-1] == 'i') + && (p + 1 == p_end - 1 + || is_other_number_syntax (p, p_end - 1 - p, radix, false)); + /* Recognize the syntax REAL-NUMBER '@' REAL-NUMBER */ + if (*p == '@') + { + p++; + return is_other_number_syntax (p, p_end - p, radix, false); + } + } + return false; +} + +/* Tests if a token represents a number. + Taken from guile-1.6.4/libguile/numbers.c:scm_istring2number(). */ +static bool +is_number (const struct token *tp) +{ + const char *str = tp->chars; + int len = tp->charcount; + int radix = 10; + enum { unknown, exact, inexact } exactness = unknown; + bool seen_radix_prefix = false; + bool seen_exactness_prefix = false; + + if (len == 1) + if (*str == '+' || *str == '-') + return false; + while (len >= 2 && *str == '#') + { + switch (str[1]) + { + case 'B': case 'b': + if (seen_radix_prefix) + return false; + radix = 2; + seen_radix_prefix = true; + break; + case 'O': case 'o': + if (seen_radix_prefix) + return false; + radix = 8; + seen_radix_prefix = true; + break; + case 'D': case 'd': + if (seen_radix_prefix) + return false; + radix = 10; + seen_radix_prefix = true; + break; + case 'X': case 'x': + if (seen_radix_prefix) + return false; + radix = 16; + seen_radix_prefix = true; + break; + case 'E': case 'e': + if (seen_exactness_prefix) + return false; + exactness = exact; + seen_exactness_prefix = true; + break; + case 'I': case 'i': + if (seen_exactness_prefix) + return false; + exactness = inexact; + seen_exactness_prefix = true; + break; + default: + return false; + } + str += 2; + len -= 2; + } + if (exactness != inexact) + { + /* Try to parse an integer. */ + if (is_integer_syntax (str, len, 10)) + return true; + /* FIXME: Other Scheme implementations support exact rational numbers + or exact complex numbers. */ + } + if (exactness != exact) + { + /* Try to parse a rational, floating-point or complex number. */ + if (is_other_number_syntax (str, len, 10, true)) + return true; + } + return false; +} + + +/* ========================= Accumulating comments ========================= */ + + +static char *buffer; +static size_t bufmax; +static size_t buflen; + +static inline void +comment_start () +{ + buflen = 0; +} + +static inline void +comment_add (int c) +{ + if (buflen >= bufmax) + { + bufmax = 2 * bufmax + 10; + buffer = xrealloc (buffer, bufmax); + } + buffer[buflen++] = c; +} + +static inline void +comment_line_end (size_t chars_to_remove) +{ + buflen -= chars_to_remove; + while (buflen >= 1 + && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) + --buflen; + if (chars_to_remove == 0 && buflen >= bufmax) + { + bufmax = 2 * bufmax + 10; + buffer = xrealloc (buffer, bufmax); + } + buffer[buflen] = '\0'; + xgettext_comment_add (buffer); +} + + +/* These are for tracking whether comments count as immediately before + keyword. */ +static int last_comment_line; +static int last_non_comment_line; + + +/* ========================= Accumulating messages ========================= */ + + +static message_list_ty *mlp; + + +/* ========================== Reading of objects. ========================= */ + + +/* We are only interested in symbols (e.g. gettext or ngettext) and strings. + Other objects need not to be represented precisely. */ +enum object_type +{ + t_symbol, /* symbol */ + t_string, /* string */ + t_other, /* other kind of real object */ + t_dot, /* '.' pseudo object */ + t_close, /* ')' pseudo object */ + t_eof /* EOF marker */ +}; + +struct object +{ + enum object_type type; + struct token *token; /* for t_symbol and t_string */ + int line_number_at_start; /* for t_string */ +}; + +/* Free the memory pointed to by a 'struct object'. */ +static inline void +free_object (struct object *op) +{ + if (op->type == t_symbol || op->type == t_string) + { + free_token (op->token); + free (op->token); + } +} + +/* Convert a t_symbol/t_string token to a char*. */ +static char * +string_of_object (const struct object *op) +{ + char *str; + int n; + + if (!(op->type == t_symbol || op->type == t_string)) + abort (); + n = op->token->charcount; + str = (char *) xmalloc (n + 1); + memcpy (str, op->token->chars, n); + str[n] = '\0'; + return str; +} + +/* Context lookup table. */ +static flag_context_list_table_ty *flag_context_list_table; + +/* Read the next object. */ +static void +read_object (struct object *op, flag_context_ty outer_context) +{ + for (;;) + { + int c = do_getc (); + + switch (c) + { + case EOF: + op->type = t_eof; + return; + + case ' ': case '\r': case '\f': case '\t': + continue; + + case '\n': + /* Comments assumed to be grouped with a message must immediately + precede it, with no non-whitespace token on a line between + both. */ + if (last_non_comment_line > last_comment_line) + xgettext_comment_reset (); + continue; + + case ';': + { + bool all_semicolons = true; + + last_comment_line = line_number; + comment_start (); + for (;;) + { + c = do_getc (); + if (c == EOF || c == '\n') + break; + if (c != ';') + all_semicolons = false; + if (!all_semicolons) + { + /* We skip all leading white space, but not EOLs. */ + if (!(buflen == 0 && (c == ' ' || c == '\t'))) + comment_add (c); + } + } + comment_line_end (0); + continue; + } + + case '(': + { + int arg = 0; /* Current argument number. */ + flag_context_list_iterator_ty context_iter; + int argnum1 = 0; /* First string position. */ + int argnum2 = 0; /* Plural string position. */ + message_ty *plural_mp = NULL; /* Remember the msgid. */ + + for (;; arg++) + { + struct object inner; + flag_context_ty inner_context; + + if (arg == 0) + inner_context = null_context; + else + inner_context = + inherited_context (outer_context, + flag_context_list_iterator_advance ( + &context_iter)); + + read_object (&inner, inner_context); + + /* Recognize end of list. */ + if (inner.type == t_close) + { + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + /* Dots are not allowed in every position. + But be tolerant. */ + + /* EOF inside list is illegal. + But be tolerant. */ + if (inner.type == t_eof) + break; + + if (arg == 0) + { + /* This is the function position. */ + if (inner.type == t_symbol) + { + char *symbol_name = string_of_object (&inner); + void *keyword_value; + + if (find_entry (&keywords, + symbol_name, strlen (symbol_name), + &keyword_value) + == 0) + { + argnum1 = (int) (long) keyword_value & ((1 << 10) - 1); + argnum2 = (int) (long) keyword_value >> 10; + } + + context_iter = + flag_context_list_iterator ( + flag_context_list_table_lookup ( + flag_context_list_table, + symbol_name, strlen (symbol_name))); + + free (symbol_name); + } + else + context_iter = null_context_list_iterator; + } + else + { + /* These are the argument positions. + Extract a string if we have reached the right + argument position. */ + if (arg == argnum1) + { + if (inner.type == t_string) + { + lex_pos_ty pos; + message_ty *mp; + + pos.file_name = logical_file_name; + pos.line_number = inner.line_number_at_start; + mp = remember_a_message (mlp, string_of_object (&inner), + inner_context, &pos); + if (argnum2 > 0) + plural_mp = mp; + } + } + else if (arg == argnum2) + { + if (inner.type == t_string && plural_mp != NULL) + { + lex_pos_ty pos; + + pos.file_name = logical_file_name; + pos.line_number = inner.line_number_at_start; + remember_a_message_plural (plural_mp, string_of_object (&inner), + inner_context, &pos); + } + } + } + + free_object (&inner); + } + } + op->type = t_other; + last_non_comment_line = line_number; + return; + + case ')': + /* Tell the caller about the end of list. + Unmatched closing parenthesis is illegal. + But be tolerant. */ + op->type = t_close; + last_non_comment_line = line_number; + return; + + case ',': + { + int c = do_getc (); + /* The ,@ handling inside lists is wrong anyway, because + ,@form expands to an unknown number of elements. */ + if (c != EOF && c != '@') + do_ungetc (c); + } + /*FALLTHROUGH*/ + case '\'': + case '`': + { + struct object inner; + + read_object (&inner, null_context); + + /* Dots and EOF are not allowed here. But be tolerant. */ + + free_object (&inner); + + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case '#': + /* Dispatch macro handling. */ + { + c = do_getc (); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + { + op->type = t_other; + return; + } + + switch (c) + { + case '(': /* Vector */ + do_ungetc (c); + { + struct object inner; + read_object (&inner, null_context); + /* Dots and EOF are not allowed here. + But be tolerant. */ + free_object (&inner); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case 'T': case 't': /* Boolean true */ + case 'F': case 'f': /* Boolean false */ + op->type = t_other; + last_non_comment_line = line_number; + return; + + case 'B': case 'b': + case 'O': case 'o': + case 'D': case 'd': + case 'X': case 'x': + case 'E': case 'e': + case 'I': case 'i': + { + struct token token; + do_ungetc (c); + read_token (&token, '#'); + if (is_number (&token)) + { + /* A number. */ + free_token (&token); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + else + { + if (token.charcount == 2 + && (token.chars[1] == 'e' || token.chars[1] == 'i')) + { + c = do_getc (); + if (c != EOF) + do_ungetc (c); + if (c == '(') + /* Homogenous vector syntax, see arrays.scm. */ + case 'a': /* Vectors of char */ + case 'c': /* Vectors of complex */ + /*case 'e':*/ /* Vectors of long */ + case 'h': /* Vectors of short */ + /*case 'i':*/ /* Vectors of double-float */ + case 'l': /* Vectors of long long */ + case 's': /* Vectors of single-float */ + case 'u': /* Vectors of unsigned long */ + case 'y': /* Vectors of byte */ + { + struct object inner; + read_object (&inner, null_context); + /* Dots and EOF are not allowed here. + But be tolerant. */ + free_token (&token); + free_object (&inner); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + } + /* Unknown # object. But be tolerant. */ + free_token (&token); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + } + + case '!': + /* Block comment '#! ... \n!#\n'. We don't extract it + because it's only used to introduce scripts on Unix. */ + { + int last1 = 0; + int last2 = 0; + int last3 = 0; + + for (;;) + { + c = do_getc (); + if (c == EOF) + /* EOF is not allowed here. But be tolerant. */ + break; + if (last3 == '\n' && last2 == '!' && last1 == '#' + && c == '\n') + break; + last3 = last2; + last2 = last1; + last1 = c; + } + continue; + } + + case '*': + /* Bit vector. */ + { + struct token token; + read_token (&token, c); + /* The token should consists only of '0' and '1', except + for the initial '*'. But be tolerant. */ + free_token (&token); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case '{': + /* Symbol with multiple escapes: #{...}# */ + { + op->token = (struct token *) xmalloc (sizeof (struct token)); + + init_token (op->token); + + for (;;) + { + c = do_getc (); + + if (c == EOF) + break; + if (c == '\\') + { + c = do_getc (); + if (c == EOF) + break; + } + else if (c == '}') + { + c = do_getc (); + if (c == '#') + break; + if (c != EOF) + do_ungetc (c); + c = '}'; + } + grow_token (op->token); + op->token->chars[op->token->charcount++] = c; + } + + op->type = t_symbol; + last_non_comment_line = line_number; + return; + } + + case '\\': + /* Character. */ + { + struct token token; + c = do_getc (); + if (c != EOF) + { + read_token (&token, c); + free_token (&token); + } + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case ':': /* Keyword. */ + case '&': /* Deprecated keyword, installed in optargs.scm. */ + { + struct token token; + read_token (&token, '-'); + free_token (&token); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + /* The following are installed through read-hash-extend. */ + + /* arrays.scm */ + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + /* Multidimensional array syntax: #nx(...) where + n ::= DIGIT+ + x ::= {'a'|'b'|'c'|'e'|'i'|'s'|'u'} + */ + do + c = do_getc (); + while (c >= '0' && c <= '9'); + /* c should be one of {'a'|'b'|'c'|'e'|'i'|'s'|'u'}. + But be tolerant. */ + /*FALLTHROUGH*/ + case '\'': /* boot-9.scm */ + case '.': /* boot-9.scm */ + case ',': /* srfi-10.scm */ + { + struct object inner; + read_object (&inner, null_context); + /* Dots and EOF are not allowed here. + But be tolerant. */ + free_object (&inner); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + default: + /* Unknown. */ + op->type = t_other; + last_non_comment_line = line_number; + return; + } + /*NOTREACHED*/ + abort (); + } + + case '"': + { + op->token = (struct token *) xmalloc (sizeof (struct token)); + init_token (op->token); + op->line_number_at_start = line_number; + for (;;) + { + int c = do_getc (); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + break; + if (c == '"') + break; + if (c == '\\') + { + c = do_getc (); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + break; + switch (c) + { + case '\n': + continue; + case '0': + c = '\0'; + break; + case 'a': + c = '\a'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + default: + break; + } + } + grow_token (op->token); + op->token->chars[op->token->charcount++] = c; + } + op->type = t_string; + + if (extract_all) + { + lex_pos_ty pos; + + pos.file_name = logical_file_name; + pos.line_number = op->line_number_at_start; + remember_a_message (mlp, string_of_object (op), + null_context, &pos); + } + last_non_comment_line = line_number; + return; + } + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '+': case '-': case '.': + /* Read a number or symbol token. */ + op->token = (struct token *) xmalloc (sizeof (struct token)); + read_token (op->token, c); + if (op->token->charcount == 1 && op->token->chars[0] == '.') + { + free_token (op->token); + free (op->token); + op->type = t_dot; + } + else if (is_number (op->token)) + { + /* A number. */ + free_token (op->token); + free (op->token); + op->type = t_other; + } + else + { + /* A symbol. */ + op->type = t_symbol; + } + last_non_comment_line = line_number; + return; + + case ':': + default: + /* Read a symbol token. */ + op->token = (struct token *) xmalloc (sizeof (struct token)); + read_token (op->token, c); + op->type = t_symbol; + last_non_comment_line = line_number; + return; + } + } +} + + +void +extract_scheme (FILE *f, + const char *real_filename, const char *logical_filename, + flag_context_list_table_ty *flag_table, + msgdomain_list_ty *mdlp) +{ + mlp = mdlp->item[0]->messages; + + fp = f; + real_file_name = real_filename; + logical_file_name = xstrdup (logical_filename); + line_number = 1; + + last_comment_line = -1; + last_non_comment_line = -1; + + flag_context_list_table = flag_table; + + init_keywords (); + + /* Eat tokens until eof is seen. When read_object returns + due to an unbalanced closing parenthesis, just restart it. */ + do + { + struct object toplevel_object; + + read_object (&toplevel_object, null_context); + + if (toplevel_object.type == t_eof) + break; + + free_object (&toplevel_object); + } + while (!feof (fp)); + + /* Close scanner. */ + fp = NULL; + real_file_name = NULL; + logical_file_name = NULL; + line_number = 0; +} diff --git a/gettext-tools/src/x-scheme.h b/gettext-tools/src/x-scheme.h new file mode 100644 index 000000000..12b093a6a --- /dev/null +++ b/gettext-tools/src/x-scheme.h @@ -0,0 +1,39 @@ +/* xgettext Scheme backend. + Copyright (C) 2004 Free Software Foundation, Inc. + Written by Bruno Haible , 2004. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + + +#define EXTENSIONS_SCHEME \ + { "scm", "Scheme" }, \ + +#define SCANNERS_SCHEME \ + { "Scheme", extract_scheme, \ + &flag_table_scheme, &formatstring_scheme, NULL }, \ + +/* Scan a Scheme file and add its translatable strings to mdlp. */ +extern void extract_scheme (FILE *fp, const char *real_filename, + const char *logical_filename, + flag_context_list_table_ty *flag_table, + msgdomain_list_ty *mdlp); + + +/* Handling of options specific to this language. */ + +extern void x_scheme_extract_all (void); +extern void x_scheme_keyword (const char *name); + +extern void init_flag_table_scheme (void);