From: Daiki Ueno Date: Fri, 4 Sep 2015 08:57:22 +0000 (+0900) Subject: xgettext: Add support for generic XML files X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0dedd6cadd3c82ee0cac454eda32405ae6c4460b;p=thirdparty%2Fgettext.git xgettext: Add support for generic XML files * gettext-tools/src/xlocator.h: New file. * gettext-tools/src/xlocator.c: New file. * gettext-tools/src/its.h: New file. * gettext-tools/src/its.c: New file. * gettext-tools/src/xgettext.c: Include "xlocator.h" and "its.h". (its_locators): New variable. (long_options): Add --its option. (extract_from_xml_file): New file. (main): Handle --its option. (usage): Document --its option. * gettext-tools/src/Makefile.am (noinst_HEADERS): Add xlocator.h and its.h. (xgettext_CPPFLAGS): Add $(INCXML). (xgettext_SOURCES): Add xlocator.c and its.c. --- diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index 61da5c4eb..b4e6ff745 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -40,7 +40,7 @@ read-po.h read-properties.h read-stringtable.h \ str-list.h \ color.h write-catalog.h write-po.h write-properties.h write-stringtable.h \ dir-list.h file-list.h po-gram-gen.h po-gram-gen2.h cldr-plural.h \ -cldr-plural-exp.h \ +cldr-plural-exp.h xlocator.h its.h \ msgl-charset.h msgl-equal.h msgl-iconv.h msgl-ascii.h msgl-cat.h msgl-header.h \ msgl-english.h msgl-check.h msgl-fsearch.h msgfmt.h msgunfmt.h \ plural-count.h plural-eval.h plural-distrib.h \ @@ -183,7 +183,7 @@ xgettext_SOURCES += \ x-c.c x-po.c x-sh.c x-python.c x-lisp.c x-elisp.c x-librep.c x-scheme.c \ x-smalltalk.c x-java.c x-csharp.c x-awk.c x-ycp.c x-tcl.c x-perl.c x-php.c \ x-rst.c x-glade.c x-lua.c x-javascript.c x-vala.c x-gsettings.c \ - x-desktop.c x-appdata.c + x-desktop.c x-appdata.c xlocator.c its.c if !WOE32DLL msgattrib_SOURCES = msgattrib.c else @@ -319,7 +319,7 @@ msgcmp_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\" msgfmt_CPPFLAGS = $(AM_CPPFLAGS) 
-DINSTALLDIR=\"$(bindir)\" msgmerge_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\" msgunfmt_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\" -xgettext_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\" +xgettext_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\" $(INCXML) msgattrib_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\" msgcat_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\" msgcomm_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\" diff --git a/gettext-tools/src/its.c b/gettext-tools/src/its.c new file mode 100644 index 000000000..ea6577c0e --- /dev/null +++ b/gettext-tools/src/its.c @@ -0,0 +1,764 @@ +/* Internationalization Tag Set (ITS) handling + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno , 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +/* Specification. */ +#include "its.h" + +#include +#include +#include "error.h" +#include "gettext.h" +#include "hash.h" +#include +#include +#include +#include +#include +#include "xalloc.h" + +#define _(str) gettext (str) + +/* The Internationalization Tag Set (ITS) 2.0 standard is published at: + http://www.w3.org/TR/its20/ + + This implementation supports only a few data categories, useful for + gettext-based projects. Other data categories can be added by + extending a class from its_rule_class_ty and registering it in + init_classes(). 
+
+   The value associated with a data category is represented as an
+   array of key-value pairs.  */
+
+#define ITS_NS "http://www.w3.org/2005/11/its"
+
+/* A single key-value pair.  Both strings are heap copies made by
+   its_value_list_append and released by its_value_list_destroy.  */
+struct its_value_ty
+{
+  char *name;
+  char *value;
+};
+
+/* A growable array of key-value pairs: NITEMS entries are in use,
+   NITEMS_MAX are allocated.  */
+struct its_value_list_ty
+{
+  struct its_value_ty *items;
+  size_t nitems;
+  size_t nitems_max;
+};
+
+/* Appends a copy of the pair (NAME, VALUE) to VALUES, growing the
+   array when it is full.  */
+static void
+its_value_list_append (struct its_value_list_ty *values,
+                       const char *name,
+                       const char *value)
+{
+  struct its_value_ty _value;
+
+  _value.name = xstrdup (name);
+  _value.value = xstrdup (value);
+
+  if (values->nitems == values->nitems_max)
+    {
+      values->nitems_max = 2 * values->nitems_max + 1;
+      values->items =
+        xrealloc (values->items,
+                  sizeof (struct its_value_ty) * values->nitems_max);
+    }
+  values->items[values->nitems++] = _value;
+}
+
+/* Returns the value of the first pair in VALUES named NAME, or NULL
+   if there is none.  The result points into VALUES.  */
+static const char *
+its_value_list_get_value (struct its_value_list_ty *values,
+                          const char *name)
+{
+  size_t i;
+
+  for (i = 0; i < values->nitems; i++)
+    {
+      struct its_value_ty *value = &values->items[i];
+      if (strcmp (value->name, name) == 0)
+        return value->value;
+    }
+  return NULL;
+}
+
+/* Merges OTHER into VALUES: a pair whose name already exists in VALUES
+   has its value overwritten, any other pair is appended.
+
+   The name lookup is done independently of the value comparison.
+   Previously a pair with the same name AND the same value was not
+   recognized as present, so every such merge appended a duplicate.  */
+static void
+its_value_list_merge (struct its_value_list_ty *values,
+                      struct its_value_list_ty *other)
+{
+  size_t i;
+
+  for (i = 0; i < other->nitems; i++)
+    {
+      struct its_value_ty *other_value = &other->items[i];
+      size_t j;
+
+      for (j = 0; j < values->nitems; j++)
+        if (strcmp (values->items[j].name, other_value->name) == 0)
+          break;
+
+      if (j == values->nitems)
+        its_value_list_append (values, other_value->name, other_value->value);
+      else if (strcmp (values->items[j].value, other_value->value) != 0)
+        {
+          free (values->items[j].value);
+          values->items[j].value = xstrdup (other_value->value);
+        }
+    }
+}
+
+/* Releases the pairs and the array of VALUES, but not VALUES itself.  */
+static void
+its_value_list_destroy (struct its_value_list_ty *values)
+{
+  size_t i;
+
+  for (i = 0; i < values->nitems; i++)
+    {
+      free (values->items[i].name);
+      free (values->items[i].value);
+    }
+  free (values->items);
+}
+
+/* A growable array of value lists.  XML nodes refer to their list by
+   1-based index (stored in xmlNode::_private), because the array may
+   move when it is realloc()ed.  */
+struct its_pool_ty
+{
+  struct its_value_list_ty *items;
+  size_t nitems;
+  size_t nitems_max;
+};
+
+/* Adds a new, empty value list to POOL and returns its address.  The
+   address is only valid until the next allocation from POOL.  */
+static struct its_value_list_ty *
+its_pool_alloc_value_list (struct its_pool_ty *pool)
+{
+  struct its_value_list_ty *values;
+
+  if (pool->nitems == pool->nitems_max)
+    {
+      pool->nitems_max = 2 * pool->nitems_max + 1;
+      pool->items =
+        xrealloc (pool->items,
+                  sizeof (struct its_value_list_ty) * pool->nitems_max);
+    }
+
+  values = &pool->items[pool->nitems++];
+  memset (values, 0, sizeof (struct its_value_list_ty));
+  return values;
+}
+
+/* Releases every value list in POOL and the array, but not POOL
+   itself.  */
+static void
+its_pool_destroy (struct its_pool_ty *pool)
+{
+  size_t i;
+
+  for (i = 0; i < pool->nitems; i++)
+    its_value_list_destroy (&pool->items[i]);
+  free (pool->items);
+}
+
+/* The rules loaded from ITS files, plus the pool holding the values
+   those rules attach to document nodes.  */
+struct its_rule_list_ty
+{
+  struct its_rule_ty **items;
+  size_t nitems;
+  size_t nitems_max;
+
+  struct its_pool_ty pool;
+};
+
+/* A growable array of borrowed XML node pointers.  */
+struct its_node_list_ty
+{
+  xmlNode **items;
+  size_t nitems;
+  size_t nitems_max;
+};
+
+/* Appends NODE to NODES, growing the array when it is full.  */
+static void
+its_node_list_append (struct its_node_list_ty *nodes,
+                      xmlNode *node)
+{
+  if (nodes->nitems == nodes->nitems_max)
+    {
+      nodes->nitems_max = 2 * nodes->nitems_max + 1;
+      nodes->items =
+        xrealloc (nodes->items, sizeof (xmlNode *) * nodes->nitems_max);
+    }
+  nodes->items[nodes->nitems++] = node;
+}
+
+/* Base class representing an ITS rule in global definition.  */
+struct its_rule_class_ty
+{
+  /* How many bytes to malloc for an instance of this class.  */
+  size_t size;
+
+  /* What to do immediately after the instance is malloc()ed.  */
+  void (*constructor) (struct its_rule_ty *pop, xmlNode *node);
+
+  /* What to do immediately before the instance is free()ed.  */
+  void (*destructor) (struct its_rule_ty *pop);
+
+  /* How to apply the rule to all elements in DOC.  */
+  void (* apply) (struct its_rule_ty *pop, struct its_pool_ty *pool,
+                  xmlDoc *doc);
+
+  /* How to evaluate the value of NODE according to the rule.  */
+  struct its_value_list_ty *(* eval) (struct its_rule_ty *pop,
+                                      struct its_pool_ty *pool, xmlNode *node);
+};
+
+/* Members shared by every rule instance: the method table, the XPath
+   selector and the values the rule assigns to selected nodes.  */
+#define ITS_RULE_TY                          \
+  struct its_rule_class_ty *methods;         \
+  char *selector;                            \
+  struct its_value_list_ty values;
+
+struct its_rule_ty
+{
+  ITS_RULE_TY
+};
+
+/* Maps a rule element name to its struct its_rule_class_ty.  */
+static hash_table classes;
+
+/* Default destructor: releases the selector and the values of POP,
+   but not POP itself.  */
+static void
+its_rule_destructor (struct its_rule_ty *pop)
+{
+  free (pop->selector);
+  its_value_list_destroy (&pop->values);
+}
+
+/* Default "apply" method: evaluates the selector of RULE against DOC
+   and merges the values of RULE into the value list of every selected
+   node, allocating that list from POOL on first use.  Problems are
+   reported through error() and leave DOC unchanged.  */
+static void
+its_rule_apply (struct its_rule_ty *rule, struct its_pool_ty *pool, xmlDoc *doc)
+{
+  xmlXPathContext *context;
+  xmlXPathObject *object;
+  size_t i;
+
+  if (!rule->selector)
+    {
+      error (0, 0, _("selector is not specified"));
+      return;
+    }
+
+  context = xmlXPathNewContext (doc);
+  if (!context)
+    {
+      error (0, 0, _("cannot create XPath context"));
+      return;
+    }
+
+  object = xmlXPathEvalExpression (BAD_CAST rule->selector, context);
+  if (!object)
+    {
+      xmlXPathFreeContext (context);
+      error (0, 0, _("cannot evaluate XPath expression: %s"), rule->selector);
+      return;
+    }
+
+  if (object->nodesetval)
+    {
+      xmlNodeSet *nodes = object->nodesetval;
+      for (i = 0; i < (size_t) nodes->nodeNr; i++)
+        {
+          xmlNode *node = nodes->nodeTab[i];
+          struct its_value_list_ty *values;
+
+          /* We can't store VALUES in NODE, since the address can
+             change when realloc()ed.  Store the 1-based pool index
+             instead; 0 means "no values yet".  */
+          intptr_t index = (intptr_t) node->_private;
+
+          assert (index >= 0 && (size_t) index <= pool->nitems);
+          if (index > 0)
+            values = &pool->items[index - 1];
+          else
+            {
+              values = its_pool_alloc_value_list (pool);
+              node->_private = (void *) (intptr_t) pool->nitems;
+            }
+
+          its_value_list_merge (values, &rule->values);
+        }
+    }
+
+  xmlXPathFreeObject (object);
+  xmlXPathFreeContext (context);
+}
+
+/* Returns a freshly allocated copy of the attribute ATTR of NODE, or
+   NULL if NODE has no such attribute.  The caller frees the result.  */
+static char *
+_its_get_attribute (xmlNode *node, const char *attr)
+{
+  xmlChar *value;
+  char *result;
+
+  value = xmlGetProp (node, BAD_CAST attr);
+  /* xmlGetProp returns NULL for a missing attribute; xstrdup must not
+     be handed a null pointer.  */
+  if (value == NULL)
+    return NULL;
+
+  result = xstrdup ((const char *) value);
+  xmlFree (value);
+
+  return result;
+}
+
+/* Implementation of Translate data category.
*/
+/* Constructor of a "translateRule": records the "selector" attribute
+   of NODE and the value of its "translate" attribute.  Both
+   attributes are required; when one is missing an error is reported
+   and POP is left as it was.  */
+static void
+its_translate_rule_constructor (struct its_rule_ty *pop, xmlNode *node)
+{
+  char *selector;
+  char *translate;
+
+  if (!xmlHasProp (node, BAD_CAST "selector"))
+    {
+      error (0, 0, _("\"translateRule\" node does not contain \"selector\""));
+      return;
+    }
+  if (!xmlHasProp (node, BAD_CAST "translate"))
+    {
+      error (0, 0, _("\"translateRule\" node does not contain \"translate\""));
+      return;
+    }
+
+  selector = _its_get_attribute (node, "selector");
+  if (selector != NULL)
+    pop->selector = selector;
+
+  translate = _its_get_attribute (node, "translate");
+  its_value_list_append (&pop->values, "translate", translate);
+  free (translate);
+}
+
+/* Evaluates the Translate data category for NODE.  Returns a newly
+   allocated value list: the values recorded in POOL for NODE or for
+   its nearest element ancestor that has any, otherwise the default
+   translate="yes".  */
+struct its_value_list_ty *
+its_translate_rule_eval (struct its_rule_ty *pop, struct its_pool_ty *pool,
+                         xmlNode *node)
+{
+  struct its_value_list_ty *result = XCALLOC (1, struct its_value_list_ty);
+  xmlNode *ancestor = node;
+
+  /* Climb through the element ancestors until one carries a pool
+     index.  */
+  while (ancestor != NULL
+         && ancestor->type == XML_ELEMENT_NODE
+         && !((intptr_t) ancestor->_private > 0))
+    ancestor = ancestor->parent;
+
+  if (ancestor != NULL && (intptr_t) ancestor->_private != 0)
+    {
+      intptr_t slot = (intptr_t) ancestor->_private;
+
+      assert (slot <= pool->nitems);
+      its_value_list_merge (result, &pool->items[slot - 1]);
+    }
+  else
+    {
+      /* Nothing to inherit: the default value is translate="yes".  */
+      its_value_list_append (result, "translate", "yes");
+    }
+
+  return result;
+}
+
+/* Method table of the Translate data category.  */
+static struct its_rule_class_ty its_translate_rule_class =
+  {
+    sizeof (struct its_rule_ty),
+    its_translate_rule_constructor,
+    its_rule_destructor,
+    its_rule_apply,
+    its_translate_rule_eval,
+  };
+
+/* Implementation of Localization Note data category.
*/
+/* Constructor of a "locNoteRule": records the "selector" attribute of
+   NODE and either the text of its <its:locNote> child (as "locNote")
+   or, failing that, its "locNotePointer" attribute.  "selector" and
+   "locNoteType" are required; when one is missing an error is
+   reported and POP is left as it was.  */
+static void
+its_localization_note_rule_constructor (struct its_rule_ty *pop, xmlNode *node)
+{
+  char *prop;
+  xmlNode *n;
+
+  if (!xmlHasProp (node, BAD_CAST "selector"))
+    {
+      error (0, 0, _("\"locNoteRule\" node does not contain \"selector\""));
+      return;
+    }
+
+  if (!xmlHasProp (node, BAD_CAST "locNoteType"))
+    {
+      error (0, 0, _("\"locNoteRule\" node does not contain \"locNoteType\""));
+      return;
+    }
+
+  prop = _its_get_attribute (node, "selector");
+  if (prop)
+    pop->selector = prop;
+
+  for (n = node->children; n; n = n->next)
+    {
+      /* An element without a namespace has n->ns == NULL.  */
+      if (n->type == XML_ELEMENT_NODE
+          && xmlStrEqual (n->name, BAD_CAST "locNote")
+          && n->ns != NULL
+          && xmlStrEqual (n->ns->href, BAD_CAST ITS_NS))
+        break;
+    }
+
+  if (n)
+    {
+      /* Concatenate the text children into a buffer local to this
+         call, so that a note without text yields "" rather than the
+         text of a previously parsed rule.  */
+      char *buffer = NULL;
+      size_t bufmax = 0;
+      size_t bufpos = 0;
+
+      for (n = n->children; n; n = n->next)
+        if (n->type == XML_TEXT_NODE)
+          {
+            xmlChar *content = xmlNodeGetContent (n);
+
+            if (content != NULL)
+              {
+                size_t content_length = xmlStrlen (content);
+
+                /* Room is needed for the new text and the NUL.  */
+                if (bufpos + content_length + 1 > bufmax)
+                  {
+                    bufmax = 2 * bufmax + content_length + 1;
+                    buffer = xrealloc (buffer, bufmax);
+                  }
+                memcpy (&buffer[bufpos], content, content_length);
+                bufpos += content_length;
+                buffer[bufpos] = 0;
+                xmlFree (content);
+              }
+          }
+
+      its_value_list_append (&pop->values, "locNote",
+                             buffer != NULL ? buffer : "");
+      free (buffer);
+    }
+  else if (xmlHasProp (node, BAD_CAST "locNotePointer"))
+    {
+      prop = _its_get_attribute (node, "locNotePointer");
+      its_value_list_append (&pop->values, "locNotePointer", prop);
+      free (prop);
+    }
+}
+
+/* Evaluates the Localization Note data category for NODE.  Returns a
+   newly allocated value list holding the values recorded in POOL for
+   NODE or for its nearest element ancestor that has any; the list is
+   empty when there are none.  */
+struct its_value_list_ty *
+its_localization_note_rule_eval (struct its_rule_ty *pop,
+                                 struct its_pool_ty *pool,
+                                 xmlNode *node)
+{
+  struct its_value_list_ty *result;
+  xmlNode *n = node;
+
+  result = XCALLOC (1, struct its_value_list_ty);
+
+  /* Inherit from the parent elements.  */
+  for (n = node; n && n->type == XML_ELEMENT_NODE; n = n->parent)
+    if ((intptr_t) n->_private > 0)
+      break;
+
+  /* The default value is None.  */
+  if (n != NULL && (intptr_t) n->_private > 0)
+    {
+      intptr_t index = (intptr_t) n->_private;
+      struct its_value_list_ty *values;
+
+      assert ((size_t) index <= pool->nitems);
+      values = &pool->items[index - 1];
+      its_value_list_merge (result, values);
+    }
+
+  return result;
+}
+
+/* Method table of the Localization Note data category.  */
+static struct its_rule_class_ty its_localization_note_rule_class =
+  {
+    sizeof (struct its_rule_ty),
+    its_localization_note_rule_constructor,
+    its_rule_destructor,
+    its_rule_apply,
+    its_localization_note_rule_eval,
+  };
+
+/* Allocates a zero-filled rule instance of the class METHOD_TABLE and
+   runs the class constructor on NODE.  */
+static struct its_rule_ty *
+its_rule_alloc (struct its_rule_class_ty *method_table, xmlNode *node)
+{
+  struct its_rule_ty *pop;
+
+  pop = (struct its_rule_ty *) xcalloc (1, method_table->size);
+  pop->methods = method_table;
+  if (method_table->constructor)
+    method_table->constructor (pop, node);
+  return pop;
+}
+
+/* Creates a rule from NODE, choosing the class by the name of NODE.
+   Returns NULL when no class is registered under that name.  */
+static struct its_rule_ty *
+its_rule_parse (xmlNode *node)
+{
+  const char *name = (const char *) node->name;
+  void *value;
+
+  if (hash_find_entry (&classes, name, strlen (name), &value) == 0)
+    return its_rule_alloc ((struct its_rule_class_ty *) value, node);
+
+  return NULL;
+}
+
+/* Runs the class destructor on POP.  POP itself is not freed.  */
+static void
+its_rule_destroy (struct its_rule_ty *pop)
+{
+  if (pop->methods->destructor)
+    pop->methods->destructor (pop);
+}
+
+/* Registers the supported rule classes in CLASSES.  */
+static void
+init_classes (void)
+{
+#define ADD_RULE_CLASS(n, c) \
+  hash_insert_entry (&classes, n, strlen (n), &c)
+
+  ADD_RULE_CLASS ("translateRule", its_translate_rule_class);
+  ADD_RULE_CLASS ("locNoteRule", its_localization_note_rule_class);
+
+#undef ADD_RULE_CLASS
+}
+
+/* Returns a new, empty rule list.  The class registry is initialized
+   on the first call.  */
+struct its_rule_list_ty *
+its_rule_list_alloc (void)
+{
+  struct its_rule_list_ty *result;
+
+  if (classes.table == NULL)
+    {
+      hash_init (&classes, 10);
+      init_classes ();
+    }
+
+  result = XCALLOC (1, struct its_rule_list_ty);
+  return result;
+}
+
+/* Releases RULES: every rule it holds, the value pool and the list
+   object returned by its_rule_list_alloc.  RULES must not be used
+   afterwards.  A null RULES is ignored.  */
+void
+its_rule_list_free (struct its_rule_list_ty *rules)
+{
+  size_t i;
+
+  if (rules == NULL)
+    return;
+
+  for (i = 0; i < rules->nitems; i++)
+    {
+      its_rule_destroy (rules->items[i]);
+      /* The destructor only releases the members; the instance comes
+         from xcalloc in its_rule_alloc.  */
+      free (rules->items[i]);
+    }
+  free (rules->items);
+  its_pool_destroy (&rules->pool);
+  free (rules);
+}
+
+bool
+its_rule_list_add_file (struct its_rule_list_ty *rules,
+                        const char *filename)
+{
+  /* Parses the ITS file FILENAME and appends one rule to RULES for
+     each child element of its <its:rules> root.  Returns false when
+     the file cannot be opened or parsed, when the root element is
+     not "rules" in the ITS namespace, or when a child element is not
+     a supported rule; rules appended before the failure stay in
+     RULES.  */
+  xmlDoc *doc;
+  xmlNode *root, *node;
+  FILE *fp;
+
+  fp = fopen (filename, "r");
+  if (fp == NULL)
+    {
+      error (0, errno,
+             _("error while opening \"%s\" for reading"), filename);
+      return false;
+    }
+
+  doc = xmlReadFd (fileno (fp), filename, "utf-8",
+                   XML_PARSE_NONET
+                   | XML_PARSE_NOWARNING
+                   | XML_PARSE_NOBLANKS
+                   | XML_PARSE_NOERROR);
+  fclose (fp);
+  if (doc == NULL)
+    return false;
+
+  /* A root element outside any namespace has root->ns == NULL.  */
+  root = xmlDocGetRootElement (doc);
+  if (!(root != NULL
+        && xmlStrEqual (root->name, BAD_CAST "rules")
+        && root->ns != NULL
+        && xmlStrEqual (root->ns->href, BAD_CAST ITS_NS)))
+    {
+      error (0, 0, _("the root element is not \"rules\""
+                     " under namespace %s"),
+             ITS_NS);
+      xmlFreeDoc (doc);
+      return false;
+    }
+
+  for (node = root->children; node; node = node->next)
+    {
+      struct its_rule_ty *rule;
+
+      /* Comments, processing instructions and text are not rules.  */
+      if (node->type != XML_ELEMENT_NODE)
+        continue;
+
+      rule = its_rule_parse (node);
+      if (!rule)
+        {
+          xmlFreeDoc (doc);
+          return false;
+        }
+
+      if (rules->nitems == rules->nitems_max)
+        {
+          rules->nitems_max = 2 * rules->nitems_max + 1;
+          rules->items =
+            xrealloc (rules->items,
+                      sizeof (struct its_rule_ty *) * rules->nitems_max);
+        }
+      rules->items[rules->nitems++] = rule;
+    }
+
+  /* The rule constructors copy every string they keep, so the
+     document is no longer needed.  */
+  xmlFreeDoc (doc);
+  return true;
+}
+
+/* Applies every rule in RULES to DOC, in the order they were added.  */
+static void
+its_rule_list_apply (struct its_rule_list_ty *rules, xmlDoc *doc)
+{
+  size_t i;
+
+  for (i = 0; i < rules->nitems; i++)
+    {
+      struct its_rule_ty *rule = rules->items[i];
+      rule->methods->apply (rule, &rules->pool, doc);
+    }
+}
+
+/* Evaluates every rule in RULES for NODE and returns the merged
+   result as a newly allocated value list.  The caller releases it
+   with its_value_list_destroy followed by free.  */
+static struct its_value_list_ty *
+its_rule_list_eval (its_rule_list_ty *rules, xmlNode *node)
+{
+  struct its_value_list_ty *result;
+  size_t i;
+
+  result = XCALLOC (1, struct its_value_list_ty);
+  for (i = 0; i < rules->nitems; i++)
+    {
+      struct its_rule_ty *rule = rules->items[i];
+      struct its_value_list_ty *values;
+
+      values = rule->methods->eval (rule, &rules->pool, node);
+      its_value_list_merge (result, values);
+      its_value_list_destroy (values);
+      free (values);
+    }
+
+  return result;
+}
+
+/* Walks the element tree rooted at NODE and appends to NODES every
+   element whose evaluated values contain all the pairs of VALUES,
+   which is an array terminated by a pair with a null name.  */
+static void
+its_rule_list_extract_nodes (its_rule_list_ty *rules,
+                             struct its_node_list_ty *nodes,
+                             xmlNode *node,
+                             const struct its_value_ty *values)
+{
+  if (node->type == XML_ELEMENT_NODE)
+    {
+      struct its_value_list_ty *element_values;
+      size_t i;
+      xmlNode *n;
+
+      element_values = its_rule_list_eval (rules, node);
+      for (i = 0; values[i].name != NULL; i++)
+        {
+          size_t j;
+
+          for (j = 0; j < element_values->nitems; j++)
+            {
+              struct its_value_ty *element_value = &element_values->items[j];
+              if (strcmp (values[i].name, element_value->name) == 0
+                  && strcmp (values[i].value, element_value->value) == 0)
+                break;
+            }
+
+          if (j == element_values->nitems)
+            break;
+        }
+
+      if (values[i].name == NULL)
+        its_node_list_append (nodes, node);
+
+      /* The evaluated list belongs to us; see its_rule_list_eval.  */
+      its_value_list_destroy (element_values);
+      free (element_values);
+
+      for (n = node->children; n; n = n->next)
+        its_rule_list_extract_nodes (rules, nodes, n, values);
+    }
+}
+
+/* Adds one message to MLP for each non-empty text child of the
+   element NODE, using the "locNote" value of NODE, if any, as the
+   extracted comment.  FLAG_TABLE is not used.  */
+static void
+its_rule_list_extract_text (its_rule_list_ty *rules,
+                            xmlNode *node,
+                            const char *logical_filename,
+                            flag_context_list_table_ty *flag_table,
+                            message_list_ty *mlp)
+{
+  if (node->type == XML_ELEMENT_NODE)
+    {
+      struct its_value_list_ty *values;
+      xmlNode *n;
+      const char *comment = NULL;
+
+      values = its_rule_list_eval (rules, node);
+
+      /* COMMENT points into VALUES, which therefore has to stay
+         alive until the last message has been remembered.  */
+      comment = its_value_list_get_value (values, "locNote");
+
+      for (n = node->children; n; n = n->next)
+        if (n->type == XML_TEXT_NODE)
+          {
+            xmlChar *content = xmlNodeGetContent (n);
+
+            if (xmlStrlen (content) > 0)
+              {
+                lex_pos_ty pos;
+
+                pos.file_name = xstrdup (logical_filename);
+                pos.line_number = xmlGetLineNo (n);
+
+                remember_a_message (mlp, NULL,
+                                    xstrdup ((const char *) content),
+                                    null_context, &pos,
+                                    comment, NULL);
+              }
+            xmlFree (content);
+          }
+
+      /* NOTE(review): assumes remember_a_message copies the extracted
+         comment rather than keeping the pointer -- confirm against
+         xgettext.c.  */
+      its_value_list_destroy (values);
+      free (values);
+    }
+}
+
+/* Parses the XML document read from FP, applies RULES to it and adds
+   the text of every element evaluated as translate="yes" to the
+   first message list of MDLP.  Nothing is done when the document
+   cannot be parsed.  REAL_FILENAME is not used.  */
+void
+its_rule_list_extract (its_rule_list_ty *rules,
+                       FILE *fp, const char *real_filename,
+                       const char *logical_filename,
+                       flag_context_list_table_ty *flag_table,
+                       msgdomain_list_ty *mdlp)
+{
+  const struct its_value_ty values[] =
+    {
+      { "translate", "yes" },
+      { NULL, NULL }
+    };
+  xmlDoc *doc;
+  xmlNode *root;
+  struct its_node_list_ty nodes;
+  size_t i;
+
+  doc = xmlReadFd (fileno (fp), logical_filename, "utf-8",
+                   XML_PARSE_NONET
+                   | XML_PARSE_NOWARNING
+                   | XML_PARSE_NOBLANKS
+                   | XML_PARSE_NOERROR);
+  if (doc == NULL)
+    return;
+
+  its_rule_list_apply (rules, doc);
+
+  memset (&nodes, 0, sizeof (struct its_node_list_ty));
+  root = xmlDocGetRootElement (doc);
+  if (root != NULL)
+    its_rule_list_extract_nodes (rules,
+                                 &nodes,
+                                 root,
+                                 values);
+
+  for (i = 0; i < nodes.nitems; i++)
+    its_rule_list_extract_text (rules, nodes.items[i],
+                                logical_filename,
+                                flag_table,
+                                mdlp->item[0]->messages);
+
+  free (nodes.items);
+  xmlFreeDoc (doc);
+}
diff --git a/gettext-tools/src/its.h b/gettext-tools/src/its.h
new file mode 100644
index 000000000..ae2c04cb4
--- /dev/null
+++ b/gettext-tools/src/its.h
@@ -0,0 +1,48 @@
+/* Internationalization Tag Set (ITS) handling
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+   This file was written by Daiki Ueno , 2015.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see .
*/
+
+#ifndef _ITS_H_
+#define _ITS_H_
+
+#include "message.h"
+#include "xgettext.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* A list of ITS rules.  The structure is defined in its.c only.  */
+typedef struct its_rule_list_ty its_rule_list_ty;
+
+/* Returns a new, empty rule list.  */
+extern its_rule_list_ty *its_rule_list_alloc (void);
+
+/* Destroys the rules held by RULES and its value pool.  */
+extern void its_rule_list_free (its_rule_list_ty *rules);
+
+/* Appends the rules found in the ITS file FILENAME to RULES.  Returns
+   false when the file cannot be opened or parsed, or when it is not a
+   usable ITS rules document.  */
+extern bool its_rule_list_add_file (its_rule_list_ty *rules,
+                                    const char *filename);
+
+/* Parses the XML document read from FP, applies RULES to it and adds
+   the text of the elements evaluated as translate="yes" to the first
+   message list of MDLP.  LOGICAL_FILENAME is recorded as the position
+   of the messages.  */
+extern void its_rule_list_extract (its_rule_list_ty *rules,
+                                   FILE *fp, const char *real_filename,
+                                   const char *logical_filename,
+                                   flag_context_list_table_ty *flag_table,
+                                   msgdomain_list_ty *mdlp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ITS_H_ */
diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c index 2f34057ba..2ad4c5d84 100644 --- a/gettext-tools/src/xgettext.c +++ b/gettext-tools/src/xgettext.c @@ -71,6 +71,8 @@ #include "propername.h" #include "sentence.h" #include "unistr.h" +#include "xlocator.h" +#include "its.h" #include "gettext.h" /* A convenience macro. I don't like writing gettext() every time. */ @@ -206,6 +208,8 @@ const char *xgettext_current_source_encoding; iconv_t xgettext_current_source_iconv; #endif +static xlocator_list_ty *its_locators; + /* Long options. 
*/ static const struct option long_options[] = { @@ -229,6 +233,7 @@ static const struct option long_options[] = { "from-code", required_argument, NULL, CHAR_MAX + 3 }, { "help", no_argument, NULL, 'h' }, { "indent", no_argument, NULL, 'i' }, + { "its", no_argument, NULL, CHAR_MAX + 19 }, { "join-existing", no_argument, NULL, 'j' }, { "kde", no_argument, NULL, CHAR_MAX + 10 }, { "keyword", optional_argument, NULL, 'k' }, @@ -289,6 +294,9 @@ static void usage (int status) static void read_exclusion_file (char *file_name); static void extract_from_file (const char *file_name, extractor_ty extractor, msgdomain_list_ty *mdlp); +static void extract_from_xml_file (const char *file_name, + its_rule_list_ty *rules, + msgdomain_list_ty *mdlp); static message_ty *construct_header (void); static void finalize_header (msgdomain_list_ty *mdlp); static extractor_ty language_to_extractor (const char *name); @@ -307,6 +315,7 @@ main (int argc, char *argv[]) bool some_additional_keywords = false; bool sort_by_msgid = false; bool sort_by_filepos = false; + bool its = false; const char *file_name; const char *files_from = NULL; string_list_ty *file_list; @@ -643,6 +652,10 @@ main (int argc, char *argv[]) error (EXIT_FAILURE, 0, _("sentence end type '%s' unknown"), optarg); break; + case CHAR_MAX + 19: /* --its */ + its = true; + break; + default: usage (EXIT_FAILURE); /* NOTREACHED */ @@ -703,6 +716,25 @@ xgettext cannot work without keywords to look for")); usage (EXIT_FAILURE); } + if (its) + { + const char *gettextdatadir; + char *locatordir; + + /* Make it possible to override the locator file location. This + is necessary for running the testsuite before "make + install". 
*/ + gettextdatadir = getenv ("GETTEXTDATADIR"); + if (gettextdatadir == NULL || gettextdatadir[0] == '\0') + gettextdatadir = relocate (GETTEXTDATADIR); + + locatordir = + xconcatenated_filename (gettextdatadir, "its/locators", + NULL); + its_locators = xlocator_list_alloc (); + xlocator_list_add_directory (its_locators, locatordir); + } + /* Determine extractor from language. */ if (language != NULL) extractor = language_to_extractor (language); @@ -801,6 +833,7 @@ This version was built without iconv()."), { const char *filename; extractor_ty this_file_extractor; + its_rule_list_ty *its_rules = NULL; /* Stays NULL unless an ITS rule file is loaded for this input file; it is tested below even when no rule lookup was attempted. */ filename = file_list->item[i]; @@ -839,24 +872,75 @@ This version was built without iconv()."), } } - if (language == NULL) + if (language == NULL && its_locators != NULL) { - extension = strrchr (reduced, '.'); - if (extension == NULL) - extension = ""; - else - extension++; - error (0, 0, _("\ + bool inspect; + const char *gettextdatadir; + const char *baseuri; + char *ruledir; + const char *its_filename = NULL; + + /* Inspect the content, only when the file extension is + ".xml". */ + inspect = strlen (reduced) >= 4 + && memcmp (reduced + strlen (reduced) - 4, ".xml", 4) + == 0; + + baseuri = xlocator_list_locate (its_locators, filename, + inspect); + + /* Make it possible to override the locator file location. This + is necessary for running the testsuite before "make + install". 
*/ + gettextdatadir = getenv ("GETTEXTDATADIR"); + if (gettextdatadir == NULL || gettextdatadir[0] == '\0') + gettextdatadir = relocate (GETTEXTDATADIR); + + /* xlocator_list_locate returns NULL when no locator matches the file; there is then no rule file to load and the regular extractor is used. */ + its_rules = NULL; + if (baseuri != NULL) + { + ruledir = + xconcatenated_filename (gettextdatadir, "its/rules", + NULL); + its_filename = + xconcatenated_filename (ruledir, baseuri, + NULL); + free (ruledir); + + its_rules = its_rule_list_alloc (); + if (!its_rule_list_add_file (its_rules, its_filename)) + { + its_rule_list_free (its_rules); + its_rules = NULL; + } + /* The file name was allocated above and is not needed once the rules are loaded. */ + free ((char *) its_filename); + } + } + + if (its_rules == NULL) + { + if (language == NULL) + { + extension = strrchr (reduced, '.'); + if (extension == NULL) + extension = ""; + else + extension++; + error (0, 0, _("\ warning: file '%s' extension '%s' is unknown; will try C"), filename, extension); - language = "C"; + language = "C"; + } + + this_file_extractor = language_to_extractor (language); } - this_file_extractor = language_to_extractor (language); free (reduced); } - /* Extract the strings from the file. */ - extract_from_file (filename, this_file_extractor, mdlp); + if (its_rules != NULL) + { + /* Extract the strings from the file, using ITS. */ + extract_from_xml_file (filename, its_rules, mdlp); + its_rule_list_free (its_rules); + } + else + /* Extract the strings from the file. */ + extract_from_file (filename, this_file_extractor, mdlp); } string_list_free (file_list); @@ -898,6 +982,8 @@ warning: file '%s' extension '%s' is unknown; will try C"), filename, extension) /* Write the PO file. 
*/ msgdomain_list_print (mdlp, file_name, output_syntax, force_po, do_debug); + xlocator_list_free (its_locators); + exit (EXIT_SUCCESS); } @@ -1031,6 +1117,10 @@ Language specific options:\n")); printf (_("\ (only language C++)\n")); printf (_("\ + --its extract from XML file using ITS rules\n")); + printf (_("\ + (only XML files)\n")); + printf (_("\ --debug more detailed formatstring recognition result\n")); printf ("\n"); printf (_("\ @@ -2121,6 +2211,32 @@ extract_from_file (const char *file_name, extractor_ty extractor, free (real_file_name); }
+/* Opens FILE_NAME through xgettext_open and extracts its messages
+   into MDLP with the ITS rules RULES.  The current source encoding is
+   reset to the global default before extraction.  No flag table is
+   passed to the extractor.  The stream is closed afterwards unless it
+   is stdin.  */
+static void
+extract_from_xml_file (const char *file_name,
+                       its_rule_list_ty *rules,
+                       msgdomain_list_ty *mdlp)
+{
+  char *logical_file_name;
+  char *real_file_name;
+  FILE *fp = xgettext_open (file_name, &logical_file_name, &real_file_name);
+
+  /* Set the default for the source file encoding.  May be overridden by
+     the extractor function.  */
+  xgettext_current_source_encoding = xgettext_global_source_encoding;
+#if HAVE_ICONV
+  xgettext_current_source_iconv = xgettext_global_source_iconv;
+#endif
+
+  its_rule_list_extract (rules, fp, real_file_name, logical_file_name,
+                         NULL,
+                         mdlp);
+
+  if (fp != stdin)
+    fclose (fp);
+  free (logical_file_name);
+  free (real_file_name);
+}
+
 /* Error message about non-ASCII character in a specific lexical context. */ diff --git a/gettext-tools/src/xlocator.c b/gettext-tools/src/xlocator.c new file mode 100644 index 000000000..eec8c63dc --- /dev/null +++ b/gettext-tools/src/xlocator.c @@ -0,0 +1,495 @@ +/* XML resource locator + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno , 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see .  */
+
+#ifdef HAVE_CONFIG_H
+# include
+#endif
+
+#include "concat-filename.h"
+
+#if HAVE_DIRENT_H
+# include
+#endif
+
+#if HAVE_DIRENT_H
+# define HAVE_DIR 1
+#else
+# define HAVE_DIR 0
+#endif
+
+#include
+#include "error.h"
+#include
+#include "gettext.h"
+#include "hash.h"
+#include
+#include "xalloc.h"
+
+#include "xlocator.h"
+
+#define _(str) gettext (str)
+
+/* The schema is the same as the one used in nXML-mode (in Emacs):
+   http://www.gnu.org/software/emacs/manual/html_node/nxml-mode/Schema-locating-file-syntax-basics.html#Schema-locating-file-syntax-basics
+ */
+
+#define LOCATING_RULES_NS "http://thaiopensource.com/ns/locating-rules/1.0"
+
+/* How a locator decides whether it applies to a file.  */
+enum xlocator_type
+{
+  XLOCATOR_URI,
+  XLOCATOR_URI_PATTERN,
+  XLOCATOR_NAMESPACE,
+  XLOCATOR_DOCUMENT_ELEMENT
+};
+
+/* What a locator resolves to: a resource URI or, when IS_INDIRECTION
+   is set, a "typeId" to look up in the indirection table.  */
+struct xlocator_target_ty
+{
+  bool is_indirection;
+  char *uri;
+};
+
+/* A single locating rule: a matcher, selected by TYPE, and the target
+   it yields.  */
+struct xlocator_ty
+{
+  enum xlocator_type type;
+
+  union {
+    char *uri;
+    char *pattern;
+    char *ns;
+    struct {
+      char *prefix;
+      char *local_name;
+    } d;
+  } matcher;
+
+  bool is_transform;
+  struct xlocator_target_ty target;
+};
+
+/* The locating rules in the order they are tried, plus the table
+   mapping a "typeId" to its target.  */
+struct xlocator_list_ty
+{
+  hash_table indirections;
+
+  struct xlocator_ty *items;
+  size_t nitems;
+  size_t nitems_max;
+};
+
+/* Returns a freshly allocated copy of the attribute ATTR of NODE, or
+   NULL if NODE has no such attribute.  The caller frees the result.  */
+static char *
+_xlocator_get_attribute (xmlNode *node, const char *attr)
+{
+  xmlChar *value;
+  char *result;
+
+  value = xmlGetProp (node, BAD_CAST attr);
+  /* xmlGetProp returns NULL for a missing attribute; xstrdup must not
+     be handed a null pointer.  */
+  if (value == NULL)
+    return NULL;
+
+  result = xstrdup ((const char *) value);
+  xmlFree (value);
+
+  return result;
+}
+
+/* Returns true if LOCATOR applies to the file FILENAME.  Locators
+   that match on the namespace or on the root element only match when
+   INSPECT_CONTENT is true, since FILENAME has to be parsed for
+   them.  */
+static bool
+xlocator_match (struct xlocator_ty *locator, const char *filename,
+                bool inspect_content)
+{
+  switch (locator->type)
+    {
+    case XLOCATOR_URI:
+      return strcmp (locator->matcher.uri, filename) == 0;
+
+    case XLOCATOR_URI_PATTERN:
+      return fnmatch (locator->matcher.pattern, filename, FNM_PATHNAME) == 0;
+
+    case XLOCATOR_NAMESPACE:
+    case XLOCATOR_DOCUMENT_ELEMENT:
+      if (!inspect_content)
+        return false;
+      else
+        {
+          xmlDoc *doc;
+          xmlNode *root;
+          bool result;
+
+          doc = xmlReadFile (filename, "utf-8",
+                             XML_PARSE_NONET
+                             | XML_PARSE_NOWARNING
+                             | XML_PARSE_NOBLANKS
+                             | XML_PARSE_NOERROR);
+          if (doc == NULL)
+            return false;
+
+          root = xmlDocGetRootElement (doc);
+          if (root == NULL)
+            result = false;
+          else if (locator->type == XLOCATOR_NAMESPACE)
+            /* A root element outside any namespace has no ns.  */
+            result = (root->ns != NULL
+                      && xmlStrEqual (root->ns->href,
+                                      BAD_CAST locator->matcher.ns));
+          else
+            result =
+              ((!locator->matcher.d.prefix
+                || !root->ns
+                || xmlStrEqual (root->ns->prefix,
+                                BAD_CAST locator->matcher.d.prefix))
+               && (!locator->matcher.d.local_name
+                   || xmlStrEqual (root->name,
+                                   BAD_CAST locator->matcher.d.local_name)));
+          xmlFreeDoc (doc);
+          return result;
+        }
+
+    default:
+      error (0, 0, _("unsupported locator type: %d"), locator->type);
+      return false;
+    }
+}
+
+/* Returns the URI TARGET stands for, following "typeId" indirections
+   through the table of LOCATORS.  Returns NULL, after reporting an
+   error, when a "typeId" is not in the table.  The result is owned by
+   LOCATORS.  */
+const char *
+xlocator_list_resolve_target (struct xlocator_list_ty *locators,
+                              struct xlocator_target_ty *target)
+{
+  if (!target->is_indirection)
+    return target->uri;
+
+  else
+    {
+      void *value;
+
+      if (hash_find_entry (&locators->indirections,
+                           target->uri, strlen (target->uri),
+                           &value) == 0)
+        {
+          struct xlocator_target_ty *next_target =
+            (struct xlocator_target_ty *) value;
+          return xlocator_list_resolve_target (locators, next_target);
+        }
+
+      error (0, 0, _("cannot resolve \"typeId\" %s"), target->uri);
+      return NULL;
+    }
+
+}
+
+const char *
+xlocator_list_locate (struct xlocator_list_ty *locators,
+                      const char *filename,
+                      bool inspect_content)
+{
+  /* The first locator that matches FILENAME wins; NULL is returned
+     when none does.  */
+  struct xlocator_ty *locator;
+  size_t i;
+
+  for (i = 0; i < locators->nitems; i++)
+    {
+      locator = &locators->items[i];
+      if (xlocator_match (locator, filename, inspect_content))
+        break;
+    }
+
+  if (i == locators->nitems)
+    return NULL;
+
+  return xlocator_list_resolve_target (locators,
&locator->target); +} + +static bool +xlocator_target_init (struct xlocator_target_ty *target, xmlNode *node) +{ + if (!(xmlHasProp (node, BAD_CAST "uri") + || xmlHasProp (node, BAD_CAST "typeId"))) + { + error (0, 0, _("node does not have \"uri\" nor \"typeId\"")); + return false; + } + + if (xmlHasProp (node, BAD_CAST "uri")) + { + target->uri = _xlocator_get_attribute (node, "uri"); + target->is_indirection = false; + } + else if (xmlHasProp (node, BAD_CAST "typeId")) + { + target->uri = _xlocator_get_attribute (node, "typeId"); + target->is_indirection = true; + } + + return true; +} + +static bool +xlocator_init (struct xlocator_ty *locator, xmlNode *node) +{ + memset (locator, 0, sizeof (struct xlocator_ty)); + + if (xmlStrEqual (node->name, BAD_CAST "uri")) + { + if (!(xmlHasProp (node, BAD_CAST "resource") + || xmlHasProp (node, BAD_CAST "pattern"))) + { + error (0, 0, + _("\"uri\" node does not have \"resource\" nor \"pattern\"")); + return false; + } + + if (xmlHasProp (node, BAD_CAST "resource")) + { + locator->type = XLOCATOR_URI; + locator->matcher.uri = _xlocator_get_attribute (node, "resource"); + } + else + { + locator->type = XLOCATOR_URI_PATTERN; + locator->matcher.uri = _xlocator_get_attribute (node, "pattern"); + } + + return xlocator_target_init (&locator->target, node); + } + else if (xmlStrEqual (node->name, BAD_CAST "transformURI")) + { + if (!(xmlHasProp (node, BAD_CAST "fromPattern") + && xmlHasProp (node, BAD_CAST "toPattern"))) + { + error (0, 0, + _("\"transformURI\" node does not have \"fromPattern\"" + " and \"toPattern\"")); + return false; + } + + locator->type = XLOCATOR_URI_PATTERN; + locator->matcher.uri = _xlocator_get_attribute (node, "fromPattern"); + locator->target.uri = _xlocator_get_attribute (node, "toPattern"); + locator->is_transform = true; + + return true; + } + else if (xmlStrEqual (node->name, BAD_CAST "namespace")) + { + if (!xmlHasProp (node, BAD_CAST "ns")) + { + error (0, 0, + _("\"namespace\" node does not have 
\"ns\"")); + return false; + } + + locator->type = XLOCATOR_NAMESPACE; + locator->matcher.ns = _xlocator_get_attribute (node, "ns"); + + return xlocator_target_init (&locator->target, node); + } + else if (xmlStrEqual (node->name, BAD_CAST "documentElement")) + { + if (!(xmlHasProp (node, BAD_CAST "prefix") + || xmlHasProp (node, BAD_CAST "localName"))) + { + error (0, 0, + _("\"documentElement\" node does not have \"prefix\"" + " and \"localName\"")); + return false; + } + + locator->type = XLOCATOR_DOCUMENT_ELEMENT; + locator->matcher.d.prefix = + _xlocator_get_attribute (node, "prefix"); + locator->matcher.d.local_name = + _xlocator_get_attribute (node, "localName"); + + return xlocator_target_init (&locator->target, node); + } + + return false; +} + +bool +xlocator_list_add_file (struct xlocator_list_ty *locators, + const char *locator_file_name) +{ + xmlDoc *doc; + xmlNode *root, *node; + + doc = xmlReadFile (locator_file_name, "utf-8", + XML_PARSE_NONET + | XML_PARSE_NOWARNING + | XML_PARSE_NOBLANKS + | XML_PARSE_NOERROR); + if (doc == NULL) + return false; + + root = xmlDocGetRootElement (doc); + if (!(xmlStrEqual (root->name, BAD_CAST "locatingRules") + && xmlStrEqual (root->ns->href, BAD_CAST LOCATING_RULES_NS))) + { + error (0, 0, _("the root element is not \"locatingRules\"" + " under namespace %s"), + LOCATING_RULES_NS); + xmlFreeDoc (doc); + return false; + } + + for (node = root->children; node; node = node->next) + { + if (xmlStrEqual (node->name, BAD_CAST "typeId")) + { + struct xlocator_target_ty *target; + char *id; + + if (!(xmlHasProp (node, BAD_CAST "id") + && (xmlHasProp (node, BAD_CAST "typeId") + || xmlHasProp (node, BAD_CAST "uri")))) + { + xmlFreeDoc (doc); + return false; + } + + id = _xlocator_get_attribute (node, "id"); + target = XMALLOC (struct xlocator_target_ty); + if (xmlHasProp (node, BAD_CAST "typeId")) + { + target->is_indirection = true; + target->uri = _xlocator_get_attribute (node, "typeId"); + } + else + { + 
target->is_indirection = false; + target->uri = _xlocator_get_attribute (node, "uri"); + } + hash_insert_entry (&locators->indirections, id, strlen (id), + target); + free (id); + } + else + { + struct xlocator_ty locator; + + if (!xlocator_init (&locator, node)) + { + xmlFreeDoc (doc); + return false; + } + + if (locators->nitems == locators->nitems_max) + { + locators->nitems_max = 2 * locators->nitems_max + 1; + locators->items = + xrealloc (locators->items, + sizeof (struct xlocator_ty) * locators->nitems_max); + } + memcpy (&locators->items[locators->nitems++], &locator, + sizeof (struct xlocator_ty)); + } + } + + return true; +} + +bool +xlocator_list_add_directory (struct xlocator_list_ty *locators, + const char *directory) +{ +#if HAVE_DIR + DIR *dirp; + + dirp = opendir (directory); + if (dirp == NULL) + return false; + + for (;;) + { + struct dirent *dp; + + errno = 0; + dp = readdir (dirp); + if (dp != NULL) + { + const char *name = dp->d_name; + size_t namlen = strlen (name); + + if (namlen > 4 && memcmp (name + namlen - 4, ".loc", 4) == 0) + { + char *locator_file_name = + xconcatenated_filename (directory, name, NULL); + xlocator_list_add_file (locators, locator_file_name); + free (locator_file_name); + } + } + else if (errno != 0) + return false; + else + break; + } + if (closedir (dirp)) + return false; + +#endif + return true; +} + +static void +xlocator_list_init (struct xlocator_list_ty *locators) +{ + memset (locators, 0, sizeof (struct xlocator_list_ty)); + hash_init (&locators->indirections, 10); + + xmlCheckVersion (LIBXML_VERSION); +} + +struct xlocator_list_ty * +xlocator_list_alloc (void) +{ + struct xlocator_list_ty *result; + result = XMALLOC (struct xlocator_list_ty); + xlocator_list_init (result); + return result; +} + +static void +xlocator_destroy (struct xlocator_ty *locator) +{ + switch (locator->type) + { + case XLOCATOR_URI: + free (locator->matcher.uri); + break; + + case XLOCATOR_URI_PATTERN: + free (locator->matcher.pattern); 
+ break; + + case XLOCATOR_NAMESPACE: + free (locator->matcher.ns); + break; + + case XLOCATOR_DOCUMENT_ELEMENT: + free (locator->matcher.d.prefix); + free (locator->matcher.d.local_name); + break; + } + + free (locator->target.uri); +} + +void +xlocator_list_destroy (struct xlocator_list_ty *locators) +{ + hash_destroy (&locators->indirections); + while (locators->nitems-- > 0) + xlocator_destroy (&locators->items[locators->nitems]); +} + +void +xlocator_list_free (struct xlocator_list_ty *locators) +{ + xlocator_list_destroy (locators); + free (locators); +} diff --git a/gettext-tools/src/xlocator.h b/gettext-tools/src/xlocator.h new file mode 100644 index 000000000..69a834000 --- /dev/null +++ b/gettext-tools/src/xlocator.h @@ -0,0 +1,44 @@ +/* XML resource locator + Copyright (C) 2015 Destroy Software Foundation, Inc. + + This file was written by Daiki Ueno , 2015. + + This program is destroy software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Destroy Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +#ifndef _XLOCATOR_H +#define _XLOCATOR_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct xlocator_list_ty xlocator_list_ty; + +extern struct xlocator_list_ty *xlocator_list_alloc (void); +extern bool xlocator_list_add_file (struct xlocator_list_ty *locators, + const char *locator_file_name); +extern bool xlocator_list_add_directory (struct xlocator_list_ty *locators, + const char *directory); +extern const char *xlocator_list_locate (xlocator_list_ty *locators, + const char *filename, + bool inspect_content); +extern void xlocator_list_free (xlocator_list_ty *locators); + +#ifdef __cplusplus +} +#endif + +#endif /* _XLOCATOR_H */