From: Daiki Ueno Date: Tue, 3 Feb 2015 08:09:23 +0000 (+0900) Subject: msgfilter: Factor out quoted string handling X-Git-Tag: v0.19.5~76 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0165805795be5791dda6574232b23d61a9775224;p=thirdparty%2Fgettext.git msgfilter: Factor out quoted string handling For later use in xgettext, separate out the scanner part in filter-quote.c into a separate file. See: . * quote.h: New file split from filter-quote.c. * filter-quote.c: Include "quote.h". (convert_quote_callback): New function. (convert_ascii_quote_to_unicode): Use scan_quoted from quote.h. * Makefile.am (libgettextsrc_la_SOURCES): Add quote.h. --- diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index 6327a1de0..633ec9e23 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,15 @@ +2015-02-03 Daiki Ueno + + msgfilter: Factor out quoted string handling + For later use in xgettext, separate out the scanner part in + filter-quote.c into a separate file. See: + . + * quote.h: New file split from filter-quote.c. + * filter-quote.c: Include "quote.h". + (convert_quote_callback): New function. + (convert_ascii_quote_to_unicode): Use scan_quoted from quote.h. + * Makefile.am (libgettextsrc_la_SOURCES): Add quote.h. 
+ 2015-01-29 Daiki Ueno <ueno@gnu.org> msgexec: Add --newline option diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index b8dd70cd0..3f6ce309f 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -148,7 +148,7 @@ $(COMMON_SOURCE) read-catalog.c \ color.c write-catalog.c write-properties.c write-stringtable.c write-po.c \ msgl-ascii.c msgl-iconv.c msgl-equal.c msgl-cat.c msgl-header.c msgl-english.c \ msgl-check.c file-list.c msgl-charset.c po-time.c plural-exp.c plural-eval.c \ -plural-table.c \ +plural-table.c quote.h \ $(FORMAT_SOURCE) \ read-desktop.c diff --git a/gettext-tools/src/filter-quote.c b/gettext-tools/src/filter-quote.c index 2e9b7dc4c..05d0d5c93 100644 --- a/gettext-tools/src/filter-quote.c +++ b/gettext-tools/src/filter-quote.c @@ -22,6 +22,7 @@ /* Specification. */ #include "filters.h" +#include "quote.h" #include <stdbool.h> #include <stdlib.h> #include <string.h> @@ -30,28 +31,83 @@ #define BOLD_START "\x1b[1m" #define BOLD_END "\x1b[0m" +struct result +{ + char *output; + char *offset; + bool bold; +}; + +static void +convert_quote_callback (char quote, const char *quoted, size_t quoted_length, + void *data) +{ + struct result *result = data; + + switch (quote) + { + case '\0': + memcpy (result->offset, quoted, quoted_length); + result->offset += quoted_length; + break; + + case '"': + /* U+201C: LEFT DOUBLE QUOTATION MARK */ + memcpy (result->offset, "\xe2\x80\x9c", 3); + result->offset += 3; + if (result->bold) + { + memcpy (result->offset, BOLD_START, 4); + result->offset += 4; + } + memcpy (result->offset, quoted, quoted_length); + result->offset += quoted_length; + if (result->bold) + { + memcpy (result->offset, BOLD_END, 4); + result->offset += 4; + } + /* U+201D: RIGHT DOUBLE QUOTATION MARK */ + memcpy (result->offset, "\xe2\x80\x9d", 3); + result->offset += 3; + break; + + case '\'': + /* U+2018: LEFT SINGLE QUOTATION MARK */ + memcpy (result->offset, "\xe2\x80\x98", 3); + result->offset += 3; + if (result->bold) + { + memcpy 
(result->offset, BOLD_START, 4); + result->offset += 4; + } + memcpy (result->offset, quoted, quoted_length); + result->offset += quoted_length; + if (result->bold) + { + memcpy (result->offset, BOLD_END, 4); + result->offset += 4; + } + /* U+2019: RIGHT SINGLE QUOTATION MARK */ + memcpy (result->offset, "\xe2\x80\x99", 3); + result->offset += 3; + break; + } +} + /* This is a direct translation of po/quot.sed and po/boldquot.sed. */ static void convert_ascii_quote_to_unicode (const char *input, size_t input_len, char **output_p, size_t *output_len_p, bool bold) { - const char *start, *end, *p; - char *output, *r; - bool state; + const char *p; size_t quote_count; - - start = input; - end = &input[input_len - 1]; - - /* True if we have seen a character which could be an opening - quotation mark. Note that we can't determine if it is really an - opening quotation mark until we see a closing quotation mark. */ - state = false; + struct result result; /* Count the number of quotation characters. */ quote_count = 0; - for (p = start; p <= end; p++) + for (p = input; p < input + input_len; p++) { size_t len; @@ -65,144 +121,16 @@ convert_ascii_quote_to_unicode (const char *input, size_t input_len, } /* Large enough. */ - r = output = XNMALLOC (input_len - quote_count - + (bold ? 7 : 3) * quote_count + 1, - char); - -#undef COPY_SEEN -#define COPY_SEEN \ - do \ - { \ - memcpy (r, start, p - start); \ - r += p - start; \ - start = p; \ - } \ - while (0) - - for (p = start; p <= end; p++) - { - switch (*p) - { - case '"': - if (state) - { - if (*start == '"') - { - if (p > start + 1) - { - /* U+201C: LEFT DOUBLE QUOTATION MARK */ - memcpy (r, "\xe2\x80\x9c", 3); - r += 3; - if (bold) - { - memcpy (r, BOLD_START, 4); - r += 4; - } - memcpy (r, start + 1, p - start - 1); - r += p - start - 1; - if (bold) - { - memcpy (r, BOLD_END, 4); - r += 4; - } - /* U+201D: RIGHT DOUBLE QUOTATION MARK */ - memcpy (r, "\xe2\x80\x9d", 3); - r += 3; - } - else - { - /* Consider "" as "". 
*/ - memcpy (r, "\"\"", 2); - r += 2; - } - start = p + 1; - state = false; - } - } - else - { - COPY_SEEN; - state = true; - } - break; - - case '`': - if (state) - { - if (*start == '`') - COPY_SEEN; - } - else - { - COPY_SEEN; - state = true; - } - break; - - case '\'': - if (state) - { - if (/* `...' */ - *start == '`' - /* '...', where: - - The left quote is preceded by a space, and the - right quote is followed by a space. - - The left quote is preceded by a space, and the - right quote is at the end of line. - - The left quote is at the beginning of the line, and - the right quote is followed by a space. - */ - || (*start == '\'' - && (((start > input && *(start - 1) == ' ') - && (p == end || *(p + 1) == '\n' || *(p + 1) == ' ')) - || ((start == input || *(start - 1) == '\n') - && p < end && *(p + 1) == ' ')))) - { - /* U+2018: LEFT SINGLE QUOTATION MARK */ - memcpy (r, "\xe2\x80\x98", 3); - r += 3; - if (bold) - { - memcpy (r, BOLD_START, 4); - r += 4; - } - memcpy (r, start + 1, p - start - 1); - r += p - start - 1; - if (bold) - { - memcpy (r, BOLD_END, 4); - r += 4; - } - /* U+2019: RIGHT SINGLE QUOTATION MARK */ - memcpy (r, "\xe2\x80\x99", 3); - r += 3; - start = p + 1; - } - else - COPY_SEEN; - state = false; - } - else if (p == input || *(p - 1) == '\n' || *(p - 1) == ' ') - { - COPY_SEEN; - state = true; - } - break; - } - } + result.output = XNMALLOC (input_len - quote_count + + (bold ? 7 : 3) * quote_count + 1, + char); + result.offset = result.output; + result.bold = bold; -#undef COPY_SEEN - - /* Copy the rest to R. 
*/ - if (p > start) - { - memcpy (r, start, p - start); - r += p - start; - } - *r = '\0'; + scan_quoted (input, input_len, convert_quote_callback, &result); - *output_p = output; - *output_len_p = r - output; + *output_p = result.output; + *output_len_p = result.offset - result.output; } void diff --git a/gettext-tools/src/quote.h b/gettext-tools/src/quote.h new file mode 100644 index 000000000..727b32212 --- /dev/null +++ b/gettext-tools/src/quote.h @@ -0,0 +1,142 @@ +/* Scan quoted string segments from a string. + Copyright (C) 2014-2015 Free Software Foundation, Inc. + Written by Daiki Ueno <ueno@gnu.org>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _SCAN_QUOTE_H +#define _SCAN_QUOTE_H + +#include <stdbool.h> + + +#ifdef __cplusplus +extern "C" { +#endif + +static void +scan_quoted (const char *input, size_t length, + void (* callback) (char quote, const char *quoted, + size_t quoted_length, + void *data), + void *data) +{ + const char *p, *start, *end; + bool seen_opening; + + /* START shall point to the beginning of a quoted string, END points + to the end of the entire input string. */ + start = input; + end = &input[length - 1]; + + /* True if we have seen a character which could be an opening + quotation mark. Note that we can't determine if it is really an + opening quotation mark until we see a closing quotation mark. 
*/ + seen_opening = false; + + for (p = start; p <= end; p++) + { + switch (*p) + { + case '"': + if (seen_opening) + { + if (*start == '"') + { + if (p == start + 1) + /* Consider "" as "". */ + callback ('\0', "\"\"", 2, data); + else + /* "..." */ + callback ('"', start + 1, p - (start + 1), data); + + start = p + 1; + seen_opening = false; + } + } + else + { + callback ('\0', start, p - start, data); + start = p; + seen_opening = true; + } + break; + + case '`': + if (seen_opening) + { + if (*start == '`') + { + callback ('\0', start, p - start, data); + start = p; + } + } + else + { + callback ('\0', start, p - start, data); + start = p; + seen_opening = true; + } + break; + + case '\'': + if (seen_opening) + { + if (/* `...' */ + *start == '`' + /* '...', where + - The left quote is preceded by a space, and the + right quote is followed by a space. + - The left quote is preceded by a space, and the + right quote is at the end of line. + - The left quote is at the beginning of the line, and + the right quote is followed by a space. */ + || (*start == '\'' + && (((start > input && *(start - 1) == ' ') + && (p == end || *(p + 1) == '\n' || *(p + 1) == ' ')) + || ((start == input || *(start - 1) == '\n') + && p < end && *(p + 1) == ' ')))) + { + callback ('\'', start + 1, p - (start + 1), data); + start = p + 1; + } + else + { + callback ('\0', start, p - start, data); + start = p; + } + seen_opening = false; + } + else if (p == input || *(p - 1) == '\n' || *(p - 1) == ' ') + { + callback ('\0', start, p - start, data); + start = p; + seen_opening = true; + } + break; + } + } + + /* Copy the rest. */ + if (p > start) + callback ('\0', start, p - start, data); +} + + +#ifdef __cplusplus +} +#endif + + +#endif /* _SCAN_QUOTE_H */