git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
fts: HTML parser now makes sure space is added for each <tag>
author: Timo Sirainen <tss@iki.fi>
Thu, 4 Aug 2011 10:05:26 +0000 (13:05 +0300)
committer: Timo Sirainen <tss@iki.fi>
Thu, 4 Aug 2011 10:05:26 +0000 (13:05 +0300)
This could be smarter, though, e.g. by not adding a space for inline tags
such as <span>, but that gets a bit complex.

src/plugins/fts/fts-parser-html.c

index 870aafe389f5c9fdbb8227cbd69060b091b56ad9..4dc63e4a1c3c0236f79caaa73a6bb1b376e103a7 100644 (file)
@@ -127,6 +127,15 @@ static size_t parse_entity(struct html_fts_parser *parser,
        return i + 1;
 }
 
+static void parser_add_space(struct html_fts_parser *parser)
+{
+       const unsigned char *data = parser->output->data;
+
+       if (parser->output->used > 0 &&
+           data[parser->output->used-1] != ' ')
+               buffer_append_c(parser->output, ' ');
+}
+
 static size_t
 parse_data(struct html_fts_parser *parser,
           const unsigned char *data, size_t size)
@@ -158,6 +167,7 @@ parse_data(struct html_fts_parser *parser,
                        else if (c == '>') {
                                parser->state = parser->ignore_next_text ?
                                        HTML_STATE_IGNORE : HTML_STATE_TEXT;
+                               parser_add_space(parser);
                        }
                        break;
                case HTML_STATE_TAG_QUOTED: