+++ /dev/null
-/* xgettext libexpat compatibility.
- Copyright (C) 2002-2003, 2005-2009, 2013, 2015 Free Software
- Foundation, Inc.
-
- This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-
-#if DYNLOAD_LIBEXPAT
-# include <dlfcn.h>
-#else
-# if HAVE_LIBEXPAT
-# include <expat.h>
-# endif
-#endif
-
-/* Keep the references to XML_GetCurrent{Line,Column}Number symbols
- before loading libexpat-compat.h, since they are redefined to
- rpl_XML_GetCurrent{Line,Column}Number . */
-#if !DYNLOAD_LIBEXPAT && XML_MAJOR_VERSION >= 2
-static void *p_XML_GetCurrentLineNumber = (void *) &XML_GetCurrentLineNumber;
-static void *p_XML_GetCurrentColumnNumber = (void *) &XML_GetCurrentColumnNumber;
-#endif
-
-#include "libexpat-compat.h"
-
-/* ======================= Different libexpat ABIs. ======================= */
-
-/* There are three different ABIs of libexpat, regarding the functions
- XML_GetCurrentLineNumber and XML_GetCurrentColumnNumber.
- In expat < 2.0, they return an 'int'.
- In expat >= 2.0, they return
- - a 'long' if expat was compiled with the default flags, or
- - a 'long long' if expat was compiled with -DXML_LARGE_SIZE.
- But the <expat.h> include file does not contain the information whether
- expat was compiled with -DXML_LARGE_SIZE; so the include file is lying!
- For this information, we need to call XML_GetFeatureList(), for
- expat >= 2.0.1; for expat = 2.0.0, we have to assume the default flags. */
-
-#if !DYNLOAD_LIBEXPAT && XML_MAJOR_VERSION >= 2
-
-/* expat >= 2.0 -> Return type is 'int64_t' worst-case. */
-
-/* Return true if libexpat was compiled with -DXML_LARGE_SIZE. */
-static bool
-is_XML_LARGE_SIZE_ABI (void)
-{
- static bool tested;
- static bool is_large;
-
- if (!tested)
- {
- const XML_Feature *features;
-
- is_large = false;
- for (features = XML_GetFeatureList (); features->name != NULL; features++)
- if (strcmp (features->name, "XML_LARGE_SIZE") == 0)
- {
- is_large = true;
- break;
- }
-
- tested = true;
- }
- return is_large;
-}
-
-int64_t
-rpl_XML_GetCurrentLineNumber (XML_Parser parser)
-{
- if (is_XML_LARGE_SIZE_ABI ())
- return ((int64_t (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser);
- else
- return ((long (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser);
-}
-
-int64_t
-rpl_XML_GetCurrentColumnNumber (XML_Parser parser)
-{
- if (is_XML_LARGE_SIZE_ABI ())
- return ((int64_t (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser);
- else
- return ((long (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser);
-}
-#endif
-
-
-/* ===================== Dynamic loading of libexpat. ===================== */
-
-#if DYNLOAD_LIBEXPAT
-
-static XML_Expat_Version (*p_XML_ExpatVersionInfo) (void);
-
-XML_Expat_Version
-XML_ExpatVersionInfo (void)
-{
- return (*p_XML_ExpatVersionInfo) ();
-}
-
-static const XML_Feature * (*p_XML_GetFeatureList) (void);
-
-const XML_Feature *
-XML_GetFeatureList (void)
-{
- return (*p_XML_GetFeatureList) ();
-}
-
-enum XML_Size_ABI
-get_XML_Size_ABI (void)
-{
- static bool tested;
- static enum XML_Size_ABI abi;
-
- if (!tested)
- {
- if (XML_ExpatVersionInfo () .major >= 2)
- /* expat >= 2.0 -> XML_Size is 'int64_t' or 'long'. */
- {
- const XML_Feature *features;
-
- abi = is_long;
- for (features = XML_GetFeatureList ();
- features->name != NULL;
- features++)
- if (strcmp (features->name, "XML_LARGE_SIZE") == 0)
- {
- abi = is_int64_t;
- break;
- }
- }
- else
- /* expat < 2.0 -> XML_Size is 'int'. */
- abi = is_int;
- tested = true;
- }
- return abi;
-}
-
-static XML_Parser (*p_XML_ParserCreate) (const XML_Char *encoding);
-
-XML_Parser
-XML_ParserCreate (const XML_Char *encoding)
-{
- return (*p_XML_ParserCreate) (encoding);
-}
-
-static void (*p_XML_SetElementHandler) (XML_Parser parser,
- XML_StartElementHandler start,
- XML_EndElementHandler end);
-
-void
-XML_SetElementHandler (XML_Parser parser,
- XML_StartElementHandler start,
- XML_EndElementHandler end)
-{
- (*p_XML_SetElementHandler) (parser, start, end);
-}
-
-
-static void (*p_XML_SetCharacterDataHandler) (XML_Parser parser,
- XML_CharacterDataHandler handler);
-
-void
-XML_SetCharacterDataHandler (XML_Parser parser,
- XML_CharacterDataHandler handler)
-{
- (*p_XML_SetCharacterDataHandler) (parser, handler);
-}
-
-
-static void (*p_XML_SetCommentHandler) (XML_Parser parser,
- XML_CommentHandler handler);
-
-void
-XML_SetCommentHandler (XML_Parser parser, XML_CommentHandler handler)
-{
- (*p_XML_SetCommentHandler) (parser, handler);
-}
-
-
-static int (*p_XML_Parse) (XML_Parser parser, const char *s,
- int len, int isFinal);
-
-int
-XML_Parse (XML_Parser parser, const char *s, int len, int isFinal)
-{
- return (*p_XML_Parse) (parser, s, len, isFinal);
-}
-
-
-static enum XML_Error (*p_XML_GetErrorCode) (XML_Parser parser);
-
-enum XML_Error
-XML_GetErrorCode (XML_Parser parser)
-{
- return (*p_XML_GetErrorCode) (parser);
-}
-
-
-static void *p_XML_GetCurrentLineNumber;
-
-int64_t
-XML_GetCurrentLineNumber (XML_Parser parser)
-{
- switch (get_XML_Size_ABI ())
- {
- case is_int:
- return ((int (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser);
- case is_long:
- return ((long (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser);
- case is_int64_t:
- return ((int64_t (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser);
- default:
- abort ();
- }
-}
-
-static void *p_XML_GetCurrentColumnNumber;
-
-int64_t
-XML_GetCurrentColumnNumber (XML_Parser parser)
-{
- switch (get_XML_Size_ABI ())
- {
- case is_int:
- return ((int (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser);
- case is_long:
- return ((long (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser);
- case is_int64_t:
- return ((int64_t (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser);
- default:
- abort ();
- }
-}
-
-
-static const XML_LChar * (*p_XML_ErrorString) (int code);
-
-const XML_LChar *
-XML_ErrorString (int code)
-{
- return (*p_XML_ErrorString) (code);
-}
-
-static void (*p_XML_ParserFree) (XML_Parser parser);
-
-void
-XML_ParserFree (XML_Parser parser)
-{
- return (*p_XML_ParserFree) (parser);
-}
-
-static int libexpat_loaded = 0;
-
-bool
-load_libexpat ()
-{
- if (libexpat_loaded == 0)
- {
- void *handle;
-
- /* Try to load libexpat-2.x. */
- handle = dlopen ("libexpat.so.1", RTLD_LAZY);
- if (handle == NULL)
- /* Try to load libexpat-1.x. */
- handle = dlopen ("libexpat.so.0", RTLD_LAZY);
- if (handle != NULL
- && (p_XML_ExpatVersionInfo =
- (XML_Expat_Version (*) (void))
- dlsym (handle, "XML_ExpatVersionInfo")) != NULL
- && (p_XML_GetFeatureList =
- (const XML_Feature * (*) (void))
- dlsym (handle, "XML_GetFeatureList")) != NULL
- && (p_XML_ParserCreate =
- (XML_Parser (*) (const XML_Char *))
- dlsym (handle, "XML_ParserCreate")) != NULL
- && (p_XML_SetElementHandler =
- (void (*) (XML_Parser, XML_StartElementHandler, XML_EndElementHandler))
- dlsym (handle, "XML_SetElementHandler")) != NULL
- && (p_XML_SetCharacterDataHandler =
- (void (*) (XML_Parser, XML_CharacterDataHandler))
- dlsym (handle, "XML_SetCharacterDataHandler")) != NULL
- && (p_XML_SetCommentHandler =
- (void (*) (XML_Parser, XML_CommentHandler))
- dlsym (handle, "XML_SetCommentHandler")) != NULL
- && (p_XML_Parse =
- (int (*) (XML_Parser, const char *, int, int))
- dlsym (handle, "XML_Parse")) != NULL
- && (p_XML_GetErrorCode =
- (enum XML_Error (*) (XML_Parser))
- dlsym (handle, "XML_GetErrorCode")) != NULL
- && (p_XML_GetCurrentLineNumber =
- dlsym (handle, "XML_GetCurrentLineNumber")) != NULL
- && (p_XML_GetCurrentColumnNumber =
- dlsym (handle, "XML_GetCurrentColumnNumber")) != NULL
- && (p_XML_ParserFree =
- (void (*) (XML_Parser))
- dlsym (handle, "XML_ParserFree")) != NULL
- && (p_XML_ErrorString =
- (const XML_LChar * (*) (int))
- dlsym (handle, "XML_ErrorString")) != NULL)
- libexpat_loaded = 1;
- else
- libexpat_loaded = -1;
- }
- return libexpat_loaded >= 0;
-}
-
-#endif
--- /dev/null
+/* markup.c -- simple XML-like parser
+ Copyright (C) 2015 Free Software Foundation, Inc.
+
+ This file is not part of the GNU gettext program, but is used with
+ GNU gettext.
+
+ This is a stripped down version of GLib's gmarkup.c. The original
+ copyright notice is as follows:
+*/
+
+/* gmarkup.c - Simple XML-like parser
+ *
+ * Copyright 2000, 2003 Red Hat, Inc.
+ * Copyright 2007, 2008 Ryan Lortie <desrt@desrt.ca>
+ *
+ * GLib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * GLib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with GLib; see the file COPYING.LIB. If not,
+ * see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+/* Specification */
+#include "markup.h"
+
+#include "c-ctype.h"
+#include "gettext.h"
+#include "gl_linked_list.h"
+#include "gl_xlist.h"
+#include "unictype.h"
+#include "unistr.h"
+#include "xalloc.h"
+#include "xvasprintf.h"
+
+#define _(s) gettext(s)
+
+/**
+ * The "markup" parser is intended to parse a simple markup format
+ * that's a subset of XML. This is a small, efficient, easy-to-use
+ * parser. It should not be used if you expect to interoperate with
+ * other applications generating full-scale XML. However, it's very
+ * useful for application data files, config files, etc. where you
+ * know your application will be the only one writing the file.
+ * Full-scale XML parsers should be able to parse the subset used by
+ * markup, so you can easily migrate to full-scale XML at a later
+ * time if the need arises.
+ *
+ * The parser is not guaranteed to signal an error on all invalid XML;
+ * the parser may accept documents that an XML parser would not.
+ * However, XML documents which are not well-formed (which is a weaker
+ * condition than being valid. See the XML specification
+ * <http://www.w3.org/TR/REC-xml/> for definitions of these terms.)
+ * are not considered valid GMarkup documents.
+ *
+ * Simplifications to XML include:
+ *
+ * - Only UTF-8 encoding is allowed
+ *
+ * - No user-defined entities
+ *
+ * - Processing instructions, comments and the doctype declaration
+ * are "passed through" but are not interpreted in any way
+ *
+ * - No DTD or validation
+ *
+ * The markup format does support:
+ *
+ * - Elements
+ *
+ * - Attributes
+ *
+ * - 5 standard entities: &amp; &lt; &gt; &quot; &apos;
+ *
+ * - Character references
+ *
+ * - Sections marked as CDATA
+ */
+
+/* States of the parser's state machine.  The current state is kept in
+   the 'state' field of the parse context and selects the case taken
+   by the switch in markup_parse_context_parse.  STATE_START is the
+   initial state; STATE_ERROR is entered by emit_error.  */
+typedef enum
+{
+ STATE_START,
+ STATE_AFTER_OPEN_ANGLE,
+ STATE_AFTER_CLOSE_ANGLE,
+ STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
+ STATE_INSIDE_OPEN_TAG_NAME,
+ STATE_INSIDE_ATTRIBUTE_NAME,
+ STATE_AFTER_ATTRIBUTE_NAME,
+ STATE_BETWEEN_ATTRIBUTES,
+ STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
+ STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
+ STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
+ STATE_INSIDE_TEXT,
+ STATE_AFTER_CLOSE_TAG_SLASH,
+ STATE_INSIDE_CLOSE_TAG_NAME,
+ STATE_AFTER_CLOSE_TAG_NAME,
+ STATE_INSIDE_PASSTHROUGH,
+ STATE_ERROR
+} markup_parse_state_ty;
+
+/* One entry of the subparser stack.  markup_parse_context_push saves
+   the values it is about to replace here, and pop_subparser_stack
+   copies them back into the parse context.  */
+typedef struct
+{
+ /* Value of context->subparser_element before the push.  */
+ const char *prev_element;
+ /* Value of context->parser before the push.  */
+ const markup_parser_ty *prev_parser;
+ /* Value of context->user_data before the push.  */
+ void *prev_user_data;
+} markup_recursion_tracker_ty;
+
+/* Growable character buffer used for tag names and pending text.  */
+typedef struct
+{
+ /* Storage for the characters, grown with xrealloc by
+    markup_string_append.  */
+ char *buffer;
+ /* Number of bytes allocated for 'buffer'.  */
+ size_t bufmax;
+ /* Number of bytes in use, not counting the terminating NUL.  */
+ size_t buflen;
+} markup_string_ty;
+
+/* Complete state of one parser instance.  Allocated and initialized by
+   markup_parse_context_new, released by markup_parse_context_free.  */
+struct _markup_parse_context_ty
+{
+ /* Callback table currently in effect; replaced while a subparser is
+    pushed and restored by pop_subparser_stack.  */
+ const markup_parser_ty *parser;
+
+ markup_parse_flags_ty flags;
+
+ /* Position counters maintained by advance_char; both start at 1.  */
+ int line_number;
+ int char_number;
+
+ markup_parse_state_ty state;
+
+ /* Opaque pointer passed to the parser callbacks.  */
+ void *user_data;
+
+ /* A piece of character data or an element that
+ * hasn't "ended" yet so we haven't yet called
+ * the callback for it.
+ */
+ markup_string_ty *partial_chunk;
+
+ gl_list_t tag_stack; /* <markup_string_ty> */
+
+ /* Attributes collected for the tag being parsed.  cur_attr is the
+    index of the last used slot (-1 when there are none); alloc_attrs
+    is the number of slots allocated in each of the two arrays.  */
+ char **attr_names;
+ char **attr_values;
+ int cur_attr;
+ int alloc_attrs;
+
+ /* The chunk of input handed to the current
+    markup_parse_context_parse call.  */
+ const char *current_text;
+ ssize_t current_text_len;
+ const char *current_text_end;
+
+ /* used to save the start of the last interesting thingy */
+ const char *start;
+
+ /* Current read position inside current_text.  */
+ const char *iter;
+
+ /* Copy of the message of the last error passed to emit_error.  */
+ char *error_text;
+
+ unsigned int document_empty : 1;
+ unsigned int parsing : 1;
+ /* Set by pop_subparser_stack; cleared by markup_parse_context_pop and
+    ensure_no_outstanding_subparser.  */
+ unsigned int awaiting_pop : 1;
+ int balance;
+
+ /* subparser support */
+ gl_list_t subparser_stack; /* <markup_recursion_tracker_ty *> */
+ const char *subparser_element;
+};
+
+/* Returns a freshly allocated string object obtained from XZALLOC.  */
+static markup_string_ty *
+markup_string_new (void)
+{
+  markup_string_ty *fresh = XZALLOC (markup_string_ty);
+
+  return fresh;
+}
+
+/* Releases the string object STRING.  When FREE_SEGMENT is true the
+   character buffer is released as well and NULL is returned; otherwise
+   ownership of the buffer passes to the caller, who receives it as the
+   return value.  */
+static char *
+markup_string_free (markup_string_ty *string, bool free_segment)
+{
+  char *segment = string->buffer;
+
+  free (string);
+
+  if (!free_segment)
+    return segment;
+
+  free (segment);
+  return NULL;
+}
+
+/* Single-argument wrapper that frees STRING together with its buffer;
+   installed as the element dispose function of the tag stack.  */
+static void
+markup_string_free1 (markup_string_ty *string)
+{
+  (void) markup_string_free (string, true);
+}
+
+/* Shortens STRING to LENGTH bytes and re-terminates it.
+   LENGTH must not exceed the current length.  Shrinking by a single
+   byte (for instance when "\r\n" is collapsed to "\n" while
+   unescaping) and truncating to the current length are both valid;
+   the previous assertion 'length < buflen - 1' rejected these cases
+   and wrapped around for an empty string.  */
+static void
+markup_string_truncate (markup_string_ty *string, size_t length)
+{
+  assert (string != NULL && length <= string->buflen);
+
+  /* A string that was never appended to has no buffer to terminate.  */
+  if (string->buffer != NULL)
+    string->buffer[length] = '\0';
+  string->buflen = length;
+}
+
+/* Appends the LENGTH bytes starting at TO_APPEND to STRING, growing the
+   buffer as needed, and keeps the result NUL-terminated.
+   The buffer is at least doubled on each reallocation so that repeated
+   small appends stay cheap.  */
+static void
+markup_string_append (markup_string_ty *string, const char *to_append,
+                      size_t length)
+{
+  size_t new_len = string->buflen + length;
+
+  if (new_len + 1 > string->bufmax)
+    {
+      string->bufmax *= 2;
+      if (new_len + 1 > string->bufmax)
+        string->bufmax = new_len + 1;
+      string->buffer = xrealloc (string->buffer, string->bufmax);
+    }
+  memcpy (string->buffer + string->buflen, to_append, length);
+  /* Terminate after the appended data and account for the bytes that
+     were already present; using LENGTH alone here would truncate or
+     corrupt the string on every append but the first.  */
+  string->buffer[new_len] = '\0';
+  string->buflen = new_len;
+}
+
+/* Resets STRING to the empty string while keeping its allocation.
+   Does nothing when no buffer has been allocated yet.  */
+static inline void
+string_blank (markup_string_ty *string)
+{
+  if (string->bufmax == 0)
+    return;
+
+  string->buffer[0] = '\0';
+  string->buflen = 0;
+}
+
+/* Allocates a parse context that reports to the callbacks in PARSER,
+   honours FLAGS, and passes USER_DATA to every callback.
+   A context may be fed any number of documents as long as no error
+   occurs.  After the first error it cannot parse any further and must
+   be freed and replaced by a new context.  */
+markup_parse_context_ty *
+markup_parse_context_new (const markup_parser_ty *parser,
+                          markup_parse_flags_ty flags,
+                          void *user_data)
+{
+  markup_parse_context_ty *ctx;
+
+  assert (parser != NULL);
+
+  ctx = XMALLOC (markup_parse_context_ty);
+
+  /* Configuration supplied by the caller.  */
+  ctx->parser = parser;
+  ctx->flags = flags;
+  ctx->user_data = user_data;
+
+  /* State machine and position tracking.  */
+  ctx->state = STATE_START;
+  ctx->line_number = 1;
+  ctx->char_number = 1;
+  ctx->balance = 0;
+  ctx->document_empty = true;
+  ctx->parsing = false;
+  ctx->awaiting_pop = false;
+
+  /* No input has been attached yet.  */
+  ctx->current_text = NULL;
+  ctx->current_text_len = -1;
+  ctx->current_text_end = NULL;
+  ctx->start = NULL;
+  ctx->iter = NULL;
+
+  /* Nothing is pending, and no error has been recorded.  */
+  ctx->partial_chunk = NULL;
+  ctx->error_text = NULL;
+
+  /* Attribute arrays are allocated lazily by add_attribute.  */
+  ctx->attr_names = NULL;
+  ctx->attr_values = NULL;
+  ctx->cur_attr = -1;
+  ctx->alloc_attrs = 0;
+
+  /* Both stacks own their elements and dispose of them on removal.  */
+  ctx->tag_stack =
+    gl_list_create_empty (GL_LINKED_LIST,
+                          NULL, NULL,
+                          (gl_listelement_dispose_fn) markup_string_free1,
+                          true);
+  ctx->subparser_stack =
+    gl_list_create_empty (GL_LINKED_LIST,
+                          NULL, NULL,
+                          (gl_listelement_dispose_fn) free,
+                          true);
+  ctx->subparser_element = NULL;
+
+  return ctx;
+}
+
+static void clear_attributes (markup_parse_context_ty *context);
+
+/* Releases CONTEXT and everything it owns.  Must not be called from
+   within a markup_parser_ty callback, nor while a subparser is still
+   pushed.  */
+void
+markup_parse_context_free (markup_parse_context_ty *context)
+{
+  assert (context != NULL);
+  assert (!context->parsing);
+  assert (gl_list_size (context->subparser_stack) == 0);
+  assert (!context->awaiting_pop);
+
+  /* The attribute strings have to go before the arrays holding them.  */
+  clear_attributes (context);
+  free (context->attr_values);
+  free (context->attr_names);
+
+  if (context->partial_chunk != NULL)
+    markup_string_free (context->partial_chunk, true);
+  free (context->error_text);
+
+  gl_list_free (context->subparser_stack);
+  gl_list_free (context->tag_stack);
+
+  free (context);
+}
+
+static void pop_subparser_stack (markup_parse_context_ty *context);
+
+/* Puts CONTEXT into the error state and reports ERROR_TEXT.
+   The error callback of the current parser is invoked first; then every
+   pushed subparser is popped in turn and the error callback of the
+   parser restored by each pop is invoked as well, so that all levels
+   get a chance to release their user data.  Finally a copy of
+   ERROR_TEXT is stored in the context.  */
+static void
+emit_error (markup_parse_context_ty *context, const char *error_text)
+{
+  context->state = STATE_ERROR;
+
+  for (;;)
+    {
+      if (context->parser->error != NULL)
+        (*context->parser->error) (context, error_text, context->user_data);
+
+      if (gl_list_size (context->subparser_stack) == 0)
+        break;
+
+      pop_subparser_stack (context);
+      context->awaiting_pop = false; /* already been freed */
+    }
+
+  free (context->error_text);
+  context->error_text = xstrdup (error_text);
+}
+
+/* True if C is one of the ASCII characters '=', '/', '>' or ' '.
+   advance_to_name_end stops scanning a name at these characters, and
+   the name validators refuse them inside a name.  The argument is
+   evaluated several times, so it must not have side effects.  */
+#define IS_COMMON_NAME_END_CHAR(c) \
+ ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
+
+/* Validates NAME as an element or attribute name, decoding it as UTF-8.
+   This is the fallback used by name_validate when its ASCII-only fast
+   path cannot accept the name.
+
+   The first character must be an ASCII letter, '_', ':' or a Unicode
+   alphabetic character.  Each following character must be an ASCII
+   letter or digit, one of '.', '-', '_', ':', or a Unicode alphabetic
+   character.
+
+   Returns true if NAME is acceptable.  Otherwise reports the problem
+   through emit_error and returns false.  */
+static bool
+slow_name_validate (markup_parse_context_ty *context, const char *name)
+{
+  const uint8_t *p = (const uint8_t *) name;
+  const uint8_t *next;
+  ucs4_t uc;
+
+  if (u8_check (p, strlen (name)) != NULL)
+    {
+      emit_error (context, _("invalid UTF-8 sequence"));
+      return false;
+    }
+
+  /* Name start character.  u8_next stores the character found at P in
+     UC and returns the position following it, or NULL when P is at the
+     terminating NUL, i.e. when the name is empty.  */
+  next = u8_next (&uc, p);
+  if (next == NULL
+      || !(c_isalpha (*name)
+           || (!IS_COMMON_NAME_END_CHAR (*name)
+               && (*name == '_'
+                   || *name == ':'
+                   || uc_is_alpha (uc)))))
+    {
+      char *error_text = xasprintf (_("'%s' is not a valid name"), name);
+      emit_error (context, error_text);
+      free (error_text);
+      return false;
+    }
+
+  /* Remaining characters.  Decode the character at P before testing it,
+     so that UC always belongs to the character P points to, and stop at
+     the terminating NUL instead of validating it.  */
+  for (p = next; (next = u8_next (&uc, p)) != NULL; p = next)
+    {
+      char c = (char) *p;
+
+      /* is_name_char */
+      if (!(c_isalnum (c)
+            || (!IS_COMMON_NAME_END_CHAR (c)
+                && (c == '.'
+                    || c == '-'
+                    || c == '_'
+                    || c == ':'
+                    || uc_is_alpha (uc)))))
+        {
+          char *error_text = xasprintf (_("'%s' is not a valid name: '%c'"),
+                                        name, c);
+          emit_error (context, error_text);
+          free (error_text);
+          return false;
+        }
+    }
+  return true;
+}
+
+/* Validates NAME as an element or attribute name.
+   Names made only of ASCII letters, digits, '.', '-', '_' and ':' that
+   start with a letter, '_' or ':' are accepted right away.  Everything
+   else is handed to slow_name_validate, which either accepts the name
+   or reports the error.  */
+static bool
+name_validate (markup_parse_context_ty *context, const char *name)
+{
+  const char *cursor = name;
+  char seen_bits = 0;
+  bool fast_ok;
+
+  /* name start char */
+  fast_ok = (!IS_COMMON_NAME_END_CHAR (*cursor)
+             && (c_isalpha (*cursor) || *cursor == '_' || *cursor == ':'));
+
+  if (fast_ok)
+    {
+      seen_bits = *cursor;
+      for (cursor++; *cursor != '\0'; cursor++)
+        {
+          seen_bits |= *cursor;
+
+          /* is_name_char */
+          if (c_isalnum (*cursor))
+            continue;
+          if (!IS_COMMON_NAME_END_CHAR (*cursor)
+              && (*cursor == '.' || *cursor == '-'
+                  || *cursor == '_' || *cursor == ':'))
+            continue;
+
+          fast_ok = false;
+          break;
+        }
+
+      /* un-common / non-ascii */
+      if (fast_ok && (seen_bits & 0x80))
+        fast_ok = false;
+    }
+
+  if (fast_ok)
+    return true;
+
+  return slow_name_validate (context, name);
+}
+
+/* Checks that the LEN bytes starting at P are well-formed UTF-8.
+   Returns true if they are; otherwise reports the error through
+   emit_error and returns false.  */
+static bool
+text_validate (markup_parse_context_ty *context,
+               const char *p,
+               int len)
+{
+  if (u8_check ((const uint8_t *) p, len) == NULL)
+    return true;
+
+  emit_error (context, _("invalid UTF-8 sequence"));
+  return false;
+}
+
+/* Rewrites STRING in place, resolving everything that was escaped:
+   the five predefined entities (&lt; &gt; &amp; &quot; &apos;) and
+   decimal or hexadecimal character references.  A carriage return,
+   optionally followed by a line feed, becomes a single line feed.
+   While an attribute value is being parsed, tabs, line feeds and
+   carriage returns become spaces instead.
+
+   *IS_ASCII is set to true if no byte with the high bit set was seen
+   and no character reference produced a non-ASCII character.  It is
+   also set for the empty string, so that callers never read an
+   unset flag.
+
+   Returns true on success.  On a malformed entity or character
+   reference the error is reported through emit_error and false is
+   returned; *IS_ASCII is then false.
+
+   Most XML does not contain entities or escaping.  */
+static bool
+unescape_string_inplace (markup_parse_context_ty *context,
+                         markup_string_ty *string,
+                         bool *is_ascii)
+{
+  char mask, *to;
+  const char *from;
+  bool normalize_attribute;
+  size_t new_len;
+
+  if (string->buflen == 0)
+    {
+      /* Nothing to unescape; an empty string is trivially ASCII.  */
+      *is_ascii = true;
+      return true;
+    }
+
+  *is_ascii = false;
+
+  /* are we unescaping an attribute or not ? */
+  if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ
+      || context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
+    normalize_attribute = true;
+  else
+    normalize_attribute = false;
+
+  /*
+   * Meeks' theorem: unescaping can only shrink text.
+   * for &lt; etc. this is obvious, for &#xffff; more
+   * thought is required, but this is patently so.
+   */
+  mask = 0;
+  for (from = to = string->buffer; *from != '\0'; from++, to++)
+    {
+      *to = *from;
+
+      mask |= *to;
+      if (normalize_attribute && (*to == '\t' || *to == '\n'))
+        *to = ' ';
+      if (*to == '\r')
+        {
+          *to = normalize_attribute ? ' ' : '\n';
+          if (from[1] == '\n')
+            from++;
+        }
+      if (*from == '&')
+        {
+          from++;
+          if (*from == '#')
+            {
+              int base = 10;
+              unsigned long l;
+              char *end = NULL;
+
+              from++;
+
+              if (*from == 'x')
+                {
+                  base = 16;
+                  from++;
+                }
+
+              errno = 0;
+              l = strtoul (from, &end, base);
+
+              if (end == from || errno != 0)
+                {
+                  emit_error (context,
+                              _("out of range when resolving character ref"));
+                  return false;
+                }
+              else if (*end != ';')
+                {
+                  emit_error (context,
+                              _("character reference does not end with a ';'"));
+                  return false;
+                }
+              else
+                {
+                  /* characters XML 1.1 permits */
+                  if ((0 < l && l <= 0xD7FF) ||
+                      (0xE000 <= l && l <= 0xFFFD) ||
+                      (0x10000 <= l && l <= 0x10FFFF))
+                    {
+                      char buf[8];
+                      int length;
+                      length = u8_uctomb ((uint8_t *) buf, l, 8);
+                      memcpy (to, buf, length);
+                      /* The loop increment accounts for the last byte
+                         written and for the ';' that END points to.  */
+                      to += length - 1;
+                      from = end;
+                      if (l >= 0x80) /* not ascii */
+                        mask |= 0x80;
+                    }
+                  else
+                    {
+                      emit_error (context, _("invalid character reference"));
+                      return false;
+                    }
+                }
+            }
+
+          else if (strncmp (from, "lt;", 3) == 0)
+            {
+              *to = '<';
+              from += 2;
+            }
+          else if (strncmp (from, "gt;", 3) == 0)
+            {
+              *to = '>';
+              from += 2;
+            }
+          else if (strncmp (from, "amp;", 4) == 0)
+            {
+              *to = '&';
+              from += 3;
+            }
+          else if (strncmp (from, "quot;", 5) == 0)
+            {
+              *to = '"';
+              from += 4;
+            }
+          else if (strncmp (from, "apos;", 5) == 0)
+            {
+              *to = '\'';
+              from += 4;
+            }
+          else
+            {
+              if (*from == ';')
+                emit_error (context, _("empty entity '&;'"));
+              else
+                {
+                  const char *end = strchr (from, ';');
+                  if (end)
+                    emit_error (context, _("unknown entity name"));
+                  else
+                    emit_error (context, _("entity does not end with a ';'"));
+                }
+              return false;
+            }
+        }
+    }
+
+  /* TO never runs ahead of FROM, so the result cannot be longer than
+     the input.  */
+  new_len = to - string->buffer;
+  assert (new_len <= string->buflen);
+  if (new_len != string->buflen)
+    markup_string_truncate (string, new_len);
+
+  *is_ascii = !(mask & 0x80);
+
+  return true;
+}
+
+/* Moves the read position of CONTEXT forward by one byte and updates
+   the line and character counters.  Returns false if the end of the
+   current text has been reached, true otherwise.  */
+static inline bool
+advance_char (markup_parse_context_ty *context)
+{
+  const char *pos = ++context->iter;
+
+  context->char_number++;
+
+  if (pos == context->current_text_end)
+    return false;
+
+  if (*pos == '\n')
+    {
+      context->line_number++;
+      context->char_number = 1;
+    }
+
+  return true;
+}
+
+/* Returns true if C is a space, tab, line feed or carriage return.  */
+static inline bool
+xml_isspace (char c)
+{
+  switch (c)
+    {
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* Advances the read position of CONTEXT past any whitespace.  Stops at
+   the first non-whitespace byte or at the end of the current text.  */
+static void
+skip_spaces (markup_parse_context_ty *context)
+{
+  while (xml_isspace (*context->iter) && advance_char (context))
+    continue;
+}
+
+/* Advances the read position of CONTEXT to the first byte that ends a
+   name: one of the common name end characters or whitespace.  Stops
+   early if the end of the current text is reached.  */
+static void
+advance_to_name_end (markup_parse_context_ty *context)
+{
+  for (;;)
+    {
+      const char c = *context->iter;
+
+      if (IS_COMMON_NAME_END_CHAR (c) || xml_isspace (c))
+        return;
+      if (!advance_char (context))
+        return;
+    }
+}
+
+/* Appends the bytes from TEXT_START up to, but not including, TEXT_END
+   to the partial chunk of CONTEXT.  The chunk is created if it does not
+   exist yet, even when the range is empty.  */
+static void
+add_to_partial (markup_parse_context_ty *context,
+                const char *text_start,
+                const char *text_end)
+{
+  markup_string_ty *chunk = context->partial_chunk;
+
+  if (chunk == NULL)
+    {
+      /* allocate a new chunk to parse into */
+      chunk = markup_string_new ();
+      context->partial_chunk = chunk;
+    }
+
+  if (text_end == text_start)
+    return;
+
+  markup_string_append (chunk, text_start, text_end - text_start);
+}
+
+/* Empties the partial chunk of CONTEXT, if there is one, without
+   releasing it.  */
+static inline void
+truncate_partial (markup_parse_context_ty *context)
+{
+  markup_string_ty *chunk = context->partial_chunk;
+
+  if (chunk == NULL)
+    return;
+
+  string_blank (chunk);
+}
+
+/* Returns the name of the element at the front of the tag stack, i.e.
+   the element pushed most recently.  The tag stack must not be empty.  */
+static inline const char*
+current_element (markup_parse_context_ty *context)
+{
+  return ((const markup_string_ty *)
+          gl_list_get_at (context->tag_stack, 0))->buffer;
+}
+
+/* Leaves the innermost subparser: restores the parser, user data and
+   subparser element that were in effect before the matching
+   markup_parse_context_push, removes the tracker from the subparser
+   stack and marks CONTEXT as awaiting a pop.
+   The subparser stack must not be empty.  */
+static void
+pop_subparser_stack (markup_parse_context_ty *context)
+{
+  markup_recursion_tracker_ty *tracker;
+
+  assert (gl_list_size (context->subparser_stack) > 0);
+
+  tracker = (markup_recursion_tracker_ty *)
+    gl_list_get_at (context->subparser_stack, 0);
+
+  context->awaiting_pop = true;
+
+  context->user_data = tracker->prev_user_data;
+  context->parser = tracker->prev_parser;
+  context->subparser_element = tracker->prev_element;
+
+  /* The subparser stack is created with 'free' as its element dispose
+     function, so removing the element releases TRACKER.  Freeing it
+     here as well would be a double free.  */
+  gl_list_remove_at (context->subparser_stack, 0);
+}
+
+/* Moves the partial chunk of CONTEXT to the front of the tag stack.
+   The context has no partial chunk afterwards.  */
+static void
+push_partial_as_tag (markup_parse_context_ty *context)
+{
+  markup_string_ty *tag = context->partial_chunk;
+
+  gl_list_add_first (context->tag_stack, tag);
+  context->partial_chunk = NULL;
+}
+
+/* Removes the element at the front of the tag stack of CONTEXT.  */
+static void
+pop_tag (markup_parse_context_ty *context)
+{
+  gl_list_t stack = context->tag_stack;
+
+  gl_list_remove_at (stack, 0);
+}
+
+/* Pops the innermost subparser if the current element is the one it
+   was pushed for.  The comparison is by pointer identity, not by
+   string content.  */
+static void
+possibly_finish_subparser (markup_parse_context_ty *context)
+{
+  if (current_element (context) != context->subparser_element)
+    return;
+
+  pop_subparser_stack (context);
+}
+
+/* Clears the awaiting_pop flag of CONTEXT, discarding any pop that is
+   still marked as outstanding.  */
+static void
+ensure_no_outstanding_subparser (markup_parse_context_ty *context)
+{
+  context->awaiting_pop = false;
+}
+
+/* Starts a new attribute named after the contents of STRING.  Its
+   value is left NULL.  Both arrays are grown as needed and remain
+   NULL-terminated.  */
+static void
+add_attribute (markup_parse_context_ty *context, markup_string_ty *string)
+{
+  int slot;
+
+  if (context->cur_attr + 2 >= context->alloc_attrs)
+    {
+      size_t nbytes;
+
+      context->alloc_attrs += 5; /* silly magic number */
+      nbytes = sizeof (char *) * context->alloc_attrs;
+      context->attr_names = xrealloc (context->attr_names, nbytes);
+      context->attr_values = xrealloc (context->attr_values, nbytes);
+    }
+
+  slot = ++context->cur_attr;
+
+  context->attr_names[slot] = xstrdup (string->buffer);
+  context->attr_values[slot] = NULL;
+
+  /* Keep the terminating NULL entries in place.  */
+  context->attr_names[slot + 1] = NULL;
+  context->attr_values[slot + 1] = NULL;
+}
+
+/* Frees all collected attribute names and values of CONTEXT and resets
+   the attribute count.  The arrays themselves are kept for reuse.  */
+static void
+clear_attributes (markup_parse_context_ty *context)
+{
+  /* Go ahead and free the attributes, last one first.  */
+  while (context->cur_attr >= 0)
+    {
+      int pos = context->cur_attr--;
+
+      free (context->attr_names[pos]);
+      context->attr_names[pos] = NULL;
+      free (context->attr_values[pos]);
+      context->attr_values[pos] = NULL;
+    }
+
+  assert (context->cur_attr == -1);
+  assert (context->attr_names == NULL
+          || context->attr_names[0] == NULL);
+  assert (context->attr_values == NULL
+          || context->attr_values[0] == NULL);
+}
+
+/* Installs PARSER and USER_DATA as a subparser for the current element.
+   The parser, user data and subparser element in effect so far are
+   saved in a tracker that is put at the front of the subparser stack.  */
+static void
+markup_parse_context_push (markup_parse_context_ty *context,
+                           const markup_parser_ty *parser,
+                           void *user_data)
+{
+  markup_recursion_tracker_ty *saved = XMALLOC (markup_recursion_tracker_ty);
+
+  /* Remember what is about to be replaced.  */
+  saved->prev_user_data = context->user_data;
+  saved->prev_parser = context->parser;
+  saved->prev_element = context->subparser_element;
+
+  /* Switch over to the subparser.  */
+  context->user_data = user_data;
+  context->parser = parser;
+  context->subparser_element = current_element (context);
+
+  gl_list_add_first (context->subparser_stack, saved);
+}
+
+/* Completes the pop of a subparser.  If no pop is pending yet, the
+   subparser is finished first, provided the current element is the one
+   it was pushed for.  A pop must be pending by then; the pending flag
+   is cleared.  */
+static void
+markup_parse_context_pop (markup_parse_context_ty *context)
+{
+  if (!context->awaiting_pop
+      && current_element (context) == context->subparser_element)
+    pop_subparser_stack (context);
+
+  assert (context->awaiting_pop);
+
+  context->awaiting_pop = false;
+}
+
+/* Reports the start of the current element to the start_element
+   callback, together with NULL-terminated arrays of its attribute
+   names and values, then discards the collected attributes.
+
+   With MARKUP_IGNORE_QUALIFIED set, an element whose name contains ':'
+   is not reported.  Instead a subparser without callbacks is pushed,
+   so that everything inside the element is ignored as well.
+   emit_end_element deals with the end of that subparser.  Attributes
+   whose name contains ':' are left out of the arrays.
+
+   The callback is only invoked if the element name passes
+   name_validate.  */
+static inline void
+emit_start_element (markup_parse_context_ty *context)
+{
+  const bool skip_qualified =
+    (context->flags & MARKUP_IGNORE_QUALIFIED) != 0;
+  const char *element_name;
+  const char **names;
+  const char **values;
+  int n_attrs;
+  int src;
+  int dst;
+
+  if (skip_qualified && strchr (current_element (context), ':') != NULL)
+    {
+      static const markup_parser_ty ignore_parser;
+
+      markup_parse_context_push (context, &ignore_parser, NULL);
+      clear_attributes (context);
+      return;
+    }
+
+  n_attrs = context->cur_attr + 1;
+  names = XCALLOC (n_attrs + 1, const char *);
+  values = XCALLOC (n_attrs + 1, const char *);
+
+  dst = 0;
+  for (src = 0; src < n_attrs; src++)
+    {
+      /* Possibly omit qualified attribute names from the list */
+      if (skip_qualified && strchr (context->attr_names[src], ':') != NULL)
+        continue;
+
+      names[dst] = context->attr_names[src];
+      values[dst] = context->attr_values[src];
+      dst++;
+    }
+  names[dst] = NULL;
+  values[dst] = NULL;
+
+  /* Call user callback for element start */
+  element_name = current_element (context);
+
+  if (context->parser->start_element != NULL
+      && name_validate (context, element_name))
+    (*context->parser->start_element) (context,
+                                       element_name,
+                                       names,
+                                       values,
+                                       context->user_data);
+
+  free (names);
+  free (values);
+  clear_attributes (context);
+}
+
+/* Reports the end of the current element and removes it from the tag
+   stack.  A subparser pushed for this element is finished first.
+   With MARKUP_IGNORE_QUALIFIED set, an element whose name contains ':'
+   only completes the pop of the ignoring subparser; the end_element
+   callback is not invoked for it.  */
+static void
+emit_end_element (markup_parse_context_ty *context)
+{
+  assert (gl_list_size (context->tag_stack) != 0);
+
+  possibly_finish_subparser (context);
+
+  if ((context->flags & MARKUP_IGNORE_QUALIFIED)
+      && strchr (current_element (context), ':') != NULL)
+    {
+      /* We might have just returned from our ignore subparser */
+      markup_parse_context_pop (context);
+    }
+  else
+    {
+      if (context->parser->end_element != NULL)
+        (*context->parser->end_element) (context,
+                                         current_element (context),
+                                         context->user_data);
+
+      ensure_no_outstanding_subparser (context);
+    }
+
+  pop_tag (context);
+}
+
+/* Feed some data to the parse context. The data need not be valid
+ UTF-8; an error will be signaled if it's invalid. The data need
+ not be an entire document; you can feed a document into the parser
+ incrementally, via multiple calls to this function. Typically, as
+ you receive data from a network connection or file, you feed each
+ received chunk of data into this function, aborting the process if
+ an error occurs. Once an error is reported, no further data may be
+ fed to the parse context; all errors are fatal. */
+bool
+markup_parse_context_parse (markup_parse_context_ty *context,
+ const char *text,
+ ssize_t text_len)
+{
+ assert (context != NULL);
+ assert (text != NULL);
+ assert (context->state != STATE_ERROR);
+ assert (!context->parsing);
+
+ if (text_len < 0)
+ text_len = strlen (text);
+
+ if (text_len == 0)
+ return true;
+
+ context->parsing = true;
+
+
+ context->current_text = text;
+ context->current_text_len = text_len;
+ context->current_text_end = context->current_text + text_len;
+ context->iter = context->current_text;
+ context->start = context->iter;
+
+ while (context->iter != context->current_text_end)
+ {
+ switch (context->state)
+ {
+ case STATE_START:
+ /* Possible next state: AFTER_OPEN_ANGLE */
+
+ assert (gl_list_size (context->tag_stack) == 0);
+
+ /* whitespace is ignored outside of any elements */
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ if (*context->iter == '<')
+ {
+ /* Move after the open angle */
+ advance_char (context);
+
+ context->state = STATE_AFTER_OPEN_ANGLE;
+
+ /* this could start a passthrough */
+ context->start = context->iter;
+
+ /* document is now non-empty */
+ context->document_empty = false;
+ }
+ else
+ {
+ emit_error (context,
+ _("document must begin with an element"));
+ }
+ }
+ break;
+
+ case STATE_AFTER_OPEN_ANGLE:
+ /* Possible next states: INSIDE_OPEN_TAG_NAME,
+ * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
+ */
+ if (*context->iter == '?' ||
+ *context->iter == '!')
+ {
+ /* include < in the passthrough */
+ const char *openangle = "<";
+ add_to_partial (context, openangle, openangle + 1);
+ context->start = context->iter;
+ context->balance = 1;
+ context->state = STATE_INSIDE_PASSTHROUGH;
+ }
+ else if (*context->iter == '/')
+ {
+ /* move after it */
+ advance_char (context);
+
+ context->state = STATE_AFTER_CLOSE_TAG_SLASH;
+ }
+ else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
+ {
+ context->state = STATE_INSIDE_OPEN_TAG_NAME;
+
+ /* start of tag name */
+ context->start = context->iter;
+ }
+ else
+ {
+ emit_error (context, _("invalid character after '<'"));
+ }
+ break;
+
+ /* The AFTER_CLOSE_ANGLE state is actually sort of
+ * broken, because it doesn't correspond to a range
+ * of characters in the input stream as the others do,
+ * and thus makes things harder to conceptualize
+ */
+ case STATE_AFTER_CLOSE_ANGLE:
+ /* Possible next states: INSIDE_TEXT, STATE_START */
+ if (gl_list_size (context->tag_stack) == 0)
+ {
+ context->start = NULL;
+ context->state = STATE_START;
+ }
+ else
+ {
+ context->start = context->iter;
+ context->state = STATE_INSIDE_TEXT;
+ }
+ break;
+
+ case STATE_AFTER_ELISION_SLASH:
+ /* Possible next state: AFTER_CLOSE_ANGLE */
+ if (*context->iter == '>')
+ {
+ /* move after the close angle */
+ advance_char (context);
+ context->state = STATE_AFTER_CLOSE_ANGLE;
+ emit_end_element (context);
+ }
+ else
+ {
+ emit_error (context, _("missing '>'"));
+ }
+ break;
+
+ case STATE_INSIDE_OPEN_TAG_NAME:
+ /* Possible next states: BETWEEN_ATTRIBUTES */
+
+ /* if there's a partial chunk then it's the first part of the
+ * tag name. If there's a context->start then it's the start
+ * of the tag name in current_text, the partial chunk goes
+ * before that start though.
+ */
+ advance_to_name_end (context);
+
+ if (context->iter == context->current_text_end)
+ {
+ /* The name hasn't necessarily ended. Merge with
+ * partial chunk, leave state unchanged.
+ */
+ add_to_partial (context, context->start, context->iter);
+ }
+ else
+ {
+ /* The name has ended. Combine it with the partial chunk
+ * if any; push it on the stack; enter next state.
+ */
+ add_to_partial (context, context->start, context->iter);
+ push_partial_as_tag (context);
+
+ context->state = STATE_BETWEEN_ATTRIBUTES;
+ context->start = NULL;
+ }
+ break;
+
+ case STATE_INSIDE_ATTRIBUTE_NAME:
+ /* Possible next states: AFTER_ATTRIBUTE_NAME */
+
+ advance_to_name_end (context);
+ add_to_partial (context, context->start, context->iter);
+
+ /* read the full name, if we enter the equals sign state
+ * then add the attribute to the list (without the value),
+ * otherwise store a partial chunk to be prepended later.
+ */
+ if (context->iter != context->current_text_end)
+ context->state = STATE_AFTER_ATTRIBUTE_NAME;
+ break;
+
+ case STATE_AFTER_ATTRIBUTE_NAME:
+ /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
+
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ /* The name has ended. Validate the accumulated partial chunk;
+ * hand it to add_attribute; free it; enter next state.
+ */
+ if (!name_validate (context, context->partial_chunk->buffer))
+ break;
+
+ add_attribute (context, context->partial_chunk);
+
+ markup_string_free (context->partial_chunk, true);
+ context->partial_chunk = NULL;
+ context->start = NULL;
+
+ if (*context->iter == '=')
+ {
+ advance_char (context);
+ context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
+ }
+ else
+ {
+ emit_error (context, _("missing '='"));
+ }
+ }
+ break;
+
+ case STATE_BETWEEN_ATTRIBUTES:
+ /* Possible next states: AFTER_CLOSE_ANGLE,
+ * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
+ */
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ if (*context->iter == '/')
+ {
+ advance_char (context);
+ context->state = STATE_AFTER_ELISION_SLASH;
+ }
+ else if (*context->iter == '>')
+ {
+ advance_char (context);
+ context->state = STATE_AFTER_CLOSE_ANGLE;
+ }
+ else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
+ {
+ context->state = STATE_INSIDE_ATTRIBUTE_NAME;
+ /* start of attribute name */
+ context->start = context->iter;
+ }
+ else
+ {
+ emit_error (context, _("missing '>' or '/'"));
+ }
+
+ /* If we're done with attributes, invoke
+ * the start_element callback
+ */
+ if (context->state == STATE_AFTER_ELISION_SLASH ||
+ context->state == STATE_AFTER_CLOSE_ANGLE)
+ emit_start_element (context);
+ }
+ break;
+
+ case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
+ /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
+
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ if (*context->iter == '"')
+ {
+ advance_char (context);
+ context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
+ context->start = context->iter;
+ }
+ else if (*context->iter == '\'')
+ {
+ advance_char (context);
+ context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
+ context->start = context->iter;
+ }
+ else
+ {
+ emit_error (context, _("missing opening quote"));
+ }
+ }
+ break;
+
+ case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
+ case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
+ /* Possible next states: BETWEEN_ATTRIBUTES */
+ {
+ char delim;
+
+ if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
+ {
+ delim = '\'';
+ }
+ else
+ {
+ delim = '"';
+ }
+
+ do
+ {
+ if (*context->iter == delim)
+ break;
+ }
+ while (advance_char (context));
+ }
+ if (context->iter == context->current_text_end)
+ {
+ /* The value hasn't necessarily ended. Merge with
+ * partial chunk, leave state unchanged.
+ */
+ add_to_partial (context, context->start, context->iter);
+ }
+ else
+ {
+ bool is_ascii;
+ /* The value has ended at the quote mark. Combine it
+ * with the partial chunk if any; set it for the current
+ * attribute.
+ */
+ add_to_partial (context, context->start, context->iter);
+
+ assert (context->cur_attr >= 0);
+
+ if (unescape_string_inplace (context, context->partial_chunk,
+ &is_ascii)
+ && (is_ascii
+ || text_validate (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen)))
+ {
+ /* success, advance past quote and set state. */
+ context->attr_values[context->cur_attr] =
+ markup_string_free (context->partial_chunk, false);
+ context->partial_chunk = NULL;
+ advance_char (context);
+ context->state = STATE_BETWEEN_ATTRIBUTES;
+ context->start = NULL;
+ }
+
+ truncate_partial (context);
+ }
+ break;
+
+ case STATE_INSIDE_TEXT:
+ /* Possible next states: AFTER_OPEN_ANGLE */
+ do
+ {
+ if (*context->iter == '<')
+ break;
+ }
+ while (advance_char (context));
+
+ /* The text hasn't necessarily ended. Merge with
+ * partial chunk, leave state unchanged.
+ */
+
+ add_to_partial (context, context->start, context->iter);
+
+ if (context->iter != context->current_text_end)
+ {
+ bool is_ascii;
+
+ /* The text has ended at the open angle. Call the text
+ * callback.
+ */
+ if (unescape_string_inplace (context, context->partial_chunk,
+ &is_ascii)
+ && (is_ascii
+ || text_validate (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen)))
+ {
+ if (context->parser->text)
+ (*context->parser->text) (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen,
+ context->user_data);
+
+ /* advance past open angle and set state. */
+ advance_char (context);
+ context->state = STATE_AFTER_OPEN_ANGLE;
+ /* could begin a passthrough */
+ context->start = context->iter;
+ }
+
+ truncate_partial (context);
+ }
+ break;
+
+ case STATE_AFTER_CLOSE_TAG_SLASH:
+ /* Possible next state: INSIDE_CLOSE_TAG_NAME */
+ if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
+ {
+ context->state = STATE_INSIDE_CLOSE_TAG_NAME;
+
+ /* start of tag name */
+ context->start = context->iter;
+ }
+ else
+ {
+ emit_error (context, _("invalid character after '</'"));
+ }
+ break;
+
+ case STATE_INSIDE_CLOSE_TAG_NAME:
+ /* Possible next state: AFTER_CLOSE_TAG_NAME */
+ advance_to_name_end (context);
+ add_to_partial (context, context->start, context->iter);
+
+ if (context->iter != context->current_text_end)
+ context->state = STATE_AFTER_CLOSE_TAG_NAME;
+ break;
+
+ case STATE_AFTER_CLOSE_TAG_NAME:
+ /* Possible next state: AFTER_CLOSE_ANGLE */
+
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ markup_string_ty *close_name;
+
+ close_name = context->partial_chunk;
+ context->partial_chunk = NULL;
+
+ if (*context->iter != '>')
+ {
+ emit_error (context,
+ _("invalid character after a close element name"));
+ }
+ else if (gl_list_size (context->tag_stack) == 0)
+ {
+ emit_error (context, _("element is closed"));
+ }
+ else if (strcmp (close_name->buffer, current_element (context)) != 0)
+ {
+ emit_error (context, _("element is closed"));
+ }
+ else
+ {
+ advance_char (context);
+ context->state = STATE_AFTER_CLOSE_ANGLE;
+ context->start = NULL;
+
+ emit_end_element (context);
+ }
+ context->partial_chunk = close_name;
+ truncate_partial (context);
+ }
+ break;
+
+ case STATE_INSIDE_PASSTHROUGH:
+ /* Possible next state: AFTER_CLOSE_ANGLE */
+ do
+ {
+ if (*context->iter == '<')
+ context->balance++;
+ if (*context->iter == '>')
+ {
+ char *str;
+ size_t len;
+
+ context->balance--;
+ add_to_partial (context, context->start, context->iter);
+ context->start = context->iter;
+
+ str = context->partial_chunk->buffer;
+ len = context->partial_chunk->buflen;
+
+ if (str[1] == '?' && str[len - 1] == '?')
+ break;
+ if (strncmp (str, "<!--", 4) == 0 &&
+ strcmp (str + len - 2, "--") == 0)
+ break;
+ if (strncmp (str, "<![CDATA[", 9) == 0 &&
+ strcmp (str + len - 2, "]]") == 0)
+ break;
+ if (strncmp (str, "<!DOCTYPE", 9) == 0 &&
+ context->balance == 0)
+ break;
+ }
+ }
+ while (advance_char (context));
+
+ if (context->iter == context->current_text_end)
+ {
+ /* The passthrough hasn't necessarily ended. Merge with
+ * partial chunk, leave state unchanged.
+ */
+ add_to_partial (context, context->start, context->iter);
+ }
+ else
+ {
+ /* The passthrough has ended at the close angle. Combine
+ * it with the partial chunk if any. Call the passthrough
+ * callback. Note that the open/close angles are
+ * included in the text of the passthrough.
+ */
+ advance_char (context); /* advance past close angle */
+ add_to_partial (context, context->start, context->iter);
+
+ if (context->flags & MARKUP_TREAT_CDATA_AS_TEXT &&
+ strncmp (context->partial_chunk->buffer, "<![CDATA[", 9) == 0)
+ {
+ if (context->parser->text &&
+ text_validate (context,
+ context->partial_chunk->buffer + 9,
+ context->partial_chunk->buflen - 12))
+ (*context->parser->text) (context,
+ context->partial_chunk->buffer + 9,
+ context->partial_chunk->buflen - 12,
+ context->user_data);
+ }
+ else if (context->parser->passthrough &&
+ text_validate (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen))
+ (*context->parser->passthrough) (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen,
+ context->user_data);
+
+ truncate_partial (context);
+
+ context->state = STATE_AFTER_CLOSE_ANGLE;
+ context->start = context->iter; /* could begin text */
+ }
+ break;
+
+ case STATE_ERROR:
+ goto finished;
+ break;
+
+ default:
+ abort ();
+ break;
+ }
+ }
+
+ finished:
+ context->parsing = false;
+
+ return context->state != STATE_ERROR;
+}
+
+/* Declares the input of CONTEXT finished: everything the caller had to
+ * offer has already gone through markup_parse_context_parse.
+ *
+ * An empty document, or one that stops in a state where it cannot
+ * legitimately end (for instance with elements still open), gets an
+ * error emitted.  The context must be neither in the middle of a parse
+ * nor already in the error state; both conditions are asserted.
+ * Returns false for an empty document; otherwise returns whether the
+ * context is outside the error state afterwards.  */
+bool
+markup_parse_context_end_parse (markup_parse_context_ty *context)
+{
+  /* Complaint to emit; remains NULL when the document ended properly.  */
+  const char *complaint = NULL;
+
+  assert (context != NULL);
+  assert (!context->parsing);
+  assert (context->state != STATE_ERROR);
+
+  /* Drop whatever partial chunk the context is still holding.  */
+  if (context->partial_chunk)
+    {
+      markup_string_free (context->partial_chunk, true);
+      context->partial_chunk = NULL;
+    }
+
+  if (context->document_empty)
+    {
+      emit_error (context, _("empty document"));
+      return false;
+    }
+
+  context->parsing = true;
+
+  switch (context->state)
+    {
+    case STATE_START:
+      /* Clean end of document.  */
+      break;
+
+    case STATE_AFTER_OPEN_ANGLE:
+      complaint = _("document ended unexpectedly just after '<'");
+      break;
+
+    case STATE_AFTER_CLOSE_ANGLE:
+      /* Same wording as STATE_INSIDE_TEXT, but only with open elements.  */
+      if (gl_list_size (context->tag_stack) != 0)
+        complaint = _("document ended unexpectedly with elements still open");
+      break;
+
+    case STATE_AFTER_ELISION_SLASH:
+      complaint = _("document ended unexpectedly without '>'");
+      break;
+
+    case STATE_INSIDE_OPEN_TAG_NAME:
+      complaint = _("document ended unexpectedly inside an element name");
+      break;
+
+    case STATE_INSIDE_ATTRIBUTE_NAME:
+    case STATE_AFTER_ATTRIBUTE_NAME:
+      complaint = _("document ended unexpectedly inside an attribute name");
+      break;
+
+    case STATE_BETWEEN_ATTRIBUTES:
+      complaint = _("document ended unexpectedly inside an open tag");
+      break;
+
+    case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
+      complaint = _("document ended unexpectedly after '='");
+      break;
+
+    case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
+    case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
+      complaint = _("document ended unexpectedly inside an attribute value");
+      break;
+
+    case STATE_INSIDE_TEXT:
+      assert (gl_list_size (context->tag_stack) > 0);
+      complaint = _("document ended unexpectedly with elements still open");
+      break;
+
+    case STATE_AFTER_CLOSE_TAG_SLASH:
+    case STATE_INSIDE_CLOSE_TAG_NAME:
+    case STATE_AFTER_CLOSE_TAG_NAME:
+      complaint = _("document ended unexpectedly inside the close tag");
+      break;
+
+    case STATE_INSIDE_PASSTHROUGH:
+      complaint = _("document ended unexpectedly inside a comment or "
+                    "processing instruction");
+      break;
+
+    case STATE_ERROR:
+    default:
+      /* STATE_ERROR was asserted against on entry; the rest is unknown.  */
+      abort ();
+    }
+
+  if (complaint != NULL)
+    emit_error (context, complaint);
+
+  context->parsing = false;
+
+  return context->state != STATE_ERROR;
+}
+
+const char *
+markup_parse_context_get_error (markup_parse_context_ty *context)
+{ /* Accessor: yields CONTEXT's error_text member exactly as stored.  */
+ return context->error_text; /* NOTE(review): set outside this chunk */
+}