git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
write_xar: move libxml2 behind an abstraction layer (#1849)
author: Dustin L. Howett <dustin@howett.net>
Tue, 22 Oct 2024 09:10:50 +0000 (04:10 -0500)
committer: GitHub <noreply@github.com>
Tue, 22 Oct 2024 09:10:50 +0000 (11:10 +0200)
This commit prepares the XAR writer for another XML writing backend.

Almost everything in this changeset leaves the code identical to how
it started, except for a new layer of indirection between the xar writer
and the XML writer.

The things that are not one-to-one renames include:
- The removal of `UTF8Toisolat1` for the purposes of validating UTF-8:
  - The writer code made a copy of every filename for the purposes of
    checking whether it was Latin-1 stored as UTF-8. In xar, non-Latin-1
    names are stored Base64-encoded.
  - I've replaced this use because (1) it was inefficient and (2)
    `UTF8Toisolat1` is a `libxml2` export.
  - The new function gives slightly different results from the one it is
    replacing for invalid UTF-8. Namely, it treats illegal UTF-8 "overlong"
    encodings of Latin-1 codepoints as _invalid_. It operates on the
    principle that we can determine whether something is Latin-1 based
    entirely on how long the sequence is expected to be.
- The move of `SetIndent` to before `StartDocument`, which the
  abstraction layer immediately undoes. This is to accommodate XML writers
  that require indent to be set _before_ the document starts.

libarchive/archive_write_set_format_xar.c
libarchive/test/test_write_format_xar.c

index 96ef85c694875b13b70fd31783364d27d1a1a176..fdc1b3de12845a8397e8321d08584c3dbb0668d4 100644 (file)
@@ -34,6 +34,9 @@
 #include <stdlib.h>
 #if HAVE_LIBXML_XMLWRITER_H
 #include <libxml/xmlwriter.h>
+#if defined(LIBXML_VERSION) && LIBXML_VERSION >= 20703
+#define XAR_WRITER_HAS_XML
+#endif /* LIBXML_VERSION */
 #endif
 #ifdef HAVE_BZLIB_H
 #include <bzlib.h>
@@ -70,8 +73,7 @@
  *
  */
 
-#if !(defined(HAVE_LIBXML_XMLWRITER_H) && defined(LIBXML_VERSION) &&\
-       LIBXML_VERSION >= 20703) ||\
+#if !defined(XAR_WRITER_HAS_XML) ||\
        !defined(HAVE_ZLIB_H) || \
        !defined(ARCHIVE_HAS_MD5) || !defined(ARCHIVE_HAS_SHA1)
 /*
@@ -94,9 +96,26 @@ archive_write_set_format_xar(struct archive *_a)
 
 #else  /* Support xar format */
 
-/*#define DEBUG_PRINT_TOC              1 */
+struct xml_writer;
+static int xml_writer_create(struct xml_writer **pctx);
+static int xml_writer_start_document(struct xml_writer *ctx);
+static int xml_writer_end_document(struct xml_writer *ctx);
+static int xml_writer_set_indent(struct xml_writer *ctx, unsigned int indent);
+static int xml_writer_start_element(struct xml_writer *ctx,
+    const char *localName);
+static int xml_writer_write_attribute(struct xml_writer *ctx, const char *key,
+    const char *value);
+static int xml_writer_write_attributef(struct xml_writer *ctx, const char *key,
+    const char *format, ...);
+static int xml_writer_write_string(struct xml_writer *ctx, const char *string);
+static int xml_writer_write_base64(struct xml_writer* ctx,
+    const char *data, size_t start, size_t len);
+static int xml_writer_end_element(struct xml_writer *ctx);
+static int xml_writer_get_final_content_and_length(struct xml_writer *ctx,
+    const char **out, size_t *size);
+static int xml_writer_destroy(struct xml_writer *ctx);
 
-#define BAD_CAST_CONST (const xmlChar *)
+/*#define DEBUG_PRINT_TOC              1 */
 
 #define HEADER_MAGIC   0x78617221
 #define HEADER_SIZE    28
@@ -807,50 +826,49 @@ xar_finish_entry(struct archive_write *a)
 }
 
 static int
-xmlwrite_string_attr(struct archive_write *a, xmlTextWriterPtr writer,
+xmlwrite_string_attr(struct archive_write *a, struct xml_writer *writer,
        const char *key, const char *value,
        const char *attrkey, const char *attrvalue)
 {
        int r;
 
-       r = xmlTextWriterStartElement(writer, BAD_CAST_CONST(key));
+       r = xml_writer_start_element(writer, key);
        if (r < 0) {
                archive_set_error(&a->archive,
                    ARCHIVE_ERRNO_MISC,
-                   "xmlTextWriterStartElement() failed: %d", r);
+                   "xml_writer_start_element() failed: %d", r);
                return (ARCHIVE_FATAL);
        }
        if (attrkey != NULL && attrvalue != NULL) {
-               r = xmlTextWriterWriteAttribute(writer,
-                   BAD_CAST_CONST(attrkey), BAD_CAST_CONST(attrvalue));
+               r = xml_writer_write_attribute(writer, attrkey, attrvalue);
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterWriteAttribute() failed: %d", r);
+                           "xml_writer_write_attribute() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
        }
        if (value != NULL) {
-               r = xmlTextWriterWriteString(writer, BAD_CAST_CONST(value));
+               r = xml_writer_write_string(writer, value);
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterWriteString() failed: %d", r);
+                           "xml_writer_write_string() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
        }
-       r = xmlTextWriterEndElement(writer);
+       r = xml_writer_end_element(writer);
        if (r < 0) {
                archive_set_error(&a->archive,
                    ARCHIVE_ERRNO_MISC,
-                   "xmlTextWriterEndElement() failed: %d", r);
+                   "xml_writer_end_element() failed: %d", r);
                return (ARCHIVE_FATAL);
        }
        return (ARCHIVE_OK);
 }
 
 static int
-xmlwrite_string(struct archive_write *a, xmlTextWriterPtr writer,
+xmlwrite_string(struct archive_write *a, struct xml_writer *writer,
        const char *key, const char *value)
 {
        int r;
@@ -858,34 +876,34 @@ xmlwrite_string(struct archive_write *a, xmlTextWriterPtr writer,
        if (value == NULL)
                return (ARCHIVE_OK);
 
-       r = xmlTextWriterStartElement(writer, BAD_CAST_CONST(key));
+       r = xml_writer_start_element(writer, key);
        if (r < 0) {
                archive_set_error(&a->archive,
                    ARCHIVE_ERRNO_MISC,
-                   "xmlTextWriterStartElement() failed: %d", r);
+                   "xml_writer_start_element() failed: %d", r);
                return (ARCHIVE_FATAL);
        }
        if (value != NULL) {
-               r = xmlTextWriterWriteString(writer, BAD_CAST_CONST(value));
+               r = xml_writer_write_string(writer, value);
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterWriteString() failed: %d", r);
+                           "xml_writer_write_string() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
        }
-       r = xmlTextWriterEndElement(writer);
+       r = xml_writer_end_element(writer);
        if (r < 0) {
                archive_set_error(&a->archive,
                    ARCHIVE_ERRNO_MISC,
-                   "xmlTextWriterEndElement() failed: %d", r);
+                   "xml_writer_end_element() failed: %d", r);
                return (ARCHIVE_FATAL);
        }
        return (ARCHIVE_OK);
 }
 
 static int
-xmlwrite_fstring(struct archive_write *a, xmlTextWriterPtr writer,
+xmlwrite_fstring(struct archive_write *a, struct xml_writer *writer,
        const char *key, const char *fmt, ...)
 {
        struct xar *xar;
@@ -900,7 +918,7 @@ xmlwrite_fstring(struct archive_write *a, xmlTextWriterPtr writer,
 }
 
 static int
-xmlwrite_time(struct archive_write *a, xmlTextWriterPtr writer,
+xmlwrite_time(struct archive_write *a, struct xml_writer *writer,
        const char *key, time_t t, int z)
 {
        char timestr[100];
@@ -922,7 +940,7 @@ xmlwrite_time(struct archive_write *a, xmlTextWriterPtr writer,
 }
 
 static int
-xmlwrite_mode(struct archive_write *a, xmlTextWriterPtr writer,
+xmlwrite_mode(struct archive_write *a, struct xml_writer *writer,
        const char *key, mode_t mode)
 {
        char ms[5];
@@ -937,7 +955,7 @@ xmlwrite_mode(struct archive_write *a, xmlTextWriterPtr writer,
 }
 
 static int
-xmlwrite_sum(struct archive_write *a, xmlTextWriterPtr writer,
+xmlwrite_sum(struct archive_write *a, struct xml_writer *writer,
        const char *key, struct chksumval *sum)
 {
        const char *algname;
@@ -971,7 +989,7 @@ xmlwrite_sum(struct archive_write *a, xmlTextWriterPtr writer,
 }
 
 static int
-xmlwrite_heap(struct archive_write *a, xmlTextWriterPtr writer,
+xmlwrite_heap(struct archive_write *a, struct xml_writer *writer,
        struct heap_data *heap)
 {
        const char *encname;
@@ -1029,7 +1047,7 @@ xmlwrite_heap(struct archive_write *a, xmlTextWriterPtr writer,
  * Our implements records both <flags> and <ext2> if it's necessary.
  */
 static int
-make_fflags_entry(struct archive_write *a, xmlTextWriterPtr writer,
+make_fflags_entry(struct archive_write *a, struct xml_writer *writer,
     const char *element, const char *fflags_text)
 {
        static const struct flagentry {
@@ -1119,11 +1137,11 @@ make_fflags_entry(struct archive_write *a, xmlTextWriterPtr writer,
        } while (p != NULL);
 
        if (n > 0) {
-               r = xmlTextWriterStartElement(writer, BAD_CAST_CONST(element));
+               r = xml_writer_start_element(writer, element);
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterStartElement() failed: %d", r);
+                           "xml_writer_start_element() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
                for (i = 0; i < n; i++) {
@@ -1133,29 +1151,59 @@ make_fflags_entry(struct archive_write *a, xmlTextWriterPtr writer,
                                return (r);
                }
 
-               r = xmlTextWriterEndElement(writer);
+               r = xml_writer_end_element(writer);
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterEndElement() failed: %d", r);
+                           "xml_writer_end_element() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
        }
        return (ARCHIVE_OK);
 }
 
+/*
+ * This function determines whether a UTF-8 string contains
+ * only codepoints that are convertible to Latin-1. Strings
+ * beyond Latin-1 are stored base64-encoded in the XAR TOC.
+ */
 static int
-make_file_entry(struct archive_write *a, xmlTextWriterPtr writer,
+is_u8_zstring_latin1(const char *in)
+{
+       unsigned int c;
+       while (*in) {
+               c = *in++;
+               if (c < 0x80) continue;
+               /*
+                * Filter out non-continuation, any continuation of 2-3
+                * bytes, and any continuation of 1 byte whose high 3 bits
+                * are non-zero. Recall, 1-byte continuations can store 11
+                * bits whereas Latin-1 codepoints are only 8 bits wide.
+                */
+               if ((c & 0xFC) != 0xC0)
+                       return (0);
+               c = *in++;
+               /*
+                * If we get any non-continuation byte (including 0x00!),
+                * the string is not valid UTF-8.
+                */
+               if ((c & 0xC0) != 0x80)
+                       return (0); /* invalid unicode */
+       }
+       return (1);
+}
+
+static int
+make_file_entry(struct archive_write *a, struct xml_writer *writer,
     struct file *file)
 {
        struct xar *xar;
        const char *filetype, *filelink, *fflags;
        struct archive_string linkto;
        struct heap_data *heap;
-       unsigned char *tmp;
        const char *p;
        size_t len;
-       int r, r2, l, ll;
+       int r, r2;
 
        xar = (struct xar *)a->format_data;
        r2 = ARCHIVE_OK;
@@ -1163,44 +1211,35 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer,
        /*
         * Make a file name entry, "<name>".
         */
-       l = ll = (int)archive_strlen(&(file->basename));
-       tmp = malloc(l);
-       if (tmp == NULL) {
-               archive_set_error(&a->archive, ENOMEM,
-                   "Can't allocate memory");
-               return (ARCHIVE_FATAL);
-       }
-       r = UTF8Toisolat1(tmp, &l, BAD_CAST(file->basename.s), &ll);
-       free(tmp);
-       if (r < 0) {
-               r = xmlTextWriterStartElement(writer, BAD_CAST("name"));
+       if (!is_u8_zstring_latin1(file->basename.s)) {
+               r = xml_writer_start_element(writer, "name");
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterStartElement() failed: %d", r);
+                           "xml_writer_start_element() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
-               r = xmlTextWriterWriteAttribute(writer,
-                   BAD_CAST("enctype"), BAD_CAST("base64"));
+               r = xml_writer_write_attribute(writer,
+                   "enctype", "base64");
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterWriteAttribute() failed: %d", r);
+                           "xml_writer_write_attribute() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
-               r = xmlTextWriterWriteBase64(writer, file->basename.s,
+               r = xml_writer_write_base64(writer, file->basename.s,
                    0, (int)archive_strlen(&(file->basename)));
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterWriteBase64() failed: %d", r);
+                           "xml_writer_write_base64() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
-               r = xmlTextWriterEndElement(writer);
+               r = xml_writer_end_element(writer);
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterEndElement() failed: %d", r);
+                           "xml_writer_end_element() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
        } else {
@@ -1281,11 +1320,11 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer,
                break;
        case AE_IFCHR:
        case AE_IFBLK:
-               r = xmlTextWriterStartElement(writer, BAD_CAST("device"));
+               r = xml_writer_start_element(writer, "device");
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterStartElement() failed: %d", r);
+                           "xml_writer_start_element() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
                r = xmlwrite_fstring(a, writer, "major",
@@ -1296,11 +1335,11 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer,
                    "%d", archive_entry_rdevminor(file->entry));
                if (r < 0)
                        return (ARCHIVE_FATAL);
-               r = xmlTextWriterEndElement(writer);
+               r = xml_writer_end_element(writer);
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterEndElement() failed: %d", r);
+                           "xml_writer_end_element() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
                break;
@@ -1436,19 +1475,19 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer,
 
                archive_entry_xattr_next(file->entry,
                    &name, &value, &size);
-               r = xmlTextWriterStartElement(writer, BAD_CAST("ea"));
+               r = xml_writer_start_element(writer, "ea");
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterStartElement() failed: %d", r);
+                           "xml_writer_start_element() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
-               r = xmlTextWriterWriteFormatAttribute(writer,
-                   BAD_CAST("id"), "%d", heap->id);
+               r = xml_writer_write_attributef(writer,
+                   "id", "%d", heap->id);
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterWriteAttribute() failed: %d", r);
+                           "xml_writer_write_attributef() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
                r = xmlwrite_heap(a, writer, heap);
@@ -1458,11 +1497,11 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer,
                if (r < 0)
                        return (ARCHIVE_FATAL);
 
-               r = xmlTextWriterEndElement(writer);
+               r = xml_writer_end_element(writer);
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterEndElement() failed: %d", r);
+                           "xml_writer_end_element() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
        }
@@ -1471,11 +1510,11 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer,
         * Make a file data entry, "<data>".
         */
        if (file->data.length > 0) {
-               r = xmlTextWriterStartElement(writer, BAD_CAST("data"));
+               r = xml_writer_start_element(writer, "data");
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterStartElement() failed: %d", r);
+                           "xml_writer_start_element() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
 
@@ -1483,21 +1522,21 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer,
                if (r < 0)
                        return (ARCHIVE_FATAL);
 
-               r = xmlTextWriterEndElement(writer);
+               r = xml_writer_end_element(writer);
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterEndElement() failed: %d", r);
+                           "xml_writer_end_element() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
        }
 
        if (archive_strlen(&file->script) > 0) {
-               r = xmlTextWriterStartElement(writer, BAD_CAST("content"));
+               r = xml_writer_start_element(writer, "content");
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterStartElement() failed: %d", r);
+                           "xml_writer_start_element() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
 
@@ -1510,11 +1549,11 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer,
                if (r < 0)
                        return (ARCHIVE_FATAL);
 
-               r = xmlTextWriterEndElement(writer);
+               r = xml_writer_end_element(writer);
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterEndElement() failed: %d", r);
+                           "xml_writer_end_element() failed: %d", r);
                        return (ARCHIVE_FATAL);
                }
        }
@@ -1530,8 +1569,9 @@ make_toc(struct archive_write *a)
 {
        struct xar *xar;
        struct file *np;
-       xmlBufferPtr bp;
-       xmlTextWriterPtr writer;
+       struct xml_writer *writer;
+       const char* content;
+       size_t use;
        int algsize;
        int r, ret;
 
@@ -1543,51 +1583,43 @@ make_toc(struct archive_write *a)
         * Initialize xml writer.
         */
        writer = NULL;
-       bp = xmlBufferCreate();
-       if (bp == NULL) {
-               archive_set_error(&a->archive, ENOMEM,
-                   "xmlBufferCreate() "
-                   "couldn't create xml buffer");
-               goto exit_toc;
-       }
-       writer = xmlNewTextWriterMemory(bp, 0);
-       if (writer == NULL) {
+       r = xml_writer_create(&writer);
+       if (r < 0) {
                archive_set_error(&a->archive,
                    ARCHIVE_ERRNO_MISC,
-                   "xmlNewTextWriterMemory() "
-                   "couldn't create xml writer");
+                   "xml_writer_create() failed: %d", r);
                goto exit_toc;
        }
-       r = xmlTextWriterStartDocument(writer, "1.0", "UTF-8", NULL);
+       r = xml_writer_set_indent(writer, 4);
        if (r < 0) {
                archive_set_error(&a->archive,
                    ARCHIVE_ERRNO_MISC,
-                   "xmlTextWriterStartDocument() failed: %d", r);
+                   "xml_writer_set_indent() failed: %d", r);
                goto exit_toc;
        }
-       r = xmlTextWriterSetIndent(writer, 4);
+       r = xml_writer_start_document(writer);
        if (r < 0) {
                archive_set_error(&a->archive,
                    ARCHIVE_ERRNO_MISC,
-                   "xmlTextWriterSetIndent() failed: %d", r);
+                   "xml_writer_start_document() failed: %d", r);
                goto exit_toc;
        }
 
        /*
         * Start recording TOC
         */
-       r = xmlTextWriterStartElement(writer, BAD_CAST("xar"));
+       r = xml_writer_start_element(writer, "xar");
        if (r < 0) {
                archive_set_error(&a->archive,
                    ARCHIVE_ERRNO_MISC,
-                   "xmlTextWriterStartElement() failed: %d", r);
+                   "xml_writer_start_element() failed: %d", r);
                goto exit_toc;
        }
-       r = xmlTextWriterStartElement(writer, BAD_CAST("toc"));
+       r = xml_writer_start_element(writer, "toc");
        if (r < 0) {
                archive_set_error(&a->archive,
                    ARCHIVE_ERRNO_MISC,
-                   "xmlTextWriterStartDocument() failed: %d", r);
+                   "xml_writer_start_element() failed: %d", r);
                goto exit_toc;
        }
 
@@ -1606,19 +1638,19 @@ make_toc(struct archive_write *a)
                /*
                 * Record TOC checksum
                 */
-               r = xmlTextWriterStartElement(writer, BAD_CAST("checksum"));
+               r = xml_writer_start_element(writer, "checksum");
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterStartElement() failed: %d", r);
+                           "xml_writer_start_element() failed: %d", r);
                        goto exit_toc;
                }
-               r = xmlTextWriterWriteAttribute(writer, BAD_CAST("style"),
-                   BAD_CAST_CONST(getalgname(xar->opt_toc_sumalg)));
+               r = xml_writer_write_attribute(writer, "style",
+                   getalgname(xar->opt_toc_sumalg));
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterWriteAttribute() failed: %d", r);
+                           "xml_writer_write_attribute() failed: %d", r);
                        goto exit_toc;
                }
 
@@ -1636,11 +1668,11 @@ make_toc(struct archive_write *a)
                if (r < 0)
                        goto exit_toc;
 
-               r = xmlTextWriterEndElement(writer);
+               r = xml_writer_end_element(writer);
                if (r < 0) {
                        archive_set_error(&a->archive,
                            ARCHIVE_ERRNO_MISC,
-                           "xmlTextWriterEndElement() failed: %d", r);
+                           "xml_writer_end_element() failed: %d", r);
                        goto exit_toc;
                }
        }
@@ -1656,32 +1688,32 @@ make_toc(struct archive_write *a)
                if (np->dir && np->children.first != NULL) {
                        /* Enter to sub directories. */
                        np = np->children.first;
-                       r = xmlTextWriterStartElement(writer,
-                           BAD_CAST("file"));
+                       r = xml_writer_start_element(writer,
+                           "file");
                        if (r < 0) {
                                archive_set_error(&a->archive,
                                    ARCHIVE_ERRNO_MISC,
-                                   "xmlTextWriterStartElement() "
+                                   "xml_writer_start_element() "
                                    "failed: %d", r);
                                goto exit_toc;
                        }
-                       r = xmlTextWriterWriteFormatAttribute(
-                           writer, BAD_CAST("id"), "%d", np->id);
+                       r = xml_writer_write_attributef(
+                           writer, "id", "%d", np->id);
                        if (r < 0) {
                                archive_set_error(&a->archive,
                                    ARCHIVE_ERRNO_MISC,
-                                   "xmlTextWriterWriteAttribute() "
+                                   "xml_writer_write_attributef() "
                                    "failed: %d", r);
                                goto exit_toc;
                        }
                        continue;
                }
                while (np != np->parent) {
-                       r = xmlTextWriterEndElement(writer);
+                       r = xml_writer_end_element(writer);
                        if (r < 0) {
                                archive_set_error(&a->archive,
                                    ARCHIVE_ERRNO_MISC,
-                                   "xmlTextWriterEndElement() "
+                                   "xml_writer_end_element() "
                                    "failed: %d", r);
                                goto exit_toc;
                        }
@@ -1690,21 +1722,21 @@ make_toc(struct archive_write *a)
                                np = np->parent;
                        } else {
                                np = np->chnext;
-                               r = xmlTextWriterStartElement(writer,
-                                   BAD_CAST("file"));
+                               r = xml_writer_start_element(writer,
+                                   "file");
                                if (r < 0) {
                                        archive_set_error(&a->archive,
                                            ARCHIVE_ERRNO_MISC,
-                                           "xmlTextWriterStartElement() "
+                                           "xml_writer_start_element() "
                                            "failed: %d", r);
                                        goto exit_toc;
                                }
-                               r = xmlTextWriterWriteFormatAttribute(
-                                   writer, BAD_CAST("id"), "%d", np->id);
+                               r = xml_writer_write_attributef(
+                                   writer, "id", "%d", np->id);
                                if (r < 0) {
                                        archive_set_error(&a->archive,
                                            ARCHIVE_ERRNO_MISC,
-                                           "xmlTextWriterWriteAttribute() "
+                                           "xml_writer_write_attributef() "
                                            "failed: %d", r);
                                        goto exit_toc;
                                }
@@ -1713,31 +1745,40 @@ make_toc(struct archive_write *a)
                }
        } while (np != np->parent);
 
-       r = xmlTextWriterEndDocument(writer);
+       r = xml_writer_end_document(writer);
        if (r < 0) {
                archive_set_error(&a->archive,
                    ARCHIVE_ERRNO_MISC,
-                   "xmlTextWriterEndDocument() failed: %d", r);
+                   "xml_writer_end_document() failed: %d", r);
                goto exit_toc;
        }
+
+       r = xml_writer_get_final_content_and_length(writer, &content, &use);
+       if (r < 0) {
+               archive_set_error(&a->archive,
+                   ARCHIVE_ERRNO_MISC,
+                   "xml_writer_get_final_content_and_length() failed: %d", r);
+               goto exit_toc;
+       }
+
 #if DEBUG_PRINT_TOC
        fprintf(stderr, "\n---TOC-- %d bytes --\n%s\n",
-           strlen((const char *)bp->content), bp->content);
+           (int)strlen(content), content);
 #endif
 
        /*
         * Compress the TOC and calculate the sum of the TOC.
         */
        xar->toc.temp_offset = xar->temp_offset;
-       xar->toc.size = bp->use;
+       xar->toc.size = (uint64_t)use;
        checksum_init(&(xar->a_sumwrk), xar->opt_toc_sumalg);
 
        r = compression_init_encoder_gzip(&(a->archive),
            &(xar->stream), 6, 1);
        if (r != ARCHIVE_OK)
                goto exit_toc;
-       xar->stream.next_in = bp->content;
-       xar->stream.avail_in = bp->use;
+       xar->stream.next_in = (const unsigned char *)content;
+       xar->stream.avail_in = use;
        xar->stream.total_in = 0;
        xar->stream.next_out = xar->wbuff;
        xar->stream.avail_out = sizeof(xar->wbuff);
@@ -1768,9 +1809,7 @@ make_toc(struct archive_write *a)
        ret = ARCHIVE_OK;
 exit_toc:
        if (writer)
-               xmlFreeTextWriter(writer);
-       if (bp)
-               xmlBufferFree(bp);
+               xml_writer_destroy(writer);
 
        return (ret);
 }
@@ -3251,4 +3290,133 @@ getalgname(enum sumalg sumalg)
        }
 }
 
+#if HAVE_LIBXML_XMLWRITER_H
+
+#define BAD_CAST_CONST (const xmlChar *) /* cast to libxml2's const string type */
+
+struct xml_writer {     /* state for the libxml2-backed XML writer */
+       xmlTextWriterPtr writer;        /* text writer created over bp */
+       xmlBufferPtr bp;                /* in-memory buffer receiving the XML */
+       unsigned int indent;            /* stored by set_indent, applied at start */
+};
+
+/*
+ * Allocate a writer that emits XML into an in-memory libxml2 buffer.
+ * Returns 0 and stores the context in *pctx; on allocation failure
+ * frees whatever was built, leaves *pctx untouched and returns -1.
+ */
+static int
+xml_writer_create(struct xml_writer **pctx)
+{
+       struct xml_writer *w = calloc(1, sizeof(*w));
+
+       if (w == NULL)
+               return (-1);
+       w->bp = xmlBufferCreate();
+       if (w->bp != NULL) {
+               w->writer = xmlNewTextWriterMemory(w->bp, 0);
+               if (w->writer != NULL) {
+                       *pctx = w;
+                       return (0);
+               }
+               xmlBufferFree(w->bp);
+       }
+       free(w);
+       return (-1);
+}
+
+static int
+xml_writer_destroy(struct xml_writer *ctx)
+{
+       xmlFreeTextWriter(ctx->writer); /* ctx is dereferenced: must be non-NULL */
+       xmlBufferFree(ctx->bp);         /* released after the writer built on it */
+       free(ctx);
+       return (0);                     /* unconditionally reports success */
+}
+
+/*
+ * Write the XML declaration (1.0, UTF-8), then apply the stored indent.
+ * Returns a negative StartDocument result as-is, else SetIndent's result.
+ */
+static int
+xml_writer_start_document(struct xml_writer *ctx)
+{
+       int rc = xmlTextWriterStartDocument(ctx->writer, "1.0", "UTF-8", NULL);
+
+       return (rc < 0 ? rc :
+           xmlTextWriterSetIndent(ctx->writer, (int)ctx->indent));
+}
+
+static int
+xml_writer_end_document(struct xml_writer *ctx)
+{      /* Thin wrapper: returns libxml2's EndDocument result unchanged. */
+       return (xmlTextWriterEndDocument(ctx->writer));
+}
+
+static int
+xml_writer_set_indent(struct xml_writer *ctx, unsigned int indent)
+{
+       /* libxml2 only lets you set the indent after starting the document */
+       ctx->indent = indent;   /* applied in xml_writer_start_document() */
+       return (0);             /* nothing can fail here */
+}
+
+static int
+xml_writer_start_element(struct xml_writer *ctx, const char *localName)
+{      /* Thin wrapper: returns libxml2's StartElement result unchanged. */
+       return (xmlTextWriterStartElement(ctx->writer,
+           BAD_CAST_CONST(localName)));
+}
+
+static int
+xml_writer_write_attribute(struct xml_writer *ctx,
+    const char *key, const char *value)
+{      /* Thin wrapper: key and value are cast and forwarded to libxml2. */
+       return (xmlTextWriterWriteAttribute(ctx->writer,
+           BAD_CAST_CONST(key), BAD_CAST_CONST(value)));
+}
+
+static int
+xml_writer_write_attributef(struct xml_writer *ctx,
+    const char *key, const char *format, ...)
+{      /* printf-style: format and the variadic args go on as a va_list. */
+       va_list ap;
+       int ret;
+       va_start(ap, format);
+       ret = xmlTextWriterWriteVFormatAttribute(ctx->writer,
+           BAD_CAST_CONST(key), format, ap);
+       va_end(ap);     /* ap is finished before libxml2's result is returned */
+       return (ret);
+}
+
+static int
+xml_writer_write_string(struct xml_writer *ctx, const char *string)
+{      /* Thin wrapper: returns libxml2's WriteString result unchanged. */
+       return (xmlTextWriterWriteString(ctx->writer, BAD_CAST_CONST(string)));
+}
+
+static int
+xml_writer_write_base64(struct xml_writer* ctx,
+    const char *data, size_t start, size_t len)
+{      /* NOTE(review): start/len are narrowed to int; assumes they fit. */
+       return (xmlTextWriterWriteBase64(ctx->writer, data,
+           (int)start, (int)len));
+}
+
+static int
+xml_writer_end_element(struct xml_writer *ctx)
+{      /* Thin wrapper: returns libxml2's EndElement result unchanged. */
+       return (xmlTextWriterEndElement(ctx->writer));
+}
+
+static int
+xml_writer_get_final_content_and_length(struct xml_writer *ctx,
+    const char **out, size_t *size)
+{      /* Use libxml2's accessors instead of reading xmlBuffer's fields. */
+       *out = (const char *)xmlBufferContent(ctx->bp);
+       *size = (size_t)xmlBufferLength(ctx->bp);
+       return (0);     /* *out is borrowed; xml_writer_destroy() frees it */
+}
+#endif /* HAVE_LIBXML_XMLWRITER_H */
+
 #endif /* Support xar format */
index f1f3032912a7bd0fe53eb88109b08756fa12127a..a3e2577de9649f4f81af994872fb5a3c3683384e 100644 (file)
@@ -25,6 +25,8 @@
  */
 #include "test.h"
 
+#include <locale.h>
+
 static void
 test_xar(const char *option)
 {
@@ -146,6 +148,20 @@ test_xar(const char *option)
        assertEqualIntA(a, ARCHIVE_FAILED, archive_write_header(a, ae));
        archive_entry_free(ae);
 
+       /*
+        * "dir/file{UNICODE}" has a name that requires base64 encoding
+        */
+       assert((ae = archive_entry_new()) != NULL);
+       archive_entry_set_atime(ae, 2, 20);
+       archive_entry_set_ctime(ae, 4, 40);
+       archive_entry_set_mtime(ae, 5, 50);
+       archive_entry_copy_pathname_w(ae, L"dir/file\U0001F574");
+       archive_entry_set_mode(ae, AE_IFREG | 0755);
+       archive_entry_set_size(ae, 8);
+       assertEqualIntA(a, ARCHIVE_OK, archive_write_header(a, ae));
+       archive_entry_free(ae);
+       assertEqualIntA(a, 8, archive_write_data(a, "ghijklmn", 9));
+
        /*
         * XXX TODO XXX Archive directory, other file types.
         * Archive extended attributes, ACLs, other metadata.
@@ -261,6 +277,22 @@ test_xar(const char *option)
        assertEqualString("dir/dir3", archive_entry_pathname(ae));
        assert((AE_IFDIR | 0755) == archive_entry_mode(ae));
 
+       /*
+        * Read "dir/file{UNICODE}"
+        */
+       assertEqualIntA(a, 0, archive_read_next_header(a, &ae));
+       assertEqualInt(2, archive_entry_atime(ae));
+       assertEqualInt(0, archive_entry_atime_nsec(ae));
+       assertEqualInt(4, archive_entry_ctime(ae));
+       assertEqualInt(0, archive_entry_ctime_nsec(ae));
+       assertEqualInt(5, archive_entry_mtime(ae));
+       assertEqualInt(0, archive_entry_mtime_nsec(ae));
+       assertEqualWString(L"dir/file\U0001F574", archive_entry_pathname_w(ae));
+       assert((AE_IFREG | 0755) == archive_entry_mode(ae));
+       assertEqualInt(8, archive_entry_size(ae));
+       assertEqualIntA(a, 8, archive_read_data(a, buff2, 10));
+       assertEqualMem(buff2, "ghijklmn", 8);
+
        /*
         * Verify the end of the archive.
         */
@@ -273,6 +305,13 @@ test_xar(const char *option)
 
 DEFINE_TEST(test_write_format_xar)
 {
+       /* xar mandates the use of UTF-8 XML; if we cannot
+        * use UTF-8, perhaps we should not write xar. */
+       if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
+               skipping("en_US.UTF-8 locale not available on this system.");
+               return;
+       }
+
        /* Default mode. */
        test_xar(NULL);