From e619342dfa36b887ffa0ea33e98d04cb161cd7de Mon Sep 17 00:00:00 2001
From: "Dustin L. Howett"
Date: Tue, 22 Oct 2024 04:10:50 -0500
Subject: [PATCH] write_xar: move libxml2 behind an abstraction layer (#1849)

This commit prepares the XAR writer for another XML writing backend.

Almost everything in this changeset leaves the code identical to how it
started, except for a new layer of indirection between the xar writer
and the XML writer.

The things that are not one-to-one renames include:

- The removal of `UTF8Toisolat1` for the purposes of validating UTF-8
  - The writer code made a copy of every filename for the purposes of
    checking whether it was Latin-1 stored as UTF-8. In xar, non-Latin-1
    gets stored Base64-encoded.
  - I've replaced this use because (1) it was inefficient and (2)
    `UTF8Toisolat1` is a `libxml2` export.
  - The new function has slightly different results than the one it is
    replacing for invalid UTF-8. Namely, it treats illegal UTF-8
    "overlong" encodings of Latin-1 codepoints as _invalid_. It operates
    on the principle that we can determine whether something is Latin-1
    based entirely on how long the sequence is expected to be.
- The move of `SetIndent` to before `StartDocument`, which the
  abstraction layer immediately undoes. This is to accommodate XML
  writers that require indent to be set _before_ the document starts.
--- libarchive/archive_write_set_format_xar.c | 414 +++++++++++++++------- libarchive/test/test_write_format_xar.c | 39 ++ 2 files changed, 330 insertions(+), 123 deletions(-) diff --git a/libarchive/archive_write_set_format_xar.c b/libarchive/archive_write_set_format_xar.c index 96ef85c69..fdc1b3de1 100644 --- a/libarchive/archive_write_set_format_xar.c +++ b/libarchive/archive_write_set_format_xar.c @@ -34,6 +34,9 @@ #include #if HAVE_LIBXML_XMLWRITER_H #include +#if defined(LIBXML_VERSION) && LIBXML_VERSION >= 20703 +#define XAR_WRITER_HAS_XML +#endif /* LIBXML_VERSION */ #endif #ifdef HAVE_BZLIB_H #include @@ -70,8 +73,7 @@ * */ -#if !(defined(HAVE_LIBXML_XMLWRITER_H) && defined(LIBXML_VERSION) &&\ - LIBXML_VERSION >= 20703) ||\ +#if !defined(XAR_WRITER_HAS_XML) ||\ !defined(HAVE_ZLIB_H) || \ !defined(ARCHIVE_HAS_MD5) || !defined(ARCHIVE_HAS_SHA1) /* @@ -94,9 +96,26 @@ archive_write_set_format_xar(struct archive *_a) #else /* Support xar format */ -/*#define DEBUG_PRINT_TOC 1 */ +struct xml_writer; +static int xml_writer_create(struct xml_writer **pctx); +static int xml_writer_start_document(struct xml_writer *ctx); +static int xml_writer_end_document(struct xml_writer *ctx); +static int xml_writer_set_indent(struct xml_writer *ctx, unsigned int indent); +static int xml_writer_start_element(struct xml_writer *ctx, + const char *localName); +static int xml_writer_write_attribute(struct xml_writer *ctx, const char *key, + const char *value); +static int xml_writer_write_attributef(struct xml_writer *ctx, const char *key, + const char *format, ...); +static int xml_writer_write_string(struct xml_writer *ctx, const char *string); +static int xml_writer_write_base64(struct xml_writer* ctx, + const char *data, size_t start, size_t len); +static int xml_writer_end_element(struct xml_writer *ctx); +static int xml_writer_get_final_content_and_length(struct xml_writer *ctx, + const char **out, size_t *size); +static int xml_writer_destroy(struct xml_writer *ctx); 
-#define BAD_CAST_CONST (const xmlChar *) +/*#define DEBUG_PRINT_TOC 1 */ #define HEADER_MAGIC 0x78617221 #define HEADER_SIZE 28 @@ -807,50 +826,49 @@ xar_finish_entry(struct archive_write *a) } static int -xmlwrite_string_attr(struct archive_write *a, xmlTextWriterPtr writer, +xmlwrite_string_attr(struct archive_write *a, struct xml_writer *writer, const char *key, const char *value, const char *attrkey, const char *attrvalue) { int r; - r = xmlTextWriterStartElement(writer, BAD_CAST_CONST(key)); + r = xml_writer_start_element(writer, key); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterStartElement() failed: %d", r); + "xml_writer_start_element() failed: %d", r); return (ARCHIVE_FATAL); } if (attrkey != NULL && attrvalue != NULL) { - r = xmlTextWriterWriteAttribute(writer, - BAD_CAST_CONST(attrkey), BAD_CAST_CONST(attrvalue)); + r = xml_writer_write_attribute(writer, attrkey, attrvalue); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterWriteAttribute() failed: %d", r); + "xml_writer_write_attribute() failed: %d", r); return (ARCHIVE_FATAL); } } if (value != NULL) { - r = xmlTextWriterWriteString(writer, BAD_CAST_CONST(value)); + r = xml_writer_write_string(writer, value); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterWriteString() failed: %d", r); + "xml_writer_write_string() failed: %d", r); return (ARCHIVE_FATAL); } } - r = xmlTextWriterEndElement(writer); + r = xml_writer_end_element(writer); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterEndElement() failed: %d", r); + "xml_writer_end_element() failed: %d", r); return (ARCHIVE_FATAL); } return (ARCHIVE_OK); } static int -xmlwrite_string(struct archive_write *a, xmlTextWriterPtr writer, +xmlwrite_string(struct archive_write *a, struct xml_writer *writer, const char *key, const char *value) { int r; @@ -858,34 +876,34 @@ xmlwrite_string(struct archive_write *a, xmlTextWriterPtr 
writer, if (value == NULL) return (ARCHIVE_OK); - r = xmlTextWriterStartElement(writer, BAD_CAST_CONST(key)); + r = xml_writer_start_element(writer, key); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterStartElement() failed: %d", r); + "xml_writer_start_element() failed: %d", r); return (ARCHIVE_FATAL); } if (value != NULL) { - r = xmlTextWriterWriteString(writer, BAD_CAST_CONST(value)); + r = xml_writer_write_string(writer, value); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterWriteString() failed: %d", r); + "xml_writer_write_string() failed: %d", r); return (ARCHIVE_FATAL); } } - r = xmlTextWriterEndElement(writer); + r = xml_writer_end_element(writer); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterEndElement() failed: %d", r); + "xml_writer_end_element() failed: %d", r); return (ARCHIVE_FATAL); } return (ARCHIVE_OK); } static int -xmlwrite_fstring(struct archive_write *a, xmlTextWriterPtr writer, +xmlwrite_fstring(struct archive_write *a, struct xml_writer *writer, const char *key, const char *fmt, ...) 
{ struct xar *xar; @@ -900,7 +918,7 @@ xmlwrite_fstring(struct archive_write *a, xmlTextWriterPtr writer, } static int -xmlwrite_time(struct archive_write *a, xmlTextWriterPtr writer, +xmlwrite_time(struct archive_write *a, struct xml_writer *writer, const char *key, time_t t, int z) { char timestr[100]; @@ -922,7 +940,7 @@ xmlwrite_time(struct archive_write *a, xmlTextWriterPtr writer, } static int -xmlwrite_mode(struct archive_write *a, xmlTextWriterPtr writer, +xmlwrite_mode(struct archive_write *a, struct xml_writer *writer, const char *key, mode_t mode) { char ms[5]; @@ -937,7 +955,7 @@ xmlwrite_mode(struct archive_write *a, xmlTextWriterPtr writer, } static int -xmlwrite_sum(struct archive_write *a, xmlTextWriterPtr writer, +xmlwrite_sum(struct archive_write *a, struct xml_writer *writer, const char *key, struct chksumval *sum) { const char *algname; @@ -971,7 +989,7 @@ xmlwrite_sum(struct archive_write *a, xmlTextWriterPtr writer, } static int -xmlwrite_heap(struct archive_write *a, xmlTextWriterPtr writer, +xmlwrite_heap(struct archive_write *a, struct xml_writer *writer, struct heap_data *heap) { const char *encname; @@ -1029,7 +1047,7 @@ xmlwrite_heap(struct archive_write *a, xmlTextWriterPtr writer, * Our implements records both and if it's necessary. 
*/ static int -make_fflags_entry(struct archive_write *a, xmlTextWriterPtr writer, +make_fflags_entry(struct archive_write *a, struct xml_writer *writer, const char *element, const char *fflags_text) { static const struct flagentry { @@ -1119,11 +1137,11 @@ make_fflags_entry(struct archive_write *a, xmlTextWriterPtr writer, } while (p != NULL); if (n > 0) { - r = xmlTextWriterStartElement(writer, BAD_CAST_CONST(element)); + r = xml_writer_start_element(writer, element); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterStartElement() failed: %d", r); + "xml_writer_start_element() failed: %d", r); return (ARCHIVE_FATAL); } for (i = 0; i < n; i++) { @@ -1133,29 +1151,59 @@ make_fflags_entry(struct archive_write *a, xmlTextWriterPtr writer, return (r); } - r = xmlTextWriterEndElement(writer); + r = xml_writer_end_element(writer); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterEndElement() failed: %d", r); + "xml_writer_end_element() failed: %d", r); return (ARCHIVE_FATAL); } } return (ARCHIVE_OK); } +/* + * This function determines whether a UTF-8 string contains + * only codepoints that are convertible to Latin-1. Strings + * beyond Latin-1 are stored base64-encoded in the XAR TOC. + */ static int -make_file_entry(struct archive_write *a, xmlTextWriterPtr writer, +is_u8_zstring_latin1(const char *in) +{ + unsigned int c; + while (*in) { + c = *in++; + if (c < 0x80) continue; + /* + * Filter out non-continuation, any continuation of 2-3 + * bytes, and any continuation of 1 byte whose high 3 bits + * are non-zero. Recall, 1-byte continuations can store 11 + * bits whereas Latin-1 codepoints are only 8 bits wide. + */ + if ((c & 0xFC) != 0xC0) + return (0); + c = *in++; + /* + * If we get any non-continuation byte (including 0x00!), + * the string is not valid UTF-8. 
+ */ + if ((c & 0xC0) != 0x80) + return (0); /* invalid unicode */ + } + return (1); +} + +static int +make_file_entry(struct archive_write *a, struct xml_writer *writer, struct file *file) { struct xar *xar; const char *filetype, *filelink, *fflags; struct archive_string linkto; struct heap_data *heap; - unsigned char *tmp; const char *p; size_t len; - int r, r2, l, ll; + int r, r2; xar = (struct xar *)a->format_data; r2 = ARCHIVE_OK; @@ -1163,44 +1211,35 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer, /* * Make a file name entry, "". */ - l = ll = (int)archive_strlen(&(file->basename)); - tmp = malloc(l); - if (tmp == NULL) { - archive_set_error(&a->archive, ENOMEM, - "Can't allocate memory"); - return (ARCHIVE_FATAL); - } - r = UTF8Toisolat1(tmp, &l, BAD_CAST(file->basename.s), &ll); - free(tmp); - if (r < 0) { - r = xmlTextWriterStartElement(writer, BAD_CAST("name")); + if (!is_u8_zstring_latin1(file->basename.s)) { + r = xml_writer_start_element(writer, "name"); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterStartElement() failed: %d", r); + "xml_writer_start_element() failed: %d", r); return (ARCHIVE_FATAL); } - r = xmlTextWriterWriteAttribute(writer, - BAD_CAST("enctype"), BAD_CAST("base64")); + r = xml_writer_write_attribute(writer, + "enctype", "base64"); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterWriteAttribute() failed: %d", r); + "xml_writer_write_attribute() failed: %d", r); return (ARCHIVE_FATAL); } - r = xmlTextWriterWriteBase64(writer, file->basename.s, + r = xml_writer_write_base64(writer, file->basename.s, 0, (int)archive_strlen(&(file->basename))); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterWriteBase64() failed: %d", r); + "xml_writer_write_base64() failed: %d", r); return (ARCHIVE_FATAL); } - r = xmlTextWriterEndElement(writer); + r = xml_writer_end_element(writer); if (r < 0) { archive_set_error(&a->archive, 
ARCHIVE_ERRNO_MISC, - "xmlTextWriterEndElement() failed: %d", r); + "xml_writer_end_element() failed: %d", r); return (ARCHIVE_FATAL); } } else { @@ -1281,11 +1320,11 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer, break; case AE_IFCHR: case AE_IFBLK: - r = xmlTextWriterStartElement(writer, BAD_CAST("device")); + r = xml_writer_start_element(writer, "device"); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterStartElement() failed: %d", r); + "xml_writer_start_element() failed: %d", r); return (ARCHIVE_FATAL); } r = xmlwrite_fstring(a, writer, "major", @@ -1296,11 +1335,11 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer, "%d", archive_entry_rdevminor(file->entry)); if (r < 0) return (ARCHIVE_FATAL); - r = xmlTextWriterEndElement(writer); + r = xml_writer_end_element(writer); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterEndElement() failed: %d", r); + "xml_writer_end_element() failed: %d", r); return (ARCHIVE_FATAL); } break; @@ -1436,19 +1475,19 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer, archive_entry_xattr_next(file->entry, &name, &value, &size); - r = xmlTextWriterStartElement(writer, BAD_CAST("ea")); + r = xml_writer_start_element(writer, "ea"); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterStartElement() failed: %d", r); + "xml_writer_start_element() failed: %d", r); return (ARCHIVE_FATAL); } - r = xmlTextWriterWriteFormatAttribute(writer, - BAD_CAST("id"), "%d", heap->id); + r = xml_writer_write_attributef(writer, + "id", "%d", heap->id); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterWriteAttribute() failed: %d", r); + "xml_writer_write_attributef() failed: %d", r); return (ARCHIVE_FATAL); } r = xmlwrite_heap(a, writer, heap); @@ -1458,11 +1497,11 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer, if (r < 0) return (ARCHIVE_FATAL); - r = 
xmlTextWriterEndElement(writer); + r = xml_writer_end_element(writer); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterEndElement() failed: %d", r); + "xml_writer_end_element() failed: %d", r); return (ARCHIVE_FATAL); } } @@ -1471,11 +1510,11 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer, * Make a file data entry, "". */ if (file->data.length > 0) { - r = xmlTextWriterStartElement(writer, BAD_CAST("data")); + r = xml_writer_start_element(writer, "data"); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterStartElement() failed: %d", r); + "xml_writer_start_element() failed: %d", r); return (ARCHIVE_FATAL); } @@ -1483,21 +1522,21 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer, if (r < 0) return (ARCHIVE_FATAL); - r = xmlTextWriterEndElement(writer); + r = xml_writer_end_element(writer); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterEndElement() failed: %d", r); + "xml_writer_end_element() failed: %d", r); return (ARCHIVE_FATAL); } } if (archive_strlen(&file->script) > 0) { - r = xmlTextWriterStartElement(writer, BAD_CAST("content")); + r = xml_writer_start_element(writer, "content"); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterStartElement() failed: %d", r); + "xml_writer_start_element() failed: %d", r); return (ARCHIVE_FATAL); } @@ -1510,11 +1549,11 @@ make_file_entry(struct archive_write *a, xmlTextWriterPtr writer, if (r < 0) return (ARCHIVE_FATAL); - r = xmlTextWriterEndElement(writer); + r = xml_writer_end_element(writer); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterEndElement() failed: %d", r); + "xml_writer_end_element() failed: %d", r); return (ARCHIVE_FATAL); } } @@ -1530,8 +1569,9 @@ make_toc(struct archive_write *a) { struct xar *xar; struct file *np; - xmlBufferPtr bp; - xmlTextWriterPtr writer; + struct xml_writer *writer; + const char* content; + 
size_t use; int algsize; int r, ret; @@ -1543,51 +1583,43 @@ make_toc(struct archive_write *a) * Initialize xml writer. */ writer = NULL; - bp = xmlBufferCreate(); - if (bp == NULL) { - archive_set_error(&a->archive, ENOMEM, - "xmlBufferCreate() " - "couldn't create xml buffer"); - goto exit_toc; - } - writer = xmlNewTextWriterMemory(bp, 0); - if (writer == NULL) { + r = xml_writer_create(&writer); + if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlNewTextWriterMemory() " - "couldn't create xml writer"); + "xml_writer_create() failed: %d", r); goto exit_toc; } - r = xmlTextWriterStartDocument(writer, "1.0", "UTF-8", NULL); + r = xml_writer_set_indent(writer, 4); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterStartDocument() failed: %d", r); + "xml_writer_set_indent() failed: %d", r); goto exit_toc; } - r = xmlTextWriterSetIndent(writer, 4); + r = xml_writer_start_document(writer); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterSetIndent() failed: %d", r); + "xml_writer_start_document() failed: %d", r); goto exit_toc; } /* * Start recording TOC */ - r = xmlTextWriterStartElement(writer, BAD_CAST("xar")); + r = xml_writer_start_element(writer, "xar"); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterStartElement() failed: %d", r); + "xml_writer_start_element() failed: %d", r); goto exit_toc; } - r = xmlTextWriterStartElement(writer, BAD_CAST("toc")); + r = xml_writer_start_element(writer, "toc"); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterStartDocument() failed: %d", r); + "xml_writer_start_element() failed: %d", r); goto exit_toc; } @@ -1606,19 +1638,19 @@ make_toc(struct archive_write *a) /* * Record TOC checksum */ - r = xmlTextWriterStartElement(writer, BAD_CAST("checksum")); + r = xml_writer_start_element(writer, "checksum"); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - 
"xmlTextWriterStartElement() failed: %d", r); + "xml_writer_start_element() failed: %d", r); goto exit_toc; } - r = xmlTextWriterWriteAttribute(writer, BAD_CAST("style"), - BAD_CAST_CONST(getalgname(xar->opt_toc_sumalg))); + r = xml_writer_write_attribute(writer, "style", + getalgname(xar->opt_toc_sumalg)); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterWriteAttribute() failed: %d", r); + "xml_writer_write_attribute() failed: %d", r); goto exit_toc; } @@ -1636,11 +1668,11 @@ make_toc(struct archive_write *a) if (r < 0) goto exit_toc; - r = xmlTextWriterEndElement(writer); + r = xml_writer_end_element(writer); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterEndElement() failed: %d", r); + "xml_writer_end_element() failed: %d", r); goto exit_toc; } } @@ -1656,32 +1688,32 @@ make_toc(struct archive_write *a) if (np->dir && np->children.first != NULL) { /* Enter to sub directories. */ np = np->children.first; - r = xmlTextWriterStartElement(writer, - BAD_CAST("file")); + r = xml_writer_start_element(writer, + "file"); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterStartElement() " + "xml_writer_start_element() " "failed: %d", r); goto exit_toc; } - r = xmlTextWriterWriteFormatAttribute( - writer, BAD_CAST("id"), "%d", np->id); + r = xml_writer_write_attributef( + writer, "id", "%d", np->id); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterWriteAttribute() " + "xml_writer_write_attributef() " "failed: %d", r); goto exit_toc; } continue; } while (np != np->parent) { - r = xmlTextWriterEndElement(writer); + r = xml_writer_end_element(writer); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterEndElement() " + "xml_writer_end_element() " "failed: %d", r); goto exit_toc; } @@ -1690,21 +1722,21 @@ make_toc(struct archive_write *a) np = np->parent; } else { np = np->chnext; - r = xmlTextWriterStartElement(writer, - 
BAD_CAST("file")); + r = xml_writer_start_element(writer, + "file"); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterStartElement() " + "xml_writer_start_element() " "failed: %d", r); goto exit_toc; } - r = xmlTextWriterWriteFormatAttribute( - writer, BAD_CAST("id"), "%d", np->id); + r = xml_writer_write_attributef( + writer, "id", "%d", np->id); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterWriteAttribute() " + "xml_writer_write_attributef() " "failed: %d", r); goto exit_toc; } @@ -1713,31 +1745,40 @@ make_toc(struct archive_write *a) } } while (np != np->parent); - r = xmlTextWriterEndDocument(writer); + r = xml_writer_end_document(writer); if (r < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "xmlTextWriterEndDocument() failed: %d", r); + "xml_writer_end_document() failed: %d", r); goto exit_toc; } + + r = xml_writer_get_final_content_and_length(writer, &content, &use); + if (r < 0) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "xml_writer_get_final_content_and_length() failed: %d", r); + goto exit_toc; + } + #if DEBUG_PRINT_TOC fprintf(stderr, "\n---TOC-- %d bytes --\n%s\n", - strlen((const char *)bp->content), bp->content); + (int)strlen(content), content); #endif /* * Compress the TOC and calculate the sum of the TOC. 
*/ xar->toc.temp_offset = xar->temp_offset; - xar->toc.size = bp->use; + xar->toc.size = (uint64_t)use; checksum_init(&(xar->a_sumwrk), xar->opt_toc_sumalg); r = compression_init_encoder_gzip(&(a->archive), &(xar->stream), 6, 1); if (r != ARCHIVE_OK) goto exit_toc; - xar->stream.next_in = bp->content; - xar->stream.avail_in = bp->use; + xar->stream.next_in = (const unsigned char *)content; + xar->stream.avail_in = use; xar->stream.total_in = 0; xar->stream.next_out = xar->wbuff; xar->stream.avail_out = sizeof(xar->wbuff); @@ -1768,9 +1809,7 @@ make_toc(struct archive_write *a) ret = ARCHIVE_OK; exit_toc: if (writer) - xmlFreeTextWriter(writer); - if (bp) - xmlBufferFree(bp); + xml_writer_destroy(writer); return (ret); } @@ -3251,4 +3290,133 @@ getalgname(enum sumalg sumalg) } } +#if HAVE_LIBXML_XMLWRITER_H + +#define BAD_CAST_CONST (const xmlChar *) + +struct xml_writer { + xmlTextWriterPtr writer; + xmlBufferPtr bp; + unsigned int indent; +}; + +static int +xml_writer_create(struct xml_writer **pctx) +{ + struct xml_writer *ctx = calloc(1, sizeof(struct xml_writer)); + if (ctx == NULL) { + return (-1); + } + + ctx->bp = xmlBufferCreate(); + if (ctx->bp == NULL) { + free(ctx); + return (-1); + } + + ctx->writer = xmlNewTextWriterMemory(ctx->bp, 0); + if (ctx->writer == NULL) { + xmlBufferFree(ctx->bp); + free(ctx); + return (-1); + } + + *pctx = ctx; + return (0); +} + +static int +xml_writer_destroy(struct xml_writer *ctx) +{ + xmlFreeTextWriter(ctx->writer); + xmlBufferFree(ctx->bp); + free(ctx); + return (0); +} + +static int +xml_writer_start_document(struct xml_writer *ctx) +{ + int r; + r = xmlTextWriterStartDocument(ctx->writer, "1.0", "UTF-8", NULL); + if (r < 0) { + return (r); + } + + r = xmlTextWriterSetIndent(ctx->writer, (int)ctx->indent); + return (r); +} + +static int +xml_writer_end_document(struct xml_writer *ctx) +{ + return (xmlTextWriterEndDocument(ctx->writer)); +} + +static int +xml_writer_set_indent(struct xml_writer *ctx, unsigned int 
indent) +{ + /* libxml2 only lets you set the indent after starting the document */ + ctx->indent = indent; + return (0); +} + +static int +xml_writer_start_element(struct xml_writer *ctx, const char *localName) +{ + return (xmlTextWriterStartElement(ctx->writer, + BAD_CAST_CONST(localName))); +} + +static int +xml_writer_write_attribute(struct xml_writer *ctx, + const char *key, const char *value) +{ + return (xmlTextWriterWriteAttribute(ctx->writer, + BAD_CAST_CONST(key), BAD_CAST_CONST(value))); +} + +static int +xml_writer_write_attributef(struct xml_writer *ctx, + const char *key, const char *format, ...) +{ + va_list ap; + int ret; + va_start(ap, format); + ret = xmlTextWriterWriteVFormatAttribute(ctx->writer, + BAD_CAST_CONST(key), format, ap); + va_end(ap); + return (ret); +} + +static int +xml_writer_write_string(struct xml_writer *ctx, const char *string) +{ + return (xmlTextWriterWriteString(ctx->writer, BAD_CAST_CONST(string))); +} + +static int +xml_writer_write_base64(struct xml_writer* ctx, + const char *data, size_t start, size_t len) +{ + return (xmlTextWriterWriteBase64(ctx->writer, data, + (int)start, (int)len)); +} + +static int +xml_writer_end_element(struct xml_writer *ctx) +{ + return (xmlTextWriterEndElement(ctx->writer)); +} + +static int +xml_writer_get_final_content_and_length(struct xml_writer *ctx, + const char **out, size_t *size) +{ + *out = (const char*)ctx->bp->content; + *size = (size_t)ctx->bp->use; + return (0); +} +#endif /* HAVE_LIBXML_XMLWRITER_H */ + #endif /* Support xar format */ diff --git a/libarchive/test/test_write_format_xar.c b/libarchive/test/test_write_format_xar.c index f1f303291..a3e2577de 100644 --- a/libarchive/test/test_write_format_xar.c +++ b/libarchive/test/test_write_format_xar.c @@ -25,6 +25,8 @@ */ #include "test.h" +#include + static void test_xar(const char *option) { @@ -146,6 +148,20 @@ test_xar(const char *option) assertEqualIntA(a, ARCHIVE_FAILED, archive_write_header(a, ae)); 
archive_entry_free(ae); + /* + * "dir/file{UNICODE}" has a name that requires base64 encoding + */ + assert((ae = archive_entry_new()) != NULL); + archive_entry_set_atime(ae, 2, 20); + archive_entry_set_ctime(ae, 4, 40); + archive_entry_set_mtime(ae, 5, 50); + archive_entry_copy_pathname_w(ae, L"dir/file\U0001F574"); + archive_entry_set_mode(ae, AE_IFREG | 0755); + archive_entry_set_size(ae, 8); + assertEqualIntA(a, ARCHIVE_OK, archive_write_header(a, ae)); + archive_entry_free(ae); + assertEqualIntA(a, 8, archive_write_data(a, "ghijklmn", 9)); + /* * XXX TODO XXX Archive directory, other file types. * Archive extended attributes, ACLs, other metadata. @@ -261,6 +277,22 @@ test_xar(const char *option) assertEqualString("dir/dir3", archive_entry_pathname(ae)); assert((AE_IFDIR | 0755) == archive_entry_mode(ae)); + /* + * Read "dir/file{UNICODE}" + */ + assertEqualIntA(a, 0, archive_read_next_header(a, &ae)); + assertEqualInt(2, archive_entry_atime(ae)); + assertEqualInt(0, archive_entry_atime_nsec(ae)); + assertEqualInt(4, archive_entry_ctime(ae)); + assertEqualInt(0, archive_entry_ctime_nsec(ae)); + assertEqualInt(5, archive_entry_mtime(ae)); + assertEqualInt(0, archive_entry_mtime_nsec(ae)); + assertEqualWString(L"dir/file\U0001F574", archive_entry_pathname_w(ae)); + assert((AE_IFREG | 0755) == archive_entry_mode(ae)); + assertEqualInt(8, archive_entry_size(ae)); + assertEqualIntA(a, 8, archive_read_data(a, buff2, 10)); + assertEqualMem(buff2, "ghijklmn", 8); + /* * Verify the end of the archive. */ @@ -273,6 +305,13 @@ test_xar(const char *option) DEFINE_TEST(test_write_format_xar) { + /* xar mandates the use of UTF-8 XML; if we cannot + * use UTF-8, perhaps we should not write xar. */ + if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { + skipping("en_US.UTF-8 locale not available on this system."); + return; + } + /* Default mode. */ test_xar(NULL); -- 2.47.2