From: Dustin L. Howett
Date: Fri, 9 May 2025 11:40:21 +0000 (-0500)
Subject: xar: add xmllite support to the XAR reader and writer (#2388)
X-Git-Tag: v3.8.0~29
X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=16fd043f51d911b106f2a7834ad8f08f65051977;p=thirdparty%2Flibarchive.git

xar: add xmllite support to the XAR reader and writer (#2388)

This commit adds support for reading and writing XAR archives on Windows
using the built-in xmllite library. xmllite is present in all versions
of Windows starting with Windows XP.

With this change, no external XML library (libxml2, expat) is required
to read or produce XAR archives on Windows.

xmllite is a little bit annoying in that it's entirely a COM API--the
likes of which are annoying to use from C.

Signed-off-by: Dustin L. Howett

Depends on e619342dfa36b887ffa0ea33e98d04cb161cd7de
Closes #1811
---

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 20e77453c..7096b7c00 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -228,6 +228,7 @@ OPTION(ENABLE_ZLIB "Enable the use of the system ZLIB library if found" ON)
 OPTION(ENABLE_BZip2 "Enable the use of the system BZip2 library if found" ON)
 OPTION(ENABLE_LIBXML2 "Enable the use of the system libxml2 library if found" ON)
 OPTION(ENABLE_EXPAT "Enable the use of the system EXPAT library if found" ON)
+OPTION(ENABLE_WIN32_XMLLITE "Enable the use of the Windows XmlLite library if found" ON)
 OPTION(ENABLE_PCREPOSIX "Enable the use of the system PCREPOSIX library if found" ON)
 OPTION(ENABLE_PCRE2POSIX "Enable the use of the system PCRE2POSIX library if found" ON)
 OPTION(ENABLE_LIBGCC "Enable the use of the system LibGCC library if found" ON)
@@ -1251,6 +1252,25 @@ ELSE(LIBXML2_FOUND)
       SET(HAVE_LIBEXPAT 1)
       LA_CHECK_INCLUDE_FILE("expat.h" HAVE_EXPAT_H)
       CMAKE_POP_CHECK_STATE()	# Restore the state of the variables
+    ELSE(EXPAT_FOUND)
+      IF(WIN32 AND ENABLE_WIN32_XMLLITE)
+        # Check linkage as well; versions of mingw-w64 before v11.0.0
+        # do not contain an import library for xmllite.
+        cmake_push_check_state()
+        SET(CMAKE_REQUIRED_LIBRARIES "xmllite")
+        check_c_source_compiles("
+          #include <initguid.h>
+          #include <xmllite.h>
+          int main() {
+            return CreateXmlReader(&IID_IXmlReader, NULL, NULL);
+          }
+        " HAVE_XMLLITE_H)
+        cmake_pop_check_state()
+        IF(HAVE_XMLLITE_H)
+          SET(XMLLITE_FOUND TRUE)
+          LIST(APPEND ADDITIONAL_LIBS "xmllite")
+        ENDIF()
+      ENDIF()
     ENDIF(EXPAT_FOUND)
   ENDIF(LIBXML2_FOUND)
   MARK_AS_ADVANCED(CLEAR LIBXML2_INCLUDE_DIR)
diff --git a/build/cmake/config.h.in b/build/cmake/config.h.in
index dd58f3382..692d4516a 100644
--- a/build/cmake/config.h.in
+++ b/build/cmake/config.h.in
@@ -1282,6 +1282,9 @@ typedef uint64_t uintmax_t;
 /* Define to 1 if you have a working FS_IOC_GETFLAGS */
 #cmakedefine HAVE_WORKING_FS_IOC_GETFLAGS 1
 
+/* Define to 1 if you have the Windows `xmllite' library (-lxmllite). */
+#cmakedefine HAVE_XMLLITE_H 1
+
 /* Define to 1 if you have the <zlib.h> header file. */
 #cmakedefine HAVE_ZLIB_H 1
 
diff --git a/libarchive/archive_read_support_format_xar.c b/libarchive/archive_read_support_format_xar.c
index 7dfae3f68..b4e1192ef 100644
--- a/libarchive/archive_read_support_format_xar.c
+++ b/libarchive/archive_read_support_format_xar.c
@@ -36,6 +36,10 @@
 #include <bsdxml.h>
 #elif HAVE_EXPAT_H
 #include <expat.h>
+#elif HAVE_XMLLITE_H
+#include <objidl.h>
+#include <initguid.h>
+#include <xmllite.h>
 #endif
 #ifdef HAVE_BZLIB_H
 #include <bzlib.h>
@@ -56,12 +60,13 @@
 #include "archive_read_private.h"
 
 #if (!defined(HAVE_LIBXML_XMLREADER_H) && \
-     !defined(HAVE_BSDXML_H) && !defined(HAVE_EXPAT_H)) ||\
+     !defined(HAVE_BSDXML_H) && !defined(HAVE_EXPAT_H) && \
+     !defined(HAVE_XMLLITE_H)) ||\
 	!defined(HAVE_ZLIB_H) || \
 	!defined(ARCHIVE_HAS_MD5) || !defined(ARCHIVE_HAS_SHA1)
 /*
  * xar needs several external libraries.
- *   o libxml2 or expat --- XML parser
+ *   o libxml2, expat or (Windows only) xmllite --- XML parser
  *   o openssl or MD5/SHA1 hash function
  *   o zlib
  *   o bzlib2 (option)
@@ -438,6 +443,8 @@ static void	expat_start_cb(void *, const XML_Char *, const XML_Char **);
 static void	expat_end_cb(void *, const XML_Char *);
 static void	expat_data_cb(void *, const XML_Char *, int);
 static int	expat_read_toc(struct archive_read *);
+#elif defined(HAVE_XMLLITE_H)
+static int	xmllite_read_toc(struct archive_read *);
 #endif
 
 int
@@ -589,6 +596,8 @@ read_toc(struct archive_read *a)
 	r = xml2_read_toc(a);
 #elif defined(HAVE_BSDXML_H) || defined(HAVE_EXPAT_H)
 	r = expat_read_toc(a);
+#elif defined(HAVE_XMLLITE_H)
+	r = xmllite_read_toc(a);
 #endif
 	if (r != ARCHIVE_OK)
 		return (r);
@@ -3333,6 +3342,362 @@ expat_read_toc(struct archive_read *a)
 	XML_ParserFree(parser);
 	return (ud.state);
 }
-#endif /* defined(HAVE_BSDXML_H) || defined(HAVE_EXPAT_H) */
+
+#elif defined(HAVE_XMLLITE_H)
+
+/*
+ * Minimal ISequentialStream implementation that lets xmllite pull the TOC
+ * bytes out of the archive being read. lpVtbl is the first member so that
+ * a pointer to this struct can be passed wherever COM expects an
+ * ISequentialStream (or IUnknown) pointer.
+ */
+struct ArchiveStreamAdapter {
+	const ISequentialStreamVtbl *lpVtbl; /* see asaStaticVtable */
+	struct archive_read *a;
+};
+
+/* Only ISequentialStream is exposed; any other interface is refused. */
+static HRESULT STDMETHODCALLTYPE
+asaQueryInterface(ISequentialStream *this, REFIID riid, void **ppv)
+{
+	if (!IsEqualIID(riid, &IID_ISequentialStream)) {
+		*ppv = NULL;
+		return E_NOINTERFACE;
+	}
+	*ppv = this;
+	return S_OK;
+}
+
+/*
+ * We can dispense with reference counting as we tightly manage the lifetime
+ * of an ArchiveStreamAdapter.
+ */
+static ULONG STDMETHODCALLTYPE
+asaAddRef(ISequentialStream *this)
+{
+	(void)this; /* UNUSED */
+	return ULONG_MAX;
+}
+
+static ULONG STDMETHODCALLTYPE
+asaRelease(ISequentialStream *this)
+{
+	(void)this; /* UNUSED */
+	return ULONG_MAX;
+}
+
+/*
+ * ISequentialStream::Read: fill pv with up to cb bytes of TOC data.
+ * Returns S_FALSE when fewer than cb bytes were produced (including once
+ * the TOC is exhausted) and E_FAIL when rd_contents() fails.
+ * pcbRead is optional in the COM contract, so it is only written when the
+ * caller supplied it.
+ */
+static HRESULT STDMETHODCALLTYPE
+asaRead(ISequentialStream *this, void *pv, ULONG cb, ULONG *pcbRead)
+{
+	struct ArchiveStreamAdapter *asa = (struct ArchiveStreamAdapter *)this;
+	struct archive_read *a;
+	struct xar *xar;
+	const void *d = pv;
+	size_t outbytes = cb;
+	size_t used = 0;
+	int r;
+
+	a = asa->a;
+	xar = (struct xar *)(a->format->data);
+
+	if (pcbRead != NULL)
+		*pcbRead = 0;
+
+	if (xar->toc_remaining <= 0)
+		return cb != 0 ? S_FALSE : S_OK;
+
+	r = rd_contents(a, &d, &outbytes, &used, xar->toc_remaining);
+	if (r != ARCHIVE_OK)
+		return E_FAIL;
+	__archive_read_consume(a, used);
+	xar->toc_remaining -= used;
+	xar->offset += used;
+	xar->toc_total += outbytes;
+	PRINT_TOC(pv, outbytes);
+
+	if (pcbRead != NULL)
+		*pcbRead = (ULONG)outbytes;
+	return outbytes < cb ? S_FALSE : S_OK;
+}
+
+static HRESULT STDMETHODCALLTYPE
+asaWrite(ISequentialStream *this, const void *pv, ULONG cb, ULONG *pcbWritten)
+{
+	(void)this; /* UNUSED */
+	(void)pv; /* UNUSED */
+	(void)cb; /* UNUSED */
+	if (!pcbWritten) return E_INVALIDARG;
+	*pcbWritten = 0;
+	return E_NOTIMPL;
+}
+
+static const ISequentialStreamVtbl asaStaticVtable = {
+	.QueryInterface = asaQueryInterface,
+	.AddRef = asaAddRef,
+	.Release = asaRelease,
+	.Read = asaRead,
+	.Write = asaWrite,
+};
+
+/* Allocates an adapter bound to `a`; xmllite_read_toc() free()s it. */
+static int
+xmllite_create_stream_adapter(struct archive_read *a,
+    struct ArchiveStreamAdapter **pasa)
+{
+	struct ArchiveStreamAdapter *asa =
+	    calloc(1, sizeof(struct ArchiveStreamAdapter));
+	if (!asa) {
+		archive_set_error(&(a->archive), ENOMEM, "Out of memory");
+		return (ARCHIVE_FATAL);
+	}
+	asa->lpVtbl = &asaStaticVtable;
+	asa->a = a;
+	*pasa = asa;
+	return (ARCHIVE_OK);
+}
+
+typedef HRESULT(STDMETHODCALLTYPE *xmllite_wstr_func)(IXmlReader *, LPCWSTR *,
+    UINT *);
+
+/*
+ * Returns a narrow-char archive_string in *as after calling
+ * the wide-char COM API callee() on the XmlReader reader.
+ * Sets an appropriate error on the archive if it fails.
+ */
+static int
+xmllite_call_return_as(struct archive_read *a, struct archive_string *as,
+    IXmlReader *reader, xmllite_wstr_func callee)
+{
+	LPCWSTR wcs;
+	UINT wlen;
+
+	if (FAILED(callee(reader, &wcs, &wlen))) {
+		archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC,
+		    "Failed to read XML data");
+		return (ARCHIVE_FATAL);
+	}
+
+	archive_string_init(as);
+	if (archive_string_append_from_wcs(as, wcs, (size_t)wlen) < 0) {
+		archive_string_free(as);
+		archive_set_error(&(a->archive), ENOMEM, "Out of memory");
+		return (ARCHIVE_FATAL);
+	}
+
+	return (ARCHIVE_OK);
+}
+
+/*
+ * Same as xmllite_call_return_as(), but hands back a strdup()'ed C string
+ * that the caller must free(). Returns NULL, with the archive error set,
+ * on failure.
+ */
+static char *
+xmllite_call_return_mbs(struct archive_read *a, IXmlReader *reader,
+    xmllite_wstr_func callee)
+{
+	char *ret;
+	struct archive_string as;
+
+	if (xmllite_call_return_as(a, &as, reader, callee) < 0) {
+		return NULL;
+	}
+
+	ret = strdup(as.s);
+	archive_string_free(&as);
+	if (ret == NULL) {
+		archive_set_error(&(a->archive), ENOMEM, "Out of memory");
+		return NULL;
+	}
+	return ret;
+}
+
+/*
+ * Collects the attributes of the node the reader is positioned on into
+ * `list`. `list` is initialised before anything can fail, and the caller
+ * runs xmlattr_cleanup() on it whatever the result.
+ */
+static int
+xmllite_xmlattr_setup(struct archive_read *a,
+    struct xmlattr_list *list, IXmlReader *reader)
+{
+	struct xmlattr *attr;
+	HRESULT hr;
+
+	list->first = NULL;
+	list->last = &(list->first);
+	/* Contrary to other checks, we're not using SUCCEEDED/FAILED
+	 * because MoveToNextAttribute returns *S_FALSE* (success!)
+	 * when it runs out of attributes.
+	 *
+	 * Advancing in the loop header guarantees that `continue` still
+	 * moves on to the next attribute instead of spinning forever.
+	 */
+	for (hr = reader->lpVtbl->MoveToFirstAttribute(reader);
+	    hr == S_OK;
+	    hr = reader->lpVtbl->MoveToNextAttribute(reader)) {
+		/* Attributes implied as being default by the DTD are ignored */
+		if (reader->lpVtbl->IsDefault(reader))
+			continue;
+
+		attr = malloc(sizeof(*attr));
+		if (attr == NULL) {
+			archive_set_error(&(a->archive), ENOMEM,
+			    "Out of memory");
+			return (ARCHIVE_FATAL);
+		}
+
+		attr->name = xmllite_call_return_mbs(a, reader,
+		    reader->lpVtbl->GetLocalName);
+		if (attr->name == NULL) {
+			free(attr);
+			/* xmllite_call_return_mbs sets an appropriate error */
+			return (ARCHIVE_FATAL);
+		}
+
+		attr->value = xmllite_call_return_mbs(a, reader,
+		    reader->lpVtbl->GetValue);
+		if (attr->value == NULL) {
+			free(attr->name);
+			free(attr);
+			/* xmllite_call_return_mbs sets an appropriate error */
+			return (ARCHIVE_FATAL);
+		}
+
+		attr->next = NULL;
+		*list->last = attr;
+		list->last = &(attr->next);
+	}
+
+	if (FAILED(hr)) {
+		archive_set_error(&(a->archive), ARCHIVE_ERRNO_FILE_FORMAT,
+		    "Failed to parse XML document");
+		return (ARCHIVE_FAILED);
+	}
+
+	return (ARCHIVE_OK);
+}
+
+/*
+ * Parses the TOC with xmllite, feeding element starts, element ends and
+ * text nodes to xml_start(), xml_end() and xml_data() respectively.
+ */
+static int
+xmllite_read_toc(struct archive_read *a)
+{
+	struct ArchiveStreamAdapter *asa = NULL;
+	char *name;
+	struct archive_string as;
+	BOOL empty;
+	XmlNodeType type;
+	struct xmlattr_list list;
+	IXmlReader *reader = NULL;
+	int r = ARCHIVE_OK;
+
+	if ((r = xmllite_create_stream_adapter(a, &asa)) < 0) {
+		goto out;
+	}
+
+	if (FAILED(CreateXmlReader(&IID_IXmlReader, (void **)&reader, NULL))) {
+		archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC,
+		    "Failed to create XML reader");
+		r = ARCHIVE_FATAL;
+		goto out;
+	}
+
+	if (FAILED(reader->lpVtbl->SetInput(reader, (IUnknown *)asa))) {
+		archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC,
+		    "Failed to prepare XML stream");
+		r = ARCHIVE_FATAL;
+		goto out;
+	}
+
+	while (!reader->lpVtbl->IsEOF(reader)) {
+		if (FAILED(reader->lpVtbl->Read(reader, &type))) {
+			archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC,
+			    "Failed to read XML stream");
+			r = ARCHIVE_FATAL;
+			goto out;
+		}
+
+		switch (type) {
+		case XmlNodeType_Element:
+			empty = reader->lpVtbl->IsEmptyElement(reader);
+
+			name = xmllite_call_return_mbs(a, reader,
+			    reader->lpVtbl->GetLocalName);
+			if (name == NULL) {
+				/* xmllite_call_return_mbs sets an appropriate error */
+				r = ARCHIVE_FATAL;
+				goto out;
+			}
+
+			r = xmllite_xmlattr_setup(a, &list, reader);
+			if (r == ARCHIVE_OK) {
+				r = xml_start(a, name, &list);
+			}
+			xmlattr_cleanup(&list);
+			if (r == ARCHIVE_OK && empty) {
+				xml_end(a, name);
+			}
+
+			free(name);
+			if (r != ARCHIVE_OK) {
+				goto out;
+			}
+
+			break;
+		case XmlNodeType_EndElement:
+			name = xmllite_call_return_mbs(a, reader,
+			    reader->lpVtbl->GetLocalName);
+			if (name == NULL) {
+				/* xmllite_call_return_mbs sets an appropriate error */
+				r = ARCHIVE_FATAL;
+				goto out;
+			}
+
+			xml_end(a, name);
+			free(name);
+			break;
+		case XmlNodeType_Text:
+			r = xmllite_call_return_as(a, &as, reader,
+			    reader->lpVtbl->GetValue);
+			if (r != ARCHIVE_OK) {
+				/* xmllite_call_return_as sets an appropriate error */
+				goto out;
+			}
+
+			xml_data(a, as.s, (int)archive_strlen(&as));
+			archive_string_free(&as);
+			break;
+		case XmlNodeType_None:
+		case XmlNodeType_Attribute:
+		case XmlNodeType_CDATA:
+		case XmlNodeType_ProcessingInstruction:
+		case XmlNodeType_Comment:
+		case XmlNodeType_DocumentType:
+		case XmlNodeType_Whitespace:
+		case XmlNodeType_XmlDeclaration:
+		default:
+			break;
+		}
+	}
+
+out:
+	if (reader)
+		reader->lpVtbl->Release(reader);
+
+	free(asa);
+
+	return r;
+}
+#endif /* defined(HAVE_XMLLITE_H) */
 
 #endif /* Support xar format */
diff --git a/libarchive/archive_write_set_format_xar.c b/libarchive/archive_write_set_format_xar.c
index fdc1b3de1..3775e9f58 100644
--- a/libarchive/archive_write_set_format_xar.c
+++ b/libarchive/archive_write_set_format_xar.c
@@ -37,6 +37,11 @@
 #if defined(LIBXML_VERSION) && LIBXML_VERSION >= 20703
 #define XAR_WRITER_HAS_XML
 #endif /* LIBXML_VERSION */
+#elif HAVE_XMLLITE_H
+#include <objidl.h>
+#include <initguid.h>
+#include <xmllite.h>
+#define XAR_WRITER_HAS_XML
 #endif
 #ifdef HAVE_BZLIB_H
 #include <bzlib.h>
@@ -78,7 +83,7 @@
 	!defined(ARCHIVE_HAS_MD5) ||
!defined(ARCHIVE_HAS_SHA1)
 /*
  * xar needs several external libraries.
- *   o libxml2
+ *   o libxml2 or xmllite (on Windows)
  *   o openssl or MD5/SHA1 hash function
  *   o zlib
  *   o bzlib2 (option)
@@ -3417,6 +3422,302 @@ xml_writer_get_final_content_and_length(struct xml_writer *ctx,
 	*size = (size_t)ctx->bp->use;
 	return (0);
 }
+
+#elif HAVE_XMLLITE_H
+
+/*
+ * xmllite-backed XML writer. The document is accumulated in an in-memory
+ * IStream; `global` is set once the stream's HGLOBAL has been locked by
+ * xml_writer_get_final_content_and_length().
+ */
+struct xml_writer {
+	IXmlWriter *writer;
+	IStream *stream;
+	HGLOBAL global;
+};
+
+/* Creates a writer bound to a new in-memory stream; returns an HRESULT. */
+static int
+xml_writer_create(struct xml_writer **pctx)
+{
+	struct xml_writer *ctx;
+	HRESULT hr;
+
+	ctx = calloc(1, sizeof(struct xml_writer));
+	if (ctx == NULL) {
+		return (E_OUTOFMEMORY);
+	}
+
+	hr = CreateStreamOnHGlobal(NULL, TRUE, &ctx->stream);
+	if (FAILED(hr)) {
+		free(ctx);
+		return (hr);
+	}
+
+	hr = CreateXmlWriter(&IID_IXmlWriter, (void **)&ctx->writer, NULL);
+	if (FAILED(hr)) {
+		ctx->stream->lpVtbl->Release(ctx->stream);
+		free(ctx);
+		return (hr);
+	}
+
+	hr = ctx->writer->lpVtbl->SetOutput(ctx->writer,
+	    (IUnknown *)ctx->stream);
+	if (FAILED(hr)) {
+		ctx->writer->lpVtbl->Release(ctx->writer);
+		ctx->stream->lpVtbl->Release(ctx->stream);
+		free(ctx);
+		return (hr);
+	}
+
+	*pctx = ctx;
+	return (S_OK);
+}
+
+/* Unlocks the output buffer if it was handed out, then frees everything. */
+static int
+xml_writer_destroy(struct xml_writer *ctx)
+{
+	if (ctx->global)
+		GlobalUnlock(ctx->global);
+	ctx->writer->lpVtbl->Release(ctx->writer); /* Destroys only writer */
+	ctx->stream->lpVtbl->Release(ctx->stream); /* Destroys stream, global */
+	free(ctx);
+	return (S_OK);
+}
+
+static int
+xml_writer_start_document(struct xml_writer *ctx)
+{
+	return ctx->writer->lpVtbl->WriteStartDocument(ctx->writer,
+	    XmlStandalone_Omit);
+}
+
+static int
+xml_writer_end_document(struct xml_writer *ctx)
+{
+	return ctx->writer->lpVtbl->WriteEndDocument(ctx->writer);
+}
+
+static int
+xml_writer_set_indent(struct xml_writer *ctx, unsigned int indent)
+{
+	/* Windows' xmllite does not support indent sizes; will always be 2 */
+	(void)indent;
+	return ctx->writer->lpVtbl->SetProperty(ctx->writer,
+	    XmlWriterProperty_Indent, (LONG_PTR)TRUE);
+}
+
+static int
+xml_writer_start_element(struct xml_writer *ctx, const char *localName)
+{
+	struct archive_wstring as;
+	HRESULT hr;
+	archive_string_init(&as);
+	if (archive_wstring_append_from_mbs(&as, localName,
+	    strlen(localName))) {
+		hr = E_OUTOFMEMORY;
+		goto exit_hr;
+	}
+	hr = ctx->writer->lpVtbl->WriteStartElement(ctx->writer, NULL,
+	    as.s, NULL);
+
+exit_hr:
+	archive_wstring_free(&as);
+	return hr;
+}
+
+static int
+xml_writer_write_attribute(struct xml_writer *ctx,
+    const char *key, const char *value)
+{
+	struct archive_wstring ask, asv;
+	HRESULT hr;
+	archive_string_init(&ask);
+	archive_string_init(&asv);
+	if (archive_wstring_append_from_mbs(&ask, key, strlen(key))) {
+		hr = E_OUTOFMEMORY;
+		goto exit_hr;
+	}
+	if (archive_wstring_append_from_mbs(&asv, value, strlen(value))) {
+		hr = E_OUTOFMEMORY;
+		goto exit_hr;
+	}
+	hr = ctx->writer->lpVtbl->WriteAttributeString(ctx->writer, NULL,
+	    ask.s, NULL, asv.s);
+
+exit_hr:
+	archive_wstring_free(&asv);
+	archive_wstring_free(&ask);
+	return hr;
+}
+
+static int
+xml_writer_write_attributef(struct xml_writer *ctx,
+    const char *key, const char *format, ...)
+{
+	struct archive_wstring ask, asv;
+	struct archive_string asf;
+	HRESULT hr;
+	va_list ap;
+
+	va_start(ap, format);
+	archive_string_init(&ask);
+	archive_string_init(&asv);
+	archive_string_init(&asf);
+
+	if (archive_wstring_append_from_mbs(&ask, key, strlen(key))) {
+		hr = E_OUTOFMEMORY;
+		goto exit_hr;
+	}
+
+	archive_string_vsprintf(&asf, format, ap);
+	if (archive_wstring_append_from_mbs(&asv, asf.s, asf.length)) {
+		hr = E_OUTOFMEMORY;
+		goto exit_hr;
+	}
+
+	hr = ctx->writer->lpVtbl->WriteAttributeString(ctx->writer, NULL,
+	    ask.s, NULL, asv.s);
+
+exit_hr:
+	archive_string_free(&asf);
+	archive_wstring_free(&asv);
+	archive_wstring_free(&ask);
+	va_end(ap);
+
+	return hr;
+}
+
+static int
+xml_writer_write_string(struct xml_writer *ctx, const char *string)
+{
+	struct archive_wstring as;
+	HRESULT hr;
+	archive_string_init(&as);
+	if (archive_wstring_append_from_mbs(&as, string, strlen(string))) {
+		hr = E_OUTOFMEMORY;
+		goto exit_hr;
+	}
+	hr = ctx->writer->lpVtbl->WriteString(ctx->writer, as.s);
+
+exit_hr:
+	archive_wstring_free(&as);
+	return hr;
+}
+
+static const wchar_t base64[] = {
+	L'A', L'B', L'C', L'D', L'E', L'F', L'G', L'H',
+	L'I', L'J', L'K', L'L', L'M', L'N', L'O', L'P',
+	L'Q', L'R', L'S', L'T', L'U', L'V', L'W', L'X',
+	L'Y', L'Z', L'a', L'b', L'c', L'd', L'e', L'f',
+	L'g', L'h', L'i', L'j', L'k', L'l', L'm', L'n',
+	L'o', L'p', L'q', L'r', L's', L't', L'u', L'v',
+	L'w', L'x', L'y', L'z', L'0', L'1', L'2', L'3',
+	L'4', L'5', L'6', L'7', L'8', L'9', L'+', L'/'
+};
+
+/* Appends the base64 encoding (with '=' padding) of p[0..len) to `as`. */
+static void
+la_b64_wencode(struct archive_wstring *as, const unsigned char *p, size_t len)
+{
+	int c;
+
+	for (; len >= 3; p += 3, len -= 3) {
+		c = p[0] >> 2;
+		archive_wstrappend_wchar(as, base64[c]);
+		c = ((p[0] & 0x03) << 4) | ((p[1] & 0xf0) >> 4);
+		archive_wstrappend_wchar(as, base64[c]);
+		c = ((p[1] & 0x0f) << 2) | ((p[2] & 0xc0) >> 6);
+		archive_wstrappend_wchar(as, base64[c]);
+		c = p[2] & 0x3f;
+		archive_wstrappend_wchar(as, base64[c]);
+	}
+	if (len > 0) {
+		c = p[0] >> 2;
+		archive_wstrappend_wchar(as, base64[c]);
+		c = (p[0] & 0x03) << 4;
+		if (len == 1) {
+			archive_wstrappend_wchar(as, base64[c]);
+			archive_wstrappend_wchar(as, '=');
+			archive_wstrappend_wchar(as, '=');
+		} else {
+			c |= (p[1] & 0xf0) >> 4;
+			archive_wstrappend_wchar(as, base64[c]);
+			c = (p[1] & 0x0f) << 2;
+			archive_wstrappend_wchar(as, base64[c]);
+			archive_wstrappend_wchar(as, '=');
+		}
+	}
+}
+
+/*
+ * Writes `len` bytes of `data`, beginning at offset `start`, as base64 text.
+ */
+static int
+xml_writer_write_base64(struct xml_writer* ctx,
+    const char *data, size_t start, size_t len)
+{
+	struct archive_wstring as;
+	HRESULT hr;
+	archive_string_init(&as);
+	la_b64_wencode(&as, (const unsigned char *)data + start, len);
+	hr = ctx->writer->lpVtbl->WriteString(ctx->writer, as.s);
+	archive_wstring_free(&as);
+	return hr;
+}
+
+static int
+xml_writer_end_element(struct xml_writer *ctx)
+{
+	return ctx->writer->lpVtbl->WriteEndElement(ctx->writer);
+}
+
+/*
+ * Flushes the writer and exposes the finished document as a byte buffer
+ * in *out / *size. The buffer stays locked until xml_writer_destroy().
+ */
+static int
+xml_writer_get_final_content_and_length(struct xml_writer *ctx,
+    const char **out, size_t *size)
+{
+	HGLOBAL gbl;
+	HRESULT hr;
+	STATSTG st;
+
+	hr = ctx->writer->lpVtbl->Flush(ctx->writer);
+	if (FAILED(hr)) {
+		return (hr);
+	}
+
+	/* GlobalSize() reports the size of the allocation, which may be
+	 * rounded up past the bytes actually written; ask the stream for
+	 * its logical length instead.
+	 */
+	hr = ctx->stream->lpVtbl->Stat(ctx->stream, &st, STATFLAG_NONAME);
+	if (FAILED(hr)) {
+		return (hr);
+	}
+
+	hr = GetHGlobalFromStream(ctx->stream, &gbl);
+	if (FAILED(hr)) {
+		return (hr);
+	}
+
+	*out = (const char *)GlobalLock(gbl);
+	if (*out == NULL) {
+		hr = HRESULT_FROM_WIN32(GetLastError());
+		return (hr);
+	}
+
+	/* GlobalUnlock is called in
+	 * xml_writer_destroy.
+	 */
+	*size = (size_t)st.cbSize.QuadPart;
+	ctx->global = gbl;
+	return (hr);
+}
+
 #endif /* HAVE_LIBXML_XMLWRITER_H */
 
 #endif /* Support xar format */