]> git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
This adds UNICODE filename support for lha.
authorClaybird <claybird.without.wing@gmail.com>
Mon, 14 Oct 2019 13:32:54 +0000 (22:32 +0900)
committerClaybird <claybird.without.wing@gmail.com>
Wed, 6 Nov 2019 11:34:31 +0000 (20:34 +0900)
The lastest lha format supports UNICODE filenames on its content, using extended headers(EXT_UTF16_FILENAME and EXT_UTF16_DIRECTORY).
However, currently libarchive ignores them.
This modification is to handle these extensions.

libarchive/archive_read_support_format_lha.c

index 95c99bb1f31e848892dc6ab96bbba55fae9c5284..16480b3cd7402887c80f8a7fe177b916bc7c287b 100644 (file)
@@ -180,6 +180,8 @@ struct lha {
 
        struct archive_string    dirname;
        struct archive_string    filename;
+       struct archive_wstring   dirname_w;
+       struct archive_wstring   filename_w;
        struct archive_wstring   ws;
 
        unsigned char            dos_attr;
@@ -475,6 +477,7 @@ archive_read_format_lha_read_header(struct archive_read *a,
 {
        struct archive_string linkname;
        struct archive_string pathname;
+       struct archive_wstring pathname_w;
        struct lha *lha;
        const unsigned char *p;
        const char *signature;
@@ -559,7 +562,9 @@ archive_read_format_lha_read_header(struct archive_read *a,
        lha->uid = 0;
        lha->gid = 0;
        archive_string_empty(&lha->dirname);
+       archive_wstring_empty(&lha->dirname_w);
        archive_string_empty(&lha->filename);
+       archive_wstring_empty(&lha->filename_w);
        lha->dos_attr = 0;
        if (lha->opt_sconv != NULL)
                lha->sconv = lha->opt_sconv;
@@ -596,14 +601,40 @@ archive_read_format_lha_read_header(struct archive_read *a,
        /*
         * Make a pathname from a dirname and a filename.
         */
+       if (archive_strlen(&lha->filename_w) > 0 || archive_strlen(&lha->dirname_w) > 0) {
+               /* This archive has some unicode contents */
+               if (archive_strlen(&lha->filename_w) == 0)
+                       if (-1 == archive_wstring_append_from_mbs(&lha->filename_w, lha->filename.s, archive_strlen(&lha->filename))) {
+                               if (errno == ENOMEM)
+                                       archive_set_error(&a->archive, ENOMEM, "Can't allocate memory");
+                               else
+                                       archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can't convert a path to a wchar_t string");
+                               a->archive.state = ARCHIVE_STATE_FATAL;
+                               return ARCHIVE_FATAL;
+                       }
+               if (archive_strlen(&lha->dirname_w) == 0)
+                       if (-1 == archive_wstring_append_from_mbs(&lha->dirname_w, lha->dirname.s, archive_strlen(&lha->dirname))) {
+                               if (errno == ENOMEM)
+                                       archive_set_error(&a->archive, ENOMEM, "Can't allocate memory");
+                               else
+                                       archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can't convert a path to a wchar_t string");
+                               a->archive.state = ARCHIVE_STATE_FATAL;
+                               return ARCHIVE_FATAL;
+                       }
+       }
+
        archive_string_concat(&lha->dirname, &lha->filename);
+       archive_wstring_concat(&lha->dirname_w, &lha->filename_w);
        archive_string_init(&pathname);
+       archive_string_init(&pathname_w);
        archive_string_init(&linkname);
        archive_string_copy(&pathname, &lha->dirname);
+       archive_wstring_copy(&pathname_w, &lha->dirname_w);
 
        if ((lha->mode & AE_IFMT) == AE_IFLNK) {
                /*
                 * Extract the symlink-name if it's included in the pathname.
+                * Symlink-name is assumed not to exist in UNICODE filename, because UNICODE archives are created only on Windows(unlha32.dll)
                 */
                if (!lha_parse_linkname(&linkname, &pathname)) {
                        /* We couldn't get the symlink-name. */
@@ -611,6 +642,7 @@ archive_read_format_lha_read_header(struct archive_read *a,
                            ARCHIVE_ERRNO_FILE_FORMAT,
                            "Unknown symlink-name");
                        archive_string_free(&pathname);
+                       archive_wstring_free(&pathname_w);
                        archive_string_free(&linkname);
                        return (ARCHIVE_FAILED);
                }
@@ -629,7 +661,9 @@ archive_read_format_lha_read_header(struct archive_read *a,
        /*
         * Set basic file parameters.
         */
-       if (archive_entry_copy_pathname_l(entry, pathname.s,
+       if (archive_strlen(&pathname_w) > 0 ) {
+               archive_entry_copy_pathname_w(entry, pathname_w.s);
+       }else if (archive_entry_copy_pathname_l(entry, pathname.s,
            pathname.length, lha->sconv) != 0) {
                if (errno == ENOMEM) {
                        archive_set_error(&a->archive, ENOMEM,
@@ -644,6 +678,7 @@ archive_read_format_lha_read_header(struct archive_read *a,
                err = ARCHIVE_WARN;
        }
        archive_string_free(&pathname);
+       archive_wstring_free(&pathname_w);
        if (archive_strlen(&linkname) > 0) {
                if (archive_entry_copy_symlink_l(entry, linkname.s,
                    linkname.length, lha->sconv) != 0) {
@@ -1208,6 +1243,18 @@ lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
                        archive_strncpy(&lha->filename,
                            (const char *)extdheader, datasize);
                        break;
+               case EXT_UTF16_FILENAME:
+                       if (datasize == 0) {
+                               /* maybe directory header */
+                               archive_wstring_empty(&lha->filename_w);
+                               break;
+                       }
+                       if (extdheader[0] == '\0')
+                               goto invalid;
+                       archive_wstrncpy(&lha->filename_w,
+                               (const wchar_t *)extdheader, datasize / 2);
+                       break;
+
                case EXT_DIRECTORY:
                        if (datasize == 0 || extdheader[0] == '\0')
                                /* no directory name data. exit this case. */
@@ -1228,6 +1275,26 @@ lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
                                /* invalid directory data */
                                goto invalid;
                        break;
+               case EXT_UTF16_DIRECTORY:
+                       if (datasize == 0 || extdheader[0] == '\0')
+                               /* no directory name data. exit this case. */
+                               goto invalid;
+
+                       archive_wstrncpy(&lha->dirname_w,
+                               (const wchar_t *)extdheader, datasize / 2);
+                       /*
+                        * Convert directory delimiter from 0xFF
+                        * to '/' for local system.
+                        */
+                       for (i = 0; i < lha->dirname_w.length; i++) {
+                               if (lha->dirname_w.s[i] == 0xFFFF)
+                                       lha->dirname_w.s[i] = L'/';
+                       }
+                       /* Is last character directory separator? */
+                       if (lha->dirname_w.s[lha->dirname_w.length - 1] != L'/')
+                               /* invalid directory data */
+                               goto invalid;
+                       break;
                case EXT_DOS_ATTR:
                        if (datasize == 2)
                                lha->dos_attr = (unsigned char)
@@ -1336,8 +1403,7 @@ lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
                        }
                        break;
                case EXT_TIMEZONE:              /* Not supported */
-               case EXT_UTF16_FILENAME:        /* Not supported */
-               case EXT_UTF16_DIRECTORY:       /* Not supported */
+                       break;
                default:
                        break;
                }