]> git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
Issue 225: Misreading directories in MSDOS Zip files.
authorTim Kientzle <kientzle@gmail.com>
Mon, 23 Jan 2012 03:21:30 +0000 (22:21 -0500)
committerTim Kientzle <kientzle@gmail.com>
Mon, 23 Jan 2012 03:21:30 +0000 (22:21 -0500)
These files don't put a POSIX "mode" value in the "external
attributes" field.  We used to always assume a regular file if we
didn't see that.

Now, the code leaves the mode set to zero in that case.  When we read
the local file header, the fallback heuristics will kick in and
examine the filename to determine whether this is a directory
or a regular file.

SVN-Revision: 4190

Makefile.am
libarchive/archive_read_support_format_zip.c
libarchive/test/test_compat_zip.c
libarchive/test/test_compat_zip_6.zip.uu [new file with mode: 0644]

index b6f9e226cae3898fe5707a9264d0d814821c1585..1ecaf3cb3c7e47a3deaadf5e5f4d0c46ee1a4ad8 100644 (file)
@@ -462,6 +462,7 @@ libarchive_test_EXTRA_DIST=\
        libarchive/test/test_compat_zip_3.zip.uu                        \
        libarchive/test/test_compat_zip_4.zip.uu                        \
        libarchive/test/test_compat_zip_5.zip.uu                        \
+       libarchive/test/test_compat_zip_6.zip.uu                        \
        libarchive/test/test_fuzz_1.iso.Z.uu                            \
        libarchive/test/test_fuzz.cab.uu                                \
        libarchive/test/test_fuzz.lzh.uu                                \
index a84c0bd3e7fb0b449280fbc80f907db9a7f8e731..d6b77a0439b114a3f34f9cdeedd139de5bd8b5de 100644 (file)
@@ -217,14 +217,13 @@ archive_read_support_format_zip(struct archive *a)
 }
 
 /*
- * TODO: This is a performance sink because it forces
- * the read core to drop buffered data from the start
- * of file, which will then have to be re-read again
- * if this bidder loses.
+ * TODO: This is a performance sink because it forces the read core to
+ * drop buffered data from the start of file, which will then have to
+ * be re-read if this bidder loses.
  *
- * Consider passing in the winning bid value to subsequent
- * bidders so that this bidder in particular can avoid
- * seeking if it knows it's going to lose anyway.
+ * We work around this a little by passing in the best bid so far so
+ * that later bidders can do nothing if they know they'll never
+ * outbid.  But we can certainly do better...
  */
 static int
 archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid)
@@ -311,19 +310,29 @@ slurp_central_directory(struct archive_read *a, struct zip *zip)
                external_attributes = archive_le32dec(p + 38);
                zip_entry->local_header_offset = archive_le32dec(p + 42);
 
+               /* If we can't guess the mode, leave it zero here;
+                  when we read the local file header we might get
+                  more information. */
+               zip_entry->mode = 0;
                if (zip_entry->system == 3) {
                        zip_entry->mode = external_attributes >> 16;
-               } else {
-                       zip_entry->mode = AE_IFREG | 0777;
                }
 
-               /* Do we need to parse filename here? */
-               /* Or can we wait until we read the local header? */
+               /* We don't read the filename until we get to the
+                  local file header.  Reading it here would speed up
+                  table-of-contents operations (removing the need to
+                  find and read local file header to get the
+                  filename) at the cost of requiring a lot of extra
+                  space. */
+               /* We don't read the extra block here.  We assume it
+                  will be duplicated at the local file header. */
                __archive_read_consume(a,
                    46 + filename_length + extra_length + comment_length);
        }
 
-       /* TODO: Sort zip entries. */
+       /* TODO: Sort zip entries by file offset so that we
+          can optimize get_next_header() to use skip instead of
+          seek. */
 
        return ARCHIVE_OK;
 }
@@ -434,6 +443,11 @@ archive_read_format_zip_streamable_bid(struct archive_read *a, int best_bid)
                        return (30);
        }
 
+       /* TODO: It's worth looking ahead a little bit for a valid
+        * PK signature.  In particular, that would make it possible
+        * to read some UUEncoded SFX files or SFX files coming from
+        * a network socket. */
+
        return (0);
 }
 
index 2fa27c143f1eff4e014817e0084e25726b235ea6..a2c35db1ac8345ed0100db8e6c70333bbd643468 100644 (file)
@@ -348,6 +348,53 @@ test_compat_zip_5(void)
        free(p);
 }
 
+/* Issue 225: Errors extracting MSDOS Zip archives with directories.
+ * Verifies the entries read from 'a': first a directory, then a
+ * regular file of size 11, then end of archive. */
+static void
+compat_zip_6_verify(struct archive *a)
+{
+       struct archive_entry *ae;
+
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); /* Entry 1. */
+       assertEqualString("New Folder/New Folder/", archive_entry_pathname(ae));
+       assertEqualInt(AE_IFDIR, archive_entry_filetype(ae)); /* Must be reported as a directory. */
+       assertEqualInt(1327314468, archive_entry_mtime(ae));
+       assertEqualInt(0, archive_entry_size(ae));
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); /* Entry 2. */
+       assertEqualString("New Folder/New Folder/New Text Document.txt", archive_entry_pathname(ae));
+       assertEqualInt(AE_IFREG, archive_entry_filetype(ae)); /* Must be reported as a regular file. */
+       assertEqualInt(1327314476, archive_entry_mtime(ae));
+       assertEqualInt(11, archive_entry_size(ae));
+       assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); /* Nothing may follow entry 2. */
+}
+
+/*
+ * Loads the test_compat_zip_6.zip reference file into memory and runs
+ * compat_zip_6_verify() on it twice: once opened with
+ * read_open_memory_seek() and once opened with read_open_memory().
+ */
+static void
+test_compat_zip_6(void)
+{
+       const char *refname = "test_compat_zip_6.zip";
+       struct archive *a;
+       void *p;
+       size_t s;
+
+       extract_reference_file(refname);
+       p = slurpfile(&s, refname);
+
+       /* First pass: reader opened with read_open_memory_seek(). */
+       assert((a = archive_read_new()) != NULL);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
+       assertEqualIntA(a, ARCHIVE_OK, read_open_memory_seek(a, p, s, 7));
+       compat_zip_6_verify(a);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a));
+
+       /* Second pass: same buffer, reader opened with read_open_memory(). */
+       assert((a = archive_read_new()) != NULL);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
+       assertEqualIntA(a, ARCHIVE_OK, read_open_memory(a, p, s, 7));
+       compat_zip_6_verify(a);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a));
+
+       /* Release the buffer obtained from slurpfile(); it was leaked
+          before, unlike in the preceding test, which ends with free(p). */
+       free(p);
+}
+
 DEFINE_TEST(test_compat_zip)
 {
        test_compat_zip_1();
@@ -355,6 +402,7 @@ DEFINE_TEST(test_compat_zip)
        test_compat_zip_3();
        test_compat_zip_4();
        test_compat_zip_5();
+       test_compat_zip_6();
 }
 
 
diff --git a/libarchive/test/test_compat_zip_6.zip.uu b/libarchive/test/test_compat_zip_6.zip.uu
new file mode 100644 (file)
index 0000000..ef6d191
--- /dev/null
@@ -0,0 +1,10 @@
+begin 755 test_compat_zip_6.zip
+M4$L#!`H``````'@3-T`````````````````6````3F5W($9O;&1E<B].97<@
+M1F]L9&5R+U!+`P0*``````!\$S=`%4-8OPL````+````*P```$YE=R!&;VQD
+M97(O3F5W($9O;&1E<B].97<@5&5X="!$;V-U;65N="YT>'1S;VUE('1E>'0-
+M"E!+`0(4"PH``````'@3-T`````````````````6````````````$```````
+M``!.97<@1F]L9&5R+TYE=R!&;VQD97(O4$L!`A0+"@``````?!,W0!5#6+\+
+M````"P```"L``````````0`@````-````$YE=R!&;VQD97(O3F5W($9O;&1E
+M<B].97<@5&5X="!$;V-U;65N="YT>'102P4&``````(``@"=````B```````
+`
+end