git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
Bring the code supporting directory traversals from bsdtar/tree.[ch]
author: Michihiro NAKAJIMA <ggcueroad@gmail.com>
Wed, 9 Jun 2010 21:54:13 +0000 (17:54 -0400)
committer: Michihiro NAKAJIMA <ggcueroad@gmail.com>
Wed, 9 Jun 2010 21:54:13 +0000 (17:54 -0400)
into archive_read_disk.c and modify it.
Introduce new APIs archive_read_disk_open and archive_read_disk_descend.

TODO: - implement archive_read_data_block and others.
      - adapt it to libarchive world, for example, use struct archive_string.

SVN-Revision: 2455

Makefile.am
libarchive/archive.h
libarchive/archive_read_disk.c
libarchive/archive_read_disk_private.h
libarchive/test/CMakeLists.txt
libarchive/test/test_read_disk_directory_traversals.c [new file with mode: 0644]

index 041998996cfab1eef9ba2fdb84504a8d31b6d32d..df45c9763045e20d2d484ff052942e5f1bff8b93 100644 (file)
@@ -264,6 +264,7 @@ libarchive_test_SOURCES=                                    \
        libarchive/test/test_read_compress_program.c            \
        libarchive/test/test_read_data_large.c                  \
        libarchive/test/test_read_disk.c                        \
+       libarchive/test/test_read_disk_directory_traversals.c   \
        libarchive/test/test_read_disk_entry_from_file.c        \
        libarchive/test/test_read_extract.c                     \
        libarchive/test/test_read_file_nonexistent.c            \
index 4945245bbd4aa3911cabf0de5824158bb34115c3..1072f81d6fb306edd28e38dcca89e1193109ef2f 100644 (file)
@@ -777,6 +777,16 @@ __LA_DECL int      archive_read_disk_set_uname_lookup(struct archive *,
     const char *(* /* lookup_fn */)(void *, __LA_INT64_T),
     void (* /* cleanup_fn */)(void *));
 #endif
+/*
+ * Start traversal of the given pathname.  The handle must be in the
+ * ARCHIVE_STATE_NEW state; on success ARCHIVE_OK is returned and the
+ * handle is ready for reading headers, otherwise ARCHIVE_FATAL.
+ */
+__LA_DECL int  archive_read_disk_open(struct archive *, const char *);
+/*
+ * Request that current entry be visited.  If you invoke it on every
+ * directory, you'll get a physical traversal.  This is ignored if the
+ * current entry isn't a directory or a link to a directory.  So, if
+ * you invoke this on every returned path, you'll get a full logical
+ * traversal.
+ *
+ * Returns ARCHIVE_OK, or ARCHIVE_WARN when the request was ignored.
+ */
+__LA_DECL int  archive_read_disk_descend(struct archive *);
 
 /*
  * Accessor functions to read/set various information in
index ffd378949dc7bf339b0b22c9d4132d5162870a27..6fc046c21a9bde535f1b20fb283bb1d2ca1f1129 100644 (file)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2003-2009 Tim Kientzle
+ * Copyright (c) 2010 Michihiro NAKAJIMA
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #include "archive_platform.h"
 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_disk.c 189429 2009-03-06 04:35:31Z kientzle $");
 
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#ifdef HAVE_DIRECT_H
+#include <direct.h>
+#endif
+#ifdef HAVE_DIRENT_H
+#include <dirent.h>
+#endif
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
 #include "archive.h"
 #include "archive_string.h"
 #include "archive_entry.h"
 #include "archive_private.h"
 #include "archive_read_disk_private.h"
 
+/*-
+ * This is a new directory-walking system that addresses a number
+ * of problems I've had with fts(3).  In particular, it has no
+ * pathname-length limits (other than the size of 'int'), handles
+ * deep logical traversals, uses considerably less memory, and has
+ * an opaque interface (easier to modify in the future).
+ *
+ * Internally, it keeps a single list of "tree_entry" items that
+ * represent filesystem objects that require further attention.
+ * Non-directories are not kept in memory: they are pulled from
+ * readdir(), returned to the client, then freed as soon as possible.
+ * Any directory entry to be traversed gets pushed onto the stack.
+ *
+ * There is surprisingly little information that needs to be kept for
+ * each item on the stack.  Just the name, depth (represented here as the
+ * string length of the parent directory's pathname), and some markers
+ * indicating how to get back to the parent (via chdir("..") for a
+ * regular dir or via fchdir(2) for a symlink).
+ */
+/*
+ * TODO:
+ *    1) Loop checking.
+ *    3) Arbitrary logical traversals by closing/reopening intermediate fds.
+ */
+
+/*
+ * One item on the traversal work stack: a directory (or link to one)
+ * that still needs to be entered, read, or left.
+ */
+struct tree_entry {
+       int depth; /* Parent's depth + 1; 0 when there is no parent. */
+       struct tree_entry *next; /* Next-older item on the stack. */
+       struct tree_entry *parent; /* tree.current at the time of the push. */
+       char *name; /* strdup()ed copy of the pushed path; freed on pop. */
+       size_t dirname_length; /* tree.dirname_length to restore on pop. */
+       /* NOTE(review): dev/ino are not referenced in the code visible
+        * here; presumably reserved for loop checking -- confirm. */
+       dev_t dev;
+       ino_t ino;
+       int flags; /* Bitmap of the is... and needs... values below. */
+       /* How to return back to the parent of a symlink. */
+#ifdef HAVE_FCHDIR
+       int symlink_parent_fd; /* fd of the dir we came from, or -1. */
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+       char *symlink_parent_path; /* _getcwd() result for the dir we came from. */
+#else
+#error fchdir function required.
+#endif
+};
+
+/* Definitions for tree_entry.flags bitmap. */
+#define        isDir 1 /* This entry is a regular directory. */
+#define        isDirLink 2 /* This entry is a symbolic link to a directory. */
+#define        needsFirstVisit 4 /* This is an initial entry. */
+#define        needsDescent 8 /* This entry needs to be previsited. */
+#define        needsOpen 16 /* This is a directory that needs to be opened. */
+#define        needsAscent 32 /* This entry needs to be postvisited. */
+
+/*
+ * On Windows, "first visit" is handled as a pattern to be handed to
+ * _findfirst().  This is consistent with Windows conventions that
+ * file patterns are handled within the application.  On Posix,
+ * "first visit" is just returned to the client.
+ */
+
+/*
+ * Local data for this package.
+ */
+struct tree {
+       struct tree_entry       *stack; /* Top of the work stack. */
+       struct tree_entry       *current; /* Entry last first-visited or descended into. */
+#if defined(HAVE_WINDOWS_H) && !defined(__CYGWIN__)
+       HANDLE d; /* Open find handle, or INVALID_DIR_HANDLE. */
+       BY_HANDLE_FILE_INFORMATION fileInfo;
+#define        INVALID_DIR_HANDLE INVALID_HANDLE_VALUE
+       WIN32_FIND_DATA _findData; /* Storage for the current find result. */
+       WIN32_FIND_DATA *findData; /* &_findData while a scan is active, else NULL. */
+#else
+       DIR     *d; /* Open directory stream, or INVALID_DIR_HANDLE. */
+#define        INVALID_DIR_HANDLE NULL
+       struct dirent *de; /* Most recent readdir() result. */
+#endif
+       int      flags; /* Bitmap of the has... values below. */
+       int      visit_type; /* Last code handed back by tree_next(). */
+       int      tree_errno; /* Error code from last failed operation. */
+
+       /* Dynamically-sized buffer for holding path */
+       char    *buff;
+       size_t   buff_length;
+
+       const char *basename; /* Last path element */
+       size_t   dirname_length; /* Leading dir length */
+       size_t   path_length; /* Total path length */
+
+       int      depth; /* Bumped after each successful descent, dropped on ascent. */
+       int      openCount; /* Counted up when a symlink-return fd is opened. */
+       int      maxOpenCount; /* High-water mark of openCount. */
+
+       struct stat     lst; /* Cached lstat() data; valid when hasLstat is set. */
+       struct stat     st; /* Cached stat() data; valid when hasStat is set. */
+       int      descend; /* Non-zero if the current entry may be descended into. */
+
+       char     symlink_mode; /* 'L', 'P' or 'H'; copied from the archive handle. */
+       char     dev_recorded; /* Non-zero once current_dev has been set. */
+       dev_t    current_dev; /* st_dev of the first entry returned. */
+};
+
+/* Definitions for tree.flags bitmap. */
+#define        hasStat 16  /* The st entry is valid. */
+#define        hasLstat 32 /* The lst entry is valid. */
+#define        hasFileInfo 64 /* The Windows fileInfo entry is valid. */
+
+/*
+ * Platform-specific directory reader: returns a TREE_* code for the
+ * next entry, or 0 when the directory is exhausted.
+ */
+#if defined(_WIN32) && !defined(__CYGWIN__)
+static int
+tree_dir_next_windows(struct tree *t, const char *pattern);
+#else
+static int
+tree_dir_next_posix(struct tree *t);
+#endif
+
+/* Length of a directory entry's name, for a struct dirent pointer. */
+#ifdef HAVE_DIRENT_D_NAMLEN
+/* BSD extension; avoids need for a strlen() call. */
+#define        D_NAMELEN(dp)   (dp)->d_namlen
+#else
+/* Portable fallback: measure the NUL-terminated d_name. */
+#define        D_NAMELEN(dp)   (strlen((dp)->d_name))
+#endif
+
+/* Initiate/terminate a tree traversal. */
+static struct tree *tree_open(const char *);
+static void tree_close(struct tree *);
+/* Push a path onto the work stack of directories still to be visited. */
+static void tree_push(struct tree *, const char *);
+
+/*
+ * tree_next() returns Zero if there is no next entry, non-zero if
+ * there is.  Note that directories are visited three times.
+ * Directories are always visited first as part of enumerating their
+ * parent; that is a "regular" visit.  If tree_descend() is invoked at
+ * that time, the directory is added to a work list and will
+ * subsequently be visited two more times: once just after descending
+ * into the directory ("postdescent") and again just after ascending
+ * back to the parent ("postascent").
+ *
+ * TREE_ERROR_DIR is returned if the descent failed (because the
+ * directory couldn't be opened, for instance).  This is returned
+ * instead of TREE_POSTDESCENT/TREE_POSTASCENT.  TREE_ERROR_DIR is not a
+ * fatal error, but it does imply that the relevant subtree won't be
+ * visited.  TREE_ERROR_FATAL is returned for an error that left the
+ * traversal completely hosed.  Right now, this is only returned for
+ * chdir() failures during ascent.
+ */
+/* Visit codes returned by tree_next(); 0 means the traversal is finished. */
+#define        TREE_REGULAR    1 /* Entry seen while enumerating its parent. */
+#define        TREE_POSTDESCENT        2 /* Just changed into the directory. */
+#define        TREE_POSTASCENT 3 /* Just returned to the parent. */
+#define        TREE_ERROR_DIR  -1 /* Non-fatal: this subtree is skipped. */
+#define        TREE_ERROR_FATAL -2 /* Could not get back to the parent. */
+
+static int tree_next(struct tree *);
+
+/*
+ * Return information about the current entry.
+ */
+
+/*
+ * The current full pathname, length of the full pathname, and a name
+ * that can be used to access the file.  Because tree does use chdir
+ * extensively, the access path is almost never the same as the full
+ * current path.
+ *
+ * TODO: Flesh out this interface to provide other information.  In
+ * particular, Windows can provide file size, mode, and some permission
+ * information without invoking stat() at all.
+ *
+ * TODO: On platforms that support it, use openat()-style operations
+ * to eliminate the chdir() operations entirely while still supporting
+ * arbitrarily deep traversals.  This makes access_path troublesome to
+ * support, of course, which means we'll need a rich enough interface
+ * that clients can function without it.  (In particular, we'll need
+ * tree_current_open() that returns an open file descriptor.)
+ *
+ */
+static const char *tree_current_path(struct tree *);
+static const char *tree_current_access_path(struct tree *);
+
+/*
+ * Request the lstat() or stat() data for the current path.  Since the
+ * tree package needs to do some of this anyway, and caches the
+ * results, you should take advantage of it here if you need it rather
+ * than make a redundant stat() or lstat() call of your own.
+ *
+ * Both return NULL if the underlying call fails.
+ */
+static const struct stat *tree_current_stat(struct tree *);
+static const struct stat *tree_current_lstat(struct tree *);
+
+/* The following functions use tricks to avoid a certain number of
+ * stat()/lstat() calls. */
+/* "is_physical_dir" is equivalent to S_ISDIR(tree_current_lstat()->st_mode) */
+static int tree_current_is_physical_dir(struct tree *);
+/* NOTE(review): the next prototype is only declared for native Windows
+ * builds -- confirm that this guard is the intended one. */
+#if defined(_WIN32) && !defined(__CYGWIN__)
+/* "is_physical_link" is equivalent to S_ISLNK(tree_current_lstat()->st_mode) */
+static int tree_current_is_physical_link(struct tree *);
+#endif
+/* "is_dir" is equivalent to S_ISDIR(tree_current_stat()->st_mode) */
+static int tree_current_is_dir(struct tree *);
+
+
 static int     _archive_read_free(struct archive *);
 static int     _archive_read_close(struct archive *);
 static int     _archive_read_data_block(struct archive *,
@@ -46,6 +270,9 @@ static const char *trivial_lookup_uname(void *, uid_t uid);
 static const char *trivial_lookup_gname(void *, int64_t gid);
 static const char *trivial_lookup_uname(void *, int64_t uid);
 #endif
+
+
+
 static struct archive_vtable *
 archive_read_disk_vtable(void)
 {
@@ -162,8 +389,7 @@ archive_read_disk_new(void)
                return (NULL);
        memset(a, 0, sizeof(*a));
        a->archive.magic = ARCHIVE_READ_DISK_MAGIC;
-       /* We're ready to write a header immediately. */
-       a->archive.state = ARCHIVE_STATE_HEADER;
+       a->archive.state = ARCHIVE_STATE_NEW;
        a->archive.vtable = archive_read_disk_vtable();
        a->lookup_uname = trivial_lookup_uname;
        a->lookup_gname = trivial_lookup_gname;
@@ -184,6 +410,8 @@ _archive_read_free(struct archive *_a)
        if (a->cleanup_uname != NULL && a->lookup_uname_data != NULL)
                (a->cleanup_uname)(a->lookup_uname_data);
        archive_string_free(&a->archive.error_string);
+       if (a->tree != NULL)
+               tree_close(a->tree);
        a->archive.magic = 0;
        free(a);
        return (ARCHIVE_OK);
@@ -279,11 +507,741 @@ _archive_read_data_block(struct archive *_a, const void **buff,
+/*
+ * Advance the traversal to the next filesystem object and fill in
+ * 'entry' for it.
+ *
+ * Returns ARCHIVE_EOF when tree_next() reports no more entries,
+ * ARCHIVE_FATAL if the traversal cannot continue, ARCHIVE_FAILED if a
+ * directory could not be visited or the entry could not be lstat()ed,
+ * and otherwise whatever archive_read_disk_entry_from_file() returns.
+ */
 static int
 _archive_read_next_header2(struct archive *_a, struct archive_entry *entry)
 {
+       struct archive_read_disk *a = (struct archive_read_disk *)_a;
+       struct tree *t;
+       const struct stat *st; /* info to use for this entry */
+       const struct stat *lst;/* lstat() information */
+       int descend, r;
+
        archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC,
            ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA,
            "archive_read_next_header2");
 
-       (void)entry; /* UNUSED */
-       /* Not implemented yet. */
-       return (ARCHIVE_FAILED);
+       t = a->tree;
+       st = NULL;
+       lst = NULL;
+       /*
+        * Keep pulling visits until a regular one yields lstat() data;
+        * post-descent and post-ascent visits leave lst NULL and so
+        * are skipped.
+        */
+       do {
+               switch (tree_next(t)) {
+               case TREE_ERROR_FATAL:
+                       archive_set_error(&a->archive, t->tree_errno,
+                           "%s: Unable to continue traversing directory tree",
+                           tree_current_path(t));
+                       a->archive.state = ARCHIVE_STATE_FATAL;
+                       return (ARCHIVE_FATAL);
+               case TREE_ERROR_DIR:
+                       archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+                           "%s: Couldn't visit directory",
+                           tree_current_path(t));
+                       return (ARCHIVE_FAILED);
+               case 0:
+                       return (ARCHIVE_EOF);
+               case TREE_POSTDESCENT:
+               case TREE_POSTASCENT:
+                       break;
+               case TREE_REGULAR:
+                       lst = tree_current_lstat(t);
+                       if (lst == NULL) {
+                               archive_set_error(&a->archive, errno,
+                                   "%s: Cannot stat",
+                                   tree_current_path(t));
+                               return (ARCHIVE_FAILED);
+                       }
+                       break;
+               }       
+       } while (lst == NULL);
+
+       /*
+        * Distinguish 'L'/'P'/'H' symlink following.
+        */
+       switch(t->symlink_mode) {
+       case 'H':
+               /* 'H': After the first item, rest like 'P'. */
+               t->symlink_mode = 'P';
+               /* 'H': First item (from command line) like 'L'. */
+               /* FALLTHROUGH */
+       case 'L':
+               /* 'L': Do descend through a symlink to dir. */
+               descend = tree_current_is_dir(t);
+               /* 'L': Follow symlinks to files. */
+               a->symlink_mode = 'L';
+               a->follow_symlinks = 1;
+               /* 'L': Archive symlinks as targets, if we can. */
+               st = tree_current_stat(t);
+               if (st != NULL)
+                       break;
+               /* If stat fails, we have a broken symlink;
+                * in that case, don't follow the link. */
+               /* FALLTHROUGH */
+       default:
+               /* 'P': Don't descend through a symlink to dir. */
+               descend = tree_current_is_physical_dir(t);
+               /* 'P': Don't follow symlinks to files. */
+               a->symlink_mode = 'P';
+               a->follow_symlinks = 0;
+               /* 'P': Archive symlinks as symlinks. */
+               st = lst;
+               break;
+       }
+
+       if (!t->dev_recorded) {
+               /* This is the initial file system. */
+               t->current_dev = lst->st_dev;
+               t->dev_recorded = 1;
+       }
+       /* Remembered so archive_read_disk_descend() can honor or ignore a request. */
+       t->descend = descend;
+
+       archive_entry_set_pathname(entry, tree_current_path(t));
+       archive_entry_copy_sourcepath(entry, tree_current_access_path(t));
+
+       /* Populate the archive_entry with metadata from the disk. */
+       r = archive_read_disk_entry_from_file(&(a->archive), entry, -1, st);
+
+       /*
+        * EOF and FATAL are persistent at this layer.  By
+        * modifying the state, we guarantee that future calls to
+        * read a header or read data will fail.
+        */
+       switch (r) {
+       case ARCHIVE_EOF:
+               a->archive.state = ARCHIVE_STATE_EOF;
+               break;
+       case ARCHIVE_OK:
+               a->archive.state = ARCHIVE_STATE_DATA;
+               break;
+       case ARCHIVE_WARN:
+               a->archive.state = ARCHIVE_STATE_DATA;
+               break;
+       case ARCHIVE_RETRY:
+               break;
+       case ARCHIVE_FATAL:
+               a->archive.state = ARCHIVE_STATE_FATAL;
+               break;
+       }
+
+       return (r);
+}
+
+/*
+ * Client hook: ask that the directory most recently handed back by
+ * tree_next() be entered on a later call.  Returns ARCHIVE_WARN (and
+ * does nothing) when the current visit cannot be descended into.
+ */
+int
+archive_read_disk_descend(struct archive *_a)
+{
+       struct archive_read_disk *a = (struct archive_read_disk *)_a;
+       struct tree *t = a->tree;
+       int dir_kind = 0;
+
+       archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_DATA,
+           "archive_read_disk_descend");
+
+       if (!(t->visit_type == TREE_REGULAR && t->descend)) {
+               archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+                   "Ignored the request descending the current object");
+               return (ARCHIVE_WARN);
+       }
+
+       /* A real directory takes precedence over a link to one. */
+       if (tree_current_is_physical_dir(t))
+               dir_kind = isDir;
+       else if (tree_current_is_dir(t))
+               dir_kind = isDirLink;
+
+       if (dir_kind != 0) {
+               tree_push(t, t->basename);
+               t->stack->flags |= dir_kind;
+       }
+       /* The request has been consumed, whether or not it pushed. */
+       t->descend = 0;
+       return (ARCHIVE_OK);
+}
+
+/*
+ * Begin a directory traversal rooted at 'pathname'.
+ *
+ * The handle must be in ARCHIVE_STATE_NEW.  On success the traversal
+ * state is attached to the handle, the handle moves to
+ * ARCHIVE_STATE_HEADER and ARCHIVE_OK is returned.  If the traversal
+ * state cannot be created the handle becomes ARCHIVE_STATE_FATAL and
+ * ARCHIVE_FATAL is returned.
+ */
+int
+archive_read_disk_open(struct archive *_a, const char *pathname)
+{
+       struct archive_read_disk *a = (struct archive_read_disk *)_a;
+       struct tree *tree;
+
+       archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_NEW,
+           "archive_read_disk_open");
+       archive_clear_error(&a->archive);
+
+       tree = tree_open(pathname);
+       if (tree == NULL) {
+               /* This is the traversal state, not anything tar-specific. */
+               archive_set_error(&a->archive, ENOMEM,
+                   "Can't allocate directory traversal data");
+               a->archive.state = ARCHIVE_STATE_FATAL;
+               return (ARCHIVE_FATAL);
+       }
+       /* The traversal starts out with the handle's symlink policy. */
+       tree->symlink_mode = a->symlink_mode;
+       tree->dev_recorded = 0;
+       a->tree = tree;
+       a->archive.state = ARCHIVE_STATE_HEADER;
+
+       return (ARCHIVE_OK);
+}
+
+
+/*
+ * Add a directory path to the current stack.
+ *
+ * The new entry copies 'path', records the current dirname length so
+ * tree_pop() can restore it, and is marked as still needing descent,
+ * open and ascent.  Running out of memory aborts, matching the policy
+ * tree_append() uses for its path buffer.
+ */
+static void
+tree_push(struct tree *t, const char *path)
+{
+       struct tree_entry *te;
+
+       te = calloc(1, sizeof(*te));
+       if (te == NULL)
+               abort();
+       te->name = strdup(path);
+       if (te->name == NULL)
+               abort();
+       te->next = t->stack;
+       te->parent = t->current;
+       if (te->parent)
+               te->depth = te->parent->depth + 1;
+       t->stack = te;
+       /* No way back to the parent has been recorded yet. */
+#ifdef HAVE_FCHDIR
+       te->symlink_parent_fd = -1;
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+       te->symlink_parent_path = NULL;
+#endif
+       te->flags = needsDescent | needsOpen | needsAscent;
+       te->dirname_length = t->dirname_length;
+}
+
+/*
+ * Append a name to the current dir path.
+ *
+ * The first t->dirname_length bytes of t->buff are kept, a '/' is
+ * inserted if the directory part does not already end in one, and
+ * 'name' (minus trailing slashes) is copied after it.  On return
+ * t->basename points at the copied name inside t->buff and
+ * t->path_length is the length of the whole path.  Aborts if the
+ * buffer cannot be grown.
+ */
+static void
+tree_append(struct tree *t, const char *name, size_t name_length)
+{
+       char *p;
+       size_t size_needed;
+
+       if (t->buff != NULL)
+               t->buff[t->dirname_length] = '\0';
+       /* Strip trailing '/' from name, unless entire name is "/". */
+       while (name_length > 1 && name[name_length - 1] == '/')
+               name_length--;
+
+       /*
+        * Resize pathname buffer as needed.  Reserve room for the
+        * directory part, a possible separating '/', the name and the
+        * terminating NUL; leaving out the separator would let the NUL
+        * land one byte past the end when the path exactly fills the
+        * buffer.
+        */
+       size_needed = t->dirname_length + name_length + 2;
+       if (t->buff_length < size_needed) {
+               if (t->buff_length < 1024)
+                       t->buff_length = 1024;
+               while (t->buff_length < size_needed)
+                       t->buff_length *= 2;
+               p = realloc(t->buff, t->buff_length);
+               if (p == NULL)
+                       abort();
+               t->buff = p;
+       }
+       if (t->buff == NULL)
+               abort();
+       p = t->buff + t->dirname_length;
+       t->path_length = t->dirname_length + name_length;
+       /* Add a separating '/' if it's needed. */
+       if (t->dirname_length > 0 && p[-1] != '/') {
+               *p++ = '/';
+               t->path_length ++;
+       }
+       /* The length is known and the buffer is large enough. */
+       memcpy(p, name, name_length);
+       p[name_length] = '\0';
+       t->basename = p;
 }
+
+/*
+ * Open a directory tree for traversal.
+ *
+ * Returns a newly allocated traversal state whose stack holds 'path'
+ * as the initial entry, or NULL if the state cannot be allocated.
+ * The initial entry is treated like a symlink traversal so that the
+ * final ascent returns to the directory we started in.
+ */
+static struct tree *
+tree_open(const char *path)
+{
+#ifdef HAVE_FCHDIR
+       struct tree *t;
+
+       t = calloc(1, sizeof(*t));
+       if (t == NULL)
+               return (NULL);
+       /* First item is set up a lot like a symlink traversal. */
+       tree_push(t, path);
+       t->stack->flags = needsFirstVisit | isDirLink | needsAscent;
+       /*
+        * NOTE(review): a failed open() leaves -1 here; the error then
+        * only surfaces when tree_ascend() calls fchdir() on it.
+        */
+       t->stack->symlink_parent_fd = open(".", O_RDONLY);
+       t->openCount++;
+       t->d = INVALID_DIR_HANDLE;
+       return (t);
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+       struct tree *t;
+       char *cwd, *pathname, *p, *base;
+
+       cwd = _getcwd(NULL, 0);
+       pathname = strdup(path);
+       t = calloc(1, sizeof(*t));
+       if (pathname == NULL || t == NULL) {
+               /* Report allocation failure to the caller. */
+               free(cwd);
+               free(pathname);
+               free(t);
+               return (NULL);
+       }
+       /* Normalize separators so the rest of the code only sees '/'. */
+       for (p = pathname; *p != '\0'; ++p) {
+               if (*p == '\\')
+                       *p = '/';
+       }
+       base = pathname;
+
+       /* First item is set up a lot like a symlink traversal. */
+       /* TODO: wildcard detection here screws up on \\?\c:\ UNC names */
+       if (strchr(base, '*') || strchr(base, '?')) {
+               /* It has a wildcard in it; separate the last element. */
+               p = strrchr(base, '/');
+               if (p != NULL) {
+                       *p = '\0';
+                       /* NOTE(review): the chdir() result is not checked. */
+                       chdir(base);
+                       tree_append(t, base, p - base);
+                       t->dirname_length = t->path_length;
+                       base = p + 1;
+               }
+       }
+       tree_push(t, base);
+       free(pathname);
+       t->stack->flags = needsFirstVisit | isDirLink | needsAscent;
+       /* Ownership of cwd moves to the stack entry; tree_ascend() frees it. */
+       t->stack->symlink_parent_path = cwd;
+       t->d = INVALID_DIR_HANDLE;
+       return (t);
+#endif
+}
+
+/*
+ * Leave the directory on top of the stack and change back to the
+ * directory we were in before entering it.  Returns 0 on success or
+ * TREE_ERROR_FATAL (with t->tree_errno set) if that fails.
+ */
+static int
+tree_ascend(struct tree *t)
+{
+       struct tree_entry *top = t->stack;
+       int status = 0;
+
+       t->depth--;
+       if (!(top->flags & isDirLink)) {
+               /* Plain directory: the parent is simply "..". */
+#if defined(_WIN32) && !defined(__CYGWIN__)
+               if (SetCurrentDirectory("..") == 0) {
+#else
+               if (chdir("..") != 0) {
+#endif
+                       t->tree_errno = errno;
+                       status = TREE_ERROR_FATAL;
+               }
+               return (status);
+       }
+
+       /* Entered through a link: go back via the saved location. */
+#ifdef HAVE_FCHDIR
+       if (fchdir(top->symlink_parent_fd) != 0) {
+               t->tree_errno = errno;
+               status = TREE_ERROR_FATAL;
+       }
+       close(top->symlink_parent_fd);
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+       if (SetCurrentDirectory(top->symlink_parent_path) == 0) {
+               t->tree_errno = errno;
+               status = TREE_ERROR_FATAL;
+       }
+       free(top->symlink_parent_path);
+       top->symlink_parent_path = NULL;
+#endif
+       t->openCount--;
+       return (status);
+}
+
+/*
+ * Pop the working stack: discard the top entry, truncate the path
+ * buffer back to the directory part and restore the dirname length
+ * that was in effect when the entry was pushed.
+ */
+static void
+tree_pop(struct tree *t)
+{
+       struct tree_entry *top;
+       const char *tail;
+
+       if (t->buff != NULL)
+               t->buff[t->dirname_length] = '\0';
+
+       top = t->stack;
+       /* If the popped entry was the current one, its parent takes over. */
+       if (top == t->current && t->current != NULL)
+               t->current = t->current->parent;
+       t->stack = top->next;
+       t->dirname_length = top->dirname_length;
+
+       if (t->buff != NULL) {
+               /* The basename starts after any separators that follow the dir part. */
+               for (tail = t->buff + t->dirname_length; *tail == '/'; tail++)
+                       ;
+               t->basename = tail;
+       }
+       free(top->name);
+       free(top);
+}
+
+/*
+ * Get the next item in the tree traversal.
+ *
+ * Returns one of the TREE_* visit codes, or 0 once the work stack is
+ * empty.  The value returned is also recorded in t->visit_type.
+ */
+static int
+tree_next(struct tree *t)
+{
+       int r;
+
+       while (t->stack != NULL) {
+               /* If there's an open dir, get the next entry from there. */
+               if (t->d != INVALID_DIR_HANDLE) {
+#if defined(_WIN32) && !defined(__CYGWIN__)
+                       r = tree_dir_next_windows(t, NULL);
+#else
+                       r = tree_dir_next_posix(t);
+#endif
+                       if (r == 0)
+                               continue;
+                       return (r);
+               }
+
+               if (t->stack->flags & needsFirstVisit) {
+#if defined(_WIN32) && !defined(__CYGWIN__)
+                       char *d = t->stack->name;
+                       t->stack->flags &= ~needsFirstVisit;
+                       if (strchr(d, '*') || strchr(d, '?')) {
+                               r = tree_dir_next_windows(t, d);
+                               if (r == 0)
+                                       continue;
+                               return (r);
+                       }
+                       /* Not a pattern, handle it as-is... */
+#endif
+                       /* Top stack item needs a regular visit. */
+                       t->current = t->stack;
+                       tree_append(t, t->stack->name, strlen(t->stack->name));
+                       //t->dirname_length = t->path_length;
+                       //tree_pop(t);
+                       t->stack->flags &= ~needsFirstVisit;
+                       return (t->visit_type = TREE_REGULAR);
+               } else if (t->stack->flags & needsDescent) {
+                       /* Top stack item is dir to descend into. */
+                       t->current = t->stack;
+                       tree_append(t, t->stack->name, strlen(t->stack->name));
+                       t->stack->flags &= ~needsDescent;
+                       /* If it is a link, set up fd for the ascent. */
+                       if (t->stack->flags & isDirLink) {
+#ifdef HAVE_FCHDIR
+                               t->stack->symlink_parent_fd = open(".", O_RDONLY);
+                               t->openCount++;
+                               if (t->openCount > t->maxOpenCount)
+                                       t->maxOpenCount = t->openCount;
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+                               t->stack->symlink_parent_path = _getcwd(NULL, 0);
+#endif
+                       }
+                       t->dirname_length = t->path_length;
+#if defined(_WIN32) && !defined(__CYGWIN__)
+                       if (t->path_length == 259 || !SetCurrentDirectory(t->stack->name))
+#else
+                       if (chdir(t->stack->name) != 0)
+#endif
+                       {
+                               /*
+                                * chdir() failed; return error.  Record
+                                * errno first: the cleanup below calls
+                                * close()/free(), which may change it.
+                                */
+                               t->tree_errno = errno;
+                               /* We never entered, so release the way back. */
+                               if (t->stack->flags & isDirLink) {
+#ifdef HAVE_FCHDIR
+                                       if (t->stack->symlink_parent_fd >= 0)
+                                               close(t->stack->symlink_parent_fd);
+                                       t->openCount--;
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+                                       free(t->stack->symlink_parent_path);
+                                       t->stack->symlink_parent_path = NULL;
+#endif
+                               }
+                               tree_pop(t);
+                               return (t->visit_type = TREE_ERROR_DIR);
+                       }
+                       t->depth++;
+                       return (t->visit_type = TREE_POSTDESCENT);
+               } else if (t->stack->flags & needsOpen) {
+                       t->stack->flags &= ~needsOpen;
+#if defined(_WIN32) && !defined(__CYGWIN__)
+                       r = tree_dir_next_windows(t, "*");
+#else
+                       r = tree_dir_next_posix(t);
+#endif
+                       if (r == 0)
+                               continue;
+                       return (r);
+               } else if (t->stack->flags & needsAscent) {
+                       /* Top stack item is dir and we're done with it. */
+                       r = tree_ascend(t);
+                       tree_pop(t);
+                       t->visit_type = r != 0 ? r : TREE_POSTASCENT;
+                       return (t->visit_type);
+               } else {
+                       /* Top item on stack is dead. */
+                       tree_pop(t);
+                       t->flags &= ~hasLstat;
+                       t->flags &= ~hasStat;
+               }
+       }
+       return (t->visit_type = 0);
+}
+
+#if defined(_WIN32) && !defined(__CYGWIN__)
+/*
+ * Windows directory reader.  A non-NULL 'pattern' starts a new scan
+ * with FindFirstFile(); NULL continues the scan already open in t->d.
+ * Returns TREE_REGULAR for the next entry other than "." and "..",
+ * 0 when the scan is exhausted, or an error code if the scan could
+ * not be started.
+ */
+static int
+tree_dir_next_windows(struct tree *t, const char *pattern)
+{
+       const char *entry_name;
+       int r;
+
+       /* Position on a candidate entry: start a scan or advance one. */
+       if (pattern != NULL) {
+               t->d = FindFirstFile(pattern, &t->_findData);
+               if (t->d == INVALID_DIR_HANDLE) {
+                       r = tree_ascend(t); /* Undo "chdir" */
+                       tree_pop(t);
+                       t->tree_errno = errno;
+                       t->visit_type = r != 0 ? r : TREE_ERROR_DIR;
+                       return (t->visit_type);
+               }
+               t->findData = &t->_findData;
+       } else if (!FindNextFile(t->d, &t->_findData)) {
+               FindClose(t->d);
+               t->d = INVALID_DIR_HANDLE;
+               t->findData = NULL;
+               return (0);
+       }
+
+       for (;;) {
+               entry_name = t->findData->cFileName;
+               /* Cached stat data belongs to the previous entry. */
+               t->flags &= ~(hasLstat | hasStat);
+               if (strcmp(entry_name, ".") != 0 &&
+                   strcmp(entry_name, "..") != 0) {
+                       tree_append(t, entry_name, strlen(entry_name));
+                       return (t->visit_type = TREE_REGULAR);
+               }
+               /* "." or "..": skip it and look at the next entry. */
+               if (!FindNextFile(t->d, &t->_findData)) {
+                       FindClose(t->d);
+                       t->d = INVALID_DIR_HANDLE;
+                       t->findData = NULL;
+                       return (0);
+               }
+       }
+}
+#else
+/*
+ * POSIX directory reader.  Opens "." on first use, then returns
+ * TREE_REGULAR for each entry other than "." and "..", and 0 once the
+ * directory is exhausted (the stream is closed at that point).
+ *
+ * If the directory cannot be opened, the descent is undone and
+ * TREE_ERROR_DIR is returned with t->tree_errno holding opendir()'s
+ * errno; if undoing the descent also fails, TREE_ERROR_FATAL is
+ * returned and t->tree_errno keeps the value tree_ascend() recorded.
+ */
+static int
+tree_dir_next_posix(struct tree *t)
+{
+       int r;
+       const char *name;
+       size_t namelen;
+
+       if (t->d == NULL) {
+               if ((t->d = opendir(".")) == NULL) {
+                       /* Save errno before the cleanup calls can change it. */
+                       int opendir_errno = errno;
+
+                       r = tree_ascend(t); /* Undo "chdir" */
+                       tree_pop(t);
+                       if (r == 0) {
+                               t->tree_errno = opendir_errno;
+                               r = TREE_ERROR_DIR;
+                       }
+                       return (t->visit_type = r);
+               }
+       }
+       for (;;) {
+               t->de = readdir(t->d);
+               if (t->de == NULL) {
+                       closedir(t->d);
+                       t->d = INVALID_DIR_HANDLE;
+                       return (0);
+               }
+               name = t->de->d_name;
+               namelen = D_NAMELEN(t->de);
+               /* Cached stat data belongs to the previous entry. */
+               t->flags &= ~hasLstat;
+               t->flags &= ~hasStat;
+               if (name[0] == '.' && name[1] == '\0')
+                       continue;
+               if (name[0] == '.' && name[1] == '.' && name[2] == '\0')
+                       continue;
+               tree_append(t, name, namelen);
+               return (t->visit_type = TREE_REGULAR);
+       }
+}
+#endif
+
+/*
+ * Return stat() data for the entry just returned from tree_next(),
+ * fetching it on first request and serving the cached copy afterwards.
+ * Returns NULL if stat() fails.
+ */
+static const struct stat *
+tree_current_stat(struct tree *t)
+{
+       /* Already cached for this entry? */
+       if (t->flags & hasStat)
+               return (&t->st);
+
+       if (stat(tree_current_access_path(t), &t->st) != 0)
+               return NULL;
+       t->flags |= hasStat;
+       return (&t->st);
+}
+
+#if defined(HAVE_WINDOWS_H) && !defined(__CYGWIN__)
+/*
+ * Return the Windows file information for the current entry, fetching
+ * it on first request and caching it in t->fileInfo.  Returns NULL if
+ * the file cannot be opened or queried.
+ */
+static const BY_HANDLE_FILE_INFORMATION *
+tree_current_file_information(struct tree *t)
+{
+       HANDLE h;
+       BOOL queried;
+
+       if (t->flags & hasFileInfo)
+               return (&t->fileInfo);
+
+       h = CreateFile(tree_current_access_path(t),
+               0, 0, NULL,
+               OPEN_EXISTING,
+               FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OPEN_REPARSE_POINT,
+               NULL);
+       if (h == INVALID_HANDLE_VALUE)
+               return NULL;
+       /* The handle is only needed for this one query. */
+       queried = GetFileInformationByHandle(h, &t->fileInfo);
+       CloseHandle(h);
+       if (!queried)
+               return NULL;
+       t->flags |= hasFileInfo;
+       return (&t->fileInfo);
+}
+#endif
+/*
+ * Return the lstat() data for the entry just returned from tree_next(),
+ * or NULL if the call fails.  On native Windows builds this simply
+ * forwards to tree_current_stat().  Elsewhere a successful result is
+ * stored in the tree and reused while the hasLstat flag is set.
+ */
+static const struct stat *
+tree_current_lstat(struct tree *t)
+{
+#if defined(_WIN32) && !defined(__CYGWIN__)
+       return (tree_current_stat(t));
+#else
+       /* Cached copy is still valid: no system call needed. */
+       if (t->flags & hasLstat)
+               return (&t->lst);
+
+       if (lstat(tree_current_access_path(t), &t->lst) != 0)
+               return NULL;
+
+       t->flags |= hasLstat;
+       return (&t->lst);
+#endif
+}
+
+/*
+ * Test whether the current entry is a directory or a link to a
+ * directory.  Returns non-zero if it is, zero otherwise (including
+ * when the entry cannot be examined).
+ */
+static int
+tree_current_is_dir(struct tree *t)
+{
+#if defined(_WIN32) && !defined(__CYGWIN__)
+       /* Use the attributes in findData when it is set. */
+       if (t->findData != NULL)
+               return (t->findData->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY);
+       /* Otherwise fall back to querying the file itself. */
+       if (tree_current_file_information(t) == NULL)
+               return (0);
+       return (t->fileInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY);
+#else
+       const struct stat *sb;
+
+       /*
+        * When lstat() data is already cached it can often settle the
+        * question without another trip to the disk.
+        */
+       if (t->flags & hasLstat) {
+               sb = tree_current_lstat(t);
+               /* A real directory needs no further checks. */
+               if (S_ISDIR(sb->st_mode))
+                       return 1;
+               /* Neither a directory nor a link: cannot lead to one. */
+               if (!S_ISLNK(sb->st_mode))
+                       return 0;
+               /*
+                * A link whose target type is still unknown; only
+                * stat() can resolve it, so fall through.
+                */
+       }
+
+       /* The definitive test; an entry we cannot stat is not a dir. */
+       sb = tree_current_stat(t);
+       return (sb == NULL ? 0 : S_ISDIR(sb->st_mode));
+#endif
+}
+
+/*
+ * Test whether the current entry is a physical directory, i.e. a
+ * directory itself rather than a link to one.  Cached stat() data is
+ * consulted first because it can rule the entry out for free; the
+ * final answer comes from lstat().
+ */
+static int
+tree_current_is_physical_dir(struct tree *t)
+{
+#if defined(_WIN32) && !defined(__CYGWIN__)
+       /* A symbolic link is never a physical directory. */
+       return (tree_current_is_physical_link(t) ? (0) : tree_current_is_dir(t));
+#else
+       const struct stat *sb;
+
+       /*
+        * If cached stat() data says the entry is not a directory,
+        * then it cannot be a physical directory either.
+        */
+       if (t->flags & hasStat) {
+               sb = tree_current_stat(t);
+               if (!S_ISDIR(sb->st_mode))
+                       return 0;
+       }
+
+       /*
+        * Either stat() reported a directory (which might still be a
+        * link to one) or no stat() data was cached.  lstat() gives
+        * the definitive answer; if it fails, the entry is not a dir.
+        */
+       sb = tree_current_lstat(t);
+       return (sb == NULL ? 0 : S_ISDIR(sb->st_mode));
+#endif
+}
+
+#if defined(_WIN32) && !defined(__CYGWIN__)
+#ifndef IO_REPARSE_TAG_SYMLINK
+/* Old SDKs do not provide IO_REPARSE_TAG_SYMLINK */
+#define        IO_REPARSE_TAG_SYMLINK 0xA000000CL
+#endif
+/*
+ * Test whether current entry is a symbolic link.
+ *
+ * This function is only compiled for native Windows builds.  The entry
+ * is treated as a symbolic link when findData is set, its attributes
+ * include FILE_ATTRIBUTE_REPARSE_POINT and its dwReserved0 field
+ * equals IO_REPARSE_TAG_SYMLINK.  Returns 1 in that case, otherwise 0
+ * (including when findData is not set).
+ */
+static int
+tree_current_is_physical_link(struct tree *t)
+{
+       if (t->findData)
+               return ((t->findData->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)
+                               && (t->findData->dwReserved0 == IO_REPARSE_TAG_SYMLINK));
+       return (0);
+}
+#endif
+
+/*
+ * Return the access path for the entry just returned from tree_next().
+ * This is the tree's basename pointer; it is the path that the stat(),
+ * lstat() and CreateFile() helpers in this file hand to the system.
+ */
+static const char *
+tree_current_access_path(struct tree *t)
+{
+       const char *access_path = t->basename;
+
+       return (access_path);
+}
+
+/*
+ * Return the full path for the entry just returned from tree_next().
+ * The pointer refers to the tree's own path buffer (t->buff), which
+ * tree_close() frees, so it must not be used after the tree is closed.
+ */
+static const char *
+tree_current_path(struct tree *t)
+{
+       const char *full_path = t->buff;
+
+       return (full_path);
+}
+
+/*
+ * Terminate the traversal and release any resources.
+ *
+ * Pops every entry still on the traversal stack, frees the path buffer
+ * and finally frees the tree object itself, so t must not be used
+ * after this call.  A NULL t is accepted and ignored.
+ *
+ * NOTE(review): a directory handle still held in t->d is not closed
+ * here; confirm whether a premature close can leak it.
+ */
+static void
+tree_close(struct tree *t)
+{
+       /*
+        * Like free(), tolerate NULL so that cleanup paths can call
+        * this unconditionally, even if no traversal was ever opened.
+        */
+       if (t == NULL)
+               return;
+       /* Release anything remaining in the stack. */
+       while (t->stack != NULL)
+               tree_pop(t);
+       free(t->buff);
+       /* TODO: Ensure that premature close() resets cwd */
+#if 0
+#ifdef HAVE_FCHDIR
+       if (t->initialDirFd >= 0) {
+               int s = fchdir(t->initialDirFd);
+               (void)s; /* UNUSED */
+               close(t->initialDirFd);
+               t->initialDirFd = -1;
+       }
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+       if (t->initialDir != NULL) {
+               SetCurrentDir(t->initialDir);
+               free(t->initialDir);
+               t->initialDir = NULL;
+       }
+#endif
+#endif
+       free(t);
+}
+
index 3061225738f7f76d38394fd41ff3a12d43381df0..42f6ce016d6639ffe94b7059905e616f69d1cfb3 100644 (file)
@@ -33,6 +33,8 @@
 #ifndef ARCHIVE_READ_DISK_PRIVATE_H_INCLUDED
 #define ARCHIVE_READ_DISK_PRIVATE_H_INCLUDED
 
+struct tree;
+
 struct archive_read_disk {
        struct archive  archive;
 
@@ -51,6 +53,8 @@ struct archive_read_disk {
         */
        char    follow_symlinks;  /* Either 'L' or 'P'. */
 
+       struct tree *tree;
+
 #if ARCHIVE_VERSION_NUMBER < 3000000
        const char * (*lookup_gname)(void *private, gid_t gid);
 #else
index c3da942fcbac4522d86488a81ef6220b74ebb57d..b6608c264683dfb45305f8d6499877a98b8a8448 100644 (file)
@@ -38,6 +38,7 @@ IF(ENABLE_TEST)
     test_read_compress_program.c
     test_read_data_large.c
     test_read_disk.c
+    test_read_disk_directory_traversals.c
     test_read_disk_entry_from_file.c
     test_read_extract.c
     test_read_file_nonexistent.c
diff --git a/libarchive/test/test_read_disk_directory_traversals.c b/libarchive/test/test_read_disk_directory_traversals.c
new file mode 100644 (file)
index 0000000..0336df7
--- /dev/null
@@ -0,0 +1,167 @@
+/*-
+ * Copyright (c) 2010 Michihiro NAKAJIMA
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "test.h"
+__FBSDID("$FreeBSD$");
+/*
+ * Exercise directory traversal through archive_read_disk_open(),
+ * archive_read_next_header2() and archive_read_disk_descend().
+ *
+ * The test builds a small tree under "dir1", opens it, and requests a
+ * descent on every directory it is handed.  For each entry it checks
+ * the pathname and file type, plus the size (10 bytes) for regular
+ * files, and finally expects ARCHIVE_EOF.
+ *
+ * The archive_read_data_block() checks are commented out; the commit
+ * message lists implementing archive_read_data_block as a TODO.
+ *
+ * NOTE(review): the assertions expect entries in one fixed sequence
+ * (file1, file2, sub1, sub2, ...).  Confirm that the traversal returns
+ * directory entries in this order on every target filesystem.
+ */
+DEFINE_TEST(test_read_disk_directory_traversals)
+{
+       struct archive *a;
+       struct archive_entry *ae;
+       //const void *p;
+       //size_t size;
+       //int64_t offset;
+
+       assertMakeDir("dir1", 0755);
+       assertMakeFile("dir1/file1", 0644, "0123456789");
+       assertMakeFile("dir1/file2", 0644, "0123456789");
+       assertMakeDir("dir1/sub1", 0755);
+       assertMakeFile("dir1/sub1/file1", 0644, "0123456789");
+       assertMakeDir("dir1/sub2", 0755);
+       assertMakeFile("dir1/sub2/file1", 0644, "0123456789");
+       assertMakeFile("dir1/sub2/file2", 0644, "0123456789");
+       assertMakeDir("dir1/sub2/sub1", 0755);
+       assertMakeDir("dir1/sub2/sub2", 0755);
+       assertMakeDir("dir1/sub2/sub3", 0755);
+       assertMakeFile("dir1/sub2/sub3/file", 0644, "0123456789");
+
+       assert((ae = archive_entry_new()) != NULL);
+       assert((a = archive_read_disk_new()) != NULL);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_open(a, "dir1"));
+
+       /* dir1: the path given to archive_read_disk_open() comes first. */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+       assertEqualString(archive_entry_pathname(ae), "dir1");
+       assertEqualInt(archive_entry_filetype(ae), AE_IFDIR);
+
+       /* Descend into the current object (dir1). */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a));
+
+       /* dir1/file1 */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+       assertEqualString(archive_entry_pathname(ae), "dir1/file1");
+       assertEqualInt(archive_entry_filetype(ae), AE_IFREG);
+       assertEqualInt(archive_entry_size(ae), 10);
+       //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0);
+       //assertEqualInt((int)size, 10);
+       //assertEqualInt((int)offset, 0);
+       //assertEqualInt(memcmp(p, "0123456789", 10), 0);
+
+       /* dir1/file2 */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+       assertEqualString(archive_entry_pathname(ae), "dir1/file2");
+       assertEqualInt(archive_entry_filetype(ae), AE_IFREG);
+       assertEqualInt(archive_entry_size(ae), 10);
+       //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0);
+       //assertEqualInt((int)size, 10);
+       //assertEqualInt((int)offset, 0);
+       //assertEqualInt(memcmp(p, "0123456789", 10), 0);
+
+       /* dir1/sub1 */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+       assertEqualString(archive_entry_pathname(ae), "dir1/sub1");
+       assertEqualInt(archive_entry_filetype(ae), AE_IFDIR);
+
+       /*
+        * Descend into the current object (dir1/sub1).  Its contents are
+        * expected later, after the whole dir1/sub2 subtree has been
+        * reported.
+        */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a));
+
+       /* dir1/sub2 */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+       assertEqualString(archive_entry_pathname(ae), "dir1/sub2");
+       assertEqualInt(archive_entry_filetype(ae), AE_IFDIR);
+
+       /* Descend into the current object (dir1/sub2). */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a));
+
+       /* dir1/sub2/file1 */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+       assertEqualString(archive_entry_pathname(ae), "dir1/sub2/file1");
+       assertEqualInt(archive_entry_filetype(ae), AE_IFREG);
+       assertEqualInt(archive_entry_size(ae), 10);
+       //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0);
+       //assertEqualInt((int)size, 10);
+       //assertEqualInt((int)offset, 0);
+       //assertEqualInt(memcmp(p, "0123456789", 10), 0);
+
+       /* dir1/sub2/file2 */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+       assertEqualString(archive_entry_pathname(ae), "dir1/sub2/file2");
+       assertEqualInt(archive_entry_filetype(ae), AE_IFREG);
+       assertEqualInt(archive_entry_size(ae), 10);
+       //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0);
+       //assertEqualInt((int)size, 10);
+       //assertEqualInt((int)offset, 0);
+       //assertEqualInt(memcmp(p, "0123456789", 10), 0);
+
+       /* dir1/sub2/sub1 (created empty above) */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+       assertEqualString(archive_entry_pathname(ae), "dir1/sub2/sub1");
+       assertEqualInt(archive_entry_filetype(ae), AE_IFDIR);
+
+       /* Descend into the current object (dir1/sub2/sub1). */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a));
+
+       /* dir1/sub2/sub2 (created empty above) */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+       assertEqualString(archive_entry_pathname(ae), "dir1/sub2/sub2");
+       assertEqualInt(archive_entry_filetype(ae), AE_IFDIR);
+
+       /* Descend into the current object (dir1/sub2/sub2). */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a));
+
+       /* dir1/sub2/sub3 */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+       assertEqualString(archive_entry_pathname(ae), "dir1/sub2/sub3");
+       assertEqualInt(archive_entry_filetype(ae), AE_IFDIR);
+
+       /* Descend into the current object (dir1/sub2/sub3). */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a));
+
+       /* dir1/sub2/sub3/file */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+       assertEqualString(archive_entry_pathname(ae), "dir1/sub2/sub3/file");
+       assertEqualInt(archive_entry_filetype(ae), AE_IFREG);
+       assertEqualInt(archive_entry_size(ae), 10);
+       //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0);
+       //assertEqualInt((int)size, 10);
+       //assertEqualInt((int)offset, 0);
+       //assertEqualInt(memcmp(p, "0123456789", 10), 0);
+
+       /* dir1/sub1/file1: expected last, once dir1/sub2 is finished. */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+       assertEqualString(archive_entry_pathname(ae), "dir1/sub1/file1");
+       assertEqualInt(archive_entry_filetype(ae), AE_IFREG);
+       assertEqualInt(archive_entry_size(ae), 10);
+       //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0);
+       //assertEqualInt((int)size, 10);
+       //assertEqualInt((int)offset, 0);
+       //assertEqualInt(memcmp(p, "0123456789", 10), 0);
+
+       /* The traversal is exhausted: there is no further entry. */
+       assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header2(a, ae));
+
+       /* Destroy the archive, then the entry object allocated above. */
+       assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+       archive_entry_free(ae);
+}