/*-
* Copyright (c) 2003-2009 Tim Kientzle
+ * Copyright (c) 2010 Michihiro NAKAJIMA
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
#include "archive_platform.h"
__FBSDID("$FreeBSD: head/lib/libarchive/archive_read_disk.c 189429 2009-03-06 04:35:31Z kientzle $");
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#ifdef HAVE_DIRECT_H
+#include <direct.h>
+#endif
+#ifdef HAVE_DIRENT_H
+#include <dirent.h>
+#endif
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
#include "archive.h"
#include "archive_string.h"
#include "archive_entry.h"
#include "archive_private.h"
#include "archive_read_disk_private.h"
+/*-
+ * This is a new directory-walking system that addresses a number
+ * of problems I've had with fts(3). In particular, it has no
+ * pathname-length limits (other than the size of 'int'), handles
+ * deep logical traversals, uses considerably less memory, and has
+ * an opaque interface (easier to modify in the future).
+ *
+ * Internally, it keeps a single list of "tree_entry" items that
+ * represent filesystem objects that require further attention.
+ * Non-directories are not kept in memory: they are pulled from
+ * readdir(), returned to the client, then freed as soon as possible.
+ * Any directory entry to be traversed gets pushed onto the stack.
+ *
+ * There is surprisingly little information that needs to be kept for
+ * each item on the stack. Just the name, depth (represented here as the
+ * string length of the parent directory's pathname), and some markers
+ * indicating how to get back to the parent (via chdir("..") for a
+ * regular dir or via fchdir(2) for a symlink).
+ */
+/*
+ * TODO:
+ * 1) Loop checking.
+ * 3) Arbitrary logical traversals by closing/reopening intermediate fds.
+ */
+
+struct tree_entry { /* One pending item on the traversal stack. */
+ int depth; /* Parent's depth + 1; left at 0 when pushed with no current entry. */
+ struct tree_entry *next; /* Entry directly beneath this one on the stack. */
+ struct tree_entry *parent; /* Entry that was "current" when this one was pushed. */
+ char *name; /* Heap copy of the name handed to tree_push(). */
+ size_t dirname_length; /* tree.dirname_length at push time; restored by tree_pop(). */
+ dev_t dev; /* NOTE(review): not referenced by the code in this file section. */
+ ino_t ino; /* NOTE(review): not referenced by the code in this file section. */
+ int flags; /* Bitmap of the isDir/isDirLink/needs* values. */
+ /* How to return back to the parent of a symlink. */
+#ifdef HAVE_FCHDIR
+ int symlink_parent_fd; /* fd of "." opened before descending; -1 until then. */
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+ char *symlink_parent_path; /* Saved working directory; NULL until recorded. */
+#else
+#error fchdir function required.
+#endif
+};
+
+/* Definitions for tree_entry.flags bitmap. */
+#define isDir 1 /* This entry is a regular directory. */
+#define isDirLink 2 /* This entry is a symbolic link to a directory. */
+#define needsFirstVisit 4 /* This is an initial entry. */
+#define needsDescent 8 /* This entry needs to be previsited. */
+#define needsOpen 16 /* This is a directory that needs to be opened. */
+#define needsAscent 32 /* This entry needs to be postvisited. */
+
+/*
+ * On Windows, "first visit" is handled as a pattern to be handed to
+ * _findfirst(). This is consistent with Windows conventions that
+ * file patterns are handled within the application. On Posix,
+ * "first visit" is just returned to the client.
+ */
+
+/*
+ * Local data for this package.
+ *
+ * One instance describes one traversal: the stack of entries that
+ * still need attention, the directory currently being enumerated,
+ * the path buffer for the current entry and cached stat results.
+ */
+struct tree {
+ struct tree_entry *stack; /* Top of the stack of pending entries. */
+ struct tree_entry *current; /* Stack entry most recently visited by tree_next(). */
+#if defined(HAVE_WINDOWS_H) && !defined(__CYGWIN__)
+ HANDLE d; /* Find handle of the directory being enumerated. */
+ BY_HANDLE_FILE_INFORMATION fileInfo; /* Valid when hasFileInfo is set. */
+#define INVALID_DIR_HANDLE INVALID_HANDLE_VALUE
+ WIN32_FIND_DATA _findData; /* Storage filled by FindFirstFile/FindNextFile. */
+ WIN32_FIND_DATA *findData; /* Points at _findData during enumeration, else NULL. */
+#else
+ DIR *d; /* Directory being enumerated, or INVALID_DIR_HANDLE. */
+#define INVALID_DIR_HANDLE NULL
+ struct dirent *de; /* Result of the latest readdir() call. */
+#endif
+ int flags; /* Bitmap of hasStat/hasLstat/hasFileInfo. */
+ int visit_type; /* Last value returned by tree_next(). */
+ int tree_errno; /* Error code from last failed operation. */
+
+ /* Dynamically-sized buffer for holding path */
+ char *buff;
+ size_t buff_length; /* Allocated size of buff in bytes. */
+
+ const char *basename; /* Last path element */
+ size_t dirname_length; /* Leading dir length */
+ size_t path_length; /* Total path length */
+
+ int depth; /* Incremented on each descent, decremented on each ascent. */
+ int openCount; /* Number of parent-directory fds currently held open. */
+ int maxOpenCount; /* High-water mark of openCount. */
+
+ struct stat lst; /* Cached lstat() result; valid when hasLstat is set. */
+ struct stat st; /* Cached stat() result; valid when hasStat is set. */
+ int descend; /* Non-zero if the current entry may be descended into. */
+
+ char symlink_mode; /* 'L', 'P' or 'H' symlink-following mode. */
+ char dev_recorded; /* Non-zero once current_dev has been set. */
+ dev_t current_dev; /* Device of the first entry returned. */
+};
+
+/* Definitions for tree.flags bitmap. */
+#define hasStat 16 /* The st entry is valid. */
+#define hasLstat 32 /* The lst entry is valid. */
+#define hasFileInfo 64 /* The Windows fileInfo entry is valid. */
+
+#if defined(_WIN32) && !defined(__CYGWIN__)
+static int
+tree_dir_next_windows(struct tree *t, const char *pattern);
+#else
+static int
+tree_dir_next_posix(struct tree *t);
+#endif
+
+#ifdef HAVE_DIRENT_D_NAMLEN
+/* BSD extension; avoids need for a strlen() call. */
+#define D_NAMELEN(dp) (dp)->d_namlen
+#else
+#define D_NAMELEN(dp) (strlen((dp)->d_name))
+#endif
+
+/* Initiate/terminate a tree traversal. */
+static struct tree *tree_open(const char *);
+static void tree_close(struct tree *);
+static void tree_push(struct tree *, const char *);
+
+/*
+ * tree_next() returns zero if there is no next entry, non-zero if
+ * there is. Note that directories are visited three times.
+ * Directories are always visited first as part of enumerating their
+ * parent; that is a "regular" visit. If tree_descend() is invoked at
+ * that time, the directory is added to a work list and will
+ * subsequently be visited two more times: once just after descending
+ * into the directory ("postdescent") and again just after ascending
+ * back to the parent ("postascent").
+ *
+ * TREE_ERROR_DIR is returned if the descent failed (because the
+ * directory couldn't be opened, for instance). This is returned
+ * instead of TREE_POSTDESCENT/TREE_POSTASCENT. TREE_ERROR_DIR is not a
+ * fatal error, but it does imply that the relevant subtree won't be
+ * visited. TREE_ERROR_FATAL is returned for an error that left the
+ * traversal completely hosed. Right now, this is only returned for
+ * chdir() failures during ascent.
+ */
+#define TREE_REGULAR 1
+#define TREE_POSTDESCENT 2
+#define TREE_POSTASCENT 3
+#define TREE_ERROR_DIR -1
+#define TREE_ERROR_FATAL -2
+
+static int tree_next(struct tree *);
+
+/*
+ * Return information about the current entry.
+ */
+
+/*
+ * The current full pathname, length of the full pathname, and a name
+ * that can be used to access the file. Because the tree package uses
+ * chdir() extensively, the access path is almost never the same as the
+ * full current path.
+ *
+ * TODO: Flesh out this interface to provide other information. In
+ * particular, Windows can provide file size, mode, and some permission
+ * information without invoking stat() at all.
+ *
+ * TODO: On platforms that support it, use openat()-style operations
+ * to eliminate the chdir() operations entirely while still supporting
+ * arbitrarily deep traversals. This makes access_path troublesome to
+ * support, of course, which means we'll need a rich enough interface
+ * that clients can function without it. (In particular, we'll need
+ * tree_current_open() that returns an open file descriptor.)
+ *
+ */
+static const char *tree_current_path(struct tree *);
+static const char *tree_current_access_path(struct tree *);
+
+/*
+ * Request the lstat() or stat() data for the current path. Since the
+ * tree package needs to do some of this anyway, and caches the
+ * results, you should take advantage of it here if you need it rather
+ * than make a redundant stat() or lstat() call of your own.
+ */
+static const struct stat *tree_current_stat(struct tree *);
+static const struct stat *tree_current_lstat(struct tree *);
+
+/* The following functions use tricks to avoid a certain number of
+ * stat()/lstat() calls. */
+/* "is_physical_dir" is equivalent to S_ISDIR(tree_current_lstat()->st_mode) */
+static int tree_current_is_physical_dir(struct tree *);
+#if defined(_WIN32) && !defined(__CYGWIN__)
+/* "is_physical_link" is equivalent to S_ISLNK(tree_current_lstat()->st_mode) */
+static int tree_current_is_physical_link(struct tree *);
+#endif
+/* "is_dir" is equivalent to S_ISDIR(tree_current_stat()->st_mode) */
+static int tree_current_is_dir(struct tree *);
+
+
static int _archive_read_free(struct archive *);
static int _archive_read_close(struct archive *);
static int _archive_read_data_block(struct archive *,
static const char *trivial_lookup_gname(void *, int64_t gid);
static const char *trivial_lookup_uname(void *, int64_t uid);
#endif
+
+
+
static struct archive_vtable *
archive_read_disk_vtable(void)
{
return (NULL);
memset(a, 0, sizeof(*a));
a->archive.magic = ARCHIVE_READ_DISK_MAGIC;
- /* We're ready to write a header immediately. */
- a->archive.state = ARCHIVE_STATE_HEADER;
+ a->archive.state = ARCHIVE_STATE_NEW;
a->archive.vtable = archive_read_disk_vtable();
a->lookup_uname = trivial_lookup_uname;
a->lookup_gname = trivial_lookup_gname;
if (a->cleanup_uname != NULL && a->lookup_uname_data != NULL)
(a->cleanup_uname)(a->lookup_uname_data);
archive_string_free(&a->archive.error_string);
+ if (a->tree != NULL)
+ tree_close(a->tree);
a->archive.magic = 0;
free(a);
return (ARCHIVE_OK);
static int
_archive_read_next_header2(struct archive *_a, struct archive_entry *entry)
{
+ struct archive_read_disk *a = (struct archive_read_disk *)_a;
+ struct tree *t;
+ const struct stat *st; /* info to use for this entry */
+ const struct stat *lst;/* lstat() information */
+ int descend, r;
+
archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC,
ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA,
"archive_read_next_header2");
- (void)entry; /* UNUSED */
- /* Not implemented yet. */
- return (ARCHIVE_FAILED);
+ t = a->tree;
+ st = NULL;
+ lst = NULL;
+ do { /* Skip post-descent/post-ascent visits; stop at the first regular visit. */
+ switch (tree_next(t)) {
+ case TREE_ERROR_FATAL:
+ archive_set_error(&a->archive, t->tree_errno,
+ "%s: Unable to continue traversing directory tree",
+ tree_current_path(t));
+ a->archive.state = ARCHIVE_STATE_FATAL;
+ return (ARCHIVE_FATAL);
+ case TREE_ERROR_DIR:
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+ "%s: Couldn't visit directory",
+ tree_current_path(t));
+ return (ARCHIVE_FAILED);
+ case 0: /* tree_next() reports that nothing is left to visit. */
+ return (ARCHIVE_EOF);
+ case TREE_POSTDESCENT:
+ case TREE_POSTASCENT:
+ break; /* lst is still NULL, so the loop asks for the next item. */
+ case TREE_REGULAR:
+ lst = tree_current_lstat(t);
+ if (lst == NULL) {
+ archive_set_error(&a->archive, errno,
+ "%s: Cannot stat",
+ tree_current_path(t));
+ return (ARCHIVE_FAILED);
+ }
+ break;
+ }
+ } while (lst == NULL);
+
+ /*
+ * Distinguish 'L'/'P'/'H' symlink following.
+ *
+ * Each arm decides whether the entry may be descended into
+ * ("descend"), which stat data describes it ("st"), and records
+ * the effective mode on the archive object.
+ */
+ switch(t->symlink_mode) {
+ case 'H':
+ /* 'H': After the first item, rest like 'P'. */
+ t->symlink_mode = 'P';
+ /* 'H': First item (from command line) like 'L'. */
+ /* FALLTHROUGH */
+ case 'L':
+ /* 'L': Do descend through a symlink to dir. */
+ descend = tree_current_is_dir(t);
+ /* 'L': Follow symlinks to files. */
+ a->symlink_mode = 'L';
+ a->follow_symlinks = 1;
+ /* 'L': Archive symlinks as targets, if we can. */
+ st = tree_current_stat(t);
+ if (st != NULL)
+ break;
+ /* If stat fails, we have a broken symlink;
+ * in that case, don't follow the link. */
+ /* FALLTHROUGH */
+ default:
+ /* 'P': Don't descend through a symlink to dir. */
+ descend = tree_current_is_physical_dir(t);
+ /* 'P': Don't follow symlinks to files. */
+ a->symlink_mode = 'P';
+ a->follow_symlinks = 0;
+ /* 'P': Archive symlinks as symlinks. */
+ st = lst;
+ break;
+ }
+
+ if (!t->dev_recorded) {
+ /* This is the initial file system. */
+ t->current_dev = lst->st_dev;
+ t->dev_recorded = 1;
+ }
+ t->descend = descend; /* Consulted later by archive_read_disk_descend(). */
+
+ archive_entry_set_pathname(entry, tree_current_path(t));
+ archive_entry_copy_sourcepath(entry, tree_current_access_path(t));
+
+ /* Populate the archive_entry with metadata from the disk. */
+ r = archive_read_disk_entry_from_file(&(a->archive), entry, -1, st);
+
+ /*
+ * EOF and FATAL are persistent at this layer. By
+ * modifying the state, we guarantee that future calls to
+ * read a header or read data will fail.
+ */
+ switch (r) {
+ case ARCHIVE_EOF:
+ a->archive.state = ARCHIVE_STATE_EOF;
+ break;
+ case ARCHIVE_OK:
+ a->archive.state = ARCHIVE_STATE_DATA;
+ break;
+ case ARCHIVE_WARN:
+ a->archive.state = ARCHIVE_STATE_DATA;
+ break;
+ case ARCHIVE_RETRY: /* State is left unchanged. */
+ break;
+ case ARCHIVE_FATAL:
+ a->archive.state = ARCHIVE_STATE_FATAL;
+ break;
+ }
+
+ return (r);
+}
+
+/*
+ * Client request: queue the entry most recently returned by
+ * archive_read_next_header2() so that its contents get visited.
+ *
+ * The request is honored only when the last visit was a regular one
+ * and the entry was flagged as descendable; otherwise an error message
+ * is recorded and ARCHIVE_WARN is returned.  Returns ARCHIVE_OK in all
+ * other cases, even when the entry turns out not to be a directory.
+ */
+int
+archive_read_disk_descend(struct archive *_a)
+{
+	struct archive_read_disk *a = (struct archive_read_disk *)_a;
+	struct tree *t = a->tree;
+	int dir_kind = 0;
+
+	archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_DATA,
+	    "archive_read_disk_descend");
+
+	if (!(t->visit_type == TREE_REGULAR && t->descend)) {
+		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+		    "Ignored the request descending the current object");
+		return (ARCHIVE_WARN);
+	}
+
+	/* A real directory wins over a link that points at a directory. */
+	if (tree_current_is_physical_dir(t))
+		dir_kind = isDir;
+	else if (tree_current_is_dir(t))
+		dir_kind = isDirLink;
+
+	if (dir_kind != 0) {
+		tree_push(t, t->basename);
+		t->stack->flags |= dir_kind;
+	}
+	/* A second request for the same entry must be rejected. */
+	t->descend = 0;
+	return (ARCHIVE_OK);
+}
+
+/*
+ * Start a directory traversal rooted at "pathname".
+ *
+ * Only valid on an archive in ARCHIVE_STATE_NEW.  On success the
+ * traversal state is attached to the archive, the archive moves to
+ * ARCHIVE_STATE_HEADER and ARCHIVE_OK is returned.  If the traversal
+ * state cannot be allocated, the archive is marked fatal and
+ * ARCHIVE_FATAL is returned.
+ */
+int
+archive_read_disk_open(struct archive *_a, const char *pathname)
+{
+	struct archive_read_disk *a = (struct archive_read_disk *)_a;
+	struct tree *tree;
+
+	archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_NEW,
+	    "archive_read_disk_open");
+	archive_clear_error(&a->archive);
+
+	tree = tree_open(pathname);
+	if (tree == NULL) {
+		/* The message used to say "tar data"; this is the tree walker. */
+		archive_set_error(&a->archive, ENOMEM,
+		    "Can't allocate directory traversal data");
+		a->archive.state = ARCHIVE_STATE_FATAL;
+		return (ARCHIVE_FATAL);
+	}
+	/* The traversal starts out with the archive's symlink mode. */
+	tree->symlink_mode = a->symlink_mode;
+	tree->dev_recorded = 0;
+	a->tree = tree;
+	a->archive.state = ARCHIVE_STATE_HEADER;
+
+	return (ARCHIVE_OK);
+}
+
+
+
+/*
+ * Add a directory path to the current stack.
+ *
+ * The new entry becomes the top of the stack, records the current
+ * entry as its parent, and remembers the current dirname length so
+ * that tree_pop() can restore it.  "path" is copied.
+ *
+ * This function cannot report failure to its callers, so it aborts
+ * on memory exhaustion, the same policy tree_append() uses.
+ */
+static void
+tree_push(struct tree *t, const char *path)
+{
+	struct tree_entry *te;
+
+	te = malloc(sizeof(*te));
+	if (te == NULL)
+		abort();
+	memset(te, 0, sizeof(*te));
+	te->name = strdup(path);
+	if (te->name == NULL)
+		abort();
+	te->next = t->stack;
+	te->parent = t->current;
+	if (te->parent)
+		te->depth = te->parent->depth + 1;
+	t->stack = te;
+	/* No way back to the parent has been recorded yet. */
+#ifdef HAVE_FCHDIR
+	te->symlink_parent_fd = -1;
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+	te->symlink_parent_path = NULL;
+#endif
+	te->flags = needsDescent | needsOpen | needsAscent;
+	te->dirname_length = t->dirname_length;
+}
+
+/*
+ * Append a name to the current dir path.
+ *
+ * The path buffer is truncated to the current directory prefix, grown
+ * if necessary, and then extended with a '/' separator (when the
+ * prefix is non-empty and does not already end in one) followed by
+ * the first name_length bytes of "name".  Trailing '/' characters are
+ * dropped from the name unless the name is a single character.
+ * Afterwards t->basename points at the appended name and
+ * t->path_length holds the total length.  Aborts if the buffer cannot
+ * be allocated.
+ */
+static void
+tree_append(struct tree *t, const char *name, size_t name_length)
+{
+	char *p;
+	size_t size_needed;
+
+	if (t->buff != NULL)
+		t->buff[t->dirname_length] = '\0';
+	/* Strip trailing '/' from name, unless entire name is "/". */
+	while (name_length > 1 && name[name_length - 1] == '/')
+		name_length--;
+
+	/*
+	 * Resize pathname buffer as needed.  The budget covers the
+	 * directory prefix, the separating '/', the name and the
+	 * terminating NUL.  Leaving out the separator lets the NUL land
+	 * one byte past the buffer when the path exactly fills it.
+	 */
+	size_needed = name_length + t->dirname_length + 2;
+	if (t->buff_length < size_needed) {
+		char *grown;
+		size_t new_length = t->buff_length;
+
+		if (new_length < 1024)
+			new_length = 1024;
+		while (new_length < size_needed)
+			new_length *= 2;
+		grown = realloc(t->buff, new_length);
+		if (grown == NULL)
+			abort();
+		t->buff = grown;
+		t->buff_length = new_length;
+	}
+	p = t->buff + t->dirname_length;
+	t->path_length = t->dirname_length + name_length;
+	/* Add a separating '/' if it's needed. */
+	if (t->dirname_length > 0 && p[-1] != '/') {
+		*p++ = '/';
+		t->path_length ++;
+	}
+#if HAVE_STRNCPY_S
+	strncpy_s(p, t->buff_length - (p - t->buff), name, name_length);
+#else
+	strncpy(p, name, name_length);
+#endif
+	p[name_length] = '\0';
+	t->basename = p;
 }
+
+/*
+ * Open a directory tree for traversal.
+ *
+ * Allocates the traversal state and pushes "path" as the first entry.
+ * That entry is set up like a symlink traversal: the current working
+ * directory is recorded so that the final ascent can return to it.
+ *
+ * Returns NULL if the traversal state cannot be allocated; the caller
+ * reports that as an out-of-memory error.
+ */
+static struct tree *
+tree_open(const char *path)
+{
+#ifdef HAVE_FCHDIR
+	struct tree *t;
+
+	t = malloc(sizeof(*t));
+	if (t == NULL)
+		return (NULL);
+	memset(t, 0, sizeof(*t));
+	/* First item is set up a lot like a symlink traversal. */
+	tree_push(t, path);
+	t->stack->flags = needsFirstVisit | isDirLink | needsAscent;
+	t->stack->symlink_parent_fd = open(".", O_RDONLY);
+	t->openCount++;
+	t->d = INVALID_DIR_HANDLE;
+	return (t);
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+	struct tree *t;
+	char *cwd = _getcwd(NULL, 0);
+	char *pathname = strdup(path), *p, *base;
+
+	if (pathname == NULL) {
+		free(cwd);
+		return (NULL);
+	}
+	/* Work with forward slashes only. */
+	for (p = pathname; *p != '\0'; ++p) {
+		if (*p == '\\')
+			*p = '/';
+	}
+	base = pathname;
+
+	t = malloc(sizeof(*t));
+	if (t == NULL) {
+		free(pathname);
+		free(cwd);
+		return (NULL);
+	}
+	memset(t, 0, sizeof(*t));
+	/* First item is set up a lot like a symlink traversal. */
+	/* printf("Looking for wildcard in %s\n", path); */
+	/* TODO: wildcard detection here screws up on \\?\c:\ UNC names */
+	if (strchr(base, '*') || strchr(base, '?')) {
+		/* It has a wildcard in it... */
+		/* Separate the last element. */
+		p = strrchr(base, '/');
+		if (p != NULL) {
+			*p = '\0';
+			chdir(base);
+			tree_append(t, base, p - base);
+			t->dirname_length = t->path_length;
+			base = p + 1;
+		}
+	}
+	tree_push(t, base);
+	free(pathname);
+	t->stack->flags = needsFirstVisit | isDirLink | needsAscent;
+	/* Ownership of cwd passes to the stack entry. */
+	t->stack->symlink_parent_path = cwd;
+	t->d = INVALID_DIR_HANDLE;
+	return (t);
+#endif
+}
+
+/*
+ * We've finished a directory; ascend back to the parent.
+ *
+ * Operates on the entry at the top of the stack but does not pop it.
+ * For an entry flagged isDirLink the saved parent location is used
+ * and then released; for any other entry the function changes to "..".
+ * Returns 0 on success, or TREE_ERROR_FATAL with t->tree_errno set
+ * when the working directory could not be changed.
+ */
+static int
+tree_ascend(struct tree *t)
+{
+ struct tree_entry *te;
+ int r = 0;
+
+ te = t->stack;
+ t->depth--;
+ if (te->flags & isDirLink) {
+#ifdef HAVE_FCHDIR
+ if (fchdir(te->symlink_parent_fd) != 0) {
+ t->tree_errno = errno;
+ r = TREE_ERROR_FATAL;
+ }
+ close(te->symlink_parent_fd); /* Closed whether or not fchdir() worked. */
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+ if (SetCurrentDirectory(te->symlink_parent_path) == 0) {
+ t->tree_errno = errno; /* NOTE(review): confirm errno is meaningful after a Win32 API failure. */
+ r = TREE_ERROR_FATAL;
+ }
+ free(te->symlink_parent_path);
+ te->symlink_parent_path = NULL;
+#endif
+ t->openCount--;
+ } else {
+#if defined(_WIN32) && !defined(__CYGWIN__)
+ if (SetCurrentDirectory("..") == 0) {
+#else
+ if (chdir("..") != 0) {
+#endif
+ t->tree_errno = errno;
+ r = TREE_ERROR_FATAL;
+ }
+ }
+ return (r);
+}
+
+/*
+ * Pop the working stack.
+ *
+ * Discards the top entry, cuts the path buffer back, restores the
+ * dirname length that was in effect when the entry was pushed, and
+ * re-derives the basename from it.  If the popped entry was the
+ * current one, its parent becomes current.  The stack must not be
+ * empty.
+ */
+static void
+tree_pop(struct tree *t)
+{
+	struct tree_entry *top = t->stack;
+
+	if (t->buff != NULL)
+		t->buff[t->dirname_length] = '\0';
+	if (t->current != NULL && top == t->current)
+		t->current = t->current->parent;
+
+	/* Unlink the entry and bring back the saved prefix length. */
+	t->stack = top->next;
+	t->dirname_length = top->dirname_length;
+	if (t->buff != NULL) {
+		const char *tail = t->buff + t->dirname_length;
+
+		/* The basename starts after any separators. */
+		while (*tail == '/')
+			tail++;
+		t->basename = tail;
+	}
+	free(top->name);
+	free(top);
+}
+
+/*
+ * Get the next item in the tree traversal.
+ *
+ * Returns 0 when the stack is empty (traversal finished), otherwise
+ * one of the TREE_* codes, which is also stored in t->visit_type.
+ * While a directory is open its entries are returned one at a time.
+ * Otherwise the flags of the top stack entry select the next step:
+ * first visit, descent (chdir into it), opening it for enumeration,
+ * ascent back to the parent, or simply discarding a finished entry.
+ */
+static int
+tree_next(struct tree *t)
+{
+	int r;
+
+	while (t->stack != NULL) {
+		/* If there's an open dir, get the next entry from there. */
+		if (t->d != INVALID_DIR_HANDLE) {
+#if defined(_WIN32) && !defined(__CYGWIN__)
+			r = tree_dir_next_windows(t, NULL);
+#else
+			r = tree_dir_next_posix(t);
+#endif
+			if (r == 0)
+				continue;
+			return (r);
+		}
+
+		if (t->stack->flags & needsFirstVisit) {
+#if defined(_WIN32) && !defined(__CYGWIN__)
+			char *d = t->stack->name;
+			t->stack->flags &= ~needsFirstVisit;
+			if (strchr(d, '*') || strchr(d, '?')) {
+				r = tree_dir_next_windows(t, d);
+				if (r == 0)
+					continue;
+				return (r);
+			}
+			/* Not a pattern, handle it as-is... */
+#endif
+			/* Top stack item needs a regular visit. */
+			t->current = t->stack;
+			tree_append(t, t->stack->name, strlen(t->stack->name));
+			t->stack->flags &= ~needsFirstVisit;
+			return (t->visit_type = TREE_REGULAR);
+		} else if (t->stack->flags & needsDescent) {
+			int saved_errno;
+
+			/* Top stack item is dir to descend into. */
+			t->current = t->stack;
+			tree_append(t, t->stack->name, strlen(t->stack->name));
+			t->stack->flags &= ~needsDescent;
+			/* If it is a link, set up fd for the ascent. */
+			if (t->stack->flags & isDirLink) {
+#ifdef HAVE_FCHDIR
+				t->stack->symlink_parent_fd = open(".", O_RDONLY);
+				t->openCount++;
+				if (t->openCount > t->maxOpenCount)
+					t->maxOpenCount = t->openCount;
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+				t->stack->symlink_parent_path = _getcwd(NULL, 0);
+#endif
+			}
+			t->dirname_length = t->path_length;
+#if defined(_WIN32) && !defined(__CYGWIN__)
+			if (t->path_length == 259 || !SetCurrentDirectory(t->stack->name) != 0)
+#else
+			if (chdir(t->stack->name) != 0)
+#endif
+			{
+				/*
+				 * chdir() failed; return error.  Capture
+				 * errno first, because the cleanup below
+				 * may overwrite it.
+				 */
+				saved_errno = errno;
+				/*
+				 * No ascent will happen for this entry, so
+				 * release what was set up for it above.
+				 */
+				if (t->stack->flags & isDirLink) {
+#ifdef HAVE_FCHDIR
+					if (t->stack->symlink_parent_fd >= 0)
+						close(t->stack->symlink_parent_fd);
+					t->stack->symlink_parent_fd = -1;
+					t->openCount--;
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+					free(t->stack->symlink_parent_path);
+					t->stack->symlink_parent_path = NULL;
+#endif
+				}
+				tree_pop(t);
+				t->tree_errno = saved_errno;
+				return (t->visit_type = TREE_ERROR_DIR);
+			}
+			t->depth++;
+			return (t->visit_type = TREE_POSTDESCENT);
+		} else if (t->stack->flags & needsOpen) {
+			t->stack->flags &= ~needsOpen;
+#if defined(_WIN32) && !defined(__CYGWIN__)
+			r = tree_dir_next_windows(t, "*");
+#else
+			r = tree_dir_next_posix(t);
+#endif
+			if (r == 0)
+				continue;
+			return (r);
+		} else if (t->stack->flags & needsAscent) {
+			/* Top stack item is dir and we're done with it. */
+			r = tree_ascend(t);
+			tree_pop(t);
+			t->visit_type = r != 0 ? r : TREE_POSTASCENT;
+			return (t->visit_type);
+		} else {
+			/* Top item on stack is dead. */
+			tree_pop(t);
+			t->flags &= ~hasLstat;
+			t->flags &= ~hasStat;
+		}
+	}
+	return (t->visit_type = 0);
+}
+
+#if defined(_WIN32) && !defined(__CYGWIN__)
+/*
+ * Fetch the next directory entry on Windows.
+ *
+ * A non-NULL "pattern" starts a new enumeration with FindFirstFile();
+ * NULL continues the enumeration that is already open.  "." and ".."
+ * are skipped.  Returns TREE_REGULAR with the entry appended to the
+ * path, 0 once the enumeration is exhausted, or an error code when
+ * the enumeration could not be started (the top stack entry is then
+ * ascended from and popped).
+ */
+static int
+tree_dir_next_windows(struct tree *t, const char *pattern)
+{
+	const char *name;
+	size_t namelen;
+	int r;
+
+	for (;;) {
+		if (pattern == NULL) {
+			/* Continue the enumeration already in progress. */
+			if (!FindNextFile(t->d, &t->_findData)) {
+				FindClose(t->d);
+				t->d = INVALID_DIR_HANDLE;
+				t->findData = NULL;
+				return (0);
+			}
+		} else {
+			t->d = FindFirstFile(pattern, &t->_findData);
+			if (t->d == INVALID_DIR_HANDLE) {
+				r = tree_ascend(t); /* Undo "chdir" */
+				tree_pop(t);
+				t->tree_errno = errno;
+				t->visit_type = r != 0 ? r : TREE_ERROR_DIR;
+				return (t->visit_type);
+			}
+			t->findData = &t->_findData;
+			pattern = NULL;
+		}
+		name = t->findData->cFileName;
+		namelen = strlen(name);
+		/* Cached stat data belongs to the previous entry. */
+		t->flags &= ~hasLstat;
+		t->flags &= ~hasStat;
+		if (name[0] == '.' &&
+		    (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')))
+			continue;
+		tree_append(t, name, namelen);
+		return (t->visit_type = TREE_REGULAR);
+	}
+}
+#else
+/*
+ * Fetch the next directory entry on POSIX systems.
+ *
+ * Opens "." on first use.  "." and ".." are skipped.  Returns
+ * TREE_REGULAR with the entry appended to the path, or 0 once
+ * readdir() yields nothing more (the directory is then closed).
+ * If the directory cannot be opened, the top stack entry is ascended
+ * from and popped, and TREE_ERROR_DIR is returned, or the error from
+ * tree_ascend() if that failed too.
+ */
+static int
+tree_dir_next_posix(struct tree *t)
+{
+	int r;
+	const char *name;
+	size_t namelen;
+
+	if (t->d == NULL) {
+		if ((t->d = opendir(".")) == NULL) {
+			/*
+			 * Capture the reason now; the cleanup calls
+			 * below may overwrite errno.
+			 */
+			int opendir_errno = errno;
+
+			r = tree_ascend(t); /* Undo "chdir" */
+			tree_pop(t);
+			/*
+			 * When the ascent failed, tree_ascend() has
+			 * already recorded the more serious error.
+			 */
+			if (r == 0)
+				t->tree_errno = opendir_errno;
+			t->visit_type = r != 0 ? r : TREE_ERROR_DIR;
+			return (t->visit_type);
+		}
+	}
+	for (;;) {
+		t->de = readdir(t->d);
+		if (t->de == NULL) {
+			closedir(t->d);
+			t->d = INVALID_DIR_HANDLE;
+			return (0);
+		}
+		name = t->de->d_name;
+		namelen = D_NAMELEN(t->de);
+		/* Cached stat data belongs to the previous entry. */
+		t->flags &= ~hasLstat;
+		t->flags &= ~hasStat;
+		if (name[0] == '.' && name[1] == '\0')
+			continue;
+		if (name[0] == '.' && name[1] == '.' && name[2] == '\0')
+			continue;
+		tree_append(t, name, namelen);
+		return (t->visit_type = TREE_REGULAR);
+	}
+}
+#endif
+
+/*
+ * Return the stat() data for the entry just returned from
+ * tree_next().  The result is cached in the tree; stat() is called
+ * on the access path only when no cached copy is flagged as valid.
+ * Returns NULL if stat() fails.
+ */
+static const struct stat *
+tree_current_stat(struct tree *t)
+{
+	if (t->flags & hasStat)
+		return (&t->st);
+	if (stat(tree_current_access_path(t), &t->st) != 0)
+		return NULL;
+	t->flags |= hasStat;
+	return (&t->st);
+}
+
+#if defined(HAVE_WINDOWS_H) && !defined(__CYGWIN__)
+/*
+ * Return the BY_HANDLE_FILE_INFORMATION for the current entry,
+ * querying it through a temporary handle on the access path unless a
+ * cached copy is flagged as valid.  Returns NULL if the entry cannot
+ * be opened or queried.
+ */
+static const BY_HANDLE_FILE_INFORMATION *
+tree_current_file_information(struct tree *t)
+{
+	HANDLE h;
+	BOOL queried;
+
+	if (t->flags & hasFileInfo)
+		return (&t->fileInfo);
+
+	h = CreateFile(tree_current_access_path(t),
+	    0, 0, NULL,
+	    OPEN_EXISTING,
+	    FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OPEN_REPARSE_POINT,
+	    NULL);
+	if (h == INVALID_HANDLE_VALUE)
+		return NULL;
+	/* The handle is only needed for this one query. */
+	queried = GetFileInformationByHandle(h, &t->fileInfo);
+	CloseHandle(h);
+	if (!queried)
+		return NULL;
+	t->flags |= hasFileInfo;
+	return (&t->fileInfo);
+}
+#endif
+/*
+ * Return the lstat() data for the entry just returned from
+ * tree_next().  On Windows this simply forwards to
+ * tree_current_stat().  Elsewhere the result is cached in the tree
+ * and lstat() is called only when no cached copy is flagged as valid.
+ * Returns NULL on failure.
+ */
+static const struct stat *
+tree_current_lstat(struct tree *t)
+{
+#if defined(_WIN32) && !defined(__CYGWIN__)
+	return (tree_current_stat(t));
+#else
+	if (t->flags & hasLstat)
+		return (&t->lst);
+	if (lstat(tree_current_access_path(t), &t->lst) != 0)
+		return NULL;
+	t->flags |= hasLstat;
+	return (&t->lst);
+#endif
+}
+
+/*
+ * Test whether current entry is a dir or link to a dir.
+ *
+ * Returns non-zero if so, zero otherwise (including when the entry
+ * cannot be examined).
+ */
+static int
+tree_current_is_dir(struct tree *t)
+{
+#if defined(_WIN32) && !defined(__CYGWIN__)
+	/* Prefer the attributes that came with the directory listing. */
+	if (t->findData)
+		return (t->findData->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY);
+	if (tree_current_file_information(t))
+		return (t->fileInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY);
+	return (0);
+#else
+	const struct stat *st;
+
+	/*
+	 * Cheap path: lstat() data that is already cached can settle
+	 * the question unless the entry is a symbolic link.
+	 */
+	if (t->flags & hasLstat) {
+		const struct stat *lst = tree_current_lstat(t);
+
+		/* A physical directory is certainly a directory. */
+		if (S_ISDIR(lst->st_mode))
+			return 1;
+		/* Anything that is not a link cannot lead to one. */
+		if (!S_ISLNK(lst->st_mode))
+			return 0;
+		/* A link: only stat() knows what it points at. */
+	}
+
+	st = tree_current_stat(t);
+	/* An entry that cannot be examined is treated as a non-dir. */
+	return (st == NULL ? 0 : (S_ISDIR(st->st_mode)));
+#endif
+}
+
+/*
+ * Test whether current entry is a physical directory, that is, a
+ * directory itself rather than a symbolic link to one.  Cached stat
+ * data is consulted first to avoid an extra trip to the disk.
+ *
+ * Returns non-zero if so, zero otherwise (including when the entry
+ * cannot be examined).
+ */
+static int
+tree_current_is_physical_dir(struct tree *t)
+{
+#if defined(_WIN32) && !defined(__CYGWIN__)
+	if (tree_current_is_physical_link(t))
+		return (0);
+	return (tree_current_is_dir(t));
+#else
+	const struct stat *lst;
+
+	/*
+	 * Free check first: if cached stat() data says the target is
+	 * not a directory, the entry cannot be one either.
+	 */
+	if (t->flags & hasStat) {
+		if (!S_ISDIR(tree_current_stat(t)->st_mode))
+			return 0;
+	}
+
+	/*
+	 * stat() either reported a directory (which might still be
+	 * reached through a link) or was not available; lstat() gives
+	 * the definitive answer.
+	 */
+	lst = tree_current_lstat(t);
+	/* An entry that cannot be examined is treated as a non-dir. */
+	return (lst == NULL ? 0 : (S_ISDIR(lst->st_mode)));
+#endif
+}
+
+#if defined(_WIN32) && !defined(__CYGWIN__)
+#ifndef IO_REPARSE_TAG_SYMLINK
+/* Old SDKs do not provide IO_REPARSE_TAG_SYMLINK */
+#define IO_REPARSE_TAG_SYMLINK 0xA000000CL
+#endif
+/*
+ * Test whether current entry is a symbolic link.
+ *
+ * Windows only.  The answer comes from the find data of the current
+ * directory enumeration: the entry must be a reparse point tagged as
+ * a symbolic link.  Returns zero when no find data is available.
+ *
+ * An lstat()-based alternative used to sit in an inner "#else" here;
+ * it could never be compiled because it was nested inside the same
+ * condition as this whole function, so it has been removed.
+ */
+static int
+tree_current_is_physical_link(struct tree *t)
+{
+	if (t->findData)
+		return ((t->findData->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)
+		    && (t->findData->dwReserved0 == IO_REPARSE_TAG_SYMLINK));
+	return (0);
+}
+#endif
+
+/*
+ * Return the name by which the entry just returned from tree_next()
+ * can be accessed: the last element of the current path.
+ */
+static const char *
+tree_current_access_path(struct tree *t)
+{
+	const char *access_path = t->basename;
+
+	return (access_path);
+}
+
+/*
+ * Return the full path for the entry just returned from tree_next(),
+ * as held in the tree's path buffer.
+ */
+static const char *
+tree_current_path(struct tree *t)
+{
+	const char *full_path = t->buff;
+
+	return (full_path);
+}
+
+/*
+ * Terminate the traversal and release any resources.
+ *
+ * May be called before the traversal has finished.  In that case the
+ * directory that is still open, and the parent references kept by the
+ * entries still on the stack, are released here, because no ascent
+ * will ever happen for them.
+ */
+static void
+tree_close(struct tree *t)
+{
+	/* Release the directory that was being enumerated, if any. */
+	if (t->d != INVALID_DIR_HANDLE) {
+#if defined(HAVE_WINDOWS_H) && !defined(__CYGWIN__)
+		FindClose(t->d);
+		t->findData = NULL;
+#else
+		closedir(t->d);
+#endif
+		t->d = INVALID_DIR_HANDLE;
+	}
+	/* Release anything remaining in the stack. */
+	while (t->stack != NULL) {
+#ifdef HAVE_FCHDIR
+		if (t->stack->symlink_parent_fd >= 0) {
+			close(t->stack->symlink_parent_fd);
+			t->stack->symlink_parent_fd = -1;
+		}
+#elif defined(_WIN32) && !defined(__CYGWIN__)
+		free(t->stack->symlink_parent_path);
+		t->stack->symlink_parent_path = NULL;
+#endif
+		tree_pop(t);
+	}
+	free(t->buff);
+	/* TODO: Ensure that premature close() resets cwd */
+	free(t);
+}
+
--- /dev/null
+/*-
+ * Copyright (c) 2010 Michihiro NAKAJIMA
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "test.h"
+__FBSDID("$FreeBSD$");
+
+DEFINE_TEST(test_read_disk_directory_traversals)
+{ /* Walks a small tree under "dir1" and checks each entry that is returned. */
+ struct archive *a;
+ struct archive_entry *ae;
+ //const void *p;
+ //size_t size;
+ //int64_t offset;
+
+ /* Build the fixture: files and directories nested three levels deep. */
+ assertMakeDir("dir1", 0755);
+ assertMakeFile("dir1/file1", 0644, "0123456789");
+ assertMakeFile("dir1/file2", 0644, "0123456789");
+ assertMakeDir("dir1/sub1", 0755);
+ assertMakeFile("dir1/sub1/file1", 0644, "0123456789");
+ assertMakeDir("dir1/sub2", 0755);
+ assertMakeFile("dir1/sub2/file1", 0644, "0123456789");
+ assertMakeFile("dir1/sub2/file2", 0644, "0123456789");
+ assertMakeDir("dir1/sub2/sub1", 0755);
+ assertMakeDir("dir1/sub2/sub2", 0755);
+ assertMakeDir("dir1/sub2/sub3", 0755);
+ assertMakeFile("dir1/sub2/sub3/file", 0644, "0123456789");
+
+ assert((ae = archive_entry_new()) != NULL);
+ assert((a = archive_read_disk_new()) != NULL);
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_open(a, "dir1"));
+
+ /* dir1: the starting point itself is returned first. */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+ assertEqualString(archive_entry_pathname(ae), "dir1");
+ assertEqualInt(archive_entry_filetype(ae), AE_IFDIR);
+
+ /* Descend into the current object */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a));
+
+ /* dir1/file1
+ * NOTE(review): the sibling order asserted from here on follows
+ * the order in which the directory is enumerated; confirm that
+ * this order is stable on every filesystem the test runs on. */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+ assertEqualString(archive_entry_pathname(ae), "dir1/file1");
+ assertEqualInt(archive_entry_filetype(ae), AE_IFREG);
+ assertEqualInt(archive_entry_size(ae), 10);
+ //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0);
+ //assertEqualInt((int)size, 10);
+ //assertEqualInt((int)offset, 0);
+ //assertEqualInt(memcmp(p, "0123456789", 10), 0);
+
+ /* dir1/file2 */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+ assertEqualString(archive_entry_pathname(ae), "dir1/file2");
+ assertEqualInt(archive_entry_filetype(ae), AE_IFREG);
+ assertEqualInt(archive_entry_size(ae), 10);
+ //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0);
+ //assertEqualInt((int)size, 10);
+ //assertEqualInt((int)offset, 0);
+ //assertEqualInt(memcmp(p, "0123456789", 10), 0);
+
+ /* dir1/sub1 */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+ assertEqualString(archive_entry_pathname(ae), "dir1/sub1");
+ assertEqualInt(archive_entry_filetype(ae), AE_IFDIR);
+
+ /* Descend into the current object (its contents are visited later) */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a));
+
+ /* dir1/sub2 */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+ assertEqualString(archive_entry_pathname(ae), "dir1/sub2");
+ assertEqualInt(archive_entry_filetype(ae), AE_IFDIR);
+
+ /* Descend into the current object */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a));
+
+ /* dir1/sub2/file1: descend requests are stacked, so sub2, which
+ * was requested last, is entered before sub1. */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+ assertEqualString(archive_entry_pathname(ae), "dir1/sub2/file1");
+ assertEqualInt(archive_entry_filetype(ae), AE_IFREG);
+ assertEqualInt(archive_entry_size(ae), 10);
+ //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0);
+ //assertEqualInt((int)size, 10);
+ //assertEqualInt((int)offset, 0);
+ //assertEqualInt(memcmp(p, "0123456789", 10), 0);
+
+ /* dir1/sub2/file2 */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+ assertEqualString(archive_entry_pathname(ae), "dir1/sub2/file2");
+ assertEqualInt(archive_entry_filetype(ae), AE_IFREG);
+ assertEqualInt(archive_entry_size(ae), 10);
+ //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0);
+ //assertEqualInt((int)size, 10);
+ //assertEqualInt((int)offset, 0);
+ //assertEqualInt(memcmp(p, "0123456789", 10), 0);
+
+ /* dir1/sub2/sub1 (an empty directory) */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+ assertEqualString(archive_entry_pathname(ae), "dir1/sub2/sub1");
+ assertEqualInt(archive_entry_filetype(ae), AE_IFDIR);
+
+ /* Descend into the current object */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a));
+
+ /* dir1/sub2/sub2 (an empty directory) */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+ assertEqualString(archive_entry_pathname(ae), "dir1/sub2/sub2");
+ assertEqualInt(archive_entry_filetype(ae), AE_IFDIR);
+
+ /* Descend into the current object */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a));
+
+ /* dir1/sub2/sub3 */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+ assertEqualString(archive_entry_pathname(ae), "dir1/sub2/sub3");
+ assertEqualInt(archive_entry_filetype(ae), AE_IFDIR);
+
+ /* Descend into the current object */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a));
+
+ /* dir1/sub2/sub3/file: the only entry produced by the three
+ * directories queued under dir1/sub2. */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+ assertEqualString(archive_entry_pathname(ae), "dir1/sub2/sub3/file");
+ assertEqualInt(archive_entry_filetype(ae), AE_IFREG);
+ assertEqualInt(archive_entry_size(ae), 10);
+ //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0);
+ //assertEqualInt((int)size, 10);
+ //assertEqualInt((int)offset, 0);
+ //assertEqualInt(memcmp(p, "0123456789", 10), 0);
+
+ /* dir1/sub1/file1: sub1 was queued first and is entered last. */
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae));
+ assertEqualString(archive_entry_pathname(ae), "dir1/sub1/file1");
+ assertEqualInt(archive_entry_filetype(ae), AE_IFREG);
+ assertEqualInt(archive_entry_size(ae), 10);
+ //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0);
+ //assertEqualInt((int)size, 10);
+ //assertEqualInt((int)offset, 0);
+ //assertEqualInt(memcmp(p, "0123456789", 10), 0);
+
+ /* There is no entry. */
+ assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header2(a, ae));
+
+ /* Destroy the archive. */
+ assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+ archive_entry_free(ae);
+}