From: Michihiro NAKAJIMA Date: Wed, 9 Jun 2010 21:54:13 +0000 (-0400) Subject: Bring the code supporting directory traversals from bsdtar/tree.[ch] X-Git-Tag: v3.0.0a~972 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3311bb52cbe4;p=thirdparty%2Flibarchive.git Bring the code supporting directory traversals from bsdtar/tree.[ch] into archive_read_disk.c and modify it. Introduce new APIs archive_read_disk_open and archive_read_disk_descend. TODO: - implement archive_read_data_block and others. - adapt it to libarchive world, for example, use struct archive_string. SVN-Revision: 2455 --- diff --git a/Makefile.am b/Makefile.am index 041998996..df45c9763 100644 --- a/Makefile.am +++ b/Makefile.am @@ -264,6 +264,7 @@ libarchive_test_SOURCES= \ libarchive/test/test_read_compress_program.c \ libarchive/test/test_read_data_large.c \ libarchive/test/test_read_disk.c \ + libarchive/test/test_read_disk_directory_traversals.c \ libarchive/test/test_read_disk_entry_from_file.c \ libarchive/test/test_read_extract.c \ libarchive/test/test_read_file_nonexistent.c \ diff --git a/libarchive/archive.h b/libarchive/archive.h index 4945245bb..1072f81d6 100644 --- a/libarchive/archive.h +++ b/libarchive/archive.h @@ -777,6 +777,16 @@ __LA_DECL int archive_read_disk_set_uname_lookup(struct archive *, const char *(* /* lookup_fn */)(void *, __LA_INT64_T), void (* /* cleanup_fn */)(void *)); #endif +/* Start traversal. */ +__LA_DECL int archive_read_disk_open(struct archive *, const char *); +/* + * Request that current entry be visited. If you invoke it on every + * directory, you'll get a physical traversal. This is ignored if the + * current entry isn't a directory or a link to a directory. So, if + * you invoke this on every returned path, you'll get a full logical + * traversal. + */ +__LA_DECL int archive_read_disk_descend(struct archive *); /* * Accessor functions to read/set various information in diff --git a/libarchive/archive_read_disk.c b/libarchive/archive_read_disk.c index ffd378949..6fc046c21 100644 --- a/libarchive/archive_read_disk.c +++ b/libarchive/archive_read_disk.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 2003-2009 Tim Kientzle + * Copyright (c) 2010 Michihiro NAKAJIMA * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,12 +28,235 @@ #include "archive_platform.h" __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_disk.c 189429 2009-03-06 04:35:31Z kientzle $"); +#ifdef HAVE_SYS_STAT_H +#include +#endif +#ifdef HAVE_DIRECT_H +#include +#endif +#ifdef HAVE_DIRENT_H +#include +#endif +#ifdef HAVE_ERRNO_H +#include +#endif +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_STDLIB_H +#include +#endif +#ifdef HAVE_STRING_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif + #include "archive.h" #include "archive_string.h" #include "archive_entry.h" #include "archive_private.h" #include "archive_read_disk_private.h" +/*- + * This is a new directory-walking system that addresses a number + * of problems I've had with fts(3). In particular, it has no + * pathname-length limits (other than the size of 'int'), handles + * deep logical traversals, uses considerably less memory, and has + * an opaque interface (easier to modify in the future). + * + * Internally, it keeps a single list of "tree_entry" items that + * represent filesystem objects that require further attention. + * Non-directories are not kept in memory: they are pulled from + * readdir(), returned to the client, then freed as soon as possible. + * Any directory entry to be traversed gets pushed onto the stack. + * + * There is surprisingly little information that needs to be kept for + * each item on the stack. Just the name, depth (represented here as the + * string length of the parent directory's pathname), and some markers + * indicating how to get back to the parent (via chdir("..") for a + * regular dir or via fchdir(2) for a symlink). + */ +/* + * TODO: + * 1) Loop checking. + * 3) Arbitrary logical traversals by closing/reopening intermediate fds. + */ + +struct tree_entry { + int depth; + struct tree_entry *next; + struct tree_entry *parent; + char *name; + size_t dirname_length; + dev_t dev; + ino_t ino; + int flags; + /* How to return back to the parent of a symlink. */ +#ifdef HAVE_FCHDIR + int symlink_parent_fd; +#elif defined(_WIN32) && !defined(__CYGWIN__) + char *symlink_parent_path; +#else +#error fchdir function required. +#endif +}; + +/* Definitions for tree_entry.flags bitmap. */ +#define isDir 1 /* This entry is a regular directory. */ +#define isDirLink 2 /* This entry is a symbolic link to a directory. */ +#define needsFirstVisit 4 /* This is an initial entry. */ +#define needsDescent 8 /* This entry needs to be previsited. */ +#define needsOpen 16 /* This is a directory that needs to be opened. */ +#define needsAscent 32 /* This entry needs to be postvisited. */ + +/* + * On Windows, "first visit" is handled as a pattern to be handed to + * _findfirst(). This is consistent with Windows conventions that + * file patterns are handled within the application. On Posix, + * "first visit" is just returned to the client. + */ + +/* + * Local data for this package. + */ +struct tree { + struct tree_entry *stack; + struct tree_entry *current; +#if defined(HAVE_WINDOWS_H) && !defined(__CYGWIN__) + HANDLE d; + BY_HANDLE_FILE_INFORMATION fileInfo; +#define INVALID_DIR_HANDLE INVALID_HANDLE_VALUE + WIN32_FIND_DATA _findData; + WIN32_FIND_DATA *findData; +#else + DIR *d; +#define INVALID_DIR_HANDLE NULL + struct dirent *de; +#endif + int flags; + int visit_type; + int tree_errno; /* Error code from last failed operation. */ + + /* Dynamically-sized buffer for holding path */ + char *buff; + size_t buff_length; + + const char *basename; /* Last path element */ + size_t dirname_length; /* Leading dir length */ + size_t path_length; /* Total path length */ + + int depth; + int openCount; + int maxOpenCount; + + struct stat lst; + struct stat st; + int descend; + + char symlink_mode; + char dev_recorded; + dev_t current_dev; +}; + +/* Definitions for tree.flags bitmap. */ +#define hasStat 16 /* The st entry is valid. */ +#define hasLstat 32 /* The lst entry is valid. */ +#define hasFileInfo 64 /* The Windows fileInfo entry is valid. */ + +#if defined(_WIN32) && !defined(__CYGWIN__) +static int +tree_dir_next_windows(struct tree *t, const char *pattern); +#else +static int +tree_dir_next_posix(struct tree *t); +#endif + +#ifdef HAVE_DIRENT_D_NAMLEN +/* BSD extension; avoids need for a strlen() call. */ +#define D_NAMELEN(dp) (dp)->d_namlen +#else +#define D_NAMELEN(dp) (strlen((dp)->d_name)) +#endif + +/* Initiate/terminate a tree traversal. */ +static struct tree *tree_open(const char *); +static void tree_close(struct tree *); +static void tree_push(struct tree *, const char *); + +/* + * tree_next() returns Zero if there is no next entry, non-zero if + * there is. Note that directories are visited three times. + * Directories are always visited first as part of enumerating their + * parent; that is a "regular" visit. If tree_descend() is invoked at + * that time, the directory is added to a work list and will + * subsequently be visited two more times: once just after descending + * into the directory ("postdescent") and again just after ascending + * back to the parent ("postascent"). + * + * TREE_ERROR_DIR is returned if the descent failed (because the + * directory couldn't be opened, for instance). This is returned + * instead of TREE_POSTDESCENT/TREE_POSTASCENT. TREE_ERROR_DIR is not a + * fatal error, but it does imply that the relevant subtree won't be + * visited. TREE_ERROR_FATAL is returned for an error that left the + * traversal completely hosed. Right now, this is only returned for + * chdir() failures during ascent. + */ +#define TREE_REGULAR 1 +#define TREE_POSTDESCENT 2 +#define TREE_POSTASCENT 3 +#define TREE_ERROR_DIR -1 +#define TREE_ERROR_FATAL -2 + +static int tree_next(struct tree *); + +/* + * Return information about the current entry. + */ + +/* + * The current full pathname, length of the full pathname, and a name + * that can be used to access the file. Because tree does use chdir + * extensively, the access path is almost never the same as the full + * current path. + * + * TODO: Flesh out this interface to provide other information. In + * particular, Windows can provide file size, mode, and some permission + * information without invoking stat() at all. + * + * TODO: On platforms that support it, use openat()-style operations + * to eliminate the chdir() operations entirely while still supporting + * arbitrarily deep traversals. This makes access_path troublesome to + * support, of course, which means we'll need a rich enough interface + * that clients can function without it. (In particular, we'll need + * tree_current_open() that returns an open file descriptor.) + * + */ +static const char *tree_current_path(struct tree *); +static const char *tree_current_access_path(struct tree *); + +/* + * Request the lstat() or stat() data for the current path. Since the + * tree package needs to do some of this anyway, and caches the + * results, you should take advantage of it here if you need it rather + * than make a redundant stat() or lstat() call of your own. + */ +static const struct stat *tree_current_stat(struct tree *); +static const struct stat *tree_current_lstat(struct tree *); + +/* The following functions use tricks to avoid a certain number of + * stat()/lstat() calls. */ +/* "is_physical_dir" is equivalent to S_ISDIR(tree_current_lstat()->st_mode) */ +static int tree_current_is_physical_dir(struct tree *); +#if defined(_WIN32) && !defined(__CYGWIN__) +/* "is_physical_link" is equivalent to S_ISLNK(tree_current_lstat()->st_mode) */ +static int tree_current_is_physical_link(struct tree *); +#endif +/* "is_dir" is equivalent to S_ISDIR(tree_current_stat()->st_mode) */ +static int tree_current_is_dir(struct tree *); + + static int _archive_read_free(struct archive *); static int _archive_read_close(struct archive *); static int _archive_read_data_block(struct archive *, @@ -46,6 +270,9 @@ static const char *trivial_lookup_uname(void *, uid_t uid); static const char *trivial_lookup_gname(void *, int64_t gid); static const char *trivial_lookup_uname(void *, int64_t uid); #endif + + + static struct archive_vtable * archive_read_disk_vtable(void) { @@ -162,8 +389,7 @@ archive_read_disk_new(void) return (NULL); memset(a, 0, sizeof(*a)); a->archive.magic = ARCHIVE_READ_DISK_MAGIC; - /* We're ready to write a header immediately. */ - a->archive.state = ARCHIVE_STATE_HEADER; + a->archive.state = ARCHIVE_STATE_NEW; a->archive.vtable = archive_read_disk_vtable(); a->lookup_uname = trivial_lookup_uname; a->lookup_gname = trivial_lookup_gname; @@ -184,6 +410,8 @@ _archive_read_free(struct archive *_a) if (a->cleanup_uname != NULL && a->lookup_uname_data != NULL) (a->cleanup_uname)(a->lookup_uname_data); archive_string_free(&a->archive.error_string); + if (a->tree != NULL) + tree_close(a->tree); a->archive.magic = 0; free(a); return (ARCHIVE_OK); @@ -279,11 +507,741 @@ _archive_read_data_block(struct archive *_a, const void **buff, static int _archive_read_next_header2(struct archive *_a, struct archive_entry *entry) { + struct archive_read_disk *a = (struct archive_read_disk *)_a; + struct tree *t; + const struct stat *st; /* info to use for this entry */ + const struct stat *lst;/* lstat() information */ + int descend, r; + archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, "archive_read_next_header2"); - (void)entry; /* UNUSED */ - /* Not implemented yet. */ - return (ARCHIVE_FAILED); + t = a->tree; + st = NULL; + lst = NULL; + do { + switch (tree_next(t)) { + case TREE_ERROR_FATAL: + archive_set_error(&a->archive, t->tree_errno, + "%s: Unable to continue traversing directory tree", + tree_current_path(t)); + a->archive.state = ARCHIVE_STATE_FATAL; + return (ARCHIVE_FATAL); + case TREE_ERROR_DIR: + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "%s: Couldn't visit directory", + tree_current_path(t)); + return (ARCHIVE_FAILED); + case 0: + return (ARCHIVE_EOF); + case TREE_POSTDESCENT: + case TREE_POSTASCENT: + break; + case TREE_REGULAR: + lst = tree_current_lstat(t); + if (lst == NULL) { + archive_set_error(&a->archive, errno, + "%s: Cannot stat", + tree_current_path(t)); + return (ARCHIVE_FAILED); + } + break; + } + } while (lst == NULL); + + /* + * Distinguish 'L'/'P'/'H' symlink following. + */ + switch(t->symlink_mode) { + case 'H': + /* 'H': After the first item, rest like 'P'. */ + t->symlink_mode = 'P'; + /* 'H': First item (from command line) like 'L'. */ + /* FALLTHROUGH */ + case 'L': + /* 'L': Do descend through a symlink to dir. */ + descend = tree_current_is_dir(t); + /* 'L': Follow symlinks to files. */ + a->symlink_mode = 'L'; + a->follow_symlinks = 1; + /* 'L': Archive symlinks as targets, if we can. */ + st = tree_current_stat(t); + if (st != NULL) + break; + /* If stat fails, we have a broken symlink; + * in that case, don't follow the link. */ + /* FALLTHROUGH */ + default: + /* 'P': Don't descend through a symlink to dir. */ + descend = tree_current_is_physical_dir(t); + /* 'P': Don't follow symlinks to files. */ + a->symlink_mode = 'P'; + a->follow_symlinks = 0; + /* 'P': Archive symlinks as symlinks. */ + st = lst; + break; + } + + if (!t->dev_recorded) { + /* This is the initial file system. */ + t->current_dev = lst->st_dev; + t->dev_recorded = 1; + } + t->descend = descend; + + archive_entry_set_pathname(entry, tree_current_path(t)); + archive_entry_copy_sourcepath(entry, tree_current_access_path(t)); + + /* Populate the archive_entry with metadata from the disk. */ + r = archive_read_disk_entry_from_file(&(a->archive), entry, -1, st); + + /* + * EOF and FATAL are persistent at this layer. By + * modifying the state, we guarantee that future calls to + * read a header or read data will fail. + */ + switch (r) { + case ARCHIVE_EOF: + a->archive.state = ARCHIVE_STATE_EOF; + break; + case ARCHIVE_OK: + a->archive.state = ARCHIVE_STATE_DATA; + break; + case ARCHIVE_WARN: + a->archive.state = ARCHIVE_STATE_DATA; + break; + case ARCHIVE_RETRY: + break; + case ARCHIVE_FATAL: + a->archive.state = ARCHIVE_STATE_FATAL; + break; + } + + return (r); +} + +/* + * Called by the client to mark the directory just returned from + * tree_next() as needing to be visited. + */ +int +archive_read_disk_descend(struct archive *_a) +{ + struct archive_read_disk *a = (struct archive_read_disk *)_a; + struct tree *t = a->tree; + + archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_DATA, + "archive_read_disk_descend"); + + if (t->visit_type != TREE_REGULAR || !t->descend) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Ignored the request descending the current object"); + return (ARCHIVE_WARN); + } + + if (tree_current_is_physical_dir(t)) { + tree_push(t, t->basename); + t->stack->flags |= isDir; + } else if (tree_current_is_dir(t)) { + tree_push(t, t->basename); + t->stack->flags |= isDirLink; + } + t->descend = 0; + return (ARCHIVE_OK); +} + +int +archive_read_disk_open(struct archive *_a, const char *pathname) +{ + struct archive_read_disk *a = (struct archive_read_disk *)_a; + struct tree *tree; + + archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_NEW, + "archive_read_disk_open"); + archive_clear_error(&a->archive); + + tree = tree_open(pathname); + if (tree == NULL) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate tar data"); + a->archive.state = ARCHIVE_STATE_FATAL; + return (ARCHIVE_FATAL); + } + tree->symlink_mode = a->symlink_mode; + tree->dev_recorded = 0; + a->tree = tree; + a->archive.state = ARCHIVE_STATE_HEADER; + + return (ARCHIVE_OK); +} + + +/* + * Add a directory path to the current stack. + */ +static void +tree_push(struct tree *t, const char *path) +{ + struct tree_entry *te; + + te = malloc(sizeof(*te)); + memset(te, 0, sizeof(*te)); + te->next = t->stack; + te->parent = t->current; + if (te->parent) + te->depth = te->parent->depth + 1; + t->stack = te; +#ifdef HAVE_FCHDIR + te->symlink_parent_fd = -1; + te->name = strdup(path); +#elif defined(_WIN32) && !defined(__CYGWIN__) + te->symlink_parent_path = NULL; + te->name = strdup(path); +#endif + te->flags = needsDescent | needsOpen | needsAscent; + te->dirname_length = t->dirname_length; +} + +/* + * Append a name to the current dir path. + */ +static void +tree_append(struct tree *t, const char *name, size_t name_length) +{ + char *p; + size_t size_needed; + + if (t->buff != NULL) + t->buff[t->dirname_length] = '\0'; + /* Strip trailing '/' from name, unless entire name is "/". */ + while (name_length > 1 && name[name_length - 1] == '/') + name_length--; + + /* Resize pathname buffer as needed. */ + size_needed = name_length + 1 + t->dirname_length; + if (t->buff_length < size_needed) { + if (t->buff_length < 1024) + t->buff_length = 1024; + while (t->buff_length < size_needed) + t->buff_length *= 2; + t->buff = realloc(t->buff, t->buff_length); + } + if (t->buff == NULL) + abort(); + p = t->buff + t->dirname_length; + t->path_length = t->dirname_length + name_length; + /* Add a separating '/' if it's needed. */ + if (t->dirname_length > 0 && p[-1] != '/') { + *p++ = '/'; + t->path_length ++; + } +#if HAVE_STRNCPY_S + strncpy_s(p, t->buff_length - (p - t->buff), name, name_length); +#else + strncpy(p, name, name_length); +#endif + p[name_length] = '\0'; + t->basename = p; } + +/* + * Open a directory tree for traversal. + */ +static struct tree * +tree_open(const char *path) +{ +#ifdef HAVE_FCHDIR + struct tree *t; + + t = malloc(sizeof(*t)); + memset(t, 0, sizeof(*t)); + /* First item is set up a lot like a symlink traversal. */ + tree_push(t, path); + t->stack->flags = needsFirstVisit | isDirLink | needsAscent; + t->stack->symlink_parent_fd = open(".", O_RDONLY); + t->openCount++; + t->d = INVALID_DIR_HANDLE; + return (t); +#elif defined(_WIN32) && !defined(__CYGWIN__) + struct tree *t; + char *cwd = _getcwd(NULL, 0); + char *pathname = strdup(path), *p, *base; + + if (pathname == NULL) + abort(); + for (p = pathname; *p != '\0'; ++p) { + if (*p == '\\') + *p = '/'; + } + base = pathname; + + t = malloc(sizeof(*t)); + memset(t, 0, sizeof(*t)); + /* First item is set up a lot like a symlink traversal. */ + /* printf("Looking for wildcard in %s\n", path); */ + /* TODO: wildcard detection here screws up on \\?\c:\ UNC names */ + if (strchr(base, '*') || strchr(base, '?')) { + // It has a wildcard in it... + // Separate the last element. + p = strrchr(base, '/'); + if (p != NULL) { + *p = '\0'; + chdir(base); + tree_append(t, base, p - base); + t->dirname_length = t->path_length; + base = p + 1; + } + } + tree_push(t, base); + free(pathname); + t->stack->flags = needsFirstVisit | isDirLink | needsAscent; + t->stack->symlink_parent_path = cwd; + t->d = INVALID_DIR_HANDLE; + return (t); +#endif +} + +/* + * We've finished a directory; ascend back to the parent. + */ +static int +tree_ascend(struct tree *t) +{ + struct tree_entry *te; + int r = 0; + + te = t->stack; + t->depth--; + if (te->flags & isDirLink) { +#ifdef HAVE_FCHDIR + if (fchdir(te->symlink_parent_fd) != 0) { + t->tree_errno = errno; + r = TREE_ERROR_FATAL; + } + close(te->symlink_parent_fd); +#elif defined(_WIN32) && !defined(__CYGWIN__) + if (SetCurrentDirectory(te->symlink_parent_path) == 0) { + t->tree_errno = errno; + r = TREE_ERROR_FATAL; + } + free(te->symlink_parent_path); + te->symlink_parent_path = NULL; +#endif + t->openCount--; + } else { +#if defined(_WIN32) && !defined(__CYGWIN__) + if (SetCurrentDirectory("..") == 0) { +#else + if (chdir("..") != 0) { +#endif + t->tree_errno = errno; + r = TREE_ERROR_FATAL; + } + } + return (r); +} + +/* + * Pop the working stack. + */ +static void +tree_pop(struct tree *t) +{ + struct tree_entry *te; + + if (t->buff) + t->buff[t->dirname_length] = '\0'; + if (t->stack == t->current && t->current != NULL) + t->current = t->current->parent; + te = t->stack; + t->stack = te->next; + t->dirname_length = te->dirname_length; + if (t->buff) { + t->basename = t->buff + t->dirname_length; + while (t->basename[0] == '/') + t->basename++; + } + free(te->name); + free(te); +} + +/* + * Get the next item in the tree traversal. + */ +static int +tree_next(struct tree *t) +{ + int r; + + while (t->stack != NULL) { + /* If there's an open dir, get the next entry from there. */ + if (t->d != INVALID_DIR_HANDLE) { +#if defined(_WIN32) && !defined(__CYGWIN__) + r = tree_dir_next_windows(t, NULL); +#else + r = tree_dir_next_posix(t); +#endif + if (r == 0) + continue; + return (r); + } + + if (t->stack->flags & needsFirstVisit) { +#if defined(_WIN32) && !defined(__CYGWIN__) + char *d = t->stack->name; + t->stack->flags &= ~needsFirstVisit; + if (strchr(d, '*') || strchr(d, '?')) { + r = tree_dir_next_windows(t, d); + if (r == 0) + continue; + return (r); + } + // Not a pattern, handle it as-is... +#endif + /* Top stack item needs a regular visit. */ + t->current = t->stack; + tree_append(t, t->stack->name, strlen(t->stack->name)); + //t->dirname_length = t->path_length; + //tree_pop(t); + t->stack->flags &= ~needsFirstVisit; + return (t->visit_type = TREE_REGULAR); + } else if (t->stack->flags & needsDescent) { + /* Top stack item is dir to descend into. */ + t->current = t->stack; + tree_append(t, t->stack->name, strlen(t->stack->name)); + t->stack->flags &= ~needsDescent; + /* If it is a link, set up fd for the ascent. */ + if (t->stack->flags & isDirLink) { +#ifdef HAVE_FCHDIR + t->stack->symlink_parent_fd = open(".", O_RDONLY); + t->openCount++; + if (t->openCount > t->maxOpenCount) + t->maxOpenCount = t->openCount; +#elif defined(_WIN32) && !defined(__CYGWIN__) + t->stack->symlink_parent_path = _getcwd(NULL, 0); +#endif + } + t->dirname_length = t->path_length; +#if defined(_WIN32) && !defined(__CYGWIN__) + if (t->path_length == 259 || !SetCurrentDirectory(t->stack->name) != 0) +#else + if (chdir(t->stack->name) != 0) +#endif + { + /* chdir() failed; return error */ + tree_pop(t); + t->tree_errno = errno; + return (t->visit_type = TREE_ERROR_DIR); + } + t->depth++; + return (t->visit_type = TREE_POSTDESCENT); + } else if (t->stack->flags & needsOpen) { + t->stack->flags &= ~needsOpen; +#if defined(_WIN32) && !defined(__CYGWIN__) + r = tree_dir_next_windows(t, "*"); +#else + r = tree_dir_next_posix(t); +#endif + if (r == 0) + continue; + return (r); + } else if (t->stack->flags & needsAscent) { + /* Top stack item is dir and we're done with it. */ + r = tree_ascend(t); + tree_pop(t); + t->visit_type = r != 0 ? r : TREE_POSTASCENT; + return (t->visit_type); + } else { + /* Top item on stack is dead. */ + tree_pop(t); + t->flags &= ~hasLstat; + t->flags &= ~hasStat; + } + } + return (t->visit_type = 0); +} + +#if defined(_WIN32) && !defined(__CYGWIN__) +static int +tree_dir_next_windows(struct tree *t, const char *pattern) +{ + const char *name; + size_t namelen; + int r; + + for (;;) { + if (pattern != NULL) { + t->d = FindFirstFile(pattern, &t->_findData); + if (t->d == INVALID_DIR_HANDLE) { + r = tree_ascend(t); /* Undo "chdir" */ + tree_pop(t); + t->tree_errno = errno; + t->visit_type = r != 0 ? r : TREE_ERROR_DIR; + return (t->visit_type); + } + t->findData = &t->_findData; + pattern = NULL; + } else if (!FindNextFile(t->d, &t->_findData)) { + FindClose(t->d); + t->d = INVALID_DIR_HANDLE; + t->findData = NULL; + return (0); + } + name = t->findData->cFileName; + namelen = strlen(name); + t->flags &= ~hasLstat; + t->flags &= ~hasStat; + if (name[0] == '.' && name[1] == '\0') + continue; + if (name[0] == '.' && name[1] == '.' && name[2] == '\0') + continue; + tree_append(t, name, namelen); + return (t->visit_type = TREE_REGULAR); + } +} +#else +static int +tree_dir_next_posix(struct tree *t) +{ + int r; + const char *name; + size_t namelen; + + if (t->d == NULL) { + if ((t->d = opendir(".")) == NULL) { + r = tree_ascend(t); /* Undo "chdir" */ + tree_pop(t); + t->tree_errno = errno; + t->visit_type = r != 0 ? r : TREE_ERROR_DIR; + return (t->visit_type); + } + } + for (;;) { + t->de = readdir(t->d); + if (t->de == NULL) { + closedir(t->d); + t->d = INVALID_DIR_HANDLE; + return (0); + } + name = t->de->d_name; + namelen = D_NAMELEN(t->de); + t->flags &= ~hasLstat; + t->flags &= ~hasStat; + if (name[0] == '.' && name[1] == '\0') + continue; + if (name[0] == '.' && name[1] == '.' && name[2] == '\0') + continue; + tree_append(t, name, namelen); + return (t->visit_type = TREE_REGULAR); + } +} +#endif + +/* + * Get the stat() data for the entry just returned from tree_next(). + */ +static const struct stat * +tree_current_stat(struct tree *t) +{ + if (!(t->flags & hasStat)) { + if (stat(tree_current_access_path(t), &t->st) != 0) + return NULL; + t->flags |= hasStat; + } + return (&t->st); +} + +#if defined(HAVE_WINDOWS_H) && !defined(__CYGWIN__) +static const BY_HANDLE_FILE_INFORMATION * +tree_current_file_information(struct tree *t) +{ + if (!(t->flags & hasFileInfo)) { + HANDLE h = CreateFile(tree_current_access_path(t), + 0, 0, NULL, + OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OPEN_REPARSE_POINT, + NULL); + if (h == INVALID_HANDLE_VALUE) + return NULL; + if (!GetFileInformationByHandle(h, &t->fileInfo)) { + CloseHandle(h); + return NULL; + } + CloseHandle(h); + t->flags |= hasFileInfo; + } + return (&t->fileInfo); +} +#endif +/* + * Get the lstat() data for the entry just returned from tree_next(). + */ +static const struct stat * +tree_current_lstat(struct tree *t) +{ +#if defined(_WIN32) && !defined(__CYGWIN__) + return (tree_current_stat(t)); +#else + if (!(t->flags & hasLstat)) { + if (lstat(tree_current_access_path(t), &t->lst) != 0) + return NULL; + t->flags |= hasLstat; + } + return (&t->lst); +#endif +} + +/* + * Test whether current entry is a dir or link to a dir. + */ +static int +tree_current_is_dir(struct tree *t) +{ +#if defined(_WIN32) && !defined(__CYGWIN__) + if (t->findData) + return (t->findData->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY); + if (tree_current_file_information(t)) + return (t->fileInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY); + return (0); +#else + const struct stat *st; + /* + * If we already have lstat() info, then try some + * cheap tests to determine if this is a dir. + */ + if (t->flags & hasLstat) { + /* If lstat() says it's a dir, it must be a dir. */ + if (S_ISDIR(tree_current_lstat(t)->st_mode)) + return 1; + /* Not a dir; might be a link to a dir. */ + /* If it's not a link, then it's not a link to a dir. */ + if (!S_ISLNK(tree_current_lstat(t)->st_mode)) + return 0; + /* + * It's a link, but we don't know what it's a link to, + * so we'll have to use stat(). + */ + } + + st = tree_current_stat(t); + /* If we can't stat it, it's not a dir. */ + if (st == NULL) + return 0; + /* Use the definitive test. Hopefully this is cached. */ + return (S_ISDIR(st->st_mode)); +#endif +} + +/* + * Test whether current entry is a physical directory. Usually, we + * already have at least one of stat() or lstat() in memory, so we + * use tricks to try to avoid an extra trip to the disk. + */ +static int +tree_current_is_physical_dir(struct tree *t) +{ +#if defined(_WIN32) && !defined(__CYGWIN__) + if (tree_current_is_physical_link(t)) + return (0); + return (tree_current_is_dir(t)); +#else + const struct stat *st; + + /* + * If stat() says it isn't a dir, then it's not a dir. + * If stat() data is cached, this check is free, so do it first. + */ + if ((t->flags & hasStat) + && (!S_ISDIR(tree_current_stat(t)->st_mode))) + return 0; + + /* + * Either stat() said it was a dir (in which case, we have + * to determine whether it's really a link to a dir) or + * stat() info wasn't available. So we use lstat(), which + * hopefully is already cached. + */ + + st = tree_current_lstat(t); + /* If we can't stat it, it's not a dir. */ + if (st == NULL) + return 0; + /* Use the definitive test. Hopefully this is cached. */ + return (S_ISDIR(st->st_mode)); +#endif +} + +#if defined(_WIN32) && !defined(__CYGWIN__) +/* + * Test whether current entry is a symbolic link. + */ +static int +tree_current_is_physical_link(struct tree *t) +{ +#if defined(_WIN32) && !defined(__CYGWIN__) +#ifndef IO_REPARSE_TAG_SYMLINK +/* Old SDKs do not provide IO_REPARSE_TAG_SYMLINK */ +#define IO_REPARSE_TAG_SYMLINK 0xA000000CL +#endif + if (t->findData) + return ((t->findData->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) + && (t->findData->dwReserved0 == IO_REPARSE_TAG_SYMLINK)); + return (0); +#else + const struct stat *st = tree_current_lstat(t); + if (st == NULL) + return 0; + return (S_ISLNK(st->st_mode)); +#endif +} +#endif + +/* + * Return the access path for the entry just returned from tree_next(). + */ +static const char * +tree_current_access_path(struct tree *t) +{ + return (t->basename); +} + +/* + * Return the full path for the entry just returned from tree_next(). + */ +static const char * +tree_current_path(struct tree *t) +{ + return (t->buff); +} + +/* + * Terminate the traversal and release any resources. + */ +static void +tree_close(struct tree *t) +{ + /* Release anything remaining in the stack. */ + while (t->stack != NULL) + tree_pop(t); + free(t->buff); + /* TODO: Ensure that premature close() resets cwd */ +#if 0 +#ifdef HAVE_FCHDIR + if (t->initialDirFd >= 0) { + int s = fchdir(t->initialDirFd); + (void)s; /* UNUSED */ + close(t->initialDirFd); + t->initialDirFd = -1; + } +#elif defined(_WIN32) && !defined(__CYGWIN__) + if (t->initialDir != NULL) { + SetCurrentDir(t->initialDir); + free(t->initialDir); + t->initialDir = NULL; + } +#endif +#endif + free(t); +} + diff --git a/libarchive/archive_read_disk_private.h b/libarchive/archive_read_disk_private.h index 306122573..42f6ce016 100644 --- a/libarchive/archive_read_disk_private.h +++ b/libarchive/archive_read_disk_private.h @@ -33,6 +33,8 @@ #ifndef ARCHIVE_READ_DISK_PRIVATE_H_INCLUDED #define ARCHIVE_READ_DISK_PRIVATE_H_INCLUDED +struct tree; + struct archive_read_disk { struct archive archive; @@ -51,6 +53,8 @@ struct archive_read_disk { */ char follow_symlinks; /* Either 'L' or 'P'. */ + struct tree *tree; + #if ARCHIVE_VERSION_NUMBER < 3000000 const char * (*lookup_gname)(void *private, gid_t gid); #else diff --git a/libarchive/test/CMakeLists.txt b/libarchive/test/CMakeLists.txt index c3da942fc..b6608c264 100644 --- a/libarchive/test/CMakeLists.txt +++ b/libarchive/test/CMakeLists.txt @@ -38,6 +38,7 @@ IF(ENABLE_TEST) test_read_compress_program.c test_read_data_large.c test_read_disk.c + test_read_disk_directory_traversals.c test_read_disk_entry_from_file.c test_read_extract.c test_read_file_nonexistent.c diff --git a/libarchive/test/test_read_disk_directory_traversals.c b/libarchive/test/test_read_disk_directory_traversals.c new file mode 100644 index 000000000..0336df70f --- /dev/null +++ b/libarchive/test/test_read_disk_directory_traversals.c @@ -0,0 +1,167 @@ +/*- + * Copyright (c) 2010 Michihiro NAKAJIMA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "test.h" +__FBSDID("$FreeBSD$"); + +DEFINE_TEST(test_read_disk_directory_traversals) +{ + struct archive *a; + struct archive_entry *ae; + //const void *p; + //size_t size; + //int64_t offset; + + assertMakeDir("dir1", 0755); + assertMakeFile("dir1/file1", 0644, "0123456789"); + assertMakeFile("dir1/file2", 0644, "0123456789"); + assertMakeDir("dir1/sub1", 0755); + assertMakeFile("dir1/sub1/file1", 0644, "0123456789"); + assertMakeDir("dir1/sub2", 0755); + assertMakeFile("dir1/sub2/file1", 0644, "0123456789"); + assertMakeFile("dir1/sub2/file2", 0644, "0123456789"); + assertMakeDir("dir1/sub2/sub1", 0755); + assertMakeDir("dir1/sub2/sub2", 0755); + assertMakeDir("dir1/sub2/sub3", 0755); + assertMakeFile("dir1/sub2/sub3/file", 0644, "0123456789"); + + assert((ae = archive_entry_new()) != NULL); + assert((a = archive_read_disk_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_open(a, "dir1")); + + /* dir1 */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae)); + assertEqualString(archive_entry_pathname(ae), "dir1"); + assertEqualInt(archive_entry_filetype(ae), AE_IFDIR); + + /* Descend into the current object */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a)); + + /* dir1/file1 */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae)); + assertEqualString(archive_entry_pathname(ae), "dir1/file1"); + assertEqualInt(archive_entry_filetype(ae), AE_IFREG); + assertEqualInt(archive_entry_size(ae), 10); + //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0); + //assertEqualInt((int)size, 10); + //assertEqualInt((int)offset, 0); + //assertEqualInt(memcmp(p, "0123456789", 10), 0); + + /* dir1/file2 */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae)); + assertEqualString(archive_entry_pathname(ae), "dir1/file2"); + assertEqualInt(archive_entry_filetype(ae), AE_IFREG); + assertEqualInt(archive_entry_size(ae), 10); + //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0); + //assertEqualInt((int)size, 10); + //assertEqualInt((int)offset, 0); + //assertEqualInt(memcmp(p, "0123456789", 10), 0); + + /* dir1/sub1 */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae)); + assertEqualString(archive_entry_pathname(ae), "dir1/sub1"); + assertEqualInt(archive_entry_filetype(ae), AE_IFDIR); + + /* Descend into the current object */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a)); + + /* dir1/sub2 */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae)); + assertEqualString(archive_entry_pathname(ae), "dir1/sub2"); + assertEqualInt(archive_entry_filetype(ae), AE_IFDIR); + + /* Descend into the current object */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a)); + + /* dir1/sub2/file1 */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae)); + assertEqualString(archive_entry_pathname(ae), "dir1/sub2/file1"); + assertEqualInt(archive_entry_filetype(ae), AE_IFREG); + assertEqualInt(archive_entry_size(ae), 10); + //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0); + //assertEqualInt((int)size, 10); + //assertEqualInt((int)offset, 0); + //assertEqualInt(memcmp(p, "0123456789", 10), 0); + + /* dir1/sub2/file2 */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae)); + assertEqualString(archive_entry_pathname(ae), "dir1/sub2/file2"); + assertEqualInt(archive_entry_filetype(ae), AE_IFREG); + assertEqualInt(archive_entry_size(ae), 10); + //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0); + //assertEqualInt((int)size, 10); + //assertEqualInt((int)offset, 0); + //assertEqualInt(memcmp(p, "0123456789", 10), 0); + + /* dir1/sub2/sub1 */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae)); + assertEqualString(archive_entry_pathname(ae), "dir1/sub2/sub1"); + assertEqualInt(archive_entry_filetype(ae), AE_IFDIR); + + /* Descend into the current object */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a)); + + /* dir1/sub2/sub2 */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae)); + assertEqualString(archive_entry_pathname(ae), "dir1/sub2/sub2"); + assertEqualInt(archive_entry_filetype(ae), AE_IFDIR); + + /* Descend into the current object */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a)); + + /* dir1/sub2/sub3 */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae)); + assertEqualString(archive_entry_pathname(ae), "dir1/sub2/sub3"); + assertEqualInt(archive_entry_filetype(ae), AE_IFDIR); + + /* Descend into the current object */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_disk_descend(a)); + + /* dir1/sub2/sub3/file */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae)); + assertEqualString(archive_entry_pathname(ae), "dir1/sub2/sub3/file"); + assertEqualInt(archive_entry_filetype(ae), AE_IFREG); + assertEqualInt(archive_entry_size(ae), 10); + //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0); + //assertEqualInt((int)size, 10); + //assertEqualInt((int)offset, 0); + //assertEqualInt(memcmp(p, "0123456789", 10), 0); + + /* dir1/sub1/file1 */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header2(a, ae)); + assertEqualString(archive_entry_pathname(ae), "dir1/sub1/file1"); + assertEqualInt(archive_entry_filetype(ae), AE_IFREG); + assertEqualInt(archive_entry_size(ae), 10); + //assertEqualInt(archive_read_data_block(a, &p, &size, &offset), 0); + //assertEqualInt((int)size, 10); + //assertEqualInt((int)offset, 0); + //assertEqualInt(memcmp(p, "0123456789", 10), 0); + + /* There is no entry. */ + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header2(a, ae)); + + /* Destroy the archive. */ + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); + archive_entry_free(ae); +}