From: Joerg Sonnenberger Date: Wed, 14 May 2008 02:12:21 +0000 (-0400) Subject: Add support for pax-like tar substitutions as found in NetBSD's X-Git-Tag: v2.6.0~239 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=326a25bc94c06b10e9cc511923aa8d58a4c447fc;p=thirdparty%2Flibarchive.git Add support for pax-like tar substitutions as found in NetBSD's pax-as-tar. This code is not compiled in on platforms lacking regex.h. SVN-Revision: 53 --- diff --git a/Makefile.am b/Makefile.am index 23317aac6..b5a3eea00 100644 --- a/Makefile.am +++ b/Makefile.am @@ -270,6 +270,7 @@ bsdtar_SOURCES= \ tar/getdate.y \ tar/matching.c \ tar/read.c \ + tar/subst.c \ tar/tree.c \ tar/tree.h \ tar/util.c \ diff --git a/config.h.in b/config.h.in index aa69378e3..04ea6148a 100644 --- a/config.h.in +++ b/config.h.in @@ -259,6 +259,9 @@ /* Define to 1 if you have the <pwd.h> header file. */ #undef HAVE_PWD_H +/* Define to 1 if you have the <regex.h> header file. */ +#undef HAVE_REGEX_H + /* Define to 1 if you have the `select' function. */ #undef HAVE_SELECT @@ -473,7 +476,7 @@ #undef _LARGE_FILES /* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>, - <pthread.h>, or <semaphore.h> is not used. If the typedef was allowed, the + <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the #define below would cause a syntax error.
*/ #undef _UINT64_T diff --git a/configure.ac b/configure.ac index 7ed5ed133..9fb719b81 100644 --- a/configure.ac +++ b/configure.ac @@ -173,7 +173,7 @@ AC_CHECK_HEADERS([inttypes.h langinfo.h limits.h linux/ext2_fs.h linux/fs.h]) AC_CHECK_HEADER(inttypes.h, [AC_SUBST(ARCHIVE_H_INCLUDE_INTTYPES_H,['#include <inttypes.h> /* For int64_t */'])], [AC_SUBST(ARCHIVE_H_INCLUDE_INTTYPES_H,[''])]) -AC_CHECK_HEADERS([locale.h paths.h poll.h pwd.h stdarg.h]) +AC_CHECK_HEADERS([locale.h paths.h poll.h pwd.h regex.h stdarg.h]) AC_CHECK_HEADERS([stdint.h stdlib.h string.h sys/acl.h sys/ioctl.h]) AC_CHECK_HEADERS([sys/param.h sys/poll.h sys/select.h sys/time.h sys/utime.h]) AC_CHECK_HEADERS([time.h unistd.h utime.h wchar.h zlib.h]) diff --git a/tar/bsdtar.1 b/tar/bsdtar.1 index 60a5d8e77..f959bbf85 100644 --- a/tar/bsdtar.1 +++ b/tar/bsdtar.1 @@ -347,6 +347,23 @@ Remove the specified number of leading path elements. Pathnames with fewer elements will be silently skipped. Note that the pathname is edited after checking inclusion/exclusion patterns but before security checks. +.It Fl s Ar pattern +(x and t mode only) +Modify file or archive member names according to +.Pa pattern . +The pattern has the format /old/new/[gps]. +old is a basic regular expression. +If it doesn't apply, the pattern is skipped. +new is the replacement string of the matched part. +~ is substituted with the match, \1 to \9 with the content of +the corresponding captured group. +The optional trailing g specifies that matching should continue +after the matched part and stop at the first unmatched pattern. +The optional trailing s specifies that the pattern applies to the value +of symbolic links. +The optional trailing p specifies that after a successful substitution +the original path name and the new path name should be printed to +standard error.
.It Fl T Ar filename In x or t mode, .Nm diff --git a/tar/bsdtar.c b/tar/bsdtar.c index 23db60189..76ef6c108 100644 --- a/tar/bsdtar.c +++ b/tar/bsdtar.c @@ -502,6 +502,14 @@ main(int argc, char **argv) case 'S': /* NetBSD pax-as-tar */ bsdtar->extract_flags |= ARCHIVE_EXTRACT_SPARSE; break; + case 's': /* NetBSD pax-as-tar */ +#if HAVE_REGEX_H + add_substitution(bsdtar, optarg); +#else + bsdtar_warnc(bsdtar, 0, "-s is not supported by this version of bsdtar"); + usage(bsdtar); +#endif + break; case OPTION_STRIP_COMPONENTS: /* GNU tar 1.15 */ bsdtar->strip_components = atoi(optarg); break; @@ -677,6 +685,10 @@ main(int argc, char **argv) } cleanup_exclusions(bsdtar); +#if HAVE_REGEX_H + cleanup_substitution(bsdtar); +#endif + if (bsdtar->return_value != 0) bsdtar_warnc(bsdtar, 0, "Error exit delayed from previous errors."); diff --git a/tar/bsdtar.h b/tar/bsdtar.h index 7fe27df41..a50529d4f 100644 --- a/tar/bsdtar.h +++ b/tar/bsdtar.h @@ -97,6 +97,7 @@ struct bsdtar { struct matching *matching; /* for matching.c */ struct security *security; /* for read.c */ struct name_cache *uname_cache; /* for write.c */ + struct substitution *substitution; /* for subst.c */ }; void bsdtar_errc(struct bsdtar *, int _eval, int _code, @@ -125,3 +126,8 @@ int unmatched_inclusions_warn(struct bsdtar *bsdtar, const char *msg); void usage(struct bsdtar *); int yes(const char *fmt, ...); +#if HAVE_REGEX_H +void add_substitution(struct bsdtar *, const char *); +int apply_substitution(struct bsdtar *, const char *, char **, int); +void cleanup_substitution(struct bsdtar *); +#endif diff --git a/tar/read.c b/tar/read.c index edb33cb45..0376b8b14 100644 --- a/tar/read.c +++ b/tar/read.c @@ -98,6 +98,7 @@ read_archive(struct bsdtar *bsdtar, char mode) struct archive_entry *entry; const struct stat *st; int r; + char *subst_name; while (*bsdtar->argv) { include(bsdtar, *bsdtar->argv); @@ -157,6 +158,18 @@ read_archive(struct bsdtar *bsdtar, char mode) archive_entry_set_gname(entry, NULL); } 
+#if HAVE_REGEX_H + r = apply_substitution(bsdtar, archive_entry_pathname(entry), &subst_name, 0); + if (r == -1) { + bsdtar_warnc(bsdtar, 0, "Invalid substituion, skipping entry"); + continue; + } + if (r == 1) { + archive_entry_set_pathname(entry, subst_name); + free(subst_name); + } +#endif + /* * Exclude entries that are too old. */ @@ -177,6 +190,31 @@ read_archive(struct bsdtar *bsdtar, char mode) <= bsdtar->newer_mtime_nsec) continue; /* Too old, skip it. */ } + +#if HAVE_REGEX_H + if (archive_entry_hardlink(entry)) { + r = apply_substitution(bsdtar, archive_entry_hardlink(entry), &subst_name, 1); + if (r == -1) { + bsdtar_warnc(bsdtar, 0, "Invalid substituion, skipping entry"); + continue; + } + if (r == 1) { + archive_entry_set_hardlink(entry, subst_name); + free(subst_name); + } + } + if (S_ISLNK(st->st_mode)) { + r = apply_substitution(bsdtar, archive_entry_symlink(entry), &subst_name, 1); + if (r == -1) { + bsdtar_warnc(bsdtar, 0, "Invalid substituion, skipping entry"); + continue; + } + if (r == 1) { + archive_entry_set_symlink(entry, subst_name); + free(subst_name); + } + } +#endif /* * Note that pattern exclusions are checked before diff --git a/tar/subst.c b/tar/subst.c new file mode 100644 index 000000000..2233a21e1 --- /dev/null +++ b/tar/subst.c @@ -0,0 +1,275 @@ +/*- + * Copyright (c) 2008 Joerg Sonnenberger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bsdtar_platform.h"
+__FBSDID("$FreeBSD$");
+
+#if HAVE_REGEX_H
+#include "bsdtar.h"
+
+#include <errno.h>
+#include <regex.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* REG_BASIC is a BSD extension; POSIX selects basic REs with no flag. */
+#ifndef REG_BASIC
+#define	REG_BASIC 0
+#endif
+
+/*
+ * One parsed -s rule: the compiled pattern, the replacement template
+ * and the trailing option flags.
+ */
+struct subst_rule {
+	struct subst_rule *next;
+	regex_t re;
+	char *result;
+	/* Unsigned: a signed one-bit field cannot portably hold the value 1. */
+	unsigned int global:1, print:1, symlink:1;
+};
+
+/* Singly linked list of rules, kept in the order they were added. */
+struct substitution {
+	struct subst_rule *first_rule, *last_rule;
+};
+
+/*
+ * Allocate the (initially empty) rule list and hang it off bsdtar.
+ *
+ * NOTE(review): every bsdtar_errc() call in this file passes 1 as the
+ * exit value and the code that follows assumes it does not return --
+ * confirm against its definition.
+ */
+static void
+init_substitution(struct bsdtar *bsdtar)
+{
+	struct substitution *subst;
+
+	bsdtar->substitution = subst = malloc(sizeof(*subst));
+	if (subst == NULL)
+		bsdtar_errc(bsdtar, 1, errno, "Out of memory");
+	subst->first_rule = subst->last_rule = NULL;
+}
+
+/*
+ * Parse one rule of the form <d>old<d>new<d>[gps], where <d> is the
+ * first character of rule_text, and append it to the rule list.
+ * "old" is compiled as a basic regular expression, "new" is stored
+ * verbatim as the replacement template.  Malformed rules are reported
+ * through bsdtar_errc().
+ */
+void
+add_substitution(struct bsdtar *bsdtar, const char *rule_text)
+{
+	struct subst_rule *rule;
+	struct substitution *subst;
+	const char *end_pattern, *start_subst;
+	char *pattern;
+	size_t len;
+	int r;
+
+	if ((subst = bsdtar->substitution) == NULL) {
+		init_substitution(bsdtar);
+		subst = bsdtar->substitution;
+	}
+
+	rule = malloc(sizeof(*rule));
+	if (rule == NULL)
+		bsdtar_errc(bsdtar, 1, errno, "Out of memory");
+	rule->next = NULL;
+
+	if (*rule_text == '\0')
+		bsdtar_errc(bsdtar, 1, 0, "Empty replacement string");
+	end_pattern = strchr(rule_text + 1, *rule_text);
+	if (end_pattern == NULL)
+		bsdtar_errc(bsdtar, 1, 0, "Invalid replacement string");
+
+	/* Copy the text between the first two delimiters. */
+	len = (size_t)(end_pattern - rule_text - 1);
+	pattern = malloc(len + 1);
+	if (pattern == NULL)
+		bsdtar_errc(bsdtar, 1, errno, "Out of memory");
+	memcpy(pattern, rule_text + 1, len);
+	pattern[len] = '\0';
+
+	if ((r = regcomp(&rule->re, pattern, REG_BASIC)) != 0) {
+		char buf[80];
+		regerror(r, &rule->re, buf, sizeof(buf));
+		bsdtar_errc(bsdtar, 1, 0, "Invalid regular expression: %s", buf);
+	}
+	free(pattern);
+
+	start_subst = end_pattern + 1;
+	end_pattern = strchr(start_subst, *rule_text);
+	if (end_pattern == NULL)
+		bsdtar_errc(bsdtar, 1, 0, "Invalid replacement string");
+
+	len = (size_t)(end_pattern - start_subst);
+	rule->result = malloc(len + 1);
+	if (rule->result == NULL)
+		bsdtar_errc(bsdtar, 1, errno, "Out of memory");
+	memcpy(rule->result, start_subst, len);
+	rule->result[len] = '\0';
+
+	rule->global = 0;
+	rule->print = 0;
+	rule->symlink = 0;
+
+	/* Everything after the third delimiter is a list of flag letters. */
+	while (*++end_pattern) {
+		switch (*end_pattern) {
+		case 'g':
+		case 'G':
+			rule->global = 1;
+			break;
+		case 'p':
+		case 'P':
+			rule->print = 1;
+			break;
+		case 's':
+		case 'S':
+			rule->symlink = 1;
+			break;
+		default:
+			bsdtar_errc(bsdtar, 1, 0, "Invalid replacement flag %c", *end_pattern);
+		}
+	}
+
+	/*
+	 * Link the rule in only once it is fully initialised, so the list
+	 * never contains a rule with an uncompiled regex or unset result.
+	 */
+	if (subst->last_rule == NULL)
+		subst->first_rule = rule;
+	else
+		subst->last_rule->next = rule;
+	subst->last_rule = rule;
+}
+
+/*
+ * Replace *str with a newly allocated string holding the old contents
+ * followed by the first len bytes of append.  *str may be NULL, which
+ * is treated as the empty string; the old buffer is freed.
+ */
+static void
+realloc_strncat(struct bsdtar *bsdtar, char **str, const char *append, size_t len)
+{
+	char *new_str;
+	size_t old_len;
+
+	if (*str == NULL)
+		old_len = 0;
+	else
+		old_len = strlen(*str);
+
+	new_str = malloc(old_len + len + 1);
+	if (new_str == NULL)
+		bsdtar_errc(bsdtar, 1, errno, "Out of memory");
+	/* memcpy() must not be handed a NULL source, even for 0 bytes. */
+	if (old_len > 0)
+		memcpy(new_str, *str, old_len);
+	if (len > 0)
+		memcpy(new_str + old_len, append, len);
+	new_str[old_len + len] = '\0';
+	free(*str);
+	*str = new_str;
+}
+
+/*
+ * Same as realloc_strncat(), but appends the whole NUL-terminated
+ * string append.
+ */
+static void
+realloc_strcat(struct bsdtar *bsdtar, char **str, const char *append)
+{
+	realloc_strncat(bsdtar, str, append, strlen(append));
+}
+
+/*
+ * Run the substitution rules over name.
+ *
+ * If symlink_only is non-zero, only rules carrying the 's' flag are
+ * considered.  In the replacement template '~' stands for the whole
+ * match, \1 .. \9 for the corresponding capture group, and \~ / \\ for
+ * a literal '~' / '\'; any other escape is copied through unchanged.
+ *
+ * Returns 1 if a rule matched (*result is then a malloc'ed string the
+ * caller must free), 0 if nothing matched (*result is NULL), and -1 if
+ * the template refers to a capture group the pattern does not define
+ * (*result is NULL).  With the 'p' flag the old and new names are
+ * printed to stderr.
+ *
+ * NOTE(review): the first rule that fails to match ends processing
+ * ("break"), so later rules are never tried.  bsdtar.1 says a pattern
+ * that does not apply "is skipped", which suggests "continue" may have
+ * been intended -- confirm before changing.
+ */
+int
+apply_substitution(struct bsdtar *bsdtar, const char *name, char **result, int symlink_only)
+{
+	const char *path = name;
+	regmatch_t matches[10];
+	size_t i, j;
+	struct subst_rule *rule;
+	struct substitution *subst;
+	int c, group, got_match, print_match;
+
+	*result = NULL;
+
+	if ((subst = bsdtar->substitution) == NULL)
+		return 0;
+
+	got_match = 0;
+	print_match = 0;
+
+	for (rule = subst->first_rule; rule != NULL; rule = rule->next) {
+		if (symlink_only && !rule->symlink)
+			continue;
+		if (regexec(&rule->re, name, 10, matches, 0))
+			break;
+
+		got_match = 1;
+		print_match |= rule->print;
+		/* Text in front of the match is kept as is. */
+		realloc_strncat(bsdtar, result, name, (size_t)matches[0].rm_so);
+
+		/*
+		 * Expand the template; j marks the start of the literal
+		 * run that has not been copied to *result yet.
+		 */
+		for (i = 0, j = 0; rule->result[i] != '\0'; ++i) {
+			if (rule->result[i] == '~') {
+				realloc_strncat(bsdtar, result, rule->result + j, i - j);
+				/* Only the matched text, not the prefix. */
+				realloc_strncat(bsdtar, result,
+				    name + matches[0].rm_so,
+				    (size_t)(matches[0].rm_eo - matches[0].rm_so));
+				j = i + 1;
+				continue;
+			}
+			if (rule->result[i] != '\\')
+				continue;
+
+			c = rule->result[i + 1];
+			if (c == '\0')
+				break;	/* Trailing '\': keep it literally. */
+			++i;
+			if (c == '~' || c == '\\') {
+				/* Drop the backslash, keep the escaped char. */
+				realloc_strncat(bsdtar, result, rule->result + j, i - j - 1);
+				j = i;
+			} else if (c >= '1' && c <= '9') {
+				group = c - '0';
+				if ((size_t)group > rule->re.re_nsub) {
+					free(*result);
+					*result = NULL;
+					return -1;
+				}
+				realloc_strncat(bsdtar, result, rule->result + j, i - j - 1);
+				/* rm_so is -1 for a group that did not take part. */
+				if (matches[group].rm_so >= 0)
+					realloc_strncat(bsdtar, result,
+					    name + matches[group].rm_so,
+					    (size_t)(matches[group].rm_eo - matches[group].rm_so));
+				j = i + 1;
+			}
+			/* Any other escape is copied through unchanged. */
+		}
+
+		realloc_strcat(bsdtar, result, rule->result + j);
+
+		/* Later rules only see what follows this match. */
+		name += matches[0].rm_eo;
+
+		if (!rule->global)
+			break;
+	}
+
+	if (got_match)
+		realloc_strcat(bsdtar, result, name);
+
+	if (print_match)
+		fprintf(stderr, "%s >> %s\n", path, *result);
+
+	return got_match;
+}
+
+/*
+ * Release every rule (compiled regex and replacement template) and the
+ * list itself.  Safe to call when no rule was ever added.
+ */
+void
+cleanup_substitution(struct bsdtar *bsdtar)
+{
+	struct subst_rule *rule;
+	struct substitution *subst;
+
+	if ((subst = bsdtar->substitution) == NULL)
+		return;
+
+	while ((rule = subst->first_rule) != NULL) {
+		subst->first_rule = rule->next;
+		regfree(&rule->re);
+		free(rule->result);
+		free(rule);
+	}
+	free(subst);
+	bsdtar->substitution = NULL;
+}
+#endif /* HAVE_REGEX_H */