]> git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
Add support for pax-like tar substitutions as found in NetBSD's
authorJoerg Sonnenberger <joerg.sonnenberger@gmail.com>
Wed, 14 May 2008 02:12:21 +0000 (22:12 -0400)
committerJoerg Sonnenberger <joerg.sonnenberger@gmail.com>
Wed, 14 May 2008 02:12:21 +0000 (22:12 -0400)
pax-as-tar.  This code is not compiled in on platforms lacking
regex.h.

SVN-Revision: 53

Makefile.am
config.h.in
configure.ac
tar/bsdtar.1
tar/bsdtar.c
tar/bsdtar.h
tar/read.c
tar/subst.c [new file with mode: 0644]

index 23317aac6aae984e80ec27fbcd51576652caef81..b5a3eea00c9cb4b8b8f6c8ceb58cede904dd87d7 100644 (file)
@@ -270,6 +270,7 @@ bsdtar_SOURCES=                             \
                tar/getdate.y           \
                tar/matching.c          \
                tar/read.c              \
+               tar/subst.c             \
                tar/tree.c              \
                tar/tree.h              \
                tar/util.c              \
index aa69378e37f96bdcb25de9ae2ffab7b927c6264d..04ea6148a9a36fdc05ffc665fe9d4ae7f2b13c00 100644 (file)
 /* Define to 1 if you have the <pwd.h> header file. */
 #undef HAVE_PWD_H
 
+/* Define to 1 if you have the <regex.h> header file. */
+#undef HAVE_REGEX_H
+
 /* Define to 1 if you have the `select' function. */
 #undef HAVE_SELECT
 
 #undef _LARGE_FILES
 
 /* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
-   <pthread.h>, or <semaphore.h> is not used. If the typedef was allowed, the
+   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
    #define below would cause a syntax error. */
 #undef _UINT64_T
 
index 7ed5ed1330305c4542389f88cb2a1ce432394518..9fb719b81cb5f7579a866812fdfb1911bfde3e1b 100644 (file)
@@ -173,7 +173,7 @@ AC_CHECK_HEADERS([inttypes.h langinfo.h limits.h linux/ext2_fs.h linux/fs.h])
 AC_CHECK_HEADER(inttypes.h,
         [AC_SUBST(ARCHIVE_H_INCLUDE_INTTYPES_H,['#include <inttypes.h> /* For int64_t */'])],
         [AC_SUBST(ARCHIVE_H_INCLUDE_INTTYPES_H,[''])])
-AC_CHECK_HEADERS([locale.h paths.h poll.h pwd.h stdarg.h])
+AC_CHECK_HEADERS([locale.h paths.h poll.h pwd.h regex.h stdarg.h])
 AC_CHECK_HEADERS([stdint.h stdlib.h string.h sys/acl.h sys/ioctl.h])
 AC_CHECK_HEADERS([sys/param.h sys/poll.h sys/select.h sys/time.h sys/utime.h])
 AC_CHECK_HEADERS([time.h unistd.h utime.h wchar.h zlib.h])
index 60a5d8e778dd2bd2e947713fe79d70490ad99191..f959bbf852dacebc364577967522b1866b676c1f 100644 (file)
@@ -347,6 +347,23 @@ Remove the specified number of leading path elements.
 Pathnames with fewer elements will be silently skipped.
 Note that the pathname is edited after checking inclusion/exclusion patterns
 but before security checks.
+.It Fl s Ar pattern
+(x and t mode only)
+Modify file or archive member names according to
+.Pa pattern .
+The pattern has the format /old/new/[gps].
+old is a basic regular expression.
+If it doesn't apply, the pattern is skipped.
+new is the replacement string of the matched part.
+~ is substituted with the match, \1 to \9 with the content of
+the corresponding captured group.
+The optional trailing g specifies that matching should continue
+after the matched part and stop at the first unmatched pattern.
+The optional trailing s specifies that the pattern applies to the value
+of symbolic links.
+The optional trailing p specifies that after a successful substitution
+the original path name and the new path name should be printed to
+standard error.
 .It Fl T Ar filename
 In x or t mode,
 .Nm
index 23db60189ce344fa9c847408d4bc608fc63c43fe..76ef6c108763dd1a430490ae79c006cd883f11af 100644 (file)
@@ -502,6 +502,14 @@ main(int argc, char **argv)
                case 'S': /* NetBSD pax-as-tar */
                        bsdtar->extract_flags |= ARCHIVE_EXTRACT_SPARSE;
                        break;
+               case 's': /* NetBSD pax-as-tar */
+#if HAVE_REGEX_H
+                       add_substitution(bsdtar, optarg);
+#else
+                       bsdtar_warnc(bsdtar, 0, "-s is not supported by this version of bsdtar");
+                       usage(bsdtar);
+#endif
+                       break;
                case OPTION_STRIP_COMPONENTS: /* GNU tar 1.15 */
                        bsdtar->strip_components = atoi(optarg);
                        break;
@@ -677,6 +685,10 @@ main(int argc, char **argv)
        }
 
        cleanup_exclusions(bsdtar);
+#if HAVE_REGEX_H
+       cleanup_substitution(bsdtar);
+#endif
+
        if (bsdtar->return_value != 0)
                bsdtar_warnc(bsdtar, 0,
                    "Error exit delayed from previous errors.");
index 7fe27df41b79896ce8b9846aeeb1c4d879eab91d..a50529d4f8976a0ea02947f3e2ddf16b38944245 100644 (file)
@@ -97,6 +97,7 @@ struct bsdtar {
        struct matching         *matching;      /* for matching.c */
        struct security         *security;      /* for read.c */
        struct name_cache       *uname_cache;   /* for write.c */
+       struct substitution     *substitution;  /* for subst.c */
 };
 
 void   bsdtar_errc(struct bsdtar *, int _eval, int _code,
@@ -125,3 +126,8 @@ int unmatched_inclusions_warn(struct bsdtar *bsdtar, const char *msg);
 void   usage(struct bsdtar *);
 int    yes(const char *fmt, ...);
 
+#if HAVE_REGEX_H
+void   add_substitution(struct bsdtar *, const char *);
+int    apply_substitution(struct bsdtar *, const char *, char **, int);
+void   cleanup_substitution(struct bsdtar *);
+#endif
index edb33cb45431213a335a18b9c1c8549e482bdc1b..0376b8b14548cd3fb707b434acf22ad7ce7c3b76 100644 (file)
@@ -98,6 +98,7 @@ read_archive(struct bsdtar *bsdtar, char mode)
        struct archive_entry     *entry;
        const struct stat        *st;
        int                       r;
+       char                     *subst_name;
 
        while (*bsdtar->argv) {
                include(bsdtar, *bsdtar->argv);
@@ -157,6 +158,18 @@ read_archive(struct bsdtar *bsdtar, char mode)
                        archive_entry_set_gname(entry, NULL);
                }
 
+#if HAVE_REGEX_H
+               r = apply_substitution(bsdtar, archive_entry_pathname(entry), &subst_name, 0);
+               if (r == -1) {
+                       bsdtar_warnc(bsdtar, 0, "Invalid substituion, skipping entry");
+                       continue;
+               }
+               if (r == 1) {
+                       archive_entry_set_pathname(entry, subst_name);
+                       free(subst_name);
+               }
+#endif
+
                /*
                 * Exclude entries that are too old.
                 */
@@ -177,6 +190,31 @@ read_archive(struct bsdtar *bsdtar, char mode)
                            <= bsdtar->newer_mtime_nsec)
                                continue; /* Too old, skip it. */
                }
+               
+#if HAVE_REGEX_H
+               if (archive_entry_hardlink(entry)) {
+                       r = apply_substitution(bsdtar, archive_entry_hardlink(entry), &subst_name, 1);
+                       if (r == -1) {
+                               bsdtar_warnc(bsdtar, 0, "Invalid substituion, skipping entry");
+                               continue;
+                       }
+                       if (r == 1) {
+                               archive_entry_set_hardlink(entry, subst_name);
+                               free(subst_name);
+                       }
+               }
+               if (S_ISLNK(st->st_mode)) {
+                       r = apply_substitution(bsdtar, archive_entry_symlink(entry), &subst_name, 1);
+                       if (r == -1) {
+                               bsdtar_warnc(bsdtar, 0, "Invalid substituion, skipping entry");
+                               continue;
+                       }
+                       if (r == 1) {
+                               archive_entry_set_symlink(entry, subst_name);
+                               free(subst_name);
+                       }
+               }
+#endif
 
                /*
                 * Note that pattern exclusions are checked before
diff --git a/tar/subst.c b/tar/subst.c
new file mode 100644 (file)
index 0000000..2233a21
--- /dev/null
@@ -0,0 +1,275 @@
+/*-
+ * Copyright (c) 2008 Joerg Sonnenberger
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bsdtar_platform.h"
+__FBSDID("$FreeBSD$");
+
+#if HAVE_REGEX_H
+#include "bsdtar.h"
+
+#include <errno.h>
+#include <regex.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * One -s rule: a compiled pattern, its replacement template and the
+ * flags parsed from the rule's trailing letters.  Rules are chained
+ * through "next".
+ */
+struct subst_rule {
+       struct subst_rule *next;        /* following rule, or NULL */
+       regex_t re;                     /* compiled "old" pattern */
+       char *result;                   /* malloc'ed "new" replacement template */
+       /*
+        * Single-bit flags must be unsigned: whether a plain "int"
+        * bit-field is signed is implementation-defined, and a signed
+        * one-bit field cannot represent the value 1.
+        */
+       unsigned int global:1, print:1, symlink:1;
+};
+
+/*
+ * List of substitution rules hanging off struct bsdtar; both ends of
+ * the list are tracked.
+ */
+struct substitution {
+       struct subst_rule *first_rule;
+       struct subst_rule *last_rule;
+};
+
+/*
+ * Allocate an empty rule list and attach it to bsdtar->substitution.
+ * Allocation failure is reported through bsdtar_errc().
+ */
+static void
+init_substitution(struct bsdtar *bsdtar)
+{
+       struct substitution *list;
+
+       list = malloc(sizeof(*list));
+       bsdtar->substitution = list;
+       if (list == NULL)
+               bsdtar_errc(bsdtar, 1, errno, "Out of memory");
+       list->first_rule = NULL;
+       list->last_rule = NULL;
+}
+
+/*
+ * Parse one -s argument and append the resulting rule to the list in
+ * bsdtar->substitution, creating that list on first use.
+ *
+ * The first character of rule_text is the delimiter.  It must occur
+ * twice more: once between the pattern and the replacement, and once
+ * after the replacement.  Anything after that is a run of flag letters:
+ * g/G (global), p/P (print), s/S (symlink).  All parse and allocation
+ * errors are reported through bsdtar_errc().
+ */
+void
+add_substitution(struct bsdtar *bsdtar, const char *rule_text)
+{
+       struct substitution *subst;
+       struct subst_rule *rule;
+       const char *pattern_end, *replacement, *replacement_end, *flag;
+       char *pattern;
+       size_t pattern_len, replacement_len;
+       int r;
+
+       if (bsdtar->substitution == NULL)
+               init_substitution(bsdtar);
+       subst = bsdtar->substitution;
+
+       rule = malloc(sizeof(*rule));
+       if (rule == NULL)
+               bsdtar_errc(bsdtar, 1, errno, "Out of memory");
+       rule->next = NULL;
+
+       /* Append the new rule at the tail of the list. */
+       if (subst->last_rule != NULL)
+               subst->last_rule->next = rule;
+       else
+               subst->first_rule = rule;
+       subst->last_rule = rule;
+
+       /* Locate the pattern between the first two delimiters. */
+       if (rule_text[0] == '\0')
+               bsdtar_errc(bsdtar, 1, 0, "Empty replacement string");
+       pattern_end = strchr(rule_text + 1, rule_text[0]);
+       if (pattern_end == NULL)
+               bsdtar_errc(bsdtar, 1, 0, "Invalid replacement string");
+       pattern_len = (size_t)(pattern_end - rule_text - 1);
+
+       /* regcomp() needs a NUL-terminated copy of the pattern. */
+       pattern = malloc(pattern_len + 1);
+       if (pattern == NULL)
+               bsdtar_errc(bsdtar, 1, errno, "Out of memory");
+       memcpy(pattern, rule_text + 1, pattern_len);
+       pattern[pattern_len] = '\0';
+
+       r = regcomp(&rule->re, pattern, REG_BASIC);
+       if (r != 0) {
+               char errbuf[80];
+               regerror(r, &rule->re, errbuf, sizeof(errbuf));
+               bsdtar_errc(bsdtar, 1, 0, "Invalid regular expression: %s", errbuf);
+       }
+       free(pattern);
+
+       /* Locate and copy the replacement between delimiters two and three. */
+       replacement = pattern_end + 1;
+       replacement_end = strchr(replacement, rule_text[0]);
+       if (replacement_end == NULL)
+               bsdtar_errc(bsdtar, 1, 0, "Invalid replacement string");
+       replacement_len = (size_t)(replacement_end - replacement);
+
+       rule->result = malloc(replacement_len + 1);
+       if (rule->result == NULL)
+               bsdtar_errc(bsdtar, 1, errno, "Out of memory");
+       memcpy(rule->result, replacement, replacement_len);
+       rule->result[replacement_len] = '\0';
+
+       /* Flags default to off; each trailing letter turns one on. */
+       rule->global = 0;
+       rule->print = 0;
+       rule->symlink = 0;
+
+       for (flag = replacement_end + 1; *flag != '\0'; ++flag) {
+               if (*flag == 'g' || *flag == 'G')
+                       rule->global = 1;
+               else if (*flag == 'p' || *flag == 'P')
+                       rule->print = 1;
+               else if (*flag == 's' || *flag == 'S')
+                       rule->symlink = 1;
+               else
+                       bsdtar_errc(bsdtar, 1, 0, "Invalid replacement flag %c", *flag);
+       }
+}
+
+/*
+ * Append the first len bytes of append to the heap string *str and
+ * NUL-terminate the result.  *str may be NULL, which is treated as an
+ * empty string.  The previous buffer is freed and *str is set to the
+ * newly allocated one.  Allocation failure is reported through
+ * bsdtar_errc().
+ */
+static void
+realloc_strncat(struct bsdtar *bsdtar, char **str, const char *append, size_t len)
+{
+       char *new_str;
+       size_t old_len;
+
+       if (*str == NULL)
+               old_len = 0;
+       else
+               old_len = strlen(*str);
+
+       new_str = malloc(old_len + len + 1);
+       if (new_str == NULL)
+               bsdtar_errc(bsdtar, 1, errno, "Out of memory");
+       /*
+        * memcpy() requires valid pointers even when the length is zero,
+        * so never hand it the NULL *str of the very first append.
+        */
+       if (old_len > 0)
+               memcpy(new_str, *str, old_len);
+       if (len > 0)
+               memcpy(new_str + old_len, append, len);
+       new_str[old_len + len] = '\0';
+       free(*str);
+       *str = new_str;
+}
+
+/*
+ * Append the whole NUL-terminated string append to the heap string
+ * *str.  *str may be NULL, which is treated as an empty string.  The
+ * previous buffer is freed and *str is set to the newly allocated one.
+ * Allocation failure is reported through bsdtar_errc().
+ */
+static void
+realloc_strcat(struct bsdtar *bsdtar, char **str, const char *append)
+{
+       char *new_str;
+       size_t old_len, append_len;
+
+       if (*str == NULL)
+               old_len = 0;
+       else
+               old_len = strlen(*str);
+       append_len = strlen(append);
+
+       new_str = malloc(old_len + append_len + 1);
+       if (new_str == NULL)
+               bsdtar_errc(bsdtar, 1, errno, "Out of memory");
+       /*
+        * memcpy() requires valid pointers even when the length is zero,
+        * so skip the copy when there is no old content (*str may be NULL).
+        */
+       if (old_len > 0)
+               memcpy(new_str, *str, old_len);
+       /* "+ 1" copies the terminating NUL of append as well. */
+       memcpy(new_str + old_len, append, append_len + 1);
+       free(*str);
+       *str = new_str;
+}
+
+/*
+ * Rewrite name according to the rules added with add_substitution().
+ *
+ * bsdtar        carries the rule list (bsdtar->substitution).
+ * name          the string to rewrite; it is not modified.
+ * result        set to NULL on entry; on a successful rewrite it receives
+ *               a malloc'ed string that the caller must free().
+ * symlink_only  when non-zero, only rules carrying the symlink flag are
+ *               considered.
+ *
+ * Returns 1 if a rule matched and *result holds the rewritten name,
+ * 0 if nothing matched (*result is NULL), and -1 if a replacement
+ * refers to a group (\1 to \9) that the pattern does not define
+ * (*result is NULL).
+ *
+ * In the replacement text "~" stands for the whole match, "\1" to "\9"
+ * for the corresponding group, and "\~" / "\\" for a literal "~" / "\".
+ * Any other backslash sequence is copied unchanged.
+ */
+int
+apply_substitution(struct bsdtar *bsdtar, const char *name, char **result, int symlink_only)
+{
+       const char *path = name;
+       regmatch_t matches[10];
+       size_t i, j, group;
+       struct subst_rule *rule;
+       struct substitution *subst;
+       int c, got_match, print_match;
+
+       *result = NULL;
+
+       if ((subst = bsdtar->substitution) == NULL)
+               return 0;
+
+       got_match = 0;
+       print_match = 0;
+
+       for (rule = subst->first_rule; rule != NULL; rule = rule->next) {
+               if (symlink_only && !rule->symlink)
+                       continue;
+               /* A rule that does not apply is skipped, not fatal to later rules. */
+               if (regexec(&rule->re, name, 10, matches, 0) != 0)
+                       continue;
+
+               got_match = 1;
+               print_match |= rule->print;
+               /* Copy the part of the name that precedes the match. */
+               realloc_strncat(bsdtar, result, name, matches[0].rm_so);
+
+               /* j marks the start of replacement text not yet copied. */
+               for (i = 0, j = 0; rule->result[i] != '\0'; ++i) {
+                       if (rule->result[i] == '~') {
+                               realloc_strncat(bsdtar, result, rule->result + j, i - j);
+                               /* "~" expands to the matched text itself. */
+                               realloc_strncat(bsdtar, result, name + matches[0].rm_so, matches[0].rm_eo - matches[0].rm_so);
+                               j = i + 1;
+                               continue;
+                       }
+                       if (rule->result[i] != '\\')
+                               continue;
+
+                       c = rule->result[i + 1];
+                       if (c == '\0') {
+                               /*
+                                * Lone trailing backslash: keep it literally and
+                                * stop before stepping past the terminator.
+                                */
+                               break;
+                       }
+                       ++i;
+                       if (c == '~' || c == '\\') {
+                               /* Drop the backslash, keep the escaped character. */
+                               realloc_strncat(bsdtar, result, rule->result + j, i - j - 1);
+                               j = i;
+                       } else if (c >= '1' && c <= '9') {
+                               group = (size_t)(c - '0');
+                               if (group > rule->re.re_nsub) {
+                                       free(*result);
+                                       *result = NULL;
+                                       return -1;
+                               }
+                               realloc_strncat(bsdtar, result, rule->result + j, i - j - 1);
+                               /* A group that did not participate has rm_so == -1. */
+                               if (matches[group].rm_so >= 0)
+                                       realloc_strncat(bsdtar, result, name + matches[group].rm_so, matches[group].rm_eo - matches[group].rm_so);
+                               j = i + 1;
+                       }
+                       /* Any other escape is left in place and copied later. */
+               }
+
+               /* Copy whatever replacement text is left after the last escape. */
+               realloc_strcat(bsdtar, result, rule->result + j);
+
+               /* Later rules only see the text after this match. */
+               name += matches[0].rm_eo;
+
+               if (!rule->global)
+                       break;
+       }
+
+       /* Keep the unmatched tail of the name. */
+       if (got_match)
+               realloc_strcat(bsdtar, result, name);
+
+       if (print_match)
+               fprintf(stderr, "%s >> %s\n", path, *result);
+
+       return got_match;
+}
+
+/*
+ * Release every rule (compiled pattern, replacement template and the
+ * rule itself) together with the list header, then clear
+ * bsdtar->substitution.  Does nothing when no list was ever created.
+ */
+void
+cleanup_substitution(struct bsdtar *bsdtar)
+{
+       struct subst_rule *rule;
+       struct substitution *subst;
+
+       if ((subst = bsdtar->substitution) == NULL)
+               return;
+
+       while ((rule = subst->first_rule) != NULL) {
+               subst->first_rule = rule->next;
+               /* Storage owned by regcomp() is not released by free(rule). */
+               regfree(&rule->re);
+               free(rule->result);
+               free(rule);
+       }
+       free(subst);
+       /* Do not leave a dangling pointer behind. */
+       bsdtar->substitution = NULL;
+}
+#endif /* HAVE_REGEX_H */