From b51d6b621533b6ea7a0a9c4770cc23c2d62a88cf Mon Sep 17 00:00:00 2001 From: zoulasc Date: Mon, 20 Jan 2020 13:20:42 +0100 Subject: [PATCH] Introduce archive_write_disk(3) flag ARCHIVE_EXTRACT_SAFE_WRITES This flag changes the way that regular files are extracted: Instead of removing existing files first and re-creating them in order to replace their contents, a temporary file is created and when writing to the temporary file is completed, the file is rename(2)d to the final destination name. This has the effect of presenting a consistent view of the file to the system (either the file with the new contents or the file with the old contents). Removing and overwriting the file has the undesired side effect that the the system can either not see the file at all (from the time it is being removed till the time it is being re-created), or worse it can see partial file contents. This is problematic when extracting system files (for example shared libraries). If the existing file that is going to be overwritten is a hard link, for now we unlink it before calling rename(2). This can be done correctly by creating a hardlink to a tmpnam(3) generated file and then use rename(2), but that is fairly intrusive and requires refactoring. Fixes #1289 --- libarchive/archive.h | 2 + libarchive/archive_private.h | 5 ++ libarchive/archive_util.c | 85 +++++++++++++++++++-------- libarchive/archive_write_disk.3 | 7 ++- libarchive/archive_write_disk_posix.c | 71 +++++++++++++++++++--- tar/bsdtar.1 | 27 +++++++++ tar/bsdtar.c | 6 ++ tar/bsdtar.h | 2 + tar/cmdline.c | 2 + 9 files changed, 173 insertions(+), 34 deletions(-) diff --git a/libarchive/archive.h b/libarchive/archive.h index fe6dc63c8..16a2b61bf 100644 --- a/libarchive/archive.h +++ b/libarchive/archive.h @@ -693,6 +693,8 @@ __LA_DECL int archive_read_set_passphrase_callback(struct archive *, #define ARCHIVE_EXTRACT_SECURE_NOABSOLUTEPATHS (0x10000) /* Default: Do not clear no-change flags when unlinking object */ #define ARCHIVE_EXTRACT_CLEAR_NOCHANGE_FFLAGS (0x20000) +/* Default: Do not extract atomically (using rename) */ +#define ARCHIVE_EXTRACT_SAFE_WRITES (0x40000) __LA_DECL int archive_read_extract(struct archive *, struct archive_entry *, int flags); diff --git a/libarchive/archive_private.h b/libarchive/archive_private.h index 3a43036f6..937a87bb1 100644 --- a/libarchive/archive_private.h +++ b/libarchive/archive_private.h @@ -153,6 +153,11 @@ void __archive_errx(int retvalue, const char *msg) __LA_DEAD; void __archive_ensure_cloexec_flag(int fd); int __archive_mktemp(const char *tmpdir); +#if defined(_WIN32) && !defined(__CYGWIN__) +int __archive_mkstemp(wchar_t *template); +#else +int __archive_mkstemp(char *template); +#endif int __archive_clean(struct archive *); diff --git a/libarchive/archive_util.c b/libarchive/archive_util.c index 3399c0b5f..dc34adcc5 100644 --- a/libarchive/archive_util.c +++ b/libarchive/archive_util.c @@ -414,14 +414,24 @@ exit_tmpfile: return (fd); } -#else +int +__archive_mkstemp(char *template) +{ + int fd = -1; + fd = mkstemp(template); + if (fd >= 0) + __archive_ensure_cloexec_flag(fd); + return (fd); +} + +#else /* !HAVE_MKSTEMP */ /* * We use a private routine. */ -int -__archive_mktemp(const char *tmpdir) +static int +__archive_mktempx(const char *tmpdir, char *template) { static const char num[] = { '0', '1', '2', '3', '4', '5', '6', '7', @@ -439,26 +449,37 @@ __archive_mktemp(const char *tmpdir) char *tp, *ep; fd = -1; - archive_string_init(&temp_name); - if (tmpdir == NULL) { - if (get_tempdir(&temp_name) != ARCHIVE_OK) + if (template == NULL) { + archive_string_init(&temp_name); + if (tmpdir == NULL) { + if (get_tempdir(&temp_name) != ARCHIVE_OK) + goto exit_tmpfile; + } else + archive_strcpy(&temp_name, tmpdir); + if (temp_name.s[temp_name.length-1] == '/') { + temp_name.s[temp_name.length-1] = '\0'; + temp_name.length --; + } + if (la_stat(temp_name.s, &st) < 0) goto exit_tmpfile; - } else - archive_strcpy(&temp_name, tmpdir); - if (temp_name.s[temp_name.length-1] == '/') { - temp_name.s[temp_name.length-1] = '\0'; - temp_name.length --; - } - if (la_stat(temp_name.s, &st) < 0) - goto exit_tmpfile; - if (!S_ISDIR(st.st_mode)) { - errno = ENOTDIR; - goto exit_tmpfile; + if (!S_ISDIR(st.st_mode)) { + errno = ENOTDIR; + goto exit_tmpfile; + } + archive_strcat(&temp_name, "/libarchive_"); + tp = temp_name.s + archive_strlen(&temp_name); + archive_strcat(&temp_name, "XXXXXXXXXX"); + ep = temp_name.s + archive_strlen(&temp_name); + template = temp_name.s; + } else { + tp = strchr(template, 'X'); + if (tp == NULL) /* No X, programming error */ + abort(); + for (ep = tp; *ep == 'X'; ep++) + continue; + if (*ep) /* X followed by non X, programming error */ + abort(); } - archive_strcat(&temp_name, "/libarchive_"); - tp = temp_name.s + archive_strlen(&temp_name); - archive_strcat(&temp_name, "XXXXXXXXXX"); - ep = temp_name.s + archive_strlen(&temp_name); do { char *p; @@ -469,19 +490,33 @@ __archive_mktemp(const char *tmpdir) int d = *((unsigned char *)p) % sizeof(num); *p++ = num[d]; } - fd = open(temp_name.s, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, + fd = open(template, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0600); } while (fd < 0 && errno == EEXIST); if (fd < 0) goto exit_tmpfile; __archive_ensure_cloexec_flag(fd); - unlink(temp_name.s); + if (template == temp_name.s) + unlink(temp_name.s); exit_tmpfile: - archive_string_free(&temp_name); + if (template == temp_name.s) + archive_string_free(&temp_name); return (fd); } -#endif /* HAVE_MKSTEMP */ +int +__archive_mktemp(const char *tmpdir) +{ + return __archive_mktempx(tmpdir, NULL); +} + +int +__archive_mkstemp(char *template) +{ + return __archive_mktempx(NULL, template); +} + +#endif /* !HAVE_MKSTEMP */ #endif /* !_WIN32 || __CYGWIN__ */ /* diff --git a/libarchive/archive_write_disk.3 b/libarchive/archive_write_disk.3 index ff8e1a36a..2fa016e45 100644 --- a/libarchive/archive_write_disk.3 +++ b/libarchive/archive_write_disk.3 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd April 3, 2017 +.Dd January 19, 2020 .Dt ARCHIVE_WRITE_DISK 3 .Os .Sh NAME @@ -139,6 +139,11 @@ is not specified, then SUID and SGID bits will only be restored if the default user and group IDs of newly-created objects on disk happen to match those specified in the archive entry. By default, only basic permissions are restored, and umask is obeyed. +.It Cm ARCHIVE_EXTRACT_SAFE_WRITES +Extract files atomically, by first creating a unique temporary file and then +renaming it to its required destination name. +This avoids a race where an application might see a partial file (or no +file) during extraction. .It Cm ARCHIVE_EXTRACT_SECURE_NOABSOLUTEPATHS Refuse to extract an absolute path. The default is to not refuse such paths. diff --git a/libarchive/archive_write_disk_posix.c b/libarchive/archive_write_disk_posix.c index df4b02f5e..63a5fc71f 100644 --- a/libarchive/archive_write_disk_posix.c +++ b/libarchive/archive_write_disk_posix.c @@ -253,6 +253,8 @@ struct archive_write_disk { struct archive_entry *entry; /* Entry being extracted. */ char *name; /* Name of entry, possibly edited. */ struct archive_string _name_data; /* backing store for 'name' */ + char *tmpname; /* Temporary name * */ + struct archive_string _tmpname_data; /* backing store for 'tmpname' */ /* Tasks remaining for this object. */ int todo; /* Tasks deferred until end-of-archive. */ @@ -354,6 +356,7 @@ struct archive_write_disk { static int la_opendirat(int, const char *); +static int la_mktemp(struct archive_write_disk *); static void fsobj_error(int *, struct archive_string *, int, const char *, const char *); static int check_symlinks_fsobj(char *, int *, struct archive_string *, @@ -406,6 +409,30 @@ static ssize_t _archive_write_disk_data(struct archive *, const void *, static ssize_t _archive_write_disk_data_block(struct archive *, const void *, size_t, int64_t); +static int +la_mktemp(struct archive_write_disk *a) +{ + int oerrno, fd; + mode_t mode; + + archive_string_empty(&a->_tmpname_data); + archive_string_sprintf(&a->_tmpname_data, "%s.XXXXXX", a->name); + a->tmpname = a->_tmpname_data.s; + + fd = __archive_mkstemp(a->tmpname); + if (fd == -1) + return -1; + + mode = a->mode & 0777 & ~a->user_umask; + if (fchmod(fd, mode) == -1) { + oerrno = errno; + close(fd); + errno = oerrno; + return -1; + } + return fd; +} + static int la_opendirat(int fd, const char *path) { const int flags = O_CLOEXEC @@ -1826,6 +1853,14 @@ finish_metadata: if (a->fd >= 0) { close(a->fd); a->fd = -1; + if (a->tmpname) { + if (rename(a->tmpname, a->name) == -1) { + archive_set_error(&a->archive, errno, + "rename failed"); + ret = ARCHIVE_FATAL; + } + a->tmpname = NULL; + } } /* If there's an entry, we can release it now. */ archive_entry_free(a->entry); @@ -2103,17 +2138,28 @@ restore_entry(struct archive_write_disk *a) } if (!S_ISDIR(a->st.st_mode)) { - /* A non-dir is in the way, unlink it. */ if (a->flags & ARCHIVE_EXTRACT_CLEAR_NOCHANGE_FFLAGS) (void)clear_nochange_fflags(a); - if (unlink(a->name) != 0) { - archive_set_error(&a->archive, errno, - "Can't unlink already-existing object"); - return (ARCHIVE_FAILED); + + if ((a->flags & ARCHIVE_EXTRACT_SAFE_WRITES) && + S_ISREG(a->st.st_mode)) { + /* Use a temporary file to extract */ + if ((a->fd = la_mktemp(a)) == -1) + return ARCHIVE_FAILED; + a->pst = NULL; + en = 0; + } else { + /* A non-dir is in the way, unlink it. */ + if (unlink(a->name) != 0) { + archive_set_error(&a->archive, errno, + "Can't unlink already-existing " + "object"); + return (ARCHIVE_FAILED); + } + a->pst = NULL; + /* Try again. */ + en = create_filesystem_object(a); } - a->pst = NULL; - /* Try again. */ - en = create_filesystem_object(a); } else if (!S_ISDIR(a->mode)) { /* A dir is in the way of a non-dir, rmdir it. */ if (a->flags & ARCHIVE_EXTRACT_CLEAR_NOCHANGE_FFLAGS) @@ -2215,6 +2261,13 @@ create_filesystem_object(struct archive_write_disk *a) } free(linkname_copy); archive_string_free(&error_string); + /* + * Unlinking and linking here is really not atomic, + * but doing it right, would require us to construct + * an mktemplink() function, and then use rename(2). + */ + if (a->flags & ARCHIVE_EXTRACT_SAFE_WRITES) + unlink(a->name); r = link(linkname, a->name) ? errno : 0; /* * New cpio and pax formats allow hardlink entries @@ -2288,6 +2341,7 @@ create_filesystem_object(struct archive_write_disk *a) /* POSIX requires that we fall through here. */ /* FALLTHROUGH */ case AE_IFREG: + a->tmpname = NULL; a->fd = open(a->name, O_WRONLY | O_CREAT | O_EXCL | O_BINARY | O_CLOEXEC, mode); __archive_ensure_cloexec_flag(a->fd); @@ -2449,6 +2503,7 @@ _archive_write_disk_free(struct archive *_a) archive_write_disk_set_user_lookup(&a->archive, NULL, NULL, NULL); archive_entry_free(a->entry); archive_string_free(&a->_name_data); + archive_string_free(&a->_tmpname_data); archive_string_free(&a->archive.error_string); archive_string_free(&a->path_safe); a->archive.magic = 0; diff --git a/tar/bsdtar.1 b/tar/bsdtar.1 index 04b56553c..723ea38f0 100644 --- a/tar/bsdtar.1 +++ b/tar/bsdtar.1 @@ -469,6 +469,13 @@ This is the reverse of and the default behavior if .Nm is run as non-root in x mode. +.It Fl Fl no-safe-writes +(x mode only) +Do not create temporary files and use +.Xr rename 2 +to replace the original ones. +This is the reverse of +.Fl Fl safe-writes . .It Fl Fl no-same-owner (x mode only) Do not extract owner and group IDs. @@ -756,6 +763,26 @@ The default is .Ar hrs which applies substitutions to all names. In particular, it is never necessary to specify h, r, or s. +.It Fl Fl safe-writes +(x mode only) +Extract files atomically. +By default +.Nm +unlinks the original file with the same name as the extracted file (if it +exists), and then creates it immediately under the same name and writes to +it. +For a short period of time, applications trying to access the file might +not find it, or see incomplete results. +If +.Fl Fl safe-writes +is enabled, +.Nm +first creates a unique temporary file, then writes the new contents to +the temporary file, and finally renames the temporary file to its final +name atomically using +.Xr rename 2 . +This guarantees that an application accessing the file, will either see +the old contents or the new contents at all times. .It Fl Fl same-owner (x mode only) Extract owner and group IDs. diff --git a/tar/bsdtar.c b/tar/bsdtar.c index b59963d0f..af41be5e4 100644 --- a/tar/bsdtar.c +++ b/tar/bsdtar.c @@ -542,6 +542,9 @@ main(int argc, char **argv) bsdtar->extract_flags &= ~ARCHIVE_EXTRACT_MAC_METADATA; bsdtar->flags |= OPTFLAG_NO_MAC_METADATA; break; + case OPTION_NO_SAFE_WRITES: + bsdtar->extract_flags &= ~ARCHIVE_EXTRACT_SAFE_WRITES; + break; case OPTION_NO_SAME_OWNER: /* GNU tar */ bsdtar->extract_flags &= ~ARCHIVE_EXTRACT_OWNER; break; @@ -658,6 +661,9 @@ main(int argc, char **argv) usage(); #endif break; + case OPTION_SAFE_WRITES: + bsdtar->extract_flags |= ARCHIVE_EXTRACT_SAFE_WRITES; + break; case OPTION_SAME_OWNER: /* GNU tar */ bsdtar->extract_flags |= ARCHIVE_EXTRACT_OWNER; break; diff --git a/tar/bsdtar.h b/tar/bsdtar.h index c9206dfd5..89aa3aa91 100644 --- a/tar/bsdtar.h +++ b/tar/bsdtar.h @@ -164,6 +164,7 @@ enum { OPTION_NO_ACLS, OPTION_NO_FFLAGS, OPTION_NO_MAC_METADATA, + OPTION_NO_SAFE_WRITES, OPTION_NO_SAME_OWNER, OPTION_NO_SAME_PERMISSIONS, OPTION_NO_XATTRS, @@ -177,6 +178,7 @@ enum { OPTION_OPTIONS, OPTION_PASSPHRASE, OPTION_POSIX, + OPTION_SAFE_WRITES, OPTION_SAME_OWNER, OPTION_STRIP_COMPONENTS, OPTION_TOTALS, diff --git a/tar/cmdline.c b/tar/cmdline.c index 21558e12d..b80937ffc 100644 --- a/tar/cmdline.c +++ b/tar/cmdline.c @@ -123,6 +123,7 @@ static const struct bsdtar_option { { "no-fflags", 0, OPTION_NO_FFLAGS }, { "no-mac-metadata", 0, OPTION_NO_MAC_METADATA }, { "no-recursion", 0, 'n' }, + { "no-safe-writes", 0, OPTION_NO_SAFE_WRITES }, { "no-same-owner", 0, OPTION_NO_SAME_OWNER }, { "no-same-permissions", 0, OPTION_NO_SAME_PERMISSIONS }, { "no-xattr", 0, OPTION_NO_XATTRS }, @@ -144,6 +145,7 @@ static const struct bsdtar_option { { "posix", 0, OPTION_POSIX }, { "preserve-permissions", 0, 'p' }, { "read-full-blocks", 0, 'B' }, + { "safe-writes", 0, OPTION_SAFE_WRITES }, { "same-owner", 0, OPTION_SAME_OWNER }, { "same-permissions", 0, 'p' }, { "strip-components", 1, OPTION_STRIP_COMPONENTS }, -- 2.39.5