From: Paul Eggert Date: Fri, 14 Nov 2025 06:59:24 +0000 (-0800) Subject: Do not create empty placeholder files X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=50b559c3d72163c2e7fa822f4ac9d297d91e1db3;p=thirdparty%2Ftar.git Do not create empty placeholder files * src/extract.c (HAVE_BIRTHTIME, BIRTHTIME_EQ, struct delayed_link) (delayed_link_table, delayed_link_head, delayed_link_tail) (struct string_list, dl_hash, dl_compare, find_direct_ancestor) (find_delayed_link_source, create_placeholder_file, apply_delayed_link) (apply_delayed_links): Remove. All uses removed. (struct delayed_set_stat): New member metadata_set, replacing after_links. All uses changed. (apply_nonancestor_delayed_set_stat): Arg METADATA_SET replaces the old AFTER_LINKS. All callers changed. (extract_archive): Do not worry about "..", since openat2 now does that for us. * src/names.c (contains_dot_dot): Remove. All uses removed. --- diff --git a/NEWS b/NEWS index 3cfe2b0d..c5f921d9 100644 --- a/NEWS +++ b/NEWS @@ -62,6 +62,11 @@ option. ** Sparse files are now read and written with larger blocksizes. +** When extracting, tar no longer creates empty placeholder files + that are later replaced by symbolic links. The placeholders are no + longer needed now that tar no longer follows symbolic links to + targets outside the working directory. + version 1.35 - Sergey Poznyakoff, 2023-07-18 diff --git a/doc/tar.texi b/doc/tar.texi index e3c5474d..a6302b41 100644 --- a/doc/tar.texi +++ b/doc/tar.texi @@ -9549,7 +9549,8 @@ really @file{etc/passwd}. File names containing @file{..} can cause problems when extracting, so @command{tar} normally warns you about such files when creating an -archive, and rejects attempts to extracts such files. +archive, and prevents attempts to extract such files if that would +affect files outside the working directory. Other @command{tar} programs do not do this. 
As a result, if you create an archive whose member names start with a slash, they will be @@ -9565,10 +9566,6 @@ is not, generally speaking, the same as the one you'd get running scripts for comparing both outputs. @xref{listing member and file names}, for the information on how to handle this case.}. -Symbolic links containing @file{..} or leading @samp{/} can also cause -problems when extracting, so @command{tar} normally extracts them last; -it may create empty files as placeholders during extraction. - If you use the @option{--absolute-names} (@option{-P}) option, @command{tar} will do none of these transformations. diff --git a/src/common.h b/src/common.h index 4502d953..101c5db8 100644 --- a/src/common.h +++ b/src/common.h @@ -826,8 +826,6 @@ bool all_names_found (struct tar_stat_info *st); void add_avoided_name (char const *name); bool is_avoided_name (char const *name); -bool contains_dot_dot (char const *name); - COMMON_INLINE bool isfound (struct name const *c) { diff --git a/src/extract.c b/src/extract.c index 7b11abbd..0e099efd 100644 --- a/src/extract.c +++ b/src/extract.c @@ -47,21 +47,6 @@ static mode_t const all_mode_bits = ~ (mode_t) 0; # define fchown(fd, uid, gid) (errno = ENOSYS, -1) #endif -#if (defined HAVE_STRUCT_STAT_ST_BIRTHTIMESPEC_TV_NSEC \ - || defined HAVE_STRUCT_STAT_ST_BIRTHTIM_TV_NSEC \ - || defined HAVE_STRUCT_STAT_ST_BIRTHTIMENSEC \ - || (defined _WIN32 && ! defined __CYGWIN__)) -# define HAVE_BIRTHTIME 1 -#else -# define HAVE_BIRTHTIME 0 -#endif - -#if HAVE_BIRTHTIME -# define BIRTHTIME_EQ(a, b) (timespec_cmp (a, b) == 0) -#else -# define BIRTHTIME_EQ(a, b) true -#endif - /* Return true if an error number ERR means the system call is supported in this case. */ static bool @@ -73,13 +58,9 @@ implemented (int err) } /* List of directories whose statuses we need to extract after we've - finished extracting their subsidiary files. 
If you consider each - contiguous subsequence of elements of the form [D]?[^D]*, where [D] - represents an element where AFTER_LINKS is nonzero and [^D] - represents an element where AFTER_LINKS is zero, then the head - of the subsequence has the longest name, and each non-head element - in the prefix is an ancestor (in the directory hierarchy) of the - preceding element. */ + finished extracting their subsidiary files. The head of the list + has the longest name, and each non-head element is an ancestor (in + the directory hierarchy) of the preceding element. */ struct delayed_set_stat { @@ -87,6 +68,7 @@ struct delayed_set_stat struct delayed_set_stat *next; /* Metadata for this directory. */ + bool metadata_set; dev_t st_dev; ino_t st_ino; mode_t mode; /* The desired mode is MODE & ~ current_umask. */ @@ -112,10 +94,6 @@ struct delayed_set_stat directory. */ int atflag; - /* Do not set the status of this directory until after delayed - links are created. */ - bool after_links; - /* Directory that the name is relative to. */ idx_t change_dir; @@ -136,90 +114,6 @@ static struct delayed_set_stat *delayed_set_stat_head; /* Table of delayed stat updates hashed by path; null if none. */ static Hash_table *delayed_set_stat_table; -/* A link whose creation we have delayed. */ -struct delayed_link - { - /* The next in a list of delayed links that should be made after - this delayed link. */ - struct delayed_link *next; - - /* The device, inode number and birthtime of the placeholder. - birthtime.tv_nsec is negative if the birthtime is not available. - Don't use mtime as this would allow for false matches if some - other process removes the placeholder. Don't use ctime as - this would cause race conditions and other screwups, e.g., - when restoring hard-linked symlinks. */ - dev_t st_dev; - ino_t st_ino; -#if HAVE_BIRTHTIME - struct timespec birthtime; -#endif - - /* True if the link is symbolic. 
*/ - bool is_symlink; - - /* The desired metadata, valid only the link is symbolic. */ - mode_t mode; - uid_t uid; - gid_t gid; - struct timespec atime; - struct timespec mtime; - - /* The directory that the sources and target are relative to. */ - idx_t change_dir; - - /* A list of sources for this link. The sources are all to be - hard-linked together. */ - struct string_list *sources; - - /* SELinux context */ - char *cntx_name; - - /* ACLs */ - char *acls_a_ptr; - idx_t acls_a_len; - char *acls_d_ptr; - idx_t acls_d_len; - - struct xattr_map xattr_map; - - /* The desired target of the desired link. */ - char target[FLEXIBLE_ARRAY_MEMBER]; - }; - -/* Table of delayed links hashed by device and inode; null if none. */ -static Hash_table *delayed_link_table; - -/* A list of the delayed links in tar file order, - and the tail of that list. */ -static struct delayed_link *delayed_link_head; -static struct delayed_link **delayed_link_tail = &delayed_link_head; - -struct string_list - { - struct string_list *next; - char string[FLEXIBLE_ARRAY_MEMBER]; - }; - -static size_t -dl_hash (void const *entry, size_t table_size) -{ - struct delayed_link const *dl = entry; - uintmax_t n = dl->st_dev; - int nshift = TYPE_WIDTH (n) - TYPE_WIDTH (dl->st_dev); - if (0 < nshift) - n <<= nshift; - n ^= dl->st_ino; - return n % table_size; -} - -static bool -dl_compare (void const *a, void const *b) -{ - struct delayed_link const *da = a, *db = b; - return PSAME_INODE (da, db); -} - static size_t ds_hash (void const *entry, size_t table_size) { @@ -475,27 +369,8 @@ set_stat (char const *file_name, xattrs_selinux_set (st, file_name, typeflag); } -/* Find the direct ancestor of FILE_NAME in the delayed_set_stat list. - */ -static struct delayed_set_stat * -find_direct_ancestor (char const *file_name) -{ - struct delayed_set_stat *h = delayed_set_stat_head; - while (h) - { - if (! 
h->after_links - && strncmp (file_name, h->file_name, h->file_name_len) == 0 - && ISSLASH (file_name[h->file_name_len]) - && (last_component (file_name) == file_name + h->file_name_len + 1)) - break; - h = h->next; - } - return h; -} - -/* For each entry H in the leading prefix of entries in HEAD that do - not have after_links marked, mark H and fill in its dev and ino - members. Assume HEAD && ! HEAD->after_links. */ +/* For each entry H in the entries in HEAD, mark H and fill in its dev + and ino members. Assume HEAD. */ static void mark_after_links (struct delayed_set_stat *head) { @@ -504,7 +379,7 @@ mark_after_links (struct delayed_set_stat *head) do { struct stat st; - h->after_links = 1; + h->metadata_set = true; if (deref_stat (h->file_name, &st) < 0) stat_error (h->file_name); @@ -514,7 +389,7 @@ mark_after_links (struct delayed_set_stat *head) h->st_ino = st.st_ino; } } - while ((h = h->next) && ! h->after_links); + while ((h = h->next) && ! h->metadata_set); } /* Remember to restore stat attributes (owner, group, mode and times) @@ -579,7 +454,7 @@ delay_set_stat (char const *file_name, struct tar_stat_info const *st, data->file_name = xstrdup (file_name); if (! hash_insert (delayed_set_stat_table, data)) xalloc_die (); - data->after_links = false; + data->metadata_set = false; if (st) { data->st_dev = st->stat.st_dev; @@ -1000,12 +875,11 @@ set_xattr (MAYBE_UNUSED char const *file_name, } /* Fix the statuses of all directories whose statuses need fixing, and - which are not ancestors of FILE_NAME. If AFTER_LINKS is - nonzero, do this for all such directories; otherwise, stop at the - first directory that is marked to be fixed up only after delayed - links are applied. */ + which are not ancestors of FILE_NAME. If METADATA_SET, + do this for all such directories; otherwise, stop at the + first directory with metadata already determined. 
*/ static void -apply_nonancestor_delayed_set_stat (char const *file_name, bool after_links) +apply_nonancestor_delayed_set_stat (char const *file_name, bool metadata_set) { idx_t file_name_len = strlen (file_name); bool check_for_renamed_directories = 0; @@ -1018,9 +892,9 @@ apply_nonancestor_delayed_set_stat (char const *file_name, bool after_links) mode_t current_mode = data->current_mode; mode_t current_mode_mask = data->current_mode_mask; - check_for_renamed_directories |= data->after_links; + check_for_renamed_directories |= data->metadata_set; - if (after_links < data->after_links + if (metadata_set < data->metadata_set || (data->file_name_len < file_name_len && file_name[data->file_name_len] && (ISSLASH (file_name[data->file_name_len]) @@ -1413,143 +1287,6 @@ extract_file (char *file_name, char typeflag) return status == 0; } -/* Return true if NAME is a delayed link. This can happen only if the link - placeholder file has been created. Therefore, try to stat the NAME - first. If it doesn't exist, there is no matching entry in the table. - Otherwise, look for the entry in the table that has the matching dev - and ino numbers. Return false if not found. - - Do not rely on comparing file names, which may differ for - various reasons (e.g. relative vs. absolute file names). - */ -static bool -find_delayed_link_source (char const *name) -{ - struct stat st; - - if (!delayed_link_table) - return false; - - struct fdbase f = fdbase (name); - if (f.fd == BADFD || fstatat (f.fd, f.base, &st, AT_SYMLINK_NOFOLLOW) < 0) - { - if (errno != ENOENT) - stat_error (name); - return false; - } - - struct delayed_link dl; - dl.st_dev = st.st_dev; - dl.st_ino = st.st_ino; - return hash_lookup (delayed_link_table, &dl) != NULL; -} - -/* Create a placeholder file with name FILE_NAME, which will be - replaced after other extraction is done by a symbolic link if - IS_SYMLINK is true, and by a hard link otherwise. 
Set - *INTERDIR_MADE if an intermediate directory is made in the - process. -*/ - -static bool -create_placeholder_file (char *file_name, bool is_symlink, bool *interdir_made) -{ - int fd; - struct stat st; - - for (;;) - { - struct fdbase f = fdbase (file_name); - if (f.fd != BADFD) - { - fd = openat (f.fd, f.base, O_WRONLY | O_CREAT | O_EXCL, 0); - if (0 <= fd) - break; - } - - if (errno == EEXIST && find_delayed_link_source (file_name)) - { - /* The placeholder file has already been created. This means - that the link being extracted is a duplicate of an already - processed one. Skip it. - */ - return true; - } - - switch (maybe_recoverable (file_name, false, interdir_made)) - { - case RECOVER_OK: - continue; - - case RECOVER_SKIP: - return true; - - case RECOVER_NO: - open_error (file_name); - return false; - } - } - - if (fstat (fd, &st) < 0) - { - stat_error (file_name); - close (fd); - } - else if (close (fd) < 0) - close_error (file_name); - else - { - struct delayed_set_stat *h; - struct delayed_link *p = - xmalloc (FLEXNSIZEOF (struct delayed_link, target, - strlen (current_stat_info.link_name) + 1)); - p->next = NULL; - p->st_dev = st.st_dev; - p->st_ino = st.st_ino; -#if HAVE_BIRTHTIME - p->birthtime = get_stat_birthtime (&st); -#endif - p->is_symlink = is_symlink; - if (is_symlink) - { - p->mode = current_stat_info.stat.st_mode; - p->uid = current_stat_info.stat.st_uid; - p->gid = current_stat_info.stat.st_gid; - p->atime = current_stat_info.atime; - p->mtime = current_stat_info.mtime; - } - p->change_dir = chdir_current; - p->sources = xmalloc (FLEXNSIZEOF (struct string_list, string, - strlen (file_name) + 1)); - p->sources->next = 0; - strcpy (p->sources->string, file_name); - p->cntx_name = NULL; - assign_string_or_null (&p->cntx_name, current_stat_info.cntx_name); - p->acls_a_ptr = NULL; - p->acls_a_len = 0; - p->acls_d_ptr = NULL; - p->acls_d_len = 0; - xattr_map_init (&p->xattr_map); - xattr_map_copy (&p->xattr_map, ¤t_stat_info.xattr_map); - 
strcpy (p->target, current_stat_info.link_name); - - *delayed_link_tail = p; - delayed_link_tail = &p->next; - if (! ((delayed_link_table - || (delayed_link_table = hash_initialize (0, 0, dl_hash, - dl_compare, free))) - && hash_insert (delayed_link_table, p))) - xalloc_die (); - - if ((h = find_direct_ancestor (file_name)) != NULL) - mark_after_links (h); - - return true; - } - - return false; -} - static bool extract_link (char *file_name, MAYBE_UNUSED char typeflag) { @@ -1559,10 +1296,6 @@ extract_link (char *file_name, MAYBE_UNUSED char typeflag) link_name = current_stat_info.link_name; - if ((! absolute_names_option && contains_dot_dot (link_name)) - || find_delayed_link_source (link_name)) - return create_placeholder_file (file_name, false, &interdir_made); - do { struct stat st, st1; @@ -1579,28 +1312,7 @@ extract_link (char *file_name, MAYBE_UNUSED char typeflag) } if (status == 0) - { - if (delayed_link_table - && fstatat (f1.fd, f1.base, &st1, AT_SYMLINK_NOFOLLOW) == 0) - { - struct delayed_link dl1; - dl1.st_ino = st1.st_ino; - dl1.st_dev = st1.st_dev; - struct delayed_link *ds = hash_lookup (delayed_link_table, &dl1); - if (ds && ds->change_dir == chdir_current - && BIRTHTIME_EQ (ds->birthtime, get_stat_birthtime (&st1))) - { - struct string_list *p - = xmalloc (FLEXNSIZEOF (struct string_list, - string, strlen (file_name) + 1)); - strcpy (p->string, file_name); - p->next = ds->sources; - ds->sources = p; - } - } - - return true; - } + return true; int e = errno; if ((e == EEXIST && streq (link_name, file_name)) @@ -1629,11 +1341,6 @@ extract_symlink (char *file_name, MAYBE_UNUSED char typeflag) { bool interdir_made = false; - if (! 
absolute_names_option - && (IS_ABSOLUTE_FILE_NAME (current_stat_info.link_name) - || contains_dot_dot (current_stat_info.link_name))) - return create_placeholder_file (file_name, true, &interdir_made); - for (struct fdbase f; ((f = fdbase (file_name)).fd == BADFD || symlinkat (current_stat_info.link_name, f.fd, f.base) < 0); @@ -1857,20 +1564,12 @@ void extract_archive (void) { char typeflag; - bool skip_dotdot_name; fatal_exit_hook = extract_finish; set_next_block_after (current_header); - skip_dotdot_name = (!absolute_names_option - && contains_dot_dot (current_stat_info.orig_file_name)); - if (skip_dotdot_name) - paxerror (0, _("%s: Member name contains '..'"), - quotearg_colon (current_stat_info.orig_file_name)); - if (!current_stat_info.file_name[0] - || skip_dotdot_name || (interactive_option && !confirm ("extract", current_stat_info.file_name))) { @@ -1922,115 +1621,11 @@ extract_archive (void) undo_last_backup (); } -/* Extract the link DS whose final extraction was delayed. */ -static void -apply_delayed_link (struct delayed_link *ds) -{ - struct string_list *sources = ds->sources; - char const *valid_source = NULL; - - chdir_do (ds->change_dir); - - for (sources = ds->sources; sources; sources = sources->next) - { - char const *source = sources->string; - struct stat st; - - /* Make sure the placeholder file is still there. If not, - don't create a link, as the placeholder was probably - removed by a later extraction. */ - struct fdbase f = fdbase (source); - if (f.fd != BADFD && fstatat (f.fd, f.base, &st, AT_SYMLINK_NOFOLLOW) == 0 - && SAME_INODE (st, *ds) - && BIRTHTIME_EQ (get_stat_birthtime (&st), ds->birthtime)) - { - /* Unlink the placeholder, then create a hard link if possible, - a symbolic link otherwise. */ - struct fdbase f1; - if (unlinkat (f.fd, f.base, 0) < 0) - unlink_error (source); - else if (valid_source - && ((f1 = f.fd == BADFD ? 
f : fdbase1 (valid_source)).fd - != BADFD) - && linkat (f1.fd, f1.base, f.fd, f.base, 0) == 0) - ; - else if (!ds->is_symlink) - { - f1 = f.fd == BADFD ? f : fdbase1 (ds->target); - if (f1.fd == BADFD - || linkat (f1.fd, f1.base, f.fd, f.base, 0) < 0) - link_error (ds->target, source); - } - else if (symlinkat (ds->target, f.fd, f.base) < 0) - symlink_error (ds->target, source); - else - { - struct tar_stat_info st1; - st1.stat.st_mode = ds->mode; - st1.stat.st_uid = ds->uid; - st1.stat.st_gid = ds->gid; - st1.atime = ds->atime; - st1.mtime = ds->mtime; - st1.cntx_name = ds->cntx_name; - st1.acls_a_ptr = ds->acls_a_ptr; - st1.acls_a_len = ds->acls_a_len; - st1.acls_d_ptr = ds->acls_d_ptr; - st1.acls_d_len = ds->acls_d_len; - st1.xattr_map = ds->xattr_map; - set_stat (source, &st1, -1, 0, 0, SYMTYPE, - false, AT_SYMLINK_NOFOLLOW); - valid_source = source; - } - } - } - - /* There is little point to freeing, as we are about to exit, - and freeing is more likely to cause than cure trouble. */ - if (false) - { - for (sources = ds->sources; sources; ) - { - struct string_list *next = sources->next; - free (sources); - sources = next; - } - - xattr_map_free (&ds->xattr_map); - free (ds->cntx_name); - } -} - -/* Extract the links whose final extraction were delayed. */ -static void -apply_delayed_links (void) -{ - for (struct delayed_link *ds = delayed_link_head; ds; ds = ds->next) - apply_delayed_link (ds); - - if (false && delayed_link_table) - { - /* There is little point to freeing, as we are about to exit, - and freeing is more likely to cause than cure trouble. - Also, the above code has not bothered to free the list - in delayed_link_head. */ - hash_free (delayed_link_table); - delayed_link_table = NULL; - } -} - /* Finish the extraction of an archive. */ void extract_finish (void) { - /* First, fix the status of ordinary directories that need fixing. 
*/ - apply_nonancestor_delayed_set_stat ("", false); - - /* Then, apply delayed links, so that they don't affect delayed - directory status-setting for ordinary directories. */ - apply_delayed_links (); - - /* Finally, fix the status of directories that are ancestors - of delayed links. */ + /* Fix the status of ordinary directories that need fixing. */ apply_nonancestor_delayed_set_stat ("", true); /* This table should be empty after apply_nonancestor_delayed_set_stat. */ diff --git a/src/names.c b/src/names.c index 4d8b6ee7..784595f1 100644 --- a/src/names.c +++ b/src/names.c @@ -1989,22 +1989,3 @@ stripped_prefix_len (char const *file_name, idx_t num) } return -1; } - -/* Return nonzero if NAME contains ".." as a file name component. */ -bool -contains_dot_dot (char const *name) -{ - char const *p = name + FILE_SYSTEM_PREFIX_LEN (name); - - for (;; p++) - { - if (p[0] == '.' && p[1] == '.' && (ISSLASH (p[2]) || !p[2])) - return 1; - - while (! ISSLASH (*p)) - { - if (! *p++) - return 0; - } - } -}