From: Paul Eggert Date: Sat, 26 Jul 2025 03:39:32 +0000 (-0700) Subject: Port more code to UBSan, and fix alignment bug X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=75735940f1464e45d8fa43169499ad39d5940743;p=thirdparty%2Ftar.git Port more code to UBSan, and fix alignment bug Problem with extract_file reported by Kirill Furman in: https://lists.gnu.org/r/bug-tar/2025-07/msg00003.html Since the UBSan thing seems to be a recurring issue, I fixed other instances of the problem that I found. Also, I noticed that the same line of code had another failure to conform to C23’s rules for pointers (an alignment issue not caught by UBSan), so I fixed that too. None of these issues matter on practical production hosts. * src/common.h (charptr): New function. * src/buffer.c (available_space_after, short_read, flush_archive) (backspace_output, try_new_volume, simple_flush_read) (_gnu_flush_read, _gnu_flush_write): * src/compare.c (read_and_process): * src/create.c (write_eot, write_gnu_long_link) (dump_regular_file, dump_dir0): * src/extract.c (extract_file): * src/incremen.c (get_gnu_dumpdir): * src/list.c (read_header): * src/sparse.c (sparse_dump_region, sparse_extract_region): * src/system.c (sys_write_archive_buffer) (sys_child_open_for_compress, sys_child_open_for_uncompress): * src/update.c (append_file, update_archive): Use it. * src/buffer.c (set_next_block_after): Arg is now void *, not union block *, since it need not be a valid union block * pointer and this can matter on unusual or debugging implementations. Turn a loop into an if so that the code is O(1) not O(N). --- diff --git a/NEWS b/NEWS index 39986f23..e12fff27 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -GNU tar NEWS - User visible changes. 2025-05-06 +GNU tar NEWS - User visible changes. 2025-07-25 Please send GNU tar bug reports to version TBD @@ -45,6 +45,9 @@ option. ** Transformations that change case (e.g., --transform='s/.*/\L&/') now work correctly with multi-byte characters. +** tar now works better in strict debugging environments that do not + allow pointer arithmetic to escape from a sub-element of an array. + * Performance improvements ** Sparse files are now read and written with larger blocksizes. diff --git a/src/buffer.c b/src/buffer.c index 2c40e948..1165186a 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -626,12 +626,16 @@ find_next_block (void) return current_block; } -/* Indicate that we have used all blocks up thru BLOCK. */ +/* Indicate that we have used all blocks up thru the block addressed by PTR, + which may point to any of the bytes in the addressed block. + This does nothing if PTR points before the current block. */ void -set_next_block_after (union block *block) +set_next_block_after (void *ptr) { - while (block >= current_block) - current_block++; + char *p = ptr; + ptrdiff_t offset = p - charptr (current_block); + if (0 <= offset) + current_block += (offset >> LG_BLOCKSIZE) + 1; /* Do *not* flush the archive here. If we do, the same argument to set_next_block_after could mean the next block (if the input record @@ -648,7 +652,7 @@ set_next_block_after (union block *block) idx_t available_space_after (union block *pointer) { - return record_end->buffer - pointer->buffer; + return charptr (record_end) - charptr (pointer); } /* Close file having descriptor FD, and abort if close unsuccessful. */ @@ -960,7 +964,7 @@ static void short_read (idx_t status) { idx_t left = record_size - status; /* bytes left to read */ - char *more = (char *) record_start + status; /* address of next read */ + char *more = charptr (record_start) + status; /* address of next read */ if (left && left % BLOCKSIZE == 0 && (warning_option & WARN_RECORD_SIZE) @@ -1029,7 +1033,7 @@ flush_archive (void) } } - buffer_level = current_block->buffer - record_start->buffer; + buffer_level = charptr (current_block) - charptr (record_start); record_start_block += record_end - record_start; current_block = record_start; record_end = record_start + blocking_factor; @@ -1063,7 +1067,7 @@ backspace_output (void) /* Seek back to the beginning of this record and start writing there. */ - position -= record_end->buffer - record_start->buffer; + position -= charptr (record_end) - charptr (record_start); if (position < 0) position = 0; if (rmtlseek (archive, position, SEEK_SET) != position) @@ -1076,8 +1080,7 @@ backspace_output (void) /* Replace the first part of the record with NULs. */ if (record_start->buffer != output_start) - memset (record_start->buffer, 0, - output_start - record_start->buffer); + memset (record_start, 0, output_start - charptr (record_start)); } } } @@ -1444,7 +1447,7 @@ try_new_volume (void) if (!new_volume (acc)) return true; - while ((status = rmtread (archive, record_start->buffer, record_size)) + while ((status = rmtread (archive, charptr (record_start), record_size)) < 0) archive_read_error (); @@ -1809,7 +1812,7 @@ simple_flush_read (void) } ptrdiff_t nread; - while ((nread = rmtread (archive, record_start->buffer, record_size)) < 0) + while ((nread = rmtread (archive, charptr (record_start), record_size)) < 0) archive_read_error (); short_read_slop = 0; if (nread == record_size) @@ -1856,7 +1859,7 @@ _gnu_flush_read (void) } ptrdiff_t nread; - while ((nread = rmtread (archive, record_start->buffer, record_size)) < 0 + while ((nread = rmtread (archive, charptr (record_start), record_size)) < 0 && ! (errno == ENOSPC && multi_volume_option)) archive_read_error (); /* The condition below used to include @@ -1934,7 +1937,7 @@ _gnu_flush_write (idx_t buffer_level) prev_written += bytes_written; bytes_written = 0; - copy_ptr = record_start->buffer + status; + copy_ptr = charptr (record_start) + status; copy_size = buffer_level - status; /* Switch to the next buffer */ @@ -1960,16 +1963,16 @@ _gnu_flush_write (idx_t buffer_level) inhibit_map = false; while (bufsize < copy_size) { - memcpy (header->buffer, copy_ptr, bufsize); + memcpy (header, copy_ptr, bufsize); copy_ptr += bufsize; copy_size -= bufsize; - set_next_block_after (header + ((bufsize - 1) >> LG_BLOCKSIZE)); + set_next_block_after (charptr (header) + bufsize - 1); header = find_next_block (); bufsize = available_space_after (header); } - memcpy (header->buffer, copy_ptr, copy_size); - memset (header->buffer + copy_size, 0, bufsize - copy_size); - set_next_block_after (header + ((copy_size - 1) >> LG_BLOCKSIZE)); + memcpy (header, copy_ptr, copy_size); + memset (charptr (header) + copy_size, 0, bufsize - copy_size); + set_next_block_after (charptr (header) + copy_size - 1); find_next_block (); } diff --git a/src/common.h b/src/common.h index 5d7cfae7..9bc06523 100644 --- a/src/common.h +++ b/src/common.h @@ -412,6 +412,22 @@ extern enum access_mode access_mode; /* Module buffer.c. */ +/* Return BLOCK, but as a char * pointer that can be used to address + any byte in the array of blocks containing *BLOCK, as opposed to + BLOCK->buffer which can address only the bytes in *BLOCK itself. + The distinction can matter in strict debugging environments. + + Callers should use this function only when possibly needing access + to storage outside of *BLOCK, or when the resulting pointer needs + to be compared to other pointers that point outside of *BLOCK. + Code not needing such access should use BLOCK->buffer instead, as + some debugging environments can catch some subscript errors that way. */ +COMMON_INLINE char * +charptr (union block *block) +{ + return (char *) block; +} + /* File descriptor for archive file. */ extern int archive; @@ -456,7 +472,7 @@ void init_volume_number (void); void open_archive (enum access_mode mode); void print_total_stats (void); void reset_eof (void); -void set_next_block_after (union block *block); +void set_next_block_after (void *); void clear_read_error_count (void); void xclose (int fd); _Noreturn void archive_write_error (ssize_t status); diff --git a/src/compare.c b/src/compare.c index de54d652..95a91181 100644 --- a/src/compare.c +++ b/src/compare.c @@ -136,10 +136,9 @@ read_and_process (struct tar_stat_info *st, bool (*processor) (idx_t, char *)) idx_t data_size = available_space_after (data_block); if (data_size > size) data_size = size; - if (!processor (data_size, data_block->buffer)) + if (!processor (data_size, charptr (data_block))) processor = process_noop; - set_next_block_after ((union block *) - (data_block->buffer + data_size - 1)); + set_next_block_after (charptr (data_block) + data_size - 1); size -= data_size; mv_size_left (size); } diff --git a/src/create.c b/src/create.c index 2e487314..7da4560d 100644 --- a/src/create.c +++ b/src/create.c @@ -473,7 +473,7 @@ write_eot (void) memset (pointer->buffer, 0, BLOCKSIZE); set_next_block_after (pointer); pointer = find_next_block (); - memset (pointer->buffer, 0, available_space_after (pointer)); + memset (charptr (pointer), 0, available_space_after (pointer)); set_next_block_after (pointer); } @@ -540,16 +540,16 @@ write_gnu_long_link (struct tar_stat_info *st, const char *p, char type) while (bufsize < size) { - memcpy (header->buffer, p, bufsize); + memcpy (charptr (header), p, bufsize); p += bufsize; size -= bufsize; - set_next_block_after (header + ((bufsize - 1) >> LG_BLOCKSIZE)); + set_next_block_after (charptr (header) + bufsize - 1); header = find_next_block (); bufsize = available_space_after (header); } - memcpy (header->buffer, p, size); - memset (header->buffer + size, 0, bufsize - size); - set_next_block_after (header + ((size - 1) >> LG_BLOCKSIZE)); + memcpy (charptr (header), p, size); + memset (charptr (header) + size, 0, bufsize - size); + set_next_block_after (charptr (header) + size - 1); } static int @@ -1047,16 +1047,16 @@ dump_regular_file (int fd, struct tar_stat_info *st) } idx_t count = (fd <= 0 ? bufsize - : blocking_read (fd, blk->buffer, bufsize)); + : blocking_read (fd, charptr (blk), bufsize)); size_left -= count; - set_next_block_after (blk + ((bufsize - 1) >> LG_BLOCKSIZE)); + set_next_block_after (charptr (blk) + bufsize - 1); if (count != bufsize) { if (errno) read_diag_details (st->orig_file_name, st->stat.st_size - size_left, bufsize); - memset (blk->buffer + count, 0, bufsize - count); + memset (charptr (blk) + count, 0, bufsize - count); warnopt (WARN_FILE_SHRANK, 0, ngettext (("%s: File shrank by %jd byte;" " padding with zeros"), @@ -1134,10 +1134,10 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) if (count) memset (blk->buffer + size_left, 0, BLOCKSIZE - count); } - memcpy (blk->buffer, p_buffer, bufsize); + memcpy (charptr (blk), p_buffer, bufsize); size_left -= bufsize; p_buffer += bufsize; - set_next_block_after (blk + ((bufsize - 1) >> LG_BLOCKSIZE)); + set_next_block_after (charptr (blk) + bufsize - 1); } } return; diff --git a/src/extract.c b/src/extract.c index 5d89458a..304e1f79 100644 --- a/src/extract.c +++ b/src/extract.c @@ -1377,11 +1377,10 @@ extract_file (char *file_name, char typeflag) if (written > size) written = size; errno = 0; - idx_t count = blocking_write (fd, data_block->buffer, written); + idx_t count = blocking_write (fd, charptr (data_block), written); size -= written; - set_next_block_after ((union block *) - (data_block->buffer + written - 1)); + set_next_block_after (charptr (data_block) + written - 1); if (count != written) { if (!to_command_option) diff --git a/src/incremen.c b/src/incremen.c index 92ef9c62..0e418893 100644 --- a/src/incremen.c +++ b/src/incremen.c @@ -1521,10 +1521,9 @@ get_gnu_dumpdir (struct tar_stat_info *stat_info) copied = available_space_after (data_block); if (copied > size) copied = size; - memcpy (to, data_block->buffer, copied); + memcpy (to, charptr (data_block), copied); to += copied; - set_next_block_after ((union block *) - (data_block->buffer + copied - 1)); + set_next_block_after (charptr (data_block) + copied - 1); } mv_end (); diff --git a/src/list.c b/src/list.c index fc1b10ce..8e807141 100644 --- a/src/list.c +++ b/src/list.c @@ -393,7 +393,7 @@ tar_checksum (union block *header, bool silent) read_header_x_global when a POSIX global header is read, decode it and return HEADER_SUCCESS_EXTENDED. - You must always set_next_block_after(*return_block) to skip past + You must always set_next_block_after (*return_block) to skip past the header which this routine reads. */ enum read_header @@ -473,7 +473,7 @@ read_header (union block **return_block, struct tar_stat_info *info, set_next_block_after (header); *header_copy = *header; - bp = header_copy->buffer + BLOCKSIZE; + bp = charptr (header_copy + 1); for (size -= BLOCKSIZE; size > 0; size -= written) { @@ -487,10 +487,9 @@ read_header (union block **return_block, struct tar_stat_info *info, if (written > size) written = size; - memcpy (bp, data_block->buffer, written); + memcpy (bp, charptr (data_block), written); bp += written; - set_next_block_after ((union block *) - (data_block->buffer + written - 1)); + set_next_block_after (charptr (data_block) + written - 1); } *bp = '\0'; @@ -532,7 +531,7 @@ read_header (union block **return_block, struct tar_stat_info *info, if (next_long_name) { - name = next_long_name->buffer + BLOCKSIZE; + name = charptr (next_long_name + 1); recent_long_name = next_long_name; recent_long_name_blocks = next_long_name_blocks; next_long_name = NULL; @@ -564,7 +563,7 @@ read_header (union block **return_block, struct tar_stat_info *info, if (next_long_link) { - name = next_long_link->buffer + BLOCKSIZE; + name = charptr (next_long_link + 1); recent_long_link = next_long_link; recent_long_link_blocks = next_long_link_blocks; next_long_link = NULL; diff --git a/src/sparse.c b/src/sparse.c index b0673081..327a9069 100644 --- a/src/sparse.c +++ b/src/sparse.c @@ -417,7 +417,7 @@ sparse_dump_region (struct tar_sparse_file *file, idx_t i) union block *blk = find_next_block (); idx_t avail = available_space_after (blk); idx_t bufsize = min (bytes_left, avail); - idx_t bytes_read = full_read (file->fd, blk->buffer, bufsize); + idx_t bytes_read = full_read (file->fd, charptr (blk), bufsize); if (bytes_read < BLOCKSIZE) memset (blk->buffer + bytes_read, 0, BLOCKSIZE - bytes_read); bytes_left -= bytes_read; @@ -450,7 +450,7 @@ sparse_dump_region (struct tar_sparse_file *file, idx_t i) return false; } - set_next_block_after (blk + ((bufsize - 1) >> LG_BLOCKSIZE)); + set_next_block_after (charptr (blk) + bufsize - 1); } return true; @@ -482,9 +482,9 @@ sparse_extract_region (struct tar_sparse_file *file, idx_t i) } idx_t avail = available_space_after (blk); idx_t wrbytes = min (write_size, avail); - set_next_block_after (blk + ((wrbytes - 1) >> LG_BLOCKSIZE)); + set_next_block_after (charptr (blk) + wrbytes - 1); file->dumped_size += avail; - idx_t count = blocking_write (file->fd, blk->buffer, wrbytes); + idx_t count = blocking_write (file->fd, charptr (blk), wrbytes); write_size -= count; mv_size_left (file->stat_info->archive_file_size - file->dumped_size); file->offset += count; diff --git a/src/system.c b/src/system.c index 72dfade4..a52b8af4 100644 --- a/src/system.c +++ b/src/system.c @@ -140,7 +140,7 @@ sys_truncate (int fd) idx_t sys_write_archive_buffer (void) { - return full_write (archive, record_start->buffer, record_size); + return full_write (archive, charptr (record_start), record_size); } /* Set ARCHIVE for writing, then compressing an archive. */ @@ -309,7 +309,7 @@ is_regular_file (const char *name) idx_t sys_write_archive_buffer (void) { - return rmtwrite (archive, record_start->buffer, record_size); + return rmtwrite (archive, charptr (record_start), record_size); } /* Read and write file descriptors from a pipe(pipefd) call. */ @@ -458,7 +458,7 @@ sys_child_open_for_compress (void) /* Assemble a record. */ - for (length = 0, cursor = record_start->buffer; + for (length = 0, cursor = charptr (record_start); length < record_size; length += status, cursor += status) { @@ -481,7 +481,7 @@ sys_child_open_for_compress (void) if (length > 0) { - memset (record_start->buffer + length, 0, record_size - length); + memset (charptr (record_start) + length, 0, record_size - length); status = sys_write_archive_buffer (); if (status != record_size) archive_write_error (status); @@ -622,12 +622,12 @@ sys_child_open_for_uncompress (void) clear_read_error_count (); ptrdiff_t n; - while ((n = rmtread (archive, record_start->buffer, record_size)) < 0) + while ((n = rmtread (archive, charptr (record_start), record_size)) < 0) archive_read_error (); if (n == 0) break; - char *cursor = record_start->buffer; + char *cursor = charptr (record_start); do { idx_t count = min (n, BLOCKSIZE); diff --git a/src/update.c b/src/update.c index faf0550b..c74d3baa 100644 --- a/src/update.c +++ b/src/update.c @@ -57,15 +57,15 @@ append_file (char *file_name) { union block *start = find_next_block (); idx_t bufsize = available_space_after (start); - idx_t status = full_read (handle, start->buffer, bufsize); + idx_t status = full_read (handle, charptr (start), bufsize); if (status < bufsize && errno) read_fatal (file_name); if (status == 0) break; idx_t rem = status % BLOCKSIZE; if (rem) - memset (start->buffer + (status - rem), 0, BLOCKSIZE - rem); - set_next_block_after (start + ((status - 1) >> LG_BLOCKSIZE)); + memset (charptr (start) + (status - rem), 0, BLOCKSIZE - rem); + set_next_block_after (charptr (start) + status - 1); } if (close (handle) < 0) @@ -206,7 +206,7 @@ update_archive (void) reset_eof (); time_to_start_writing = true; - output_start = current_block->buffer; + output_start = charptr (current_block); { struct name const *p;