From 3ece0355d52e41a1b079c0c46477a32250278c11 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Wed, 19 Nov 2008 19:36:45 +0100 Subject: [PATCH] cp: use far less memory in some cases cp --link was "remembering" many name,dev,inode triples unnecessarily. cp was doing the same, even without --link, for every directory in the source hierarchy, while it can do its job with entries merely for the command-line arguments. Prompted by a report from Patrick Shoenfeld. Details . * src/copy.c (copy_internal): Refrain from remembering name,dev,inode for most files, when invoked via cp --link. Record an infloop-avoidance triple for each directory specified on the command line, not for each directory in the source tree. Don't record a dir-triple when x->hard_link is set. * NEWS (Buf fixes): Mention it. * tests/cp/link-heap: New file. Test for cp's lowered memory usage. * tests/Makefile.am (TESTS): Add link-heap. --- NEWS | 2 ++ src/copy.c | 36 +++++++++++++++++++++--------------- tests/Makefile.am | 1 + tests/cp/link-heap | 41 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 15 deletions(-) create mode 100755 tests/cp/link-heap diff --git a/NEWS b/NEWS index cbea67c3d9..360cb4b561 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,8 @@ GNU coreutils NEWS -*- outline -*- ** Bug fixes + cp uses much less memory in some situations + seq 9223372036854775807 9223372036854775808 now prints only two numbers on systems with extended long double support and good library support. Even with this patch, on some systems, it still produces invalid output, diff --git a/src/copy.c b/src/copy.c index bc1b20ee2b..c9c79a1b1f 100644 --- a/src/copy.c +++ b/src/copy.c @@ -1411,6 +1411,10 @@ copy_internal (char const *src_name, char const *dst_name, we can arrange to create a hard link between the corresponding names in the destination tree. + When using the --link (-l) option, there is no need to take special + measures, because (barring race conditions) files that are hard-linked + in the source tree will also be hard-linked in the destination tree. + Sometimes, when preserving links, we have to record dev/ino even though st_nlink == 1: - when in move_mode, since we may be moving a group of N hard-linked @@ -1429,27 +1433,29 @@ copy_internal (char const *src_name, char const *dst_name, - likewise for -L except that it applies to all files, not just command line arguments. - Also record directory dev/ino when using --recursive. We'll use that - info to detect this problem: cp -R dir dir. FIXME-maybe: ideally, - directory info would be recorded in a separate hash table, since - such entries are useful only while a single command line hierarchy - is being copied -- so that separate table could be cleared between - command line args. Using the same hash table to preserve hard - links means that it may not be cleared. */ + Also, with --recursive, record dev/ino of each command-line directory. + We'll use that info to detect this problem: cp -R dir dir. */ if (x->move_mode && src_sb.st_nlink == 1) { earlier_file = src_to_dest_lookup (src_sb.st_ino, src_sb.st_dev); } - else if ((x->preserve_links - && (1 < src_sb.st_nlink - || (command_line_arg - && x->dereference == DEREF_COMMAND_LINE_ARGUMENTS) - || x->dereference == DEREF_ALWAYS)) - || (x->recursive && S_ISDIR (src_mode))) + else if (x->preserve_links + && !x->hard_link + && (1 < src_sb.st_nlink + || (command_line_arg + && x->dereference == DEREF_COMMAND_LINE_ARGUMENTS) + || x->dereference == DEREF_ALWAYS)) { earlier_file = remember_copied (dst_name, src_sb.st_ino, src_sb.st_dev); } + else if (x->recursive && S_ISDIR (src_mode)) + { + if (command_line_arg) + earlier_file = remember_copied (dst_name, src_sb.st_ino, src_sb.st_dev); + else + earlier_file = src_to_dest_lookup (src_sb.st_ino, src_sb.st_dev); + } /* Did we copy this inode somewhere else (in this command line argument) and therefore this is a second hard link to the inode? */ @@ -1730,8 +1736,8 @@ copy_internal (char const *src_name, char const *dst_name, /* Insert the created directory's inode and device numbers into the search structure, so that we can avoid copying it again. */ - - remember_copied (dst_name, dst_sb.st_ino, dst_sb.st_dev); + if (!x->hard_link) + remember_copied (dst_name, dst_sb.st_ino, dst_sb.st_dev); if (x->verbose) emit_verbose (src_name, dst_name, NULL); diff --git a/tests/Makefile.am b/tests/Makefile.am index e955d9b77b..f264bd0c67 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -262,6 +262,7 @@ TESTS = \ cp/file-perm-race \ cp/into-self \ cp/link \ + cp/link-heap \ cp/link-no-deref \ cp/link-preserve \ cp/no-deref-link1 \ diff --git a/tests/cp/link-heap b/tests/cp/link-heap new file mode 100755 index 0000000000..b20c7d3b14 --- /dev/null +++ b/tests/cp/link-heap @@ -0,0 +1,41 @@ +#!/bin/sh +# ensure that cp --preserve=link --link doesn't waste heap + +# Copyright (C) 2008 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +if test "$VERBOSE" = yes; then + set -x + cp --version +fi + +. $srcdir/test-lib.sh +expensive_ +require_ulimit_ + +a=$(printf %031d 0) +b=$(printf %031d 1) +(mkdir $a \ + && cd $a \ + && seq --format=%031g 10000 |xargs touch \ + && seq --format=d%030g 10000 |xargs mkdir ) || framework_failure +cp -al $a $b || framework_failure +mkdir e || framework_failure +mv $a $b e || framework_failure + +fail=0 +(ulimit -v 10000; cp -al e f) || fail=1 + +Exit $fail -- 2.39.5