git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-57141: Add dircmp shallow option (GH-109499)
author: Tobias Rautenkranz <github@tobias.rautenkranz.ch>
Mon, 4 Mar 2024 17:27:43 +0000 (18:27 +0100)
committer: GitHub <noreply@github.com>
Mon, 4 Mar 2024 17:27:43 +0000 (17:27 +0000)
Co-authored-by: Steve Ward <planet36@gmail.com>
Co-authored-by: Sanyam Khurana <8039608+CuriousLearner@users.noreply.github.com>
Doc/library/filecmp.rst
Lib/filecmp.py
Lib/test/test_filecmp.py
Misc/NEWS.d/next/Library/2023-07-12-14-52-04.gh-issue-57141.L2k8Xb.rst [new file with mode: 0644]

index dfe4b7c59fd578b8b9345f8319c09d2407e9173c..42d20b9c20178302d1482a063d45cc2355f15cfe 100644 (file)
@@ -70,7 +70,7 @@ The :mod:`filecmp` module defines the following functions:
 The :class:`dircmp` class
 -------------------------
 
-.. class:: dircmp(a, b, ignore=None, hide=None)
+.. class:: dircmp(a, b, ignore=None, hide=None, shallow=True)
 
    Construct a new directory comparison object, to compare the directories *a*
    and *b*.  *ignore* is a list of names to ignore, and defaults to
@@ -78,7 +78,12 @@ The :class:`dircmp` class
    defaults to ``[os.curdir, os.pardir]``.
 
    The :class:`dircmp` class compares files by doing *shallow* comparisons
-   as described for :func:`filecmp.cmp`.
+   as described for :func:`filecmp.cmp` by default; pass ``shallow=False``
+   to compare the contents of the files instead.
+
+   .. versionchanged:: 3.13
+
+      Added the *shallow* parameter.
 
    The :class:`dircmp` class provides the following methods:
 
index 30bd900fa805aa961c195023bccc9e091123deca..6ffc71fc059a80b2e5f1bc7cbacb47a9e2ce70bb 100644 (file)
@@ -88,12 +88,15 @@ def _do_cmp(f1, f2):
 class dircmp:
     """A class that manages the comparison of 2 directories.
 
-    dircmp(a, b, ignore=None, hide=None)
+    dircmp(a, b, ignore=None, hide=None, shallow=True)
       A and B are directories.
       IGNORE is a list of names to ignore,
         defaults to DEFAULT_IGNORES.
       HIDE is a list of names to hide,
         defaults to [os.curdir, os.pardir].
+      SHALLOW specifies whether to just check the stat signature (do not read
+        the files),
+        defaults to True.
 
     High level usage:
       x = dircmp(dir1, dir2)
@@ -121,7 +124,7 @@ class dircmp:
        in common_dirs.
      """
 
-    def __init__(self, a, b, ignore=None, hide=None): # Initialize
+    def __init__(self, a, b, ignore=None, hide=None, shallow=True): # Initialize
         self.left = a
         self.right = b
         if hide is None:
@@ -132,6 +135,7 @@ class dircmp:
             self.ignore = DEFAULT_IGNORES
         else:
             self.ignore = ignore
+        self.shallow = shallow
 
     def phase0(self): # Compare everything except common subdirectories
         self.left_list = _filter(os.listdir(self.left),
@@ -184,7 +188,7 @@ class dircmp:
                 self.common_funny.append(x)
 
     def phase3(self): # Find out differences between common files
-        xx = cmpfiles(self.left, self.right, self.common_files)
+        xx = cmpfiles(self.left, self.right, self.common_files, self.shallow)
         self.same_files, self.diff_files, self.funny_files = xx
 
     def phase4(self): # Find out differences between common subdirectories
@@ -196,7 +200,8 @@ class dircmp:
         for x in self.common_dirs:
             a_x = os.path.join(self.left, x)
             b_x = os.path.join(self.right, x)
-            self.subdirs[x]  = self.__class__(a_x, b_x, self.ignore, self.hide)
+            self.subdirs[x]  = self.__class__(a_x, b_x, self.ignore, self.hide,
+                                              self.shallow)
 
     def phase4_closure(self): # Recursively call phase4() on subdirectories
         self.phase4()
index 9b5ac12bccc58f9ce4615bb48060c5edf5815e16..b5df71678264a813c3339f21e1232b3cbd2f373d 100644 (file)
@@ -8,11 +8,24 @@ from test import support
 from test.support import os_helper
 
 
+def _create_file_shallow_equal(template_path, new_path):
+    """Create a file with the same size and mtime but different content."""
+    shutil.copy2(template_path, new_path)
+    with open(new_path, 'r+b') as f:
+        next_char = bytearray(f.read(1))
+        next_char[0] = (next_char[0] + 1) % 256
+        f.seek(0)
+        f.write(next_char)
+    shutil.copystat(template_path, new_path)
+    assert os.stat(new_path).st_size == os.stat(template_path).st_size
+    assert os.stat(new_path).st_mtime == os.stat(template_path).st_mtime
+
 class FileCompareTestCase(unittest.TestCase):
     def setUp(self):
         self.name = os_helper.TESTFN
         self.name_same = os_helper.TESTFN + '-same'
         self.name_diff = os_helper.TESTFN + '-diff'
+        self.name_same_shallow = os_helper.TESTFN + '-same-shallow'
         data = 'Contents of file go here.\n'
         for name in [self.name, self.name_same, self.name_diff]:
             with open(name, 'w', encoding="utf-8") as output:
@@ -20,12 +33,19 @@ class FileCompareTestCase(unittest.TestCase):
 
         with open(self.name_diff, 'a+', encoding="utf-8") as output:
             output.write('An extra line.\n')
+
+        for name in [self.name_same, self.name_diff]:
+            shutil.copystat(self.name, name)
+
+        _create_file_shallow_equal(self.name, self.name_same_shallow)
+
         self.dir = tempfile.gettempdir()
 
     def tearDown(self):
         os.unlink(self.name)
         os.unlink(self.name_same)
         os.unlink(self.name_diff)
+        os.unlink(self.name_same_shallow)
 
     def test_matching(self):
         self.assertTrue(filecmp.cmp(self.name, self.name),
@@ -36,12 +56,17 @@ class FileCompareTestCase(unittest.TestCase):
                         "Comparing file to identical file fails")
         self.assertTrue(filecmp.cmp(self.name, self.name_same, shallow=False),
                         "Comparing file to identical file fails")
+        self.assertTrue(filecmp.cmp(self.name, self.name_same_shallow),
+                        "Shallow identical files should be considered equal")
 
     def test_different(self):
         self.assertFalse(filecmp.cmp(self.name, self.name_diff),
                     "Mismatched files compare as equal")
         self.assertFalse(filecmp.cmp(self.name, self.dir),
                     "File and directory compare as equal")
+        self.assertFalse(filecmp.cmp(self.name, self.name_same_shallow,
+                                     shallow=False),
+                        "Mismatched file to shallow identical file compares as equal")
 
     def test_cache_clear(self):
         first_compare = filecmp.cmp(self.name, self.name_same, shallow=False)
@@ -56,6 +81,8 @@ class DirCompareTestCase(unittest.TestCase):
         self.dir = os.path.join(tmpdir, 'dir')
         self.dir_same = os.path.join(tmpdir, 'dir-same')
         self.dir_diff = os.path.join(tmpdir, 'dir-diff')
+        self.dir_diff_file = os.path.join(tmpdir, 'dir-diff-file')
+        self.dir_same_shallow = os.path.join(tmpdir, 'dir-same-shallow')
 
         # Another dir is created under dir_same, but it has a name from the
         # ignored list so it should not affect testing results.
@@ -63,7 +90,17 @@ class DirCompareTestCase(unittest.TestCase):
 
         self.caseinsensitive = os.path.normcase('A') == os.path.normcase('a')
         data = 'Contents of file go here.\n'
-        for dir in (self.dir, self.dir_same, self.dir_diff, self.dir_ignored):
+
+        shutil.rmtree(self.dir, True)
+        os.mkdir(self.dir)
+        subdir_path = os.path.join(self.dir, 'subdir')
+        os.mkdir(subdir_path)
+        dir_file_path = os.path.join(self.dir, "file")
+        with open(dir_file_path, 'w', encoding="utf-8") as output:
+            output.write(data)
+
+        for dir in (self.dir_same, self.dir_same_shallow,
+                    self.dir_diff, self.dir_diff_file):
             shutil.rmtree(dir, True)
             os.mkdir(dir)
             subdir_path = os.path.join(dir, 'subdir')
@@ -72,14 +109,25 @@ class DirCompareTestCase(unittest.TestCase):
                 fn = 'FiLe'     # Verify case-insensitive comparison
             else:
                 fn = 'file'
-            with open(os.path.join(dir, fn), 'w', encoding="utf-8") as output:
-                output.write(data)
+
+            file_path = os.path.join(dir, fn)
+
+            if dir is self.dir_same_shallow:
+                _create_file_shallow_equal(dir_file_path, file_path)
+            else:
+                shutil.copy2(dir_file_path, file_path)
 
         with open(os.path.join(self.dir_diff, 'file2'), 'w', encoding="utf-8") as output:
             output.write('An extra file.\n')
 
+        # Add a file2 whose contents differ from the file2 in dir_diff
+        with open(os.path.join(self.dir_diff_file, 'file2'), 'w', encoding="utf-8") as output:
+            output.write('Different contents.\n')
+
+
     def tearDown(self):
-        for dir in (self.dir, self.dir_same, self.dir_diff):
+        for dir in (self.dir, self.dir_same, self.dir_diff,
+                    self.dir_same_shallow, self.dir_diff_file):
             shutil.rmtree(dir)
 
     def test_default_ignores(self):
@@ -102,11 +150,7 @@ class DirCompareTestCase(unittest.TestCase):
                                          shallow=False),
                         "Comparing directory to same fails")
 
-        # Add different file2
-        with open(os.path.join(self.dir, 'file2'), 'w', encoding="utf-8") as output:
-            output.write('Different contents.\n')
-
-        self.assertFalse(filecmp.cmpfiles(self.dir, self.dir_same,
+        self.assertFalse(filecmp.cmpfiles(self.dir, self.dir_diff_file,
                                      ['file', 'file2']) ==
                     (['file'], ['file2'], []),
                     "Comparing mismatched directories fails")
@@ -116,11 +160,22 @@ class DirCompareTestCase(unittest.TestCase):
         """Assert that two lists are equal, up to ordering."""
         self.assertEqual(sorted(actual), sorted(expected))
 
+    def test_dircmp_identical_directories(self):
+        self._assert_dircmp_identical_directories()
+        self._assert_dircmp_identical_directories(shallow=False)
 
-    def test_dircmp(self):
+    def test_dircmp_different_file(self):
+        self._assert_dircmp_different_file()
+        self._assert_dircmp_different_file(shallow=False)
+
+    def test_dircmp_different_directories(self):
+        self._assert_dircmp_different_directories()
+        self._assert_dircmp_different_directories(shallow=False)
+
+    def _assert_dircmp_identical_directories(self, **options):
         # Check attributes for comparison of two identical directories
         left_dir, right_dir = self.dir, self.dir_same
-        d = filecmp.dircmp(left_dir, right_dir)
+        d = filecmp.dircmp(left_dir, right_dir, **options)
         self.assertEqual(d.left, left_dir)
         self.assertEqual(d.right, right_dir)
         if self.caseinsensitive:
@@ -142,9 +197,10 @@ class DirCompareTestCase(unittest.TestCase):
         ]
         self._assert_report(d.report, expected_report)
 
+    def _assert_dircmp_different_directories(self, **options):
         # Check attributes for comparison of two different directories (right)
         left_dir, right_dir = self.dir, self.dir_diff
-        d = filecmp.dircmp(left_dir, right_dir)
+        d = filecmp.dircmp(left_dir, right_dir, **options)
         self.assertEqual(d.left, left_dir)
         self.assertEqual(d.right, right_dir)
         self._assert_lists(d.left_list, ['file', 'subdir'])
@@ -164,12 +220,8 @@ class DirCompareTestCase(unittest.TestCase):
         self._assert_report(d.report, expected_report)
 
         # Check attributes for comparison of two different directories (left)
-        left_dir, right_dir = self.dir, self.dir_diff
-        shutil.move(
-            os.path.join(self.dir_diff, 'file2'),
-            os.path.join(self.dir, 'file2')
-        )
-        d = filecmp.dircmp(left_dir, right_dir)
+        left_dir, right_dir = self.dir_diff, self.dir
+        d = filecmp.dircmp(left_dir, right_dir, **options)
         self.assertEqual(d.left, left_dir)
         self.assertEqual(d.right, right_dir)
         self._assert_lists(d.left_list, ['file', 'file2', 'subdir'])
@@ -180,27 +232,51 @@ class DirCompareTestCase(unittest.TestCase):
         self.assertEqual(d.same_files, ['file'])
         self.assertEqual(d.diff_files, [])
         expected_report = [
-            "diff {} {}".format(self.dir, self.dir_diff),
-            "Only in {} : ['file2']".format(self.dir),
+            "diff {} {}".format(self.dir_diff, self.dir),
+            "Only in {} : ['file2']".format(self.dir_diff),
             "Identical files : ['file']",
             "Common subdirectories : ['subdir']",
         ]
         self._assert_report(d.report, expected_report)
 
-        # Add different file2
-        with open(os.path.join(self.dir_diff, 'file2'), 'w', encoding="utf-8") as output:
-            output.write('Different contents.\n')
-        d = filecmp.dircmp(self.dir, self.dir_diff)
+
+    def _assert_dircmp_different_file(self, **options):
+        # A different file2
+        d = filecmp.dircmp(self.dir_diff, self.dir_diff_file, **options)
         self.assertEqual(d.same_files, ['file'])
         self.assertEqual(d.diff_files, ['file2'])
         expected_report = [
-            "diff {} {}".format(self.dir, self.dir_diff),
+            "diff {} {}".format(self.dir_diff, self.dir_diff_file),
             "Identical files : ['file']",
             "Differing files : ['file2']",
             "Common subdirectories : ['subdir']",
         ]
         self._assert_report(d.report, expected_report)
 
+    def test_dircmp_no_shallow_different_file(self):
+        # shallow=False reads the contents, so 'file' (same stat signature,
+        # different content) must be reported as differing
+        d = filecmp.dircmp(self.dir, self.dir_same_shallow, shallow=False)
+        self.assertEqual(d.same_files, [])
+        self.assertEqual(d.diff_files, ['file'])
+        expected_report = [
+            "diff {} {}".format(self.dir, self.dir_same_shallow),
+            "Differing files : ['file']",
+            "Common subdirectories : ['subdir']",
+        ]
+        self._assert_report(d.report, expected_report)
+
+    def test_dircmp_shallow_same_file(self):
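+        # The default shallow comparison only checks the stat signature, so
+        # 'file' (same size and mtime, different content) counts as identical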
+        d = filecmp.dircmp(self.dir, self.dir_same_shallow)
+        self.assertEqual(d.same_files, ['file'])
+        self.assertEqual(d.diff_files, [])
+        expected_report = [
+            "diff {} {}".format(self.dir, self.dir_same_shallow),
+            "Identical files : ['file']",
+            "Common subdirectories : ['subdir']",
+        ]
+        self._assert_report(d.report, expected_report)
+
     def test_dircmp_subdirs_type(self):
         """Check that dircmp.subdirs respects subclassing."""
         class MyDirCmp(filecmp.dircmp):
diff --git a/Misc/NEWS.d/next/Library/2023-07-12-14-52-04.gh-issue-57141.L2k8Xb.rst b/Misc/NEWS.d/next/Library/2023-07-12-14-52-04.gh-issue-57141.L2k8Xb.rst
new file mode 100644 (file)
index 0000000..b8a1236
--- /dev/null
@@ -0,0 +1,3 @@
+Add option for *non-shallow* comparisons to :class:`filecmp.dircmp` like
+:func:`filecmp.cmp`. Original patch by Steven Ward. Enhanced by
+Tobias Rautenkranz.