]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-115809: Improve TimedRotatingFileHandler.getFilesToDelete() (GH-115812)
authorSerhiy Storchaka <storchaka@gmail.com>
Sun, 3 Mar 2024 07:42:08 +0000 (09:42 +0200)
committerGitHub <noreply@github.com>
Sun, 3 Mar 2024 07:42:08 +0000 (09:42 +0200)
Improve algorithm for computing which rolled-over log files to delete
in logging.TimedRotatingFileHandler. It is now reliable for handlers
without namer and with arbitrary deterministic namer that leaves
the datetime part in the file name unmodified.

Lib/logging/handlers.py
Lib/test/test_logging.py
Misc/NEWS.d/next/Library/2024-02-22-11-29-27.gh-issue-115809.9H1DhB.rst [new file with mode: 0644]

index 6f0816bb2827179d382ef90a7dd711e6d96646a5..cdb014bca04b6e044170e4c54189b2984c7efd1e 100644 (file)
@@ -232,19 +232,19 @@ class TimedRotatingFileHandler(BaseRotatingHandler):
         if self.when == 'S':
             self.interval = 1 # one second
             self.suffix = "%Y-%m-%d_%H-%M-%S"
-            self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(\.\w+)?$"
+            extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(?!\d)"
         elif self.when == 'M':
             self.interval = 60 # one minute
             self.suffix = "%Y-%m-%d_%H-%M"
-            self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}(\.\w+)?$"
+            extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}-\d{2}(?!\d)"
         elif self.when == 'H':
             self.interval = 60 * 60 # one hour
             self.suffix = "%Y-%m-%d_%H"
-            self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}(\.\w+)?$"
+            extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}_\d{2}(?!\d)"
         elif self.when == 'D' or self.when == 'MIDNIGHT':
             self.interval = 60 * 60 * 24 # one day
             self.suffix = "%Y-%m-%d"
-            self.extMatch = r"^\d{4}-\d{2}-\d{2}(\.\w+)?$"
+            extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}(?!\d)"
         elif self.when.startswith('W'):
             self.interval = 60 * 60 * 24 * 7 # one week
             if len(self.when) != 2:
@@ -253,11 +253,17 @@ class TimedRotatingFileHandler(BaseRotatingHandler):
                 raise ValueError("Invalid day specified for weekly rollover: %s" % self.when)
             self.dayOfWeek = int(self.when[1])
             self.suffix = "%Y-%m-%d"
-            self.extMatch = r"^\d{4}-\d{2}-\d{2}(\.\w+)?$"
+            extMatch = r"(?<!\d)\d{4}-\d{2}-\d{2}(?!\d)"
         else:
             raise ValueError("Invalid rollover interval specified: %s" % self.when)
 
-        self.extMatch = re.compile(self.extMatch, re.ASCII)
+        # extMatch is a pattern for matching a datetime suffix in a file name.
+        # After custom naming, it is no longer guaranteed to be separated by
+        # periods from other parts of the filename.  The lookup statements
+        # (?<!\d) and (?!\d) ensure that the datetime suffix (which itself
+        # starts and ends with digits) is not preceded or followed by digits.
+        # This reduces the number of false matches and improves performance.
+        self.extMatch = re.compile(extMatch, re.ASCII)
         self.interval = self.interval * interval # multiply by units requested
         # The following line added because the filename passed in could be a
         # path object (see Issue #27493), but self.baseFilename will be a string
@@ -376,31 +382,22 @@ class TimedRotatingFileHandler(BaseRotatingHandler):
             for fileName in fileNames:
                 if fileName[:plen] == prefix:
                     suffix = fileName[plen:]
-                    if self.extMatch.match(suffix):
+                    if self.extMatch.fullmatch(suffix):
                         result.append(os.path.join(dirName, fileName))
         else:
-            # See bpo-44753: Don't use the extension when computing the prefix.
-            n, e = os.path.splitext(baseName)
-            prefix = n + '.'
-            plen = len(prefix)
             for fileName in fileNames:
-                # Our files could be just about anything after custom naming, but
-                # likely candidates are of the form
-                # foo.log.DATETIME_SUFFIX or foo.DATETIME_SUFFIX.log
-                if (not fileName.startswith(baseName) and fileName.endswith(e) and
-                    len(fileName) > (plen + 1) and not fileName[plen+1].isdigit()):
-                    continue
+                # Our files could be just about anything after custom naming,
+                # but they should contain the datetime suffix.
+                # Try to find the datetime suffix in the file name and verify
+                # that the file name can be generated by this handler.
+                m = self.extMatch.search(fileName)
+                while m:
+                    dfn = self.namer(self.baseFilename + "." + m[0])
+                    if os.path.basename(dfn) == fileName:
+                        result.append(os.path.join(dirName, fileName))
+                        break
+                    m = self.extMatch.search(fileName, m.start() + 1)
 
-                if fileName[:plen] == prefix:
-                    suffix = fileName[plen:]
-                    # See bpo-45628: The date/time suffix could be anywhere in the
-                    # filename
-
-                    parts = suffix.split('.')
-                    for part in parts:
-                        if self.extMatch.match(part):
-                            result.append(os.path.join(dirName, fileName))
-                            break
         if len(result) < self.backupCount:
             result = []
         else:
index 6791459d5ef69c68786551f4f5e1de4525c2ad3d..d71385bf2c78d7ef7d00cc76e571f25e45f77d47 100644 (file)
@@ -6260,7 +6260,7 @@ class TimedRotatingFileHandlerTest(BaseFileTest):
         for i in range(10):
             times.append(dt.strftime('%Y-%m-%d_%H-%M-%S'))
             dt += datetime.timedelta(seconds=5)
-        prefixes = ('a.b', 'a.b.c', 'd.e', 'd.e.f')
+        prefixes = ('a.b', 'a.b.c', 'd.e', 'd.e.f', 'g')
         files = []
         rotators = []
         for prefix in prefixes:
@@ -6273,10 +6273,22 @@ class TimedRotatingFileHandlerTest(BaseFileTest):
             if prefix.startswith('a.b'):
                 for t in times:
                     files.append('%s.log.%s' % (prefix, t))
-            else:
-                rotator.namer = lambda name: name.replace('.log', '') + '.log'
+            elif prefix.startswith('d.e'):
+                def namer(filename):
+                    dirname, basename = os.path.split(filename)
+                    basename = basename.replace('.log', '') + '.log'
+                    return os.path.join(dirname, basename)
+                rotator.namer = namer
                 for t in times:
                     files.append('%s.%s.log' % (prefix, t))
+            elif prefix == 'g':
+                def namer(filename):
+                    dirname, basename = os.path.split(filename)
+                    basename = 'g' + basename[6:] + '.oldlog'
+                    return os.path.join(dirname, basename)
+                rotator.namer = namer
+                for t in times:
+                    files.append('g%s.oldlog' % t)
         # Create empty files
         for fn in files:
             p = os.path.join(wd, fn)
@@ -6286,18 +6298,23 @@ class TimedRotatingFileHandlerTest(BaseFileTest):
         for i, prefix in enumerate(prefixes):
             rotator = rotators[i]
             candidates = rotator.getFilesToDelete()
-            self.assertEqual(len(candidates), 3)
+            self.assertEqual(len(candidates), 3, candidates)
             if prefix.startswith('a.b'):
                 p = '%s.log.' % prefix
                 for c in candidates:
                     d, fn = os.path.split(c)
                     self.assertTrue(fn.startswith(p))
-            else:
+            elif prefix.startswith('d.e'):
                 for c in candidates:
                     d, fn = os.path.split(c)
-                    self.assertTrue(fn.endswith('.log'))
+                    self.assertTrue(fn.endswith('.log'), fn)
                     self.assertTrue(fn.startswith(prefix + '.') and
                                     fn[len(prefix) + 2].isdigit())
+            elif prefix == 'g':
+                for c in candidates:
+                    d, fn = os.path.split(c)
+                    self.assertTrue(fn.endswith('.oldlog'))
+                    self.assertTrue(fn.startswith('g') and fn[1].isdigit())
 
     def test_compute_files_to_delete_same_filename_different_extensions(self):
         # See GH-93205 for background
@@ -6321,6 +6338,8 @@ class TimedRotatingFileHandlerTest(BaseFileTest):
             rotators.append(rotator)
             for t in times:
                 files.append('%s.%s' % (prefix, t))
+        for t in times:
+            files.append('a.log.%s.c' % t)
         # Create empty files
         for f in files:
             (wd / f).touch()
@@ -6329,11 +6348,11 @@ class TimedRotatingFileHandlerTest(BaseFileTest):
             backupCount = i+1
             rotator = rotators[i]
             candidates = rotator.getFilesToDelete()
-            self.assertEqual(len(candidates), n_files - backupCount)
-            matcher = re.compile(r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(\.\w+)?$")
+            self.assertEqual(len(candidates), n_files - backupCount, candidates)
+            matcher = re.compile(r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}\Z")
             for c in candidates:
                 d, fn = os.path.split(c)
-                self.assertTrue(fn.startswith(prefix))
+                self.assertTrue(fn.startswith(prefix+'.'))
                 suffix = fn[(len(prefix)+1):]
                 self.assertRegex(suffix, matcher)
 
diff --git a/Misc/NEWS.d/next/Library/2024-02-22-11-29-27.gh-issue-115809.9H1DhB.rst b/Misc/NEWS.d/next/Library/2024-02-22-11-29-27.gh-issue-115809.9H1DhB.rst
new file mode 100644 (file)
index 0000000..2a47efb
--- /dev/null
@@ -0,0 +1,4 @@
+Improve algorithm for computing which rolled-over log files to delete in
+:class:`logging.TimedRotatingFileHandler`. It is now reliable for handlers
+without ``namer`` and with arbitrary deterministic ``namer`` that leaves the
+datetime part in the file name unmodified.