git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.11] gh-101438: Avoid reference cycle in ElementTree.iterparse. (GH-114269) (GH...
author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Tue, 23 Jan 2024 20:49:13 +0000 (21:49 +0100)
committer GitHub <noreply@github.com>
Tue, 23 Jan 2024 20:49:13 +0000 (20:49 +0000)
The iterator returned by ElementTree.iterparse() may hold on to a file
descriptor. The reference cycle prevented prompt clean-up of the file
descriptor if the returned iterator was not exhausted.
(cherry picked from commit ce01ab536f22a3cf095d621f3b3579c1e3567859)

Co-authored-by: Sam Gross <colesbury@gmail.com>
Lib/xml/etree/ElementTree.py
Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst [new file with mode: 0644]

index 1dc80351bf7dddcea33a750ba809a4c7ef793726..d4b259e31a7de77e8c8f6b7ddb82b0ef48a7b867 100644 (file)
@@ -99,6 +99,7 @@ import io
 import collections
 import collections.abc
 import contextlib
+import weakref
 
 from . import ElementPath
 
@@ -1238,13 +1239,14 @@ def iterparse(source, events=None, parser=None):
     # parser argument of iterparse is removed, this can be killed.
     pullparser = XMLPullParser(events=events, _parser=parser)
 
-    def iterator(source):
+    if not hasattr(source, "read"):
+        source = open(source, "rb")
+        close_source = True
+    else:
         close_source = False
+
+    def iterator(source):
         try:
-            if not hasattr(source, "read"):
-                source = open(source, "rb")
-                close_source = True
-            yield None
             while True:
                 yield from pullparser.read_events()
                 # load event buffer
@@ -1254,18 +1256,23 @@ def iterparse(source, events=None, parser=None):
                 pullparser.feed(data)
             root = pullparser._close_and_return_root()
             yield from pullparser.read_events()
-            it.root = root
+            it = wr()
+            if it is not None:
+                it.root = root
         finally:
             if close_source:
                 source.close()
 
     class IterParseIterator(collections.abc.Iterator):
         __next__ = iterator(source).__next__
-    it = IterParseIterator()
-    it.root = None
-    del iterator, IterParseIterator
 
-    next(it)
+        def __del__(self):
+            if close_source:
+                source.close()
+
+    it = IterParseIterator()
+    wr = weakref.ref(it)
+    del IterParseIterator
     return it
 
 
diff --git a/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst
new file mode 100644 (file)
index 0000000..9b69b5d
--- /dev/null
@@ -0,0 +1,4 @@
+Avoid reference cycle in ElementTree.iterparse. The iterator returned by
+``ElementTree.iterparse`` may hold on to a file descriptor. The reference
+cycle prevented prompt clean-up of the file descriptor if the returned
+iterator was not exhausted.