git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-101438: Avoid reference cycle in ElementTree.iterparse. (GH-114269)
author Sam Gross <colesbury@gmail.com>
Tue, 23 Jan 2024 20:14:46 +0000 (15:14 -0500)
committer GitHub <noreply@github.com>
Tue, 23 Jan 2024 20:14:46 +0000 (20:14 +0000)
The iterator returned by ElementTree.iterparse() may hold on to a file
descriptor. The reference cycle prevented prompt clean-up of the file
descriptor if the returned iterator was not exhausted.

Lib/xml/etree/ElementTree.py
Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst [new file with mode: 0644]

index 42574eefd81bebe6b6b0c2fb92f4709f4efa6229..ae6575028be11cbe8fed8d1a1f8d0fe4db12df12 100644 (file)
@@ -99,6 +99,7 @@ import io
 import collections
 import collections.abc
 import contextlib
+import weakref
 
 from . import ElementPath
 
@@ -1223,13 +1224,14 @@ def iterparse(source, events=None, parser=None):
     # parser argument of iterparse is removed, this can be killed.
     pullparser = XMLPullParser(events=events, _parser=parser)
 
-    def iterator(source):
+    if not hasattr(source, "read"):
+        source = open(source, "rb")
+        close_source = True
+    else:
         close_source = False
+
+    def iterator(source):
         try:
-            if not hasattr(source, "read"):
-                source = open(source, "rb")
-                close_source = True
-            yield None
             while True:
                 yield from pullparser.read_events()
                 # load event buffer
@@ -1239,18 +1241,23 @@ def iterparse(source, events=None, parser=None):
                 pullparser.feed(data)
             root = pullparser._close_and_return_root()
             yield from pullparser.read_events()
-            it.root = root
+            it = wr()
+            if it is not None:
+                it.root = root
         finally:
             if close_source:
                 source.close()
 
     class IterParseIterator(collections.abc.Iterator):
         __next__ = iterator(source).__next__
-    it = IterParseIterator()
-    it.root = None
-    del iterator, IterParseIterator
 
-    next(it)
+        def __del__(self):
+            if close_source:
+                source.close()
+
+    it = IterParseIterator()
+    wr = weakref.ref(it)
+    del IterParseIterator
     return it
 
 
diff --git a/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst
new file mode 100644 (file)
index 0000000..9b69b5d
--- /dev/null
@@ -0,0 +1,4 @@
+Avoid reference cycle in ElementTree.iterparse. The iterator returned by
+``ElementTree.iterparse`` may hold on to a file descriptor. The reference
+cycle prevented prompt clean-up of the file descriptor if the returned
+iterator was not exhausted.