[3.11] gh-101438: Avoid reference cycle in ElementTree.iterparse. (GH-114269) (GH...

author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>

Tue, 23 Jan 2024 20:49:13 +0000 (21:49 +0100)

committer GitHub <noreply@github.com>

Tue, 23 Jan 2024 20:49:13 +0000 (20:49 +0000)
author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Tue, 23 Jan 2024 20:49:13 +0000 (21:49 +0100)
committer GitHub <noreply@github.com>
Tue, 23 Jan 2024 20:49:13 +0000 (20:49 +0000)
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py

index 1dc80351bf7dddcea33a750ba809a4c7ef793726..d4b259e31a7de77e8c8f6b7ddb82b0ef48a7b867 100644 (file)
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -99,6 +99,7 @@ import io
  import collections
  import collections.abc
  import contextlib
+import weakref
  
  from . import ElementPath
  
@@ -1238,13 +1239,14 @@ def iterparse(source, events=None, parser=None):
      # parser argument of iterparse is removed, this can be killed.
      pullparser = XMLPullParser(events=events, _parser=parser)
  
-    def iterator(source):
+    if not hasattr(source, "read"):
+        source = open(source, "rb")
+        close_source = True
+    else:
          close_source = False
+
+    def iterator(source):
          try:
-            if not hasattr(source, "read"):
-                source = open(source, "rb")
-                close_source = True
-            yield None
              while True:
                  yield from pullparser.read_events()
                  # load event buffer
@@ -1254,18 +1256,23 @@ def iterparse(source, events=None, parser=None):
                  pullparser.feed(data)
              root = pullparser._close_and_return_root()
              yield from pullparser.read_events()
-            it.root = root
+            it = wr()
+            if it is not None:
+                it.root = root
          finally:
              if close_source:
                  source.close()
  
      class IterParseIterator(collections.abc.Iterator):
          __next__ = iterator(source).__next__
-    it = IterParseIterator()
-    it.root = None
-    del iterator, IterParseIterator
  
-    next(it)
+        def __del__(self):
+            if close_source:
+                source.close()
+
+    it = IterParseIterator()
+    wr = weakref.ref(it)
+    del IterParseIterator
      return it
  
  
diff --git a/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst

new file mode 100644 (file)

index 0000000..9b69b5d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst
@@ -0,0 +1,4 @@
+Avoid reference cycle in ElementTree.iterparse. The iterator returned by
+``ElementTree.iterparse`` may hold on to a file descriptor. The reference
+cycle prevented prompt clean-up of the file descriptor if the returned
+iterator was not exhausted.
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
	Tue, 23 Jan 2024 20:49:13 +0000 (21:49 +0100)
committer	GitHub <noreply@github.com>
	Tue, 23 Jan 2024 20:49:13 +0000 (20:49 +0000)
Lib/xml/etree/ElementTree.py		patch | blob | blame | history
Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst	[new file with mode: 0644]	patch | blob