From: Gregory P. Smith Date: Sun, 8 Dec 2013 03:14:59 +0000 (-0800) Subject: Fixes issue #19506: Use a memoryview to avoid a data copy when piping data X-Git-Tag: v3.4.0b2~302 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5ca129b8f016668bf914592e58082c452a7ad9b4;p=thirdparty%2FPython%2Fcpython.git Fixes issue #19506: Use a memoryview to avoid a data copy when piping data to stdin within subprocess.Popen.communicate. 5-10% less cpu usage. --- 5ca129b8f016668bf914592e58082c452a7ad9b4 diff --cc Lib/subprocess.py index 0942d94ef71f,546a7a0fcda8..e7f39fef0627 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@@ -1544,65 -1544,12 +1544,68 @@@ class Popen(object) if not input: self.stdin.close() - if _has_poll: - stdout, stderr = self._communicate_with_poll(input, endtime, - orig_timeout) - else: - stdout, stderr = self._communicate_with_select(input, endtime, - orig_timeout) + stdout = None + stderr = None + + # Only create this mapping if we haven't already. + if not self._communication_started: + self._fileobj2output = {} + if self.stdout: + self._fileobj2output[self.stdout] = [] + if self.stderr: + self._fileobj2output[self.stderr] = [] + + if self.stdout: + stdout = self._fileobj2output[self.stdout] + if self.stderr: + stderr = self._fileobj2output[self.stderr] + + self._save_input(input) + ++ if self._input: ++ input_view = memoryview(self._input) ++ + with _PopenSelector() as selector: + if self.stdin and input: + selector.register(self.stdin, selectors.EVENT_WRITE) + if self.stdout: + selector.register(self.stdout, selectors.EVENT_READ) + if self.stderr: + selector.register(self.stderr, selectors.EVENT_READ) + + while selector.get_map(): + timeout = self._remaining_time(endtime) + if timeout is not None and timeout < 0: + raise TimeoutExpired(self.args, orig_timeout) + + ready = selector.select(timeout) + self._check_timeout(endtime, orig_timeout) + + # XXX Rewrite these to use non-blocking I/O on the file + # objects; they are no longer using C stdio! + + for key, events in ready: + if key.fileobj is self.stdin: - chunk = self._input[self._input_offset : - self._input_offset + _PIPE_BUF] ++ chunk = input_view[self._input_offset : ++ self._input_offset + _PIPE_BUF] + try: + self._input_offset += os.write(key.fd, chunk) + except OSError as e: + if e.errno == errno.EPIPE: + selector.unregister(key.fileobj) + key.fileobj.close() + else: + raise + else: + if self._input_offset >= len(self._input): + selector.unregister(key.fileobj) + key.fileobj.close() + elif key.fileobj in (self.stdout, self.stderr): + data = os.read(key.fd, 4096) + if not data: + selector.unregister(key.fileobj) + key.fileobj.close() + self._fileobj2output[key.fileobj].append(data) self.wait(timeout=self._remaining_time(endtime)) diff --cc Misc/NEWS index 1501f6f92e4d,706446072032..3f613dc2a824 --- a/Misc/NEWS +++ b/Misc/NEWS @@@ -18,18 -18,9 +18,21 @@@ Core and Builtin Library ------- + - Issue #19506: Use a memoryview to avoid a data copy when piping data + to stdin within subprocess.Popen.communicate. 5-10% less cpu usage. + +- Issue #19876: selectors unregister() no longer raises ValueError or OSError + if the FD is closed (as long as it was registered). + +- Issue #19908: pathlib now joins relative Windows paths correctly when a drive + is present. Original patch by Antoine Pitrou. + +- Issue #19296: Silence compiler warning in dbm_open + +- Issue #6784: Strings from Python 2 can now be unpickled as bytes + objects by setting the encoding argument of Unpickler to be 'bytes'. + Initial patch by Merlijn van Deen. + - Issue #19839: Fix regression in bz2 module's handling of non-bzip2 data at EOF, and analogous bug in lzma module.