From: Serhiy Storchaka Date: Wed, 27 Apr 2016 20:13:46 +0000 (+0300) Subject: Issue #25788: fileinput.hook_encoded() now supports an "errors" argument X-Git-Tag: v3.6.0a1~105 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=b275210a3b0e04691ebd1c1d0374720be59911b9;p=thirdparty%2FPython%2Fcpython.git Issue #25788: fileinput.hook_encoded() now supports an "errors" argument for passing to open. Original patch by Joseph Hackman. --- diff --git a/Doc/library/fileinput.rst b/Doc/library/fileinput.rst index 343368265b58..8efe8e3b94ba 100644 --- a/Doc/library/fileinput.rst +++ b/Doc/library/fileinput.rst @@ -193,10 +193,14 @@ The two following opening hooks are provided by this module: Usage example: ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed)`` -.. function:: hook_encoded(encoding) +.. function:: hook_encoded(encoding, errors=None) Returns a hook which opens each file with :func:`open`, using the given - *encoding* to read the file. + *encoding* and *errors* to read the file. Usage example: ``fi = - fileinput.FileInput(openhook=fileinput.hook_encoded("iso-8859-1"))`` + fileinput.FileInput(openhook=fileinput.hook_encoded("utf-8", + "surrogateescape"))`` + + .. versionchanged:: 3.6 + Added the optional *errors* parameter. diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst index 99223af31db5..be4c01409f4f 100644 --- a/Doc/whatsnew/3.6.rst +++ b/Doc/whatsnew/3.6.rst @@ -358,6 +358,13 @@ The :func:`~zlib.compress` function now accepts keyword arguments. (Contributed by Aviv Palivoda in :issue:`26243`.) +fileinput +--------- + +:func:`~fileinput.hook_encoded` now supports the *errors* argument. +(Contributed by Joseph Hackman in :issue:`25788`.) + + Optimizations ============= diff --git a/Lib/fileinput.py b/Lib/fileinput.py index 1e19d242136d..721fe9c9612c 100644 --- a/Lib/fileinput.py +++ b/Lib/fileinput.py @@ -400,9 +400,9 @@ def hook_compressed(filename, mode): return open(filename, mode) -def hook_encoded(encoding): +def hook_encoded(encoding, errors=None): def openhook(filename, mode): - return open(filename, mode, encoding=encoding) + return open(filename, mode, encoding=encoding, errors=errors) return openhook diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py index 4f67c25f908e..565633fcccd9 100644 --- a/Lib/test/test_fileinput.py +++ b/Lib/test/test_fileinput.py @@ -945,7 +945,8 @@ class Test_hook_encoded(unittest.TestCase): def test(self): encoding = object() - result = fileinput.hook_encoded(encoding) + errors = object() + result = fileinput.hook_encoded(encoding, errors=errors) fake_open = InvocationRecorder() original_open = builtins.open @@ -963,8 +964,26 @@ class Test_hook_encoded(unittest.TestCase): self.assertIs(args[0], filename) self.assertIs(args[1], mode) self.assertIs(kwargs.pop('encoding'), encoding) + self.assertIs(kwargs.pop('errors'), errors) self.assertFalse(kwargs) + def test_errors(self): + with open(TESTFN, 'wb') as f: + f.write(b'\x80abc') + self.addCleanup(safe_unlink, TESTFN) + + def check(errors, expected_lines): + with FileInput(files=TESTFN, mode='r', + openhook=hook_encoded('utf-8', errors=errors)) as fi: + lines = list(fi) + self.assertEqual(lines, expected_lines) + + check('ignore', ['abc']) + with self.assertRaises(UnicodeDecodeError): + check('strict', ['abc']) + check('replace', ['\ufffdabc']) + check('backslashreplace', ['\\x80abc']) + def test_modes(self): with open(TESTFN, 'wb') as f: # UTF-7 is a convenient, seldom used encoding diff --git a/Misc/ACKS b/Misc/ACKS index dd3a56747fcb..ebc3fc614652 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -538,6 +538,7 @@ Michael Guravage Lars Gustäbel Thomas Güttler Jonas H. +Joseph Hackman Barry Haddow Philipp Hagemeister Paul ten Hagen diff --git a/Misc/NEWS b/Misc/NEWS index b6fb8f8f6761..e68bbdf3a743 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -256,6 +256,9 @@ Core and Builtins Library ------- +- Issue #25788: fileinput.hook_encoded() now supports an "errors" argument + for passing to open. Original patch by Joseph Hackman. + - Issue #26634: recursive_repr() now sets __qualname__ of wrapper. Patch by Xiang Zhang.