]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-67044: Always quote or escape \r and \n in csv.writer() (GH-115741)
authorSerhiy Storchaka <storchaka@gmail.com>
Fri, 23 Feb 2024 20:25:09 +0000 (22:25 +0200)
committerGitHub <noreply@github.com>
Fri, 23 Feb 2024 20:25:09 +0000 (22:25 +0200)
Lib/test/test_csv.py
Misc/NEWS.d/next/Library/2024-02-20-22-02-34.gh-issue-67044.QF9_Ru.rst [new file with mode: 0644]
Modules/_csv.c

index 5217f2a71ec0f96114ec010aa775ca4a53cb789a..d74ab7e016f78c0e4a025b085e9f015f781e37c5 100644 (file)
@@ -265,9 +265,11 @@ class Test_Csv(unittest.TestCase):
                     writer = csv.writer(sio, lineterminator=lineterminator)
                     writer.writerow(['a', 'b'])
                     writer.writerow([1, 2])
+                    writer.writerow(['\r', '\n'])
                     self.assertEqual(sio.getvalue(),
                                      f'a,b{lineterminator}'
-                                     f'1,2{lineterminator}')
+                                     f'1,2{lineterminator}'
+                                     f'"\r","\n"{lineterminator}')
 
     def test_write_iterable(self):
         self._write_test(iter(['a', 1, 'p,q']), 'a,1,"p,q"')
@@ -507,22 +509,44 @@ class Test_Csv(unittest.TestCase):
         self.assertEqual(r.line_num, 3)
 
     def test_roundtrip_quoteed_newlines(self):
-        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
-            writer = csv.writer(fileobj)
-            rows = [['a\nb','b'],['c','x\r\nd']]
-            writer.writerows(rows)
-            fileobj.seek(0)
-            for i, row in enumerate(csv.reader(fileobj)):
-                self.assertEqual(row, rows[i])
+        rows = [
+            ['\na', 'b\nc', 'd\n'],
+            ['\re', 'f\rg', 'h\r'],
+            ['\r\ni', 'j\r\nk', 'l\r\n'],
+            ['\n\rm', 'n\n\ro', 'p\n\r'],
+            ['\r\rq', 'r\r\rs', 't\r\r'],
+            ['\n\nu', 'v\n\nw', 'x\n\n'],
+        ]
+        for lineterminator in '\r\n', '\n', '\r':
+            with self.subTest(lineterminator=lineterminator):
+                with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
+                    writer = csv.writer(fileobj, lineterminator=lineterminator)
+                    writer.writerows(rows)
+                    fileobj.seek(0)
+                    for i, row in enumerate(csv.reader(fileobj)):
+                        self.assertEqual(row, rows[i])
 
     def test_roundtrip_escaped_unquoted_newlines(self):
-        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
-            writer = csv.writer(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")
-            rows = [['a\nb','b'],['c','x\r\nd']]
-            writer.writerows(rows)
-            fileobj.seek(0)
-            for i, row in enumerate(csv.reader(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")):
-                self.assertEqual(row,rows[i])
+        rows = [
+            ['\na', 'b\nc', 'd\n'],
+            ['\re', 'f\rg', 'h\r'],
+            ['\r\ni', 'j\r\nk', 'l\r\n'],
+            ['\n\rm', 'n\n\ro', 'p\n\r'],
+            ['\r\rq', 'r\r\rs', 't\r\r'],
+            ['\n\nu', 'v\n\nw', 'x\n\n'],
+        ]
+        for lineterminator in '\r\n', '\n', '\r':
+            with self.subTest(lineterminator=lineterminator):
+                with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
+                    writer = csv.writer(fileobj, lineterminator=lineterminator,
+                                        quoting=csv.QUOTE_NONE, escapechar="\\")
+                    writer.writerows(rows)
+                    fileobj.seek(0)
+                    for i, row in enumerate(csv.reader(fileobj,
+                                                       quoting=csv.QUOTE_NONE,
+                                                       escapechar="\\")):
+                        self.assertEqual(row, rows[i])
+
 
 class TestDialectRegistry(unittest.TestCase):
     def test_registry_badargs(self):
diff --git a/Misc/NEWS.d/next/Library/2024-02-20-22-02-34.gh-issue-67044.QF9_Ru.rst b/Misc/NEWS.d/next/Library/2024-02-20-22-02-34.gh-issue-67044.QF9_Ru.rst
new file mode 100644 (file)
index 0000000..095e69b
--- /dev/null
@@ -0,0 +1,2 @@
+:func:`csv.writer` now always quotes or escapes ``'\r'`` and ``'\n'``,
+regardless of *lineterminator* value.
index 8d0472885afd96d58c046d3090660ea78dd84f69..660c5455af764ef4cfa4e660041d5fa6c2ed8bcd 100644 (file)
@@ -1152,6 +1152,8 @@ join_append_data(WriterObj *self, int field_kind, const void *field_data,
         if (c == dialect->delimiter ||
             c == dialect->escapechar ||
             c == dialect->quotechar  ||
+            c == '\n'  ||
+            c == '\r'  ||
             PyUnicode_FindChar(
                 dialect->lineterminator, c, 0,
                 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {