gh-67230: add quoting rules to csv module (GH-29469)

author Skip Montanaro <skip.montanaro@gmail.com>

Wed, 12 Apr 2023 22:32:30 +0000 (17:32 -0500)

committer GitHub <noreply@github.com>

Wed, 12 Apr 2023 22:32:30 +0000 (15:32 -0700)
author Skip Montanaro <skip.montanaro@gmail.com>
Wed, 12 Apr 2023 22:32:30 +0000 (17:32 -0500)
committer GitHub <noreply@github.com>
Wed, 12 Apr 2023 22:32:30 +0000 (15:32 -0700)
diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst

index f1776554d8b9f29630063d6df3d750a706ba4ed3..64baa69be4af3101a1f2caceaa783965fddf214f 100644 (file)
--- a/Doc/library/csv.rst
+++ b/Doc/library/csv.rst
@@ -327,7 +327,7 @@ The :mod:`csv` module defines the following constants:
  
     Instructs :class:`writer` objects to quote all non-numeric fields.
  
-   Instructs the reader to convert all non-quoted fields to type *float*.
+   Instructs :class:`reader` objects to convert all non-quoted fields to type *float*.
  
  
  .. data:: QUOTE_NONE
@@ -337,7 +337,25 @@ The :mod:`csv` module defines the following constants:
     character.  If *escapechar* is not set, the writer will raise :exc:`Error` if
     any characters that require escaping are encountered.
  
-   Instructs :class:`reader` to perform no special processing of quote characters.
+   Instructs :class:`reader` objects to perform no special processing of quote characters.
+
+.. data:: QUOTE_NOTNULL
+
+   Instructs :class:`writer` objects to quote all fields which are not
+   ``None``.  This is similar to :data:`QUOTE_ALL`, except that if a
+   field value is ``None`` an empty (unquoted) string is written.
+
+   Instructs :class:`reader` objects to interpret an empty (unquoted) field as ``None`` and
+   to otherwise behave as :data:`QUOTE_ALL`.
+
+.. data:: QUOTE_STRINGS
+
+   Instructs :class:`writer` objects to always place quotes around fields
+   which are strings.  This is similar to :data:`QUOTE_NONNUMERIC`, except that if a
+   field value is ``None`` an empty (unquoted) string is written.
+
+   Instructs :class:`reader` objects to interpret an empty (unquoted) string as ``None`` and
+   to otherwise behave as :data:`QUOTE_NONNUMERIC`.
  
  The :mod:`csv` module defines the following exception:
  
diff --git a/Lib/csv.py b/Lib/csv.py

index 4ef8be45ca9e0ac1ac18481a3acaeef72ae5a95f..77f30c8d2b1f61d8abbee7f65985a82dddb2e90d 100644 (file)
--- a/Lib/csv.py
+++ b/Lib/csv.py
@@ -9,12 +9,14 @@ from _csv import Error, __version__, writer, reader, register_dialect, \
                   unregister_dialect, get_dialect, list_dialects, \
                   field_size_limit, \
                   QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \
+                 QUOTE_STRINGS, QUOTE_NOTNULL, \
                   __doc__
  from _csv import Dialect as _Dialect
  
  from io import StringIO
  
  __all__ = ["QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
+           "QUOTE_STRINGS", "QUOTE_NOTNULL",
             "Error", "Dialect", "__doc__", "excel", "excel_tab",
             "field_size_limit", "reader", "writer",
             "register_dialect", "get_dialect", "list_dialects", "Sniffer",
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py

index 8289ddb1c3a54f5c5ef47bb8b0213704144ec903..8fb97bc0c1a1a7eaddc449163c0e9483492c9399 100644 (file)
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@@ -187,6 +187,10 @@ class Test_Csv(unittest.TestCase):
                           quoting = csv.QUOTE_ALL)
          self._write_test(['a\nb',1], '"a\nb","1"',
                           quoting = csv.QUOTE_ALL)
+        self._write_test(['a','',None,1], '"a","",,1',
+                         quoting = csv.QUOTE_STRINGS)
+        self._write_test(['a','',None,1], '"a","",,"1"',
+                         quoting = csv.QUOTE_NOTNULL)
  
      def test_write_escape(self):
          self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
diff --git a/Misc/NEWS.d/next/Library/2021-11-07-15-31-25.bpo-23041.564i32.rst b/Misc/NEWS.d/next/Library/2021-11-07-15-31-25.bpo-23041.564i32.rst

new file mode 100644 (file)

index 0000000..53c32d3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-11-07-15-31-25.bpo-23041.564i32.rst
@@ -0,0 +1,2 @@
+Add :data:`~csv.QUOTE_STRINGS` and :data:`~csv.QUOTE_NOTNULL` to the suite
+of :mod:`csv` module quoting styles.
diff --git a/Modules/_csv.c b/Modules/_csv.c

index bd337084dbff8146070544cfaef9be7e70260a5b..2217cc2ca7a775872d66efadfc7db7709fe20937 100644 (file)
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -82,7 +82,8 @@ typedef enum {
  } ParserState;
  
  typedef enum {
-    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
+    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE,
+    QUOTE_STRINGS, QUOTE_NOTNULL
  } QuoteStyle;
  
  typedef struct {
@@ -95,6 +96,8 @@ static const StyleDesc quote_styles[] = {
      { QUOTE_ALL,        "QUOTE_ALL" },
      { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
      { QUOTE_NONE,       "QUOTE_NONE" },
+    { QUOTE_STRINGS,    "QUOTE_STRINGS" },
+    { QUOTE_NOTNULL,    "QUOTE_NOTNULL" },
      { 0 }
  };
  
@@ -1264,6 +1267,12 @@ csv_writerow(WriterObj *self, PyObject *seq)
          case QUOTE_ALL:
              quoted = 1;
              break;
+        case QUOTE_STRINGS:
+            quoted = PyUnicode_Check(field);
+            break;
+        case QUOTE_NOTNULL:
+            quoted = field != Py_None;
+            break;
          default:
              quoted = 0;
              break;
@@ -1659,6 +1668,11 @@ PyDoc_STRVAR(csv_module_doc,
  "        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
  "            fields which do not parse as integers or floating point\n"
  "            numbers.\n"
+"        csv.QUOTE_STRINGS means that quotes are always placed around\n"
+"            fields which are strings.  Note that the Python value None\n"
+"            is not a string.\n"
+"        csv.QUOTE_NOTNULL means that quotes are only placed around fields\n"
+"            that are not the Python value None.\n"
  "        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
  "    * escapechar - specifies a one-character string used to escape\n"
  "        the delimiter when quoting is set to QUOTE_NONE.\n"
author	Skip Montanaro <skip.montanaro@gmail.com>
	Wed, 12 Apr 2023 22:32:30 +0000 (17:32 -0500)
committer	GitHub <noreply@github.com>
	Wed, 12 Apr 2023 22:32:30 +0000 (15:32 -0700)
Doc/library/csv.rst		patch \| blob \| blame \| history
Lib/csv.py		patch \| blob \| blame \| history
Lib/test/test_csv.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2021-11-07-15-31-25.bpo-23041.564i32.rst	[new file with mode: 0644]	patch \| blob
Modules/_csv.c		patch \| blob \| blame \| history