git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
When using QUOTE_NONNUMERIC, we now test for "numericness" with
author Andrew McNamara <andrewm@object-craft.com.au>
Wed, 12 Jan 2005 07:44:42 +0000 (07:44 +0000)
committer Andrew McNamara <andrewm@object-craft.com.au>
Wed, 12 Jan 2005 07:44:42 +0000 (07:44 +0000)
PyNumber_Check, rather than trying to convert to a float.  Reimplemented
writer - it now raises an exception when it sees a quotechar but neither
doublequote nor escapechar is set. Doublequote results are now more
consistent (e.g., a single quote should generate """", rather than "",
which is ambiguous).

Lib/test/test_csv.py
Misc/NEWS
Modules/_csv.c

index be1147d08b33b9f83d66429d49cb009c7465612c..a3c084306b7ea03c30d0f141401dd7e75924e6cb 100644 (file)
@@ -152,25 +152,35 @@ class Test_Csv(unittest.TestCase):
                          (bigstring, bigstring))
 
     def test_write_quoting(self):
-        self._write_test(['a','1','p,q'], 'a,1,"p,q"')
+        self._write_test(['a',1,'p,q'], 'a,1,"p,q"')
         self.assertRaises(csv.Error,
                           self._write_test,
-                          ['a','1','p,q'], 'a,1,"p,q"',
+                          ['a',1,'p,q'], 'a,1,p,q',
                           quoting = csv.QUOTE_NONE)
-        self._write_test(['a','1','p,q'], 'a,1,"p,q"',
+        self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
                          quoting = csv.QUOTE_MINIMAL)
-        self._write_test(['a','1','p,q'], '"a",1,"p,q"',
+        self._write_test(['a',1,'p,q'], '"a",1,"p,q"',
                          quoting = csv.QUOTE_NONNUMERIC)
-        self._write_test(['a','1','p,q'], '"a","1","p,q"',
+        self._write_test(['a',1,'p,q'], '"a","1","p,q"',
                          quoting = csv.QUOTE_ALL)
 
     def test_write_escape(self):
-        self._write_test(['a','1','p,q'], 'a,1,"p,q"',
+        self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
                          escapechar='\\')
-# FAILED - needs to be fixed [am]:
-#        self._write_test(['a','1','p,"q"'], 'a,1,"p,\\"q\\"',
-#                         escapechar='\\', doublequote = 0)
-        self._write_test(['a','1','p,q'], 'a,1,p\\,q',
+        self.assertRaises(csv.Error,
+                          self._write_test,
+                          ['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
+                          escapechar=None, doublequote=False) 
+        self._write_test(['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
+                         escapechar='\\', doublequote = False)
+        self._write_test(['"'], '""""', 
+                         escapechar='\\', quoting = csv.QUOTE_MINIMAL)
+        self._write_test(['"'], '\\"', 
+                         escapechar='\\', quoting = csv.QUOTE_MINIMAL,
+                         doublequote = False)
+        self._write_test(['"'], '\\"', 
+                         escapechar='\\', quoting = csv.QUOTE_NONE)
+        self._write_test(['a',1,'p,q'], 'a,1,p\\,q',
                          escapechar='\\', quoting = csv.QUOTE_NONE)
 
     def test_writerows(self):
index 4bb11e4b48c4e3f643d9d5b2b2b9e434c6c9dfc5..02f54bd741f58ae42c881e124056b6be02c5a2b9 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -45,6 +45,9 @@ Library
   + quotechar=None and quoting=QUOTE_NONE now work the way PEP 305
     dictates.
   + the parser now removes the escapechar prefix from escaped characters.
+  + QUOTE_NONNUMERIC now tests for numeric objects, rather than attempting
+    to cast to float.
+  + writer doublequote handling improved.
   + Dialect classes passed to the module are no longer instantiated by
     the module before being parsed (the former validation scheme required
     this, but the mechanism was unreliable).
index 03b291f2256f898df9d312314531dd59a049aac9..30b7eca40901db97af2574b19b517c28169e3b29 100644 (file)
@@ -944,81 +944,65 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
 {
         DialectObj *dialect = self->dialect;
        int i, rec_len;
+       char *lineterm;
+
+#define ADDCH(c) \
+       do {\
+               if (copy_phase) \
+                       self->rec[rec_len] = c;\
+               rec_len++;\
+       } while(0)
+
+       lineterm = PyString_AsString(dialect->lineterminator);
+       if (lineterm == NULL)
+               return -1;
 
        rec_len = self->rec_len;
 
-       /* If this is not the first field we need a field separator.
-        */
-       if (self->num_fields > 0) {
-               if (copy_phase)
-                       self->rec[rec_len] = dialect->delimiter;
-               rec_len++;
-       }
-       /* Handle preceding quote.
-        */
-       switch (dialect->quoting) {
-       case QUOTE_ALL:
-               *quoted = 1;
-               if (copy_phase)
-                       self->rec[rec_len] = dialect->quotechar;
-               rec_len++;
-               break;
-       case QUOTE_MINIMAL:
-       case QUOTE_NONNUMERIC:
-               /* We only know about quoted in the copy phase.
-                */
-               if (copy_phase && *quoted) {
-                       self->rec[rec_len] = dialect->quotechar;
-                       rec_len++;
-               }
-               break;
-       case QUOTE_NONE:
-               break;
-       }
-       /* Copy/count field data.
-        */
+       /* If this is not the first field we need a field separator */
+       if (self->num_fields > 0)
+               ADDCH(dialect->delimiter);
+
+       /* Handle preceding quote */
+       if (copy_phase && *quoted)
+               ADDCH(dialect->quotechar);
+
+       /* Copy/count field data */
        for (i = 0;; i++) {
                char c = field[i];
+               int want_escape = 0;
 
                if (c == '\0')
                        break;
-               /* If in doublequote mode we escape quote chars with a
-                * quote.
-                */
-               if (dialect->quoting != QUOTE_NONE && 
-                    c == dialect->quotechar && dialect->doublequote) {
-                       if (copy_phase)
-                               self->rec[rec_len] = dialect->quotechar;
-                       *quoted = 1;
-                       rec_len++;
-               }
 
-               /* Some special characters need to be escaped.  If we have a
-                * quote character switch to quoted field instead of escaping
-                * individual characters.
-                */
-               if (!*quoted
-                   && (c == dialect->delimiter || 
-                        c == dialect->escapechar || 
-                        c == '\n' || c == '\r')) {
-                       if (dialect->quoting != QUOTE_NONE)
-                               *quoted = 1;
-                       else if (dialect->escapechar) {
-                               if (copy_phase)
-                                       self->rec[rec_len] = dialect->escapechar;
-                               rec_len++;
-                       }
+               if (c == dialect->delimiter ||
+                   c == dialect->escapechar ||
+                   c == dialect->quotechar ||
+                   strchr(lineterm, c)) {
+                       if (dialect->quoting == QUOTE_NONE)
+                               want_escape = 1;
                        else {
-                               PyErr_Format(error_obj, 
-                                             "delimiter must be quoted or escaped");
-                               return -1;
+                               if (c == dialect->quotechar) {
+                                       if (dialect->doublequote)
+                                               ADDCH(dialect->quotechar);
+                                       else
+                                               want_escape = 1;
+                               }
+                               if (!want_escape)
+                                       *quoted = 1;
+                       }
+                       if (want_escape) {
+                               if (!dialect->escapechar) {
+                                       PyErr_Format(error_obj, 
+                                                    "need to escape, but no escapechar set");
+                                       return -1;
+                               }
+                               ADDCH(dialect->escapechar);
                        }
                }
                /* Copy field character into record buffer.
                 */
-               if (copy_phase)
-                       self->rec[rec_len] = c;
-               rec_len++;
+               ADDCH(c);
        }
 
        /* If field is empty check if it needs to be quoted.
@@ -1033,20 +1017,14 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
                        *quoted = 1;
        }
 
-       /* Handle final quote character on field.
-        */
        if (*quoted) {
                if (copy_phase)
-                       self->rec[rec_len] = dialect->quotechar;
+                       ADDCH(dialect->quotechar);
                else
-                       /* Didn't know about leading quote until we found it
-                        * necessary in field data - compensate for it now.
-                        */
-                       rec_len++;
-               rec_len++;
+                       rec_len += 2;
        }
-
        return rec_len;
+#undef ADDCH
 }
 
 static int
@@ -1146,18 +1124,16 @@ csv_writerow(WriterObj *self, PyObject *seq)
                if (field == NULL)
                        return NULL;
 
-               quoted = 0;
-               if (dialect->quoting == QUOTE_NONNUMERIC) {
-                       PyObject *num;
-
-                       num = PyNumber_Float(field);
-                       if (num == NULL) {
-                               quoted = 1;
-                               PyErr_Clear();
-                       }
-                       else {
-                               Py_DECREF(num);
-                       }
+               switch (dialect->quoting) {
+               case QUOTE_NONNUMERIC:
+                       quoted = !PyNumber_Check(field);
+                       break;
+               case QUOTE_ALL:
+                       quoted = 1;
+                       break;
+               default:
+                       quoted = 0;
+                       break;
                }
 
                if (PyString_Check(field)) {