Add _PyUnicodeWriter_WriteSubstring() function

author Victor Stinner <victor.stinner@gmail.com>

Tue, 2 Apr 2013 23:48:39 +0000 (01:48 +0200)

committer Victor Stinner <victor.stinner@gmail.com>

Tue, 2 Apr 2013 23:48:39 +0000 (01:48 +0200)
author Victor Stinner <victor.stinner@gmail.com>
Tue, 2 Apr 2013 23:48:39 +0000 (01:48 +0200)
committer Victor Stinner <victor.stinner@gmail.com>
Tue, 2 Apr 2013 23:48:39 +0000 (01:48 +0200)
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h

index a70585c1e69a6442852d67125d9fe47244586fd0..09173076ae2b1db64ad2a300aa307c0cd2fcb123 100644 (file)
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -940,6 +940,15 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
      PyObject *str               /* Unicode string */
      );
  
+/* Append the substring str[start:end] of a Unicode string.
+   Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
+    PyObject *str,              /* Unicode string */
+    Py_ssize_t start,           /* index of the first character to append */
+    Py_ssize_t end              /* index just past the last character */
+    );
+
  /* Append a latin1-encoded byte string.
     Return 0 on success, raise an exception and return -1 on error. */
  PyAPI_FUNC(int)
diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py

index bc56fbce53366cb6395f5175762c39809aeb904e..bbce970a97e8a52799c076e4cdcdd1922f62b032 100644 (file)
--- a/Lib/test/test_format.py
+++ b/Lib/test/test_format.py
@@ -324,6 +324,9 @@ class FormatTest(unittest.TestCase):
          self.assertIs("{0:1s}".format(text), text)
          self.assertIs("{0:5s}".format(text), text)
  
+        self.assertIs(text % (), text)
+        self.assertIs(text.format(), text)
+
  
  def test_main():
      support.run_unittest(FormatTest)
diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h

index e9be516318f22f71371409e7929c542a93237a09..2f58946ec35000b10114b39b306176a5a9e21224 100644 (file)
--- a/Objects/stringlib/unicode_format.h
+++ b/Objects/stringlib/unicode_format.h
@@ -869,25 +869,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
      SubString literal;
      SubString field_name;
      SubString format_spec;
-    Py_UCS4 conversion, maxchar;
-    Py_ssize_t sublen;
-    int err;
+    Py_UCS4 conversion;
  
      MarkupIterator_init(&iter, input->str, input->start, input->end);
      while ((result = MarkupIterator_next(&iter, &literal, &field_present,
                                           &field_name, &format_spec,
                                           &conversion,
                                           &format_spec_needs_expanding)) == 2) {
-        sublen = literal.end - literal.start;
-        if (sublen) {
-            maxchar = _PyUnicode_FindMaxChar(literal.str,
-                                             literal.start, literal.end);
-            err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
-            if (err == -1)
+        if (literal.end != literal.start) {
+            if (!field_present && iter.str.start == iter.str.end)
+                writer->overallocate = 0;
+            if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
+                                                literal.start, literal.end) < 0)
                  return 0;
-            _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
-                                          literal.str, literal.start, sublen);
-            writer->pos += sublen;
          }
  
          if (field_present) {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 23e57f03fb0ba3687ad7c7bde76f1f5f8c13a9f7..a926e371b1a52de2fa5e1fa62a396937f557b93b 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -12790,6 +12790,41 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
      return 0;
  }
  
+int
+_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
+                                Py_ssize_t start, Py_ssize_t end)
+{
+    /* Append str[start:end] to writer; return 0 on success, -1 on error. */
+    Py_UCS4 maxchar;
+    Py_ssize_t len;
+    if (PyUnicode_READY(str) == -1)
+        return -1;
+
+    assert(0 <= start);
+    assert(end <= PyUnicode_GET_LENGTH(str));
+    assert(start <= end);
+    len = end - start;
+    /* Any empty slice is a no-op, not only the one that ends at index 0. */
+    if (len == 0)
+        return 0;
+    /* The slice is the whole string: delegate to the plain string writer. */
+    if (start == 0 && end == PyUnicode_GET_LENGTH(str))
+        return _PyUnicodeWriter_WriteStr(writer, str);
+    /* Scan the slice only when str's max char exceeds the writer's. */
+    if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar)
+        maxchar = _PyUnicode_FindMaxChar(str, start, end);
+    else
+        maxchar = writer->maxchar;
+
+    if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0)
+        return -1;
+
+    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+                                  str, start, len);
+    writer->pos += len;
+    return 0;
+}
+
  int
  _PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t len)
  {
@@ -13963,7 +13998,7 @@ PyUnicode_Format(PyObject *format, PyObject *args)
  
      while (--ctx.fmtcnt >= 0) {
          if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
-            Py_ssize_t nonfmtpos, sublen;
+            Py_ssize_t nonfmtpos;
              Py_UCS4 maxchar;
  
              nonfmtpos = ctx.fmtpos++;
@@ -13976,15 +14011,10 @@ PyUnicode_Format(PyObject *format, PyObject *args)
                  ctx.fmtpos--;
                  ctx.writer.overallocate = 0;
              }
-            sublen = ctx.fmtpos - nonfmtpos;
-            maxchar = _PyUnicode_FindMaxChar(ctx.fmtstr,
-                                             nonfmtpos, nonfmtpos + sublen);
-            if (_PyUnicodeWriter_Prepare(&ctx.writer, sublen, maxchar) == -1)
-                goto onError;
  
-            _PyUnicode_FastCopyCharacters(ctx.writer.buffer, ctx.writer.pos,
-                                          ctx.fmtstr, nonfmtpos, sublen);
-            ctx.writer.pos += sublen;
+            if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
+                                                nonfmtpos, ctx.fmtpos) < 0)
+                goto onError;
          }
          else {
              ctx.fmtpos++;
author	Victor Stinner <victor.stinner@gmail.com>
	Tue, 2 Apr 2013 23:48:39 +0000 (01:48 +0200)
committer	Victor Stinner <victor.stinner@gmail.com>
	Tue, 2 Apr 2013 23:48:39 +0000 (01:48 +0200)
Include/unicodeobject.h		patch \| blob \| blame \| history
Lib/test/test_format.py		patch \| blob \| blame \| history
Objects/stringlib/unicode_format.h		patch \| blob \| blame \| history
Objects/unicodeobject.c		patch \| blob \| blame \| history