#include "pycore_fileutils.h" // _Py_error_handler
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
+
// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111).
#define _Py_MAX_UNICODE 0x10ffff
+extern int _PyUnicode_IsModifiable(PyObject *unicode);
+
+
static inline void
_PyUnicode_Fill(int kind, void *data, Py_UCS4 value,
Py_ssize_t start, Py_ssize_t length)
}
}
+/* Check that obj is a str instance.
+   Return 0 if it is; otherwise raise TypeError and return -1. */
+static inline int
+_PyUnicode_EnsureUnicode(PyObject *obj)
+{
+    if (PyUnicode_Check(obj)) {
+        return 0;
+    }
+    PyErr_Format(PyExc_TypeError, "must be str, not %T", obj);
+    return -1;
+}
+
+/* Append the single code point ch to the writer.
+   Return 0 on success, -1 if preparing the writer fails. */
+static inline int
+_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch)
+{
+    assert(ch <= _Py_MAX_UNICODE);
+    int prepared = _PyUnicodeWriter_Prepare(writer, 1, ch);
+    if (prepared < 0) {
+        return -1;
+    }
+    PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
+    writer->pos += 1;
+    return 0;
+}
+
/* --- Characters Type APIs ----------------------------------------------- */
Objects/tupleobject.o \
Objects/typeobject.o \
Objects/typevarobject.o \
+ Objects/unicode_format.o \
Objects/unicode_formatter.o \
- Objects/unicodeobject.o \
Objects/unicodectype.o \
+ Objects/unicodeobject.o \
Objects/unionobject.o \
Objects/weakrefobject.o \
@PERF_TRAMPOLINE_OBJ@
Objects/bytesobject.o: $(srcdir)/Objects/bytesobject.c $(BYTESTR_DEPS)
Objects/bytearrayobject.o: $(srcdir)/Objects/bytearrayobject.c $(BYTESTR_DEPS)
+Objects/unicode_format.o: $(srcdir)/Objects/unicode_format.c $(UNICODE_DEPS)
Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c $(UNICODE_DEPS)
Objects/dictobject.o: $(srcdir)/Objects/stringlib/eq.h
--- /dev/null
+/*
+
+Unicode implementation based on original code by Fredrik Lundh,
+modified by Marc-Andre Lemburg <mal@lemburg.com>.
+
+Major speed upgrades to the method implementations at the Reykjavik
+NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
+
+Copyright (c) Corporation for National Research Initiatives.
+
+--------------------------------------------------------------------
+The original string type implementation is:
+
+ Copyright (c) 1999 by Secret Labs AB
+ Copyright (c) 1999 by Fredrik Lundh
+
+By obtaining, using, and/or copying this software and/or its
+associated documentation, you agree that you have read, understood,
+and will comply with the following terms and conditions:
+
+Permission to use, copy, modify, and distribute this software and its
+associated documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appears in all
+copies, and that both that copyright notice and this permission notice
+appear in supporting documentation, and that the name of Secret Labs
+AB or the author not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
+THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+--------------------------------------------------------------------
+
+*/
+
+// PyUnicode_Format() implementation
+
+#include "Python.h"
+#include "pycore_abstract.h" // _PyIndex_Check()
+#include "pycore_format.h" // F_ALT
+#include "pycore_long.h" // _PyLong_FormatWriter()
+#include "pycore_object.h" // _PyObject_IsUniquelyReferenced()
+#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE
+
+
+#define MAX_UNICODE _Py_MAX_UNICODE
+#define ensure_unicode _PyUnicode_EnsureUnicode
+
+struct unicode_formatter_t { /* State shared by the PyUnicode_Format() helpers */
+ PyObject *args; /* Argument source: the args object, or the value looked up for a "%(key)" spec */
+ int args_owned; /* Nonzero when args holds a reference that must be released */
+ Py_ssize_t arglen, argidx; /* Tuple size and next index; -1 and -2 when args is not a tuple */
+ PyObject *dict; /* The args object when it is a non-tuple, non-str mapping, else NULL */
+
+ int fmtkind; /* PyUnicode_KIND() of fmtstr */
+ Py_ssize_t fmtcnt, fmtpos; /* Countdown of format characters left, and current read index */
+ const void *fmtdata; /* PyUnicode_DATA() of fmtstr */
+ PyObject *fmtstr; /* The format string being processed */
+
+ _PyUnicodeWriter writer; /* Accumulates the output string */
+};
+
+
+struct unicode_format_arg_t { /* One parsed "%" conversion specifier */
+ Py_UCS4 ch; /* Character being parsed; the conversion type once parsing is done */
+ int flags; /* Bitmask of F_LJUST, F_SIGN, F_BLANK, F_ALT, F_ZERO */
+ Py_ssize_t width; /* Minimum field width, or -1 if not given */
+ int prec; /* Precision, or -1 if not given */
+ int sign; /* Set to 1 for numeric conversions so sign handling applies on output */
+};
+
+
+/* Fetch the next argument to format and advance ctx->argidx.
+
+   No new reference is created here: the result is either ctx->args itself
+   or the item returned by PyTuple_GetItem().
+   Return NULL and raise TypeError when the arguments are exhausted. */
+static PyObject *
+unicode_format_getnextarg(struct unicode_formatter_t *ctx)
+{
+    Py_ssize_t index = ctx->argidx;
+
+    if (index >= ctx->arglen) {
+        PyErr_SetString(PyExc_TypeError,
+                        "not enough arguments for format string");
+        return NULL;
+    }
+
+    ctx->argidx = index + 1;
+    /* A negative arglen means args is a single object, not a tuple. */
+    return (ctx->arglen < 0) ? ctx->args
+                             : PyTuple_GetItem(ctx->args, index);
+}
+
+
+/* Format the number v as a float, using the conversion character,
+   precision and F_ALT flag stored in arg.
+
+   If writer is not NULL, the ASCII text is appended to it and p_output is
+   not used.  Otherwise *p_output receives a new reference to a str object
+   holding the text.
+
+   Return 0 on success, raise an exception and return -1 on error
+   (including failure to create the output string). */
+static int
+formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
+            PyObject **p_output,
+            _PyUnicodeWriter *writer)
+{
+    double x = PyFloat_AsDouble(v);
+    if (x == -1.0 && PyErr_Occurred()) {
+        return -1;
+    }
+
+    /* Precision defaults to 6 when the format did not specify one. */
+    int prec = (arg->prec < 0) ? 6 : arg->prec;
+
+    int dtoa_flags = 0;
+    if (arg->flags & F_ALT) {
+        dtoa_flags |= Py_DTSF_ALT;
+    }
+
+    char *p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL);
+    if (p == NULL) {
+        return -1;
+    }
+    Py_ssize_t len = strlen(p);
+
+    /* Single exit so that p is released on every path. */
+    int rc = 0;
+    if (writer) {
+        if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
+            rc = -1;
+        }
+    }
+    else {
+        *p_output = _PyUnicode_FromASCII(p, len);
+        /* Report the failure here rather than returning 0 with a NULL
+           *p_output and relying on every caller to notice. */
+        if (*p_output == NULL) {
+            rc = -1;
+        }
+    }
+    PyMem_Free(p);
+    return rc;
+}
+
+
+/* _PyUnicode_FormatLong() emulates the format codes d, i, u, o, x and X,
+ * and the F_ALT flag, for Python ints.
+ * Return value: a new str object, or NULL if error.
+ * The output string is of the form
+ * "-"? ("0o" | "0x" | "0X")? digit+
+ * The base marker is present only for o, x and X conversions, and only
+ * when alt is nonzero. The case of hex digits will be correct.
+ * There will be at least prec digits, zero-filled on the left if
+ * necessary to get that many.
+ * val object to be converted; must be an int (asserted)
+ * alt nonzero to keep the base marker (the F_ALT flag)
+ * prec minimum number of digits; 0-fill on left if needed
+ * type a character in [diuoxX]; i and u act the same as d
+ *
+ * CAUTION: o, x and X conversions on regular ints can never
+ * produce a '-' sign, but can for Python's unbounded ints.
+ */
+PyObject *
+_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
+{
+ PyObject *result = NULL;
+ char *buf;
+ Py_ssize_t i;
+ int sign; /* 1 if '-', else 0 */
+ int len; /* number of characters */
+ Py_ssize_t llen;
+ int numdigits; /* len == numnondigits + numdigits */
+ int numnondigits = 0;
+
+ /* Reject a precision so large that numnondigits + prec (at most 3
+ non-digit characters: sign plus base marker) could overflow an int. */
+ if (prec > INT_MAX-3) {
+ PyErr_SetString(PyExc_OverflowError,
+ "precision too large");
+ return NULL;
+ }
+
+ assert(PyLong_Check(val));
+
+ switch (type) {
+ default:
+ Py_UNREACHABLE();
+ case 'd':
+ case 'i':
+ case 'u':
+ /* int and int subclasses should print numerically when a numeric */
+ /* format code is used (see issue18780) */
+ result = PyNumber_ToBase(val, 10);
+ break;
+ case 'o':
+ numnondigits = 2; /* room for the two-character base marker */
+ result = PyNumber_ToBase(val, 8);
+ break;
+ case 'x':
+ case 'X':
+ numnondigits = 2; /* room for the two-character base marker */
+ result = PyNumber_ToBase(val, 16);
+ break;
+ }
+ if (!result)
+ return NULL;
+
+ assert(_PyUnicode_IsModifiable(result));
+ assert(PyUnicode_IS_ASCII(result));
+
+ /* To modify the string in-place, there can only be one reference. */
+ if (!_PyObject_IsUniquelyReferenced(result)) {
+ Py_DECREF(result);
+ PyErr_BadInternalCall();
+ return NULL;
+ }
+ buf = PyUnicode_DATA(result);
+ llen = PyUnicode_GET_LENGTH(result);
+ if (llen > INT_MAX) {
+ Py_DECREF(result);
+ PyErr_SetString(PyExc_ValueError,
+ "string too large in _PyUnicode_FormatLong");
+ return NULL;
+ }
+ len = (int)llen;
+ sign = buf[0] == '-';
+ numnondigits += sign;
+ numdigits = len - numnondigits;
+ assert(numdigits > 0);
+
+ /* Get rid of base marker unless F_ALT */
+ if (((alt) == 0 &&
+ (type == 'o' || type == 'x' || type == 'X'))) {
+ assert(buf[sign] == '0');
+ assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
+ buf[sign+1] == 'o');
+ numnondigits -= 2;
+ buf += 2; /* skip the marker; buf no longer points at the start of result */
+ len -= 2;
+ if (sign)
+ buf[0] = '-'; /* re-write the sign over the second marker character */
+ assert(len == numnondigits + numdigits);
+ assert(numdigits > 0);
+ }
+
+ /* Fill with leading zeroes to meet the minimum number of digits (prec).
+ The padded text is built in a temporary bytes object. */
+ if (prec > numdigits) {
+ PyObject *r1 = PyBytes_FromStringAndSize(NULL,
+ numnondigits + prec);
+ char *b1;
+ if (!r1) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ b1 = PyBytes_AS_STRING(r1);
+ for (i = 0; i < numnondigits; ++i)
+ *b1++ = *buf++;
+ for (i = 0; i < prec - numdigits; i++)
+ *b1++ = '0';
+ for (i = 0; i < numdigits; i++)
+ *b1++ = *buf++;
+ *b1 = '\0';
+ Py_SETREF(result, r1); /* result is now the bytes object, not a str */
+ buf = PyBytes_AS_STRING(result);
+ len = numnondigits + prec;
+ }
+
+ /* Fix up case for hex conversions. */
+ if (type == 'X') {
+ /* Need to convert all lower case letters to upper case.
+ and need to convert 0x to 0X (and -0x to -0X). */
+ for (i = 0; i < len; i++)
+ if (buf[i] >= 'a' && buf[i] <= 'x')
+ buf[i] -= 'a'-'A';
+ }
+ if (!PyUnicode_Check(result) /* padded bytes buffer, or buf advanced past a stripped marker: */
+ || buf != PyUnicode_DATA(result)) { /* build a fresh str from buf */
+ PyObject *unicode;
+ unicode = _PyUnicode_FromASCII(buf, len);
+ Py_SETREF(result, unicode);
+ }
+ else if (len != PyUnicode_GET_LENGTH(result)) {
+ if (PyUnicode_Resize(&result, len) < 0)
+ Py_CLEAR(result);
+ }
+ return result;
+}
+
+
+/* Format v with one of the integer conversions (d, i, u, o, x, X).
+ *
+ * A value that is not an int is converted first: with _PyNumber_Index()
+ * for o, x and X, with PyNumber_Long() for the other codes.
+ *
+ * Return 1 if the number has been formatted into the writer,
+ *        0 if the number has been formatted into *p_output,
+ *       -1 and raise an exception on error. */
+static int
+mainformatlong(PyObject *v,
+               struct unicode_format_arg_t *arg,
+               PyObject **p_output,
+               _PyUnicodeWriter *writer)
+{
+    const char type = (char)arg->ch;
+    const int radix_only = (type == 'o' || type == 'x' || type == 'X');
+    PyObject *iobj;
+    PyObject *res;
+    int base;
+    int status;
+
+    if (!PyNumber_Check(v)) {
+        goto wrongtype;
+    }
+
+    if (PyLong_Check(v)) {
+        iobj = Py_NewRef(v);
+    }
+    else {
+        /* make sure number is a type of integer for o, x, and X */
+        iobj = radix_only ? _PyNumber_Index(v) : PyNumber_Long(v);
+        if (iobj == NULL) {
+            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+                goto wrongtype;
+            }
+            return -1;
+        }
+        assert(PyLong_Check(iobj));
+    }
+
+    if (!PyLong_CheckExact(v)
+        || arg->width != -1 || arg->prec != -1
+        || (arg->flags & (F_SIGN | F_BLANK))
+        || type == 'X')
+    {
+        /* General path: build a separate string for the caller. */
+        res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type);
+        Py_DECREF(iobj);
+        if (res == NULL) {
+            return -1;
+        }
+        *p_output = res;
+        return 0;
+    }
+
+    /* Fast path: exact int, no width/precision/sign options, not 'X'. */
+    switch (type) {
+    default:
+        Py_UNREACHABLE();
+    case 'd':
+    case 'i':
+    case 'u':
+        base = 10;
+        break;
+    case 'o':
+        base = 8;
+        break;
+    case 'x':
+    case 'X':
+        base = 16;
+        break;
+    }
+    status = _PyLong_FormatWriter(writer, v, base, arg->flags & F_ALT);
+    Py_DECREF(iobj);
+    return (status == -1) ? -1 : 1;
+
+wrongtype:
+    if (radix_only) {
+        PyErr_Format(PyExc_TypeError,
+                     "%%%c format: an integer is required, "
+                     "not %.200s",
+                     type, Py_TYPE(v)->tp_name);
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+                     "%%%c format: a real number is required, "
+                     "not %.200s",
+                     type, Py_TYPE(v)->tp_name);
+    }
+    return -1;
+}
+
+
+/* Convert the argument of a "%c" conversion to a code point.
+
+   v is either a str of length 1 or an object accepted by
+   PyLong_AsLongAndOverflow() whose value is in range(0x110000).
+   Return the code point, or (Py_UCS4)-1 with an exception set on error. */
+static Py_UCS4
+formatchar(PyObject *v)
+{
+    const Py_UCS4 failed = (Py_UCS4) -1;
+
+    if (PyUnicode_Check(v)) {
+        if (PyUnicode_GET_LENGTH(v) != 1) {
+            PyErr_Format(PyExc_TypeError,
+                         "%%c requires an int or a unicode character, "
+                         "not a string of length %zd",
+                         PyUnicode_GET_LENGTH(v));
+            return failed;
+        }
+        return PyUnicode_READ_CHAR(v, 0);
+    }
+
+    int overflow;
+    long x = PyLong_AsLongAndOverflow(v, &overflow);
+    if (x == -1 && PyErr_Occurred()) {
+        /* A TypeError gets a more specific message; any other exception
+           is propagated unchanged. */
+        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+            PyErr_Format(PyExc_TypeError,
+                         "%%c requires an int or a unicode character, not %T",
+                         v);
+        }
+        return failed;
+    }
+
+    if (x < 0 || x > MAX_UNICODE) {
+        /* this includes an overflow in converting to C long */
+        PyErr_SetString(PyExc_OverflowError,
+                        "%c arg not in range(0x110000)");
+        return failed;
+    }
+
+    return (Py_UCS4) x;
+}
+
+
+/* Parse options of an argument: flags, width, precision.
+ Handle also "%(name)" syntax.
+
+ On success, arg->ch holds the conversion character, and arg->flags,
+ arg->width and arg->prec hold the parsed options. For "%(name)", the
+ value looked up in ctx->dict becomes ctx->args.
+
+ Return 0 on success.
+ Raise an exception and return -1 on error. */
+static int
+unicode_format_arg_parse(struct unicode_formatter_t *ctx,
+ struct unicode_format_arg_t *arg)
+{
+#define FORMAT_READ(ctx) \
+ PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)
+
+ PyObject *v;
+
+ if (arg->ch == '(') {
+ /* Get argument value from a dictionary. Example: "%(name)s". */
+ Py_ssize_t keystart;
+ Py_ssize_t keylen;
+ PyObject *key;
+ int pcount = 1;
+
+ if (ctx->dict == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "format requires a mapping");
+ return -1;
+ }
+ ++ctx->fmtpos;
+ --ctx->fmtcnt;
+ keystart = ctx->fmtpos;
+ /* Skip over balanced parentheses */
+ while (pcount > 0 && --ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ if (arg->ch == ')')
+ --pcount;
+ else if (arg->ch == '(')
+ ++pcount;
+ ctx->fmtpos++;
+ }
+ keylen = ctx->fmtpos - keystart - 1; /* excludes the closing ')' */
+ if (ctx->fmtcnt < 0 || pcount > 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "incomplete format key");
+ return -1;
+ }
+ key = PyUnicode_Substring(ctx->fmtstr,
+ keystart, keystart + keylen);
+ if (key == NULL)
+ return -1;
+ if (ctx->args_owned) { /* release the value of a previous "%(name)" lookup */
+ ctx->args_owned = 0;
+ Py_DECREF(ctx->args);
+ }
+ ctx->args = PyObject_GetItem(ctx->dict, key);
+ Py_DECREF(key);
+ if (ctx->args == NULL)
+ return -1;
+ ctx->args_owned = 1;
+ ctx->arglen = -1; /* treat the looked-up value as a single argument */
+ ctx->argidx = -2;
+ }
+
+ /* Parse flags. Example: "%+i" => flags=F_SIGN. */
+ while (--ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ ctx->fmtpos++;
+ switch (arg->ch) {
+ case '-': arg->flags |= F_LJUST; continue;
+ case '+': arg->flags |= F_SIGN; continue;
+ case ' ': arg->flags |= F_BLANK; continue;
+ case '#': arg->flags |= F_ALT; continue;
+ case '0': arg->flags |= F_ZERO; continue;
+ }
+ break;
+ }
+
+ /* Parse width. Example: "%10s" => width=10 */
+ if (arg->ch == '*') {
+ v = unicode_format_getnextarg(ctx);
+ if (v == NULL)
+ return -1;
+ if (!PyLong_Check(v)) {
+ PyErr_SetString(PyExc_TypeError,
+ "* wants int");
+ return -1;
+ }
+ arg->width = PyLong_AsSsize_t(v);
+ if (arg->width == -1 && PyErr_Occurred())
+ return -1;
+ if (arg->width < 0) { /* a negative "*" width means left-justify */
+ arg->flags |= F_LJUST;
+ arg->width = -arg->width;
+ }
+ if (--ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ ctx->fmtpos++;
+ }
+ }
+ else if (arg->ch >= '0' && arg->ch <= '9') {
+ arg->width = arg->ch - '0';
+ while (--ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ ctx->fmtpos++;
+ if (arg->ch < '0' || arg->ch > '9')
+ break;
+ /* Since arg->ch is unsigned, the RHS would end up as unsigned,
+ mixing signed and unsigned comparison. Since arg->ch is between
+ '0' and '9', casting to int is safe. */
+ if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
+ PyErr_SetString(PyExc_ValueError,
+ "width too big");
+ return -1;
+ }
+ arg->width = arg->width*10 + (arg->ch - '0');
+ }
+ }
+
+ /* Parse precision. Example: "%.3f" => prec=3 */
+ if (arg->ch == '.') {
+ arg->prec = 0;
+ if (--ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ ctx->fmtpos++;
+ }
+ if (arg->ch == '*') {
+ v = unicode_format_getnextarg(ctx);
+ if (v == NULL)
+ return -1;
+ if (!PyLong_Check(v)) {
+ PyErr_SetString(PyExc_TypeError,
+ "* wants int");
+ return -1;
+ }
+ arg->prec = PyLong_AsInt(v);
+ if (arg->prec == -1 && PyErr_Occurred())
+ return -1;
+ if (arg->prec < 0) /* a negative "*" precision is clamped to 0 */
+ arg->prec = 0;
+ if (--ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ ctx->fmtpos++;
+ }
+ }
+ else if (arg->ch >= '0' && arg->ch <= '9') {
+ arg->prec = arg->ch - '0';
+ while (--ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ ctx->fmtpos++;
+ if (arg->ch < '0' || arg->ch > '9')
+ break;
+ if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
+ PyErr_SetString(PyExc_ValueError,
+ "precision too big");
+ return -1;
+ }
+ arg->prec = arg->prec*10 + (arg->ch - '0');
+ }
+ }
+ }
+
+ /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
+ if (ctx->fmtcnt >= 0) {
+ if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
+ if (--ctx->fmtcnt >= 0) {
+ arg->ch = FORMAT_READ(ctx);
+ ctx->fmtpos++;
+ }
+ }
+ }
+ if (ctx->fmtcnt < 0) { /* the format string ended before a conversion character */
+ PyErr_SetString(PyExc_ValueError,
+ "incomplete format");
+ return -1;
+ }
+ return 0;
+
+#undef FORMAT_READ
+}
+
+
+/* Format one argument. Supported conversion specifiers:
+
+   - "s", "r", "a": any type
+   - "i", "d", "u": int or float
+   - "o", "x", "X": int
+   - "e", "E", "f", "F", "g", "G": float
+   - "c": int or str (1 character)
+
+   When possible, the output is written directly into the Unicode writer
+   (ctx->writer). A string is created when padding is required.
+
+   The argument is taken from ctx with unicode_format_getnextarg().
+   arg->sign is set to 1 for numeric conversions that go through the
+   string path, so that the output step applies sign handling.
+
+   Return 0 if the argument has been formatted into *p_str,
+          1 if the argument has been written into ctx->writer,
+         -1 on error (an exception is set). */
+static int
+unicode_format_arg_format(struct unicode_formatter_t *ctx,
+                          struct unicode_format_arg_t *arg,
+                          PyObject **p_str)
+{
+    PyObject *v;
+    _PyUnicodeWriter *writer = &ctx->writer;
+
+    /* fmtcnt == 0: this specifier ends the format string, so the writer
+       no longer needs to overallocate. */
+    if (ctx->fmtcnt == 0) {
+        ctx->writer.overallocate = 0;
+    }
+
+    v = unicode_format_getnextarg(ctx);
+    if (v == NULL) {
+        return -1;
+    }
+
+    switch (arg->ch) {
+    case 's':
+    case 'r':
+    case 'a':
+        if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) {
+            /* Fast path */
+            if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1) {
+                return -1;
+            }
+            return 1;
+        }
+
+        if (PyUnicode_CheckExact(v) && arg->ch == 's') {
+            *p_str = Py_NewRef(v);
+        }
+        else if (arg->ch == 's') {
+            *p_str = PyObject_Str(v);
+        }
+        else if (arg->ch == 'r') {
+            *p_str = PyObject_Repr(v);
+        }
+        else {
+            *p_str = PyObject_ASCII(v);
+        }
+        break;
+
+    case 'i':
+    case 'd':
+    case 'u':
+    case 'o':
+    case 'x':
+    case 'X':
+    {
+        /* ret is 1 (already written to the writer) or -1 (error) when
+           nonzero; both are returned unchanged. */
+        int ret = mainformatlong(v, arg, p_str, writer);
+        if (ret != 0) {
+            return ret;
+        }
+        arg->sign = 1;
+        break;
+    }
+
+    case 'e':
+    case 'E':
+    case 'f':
+    case 'F':
+    case 'g':
+    case 'G':
+        if (arg->width == -1 && arg->prec == -1
+            && !(arg->flags & (F_SIGN | F_BLANK)))
+        {
+            /* Fast path */
+            if (formatfloat(v, arg, NULL, writer) == -1) {
+                return -1;
+            }
+            return 1;
+        }
+
+        arg->sign = 1;
+        if (formatfloat(v, arg, p_str, NULL) == -1) {
+            return -1;
+        }
+        break;
+
+    case 'c':
+    {
+        Py_UCS4 ch = formatchar(v);
+        if (ch == (Py_UCS4) -1) {
+            return -1;
+        }
+        if (arg->width == -1 && arg->prec == -1) {
+            /* Fast path */
+            if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) {
+                return -1;
+            }
+            return 1;
+        }
+        *p_str = PyUnicode_FromOrdinal(ch);
+        break;
+    }
+
+    default:
+        /* Echo the offending character only if it is printable ASCII
+           (0x20..0x7e); control characters such as 0x1f are shown
+           as '?'. */
+        PyErr_Format(PyExc_ValueError,
+                     "unsupported format character '%c' (0x%x) "
+                     "at index %zd",
+                     (32<=arg->ch && arg->ch<=126) ? (char)arg->ch : '?',
+                     (int)arg->ch,
+                     ctx->fmtpos - 1);
+        return -1;
+    }
+
+    /* The conversions above leave NULL in *p_str when the str/repr/ascii
+       call or the string allocation failed. */
+    if (*p_str == NULL) {
+        return -1;
+    }
+    assert (PyUnicode_Check(*p_str));
+    return 0;
+}
+
+/* Write the formatted string str into ctx->writer, applying the sign,
+ width, precision and padding options stored in arg.
+ arg->width and arg->sign may be modified.
+
+ Return 0 on success, -1 on error. */
+static int
+unicode_format_arg_output(struct unicode_formatter_t *ctx,
+ struct unicode_format_arg_t *arg,
+ PyObject *str)
+{
+ Py_ssize_t len;
+ int kind;
+ const void *pbuf;
+ Py_ssize_t pindex;
+ Py_UCS4 signchar;
+ Py_ssize_t buflen;
+ Py_UCS4 maxchar;
+ Py_ssize_t sublen;
+ _PyUnicodeWriter *writer = &ctx->writer;
+ Py_UCS4 fill;
+
+ fill = ' ';
+ if (arg->sign && arg->flags & F_ZERO) /* zero padding only applies when arg->sign is set */
+ fill = '0';
+
+ len = PyUnicode_GET_LENGTH(str);
+ if ((arg->width == -1 || arg->width <= len)
+ && (arg->prec == -1 || arg->prec >= len)
+ && !(arg->flags & (F_SIGN | F_BLANK)))
+ {
+ /* Fast path */
+ if (_PyUnicodeWriter_WriteStr(writer, str) == -1)
+ return -1;
+ return 0;
+ }
+
+ /* Truncate the string for "s", "r" and "a" formats
+ if the precision is set */
+ if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') {
+ if (arg->prec >= 0 && len > arg->prec)
+ len = arg->prec;
+ }
+
+ /* Adjust sign and width */
+ kind = PyUnicode_KIND(str);
+ pbuf = PyUnicode_DATA(str);
+ pindex = 0;
+ signchar = '\0';
+ if (arg->sign) {
+ Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
+ if (ch == '-' || ch == '+') { /* take the sign from the string itself */
+ signchar = ch;
+ len--;
+ pindex++;
+ }
+ else if (arg->flags & F_SIGN)
+ signchar = '+';
+ else if (arg->flags & F_BLANK)
+ signchar = ' ';
+ else
+ arg->sign = 0; /* no sign character to write */
+ }
+ if (arg->width < len)
+ arg->width = len;
+
+ /* Prepare the writer */
+ maxchar = writer->maxchar;
+ if (!(arg->flags & F_LJUST)) {
+ if (arg->sign) {
+ if ((arg->width-1) > len)
+ maxchar = Py_MAX(maxchar, fill);
+ }
+ else {
+ if (arg->width > len)
+ maxchar = Py_MAX(maxchar, fill);
+ }
+ }
+ if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
+ Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
+ maxchar = Py_MAX(maxchar, strmaxchar);
+ }
+
+ buflen = arg->width;
+ if (arg->sign && len == arg->width) /* one extra slot for the sign character */
+ buflen++;
+ if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1)
+ return -1;
+
+ /* Write the sign if needed */
+ if (arg->sign) {
+ if (fill != ' ') {
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
+ writer->pos += 1;
+ }
+ if (arg->width > len)
+ arg->width--;
+ }
+
+ /* Write the numeric prefix for "x", "X" and "o" formats
+ if the alternate form is used.
+ For example, write "0x" for the "%#x" format. */
+ if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
+ assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
+ assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch);
+ if (fill != ' ') {
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
+ writer->pos += 2;
+ pindex += 2;
+ }
+ arg->width -= 2;
+ if (arg->width < 0)
+ arg->width = 0;
+ len -= 2;
+ }
+
+ /* Pad left with the fill character if needed */
+ if (arg->width > len && !(arg->flags & F_LJUST)) {
+ sublen = arg->width - len;
+ _PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen);
+ writer->pos += sublen;
+ arg->width = len;
+ }
+
+ /* If padding with spaces: write sign if needed and/or numeric prefix if
+ the alternate form is used */
+ if (fill == ' ') {
+ if (arg->sign) {
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
+ writer->pos += 1;
+ }
+ if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
+ assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
+ assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch);
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
+ writer->pos += 2;
+ pindex += 2;
+ }
+ }
+
+ /* Write characters */
+ if (len) {
+ _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+ str, pindex, len);
+ writer->pos += len;
+ }
+
+ /* Pad right with the fill character if needed */
+ if (arg->width > len) {
+ sublen = arg->width - len;
+ _PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen);
+ writer->pos += sublen;
+ }
+ return 0;
+}
+
+
+/* Helper of PyUnicode_Format(): process the specifier that starts at
+   ctx->fmtpos (the character following a '%').
+   Return 0 on success, raise an exception and return -1 on error. */
+static int
+unicode_format_arg(struct unicode_formatter_t *ctx)
+{
+    struct unicode_format_arg_t arg;
+    PyObject *str = NULL;
+    int status;
+
+    arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos);
+    if (arg.ch == '%') {
+        /* "%%" produces a literal percent sign. */
+        ctx->fmtpos++;
+        ctx->fmtcnt--;
+        if (_PyUnicodeWriter_WriteCharInline(&ctx->writer, '%') < 0) {
+            return -1;
+        }
+        return 0;
+    }
+
+    arg.flags = 0;
+    arg.width = -1;
+    arg.prec = -1;
+    arg.sign = 0;
+
+    if (unicode_format_arg_parse(ctx, &arg) == -1) {
+        return -1;
+    }
+
+    status = unicode_format_arg_format(ctx, &arg, &str);
+    if (status == -1) {
+        return -1;
+    }
+
+    /* A status of 1 means the text is already in the writer; otherwise
+       str holds it and still has to be padded and written. */
+    if (status != 1) {
+        status = unicode_format_arg_output(ctx, &arg, str);
+        Py_DECREF(str);
+        if (status == -1) {
+            return -1;
+        }
+    }
+
+    if (ctx->dict && (ctx->argidx < ctx->arglen)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "not all arguments converted during string formatting");
+        return -1;
+    }
+    return 0;
+}
+
+/* Build a new string from the format string and the arguments
+ (the C-level counterpart of "format % args").
+
+ format must be a str, otherwise TypeError is raised. args is a tuple of
+ arguments, a mapping (used for "%(key)" specifiers), or a single object.
+
+ Return the formatted string, or NULL with an exception set on error. */
+PyObject *
+PyUnicode_Format(PyObject *format, PyObject *args)
+{
+ struct unicode_formatter_t ctx;
+
+ if (format == NULL || args == NULL) {
+ PyErr_BadInternalCall();
+ return NULL;
+ }
+
+ if (ensure_unicode(format) < 0)
+ return NULL;
+
+ ctx.fmtstr = format;
+ ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
+ ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
+ ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);
+ ctx.fmtpos = 0;
+
+ _PyUnicodeWriter_Init(&ctx.writer);
+ ctx.writer.min_length = ctx.fmtcnt + 100;
+ ctx.writer.overallocate = 1;
+
+ if (PyTuple_Check(args)) {
+ ctx.arglen = PyTuple_Size(args);
+ ctx.argidx = 0;
+ }
+ else {
+ ctx.arglen = -1; /* single object: unicode_format_getnextarg() hands it out once */
+ ctx.argidx = -2;
+ }
+ ctx.args_owned = 0;
+ if (PyMapping_Check(args) && !PyTuple_Check(args) && !PyUnicode_Check(args))
+ ctx.dict = args;
+ else
+ ctx.dict = NULL;
+ ctx.args = args;
+
+ while (--ctx.fmtcnt >= 0) {
+ if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
+ Py_ssize_t nonfmtpos; /* start of a run of literal (non-'%') characters */
+
+ nonfmtpos = ctx.fmtpos++;
+ while (ctx.fmtcnt >= 0 &&
+ PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
+ ctx.fmtpos++;
+ ctx.fmtcnt--;
+ }
+ if (ctx.fmtcnt < 0) { /* the literal run reached the end of the format string */
+ ctx.fmtpos--;
+ ctx.writer.overallocate = 0;
+ }
+
+ if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
+ nonfmtpos, ctx.fmtpos) < 0)
+ goto onError;
+ }
+ else {
+ ctx.fmtpos++; /* skip the '%' */
+ if (unicode_format_arg(&ctx) == -1)
+ goto onError;
+ }
+ }
+
+ if (ctx.argidx < ctx.arglen && !ctx.dict) {
+ PyErr_SetString(PyExc_TypeError,
+ "not all arguments converted during string formatting");
+ goto onError;
+ }
+
+ if (ctx.args_owned) {
+ Py_DECREF(ctx.args);
+ }
+ return _PyUnicodeWriter_Finish(&ctx.writer);
+
+ onError:
+ _PyUnicodeWriter_Dealloc(&ctx.writer);
+ if (ctx.args_owned) {
+ Py_DECREF(ctx.args);
+ }
+ return NULL;
+}
*/
#define MAX_UNICODE _Py_MAX_UNICODE
+#define ensure_unicode _PyUnicode_EnsureUnicode
#ifdef Py_DEBUG
# define _PyUnicode_CHECK(op) _PyUnicode_CheckConsistency(op, 0)
/* forward */
static PyObject* get_latin1_char(unsigned char ch);
-static int unicode_modifiable(PyObject *unicode);
static PyObject *
#undef BLOOM_UPDATE
}
-static int
-ensure_unicode(PyObject *obj)
-{
- if (!PyUnicode_Check(obj)) {
- PyErr_Format(PyExc_TypeError,
- "must be str, not %.100s",
- Py_TYPE(obj)->tp_name);
- return -1;
- }
- return 0;
-}
-
/* Compilation of templated routines */
#define STRINGLIB_GET_EMPTY() unicode_get_empty()
Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
#endif
- if (!unicode_modifiable(unicode)) {
+ if (!_PyUnicode_IsModifiable(unicode)) {
PyObject *copy = resize_copy(unicode, length);
if (copy == NULL) {
return NULL;
static int
unicode_check_modifiable(PyObject *unicode)
{
- if (!unicode_modifiable(unicode)) {
+ if (!_PyUnicode_IsModifiable(unicode)) {
PyErr_SetString(PyExc_SystemError,
"Cannot modify a string currently used");
return -1;
}
#endif
-static int
-unicode_modifiable(PyObject *unicode)
+int
+_PyUnicode_IsModifiable(PyObject *unicode)
{
assert(_PyUnicode_CHECK(unicode));
if (!_PyObject_IsUniquelyReferenced(unicode))
return 0;
}
- if (!unicode_modifiable(unicode)) {
+ if (!_PyUnicode_IsModifiable(unicode)) {
PyObject *copy = resize_copy(unicode, length);
if (copy == NULL)
return -1;
{
const int kind = PyUnicode_KIND(unicode);
void *data = PyUnicode_DATA(unicode);
- assert(unicode_modifiable(unicode));
+ assert(_PyUnicode_IsModifiable(unicode));
assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode));
assert(start >= 0);
assert(start + length <= PyUnicode_GET_LENGTH(unicode));
}
new_len = left_len + right_len;
- if (unicode_modifiable(left)
+ if (_PyUnicode_IsModifiable(left)
&& PyUnicode_CheckExact(right)
&& PyUnicode_KIND(right) <= PyUnicode_KIND(left)
/* Don't resize for ascii += latin1. Convert ascii to latin1 requires
return _PyUnicodeWriter_PrepareInternal(writer, 0, maxchar);
}
-/* Append the single character ch to the writer.
- _PyUnicodeWriter_Prepare() is called for one character with ch as the
- maximum character; ch is then stored at writer->pos and pos is advanced.
- ch must not exceed MAX_UNICODE (asserted).
- Return 0 on success, -1 if the prepare step fails. */
-static inline int
-_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch)
-{
- assert(ch <= MAX_UNICODE);
- if (_PyUnicodeWriter_Prepare(writer, 1, ch) < 0)
- return -1;
- PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
- writer->pos++;
- return 0;
-}
-
int
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
{
};
-/* Helpers for PyUnicode_Format() */
-
-/* State shared by the helpers while one format string is processed.
- The fields are initialised one by one in PyUnicode_Format(). */
-struct unicode_formatter_t {
- /* Argument source: the args object passed to PyUnicode_Format(), or the
- value fetched from dict for a "%(key)" specifier. */
- PyObject *args;
- /* Non-zero when args holds a reference that must be released. */
- int args_owned;
- /* Tuple length and index of the next item to consume. For a non-tuple
- argument they are set to -1 and -2. */
- Py_ssize_t arglen, argidx;
- /* The args object when it is used as a mapping, otherwise NULL. */
- PyObject *dict;
-
- /* PyUnicode_KIND() of fmtstr. */
- int fmtkind;
- /* fmtcnt: number of format characters still to be consumed (it is
- decremented before each read and is negative once the input is exhausted);
- fmtpos: index of the next character to read from fmtdata. */
- Py_ssize_t fmtcnt, fmtpos;
- /* PyUnicode_DATA() of fmtstr. */
- const void *fmtdata;
- /* The format string object. */
- PyObject *fmtstr;
-
- /* Accumulates the formatted output. */
- _PyUnicodeWriter writer;
-};
-
-/* Description of one "%..." specifier while it is parsed and formatted. */
-struct unicode_format_arg_t {
- /* Most recently read format character; the conversion character once
- parsing has finished. */
- Py_UCS4 ch;
- /* Bitmask of F_LJUST, F_SIGN, F_BLANK, F_ALT and F_ZERO. */
- int flags;
- /* Field width, -1 when none was given. */
- Py_ssize_t width;
- /* Precision, -1 when none was given. */
- int prec;
- /* Set to 1 for the integer and float conversions that go through the
- padded output path; unicode_format_arg_output() resets it to 0 when no
- sign character has to be written. */
- int sign;
-};
-
-/* Fetch the next argument to format.
-
- While argidx < arglen, argidx is advanced and either ctx->args itself is
- returned (arglen < 0, i.e. the non-tuple setup arglen=-1/argidx=-2, which
- therefore yields the object once per setup) or the tuple item at the old
- index. No Py_INCREF is done on ctx->args here; the tuple item is whatever
- PyTuple_GetItem() returns.
-
- When the arguments are exhausted, set TypeError and return NULL. */
-static PyObject *
-unicode_format_getnextarg(struct unicode_formatter_t *ctx)
-{
- Py_ssize_t argidx = ctx->argidx;
-
- if (argidx < ctx->arglen) {
- ctx->argidx++;
- if (ctx->arglen < 0)
- return ctx->args;
- else
- return PyTuple_GetItem(ctx->args, argidx);
- }
- PyErr_SetString(PyExc_TypeError,
- "not enough arguments for format string");
- return NULL;
-}
-
-/* Returns a new reference to a PyUnicode object, or NULL on failure. */
-
-/* Format a float into the writer if the writer is not NULL, or into *p_output
- otherwise.
-
- v is converted with PyFloat_AsDouble(). arg->ch is passed as the format code
- to PyOS_double_to_string(), arg->prec is the precision (6 when negative) and
- F_ALT in arg->flags selects Py_DTSF_ALT.
-
- Return 0 on success, raise an exception and return -1 on error.
- NOTE: when writer is NULL the result of _PyUnicode_FromASCII() is stored
- unchecked, so *p_output can be NULL even though 0 is returned; the caller
- has to test it. */
-static int
-formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
- PyObject **p_output,
- _PyUnicodeWriter *writer)
-{
- char *p;
- double x;
- Py_ssize_t len;
- int prec;
- int dtoa_flags = 0;
-
- x = PyFloat_AsDouble(v);
- if (x == -1.0 && PyErr_Occurred())
- return -1;
-
- prec = arg->prec;
- if (prec < 0)
- prec = 6;
-
- if (arg->flags & F_ALT)
- dtoa_flags |= Py_DTSF_ALT;
- p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL);
- if (p == NULL)
- return -1;
- len = strlen(p);
- if (writer) {
- if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
- PyMem_Free(p);
- return -1;
- }
- }
- else
- *p_output = _PyUnicode_FromASCII(p, len);
- /* p is released with PyMem_Free() on every path once it was obtained. */
- PyMem_Free(p);
- return 0;
-}
-
-/* _PyUnicode_FormatLong() emulates the format codes d, i, u, o, x and X,
- * and the alternate form (F_ALT), for Python ints.
- * Return value: a new PyUnicodeObject*, or NULL if error.
- * The output string is of the form
- * "-"? ("0x" | "0X" | "0o")? digit+
- * The base marker is present only for o, x and X conversions when alt
- * is non-zero. The case of hex digits will be correct.
- * There will be at least prec digits, zero-filled on the left if
- * necessary to get that many.
- * val object to be converted; must satisfy PyLong_Check() (asserted)
- * alt non-zero to keep the base marker (mainformatlong() passes flags & F_ALT)
- * prec minimum number of digits; 0-fill on left if needed
- * type a character in [diuoxX]; i and u act the same as d
- *
- * CAUTION: o, x and X conversions can produce a '-' sign; it is kept in
- * front of the base marker.
- */
-PyObject *
-_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type)
-{
- PyObject *result = NULL;
- char *buf;
- Py_ssize_t i;
- int sign; /* 1 if '-', else 0 */
- int len; /* number of characters */
- Py_ssize_t llen;
- int numdigits; /* len == numnondigits + numdigits */
- int numnondigits = 0;
-
- /* Avoid exceeding SSIZE_T_MAX */
- /* NOTE(review): the bound actually tested is INT_MAX-3; numnondigits is at
- most 3 (base marker plus sign), so this keeps the int sum
- numnondigits + prec used below from overflowing. */
- if (prec > INT_MAX-3) {
- PyErr_SetString(PyExc_OverflowError,
- "precision too large");
- return NULL;
- }
-
- assert(PyLong_Check(val));
-
- switch (type) {
- default:
- Py_UNREACHABLE();
- case 'd':
- case 'i':
- case 'u':
- /* int and int subclasses should print numerically when a numeric */
- /* format code is used (see issue18780) */
- result = PyNumber_ToBase(val, 10);
- break;
- case 'o':
- numnondigits = 2;
- result = PyNumber_ToBase(val, 8);
- break;
- case 'x':
- case 'X':
- numnondigits = 2;
- result = PyNumber_ToBase(val, 16);
- break;
- }
- if (!result)
- return NULL;
-
- assert(unicode_modifiable(result));
- assert(PyUnicode_IS_ASCII(result));
-
- /* To modify the string in-place, there can only be one reference. */
- if (!_PyObject_IsUniquelyReferenced(result)) {
- Py_DECREF(result);
- PyErr_BadInternalCall();
- return NULL;
- }
- buf = PyUnicode_DATA(result);
- llen = PyUnicode_GET_LENGTH(result);
- if (llen > INT_MAX) {
- Py_DECREF(result);
- PyErr_SetString(PyExc_ValueError,
- "string too large in _PyUnicode_FormatLong");
- return NULL;
- }
- len = (int)llen;
- sign = buf[0] == '-';
- numnondigits += sign;
- numdigits = len - numnondigits;
- assert(numdigits > 0);
-
- /* Get rid of base marker unless F_ALT */
- if (((alt) == 0 &&
- (type == 'o' || type == 'x' || type == 'X'))) {
- assert(buf[sign] == '0');
- assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
- buf[sign+1] == 'o');
- numnondigits -= 2;
- /* Skip two characters; the sign, if any, is rewritten over the
- second marker character so that it directly precedes the digits. */
- buf += 2;
- len -= 2;
- if (sign)
- buf[0] = '-';
- assert(len == numnondigits + numdigits);
- assert(numdigits > 0);
- }
-
- /* Fill with leading zeroes to meet minimum width. */
- if (prec > numdigits) {
- PyObject *r1 = PyBytes_FromStringAndSize(NULL,
- numnondigits + prec);
- char *b1;
- if (!r1) {
- Py_DECREF(result);
- return NULL;
- }
- b1 = PyBytes_AS_STRING(r1);
- for (i = 0; i < numnondigits; ++i)
- *b1++ = *buf++;
- for (i = 0; i < prec - numdigits; i++)
- *b1++ = '0';
- for (i = 0; i < numdigits; i++)
- *b1++ = *buf++;
- *b1 = '\0';
- /* From here on result is the bytes object r1, not a str. */
- Py_SETREF(result, r1);
- buf = PyBytes_AS_STRING(result);
- len = numnondigits + prec;
- }
-
- /* Fix up case for hex conversions. */
- if (type == 'X') {
- /* Need to convert all lower case letters to upper case.
- and need to convert 0x to 0X (and -0x to -0X). */
- for (i = 0; i < len; i++)
- if (buf[i] >= 'a' && buf[i] <= 'x')
- buf[i] -= 'a'-'A';
- }
- /* Build a fresh str when result is the temporary bytes object or when buf
- no longer points at the start of the str data; PyUnicode_DATA() is only
- evaluated when result is a str because of the short-circuit. */
- if (!PyUnicode_Check(result)
- || buf != PyUnicode_DATA(result)) {
- PyObject *unicode;
- unicode = _PyUnicode_FromASCII(buf, len);
- Py_SETREF(result, unicode);
- }
- else if (len != PyUnicode_GET_LENGTH(result)) {
- if (PyUnicode_Resize(&result, len) < 0)
- Py_CLEAR(result);
- }
- return result;
-}
-
-/* Format an integer or a float as an integer.
- * v is first turned into an int: _PyNumber_Index() for o, x and X,
- * PyNumber_Long() for the other codes, unless it already is an int.
- * A TypeError from that conversion (or a failed PyNumber_Check()) is
- * replaced by a "%<code> format: ... is required" TypeError.
- * Return 1 if the number has been formatted into the writer,
- * 0 if the number has been formatted into *p_output
- * -1 and raise an exception on error */
-static int
-mainformatlong(PyObject *v,
- struct unicode_format_arg_t *arg,
- PyObject **p_output,
- _PyUnicodeWriter *writer)
-{
- PyObject *iobj, *res;
- char type = (char)arg->ch;
-
- if (!PyNumber_Check(v))
- goto wrongtype;
-
- /* make sure number is a type of integer for o, x, and X */
- if (!PyLong_Check(v)) {
- if (type == 'o' || type == 'x' || type == 'X') {
- iobj = _PyNumber_Index(v);
- }
- else {
- iobj = PyNumber_Long(v);
- }
- if (iobj == NULL ) {
- if (PyErr_ExceptionMatches(PyExc_TypeError))
- goto wrongtype;
- return -1;
- }
- assert(PyLong_Check(iobj));
- }
- else {
- iobj = Py_NewRef(v);
- }
-
- /* The fast path is limited to exact ints without width, precision or
- sign flags, and excludes 'X'. */
- if (PyLong_CheckExact(v)
- && arg->width == -1 && arg->prec == -1
- && !(arg->flags & (F_SIGN | F_BLANK))
- && type != 'X')
- {
- /* Fast path */
- int alternate = arg->flags & F_ALT;
- int base;
-
- switch(type)
- {
- default:
- Py_UNREACHABLE();
- case 'd':
- case 'i':
- case 'u':
- base = 10;
- break;
- case 'o':
- base = 8;
- break;
- case 'x':
- /* 'X' cannot occur here: it is excluded by the enclosing condition. */
- case 'X':
- base = 16;
- break;
- }
-
- if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) {
- Py_DECREF(iobj);
- return -1;
- }
- Py_DECREF(iobj);
- return 1;
- }
-
- res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type);
- Py_DECREF(iobj);
- if (res == NULL)
- return -1;
- *p_output = res;
- return 0;
-
-wrongtype:
- switch(type)
- {
- case 'o':
- case 'x':
- case 'X':
- PyErr_Format(PyExc_TypeError,
- "%%%c format: an integer is required, "
- "not %.200s",
- type, Py_TYPE(v)->tp_name);
- break;
- default:
- PyErr_Format(PyExc_TypeError,
- "%%%c format: a real number is required, "
- "not %.200s",
- type, Py_TYPE(v)->tp_name);
- break;
- }
- return -1;
-}
-
-/* Convert the argument of a "%c" conversion to a code point.
-
- v may be a str of length 1, or an object accepted by
- PyLong_AsLongAndOverflow() whose value lies in [0, MAX_UNICODE].
- Return the code point, or (Py_UCS4)-1 with an exception set: TypeError for
- a str of another length or a non-integer, OverflowError for an out-of-range
- value, or whatever other exception the integer conversion raised. */
-static Py_UCS4
-formatchar(PyObject *v)
-{
- /* presume that the buffer is at least 3 characters long */
- /* NOTE(review): no buffer is used anywhere in this function; the comment
- above looks like a leftover. */
- if (PyUnicode_Check(v)) {
- if (PyUnicode_GET_LENGTH(v) == 1) {
- return PyUnicode_READ_CHAR(v, 0);
- }
- PyErr_Format(PyExc_TypeError,
- "%%c requires an int or a unicode character, "
- "not a string of length %zd",
- PyUnicode_GET_LENGTH(v));
- return (Py_UCS4) -1;
- }
- else {
- int overflow;
- long x = PyLong_AsLongAndOverflow(v, &overflow);
- if (x == -1 && PyErr_Occurred()) {
- /* A TypeError is replaced by a message specific to %c; any other
- exception is left as it is. */
- if (PyErr_ExceptionMatches(PyExc_TypeError)) {
- PyErr_Format(PyExc_TypeError,
- "%%c requires an int or a unicode character, not %T",
- v);
- return (Py_UCS4) -1;
- }
- return (Py_UCS4) -1;
- }
-
- if (x < 0 || x > MAX_UNICODE) {
- /* this includes an overflow in converting to C long */
- PyErr_SetString(PyExc_OverflowError,
- "%c arg not in range(0x110000)");
- return (Py_UCS4) -1;
- }
-
- return (Py_UCS4) x;
- }
-}
-
-/* Parse options of an argument: flags, width, precision.
- Handle also "%(name)" syntax.
-
- On entry arg->ch holds the character that follows '%'. On success arg->ch
- holds the conversion character, and arg->flags, arg->width and arg->prec
- have been updated for the options that were present. A "*" width or
- precision is taken from the arguments via unicode_format_getnextarg().
- For "%(name)" the value looked up in ctx->dict becomes ctx->args.
-
- Return 0 on success.
- Raise an exception and return -1 on error. */
-static int
-unicode_format_arg_parse(struct unicode_formatter_t *ctx,
- struct unicode_format_arg_t *arg)
-{
-#define FORMAT_READ(ctx) \
- PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)
-
- PyObject *v;
-
- if (arg->ch == '(') {
- /* Get argument value from a dictionary. Example: "%(name)s". */
- Py_ssize_t keystart;
- Py_ssize_t keylen;
- PyObject *key;
- int pcount = 1;
-
- if (ctx->dict == NULL) {
- PyErr_SetString(PyExc_TypeError,
- "format requires a mapping");
- return -1;
- }
- ++ctx->fmtpos;
- --ctx->fmtcnt;
- keystart = ctx->fmtpos;
- /* Skip over balanced parentheses */
- while (pcount > 0 && --ctx->fmtcnt >= 0) {
- arg->ch = FORMAT_READ(ctx);
- if (arg->ch == ')')
- --pcount;
- else if (arg->ch == '(')
- ++pcount;
- ctx->fmtpos++;
- }
- /* fmtpos is one past the closing ')', hence the extra -1. */
- keylen = ctx->fmtpos - keystart - 1;
- if (ctx->fmtcnt < 0 || pcount > 0) {
- PyErr_SetString(PyExc_ValueError,
- "incomplete format key");
- return -1;
- }
- key = PyUnicode_Substring(ctx->fmtstr,
- keystart, keystart + keylen);
- if (key == NULL)
- return -1;
- /* Release the value fetched for a previous "%(name)" specifier. */
- if (ctx->args_owned) {
- ctx->args_owned = 0;
- Py_DECREF(ctx->args);
- }
- ctx->args = PyObject_GetItem(ctx->dict, key);
- Py_DECREF(key);
- if (ctx->args == NULL)
- return -1;
- ctx->args_owned = 1;
- /* Same setup as for a non-tuple argument: the looked-up value is
- handed out as a single argument. */
- ctx->arglen = -1;
- ctx->argidx = -2;
- }
-
- /* Parse flags. Example: "%+i" => flags=F_SIGN. */
- while (--ctx->fmtcnt >= 0) {
- arg->ch = FORMAT_READ(ctx);
- ctx->fmtpos++;
- switch (arg->ch) {
- case '-': arg->flags |= F_LJUST; continue;
- case '+': arg->flags |= F_SIGN; continue;
- case ' ': arg->flags |= F_BLANK; continue;
- case '#': arg->flags |= F_ALT; continue;
- case '0': arg->flags |= F_ZERO; continue;
- }
- break;
- }
-
- /* Parse width. Example: "%10s" => width=10 */
- if (arg->ch == '*') {
- v = unicode_format_getnextarg(ctx);
- if (v == NULL)
- return -1;
- if (!PyLong_Check(v)) {
- PyErr_SetString(PyExc_TypeError,
- "* wants int");
- return -1;
- }
- arg->width = PyLong_AsSsize_t(v);
- if (arg->width == -1 && PyErr_Occurred())
- return -1;
- /* A negative "*" width means left-justify with the absolute value. */
- if (arg->width < 0) {
- arg->flags |= F_LJUST;
- arg->width = -arg->width;
- }
- if (--ctx->fmtcnt >= 0) {
- arg->ch = FORMAT_READ(ctx);
- ctx->fmtpos++;
- }
- }
- else if (arg->ch >= '0' && arg->ch <= '9') {
- arg->width = arg->ch - '0';
- while (--ctx->fmtcnt >= 0) {
- arg->ch = FORMAT_READ(ctx);
- ctx->fmtpos++;
- if (arg->ch < '0' || arg->ch > '9')
- break;
- /* Since arg->ch is unsigned, the RHS would end up as unsigned,
- mixing signed and unsigned comparison. Since arg->ch is between
- '0' and '9', casting to int is safe. */
- if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
- PyErr_SetString(PyExc_ValueError,
- "width too big");
- return -1;
- }
- arg->width = arg->width*10 + (arg->ch - '0');
- }
- }
-
- /* Parse precision. Example: "%.3f" => prec=3 */
- if (arg->ch == '.') {
- arg->prec = 0;
- if (--ctx->fmtcnt >= 0) {
- arg->ch = FORMAT_READ(ctx);
- ctx->fmtpos++;
- }
- if (arg->ch == '*') {
- v = unicode_format_getnextarg(ctx);
- if (v == NULL)
- return -1;
- if (!PyLong_Check(v)) {
- PyErr_SetString(PyExc_TypeError,
- "* wants int");
- return -1;
- }
- arg->prec = PyLong_AsInt(v);
- if (arg->prec == -1 && PyErr_Occurred())
- return -1;
- /* A negative "*" precision is treated as 0. */
- if (arg->prec < 0)
- arg->prec = 0;
- if (--ctx->fmtcnt >= 0) {
- arg->ch = FORMAT_READ(ctx);
- ctx->fmtpos++;
- }
- }
- else if (arg->ch >= '0' && arg->ch <= '9') {
- arg->prec = arg->ch - '0';
- while (--ctx->fmtcnt >= 0) {
- arg->ch = FORMAT_READ(ctx);
- ctx->fmtpos++;
- if (arg->ch < '0' || arg->ch > '9')
- break;
- if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
- PyErr_SetString(PyExc_ValueError,
- "precision too big");
- return -1;
- }
- arg->prec = arg->prec*10 + (arg->ch - '0');
- }
- }
- }
-
- /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
- if (ctx->fmtcnt >= 0) {
- if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
- if (--ctx->fmtcnt >= 0) {
- arg->ch = FORMAT_READ(ctx);
- ctx->fmtpos++;
- }
- }
- }
- /* A negative fmtcnt means the format string ended before a conversion
- character was read. */
- if (ctx->fmtcnt < 0) {
- PyErr_SetString(PyExc_ValueError,
- "incomplete format");
- return -1;
- }
- return 0;
-
-#undef FORMAT_READ
-}
-
-/* Format one argument. Supported conversion specifiers:
-
- - "s", "r", "a": any type
- - "i", "d", "u": int or float
- - "o", "x", "X": int
- - "e", "E", "f", "F", "g", "G": float
- - "c": int or str (1 character)
-
- When possible, the output is written directly into the Unicode writer
- (ctx->writer). A string is created when padding is required.
-
- The argument is obtained with unicode_format_getnextarg(). arg->sign is set
- to 1 for the integer and float conversions that produce a string in *p_str.
- Any other conversion character raises ValueError.
-
- Return 0 if the argument has been formatted into *p_str,
- 1 if the argument has been written into ctx->writer,
- -1 on error. */
-static int
-unicode_format_arg_format(struct unicode_formatter_t *ctx,
- struct unicode_format_arg_t *arg,
- PyObject **p_str)
-{
- PyObject *v;
- _PyUnicodeWriter *writer = &ctx->writer;
-
- /* No format characters remain after this specifier, so stop
- overallocating the writer buffer. */
- if (ctx->fmtcnt == 0)
- ctx->writer.overallocate = 0;
-
- v = unicode_format_getnextarg(ctx);
- if (v == NULL)
- return -1;
-
-
- switch (arg->ch) {
- case 's':
- case 'r':
- case 'a':
- if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) {
- /* Fast path */
- if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1)
- return -1;
- return 1;
- }
-
- if (PyUnicode_CheckExact(v) && arg->ch == 's') {
- *p_str = Py_NewRef(v);
- }
- else {
- if (arg->ch == 's')
- *p_str = PyObject_Str(v);
- else if (arg->ch == 'r')
- *p_str = PyObject_Repr(v);
- else
- *p_str = PyObject_ASCII(v);
- }
- break;
-
- case 'i':
- case 'd':
- case 'u':
- case 'o':
- case 'x':
- case 'X':
- {
- /* mainformatlong() returns 1 (written to the writer) or -1 (error);
- both are passed straight through. */
- int ret = mainformatlong(v, arg, p_str, writer);
- if (ret != 0)
- return ret;
- arg->sign = 1;
- break;
- }
-
- case 'e':
- case 'E':
- case 'f':
- case 'F':
- case 'g':
- case 'G':
- if (arg->width == -1 && arg->prec == -1
- && !(arg->flags & (F_SIGN | F_BLANK)))
- {
- /* Fast path */
- if (formatfloat(v, arg, NULL, writer) == -1)
- return -1;
- return 1;
- }
-
- arg->sign = 1;
- if (formatfloat(v, arg, p_str, NULL) == -1)
- return -1;
- break;
-
- case 'c':
- {
- Py_UCS4 ch = formatchar(v);
- if (ch == (Py_UCS4) -1)
- return -1;
- if (arg->width == -1 && arg->prec == -1) {
- /* Fast path */
- if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0)
- return -1;
- return 1;
- }
- *p_str = PyUnicode_FromOrdinal(ch);
- break;
- }
-
- default:
- /* NOTE(review): the lower bound 31 lets U+001F, a control character,
- through into the message; 32 may have been intended - confirm. */
- PyErr_Format(PyExc_ValueError,
- "unsupported format character '%c' (0x%x) "
- "at index %zd",
- (31<=arg->ch && arg->ch<=126) ? (char)arg->ch : '?',
- (int)arg->ch,
- ctx->fmtpos - 1);
- return -1;
- }
- /* Catches a failed conversion in any of the branches that break out of
- the switch. */
- if (*p_str == NULL)
- return -1;
- assert (PyUnicode_Check(*p_str));
- return 0;
-}
-
-/* Write the already formatted string str into ctx->writer, applying
- precision truncation (for "s", "r" and "a"), the sign and blank flags, the
- placement of the "0x"/"0X"/"0o" prefix for the alternate form, and left or
- right padding up to arg->width.
-
- arg->width and arg->sign are modified while the output is laid out.
- Return 0 on success, -1 if writing to or preparing the writer failed. */
-static int
-unicode_format_arg_output(struct unicode_formatter_t *ctx,
- struct unicode_format_arg_t *arg,
- PyObject *str)
-{
- Py_ssize_t len;
- int kind;
- const void *pbuf;
- Py_ssize_t pindex;
- Py_UCS4 signchar;
- Py_ssize_t buflen;
- Py_UCS4 maxchar;
- Py_ssize_t sublen;
- _PyUnicodeWriter *writer = &ctx->writer;
- Py_UCS4 fill;
-
- /* Zero padding is only used for conversions that set arg->sign. */
- fill = ' ';
- if (arg->sign && arg->flags & F_ZERO)
- fill = '0';
-
- len = PyUnicode_GET_LENGTH(str);
- if ((arg->width == -1 || arg->width <= len)
- && (arg->prec == -1 || arg->prec >= len)
- && !(arg->flags & (F_SIGN | F_BLANK)))
- {
- /* Fast path */
- if (_PyUnicodeWriter_WriteStr(writer, str) == -1)
- return -1;
- return 0;
- }
-
- /* Truncate the string for "s", "r" and "a" formats
- if the precision is set */
- if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') {
- if (arg->prec >= 0 && len > arg->prec)
- len = arg->prec;
- }
-
- /* Adjust sign and width */
- kind = PyUnicode_KIND(str);
- pbuf = PyUnicode_DATA(str);
- pindex = 0;
- signchar = '\0';
- if (arg->sign) {
- Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
- if (ch == '-' || ch == '+') {
- signchar = ch;
- len--;
- pindex++;
- }
- else if (arg->flags & F_SIGN)
- signchar = '+';
- else if (arg->flags & F_BLANK)
- signchar = ' ';
- else
- arg->sign = 0;
- }
- if (arg->width < len)
- arg->width = len;
-
- /* Prepare the writer */
- maxchar = writer->maxchar;
- if (!(arg->flags & F_LJUST)) {
- if (arg->sign) {
- if ((arg->width-1) > len)
- maxchar = Py_MAX(maxchar, fill);
- }
- else {
- if (arg->width > len)
- maxchar = Py_MAX(maxchar, fill);
- }
- }
- if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
- Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
- maxchar = Py_MAX(maxchar, strmaxchar);
- }
-
- /* One extra slot is needed for the sign when the digits alone already
- fill the width. */
- buflen = arg->width;
- if (arg->sign && len == arg->width)
- buflen++;
- if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1)
- return -1;
-
- /* Write the sign if needed */
- if (arg->sign) {
- if (fill != ' ') {
- PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
- writer->pos += 1;
- }
- if (arg->width > len)
- arg->width--;
- }
-
- /* Write the numeric prefix for "x", "X" and "o" formats
- if the alternate form is used.
- For example, write "0x" for the "%#x" format. */
- if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
- assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
- assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch);
- if (fill != ' ') {
- PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
- PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
- writer->pos += 2;
- pindex += 2;
- }
- arg->width -= 2;
- if (arg->width < 0)
- arg->width = 0;
- len -= 2;
- }
-
- /* Pad left with the fill character if needed */
- if (arg->width > len && !(arg->flags & F_LJUST)) {
- sublen = arg->width - len;
- _PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen);
- writer->pos += sublen;
- arg->width = len;
- }
-
- /* If padding with spaces: write sign if needed and/or numeric prefix if
- the alternate form is used */
- if (fill == ' ') {
- if (arg->sign) {
- PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
- writer->pos += 1;
- }
- if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
- assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
- assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch);
- PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
- PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
- writer->pos += 2;
- pindex += 2;
- }
- }
-
- /* Write characters */
- if (len) {
- _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
- str, pindex, len);
- writer->pos += len;
- }
-
- /* Pad right with the fill character if needed */
- /* Right padding always uses a space, whatever fill is. */
- if (arg->width > len) {
- sublen = arg->width - len;
- _PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen);
- writer->pos += sublen;
- }
- return 0;
-}
-
-/* Helper of PyUnicode_Format(): format one arg.
- ctx->fmtpos must point at the character that follows '%'. "%%" writes a
- literal '%'. Any other specifier is parsed, formatted and, unless it was
- written directly into the writer, padded and written by
- unicode_format_arg_output().
- Return 0 on success, raise an exception and return -1 on error. */
-static int
-unicode_format_arg(struct unicode_formatter_t *ctx)
-{
- struct unicode_format_arg_t arg;
- PyObject *str;
- int ret;
-
- arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos);
- if (arg.ch == '%') {
- ctx->fmtpos++;
- ctx->fmtcnt--;
- if (_PyUnicodeWriter_WriteCharInline(&ctx->writer, '%') < 0)
- return -1;
- return 0;
- }
- /* -1 marks width and precision as "not given". */
- arg.flags = 0;
- arg.width = -1;
- arg.prec = -1;
- arg.sign = 0;
- str = NULL;
-
- ret = unicode_format_arg_parse(ctx, &arg);
- if (ret == -1)
- return -1;
-
- ret = unicode_format_arg_format(ctx, &arg, &str);
- if (ret == -1)
- return -1;
-
- /* ret == 1: already written into the writer, str was not set. */
- if (ret != 1) {
- ret = unicode_format_arg_output(ctx, &arg, str);
- Py_DECREF(str);
- if (ret == -1)
- return -1;
- }
-
- if (ctx->dict && (ctx->argidx < ctx->arglen)) {
- PyErr_SetString(PyExc_TypeError,
- "not all arguments converted during string formatting");
- return -1;
- }
- return 0;
-}
-
-/* Apply printf-style formatting: format % args.
-
- format must be a str (checked with ensure_unicode()). args is used as the
- argument tuple when it is a tuple, otherwise as a single argument; when it
- passes PyMapping_Check() and is neither a tuple nor a str it is also used
- as the mapping for "%(name)" specifiers.
-
- Return the result of _PyUnicodeWriter_Finish() on success. On error return
- NULL with an exception set: SystemError via PyErr_BadInternalCall() for a
- NULL argument, TypeError when tuple arguments are left over, or whatever
- the helpers raised. */
-PyObject *
-PyUnicode_Format(PyObject *format, PyObject *args)
-{
- struct unicode_formatter_t ctx;
-
- if (format == NULL || args == NULL) {
- PyErr_BadInternalCall();
- return NULL;
- }
-
- if (ensure_unicode(format) < 0)
- return NULL;
-
- ctx.fmtstr = format;
- ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
- ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
- ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);
- ctx.fmtpos = 0;
-
- _PyUnicodeWriter_Init(&ctx.writer);
- ctx.writer.min_length = ctx.fmtcnt + 100;
- ctx.writer.overallocate = 1;
-
- if (PyTuple_Check(args)) {
- ctx.arglen = PyTuple_Size(args);
- ctx.argidx = 0;
- }
- else {
- /* Non-tuple: unicode_format_getnextarg() hands out args itself once. */
- ctx.arglen = -1;
- ctx.argidx = -2;
- }
- ctx.args_owned = 0;
- if (PyMapping_Check(args) && !PyTuple_Check(args) && !PyUnicode_Check(args))
- ctx.dict = args;
- else
- ctx.dict = NULL;
- ctx.args = args;
-
- while (--ctx.fmtcnt >= 0) {
- if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
- /* Copy the run of literal characters up to the next '%' or the
- end of the format string in one write. */
- Py_ssize_t nonfmtpos;
-
- nonfmtpos = ctx.fmtpos++;
- while (ctx.fmtcnt >= 0 &&
- PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
- ctx.fmtpos++;
- ctx.fmtcnt--;
- }
- /* The scan ran off the end: step back and stop overallocating,
- since this is the last piece of output. */
- if (ctx.fmtcnt < 0) {
- ctx.fmtpos--;
- ctx.writer.overallocate = 0;
- }
-
- if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
- nonfmtpos, ctx.fmtpos) < 0)
- goto onError;
- }
- else {
- ctx.fmtpos++;
- if (unicode_format_arg(&ctx) == -1)
- goto onError;
- }
- }
-
- if (ctx.argidx < ctx.arglen && !ctx.dict) {
- PyErr_SetString(PyExc_TypeError,
- "not all arguments converted during string formatting");
- goto onError;
- }
-
- /* ctx.args is owned only after a "%(name)" lookup replaced it. */
- if (ctx.args_owned) {
- Py_DECREF(ctx.args);
- }
- return _PyUnicodeWriter_Finish(&ctx.writer);
-
- onError:
- _PyUnicodeWriter_Dealloc(&ctx.writer);
- if (ctx.args_owned) {
- Py_DECREF(ctx.args);
- }
- return NULL;
-}
-
static PyObject *
unicode_subtype_new(PyTypeObject *type, PyObject *unicode);
<ClCompile Include="..\Objects\tupleobject.c" />
<ClCompile Include="..\Objects\typeobject.c" />
<ClCompile Include="..\Objects\typevarobject.c" />
+ <ClCompile Include="..\Objects\unicode_format.c" />
<ClCompile Include="..\Objects\unicodectype.c" />
<ClCompile Include="..\Objects\unicode_formatter.c" />
<ClCompile Include="..\Objects\unicodeobject.c" />
<ClCompile Include="..\Objects\typeobject.c">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\Objects\unicode_format.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
<ClCompile Include="..\Objects\unicodectype.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Objects\tupleobject.c" />
<ClCompile Include="..\Objects\typeobject.c" />
<ClCompile Include="..\Objects\typevarobject.c" />
+ <ClCompile Include="..\Objects\unicode_format.c" />
<ClCompile Include="..\Objects\unicodectype.c" />
<ClCompile Include="..\Objects\unicode_formatter.c" />
<ClCompile Include="..\Objects\unicodeobject.c" />
<ClCompile Include="..\Objects\typeobject.c">
<Filter>Objects</Filter>
</ClCompile>
+ <ClCompile Include="..\Objects\unicode_format.c">
+ <Filter>Objects</Filter>
+ </ClCompile>
<ClCompile Include="..\Objects\unicodectype.c">
<Filter>Objects</Filter>
</ClCompile>