--- /dev/null
+/* stringlib: repr() implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+/* Write the repr() form of `unicode` into the buffer at `odata`.
+
+   The output is `quote`, followed by every character of `unicode` either
+   copied unchanged or replaced by a backslash escape, followed by `quote`
+   again.  Escapes used: a backslash before `quote` and before a backslash;
+   \t, \n and \r; \xhh for the remaining characters below ' ', for 0x7F and
+   for non-printable characters up to 0xff; \uxxxx for non-printable
+   characters up to 0xffff; \U plus eight hex digits for non-printable
+   characters above that.
+
+   No length is passed in and nothing here checks for overflow, so `odata`
+   must already be large enough for the whole result.  No terminator is
+   written after the closing quote.  Copied characters are stored through
+   STRINGLIB_CHAR, so this assumes the caller picked an element type wide
+   enough for them -- TODO confirm against the caller.
+*/
+static void
+STRINGLIB(repr)(PyObject *unicode, Py_UCS4 quote,
+ STRINGLIB_CHAR *odata)
+{
+ Py_ssize_t isize = PyUnicode_GET_LENGTH(unicode);
+ const void *idata = PyUnicode_DATA(unicode);
+ int ikind = PyUnicode_KIND(unicode);
+
+ *odata++ = quote; /* opening quote */
+ for (Py_ssize_t i = 0; i < isize; i++) {
+ Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
+
+ /* Escape the active quote character and backslashes with a
+    leading backslash; `continue` skips the if/else chain below */
+ if ((ch == quote) || (ch == '\\')) {
+ *odata++ = '\\';
+ *odata++ = ch;
+ continue;
+ }
+
+ /* Map special whitespace to '\t', '\n', '\r' */
+ if (ch == '\t') {
+ *odata++ = '\\';
+ *odata++ = 't';
+ }
+ else if (ch == '\n') {
+ *odata++ = '\\';
+ *odata++ = 'n';
+ }
+ else if (ch == '\r') {
+ *odata++ = '\\';
+ *odata++ = 'r';
+ }
+
+ /* Map non-printable US ASCII to '\xhh'
+    (everything below space that was not handled above, and DEL) */
+ else if (ch < ' ' || ch == 0x7F) {
+ *odata++ = '\\';
+ *odata++ = 'x';
+ *odata++ = Py_hexdigits[(ch >> 4) & 0x000F];
+ *odata++ = Py_hexdigits[ch & 0x000F];
+ }
+
+ /* Copy the remaining ASCII characters as-is */
+ else if (ch < 0x7F) {
+ *odata++ = ch;
+ }
+
+ /* Non-ASCII characters */
+ else {
+ /* Map Unicode whitespace and control characters
+ (categories Z* and C* except ASCII space)
+ */
+ if (!Py_UNICODE_ISPRINTABLE(ch)) {
+ *odata++ = '\\';
+ /* Map 8-bit characters to '\xhh' */
+ if (ch <= 0xff) {
+ *odata++ = 'x';
+ *odata++ = Py_hexdigits[(ch >> 4) & 0x000F];
+ *odata++ = Py_hexdigits[ch & 0x000F];
+ }
+ /* Map 16-bit characters to '\uxxxx' */
+ else if (ch <= 0xffff) {
+ *odata++ = 'u';
+ *odata++ = Py_hexdigits[(ch >> 12) & 0xF];
+ *odata++ = Py_hexdigits[(ch >> 8) & 0xF];
+ *odata++ = Py_hexdigits[(ch >> 4) & 0xF];
+ *odata++ = Py_hexdigits[ch & 0xF];
+ }
+ /* Map 21-bit characters to '\U00xxxxxx'
+    (eight hex digits are always written, most significant first) */
+ else {
+ *odata++ = 'U';
+ *odata++ = Py_hexdigits[(ch >> 28) & 0xF];
+ *odata++ = Py_hexdigits[(ch >> 24) & 0xF];
+ *odata++ = Py_hexdigits[(ch >> 20) & 0xF];
+ *odata++ = Py_hexdigits[(ch >> 16) & 0xF];
+ *odata++ = Py_hexdigits[(ch >> 12) & 0xF];
+ *odata++ = Py_hexdigits[(ch >> 8) & 0xF];
+ *odata++ = Py_hexdigits[(ch >> 4) & 0xF];
+ *odata++ = Py_hexdigits[ch & 0xF];
+ }
+ }
+ /* Copy printable non-ASCII characters as-is */
+ else {
+ *odata++ = ch;
+ }
+ }
+ }
+ *odata = quote; /* closing quote; odata is not advanced past it */
+}
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/replace.h"
+#include "stringlib/repr.h"
#include "stringlib/find_max_char.h"
#include "stringlib/undef.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/replace.h"
+#include "stringlib/repr.h"
#include "stringlib/find_max_char.h"
#include "stringlib/undef.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/replace.h"
+#include "stringlib/repr.h"
#include "stringlib/find_max_char.h"
#include "stringlib/undef.h"
static PyObject *
unicode_repr(PyObject *unicode)
{
- PyObject *repr;
- Py_ssize_t isize;
- Py_ssize_t osize, squote, dquote, i, o;
- Py_UCS4 max, quote;
- int ikind, okind, unchanged;
- const void *idata;
- void *odata;
-
- isize = PyUnicode_GET_LENGTH(unicode);
- idata = PyUnicode_DATA(unicode);
+ Py_ssize_t isize = PyUnicode_GET_LENGTH(unicode); /* number of input characters */
+ const void *idata = PyUnicode_DATA(unicode);
 /* Compute length of output, quote characters, and
 maximum character */
- osize = 0;
- max = 127;
- squote = dquote = 0;
- ikind = PyUnicode_KIND(unicode);
- for (i = 0; i < isize; i++) {
+ Py_ssize_t osize = 0; /* output length, accumulated per character below */
+ Py_UCS4 maxch = 127; /* raised below by printable non-ASCII characters */
+ Py_ssize_t squote = 0;
+ Py_ssize_t dquote = 0;
+ int ikind = PyUnicode_KIND(unicode);
+ for (Py_ssize_t i = 0; i < isize; i++) {
Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
Py_ssize_t incr = 1;
switch (ch) {
else if (ch < 0x7f)
;
else if (Py_UNICODE_ISPRINTABLE(ch))
- max = ch > max ? ch : max;
+ maxch = (ch > maxch) ? ch : maxch;
else if (ch < 0x100)
incr = 4; /* \xHH */
else if (ch < 0x10000)
osize += incr;
}
- quote = '\'';
- unchanged = (osize == isize);
+ Py_UCS4 quote = '\'';
+ int changed = (osize != isize); /* lengths differ when some character's incr was not 1 */
if (squote) {
- unchanged = 0;
+ changed = 1;
if (dquote)
/* Both squote and dquote present. Use squote,
and escape them */
}
osize += 2; /* quotes */
- repr = PyUnicode_New(osize, max);
+ PyObject *repr = PyUnicode_New(osize, maxch); /* osize already includes the two quotes */
if (repr == NULL)
return NULL;
- okind = PyUnicode_KIND(repr);
- odata = PyUnicode_DATA(repr);
+ int okind = PyUnicode_KIND(repr);
+ void *odata = PyUnicode_DATA(repr);
+
+ if (!changed) { /* fast path: opening quote, verbatim copy of the input, closing quote */
+ PyUnicode_WRITE(okind, odata, 0, quote);
- PyUnicode_WRITE(okind, odata, 0, quote);
- PyUnicode_WRITE(okind, odata, osize-1, quote);
- if (unchanged) {
_PyUnicode_FastCopyCharacters(repr, 1,
unicode, 0,
isize);
+
+ PyUnicode_WRITE(okind, odata, osize-1, quote);
}
else {
- for (i = 0, o = 1; i < isize; i++) {
- Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
-
- /* Escape quotes and backslashes */
- if ((ch == quote) || (ch == '\\')) {
- PyUnicode_WRITE(okind, odata, o++, '\\');
- PyUnicode_WRITE(okind, odata, o++, ch);
- continue;
- }
-
- /* Map special whitespace to '\t', \n', '\r' */
- if (ch == '\t') {
- PyUnicode_WRITE(okind, odata, o++, '\\');
- PyUnicode_WRITE(okind, odata, o++, 't');
- }
- else if (ch == '\n') {
- PyUnicode_WRITE(okind, odata, o++, '\\');
- PyUnicode_WRITE(okind, odata, o++, 'n');
- }
- else if (ch == '\r') {
- PyUnicode_WRITE(okind, odata, o++, '\\');
- PyUnicode_WRITE(okind, odata, o++, 'r');
- }
-
- /* Map non-printable US ASCII to '\xhh' */
- else if (ch < ' ' || ch == 0x7F) {
- PyUnicode_WRITE(okind, odata, o++, '\\');
- PyUnicode_WRITE(okind, odata, o++, 'x');
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
- }
-
- /* Copy ASCII characters as-is */
- else if (ch < 0x7F) {
- PyUnicode_WRITE(okind, odata, o++, ch);
- }
-
- /* Non-ASCII characters */
- else {
- /* Map Unicode whitespace and control characters
- (categories Z* and C* except ASCII space)
- */
- if (!Py_UNICODE_ISPRINTABLE(ch)) {
- PyUnicode_WRITE(okind, odata, o++, '\\');
- /* Map 8-bit characters to '\xhh' */
- if (ch <= 0xff) {
- PyUnicode_WRITE(okind, odata, o++, 'x');
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
- }
- /* Map 16-bit characters to '\uxxxx' */
- else if (ch <= 0xffff) {
- PyUnicode_WRITE(okind, odata, o++, 'u');
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
- }
- /* Map 21-bit characters to '\U00xxxxxx' */
- else {
- PyUnicode_WRITE(okind, odata, o++, 'U');
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
- PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
- }
- }
- /* Copy characters as-is */
- else {
- PyUnicode_WRITE(okind, odata, o++, ch);
- }
- }
+ switch (okind) { /* call the stringlib repr writer matching the output string's kind */
+ case PyUnicode_1BYTE_KIND:
+ ucs1lib_repr(unicode, quote, odata);
+ break;
+ case PyUnicode_2BYTE_KIND:
+ ucs2lib_repr(unicode, quote, odata);
+ break;
+ default:
+ assert(okind == PyUnicode_4BYTE_KIND);
+ ucs4lib_repr(unicode, quote, odata);
}
}
- /* Closing quote already added at the beginning */
+
assert(_PyUnicode_CheckConsistency(repr, 1));
return repr;
}