From: Fredrik Lundh
Date: Tue, 26 Jun 2001 20:01:56 +0000 (+0000)
Subject: more unicode tweaks: make unichr(0xdddddddd) behave like u"\Udddddddd"
X-Git-Tag: v2.2a3~1415
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=0dcf67e56d891832b53a82ee0abb60dcc2e0148e;p=thirdparty%2FPython%2Fcpython.git

more unicode tweaks: make unichr(0xdddddddd) behave like u"\Udddddddd"
wrt surrogates.  (this extends the valid range from 65535 to 1114111)

[review note: the added lines below differ from the referenced commit.
The ValueError text now says range(0x110000) so that it matches the
accepted range 0 <= x <= 0x10ffff (range(0x10ffff) would exclude
0x10ffff), and the two inline comments were reworded.]
---

diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 4da984f8da48..ed5519f25748 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -308,23 +308,34 @@ static PyObject *
 builtin_unichr(PyObject *self, PyObject *args)
 {
 	long x;
-	Py_UNICODE s[1];
+	Py_UNICODE s[2];
 
 	if (!PyArg_ParseTuple(args, "l:unichr", &x))
 		return NULL;
-	if (x < 0 || x >= 65536) {
+
+	if (x < 0 || x > 0x10ffff) {
 		PyErr_SetString(PyExc_ValueError,
-				"unichr() arg not in range(65536)");
+				"unichr() arg not in range(0x110000)");
 		return NULL;
 	}
-	s[0] = (Py_UNICODE)x;
-	return PyUnicode_FromUnicode(s, 1);
+
+	if (x <= 0xffff) {
+		/* UCS-2 character: stored as a single Py_UNICODE unit */
+		s[0] = (Py_UNICODE) x;
+		return PyUnicode_FromUnicode(s, 1);
+	} else {
+		/* UCS-4 character: split the 20-bit offset into a surrogate pair */
+		x -= 0x10000L;
+		s[0] = 0xD800 + (Py_UNICODE) (x >> 10);
+		s[1] = 0xDC00 + (Py_UNICODE) (x & 0x03FF);
+		return PyUnicode_FromUnicode(s, 2);
+	}
 }
 
 static char unichr_doc[] =
 "unichr(i) -> Unicode character\n\
 \n\
-Return a Unicode string of one character with ordinal i; 0 <= i < 65536.";
+Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff.";
 
 
 static PyObject *