locale encoding
On Unix, it is the encoding of the LC_CTYPE locale. It can be set with
- ``locale.setlocale(locale.LC_CTYPE, new_locale)``.
+ :func:`locale.setlocale(locale.LC_CTYPE, new_locale) <locale.setlocale>`.
- On Windows, it is the ANSI code page (ex: ``cp1252``).
+ On Windows, it is the ANSI code page (ex: ``"cp1252"``).
- ``locale.getpreferredencoding(False)`` can be used to get the locale
- encoding.
+ On Android and VxWorks, Python uses ``"utf-8"`` as the locale encoding.
- Python uses the :term:`filesystem encoding and error handler` to convert
- between Unicode filenames and bytes filenames.
+ ``locale.getencoding()`` can be used to get the locale encoding.
+
+ See also the :term:`filesystem encoding and error handler`.
list
A built-in Python :term:`sequence`. Despite its name it is more akin
is not necessary or desired, *do_setlocale* should be set to ``False``.
On Android or if the :ref:`Python UTF-8 Mode <utf8-mode>` is enabled, always
- return ``'UTF-8'``, the :term:`locale encoding` and the *do_setlocale*
+ return ``'utf-8'``; the :term:`locale encoding` and the *do_setlocale*
argument are ignored.
The :ref:`Python preinitialization <c-preinit>` configures the LC_CTYPE
locale. See also the :term:`filesystem encoding and error handler`.
.. versionchanged:: 3.7
- The function now always returns ``UTF-8`` on Android or if the
+ The function now always returns ``"utf-8"`` on Android or if the
:ref:`Python UTF-8 Mode <utf8-mode>` is enabled.
+.. function:: getencoding()
+
+ Get the current :term:`locale encoding`:
+
+ * On Android and VxWorks, return ``"utf-8"``.
+ * On Unix, return the encoding of the current :data:`LC_CTYPE` locale.
+ Return ``"utf-8"`` if ``nl_langinfo(CODESET)`` returns an empty string:
+ for example, if the current LC_CTYPE locale is not supported.
+ * On Windows, return the ANSI code page.
+
+ The :ref:`Python preinitialization <c-preinit>` configures the LC_CTYPE
+ locale. See also the :term:`filesystem encoding and error handler`.
+
+ This function is similar to
+ :func:`getpreferredencoding(False) <getpreferredencoding>` except this
+ function ignores the :ref:`Python UTF-8 Mode <utf8-mode>`.
+
+ .. versionadded:: 3.11
+
+
.. function:: normalize(localename)
Returns a normalized locale code for the given locale name. The returned locale
* Add :func:`inspect.ismethodwrapper` for checking if the type of an object is a
:class:`~types.MethodWrapperType`. (Contributed by Hakan Çelik in :issue:`29418`.)
+locale
+------
+
+* Add :func:`locale.getencoding` to get the current locale encoding. It is similar to
+ ``locale.getpreferredencoding(False)`` but ignores the
+ :ref:`Python UTF-8 Mode <utf8-mode>`.
+
math
----
"setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
"str", "atof", "atoi", "format", "format_string", "currency",
"normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
- "LC_NUMERIC", "LC_ALL", "CHAR_MAX"]
+ "LC_NUMERIC", "LC_ALL", "CHAR_MAX", "getencoding"]
def _strcoll(a,b):
""" strcoll(string,string) -> int.
try:
- from _locale import _get_locale_encoding
+ from _locale import getencoding
except ImportError:
- def _get_locale_encoding():
+ def getencoding():
if hasattr(sys, 'getandroidapilevel'):
# On Android langinfo.h and CODESET are missing, and UTF-8 is
# always used in mbstowcs() and wcstombs().
- return 'UTF-8'
- if sys.flags.utf8_mode:
- return 'UTF-8'
+ return 'utf-8'
encoding = getdefaultlocale()[1]
if encoding is None:
- # LANG not set, default conservatively to ASCII
- encoding = 'ascii'
+ # LANG not set, default to UTF-8
+ encoding = 'utf-8'
return encoding
try:
except NameError:
def getpreferredencoding(do_setlocale=True):
"""Return the charset that the user is likely using."""
- return _get_locale_encoding()
+ if sys.flags.utf8_mode:
+ return 'utf-8'
+ return getencoding()
else:
# On Unix, if CODESET is available, use that.
def getpreferredencoding(do_setlocale=True):
"""Return the charset that the user is likely using,
according to the system configuration."""
if sys.flags.utf8_mode:
- return 'UTF-8'
+ return 'utf-8'
if not do_setlocale:
- return _get_locale_encoding()
+ return getencoding()
old_loc = setlocale(LC_CTYPE)
try:
setlocale(LC_CTYPE, "")
except Error:
pass
- return _get_locale_encoding()
+ return getencoding()
finally:
setlocale(LC_CTYPE, old_loc)
def test_locale_getpreferredencoding(self):
code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
out = self.get_output('-X', 'utf8', '-c', code)
- self.assertEqual(out, 'UTF-8 UTF-8')
+ self.assertEqual(out, 'utf-8 utf-8')
for loc in POSIX_LOCALES:
with self.subTest(LC_ALL=loc):
out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
- self.assertEqual(out, 'UTF-8 UTF-8')
+ self.assertEqual(out, 'utf-8 utf-8')
@unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
def test_cmd_line(self):
# In UTF-8 Mode, device_encoding(fd) returns "utf-8" if fd is a TTY
with open(filename, encoding="utf8") as fp:
out = fp.read().rstrip()
- self.assertEqual(out, 'True UTF-8')
+ self.assertEqual(out, 'True utf-8')
if __name__ == "__main__":
--- /dev/null
+Add :func:`locale.getencoding` to get the current locale encoding.
+It is similar to ``locale.getpreferredencoding(False)`` but ignores the
+:ref:`Python UTF-8 Mode <utf8-mode>`.
}
}
if (encoding == NULL && self->encoding == NULL) {
- self->encoding = _Py_GetLocaleEncodingObject();
+ if (_PyRuntime.preconfig.utf8_mode) {
+ _Py_DECLARE_STR(utf_8, "utf-8");
+ self->encoding = Py_NewRef(&_Py_STR(utf_8));
+ }
+ else {
+ self->encoding = _Py_GetLocaleEncodingObject();
+ }
if (self->encoding == NULL) {
goto error;
}
/*[clinic input]
-_locale._get_locale_encoding
+_locale.getencoding
Get the current locale encoding.
[clinic start generated code]*/
static PyObject *
-_locale__get_locale_encoding_impl(PyObject *module)
-/*[clinic end generated code: output=e8e2f6f6f184591a input=513d9961d2f45c76]*/
+_locale_getencoding_impl(PyObject *module)
+/*[clinic end generated code: output=86b326b971872e46 input=6503d11e5958b360]*/
{
return _Py_GetLocaleEncodingObject();
}
_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
#endif
#endif
- _LOCALE__GET_LOCALE_ENCODING_METHODDEF
+ _LOCALE_GETENCODING_METHODDEF
{NULL, NULL}
};
#endif /* defined(HAVE_LIBINTL_H) && defined(HAVE_BIND_TEXTDOMAIN_CODESET) */
-PyDoc_STRVAR(_locale__get_locale_encoding__doc__,
-"_get_locale_encoding($module, /)\n"
+PyDoc_STRVAR(_locale_getencoding__doc__,
+"getencoding($module, /)\n"
"--\n"
"\n"
"Get the current locale encoding.");
-#define _LOCALE__GET_LOCALE_ENCODING_METHODDEF \
- {"_get_locale_encoding", (PyCFunction)_locale__get_locale_encoding, METH_NOARGS, _locale__get_locale_encoding__doc__},
+#define _LOCALE_GETENCODING_METHODDEF \
+ {"getencoding", (PyCFunction)_locale_getencoding, METH_NOARGS, _locale_getencoding__doc__},
static PyObject *
-_locale__get_locale_encoding_impl(PyObject *module);
+_locale_getencoding_impl(PyObject *module);
static PyObject *
-_locale__get_locale_encoding(PyObject *module, PyObject *Py_UNUSED(ignored))
+_locale_getencoding(PyObject *module, PyObject *Py_UNUSED(ignored))
{
- return _locale__get_locale_encoding_impl(module);
+ return _locale_getencoding_impl(module);
}
#ifndef _LOCALE_STRCOLL_METHODDEF
#ifndef _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
#define _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
#endif /* !defined(_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF) */
-/*[clinic end generated code: output=cd703c8a3a75fcf4 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=ea71e9b94bdaa47d input=a9049054013a1b77]*/
return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
#else
+ if (_PyRuntime.preconfig.utf8_mode) {
+ _Py_DECLARE_STR(utf_8, "utf-8");
+ return Py_NewRef(&_Py_STR(utf_8));
+ }
return _Py_GetLocaleEncodingObject();
#endif
}
// Get the current locale encoding name:
//
-// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
-// - Return "UTF-8" if the UTF-8 Mode is enabled
+// - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
+// - The UTF-8 Mode is ignored: callers check preconfig.utf8_mode themselves
// - On Windows, return the ANSI code page (ex: "cp1250")
-// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
+// - Return "utf-8" if nl_langinfo(CODESET) returns an empty string.
// - Otherwise, return nl_langinfo(CODESET).
//
// Return NULL on memory allocation failure.
#ifdef _Py_FORCE_UTF8_LOCALE
// On Android langinfo.h and CODESET are missing,
// and UTF-8 is always used in mbstowcs() and wcstombs().
- return _PyMem_RawWcsdup(L"UTF-8");
+ return _PyMem_RawWcsdup(L"utf-8");
#else
- const PyPreConfig *preconfig = &_PyRuntime.preconfig;
- if (preconfig->utf8_mode) {
- return _PyMem_RawWcsdup(L"UTF-8");
- }
#ifdef MS_WINDOWS
wchar_t encoding[23];
if (!encoding || encoding[0] == '\0') {
// Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
// macOS if the LC_CTYPE locale is not supported.
- return _PyMem_RawWcsdup(L"UTF-8");
+ return _PyMem_RawWcsdup(L"utf-8");
}
wchar_t *wstr;
config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig,
wchar_t **locale_encoding)
{
- wchar_t *encoding = _Py_GetLocaleEncoding();
+ wchar_t *encoding;
+ if (preconfig->utf8_mode) {
+ encoding = _PyMem_RawWcsdup(L"utf-8");
+ }
+ else {
+ encoding = _Py_GetLocaleEncoding();
+ }
if (encoding == NULL) {
return _PyStatus_NO_MEMORY();
}