#include "Python.h"
#include "pycore_interp.h" // PyInterpreterState.fs_codec
#include "pycore_long.h" // _PyLong_GetZero()
+#include "pycore_fileutils.h" // _Py_GetLocaleEncoding()
#include "pycore_object.h"
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "structmember.h" // PyMemberDef
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
-_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
}
}
if (encoding == NULL && self->encoding == NULL) {
- PyObject *locale_module = _PyIO_get_locale_module(state);
- if (locale_module == NULL)
- goto catch_ImportError;
- self->encoding = _PyObject_CallMethodIdOneArg(
- locale_module, &PyId_getpreferredencoding, Py_False);
- Py_DECREF(locale_module);
+ self->encoding = _Py_GetLocaleEncoding();
if (self->encoding == NULL) {
- catch_ImportError:
- /*
- Importing locale can raise an ImportError because of
- _functools, and locale.getpreferredencoding can raise an
- ImportError if _locale is not available. These will happen
- during module building.
- */
- if (PyErr_ExceptionMatches(PyExc_ImportError)) {
- PyErr_Clear();
- self->encoding = PyUnicode_FromString("ascii");
- }
- else
- goto error;
+ goto error;
}
- else if (!PyUnicode_Check(self->encoding))
- Py_CLEAR(self->encoding);
+ assert(PyUnicode_Check(self->encoding));
}
if (self->encoding != NULL) {
encoding = PyUnicode_AsUTF8(self->encoding);
#include "Python.h"
-#include "pycore_fileutils.h"
+#include "pycore_fileutils.h" // fileutils definitions
+#include "pycore_runtime.h" // _PyRuntime
#include "osdefs.h" // SEP
#include <locale.h>
}
+// Return the name of the current locale encoding as a str object, the
+// C-level counterpart of locale.getpreferredencoding(False).
+// On the explicit failure path below, a ValueError is set and NULL is
+// returned.
+// See also config_get_locale_encoding()
+PyObject *
+_Py_GetLocaleEncoding(void)
+{
+#ifdef _Py_FORCE_UTF8_LOCALE
+    // On Android langinfo.h and CODESET are missing,
+    // and UTF-8 is always used in mbstowcs() and wcstombs().
+    return PyUnicode_FromString("UTF-8");
+#else
+    // The UTF-8 Mode check comes first: the locale is not consulted at all
+    // when it is enabled.
+    if (_PyRuntime.preconfig.utf8_mode) {
+        return PyUnicode_FromString("UTF-8");
+    }
+
+#ifdef MS_WINDOWS
+    // Build the name from the code page number reported by GetACP().
+    return PyUnicode_FromFormat("cp%u", GetACP());
+#else
+    const char *codeset = nl_langinfo(CODESET);
+    if (codeset != NULL && codeset[0] != '\0') {
+        // Decode from UTF-8
+        return PyUnicode_FromString(codeset);
+    }
+
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+    // nl_langinfo() can return an empty string when the LC_CTYPE locale is
+    // not supported. Default to UTF-8 in that case, because UTF-8 is the
+    // default charset on macOS.
+    return PyUnicode_FromString("UTF-8");
+#else
+    PyErr_SetString(PyExc_ValueError,
+                    "failed to get the locale encoding: "
+                    "nl_langinfo(CODESET) returns an empty string");
+    return NULL;
+#endif  // !_Py_FORCE_UTF8_FS_ENCODING
+#endif  // !MS_WINDOWS
+#endif  // !_Py_FORCE_UTF8_LOCALE
+}
+
+
#ifdef MS_WINDOWS
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
configured. */
PyStatus
PyConfig_SetBytesString(PyConfig *config, wchar_t **config_str,
- const char *str)
+ const char *str)
{
return CONFIG_SET_BYTES_STR(config, config_str, str, "string");
}
static const wchar_t *
-config_get_stdio_errors(void)
+config_get_stdio_errors(const PyPreConfig *preconfig)
{
+ if (preconfig->utf8_mode) {
+ /* UTF-8 Mode uses UTF-8/surrogateescape */
+ return L"surrogateescape";
+ }
+
#ifndef MS_WINDOWS
const char *loc = setlocale(LC_CTYPE, NULL);
if (loc != NULL) {
}
+/* Store the name of the locale encoding in *locale_encoding.
+
+   The value is chosen as follows:
+   - L"utf-8" if _Py_FORCE_UTF8_LOCALE is defined, or if
+     preconfig->utf8_mode is set;
+   - on Windows, "cp" followed by the number returned by GetACP();
+   - otherwise the string returned by nl_langinfo(CODESET). If that string
+     is NULL or empty, "UTF-8" is used when _Py_FORCE_UTF8_FS_ENCODING is
+     defined; without it an error status is returned.
+
+   See also _Py_GetLocaleEncoding() and config_get_fs_encoding() */
static PyStatus
-config_get_locale_encoding(PyConfig *config, wchar_t **locale_encoding)
+config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig,
+ wchar_t **locale_encoding)
{
+#ifdef _Py_FORCE_UTF8_LOCALE
+ return PyConfig_SetString(config, locale_encoding, L"utf-8");
+#else
+ if (preconfig->utf8_mode) {
+ return PyConfig_SetString(config, locale_encoding, L"utf-8");
+ }
+
#ifdef MS_WINDOWS
char encoding[20];
PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
return PyConfig_SetBytesString(config, locale_encoding, encoding);
-#elif defined(_Py_FORCE_UTF8_LOCALE)
- return PyConfig_SetString(config, locale_encoding, L"utf-8");
#else
const char *encoding = nl_langinfo(CODESET);
if (!encoding || encoding[0] == '\0') {
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+ // nl_langinfo() can return an empty string when the LC_CTYPE locale is
+ // not supported. Default to UTF-8 in that case, because UTF-8 is the
+ // default charset on macOS.
+ encoding = "UTF-8";
+#else
return _PyStatus_ERR("failed to get the locale encoding: "
- "nl_langinfo(CODESET) failed");
+ "nl_langinfo(CODESET) returns an empty string");
+#endif
}
/* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */
return CONFIG_SET_BYTES_STR(config,
locale_encoding, encoding,
"nl_langinfo(CODESET)");
-#endif
+#endif // !MS_WINDOWS
+#endif // !_Py_FORCE_UTF8_LOCALE
}
PyMem_RawFree(pythonioencoding);
}
- /* UTF-8 Mode uses UTF-8/surrogateescape */
- if (preconfig->utf8_mode) {
- if (config->stdio_encoding == NULL) {
- status = PyConfig_SetString(config, &config->stdio_encoding,
- L"utf-8");
- if (_PyStatus_EXCEPTION(status)) {
- return status;
- }
- }
- if (config->stdio_errors == NULL) {
- status = PyConfig_SetString(config, &config->stdio_errors,
- L"surrogateescape");
- if (_PyStatus_EXCEPTION(status)) {
- return status;
- }
- }
- }
-
/* Choose the default error handler based on the current locale. */
if (config->stdio_encoding == NULL) {
- status = config_get_locale_encoding(config, &config->stdio_encoding);
+ status = config_get_locale_encoding(config, preconfig,
+ &config->stdio_encoding);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
}
if (config->stdio_errors == NULL) {
- const wchar_t *errors = config_get_stdio_errors();
+ const wchar_t *errors = config_get_stdio_errors(preconfig);
assert(errors != NULL);
status = PyConfig_SetString(config, &config->stdio_errors, errors);
}
+// Pick the filesystem encoding name and store it in *fs_encoding.
+// The choice is made per platform; only the non-Windows branch that is
+// neither in UTF-8 Mode nor forced to ASCII falls back to the locale
+// encoding.
+// See also config_get_locale_encoding()
+static PyStatus
+config_get_fs_encoding(PyConfig *config, const PyPreConfig *preconfig,
+                       wchar_t **fs_encoding)
+{
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+    return PyConfig_SetString(config, fs_encoding, L"utf-8");
+#elif defined(MS_WINDOWS)
+    // Legacy Windows filesystem encoding: mbcs/replace.
+    // Otherwise Windows defaults to utf-8/surrogatepass (PEP 529).
+    return PyConfig_SetString(
+        config, fs_encoding,
+        preconfig->legacy_windows_fs_encoding ? L"mbcs" : L"utf-8");
+#else  // !MS_WINDOWS
+    if (preconfig->utf8_mode) {
+        return PyConfig_SetString(config, fs_encoding, L"utf-8");
+    }
+    if (_Py_GetForceASCII()) {
+        return PyConfig_SetString(config, fs_encoding, L"ascii");
+    }
+    return config_get_locale_encoding(config, preconfig, fs_encoding);
+#endif  // !MS_WINDOWS
+}
+
+
static PyStatus
config_init_fs_encoding(PyConfig *config, const PyPreConfig *preconfig)
{
PyStatus status;
if (config->filesystem_encoding == NULL) {
-#ifdef _Py_FORCE_UTF8_FS_ENCODING
- status = PyConfig_SetString(config, &config->filesystem_encoding, L"utf-8");
-#else
-
-#ifdef MS_WINDOWS
- if (preconfig->legacy_windows_fs_encoding) {
- /* Legacy Windows filesystem encoding: mbcs/replace */
- status = PyConfig_SetString(config, &config->filesystem_encoding,
- L"mbcs");
- }
- else
-#endif
- if (preconfig->utf8_mode) {
- status = PyConfig_SetString(config, &config->filesystem_encoding,
- L"utf-8");
- }
-#ifndef MS_WINDOWS
- else if (_Py_GetForceASCII()) {
- status = PyConfig_SetString(config, &config->filesystem_encoding,
- L"ascii");
- }
-#endif
- else {
-#ifdef MS_WINDOWS
- /* Windows defaults to utf-8/surrogatepass (PEP 529). */
- status = PyConfig_SetString(config, &config->filesystem_encoding,
- L"utf-8");
-#else
- status = config_get_locale_encoding(config,
- &config->filesystem_encoding);
-#endif
- }
-#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
-
+ status = config_get_fs_encoding(config, preconfig,
+ &config->filesystem_encoding);
if (_PyStatus_EXCEPTION(status)) {
return status;
}