bpo-38252: Use 8-byte step to detect ASCII sequence in 64-bit Windows build (GH-16334)

author Ma Lin <animalize@users.noreply.github.com>

Sun, 18 Oct 2020 14:48:38 +0000 (22:48 +0800)

committer GitHub <noreply@github.com>

Sun, 18 Oct 2020 14:48:38 +0000 (17:48 +0300)
author Ma Lin <animalize@users.noreply.github.com>
Sun, 18 Oct 2020 14:48:38 +0000 (22:48 +0800)
committer GitHub <noreply@github.com>
Sun, 18 Oct 2020 14:48:38 +0000 (17:48 +0300)
diff --git a/Misc/NEWS.d/next/Windows/2020-10-18-18-43-45.bpo-38252.7Nlepg.rst b/Misc/NEWS.d/next/Windows/2020-10-18-18-43-45.bpo-38252.7Nlepg.rst

new file mode 100644 (file)

index 0000000..c103e6c
--- /dev/null
+++ b/Misc/NEWS.d/next/Windows/2020-10-18-18-43-45.bpo-38252.7Nlepg.rst
@@ -0,0 +1 @@
+Use 8-byte step to detect ASCII sequence in 64-bit Windows build.
diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c

index 72daa1fdd554e0b26d4723075be4b92d013c0d65..1512086e6131fc7dfaecbebc0c1a9a36441b9865 100644 (file)
--- a/Objects/bytes_methods.c
+++ b/Objects/bytes_methods.c
@@ -100,14 +100,14 @@ Return True if B is empty or all characters in B are ASCII,\n\
  False otherwise.");
  
  // Optimization is copied from ascii_decode in unicodeobject.c
-/* Mask to quickly check whether a C 'long' contains a
+/* Mask to quickly check whether a C 'size_t' contains a
     non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define ASCII_CHAR_MASK 0x80808080UL
+#if (SIZEOF_SIZE_T == 8)
+# define ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define ASCII_CHAR_MASK 0x80808080U
  #else
-# error C 'long' size should be either 4 or 8!
+# error C 'size_t' size should be either 4 or 8!
  #endif
  
  PyObject*
@@ -115,20 +115,20 @@ _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
  {
      const char *p = cptr;
      const char *end = p + len;
-    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
+    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
  
      while (p < end) {
          /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
             for an explanation. */
-        if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
+        if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
              /* Help allocation */
              const char *_p = p;
              while (_p < aligned_end) {
-                unsigned long value = *(const unsigned long *) _p;
+                size_t value = *(const size_t *) _p;
                  if (value & ASCII_CHAR_MASK) {
                      Py_RETURN_FALSE;
                  }
-                _p += SIZEOF_LONG;
+                _p += SIZEOF_SIZE_T;
              }
              p = _p;
              if (_p == end)
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h

index 197605b012e5c6d9112d7b7be24f62cb386f8073..b6ca404b1a2d7a7c158ec2b8c914e4465e2bba83 100644 (file)
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -6,14 +6,14 @@
  
  #include "pycore_bitutils.h"      // _Py_bswap32()
  
-/* Mask to quickly check whether a C 'long' contains a
+/* Mask to quickly check whether a C 'size_t' contains a
     non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define ASCII_CHAR_MASK 0x80808080UL
+#if (SIZEOF_SIZE_T == 8)
+# define ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define ASCII_CHAR_MASK 0x80808080U
  #else
-# error C 'long' size should be either 4 or 8!
+# error C 'size_t' size should be either 4 or 8!
  #endif
  
  /* 10xxxxxx */
@@ -26,7 +26,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
  {
      Py_UCS4 ch;
      const char *s = *inptr;
-    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
+    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
      STRINGLIB_CHAR *p = dest + *outpos;
  
      while (s < end) {
@@ -36,19 +36,19 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
              /* Fast path for runs of ASCII characters. Given that common UTF-8
                 input will consist of an overwhelming majority of ASCII
                 characters, we try to optimize for this case by checking
-               as many characters as a C 'long' can contain.
+               as many characters as a C 'size_t' can contain.
                 First, check if we can do an aligned read, as most CPUs have
                 a penalty for unaligned reads.
              */
-            if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) {
+            if (_Py_IS_ALIGNED(s, SIZEOF_SIZE_T)) {
                  /* Help register allocation */
                  const char *_s = s;
                  STRINGLIB_CHAR *_p = p;
                  while (_s < aligned_end) {
-                    /* Read a whole long at a time (either 4 or 8 bytes),
+                    /* Read a whole size_t at a time (either 4 or 8 bytes),
                         and do a fast unrolled copy if it only contains ASCII
                         characters. */
-                    unsigned long value = *(const unsigned long *) _s;
+                    size_t value = *(const size_t *) _s;
                      if (value & ASCII_CHAR_MASK)
                          break;
  #if PY_LITTLE_ENDIAN
@@ -56,14 +56,14 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
                      _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
                      _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
                      _p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
-# if SIZEOF_LONG == 8
+# if SIZEOF_SIZE_T == 8
                      _p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);
                      _p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
                      _p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
                      _p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
  # endif
  #else
-# if SIZEOF_LONG == 8
+# if SIZEOF_SIZE_T == 8
                      _p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
                      _p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
                      _p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
@@ -79,8 +79,8 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
                      _p[3] = (STRINGLIB_CHAR)(value & 0xFFu);
  # endif
  #endif
-                    _s += SIZEOF_LONG;
-                    _p += SIZEOF_LONG;
+                    _s += SIZEOF_SIZE_T;
+                    _p += SIZEOF_SIZE_T;
                  }
                  s = _s;
                  p = _p;
diff --git a/Objects/stringlib/find_max_char.h b/Objects/stringlib/find_max_char.h

index f4e0a7761d31198bf426dc8b7cbcb70b940ffc0b..3319a46461451b38b056efc663ef5283f5f1635e 100644 (file)
--- a/Objects/stringlib/find_max_char.h
+++ b/Objects/stringlib/find_max_char.h
@@ -4,14 +4,14 @@
  # error "find_max_char.h is specific to Unicode"
  #endif
  
-/* Mask to quickly check whether a C 'long' contains a
+/* Mask to quickly check whether a C 'size_t' contains a
     non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define UCS1_ASCII_CHAR_MASK 0x80808080UL
+#if (SIZEOF_SIZE_T == 8)
+# define UCS1_ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define UCS1_ASCII_CHAR_MASK 0x80808080U
  #else
-# error C 'long' size should be either 4 or 8!
+# error C 'size_t' size should be either 4 or 8!
  #endif
  
  #if STRINGLIB_SIZEOF_CHAR == 1
@@ -21,17 +21,17 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
  {
      const unsigned char *p = (const unsigned char *) begin;
      const unsigned char *aligned_end =
-            (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
+            (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
  
      while (p < end) {
-        if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
+        if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
              /* Help register allocation */
              const unsigned char *_p = p;
              while (_p < aligned_end) {
-                unsigned long value = *(const unsigned long *) _p;
+                size_t value = *(const size_t *) _p;
                  if (value & UCS1_ASCII_CHAR_MASK)
                      return 255;
-                _p += SIZEOF_LONG;
+                _p += SIZEOF_SIZE_T;
              }
              p = _p;
              if (p == end)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index c4e73ebd45d2065cdeda9e2d8b186721a43a3abf..f963deb0201a48f0de7fac0477c2bc5ebe936b3e 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5025,21 +5025,21 @@ PyUnicode_DecodeUTF8(const char *s,
  #include "stringlib/codecs.h"
  #include "stringlib/undef.h"
  
-/* Mask to quickly check whether a C 'long' contains a
+/* Mask to quickly check whether a C 'size_t' contains a
     non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define ASCII_CHAR_MASK 0x80808080UL
+#if (SIZEOF_SIZE_T == 8)
+# define ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define ASCII_CHAR_MASK 0x80808080U
  #else
-# error C 'long' size should be either 4 or 8!
+# error C 'size_t' size should be either 4 or 8!
  #endif
  
  static Py_ssize_t
  ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
  {
      const char *p = start;
-    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
+    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
  
      /*
       * Issue #17237: m68k is a bit different from most architectures in
@@ -5049,21 +5049,21 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
       * version" will even speed up m68k.
       */
  #if !defined(__m68k__)
-#if SIZEOF_LONG <= SIZEOF_VOID_P
-    assert(_Py_IS_ALIGNED(dest, SIZEOF_LONG));
-    if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
+#if SIZEOF_SIZE_T <= SIZEOF_VOID_P
+    assert(_Py_IS_ALIGNED(dest, SIZEOF_SIZE_T));
+    if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
          /* Fast path, see in STRINGLIB(utf8_decode) for
             an explanation. */
          /* Help allocation */
          const char *_p = p;
          Py_UCS1 * q = dest;
          while (_p < aligned_end) {
-            unsigned long value = *(const unsigned long *) _p;
+            size_t value = *(const size_t *) _p;
              if (value & ASCII_CHAR_MASK)
                  break;
-            *((unsigned long *)q) = value;
-            _p += SIZEOF_LONG;
-            q += SIZEOF_LONG;
+            *((size_t *)q) = value;
+            _p += SIZEOF_SIZE_T;
+            q += SIZEOF_SIZE_T;
          }
          p = _p;
          while (p < end) {
@@ -5078,14 +5078,14 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
      while (p < end) {
          /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
             for an explanation. */
-        if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
+        if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
              /* Help allocation */
              const char *_p = p;
              while (_p < aligned_end) {
-                unsigned long value = *(const unsigned long *) _p;
+                size_t value = *(const size_t *) _p;
                  if (value & ASCII_CHAR_MASK)
                      break;
-                _p += SIZEOF_LONG;
+                _p += SIZEOF_SIZE_T;
              }
              p = _p;
              if (_p == end)
author	Ma Lin <animalize@users.noreply.github.com>
	Sun, 18 Oct 2020 14:48:38 +0000 (22:48 +0800)
committer	GitHub <noreply@github.com>
	Sun, 18 Oct 2020 14:48:38 +0000 (17:48 +0300)
Misc/NEWS.d/next/Windows/2020-10-18-18-43-45.bpo-38252.7Nlepg.rst	[new file with mode: 0644]	patch | blob
Objects/bytes_methods.c		patch | blob | blame | history
Objects/stringlib/codecs.h		patch | blob | blame | history
Objects/stringlib/find_max_char.h		patch | blob | blame | history
Objects/unicodeobject.c		patch | blob | blame | history