git.ipfire.org Git - thirdparty/xz.git/commitdiff
tuklib_integer: Autodetect when -mstrict-align is used with GCC on ARM64
author: Lasse Collin <lasse.collin@tukaani.org>
Wed, 10 Dec 2025 14:49:55 +0000 (16:49 +0200)
committer: Lasse Collin <lasse.collin@tukaani.org>
Tue, 16 Dec 2025 09:34:10 +0000 (11:34 +0200)
On ARM64, support for fast unaligned memory access was autodetected by
checking if __ARM_FEATURE_UNALIGNED is defined. However, at least GCC
versions up to 15.2.0 define the macro even when -mstrict-align has
been specified. Thus, autodetection with GCC doesn't work correctly,
and binaries built using -mstrict-align can be much slower than they
need to be, unless the user also passes --disable-unaligned-access
to configure or -DTUKLIB_FAST_UNALIGNED_ACCESS=OFF to cmake.

See the GCC bug:

    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111555

Work around the issue by using heuristics with GCC on ARM64.

With Clang, the detection using __ARM_FEATURE_UNALIGNED works.
It also works with GCC on 32-bit ARM.

Fixes: e5f13a66567b ("tuklib_integer: Autodetect support for unaligned access on ARM.")
cmake/tuklib_integer.cmake
m4/tuklib_integer.m4

index e2d6c71d8538fb8c5bedacf5c3c57b5874a2fa49..c2cd04e4d257e7afa97c556b3404aae29f9c627c 100644 (file)
@@ -159,24 +159,16 @@ function(tuklib_integer TARGET_OR_ALL)
             set(FAST_UNALIGNED_GUESS ON)
         endif()
 
-    elseif(PROCESSOR MATCHES "^arm|^aarch64|^riscv")
-        # On 32-bit and 64-bit ARM, GCC and Clang
-        # #define __ARM_FEATURE_UNALIGNED if
-        # unaligned access is supported.
-        #
-        # Exception: GCC at least up to 13.2.0
-        # defines it even when using -mstrict-align
-        # so in that case this autodetection goes wrong.
-        # Most of the time -mstrict-align isn't used so it
-        # shouldn't be a common problem in practice. See:
-        # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111555
+    elseif(PROCESSOR MATCHES "^arm|^riscv" AND NOT PROCESSOR MATCHES "^arm64")
+        # On 32-bit ARM, GCC and Clang #define __ARM_FEATURE_UNALIGNED
+        # if and only if unaligned access is supported.
         #
         # RISC-V C API Specification says that if
         # __riscv_misaligned_fast is defined then
         # unaligned access is known to be fast.
         #
         # MSVC is handled as a special case: We assume that
-        # 32/64-bit ARM supports fast unaligned access.
+        # 32-bit ARM supports fast unaligned access.
         # If MSVC gets RISC-V support then this will assume
         # fast unaligned access on RISC-V too.
         check_c_source_compiles("
@@ -192,6 +184,53 @@ function(tuklib_integer TARGET_OR_ALL)
             set(FAST_UNALIGNED_GUESS ON)
         endif()
 
+    elseif(PROCESSOR MATCHES "^aarch64|^arm64")
+        # On ARM64, Clang defines __ARM_FEATURE_UNALIGNED if and only if
+        # unaligned access is supported. However, GCC (at least up to 15.2.0)
+        # defines it even when using -mstrict-align, so autodetection with
+        # this macro doesn't work with GCC on ARM64. (It does work on
+        # 32-bit ARM.) See:
+        #
+        #     https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111555
+        #
+        # We need three checks:
+        #
+        # 1. If __ARM_FEATURE_UNALIGNED is defined and the
+        #    compiler isn't GCC, unaligned access is enabled.
+        #    If the compiler is MSVC, unaligned access is
+        #    enabled even without __ARM_FEATURE_UNALIGNED.
+        check_c_source_compiles("
+                #if defined(__ARM_FEATURE_UNALIGNED) \
+                        && (!defined(__GNUC__) || defined(__clang__))
+                #elif defined(_MSC_VER)
+                #else
+                compile error
+                #endif
+                int main(void) { return 0; }
+            "
+            TUKLIB_FAST_UNALIGNED_DEFINED_BY_PREPROCESSOR)
+        if(TUKLIB_FAST_UNALIGNED_DEFINED_BY_PREPROCESSOR)
+            set(FAST_UNALIGNED_GUESS ON)
+        else()
+            # 2. If __ARM_FEATURE_UNALIGNED is not defined,
+            #    unaligned access is disabled.
+            check_c_source_compiles("
+                    #ifdef __ARM_FEATURE_UNALIGNED
+                    compile error
+                    #endif
+                    int main(void) { return 0; }
+                "
+                TUKLIB_FAST_UNALIGNED_NOT_DEFINED_BY_PREPROCESSOR)
+            if(NOT TUKLIB_FAST_UNALIGNED_NOT_DEFINED_BY_PREPROCESSOR)
+                # 3. Use heuristics to detect if -mstrict-align is
+                #    in effect when building with GCC.
+                tuklib_integer_internal_strict_align("[ \t]ldrb[ \t]")
+                if(NOT TUKLIB_INTEGER_STRICT_ALIGN)
+                    set(FAST_UNALIGNED_GUESS ON)
+                endif()
+            endif()
+        endif()
+
     elseif(PROCESSOR MATCHES "^loongarch")
         tuklib_integer_internal_strict_align("[ \t]ld\\.bu[ \t]")
         if(NOT TUKLIB_INTEGER_STRICT_ALIGN)
index a3128a20d6ba40fda01e206f6cffc6104f4a21f3..29f2c95f8b7ef28a6547b3e2ac5d711f686af8fd 100644 (file)
@@ -130,34 +130,74 @@ if test "x$enable_unaligned_access" = xauto ; then
                i?86|x86_64|powerpc|powerpc64|powerpc64le)
                        enable_unaligned_access=yes
                        ;;
-               arm*|aarch64*|riscv*)
-                       # On 32-bit and 64-bit ARM, GCC and Clang
-                       # #define __ARM_FEATURE_UNALIGNED if
-                       # unaligned access is supported.
-                       #
-                       # Exception: GCC at least up to 13.2.0
-                       # defines it even when using -mstrict-align
-                       # so in that case this autodetection goes wrong.
-                       # Most of the time -mstrict-align isn't used so it
-                       # shouldn't be a common problem in practice. See:
-                       # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111555
+               arm*|riscv*)
+                       # On 32-bit ARM, GCC and Clang
+                       # #define __ARM_FEATURE_UNALIGNED
+                       # if and only if unaligned access is supported.
                        #
                        # RISC-V C API Specification says that if
                        # __riscv_misaligned_fast is defined then
                        # unaligned access is known to be fast.
                        #
                        # MSVC is handled as a special case: We assume that
-                       # 32/64-bit ARM supports fast unaligned access.
+                       # 32-bit ARM supports fast unaligned access.
                        # If MSVC gets RISC-V support then this will assume
                        # fast unaligned access on RISC-V too.
                        AC_COMPILE_IFELSE([AC_LANG_SOURCE([
-#if !defined(__ARM_FEATURE_UNALIGNED) \
-               && !defined(__riscv_misaligned_fast) \
-               && !defined(_MSC_VER)
-compile error
-#endif
-int main(void) { return 0; }
-])], [enable_unaligned_access=yes], [enable_unaligned_access=no])
+                               #if !defined(__ARM_FEATURE_UNALIGNED) \
+                                       && !defined(__riscv_misaligned_fast) \
+                                       && !defined(_MSC_VER)
+                               compile error
+                               #endif
+                               int main(void) { return 0; }
+                       ])],
+                       [enable_unaligned_access=yes],
+                       [enable_unaligned_access=no])
+                       ;;
+               aarch64*)
+                       # On ARM64, Clang defines __ARM_FEATURE_UNALIGNED
+                       # if and only if unaligned access is supported.
+                       # However, GCC (at least up to 15.2.0) defines it
+                       # even when using -mstrict-align, so autodetection
+                       # with this macro doesn't work with GCC on ARM64.
+                       # (It does work on 32-bit ARM.) See:
+                       #
+                       # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111555
+                       #
+                       # We need three checks:
+                       #
+                       # 1. If __ARM_FEATURE_UNALIGNED is defined and the
+                       #    compiler isn't GCC, unaligned access is enabled.
+                       #    If the compiler is MSVC, unaligned access is
+                       #    enabled even without __ARM_FEATURE_UNALIGNED.
+                       AC_COMPILE_IFELSE([AC_LANG_SOURCE([
+                               #if defined(__ARM_FEATURE_UNALIGNED) \
+                                       && (!defined(__GNUC__) \
+                                               || defined(__clang__))
+                               #elif defined(_MSC_VER)
+                               #else
+                               compile error
+                               #endif
+                               int main(void) { return 0; }
+                       ])], [enable_unaligned_access=yes])
+
+                       # 2. If __ARM_FEATURE_UNALIGNED is not defined,
+                       #    unaligned access is disabled.
+                       if test "x$enable_unaligned_access" = xauto ; then
+                               AC_COMPILE_IFELSE([AC_LANG_SOURCE([
+                                       #ifdef __ARM_FEATURE_UNALIGNED
+                                       compile error
+                                       #endif
+                                       int main(void) { return 0; }
+                               ])], [enable_unaligned_access=no])
+                       fi
+
+                       # 3. Use heuristics to detect if -mstrict-align is
+                       #    in effect when building with GCC.
+                       if test "x$enable_unaligned_access" = xauto ; then
+                               [tuklib_integer_strict_align \
+                                               '[[:blank:]]ldrb[[:blank:]]']
+                       fi
                        ;;
                loongarch*)
                        # See sections 7.4, 8.1, and 8.2: