]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
xor/arm64: Use shared NEON intrinsics implementation from 32-bit ARM
authorArd Biesheuvel <ardb@kernel.org>
Wed, 22 Apr 2026 17:16:59 +0000 (19:16 +0200)
committerEric Biggers <ebiggers@kernel.org>
Thu, 28 May 2026 20:14:21 +0000 (13:14 -0700)
Tweak the arm64 code so that the pure NEON intrinsics implementation of
XOR is shared between arm64 and ARM. While at it, rename the arm64
specific piece xor-eor3.c to reflect that only the version based on the
EOR3 instruction is kept there.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://patch.msgid.link/20260422171655.3437334-13-ardb+git@google.com
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
lib/raid/xor/Makefile
lib/raid/xor/arm64/xor-eor3.c [new file with mode: 0644]
lib/raid/xor/arm64/xor-neon.c [deleted file]
lib/raid/xor/xor-neon.c

index d78400f2427ab436cf82c04d47381cd8e6f2b353..e8ecec3c09f9f4d79978f62bbe0135cb5e25df41 100644 (file)
@@ -19,7 +19,8 @@ xor-$(CONFIG_ARM)             += arm/xor.o
 ifeq ($(CONFIG_ARM),y)
 xor-$(CONFIG_KERNEL_MODE_NEON) += xor-neon.o arm/xor-neon-glue.o
 endif
-xor-$(CONFIG_ARM64)            += arm64/xor-neon.o arm64/xor-neon-glue.o
+xor-$(CONFIG_ARM64)            += xor-neon.o arm64/xor-eor3.o \
+                                  arm64/xor-neon-glue.o
 xor-$(CONFIG_CPU_HAS_LSX)      += loongarch/xor_simd.o
 xor-$(CONFIG_CPU_HAS_LSX)      += loongarch/xor_simd_glue.o
 xor-$(CONFIG_ALTIVEC)          += powerpc/xor_vmx.o powerpc/xor_vmx_glue.o
@@ -34,8 +35,8 @@ obj-y                         += tests/
 CFLAGS_xor-neon.o              += $(CC_FLAGS_FPU) -I$(src)/$(SRCARCH)
 CFLAGS_REMOVE_xor-neon.o       += $(CC_FLAGS_NO_FPU)
 
-CFLAGS_arm64/xor-neon.o                += $(CC_FLAGS_FPU)
-CFLAGS_REMOVE_arm64/xor-neon.o += $(CC_FLAGS_NO_FPU)
+CFLAGS_arm64/xor-eor3.o                += $(CC_FLAGS_FPU)
+CFLAGS_REMOVE_arm64/xor-eor3.o += $(CC_FLAGS_NO_FPU)
 
 CFLAGS_powerpc/xor_vmx.o       += -mhard-float -maltivec \
                                   $(call cc-option,-mabi=altivec) \
diff --git a/lib/raid/xor/arm64/xor-eor3.c b/lib/raid/xor/arm64/xor-eor3.c
new file mode 100644 (file)
index 0000000..e44016c
--- /dev/null
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/cache.h>
+#include <asm/neon-intrinsics.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
+#include "xor-neon.h"
+
+extern void __xor_eor3_2(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2);
+
+static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
+{
+       uint64x2_t res;
+
+       asm(ARM64_ASM_PREAMBLE ".arch_extension sha3\n"
+           "eor3 %0.16b, %1.16b, %2.16b, %3.16b"
+           : "=w"(res) : "w"(p), "w"(q), "w"(r));
+       return res;
+}
+
+static void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2,
+               const unsigned long * __restrict p3)
+{
+       uint64_t *dp1 = (uint64_t *)p1;
+       uint64_t *dp2 = (uint64_t *)p2;
+       uint64_t *dp3 = (uint64_t *)p3;
+
+       register uint64x2_t v0, v1, v2, v3;
+       long lines = bytes / (sizeof(uint64x2_t) * 4);
+
+       do {
+               /* p1 ^= p2 ^ p3 */
+               v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
+                         vld1q_u64(dp3 + 0));
+               v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
+                         vld1q_u64(dp3 + 2));
+               v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
+                         vld1q_u64(dp3 + 4));
+               v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
+                         vld1q_u64(dp3 + 6));
+
+               /* store */
+               vst1q_u64(dp1 + 0, v0);
+               vst1q_u64(dp1 + 2, v1);
+               vst1q_u64(dp1 + 4, v2);
+               vst1q_u64(dp1 + 6, v3);
+
+               dp1 += 8;
+               dp2 += 8;
+               dp3 += 8;
+       } while (--lines > 0);
+}
+
+static void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2,
+               const unsigned long * __restrict p3,
+               const unsigned long * __restrict p4)
+{
+       uint64_t *dp1 = (uint64_t *)p1;
+       uint64_t *dp2 = (uint64_t *)p2;
+       uint64_t *dp3 = (uint64_t *)p3;
+       uint64_t *dp4 = (uint64_t *)p4;
+
+       register uint64x2_t v0, v1, v2, v3;
+       long lines = bytes / (sizeof(uint64x2_t) * 4);
+
+       do {
+               /* p1 ^= p2 ^ p3 */
+               v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
+                         vld1q_u64(dp3 + 0));
+               v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
+                         vld1q_u64(dp3 + 2));
+               v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
+                         vld1q_u64(dp3 + 4));
+               v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
+                         vld1q_u64(dp3 + 6));
+
+               /* p1 ^= p4 */
+               v0 = veorq_u64(v0, vld1q_u64(dp4 + 0));
+               v1 = veorq_u64(v1, vld1q_u64(dp4 + 2));
+               v2 = veorq_u64(v2, vld1q_u64(dp4 + 4));
+               v3 = veorq_u64(v3, vld1q_u64(dp4 + 6));
+
+               /* store */
+               vst1q_u64(dp1 + 0, v0);
+               vst1q_u64(dp1 + 2, v1);
+               vst1q_u64(dp1 + 4, v2);
+               vst1q_u64(dp1 + 6, v3);
+
+               dp1 += 8;
+               dp2 += 8;
+               dp3 += 8;
+               dp4 += 8;
+       } while (--lines > 0);
+}
+
+static void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2,
+               const unsigned long * __restrict p3,
+               const unsigned long * __restrict p4,
+               const unsigned long * __restrict p5)
+{
+       uint64_t *dp1 = (uint64_t *)p1;
+       uint64_t *dp2 = (uint64_t *)p2;
+       uint64_t *dp3 = (uint64_t *)p3;
+       uint64_t *dp4 = (uint64_t *)p4;
+       uint64_t *dp5 = (uint64_t *)p5;
+
+       register uint64x2_t v0, v1, v2, v3;
+       long lines = bytes / (sizeof(uint64x2_t) * 4);
+
+       do {
+               /* p1 ^= p2 ^ p3 */
+               v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
+                         vld1q_u64(dp3 + 0));
+               v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
+                         vld1q_u64(dp3 + 2));
+               v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
+                         vld1q_u64(dp3 + 4));
+               v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
+                         vld1q_u64(dp3 + 6));
+
+               /* p1 ^= p4 ^ p5 */
+               v0 = eor3(v0, vld1q_u64(dp4 + 0), vld1q_u64(dp5 + 0));
+               v1 = eor3(v1, vld1q_u64(dp4 + 2), vld1q_u64(dp5 + 2));
+               v2 = eor3(v2, vld1q_u64(dp4 + 4), vld1q_u64(dp5 + 4));
+               v3 = eor3(v3, vld1q_u64(dp4 + 6), vld1q_u64(dp5 + 6));
+
+               /* store */
+               vst1q_u64(dp1 + 0, v0);
+               vst1q_u64(dp1 + 2, v1);
+               vst1q_u64(dp1 + 4, v2);
+               vst1q_u64(dp1 + 6, v3);
+
+               dp1 += 8;
+               dp2 += 8;
+               dp3 += 8;
+               dp4 += 8;
+               dp5 += 8;
+       } while (--lines > 0);
+}
+
+__DO_XOR_BLOCKS(eor3_inner, __xor_eor3_2, __xor_eor3_3, __xor_eor3_4,
+               __xor_eor3_5);
diff --git a/lib/raid/xor/arm64/xor-neon.c b/lib/raid/xor/arm64/xor-neon.c
deleted file mode 100644 (file)
index 97ef3cb..0000000
+++ /dev/null
@@ -1,312 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Authors: Jackie Liu <liuyun01@kylinos.cn>
- * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd.
- */
-
-#include <linux/cache.h>
-#include <asm/neon-intrinsics.h>
-#include "xor_impl.h"
-#include "xor_arch.h"
-#include "xor-neon.h"
-
-static void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
-               const unsigned long * __restrict p2)
-{
-       uint64_t *dp1 = (uint64_t *)p1;
-       uint64_t *dp2 = (uint64_t *)p2;
-
-       register uint64x2_t v0, v1, v2, v3;
-       long lines = bytes / (sizeof(uint64x2_t) * 4);
-
-       do {
-               /* p1 ^= p2 */
-               v0 = veorq_u64(vld1q_u64(dp1 +  0), vld1q_u64(dp2 +  0));
-               v1 = veorq_u64(vld1q_u64(dp1 +  2), vld1q_u64(dp2 +  2));
-               v2 = veorq_u64(vld1q_u64(dp1 +  4), vld1q_u64(dp2 +  4));
-               v3 = veorq_u64(vld1q_u64(dp1 +  6), vld1q_u64(dp2 +  6));
-
-               /* store */
-               vst1q_u64(dp1 +  0, v0);
-               vst1q_u64(dp1 +  2, v1);
-               vst1q_u64(dp1 +  4, v2);
-               vst1q_u64(dp1 +  6, v3);
-
-               dp1 += 8;
-               dp2 += 8;
-       } while (--lines > 0);
-}
-
-static void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
-               const unsigned long * __restrict p2,
-               const unsigned long * __restrict p3)
-{
-       uint64_t *dp1 = (uint64_t *)p1;
-       uint64_t *dp2 = (uint64_t *)p2;
-       uint64_t *dp3 = (uint64_t *)p3;
-
-       register uint64x2_t v0, v1, v2, v3;
-       long lines = bytes / (sizeof(uint64x2_t) * 4);
-
-       do {
-               /* p1 ^= p2 */
-               v0 = veorq_u64(vld1q_u64(dp1 +  0), vld1q_u64(dp2 +  0));
-               v1 = veorq_u64(vld1q_u64(dp1 +  2), vld1q_u64(dp2 +  2));
-               v2 = veorq_u64(vld1q_u64(dp1 +  4), vld1q_u64(dp2 +  4));
-               v3 = veorq_u64(vld1q_u64(dp1 +  6), vld1q_u64(dp2 +  6));
-
-               /* p1 ^= p3 */
-               v0 = veorq_u64(v0, vld1q_u64(dp3 +  0));
-               v1 = veorq_u64(v1, vld1q_u64(dp3 +  2));
-               v2 = veorq_u64(v2, vld1q_u64(dp3 +  4));
-               v3 = veorq_u64(v3, vld1q_u64(dp3 +  6));
-
-               /* store */
-               vst1q_u64(dp1 +  0, v0);
-               vst1q_u64(dp1 +  2, v1);
-               vst1q_u64(dp1 +  4, v2);
-               vst1q_u64(dp1 +  6, v3);
-
-               dp1 += 8;
-               dp2 += 8;
-               dp3 += 8;
-       } while (--lines > 0);
-}
-
-static void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
-               const unsigned long * __restrict p2,
-               const unsigned long * __restrict p3,
-               const unsigned long * __restrict p4)
-{
-       uint64_t *dp1 = (uint64_t *)p1;
-       uint64_t *dp2 = (uint64_t *)p2;
-       uint64_t *dp3 = (uint64_t *)p3;
-       uint64_t *dp4 = (uint64_t *)p4;
-
-       register uint64x2_t v0, v1, v2, v3;
-       long lines = bytes / (sizeof(uint64x2_t) * 4);
-
-       do {
-               /* p1 ^= p2 */
-               v0 = veorq_u64(vld1q_u64(dp1 +  0), vld1q_u64(dp2 +  0));
-               v1 = veorq_u64(vld1q_u64(dp1 +  2), vld1q_u64(dp2 +  2));
-               v2 = veorq_u64(vld1q_u64(dp1 +  4), vld1q_u64(dp2 +  4));
-               v3 = veorq_u64(vld1q_u64(dp1 +  6), vld1q_u64(dp2 +  6));
-
-               /* p1 ^= p3 */
-               v0 = veorq_u64(v0, vld1q_u64(dp3 +  0));
-               v1 = veorq_u64(v1, vld1q_u64(dp3 +  2));
-               v2 = veorq_u64(v2, vld1q_u64(dp3 +  4));
-               v3 = veorq_u64(v3, vld1q_u64(dp3 +  6));
-
-               /* p1 ^= p4 */
-               v0 = veorq_u64(v0, vld1q_u64(dp4 +  0));
-               v1 = veorq_u64(v1, vld1q_u64(dp4 +  2));
-               v2 = veorq_u64(v2, vld1q_u64(dp4 +  4));
-               v3 = veorq_u64(v3, vld1q_u64(dp4 +  6));
-
-               /* store */
-               vst1q_u64(dp1 +  0, v0);
-               vst1q_u64(dp1 +  2, v1);
-               vst1q_u64(dp1 +  4, v2);
-               vst1q_u64(dp1 +  6, v3);
-
-               dp1 += 8;
-               dp2 += 8;
-               dp3 += 8;
-               dp4 += 8;
-       } while (--lines > 0);
-}
-
-static void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
-               const unsigned long * __restrict p2,
-               const unsigned long * __restrict p3,
-               const unsigned long * __restrict p4,
-               const unsigned long * __restrict p5)
-{
-       uint64_t *dp1 = (uint64_t *)p1;
-       uint64_t *dp2 = (uint64_t *)p2;
-       uint64_t *dp3 = (uint64_t *)p3;
-       uint64_t *dp4 = (uint64_t *)p4;
-       uint64_t *dp5 = (uint64_t *)p5;
-
-       register uint64x2_t v0, v1, v2, v3;
-       long lines = bytes / (sizeof(uint64x2_t) * 4);
-
-       do {
-               /* p1 ^= p2 */
-               v0 = veorq_u64(vld1q_u64(dp1 +  0), vld1q_u64(dp2 +  0));
-               v1 = veorq_u64(vld1q_u64(dp1 +  2), vld1q_u64(dp2 +  2));
-               v2 = veorq_u64(vld1q_u64(dp1 +  4), vld1q_u64(dp2 +  4));
-               v3 = veorq_u64(vld1q_u64(dp1 +  6), vld1q_u64(dp2 +  6));
-
-               /* p1 ^= p3 */
-               v0 = veorq_u64(v0, vld1q_u64(dp3 +  0));
-               v1 = veorq_u64(v1, vld1q_u64(dp3 +  2));
-               v2 = veorq_u64(v2, vld1q_u64(dp3 +  4));
-               v3 = veorq_u64(v3, vld1q_u64(dp3 +  6));
-
-               /* p1 ^= p4 */
-               v0 = veorq_u64(v0, vld1q_u64(dp4 +  0));
-               v1 = veorq_u64(v1, vld1q_u64(dp4 +  2));
-               v2 = veorq_u64(v2, vld1q_u64(dp4 +  4));
-               v3 = veorq_u64(v3, vld1q_u64(dp4 +  6));
-
-               /* p1 ^= p5 */
-               v0 = veorq_u64(v0, vld1q_u64(dp5 +  0));
-               v1 = veorq_u64(v1, vld1q_u64(dp5 +  2));
-               v2 = veorq_u64(v2, vld1q_u64(dp5 +  4));
-               v3 = veorq_u64(v3, vld1q_u64(dp5 +  6));
-
-               /* store */
-               vst1q_u64(dp1 +  0, v0);
-               vst1q_u64(dp1 +  2, v1);
-               vst1q_u64(dp1 +  4, v2);
-               vst1q_u64(dp1 +  6, v3);
-
-               dp1 += 8;
-               dp2 += 8;
-               dp3 += 8;
-               dp4 += 8;
-               dp5 += 8;
-       } while (--lines > 0);
-}
-
-__DO_XOR_BLOCKS(neon_inner, __xor_neon_2, __xor_neon_3, __xor_neon_4,
-               __xor_neon_5);
-
-static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
-{
-       uint64x2_t res;
-
-       asm(ARM64_ASM_PREAMBLE ".arch_extension sha3\n"
-           "eor3 %0.16b, %1.16b, %2.16b, %3.16b"
-           : "=w"(res) : "w"(p), "w"(q), "w"(r));
-       return res;
-}
-
-static void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1,
-               const unsigned long * __restrict p2,
-               const unsigned long * __restrict p3)
-{
-       uint64_t *dp1 = (uint64_t *)p1;
-       uint64_t *dp2 = (uint64_t *)p2;
-       uint64_t *dp3 = (uint64_t *)p3;
-
-       register uint64x2_t v0, v1, v2, v3;
-       long lines = bytes / (sizeof(uint64x2_t) * 4);
-
-       do {
-               /* p1 ^= p2 ^ p3 */
-               v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
-                         vld1q_u64(dp3 + 0));
-               v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
-                         vld1q_u64(dp3 + 2));
-               v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
-                         vld1q_u64(dp3 + 4));
-               v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
-                         vld1q_u64(dp3 + 6));
-
-               /* store */
-               vst1q_u64(dp1 + 0, v0);
-               vst1q_u64(dp1 + 2, v1);
-               vst1q_u64(dp1 + 4, v2);
-               vst1q_u64(dp1 + 6, v3);
-
-               dp1 += 8;
-               dp2 += 8;
-               dp3 += 8;
-       } while (--lines > 0);
-}
-
-static void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1,
-               const unsigned long * __restrict p2,
-               const unsigned long * __restrict p3,
-               const unsigned long * __restrict p4)
-{
-       uint64_t *dp1 = (uint64_t *)p1;
-       uint64_t *dp2 = (uint64_t *)p2;
-       uint64_t *dp3 = (uint64_t *)p3;
-       uint64_t *dp4 = (uint64_t *)p4;
-
-       register uint64x2_t v0, v1, v2, v3;
-       long lines = bytes / (sizeof(uint64x2_t) * 4);
-
-       do {
-               /* p1 ^= p2 ^ p3 */
-               v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
-                         vld1q_u64(dp3 + 0));
-               v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
-                         vld1q_u64(dp3 + 2));
-               v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
-                         vld1q_u64(dp3 + 4));
-               v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
-                         vld1q_u64(dp3 + 6));
-
-               /* p1 ^= p4 */
-               v0 = veorq_u64(v0, vld1q_u64(dp4 + 0));
-               v1 = veorq_u64(v1, vld1q_u64(dp4 + 2));
-               v2 = veorq_u64(v2, vld1q_u64(dp4 + 4));
-               v3 = veorq_u64(v3, vld1q_u64(dp4 + 6));
-
-               /* store */
-               vst1q_u64(dp1 + 0, v0);
-               vst1q_u64(dp1 + 2, v1);
-               vst1q_u64(dp1 + 4, v2);
-               vst1q_u64(dp1 + 6, v3);
-
-               dp1 += 8;
-               dp2 += 8;
-               dp3 += 8;
-               dp4 += 8;
-       } while (--lines > 0);
-}
-
-static void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1,
-               const unsigned long * __restrict p2,
-               const unsigned long * __restrict p3,
-               const unsigned long * __restrict p4,
-               const unsigned long * __restrict p5)
-{
-       uint64_t *dp1 = (uint64_t *)p1;
-       uint64_t *dp2 = (uint64_t *)p2;
-       uint64_t *dp3 = (uint64_t *)p3;
-       uint64_t *dp4 = (uint64_t *)p4;
-       uint64_t *dp5 = (uint64_t *)p5;
-
-       register uint64x2_t v0, v1, v2, v3;
-       long lines = bytes / (sizeof(uint64x2_t) * 4);
-
-       do {
-               /* p1 ^= p2 ^ p3 */
-               v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
-                         vld1q_u64(dp3 + 0));
-               v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
-                         vld1q_u64(dp3 + 2));
-               v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
-                         vld1q_u64(dp3 + 4));
-               v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
-                         vld1q_u64(dp3 + 6));
-
-               /* p1 ^= p4 ^ p5 */
-               v0 = eor3(v0, vld1q_u64(dp4 + 0), vld1q_u64(dp5 + 0));
-               v1 = eor3(v1, vld1q_u64(dp4 + 2), vld1q_u64(dp5 + 2));
-               v2 = eor3(v2, vld1q_u64(dp4 + 4), vld1q_u64(dp5 + 4));
-               v3 = eor3(v3, vld1q_u64(dp4 + 6), vld1q_u64(dp5 + 6));
-
-               /* store */
-               vst1q_u64(dp1 + 0, v0);
-               vst1q_u64(dp1 + 2, v1);
-               vst1q_u64(dp1 + 4, v2);
-               vst1q_u64(dp1 + 6, v3);
-
-               dp1 += 8;
-               dp2 += 8;
-               dp3 += 8;
-               dp4 += 8;
-               dp5 += 8;
-       } while (--lines > 0);
-}
-
-__DO_XOR_BLOCKS(eor3_inner, __xor_neon_2, __xor_eor3_3, __xor_eor3_4,
-               __xor_eor3_5);
index a3e2b4af8d362fc1e968b12b871215b419b3ef1e..c7c3cf634e23ac6304b3b4e314c9ab7fe4485e7f 100644 (file)
@@ -173,3 +173,7 @@ static void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
 
 __DO_XOR_BLOCKS(neon_inner, __xor_neon_2, __xor_neon_3, __xor_neon_4,
                __xor_neon_5);
+
+#ifdef CONFIG_ARM64
+extern typeof(__xor_neon_2) __xor_eor3_2 __alias(__xor_neon_2);
+#endif