From: James Greenhalgh Date: Tue, 26 Nov 2013 10:04:51 +0000 (+0000) Subject: [AArch64] [4/4 Fix vtbx1] Handle vtbx{1,3} emulation sequence using X-Git-Tag: releases/gcc-4.9.0~2455 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=793c28b87fbb215df23a3e72d33a4ffa07899044;p=thirdparty%2Fgcc.git [AArch64] [4/4 Fix vtbx1] Handle vtbx{1,3} emulation sequence using other intrinsics gcc/ * config/aarch64/arm_neon.h (vtbx1_8): Emulate behaviour using other intrinsics. (vtbx3_8): Likewise. From-SVN: r205386 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 52c507d7ba60..d1885d97d4d7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2013-11-26 James Greenhalgh + + * config/aarch64/arm_neon.h (vtbx1_8): Emulate behaviour + using other intrinsics. + (vtbx3_8): Likewise. + 2013-11-26 James Greenhalgh * config/aarch64/aarch64-builtins.c diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 6826ffb827c4..03549bd7a27c 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -15134,54 +15134,6 @@ vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx) return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx) -{ - int8x8_t result; - int8x8_t tmp1; - int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("movi %0.8b, 8\n\t" - "cmhs %0.8b, %3.8b, %0.8b\n\t" - "tbl %1.8b, {%2.16b}, %3.8b\n\t" - "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=&w"(tmp1) - : "w"(temp), "w"(idx), "w"(r) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx) -{ - uint8x8_t result; - uint8x8_t tmp1; - uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("movi %0.8b, 8\n\t" - "cmhs %0.8b, %3.8b, %0.8b\n\t" - "tbl %1.8b, {%2.16b}, %3.8b\n\t" - "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=&w"(tmp1) - : "w"(temp), "w"(idx), "w"(r) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx) -{ - poly8x8_t result; - poly8x8_t tmp1; - poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("movi %0.8b, 8\n\t" - "cmhs %0.8b, %3.8b, %0.8b\n\t" - "tbl %1.8b, {%2.16b}, %3.8b\n\t" - "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=&w"(tmp1) - : "w"(temp), "w"(idx), "w"(r) - : /* No clobbers */); - return result; -} - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx) { @@ -15218,63 +15170,6 @@ vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx) return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx) -{ - int8x8_t result; - int8x8_t tmp1; - int8x16x2_t temp; - temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" - "movi %0.8b, 24\n\t" - "cmhs %0.8b, %3.8b, %0.8b\n\t" - "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t" - "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=&w"(tmp1) - : "Q"(temp), "w"(idx), "w"(r) - : "v16", "v17", "memory"); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx) -{ - uint8x8_t result; - uint8x8_t tmp1; - uint8x16x2_t temp; - temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" - "movi %0.8b, 24\n\t" - "cmhs %0.8b, %3.8b, %0.8b\n\t" - "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t" - "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=&w"(tmp1) - : "Q"(temp), "w"(idx), "w"(r) - : "v16", "v17", "memory"); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx) -{ - poly8x8_t result; - poly8x8_t tmp1; - poly8x16x2_t temp; - temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" - "movi %0.8b, 24\n\t" - "cmhs %0.8b, %3.8b, %0.8b\n\t" - "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t" - "bsl %0.8b, %4.8b, %1.8b\n\t" - : "+w"(result), "=&w"(tmp1) - : "Q"(temp), "w"(idx), "w"(r) - : "v16", "v17", "memory"); - return result; -} - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx) { @@ -24920,6 +24815,66 @@ vsubd_u64 (uint64x1_t __a, uint64x1_t __b) return __a - __b; } +/* vtbx1 */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), + vmov_n_u8 (8)); + int8x8_t __tbl = vtbl1_s8 (__tab, __idx); + + return vbsl_s8 (__mask, __tbl, __r); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); + uint8x8_t __tbl = vtbl1_u8 (__tab, __idx); + + return vbsl_u8 (__mask, __tbl, __r); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); + poly8x8_t __tbl = vtbl1_p8 (__tab, __idx); + + return vbsl_p8 (__mask, __tbl, __r); +} + +/* vtbx3 */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), + vmov_n_u8 (24)); + int8x8_t __tbl = vtbl3_s8 (__tab, __idx); + + return vbsl_s8 (__mask, __tbl, __r); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); + uint8x8_t __tbl = vtbl3_u8 (__tab, __idx); + + return vbsl_u8 (__mask, __tbl, __r); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); + poly8x8_t __tbl = vtbl3_p8 (__tab, __idx); + + return vbsl_p8 (__mask, __tbl, __r); +} + /* vtrn */ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))