From f7d5cf8df3193f8f6e62501def08e4b0b1baadbc Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov
Date: Wed, 23 Apr 2014 15:26:28 +0000
Subject: [PATCH] [AArch64][2/3] Recognise rev16 operations on SImode and DImode data

	* config/aarch64/aarch64.md (rev16<mode>2): New pattern.
	(rev16<mode>2_alt): Likewise.
	* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case.
	* config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New.
	(aarch_rev16_shleft_mask_imm_p): Likewise.
	(aarch_rev16_p_1): Likewise.
	(aarch_rev16_p): Likewise.
	* config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern.
	(aarch_rev16_shright_mask_imm_p): Likewise.
	(aarch_rev16_shleft_mask_imm_p): Likewise.

	* gcc.target/aarch64/rev16_1.c: New test.

From-SVN: r209704
---
 gcc/ChangeLog                              | 13 ++++
 gcc/config/aarch64/aarch64.c               | 10 +++
 gcc/config/aarch64/aarch64.md              | 32 ++++++++++
 gcc/config/arm/aarch-common-protos.h       |  3 +
 gcc/config/arm/aarch-common.c              | 73 ++++++++++++++++++++++
 gcc/testsuite/ChangeLog                    |  4 ++
 gcc/testsuite/gcc.target/aarch64/rev16_1.c | 59 +++++++++++++++++
 7 files changed, 194 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/rev16_1.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6d3bab8d5d8e..1b8dd62b670c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2014-04-23  Kyrylo Tkachov
+
+	* config/aarch64/aarch64.md (rev16<mode>2): New pattern.
+	(rev16<mode>2_alt): Likewise.
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case.
+	* config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New.
+	(aarch_rev16_shleft_mask_imm_p): Likewise.
+	(aarch_rev16_p_1): Likewise.
+	(aarch_rev16_p): Likewise.
+	* config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern.
+	(aarch_rev16_shright_mask_imm_p): Likewise.
+	(aarch_rev16_shleft_mask_imm_p): Likewise.
+
 2014-04-23  Kyrylo Tkachov
 
 	* config/arm/aarch-common-protos.h (alu_cost_table): Add rev field.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index dacd7eebcf6a..68c29aa0da7b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4695,6 +4695,16 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
       return false;
 
     case IOR:
+      if (aarch_rev16_p (x))
+        {
+          *cost = COSTS_N_INSNS (1);
+
+          if (speed)
+            *cost += extra_cost->alu.rev;
+
+          return true;
+        }
+    /* Fall through.  */
     case XOR:
     case AND:
     cost_logic:
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index ee32b6cf4adf..98c46d13bf42 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3253,6 +3253,38 @@
   [(set_attr "type" "rev")]
 )
 
+;; There are no canonicalisation rules for the position of the lshiftrt, ashift
+;; operations within an IOR/AND RTX, therefore we have two patterns matching
+;; each valid permutation.
+
+(define_insn "rev16<mode>2"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
+                                      (const_int 8))
+                          (match_operand:GPI 3 "const_int_operand" "n"))
+                 (and:GPI (lshiftrt:GPI (match_dup 1)
+                                        (const_int 8))
+                          (match_operand:GPI 2 "const_int_operand" "n"))))]
+  "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode)
+   && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)"
+  "rev16\\t%<w>0, %<w>1"
+  [(set_attr "type" "rev")]
+)
+
+(define_insn "rev16<mode>2_alt"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r")
+                                        (const_int 8))
+                          (match_operand:GPI 2 "const_int_operand" "n"))
+                 (and:GPI (ashift:GPI (match_dup 1)
+                                      (const_int 8))
+                          (match_operand:GPI 3 "const_int_operand" "n"))))]
+  "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode)
+   && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)"
+  "rev16\\t%<w>0, %<w>1"
+  [(set_attr "type" "rev")]
+)
+
 ;; zero_extend version of above
 (define_insn "*bswapsi2_uxtw"
   [(set (match_operand:DI 0 "register_operand" "=r")
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 5693c3127d8f..1b60d7892556 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -24,6 +24,9 @@
 #define GCC_AARCH_COMMON_PROTOS_H
 
 extern int aarch_crypto_can_dual_issue (rtx, rtx);
+extern bool aarch_rev16_p (rtx);
+extern bool aarch_rev16_shleft_mask_imm_p (rtx, enum machine_mode);
+extern bool aarch_rev16_shright_mask_imm_p (rtx, enum machine_mode);
 extern int arm_early_load_addr_dep (rtx, rtx);
 extern int arm_early_store_addr_dep (rtx, rtx);
 extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index af8fc9996fab..884d4b37fac9 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -191,6 +191,79 @@ arm_get_set_operands (rtx producer, rtx consumer,
   return 0;
 }
 
+bool
+aarch_rev16_shright_mask_imm_p (rtx val, enum machine_mode mode)
+{
+  return CONST_INT_P (val)
+         && INTVAL (val) == trunc_int_for_mode (0xff00ff00ff00ff, mode);
+}
+
+bool
+aarch_rev16_shleft_mask_imm_p (rtx val, enum machine_mode mode)
+{
+  return CONST_INT_P (val)
+         && INTVAL (val) == trunc_int_for_mode (0xff00ff00ff00ff00, mode);
+}
+
+
+static bool
+aarch_rev16_p_1 (rtx lhs, rtx rhs, enum machine_mode mode)
+{
+  if (GET_CODE (lhs) == AND
+      && GET_CODE (XEXP (lhs, 0)) == ASHIFT
+      && CONST_INT_P (XEXP (XEXP (lhs, 0), 1))
+      && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8
+      && REG_P (XEXP (XEXP (lhs, 0), 0))
+      && CONST_INT_P (XEXP (lhs, 1))
+      && GET_CODE (rhs) == AND
+      && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT
+      && REG_P (XEXP (XEXP (rhs, 0), 0))
+      && CONST_INT_P (XEXP (XEXP (rhs, 0), 1))
+      && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8
+      && CONST_INT_P (XEXP (rhs, 1))
+      && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0)))
+
+    {
+      rtx lhs_mask = XEXP (lhs, 1);
+      rtx rhs_mask = XEXP (rhs, 1);
+
+      return aarch_rev16_shright_mask_imm_p (rhs_mask, mode)
+             && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode);
+    }
+
+  return false;
+}
+
+/* Recognise a sequence of bitwise operations corresponding to a rev16 operation.
+   These will be of the form:
+     ((x >> 8) & 0x00ff00ff)
+   | ((x << 8) & 0xff00ff00)
+   for SImode and with similar but wider bitmasks for DImode.
+   The two sub-expressions of the IOR can appear on either side so check both
+   permutations with the help of aarch_rev16_p_1 above.  */
+
+bool
+aarch_rev16_p (rtx x)
+{
+  rtx left_sub_rtx, right_sub_rtx;
+  bool is_rev = false;
+
+  if (GET_CODE (x) != IOR)
+    return false;
+
+  left_sub_rtx = XEXP (x, 0);
+  right_sub_rtx = XEXP (x, 1);
+
+  /* There are no canonicalisation rules for the position of the two shifts
+     involved in a rev, so try both permutations.  */
+  is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x));
+
+  if (!is_rev)
+    is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x));
+
+  return is_rev;
+}
+
 /* Return nonzero if the CONSUMER instruction (a load) does need
    PRODUCER's value to calculate the address.  */
 int
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e74096c9ddc0..9f1e7ce966cd 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2014-04-23  Kyrylo Tkachov
+
+	* gcc.target/aarch64/rev16_1.c: New test.
+
 2014-04-23  Richard Biener
 
 	PR tree-optimization/60903
diff --git a/gcc/testsuite/gcc.target/aarch64/rev16_1.c b/gcc/testsuite/gcc.target/aarch64/rev16_1.c
new file mode 100644
index 000000000000..126d3c017714
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/rev16_1.c
@@ -0,0 +1,59 @@
+/* { dg-options "-O2" } */
+/* { dg-do run } */
+
+extern void abort (void);
+
+typedef unsigned int __u32;
+
+__u32
+__rev16_32_alt (__u32 x)
+{
+  return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8)
+         | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8);
+}
+
+__u32
+__rev16_32 (__u32 x)
+{
+  return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8)
+         | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8);
+}
+
+typedef unsigned long long __u64;
+
+__u64
+__rev16_64_alt (__u64 x)
+{
+  return (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8)
+         | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8);
+}
+
+__u64
+__rev16_64 (__u64 x)
+{
+  return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8)
+         | (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8);
+}
+
+int
+main (void)
+{
+  volatile __u32 in32 = 0x12345678;
+  volatile __u32 expected32 = 0x34127856;
+  volatile __u64 in64 = 0x1234567890abcdefUL;
+  volatile __u64 expected64 = 0x34127856ab90efcdUL;
+
+  if (__rev16_32 (in32) != expected32)
+    abort ();
+
+  if (__rev16_32_alt (in32) != expected32)
+    abort ();
+
+  if (__rev16_64 (in64) != expected64)
+    abort ();
+
+  if (__rev16_64_alt (in64) != expected64)
+    abort ();
+
+  return 0;
+}
-- 
2.47.3
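
For reference, the intended effect of the new patterns is that a half-word byte swap written as shifts and masks, as in the test above, collapses into a single rev16 instruction once combine builds the IOR of the two masked shifts and the new rtx cost entry makes that combination profitable. A minimal sketch of the expected code generation at -O2 follows; the source file name, function name, cross-compiler invocation and exact register allocation here are illustrative assumptions, not part of the patch:

    /* rev16_example.c -- hypothetical standalone example.  */
    unsigned int
    swap_halfword_bytes (unsigned int x)
    {
      /* Same idiom as __rev16_32 in the new test; expected to match one of
         the rev16<mode>2 patterns after combine merges the shifts, ANDs and
         the IOR into a single RTX.  */
      return ((x & 0x00ff00ffU) << 8) | ((x & 0xff00ff00U) >> 8);
    }

    $ aarch64-none-elf-gcc -O2 -S rev16_example.c
    ; relevant excerpt of the output (assembler directives omitted):
    swap_halfword_bytes:
            rev16   w0, w0
            ret

Without the patterns the same function falls back to a longer shift/mask/orr sequence, which is what the added rtx_costs handling and the two insn patterns are there to avoid.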