From f200869abc9e4b4dc6e48425bc3cd22fa1799a67 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 11 Oct 2013 18:52:07 +0000 Subject: [PATCH] vector.md (vec_realign_load): Generate vperm directly to circumvent subtract from splat{31} workaround. 2013-10-11 Bill Schmidt * config/rs6000/vector.md (vec_realign_load): Generate vperm directly to circumvent subtract from splat{31} workaround. * config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_le): New prototype. * config/rs6000/rs6000.c (altivec_expand_vec_perm_le): New. * config/rs6000/altivec.md (define_c_enum "unspec"): Add UNSPEC_VPERM_X and UNSPEC_VPERM_UNS_X. (altivec_vperm_<mode>): Convert to define_insn_and_split to separate big and little endian logic. (*altivec_vperm_<mode>_internal): New define_insn. (altivec_vperm_<mode>_uns): Convert to define_insn_and_split to separate big and little endian logic. (*altivec_vperm_<mode>_uns_internal): New define_insn. (vec_permv16qi): Add little endian logic. From-SVN: r203457 --- gcc/ChangeLog | 17 +++++++ gcc/config/rs6000/altivec.md | 83 +++++++++++++++++++++++++++++-- gcc/config/rs6000/rs6000-protos.h | 1 + gcc/config/rs6000/rs6000.c | 48 ++++++++++++++++++ gcc/config/rs6000/vector.md | 11 +++- 5 files changed, 155 insertions(+), 5 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a5bde2c900e7..5a6fff651cf8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2013-10-11 Bill Schmidt + + * config/rs6000/vector.md (vec_realign_load): Generate vperm + directly to circumvent subtract from splat{31} workaround. + * config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_le): New + prototype. + * config/rs6000/rs6000.c (altivec_expand_vec_perm_le): New. + * config/rs6000/altivec.md (define_c_enum "unspec"): Add + UNSPEC_VPERM_X and UNSPEC_VPERM_UNS_X. + (altivec_vperm_<mode>): Convert to define_insn_and_split to + separate big and little endian logic. + (*altivec_vperm_<mode>_internal): New define_insn. 
+ (altivec_vperm_<mode>_uns): Convert to define_insn_and_split to + separate big and little endian logic. + (*altivec_vperm_<mode>_uns_internal): New define_insn. + (vec_permv16qi): Add little endian logic. + 2013-10-11 Marc Glisse * doc/extend.texi (returns_nonnull): Remove arguments. diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 47a3a6894e15..49f908161dfb 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -59,6 +59,8 @@ UNSPEC_VSUMSWS UNSPEC_VPERM UNSPEC_VPERM_UNS + UNSPEC_VPERM_X + UNSPEC_VPERM_UNS_X UNSPEC_VRFIN UNSPEC_VCFUX UNSPEC_VCFSX @@ -1279,21 +1281,91 @@ "vrfiz %0,%1" [(set_attr "type" "vecfloat")]) -(define_insn "altivec_vperm_" +(define_insn_and_split "altivec_vperm_" [(set (match_operand:VM 0 "register_operand" "=v") (unspec:VM [(match_operand:VM 1 "register_operand" "v") (match_operand:VM 2 "register_operand" "v") (match_operand:V16QI 3 "register_operand" "v")] + UNSPEC_VPERM_X))] + "TARGET_ALTIVEC" + "#" + "!reload_in_progress && !reload_completed" + [(set (match_dup 0) (match_dup 4))] +{ + if (BYTES_BIG_ENDIAN) + operands[4] = gen_rtx_UNSPEC (mode, + gen_rtvec (3, operands[1], + operands[2], operands[3]), + UNSPEC_VPERM); + else + { + /* We want to subtract from 31, but we can't vspltisb 31 since + it's out of range. -1 works as well because only the low-order + five bits of the permute control vector elements are used. 
*/ + rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode, + gen_rtx_CONST_INT (QImode, -1)); + rtx tmp = gen_reg_rtx (V16QImode); + emit_move_insn (tmp, splat); + rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]); + emit_move_insn (tmp, sel); + operands[4] = gen_rtx_UNSPEC (mode, + gen_rtvec (3, operands[2], + operands[1], tmp), + UNSPEC_VPERM); + } +} + [(set_attr "type" "vecperm")]) + +(define_insn "*altivec_vperm__internal" + [(set (match_operand:VM 0 "register_operand" "=v") + (unspec:VM [(match_operand:VM 1 "register_operand" "v") + (match_operand:VM 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "+v")] UNSPEC_VPERM))] "TARGET_ALTIVEC" "vperm %0,%1,%2,%3" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vperm__uns" +(define_insn_and_split "altivec_vperm__uns" [(set (match_operand:VM 0 "register_operand" "=v") (unspec:VM [(match_operand:VM 1 "register_operand" "v") (match_operand:VM 2 "register_operand" "v") (match_operand:V16QI 3 "register_operand" "v")] + UNSPEC_VPERM_UNS_X))] + "TARGET_ALTIVEC" + "#" + "!reload_in_progress && !reload_completed" + [(set (match_dup 0) (match_dup 4))] +{ + if (BYTES_BIG_ENDIAN) + operands[4] = gen_rtx_UNSPEC (mode, + gen_rtvec (3, operands[1], + operands[2], operands[3]), + UNSPEC_VPERM_UNS); + else + { + /* We want to subtract from 31, but we can't vspltisb 31 since + it's out of range. -1 works as well because only the low-order + five bits of the permute control vector elements are used. 
*/ + rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode, + gen_rtx_CONST_INT (QImode, -1)); + rtx tmp = gen_reg_rtx (V16QImode); + emit_move_insn (tmp, splat); + rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]); + emit_move_insn (tmp, sel); + operands[4] = gen_rtx_UNSPEC (mode, + gen_rtvec (3, operands[2], + operands[1], tmp), + UNSPEC_VPERM_UNS); + } +} + [(set_attr "type" "vecperm")]) + +(define_insn "*altivec_vperm__uns_internal" + [(set (match_operand:VM 0 "register_operand" "=v") + (unspec:VM [(match_operand:VM 1 "register_operand" "v") + (match_operand:VM 2 "register_operand" "v") + (match_operand:V16QI 3 "register_operand" "+v")] UNSPEC_VPERM_UNS))] "TARGET_ALTIVEC" "vperm %0,%1,%2,%3" @@ -1306,7 +1378,12 @@ (match_operand:V16QI 3 "register_operand" "")] UNSPEC_VPERM))] "TARGET_ALTIVEC" - "") +{ + if (!BYTES_BIG_ENDIAN) { + altivec_expand_vec_perm_le (operands); + DONE; + } +}) (define_expand "vec_perm_constv16qi" [(match_operand:V16QI 0 "register_operand" "") diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 9aa9429b83c5..1fc890340947 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -56,6 +56,7 @@ extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); extern void rs6000_expand_vector_extract (rtx, rtx, int); extern bool altivec_expand_vec_perm_const (rtx op[4]); +extern void altivec_expand_vec_perm_le (rtx op[4]); extern bool rs6000_expand_vec_perm_const (rtx op[4]); extern void rs6000_expand_extract_even (rtx, rtx, rtx); extern void rs6000_expand_interleave (rtx, rtx, rtx, bool); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 0cf6ed87f0c1..3f56136d983f 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -28608,6 +28608,54 @@ altivec_expand_vec_perm_const_le (rtx operands[4]) emit_move_insn (target, unspec); } +/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the + 
permute control vector. But here it's not a constant, so we must + generate a vector splat/subtract to do the adjustment. */ + +void +altivec_expand_vec_perm_le (rtx operands[4]) +{ + rtx splat, unspec; + rtx target = operands[0]; + rtx op0 = operands[1]; + rtx op1 = operands[2]; + rtx sel = operands[3]; + rtx tmp = target; + + /* Get everything in regs so the pattern matches. */ + if (!REG_P (op0)) + op0 = force_reg (V16QImode, op0); + if (!REG_P (op1)) + op1 = force_reg (V16QImode, op1); + if (!REG_P (sel)) + sel = force_reg (V16QImode, sel); + if (!REG_P (target)) + tmp = gen_reg_rtx (V16QImode); + + /* SEL = splat(31) - SEL. */ + /* We want to subtract from 31, but we can't vspltisb 31 since + it's out of range. -1 works as well because only the low-order + five bits of the permute control vector elements are used. */ + splat = gen_rtx_VEC_DUPLICATE (V16QImode, + gen_rtx_CONST_INT (QImode, -1)); + emit_move_insn (tmp, splat); + sel = gen_rtx_MINUS (V16QImode, tmp, sel); + emit_move_insn (tmp, sel); + + /* Permute with operands reversed and adjusted selector. */ + unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, tmp), + UNSPEC_VPERM); + + /* Copy into target, possibly by way of a register. */ + if (!REG_P (target)) + { + emit_move_insn (tmp, unspec); + unspec = tmp; + } + + emit_move_insn (target, unspec); +} + /* Expand an Altivec constant permutation. Return true if we match an efficient implementation; false to fall back to VPERM. */ diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index cbb1f4f8e724..e88d879591e3 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -950,8 +950,15 @@ emit_insn (gen_altivec_vperm_ (operands[0], operands[1], operands[2], operands[3])); else - emit_insn (gen_altivec_vperm_ (operands[0], operands[2], - operands[1], operands[3])); + { + /* Avoid the "subtract from splat31" workaround for vperm since + we have changed lvsr to lvsl instead. 
*/ + rtx unspec = gen_rtx_UNSPEC (mode, + gen_rtvec (3, operands[2], + operands[1], operands[3]), + UNSPEC_VPERM); + emit_move_insn (operands[0], unspec); + } DONE; }) -- 2.47.2