From ff5a75fc05f8dc9b999dc378f3da0b6f686fde52 Mon Sep 17 00:00:00 2001 From: rearnsha Date: Wed, 20 Nov 2013 13:55:04 +0000 Subject: [PATCH] PR rtl-optimization/54300 gcc/ PR rtl-optimization/54300 * regcprop.c (copyprop_hardreg_forward_1): Ensure any unused outputs in a single-set are killed from the value chains. gcc/testsuite: PR rtl-optimization/54300 * gcc.target/arm/pr54300.C: New test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@205117 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 6 +++ gcc/regcprop.c | 18 ++++++++ gcc/testsuite/ChangeLog | 5 +++ gcc/testsuite/gcc.target/arm/pr54300.C | 61 ++++++++++++++++++++++++++ 4 files changed, 90 insertions(+) create mode 100644 gcc/testsuite/gcc.target/arm/pr54300.C diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 15476a4f611d..1064cfe0601b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2013-11-20 Richard Earnshaw + + PR rtl-optimization/54300 + * regcprop.c (copyprop_hardreg_forward_1): Ensure any unused + outputs in a single-set are killed from the value chains. + 2013-11-20 Ilya Enkovich * cgraph.h (varpool_node): Add need_bounds_init field. diff --git a/gcc/regcprop.c b/gcc/regcprop.c index 0fa0afbc8248..9b52a6301f7c 100644 --- a/gcc/regcprop.c +++ b/gcc/regcprop.c @@ -747,6 +747,7 @@ copyprop_hardreg_forward_1 (basic_block bb, struct value_data *vd) int n_ops, i, alt, predicated; bool is_asm, any_replacements; rtx set; + rtx link; bool replaced[MAX_RECOG_OPERANDS]; bool changed = false; struct kill_set_value_data ksvd; @@ -815,6 +816,23 @@ copyprop_hardreg_forward_1 (basic_block bb, struct value_data *vd) if (recog_op_alt[i][alt].earlyclobber) kill_value (recog_data.operand[i], vd); + /* If we have dead sets in the insn, then we need to note these as we + would clobbers. */ + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + { + if (REG_NOTE_KIND (link) == REG_UNUSED) + { + kill_value (XEXP (link, 0), vd); + /* Furthermore, if the insn looked like a single-set, + but the dead store kills the source value of that + set, then we can no-longer use the plain move + special case below. */ + if (set + && reg_overlap_mentioned_p (XEXP (link, 0), SET_SRC (set))) + set = NULL; + } + } + /* Special-case plain move instructions, since we may well be able to do the move from a different register class. */ if (set && REG_P (SET_SRC (set))) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e937d50277ab..37e83eae5b27 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2013-11-20 Richard Earnshaw + + PR rtl-optimization/54300 + * gcc.target/arm/pr54300.C: New test. + 2013-11-20 Diego Novillo PR 59212 diff --git a/gcc/testsuite/gcc.target/arm/pr54300.C b/gcc/testsuite/gcc.target/arm/pr54300.C new file mode 100644 index 000000000000..eb1a74e36cff --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pr54300.C @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include +#include + +struct __attribute__ ((aligned(8))) _v16u8_ { + uint8x16_t val; + _v16u8_( const int16x8_t &src) { val = vreinterpretq_u8_s16(src); } + operator int16x8_t () const { return vreinterpretq_s16_u8(val); } +}; +typedef struct _v16u8_ v16u8; + +struct __attribute__ ((aligned(4))) _v8u8_ { + uint8x8_t val; + _v8u8_( const uint8x8_t &src) { val = src; } + operator int16x4_t () const { return vreinterpret_s16_u8(val); } +}; +typedef struct _v8u8_ v8u8; + +typedef v16u8 v8i16; +typedef int32x4_t v4i32; +typedef const short cv1i16; +typedef const unsigned char cv1u8; +typedef const v8i16 cv8i16; + +static inline __attribute__((always_inline)) v8u8 zero_64(){ return vdup_n_u8( 0 ); } + +static inline __attribute__((always_inline)) v8i16 loadlo_8i16( cv8i16* p ){ + return vcombine_s16( vld1_s16( (cv1i16 *)p ), zero_64() ); +} +static inline __attribute__((always_inline)) v8i16 _loadlo_8i16( cv8i16* p, int offset ){ + return loadlo_8i16( (cv8i16*)(&((cv1u8*)p)[offset]) ); +} + +void __attribute__((noinline)) +test(unsigned short *_Inp, int32_t *_Out, + unsigned int s1v, unsigned int dv0, + unsigned int smask_v) +{ + int32x4_t c = vdupq_n_s32(0); + + for(unsigned int sv=0 ; sv!=dv0 ; sv=(sv+s1v)&smask_v ) + { + int32x4_t s; + s = vmovl_s16( vget_low_s16( _loadlo_8i16( (cv8i16*) _Inp, sv ) ) ); + c = vaddq_s32( c, s ); + } + vst1q_s32( _Out, c ); +} + +main() +{ + unsigned short a[4] = {1, 2, 3, 4}; + int32_t b[4] = {0, 0, 0, 0}; + test(a, b, 1, 1, ~0); + if (b[0] != 1 || b[1] != 2 || b[2] != 3 || b[3] != 4) + abort(); +} -- 2.47.2