From: Jonathan Wright Date: Fri, 16 Jul 2021 14:34:38 +0000 (+0100) Subject: simplify-rtx: Push sign/zero-extension inside vec_duplicate X-Git-Tag: basepoints/gcc-13~5782 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3bc9db6a989671bedf19e61bd1b21f79588e99da;p=thirdparty%2Fgcc.git simplify-rtx: Push sign/zero-extension inside vec_duplicate As a general principle, vec_duplicate should be as close to the root of an expression as possible. Where unary operations have vec_duplicate as an argument, these operations should be pushed inside the vec_duplicate. This patch modifies unary operation simplification to push sign/zero-extension of a scalar inside vec_duplicate. This patch also updates all RTL patterns in aarch64-simd.md to use the new canonical form. gcc/ChangeLog: 2021-07-19 Jonathan Wright * config/aarch64/aarch64-simd.md: Push sign/zero-extension inside vec_duplicate for all patterns. * simplify-rtx.c (simplify_context::simplify_unary_operation_1): Push sign/zero-extension inside vec_duplicate. 
--- diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 13c86984df14..c5638d096fa8 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2079,14 +2079,16 @@ (define_insn "aarch64_mlal_hi_n_insn" [(set (match_operand: 0 "register_operand" "=w") - (plus: - (mult: - (ANY_EXTEND: (vec_select: - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND: (vec_duplicate: - (match_operand: 4 "register_operand" "")))) - (match_operand: 1 "register_operand" "0")))] + (plus: + (mult: + (ANY_EXTEND: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (ANY_EXTEND: + (match_operand: 4 "register_operand" "")))) + (match_operand: 1 "register_operand" "0")))] "TARGET_SIMD" "mlal2\t%0., %2., %4.[0]" [(set_attr "type" "neon_mla__long")] @@ -2154,14 +2156,16 @@ (define_insn "aarch64_mlsl_hi_n_insn" [(set (match_operand: 0 "register_operand" "=w") - (minus: - (match_operand: 1 "register_operand" "0") - (mult: - (ANY_EXTEND: (vec_select: - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND: (vec_duplicate: - (match_operand: 4 "register_operand" ""))))))] + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (ANY_EXTEND: + (match_operand: 4 "register_operand" ""))))))] "TARGET_SIMD" "mlsl2\t%0., %2., %4.[0]" [(set_attr "type" "neon_mla__long")] @@ -2197,14 +2201,14 @@ (define_insn "aarch64_mlal_n" [(set (match_operand: 0 "register_operand" "=w") - (plus: - (mult: - (ANY_EXTEND: - (match_operand:VD_HSI 2 "register_operand" "w")) - (ANY_EXTEND: - (vec_duplicate:VD_HSI - (match_operand: 3 "register_operand" "")))) - (match_operand: 1 
"register_operand" "0")))] + (plus: + (mult: + (ANY_EXTEND: + (match_operand:VD_HSI 2 "register_operand" "w")) + (vec_duplicate: + (ANY_EXTEND: + (match_operand: 3 "register_operand" "")))) + (match_operand: 1 "register_operand" "0")))] "TARGET_SIMD" "mlal\t%0., %2., %3.[0]" [(set_attr "type" "neon_mla__long")] @@ -2226,14 +2230,14 @@ (define_insn "aarch64_mlsl_n" [(set (match_operand: 0 "register_operand" "=w") - (minus: - (match_operand: 1 "register_operand" "0") - (mult: - (ANY_EXTEND: - (match_operand:VD_HSI 2 "register_operand" "w")) - (ANY_EXTEND: - (vec_duplicate:VD_HSI - (match_operand: 3 "register_operand" ""))))))] + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: + (match_operand:VD_HSI 2 "register_operand" "w")) + (vec_duplicate: + (ANY_EXTEND: + (match_operand: 3 "register_operand" ""))))))] "TARGET_SIMD" "mlsl\t%0., %2., %3.[0]" [(set_attr "type" "neon_mla__long")] @@ -2311,8 +2315,8 @@ (mult: (ANY_EXTEND: (match_operand: 1 "register_operand" "w")) - (ANY_EXTEND: - (vec_duplicate: + (vec_duplicate: + (ANY_EXTEND: (vec_select: (match_operand:VDQHS 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))] @@ -2327,13 +2331,15 @@ (define_insn "aarch64_mull_hi_lane_insn" [(set (match_operand: 0 "register_operand" "=w") (mult: - (ANY_EXTEND: (vec_select: - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND: (vec_duplicate: - (vec_select: - (match_operand: 3 "register_operand" "") - (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))] + (ANY_EXTEND: + (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (ANY_EXTEND: + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))] "TARGET_SIMD" { operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); @@ -2359,13 
+2365,15 @@ (define_insn "aarch64_mull_hi_laneq_insn" [(set (match_operand: 0 "register_operand" "=w") (mult: - (ANY_EXTEND: (vec_select: - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND: (vec_duplicate: - (vec_select: - (match_operand: 3 "register_operand" "") - (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))] + (ANY_EXTEND: + (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (ANY_EXTEND: + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))] "TARGET_SIMD" { operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); @@ -2390,11 +2398,11 @@ (define_insn "aarch64_mull_n" [(set (match_operand: 0 "register_operand" "=w") - (mult: - (ANY_EXTEND: - (match_operand:VD_HSI 1 "register_operand" "w")) - (ANY_EXTEND: - (vec_duplicate: + (mult: + (ANY_EXTEND: + (match_operand:VD_HSI 1 "register_operand" "w")) + (vec_duplicate: + (ANY_EXTEND: (match_operand: 2 "register_operand" "")))))] "TARGET_SIMD" "mull\t%0., %1., %2.[0]" @@ -2404,11 +2412,12 @@ (define_insn "aarch64_mull_hi_n_insn" [(set (match_operand: 0 "register_operand" "=w") (mult: - (ANY_EXTEND: (vec_select: - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) (ANY_EXTEND: - (vec_duplicate: + (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (ANY_EXTEND: (match_operand: 2 "register_operand" "")))))] "TARGET_SIMD" "mull2\\t%0., %1., %2.[0]" @@ -2435,8 +2444,8 @@ (mult: (ANY_EXTEND: (match_operand: 2 "register_operand" "w")) - (ANY_EXTEND: - (vec_duplicate: + (vec_duplicate: + (ANY_EXTEND: (vec_select: (match_operand:VDQHS 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))) @@ -2453,13 +2462,15 @@ 
[(set (match_operand: 0 "register_operand" "=w") (plus: (mult: - (ANY_EXTEND: (vec_select: - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND: (vec_duplicate: - (vec_select: - (match_operand: 4 "register_operand" "") - (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))) + (ANY_EXTEND: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (ANY_EXTEND: + (vec_select: + (match_operand: 4 "register_operand" "") + (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))) (match_operand: 1 "register_operand" "0")))] "TARGET_SIMD" { @@ -2488,13 +2499,15 @@ [(set (match_operand: 0 "register_operand" "=w") (plus: (mult: - (ANY_EXTEND: (vec_select: - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND: (vec_duplicate: - (vec_select: - (match_operand: 4 "register_operand" "") - (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))) + (ANY_EXTEND: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (ANY_EXTEND: + (vec_select: + (match_operand: 4 "register_operand" "") + (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))) (match_operand: 1 "register_operand" "0")))] "TARGET_SIMD" { @@ -2526,8 +2539,8 @@ (mult: (ANY_EXTEND: (match_operand: 2 "register_operand" "w")) - (ANY_EXTEND: - (vec_duplicate: + (vec_duplicate: + (ANY_EXTEND: (vec_select: (match_operand:VDQHS 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))] @@ -2544,13 +2557,15 @@ (minus: (match_operand: 1 "register_operand" "0") (mult: - (ANY_EXTEND: (vec_select: - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND: (vec_duplicate: - (vec_select: - (match_operand: 4 
"register_operand" "") - (parallel [(match_operand:SI 5 "immediate_operand" "i")])))) + (ANY_EXTEND: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (ANY_EXTEND: + (vec_select: + (match_operand: 4 "register_operand" "") + (parallel [(match_operand:SI 5 "immediate_operand" "i")])))) )))] "TARGET_SIMD" { @@ -2580,13 +2595,15 @@ (minus: (match_operand: 1 "register_operand" "0") (mult: - (ANY_EXTEND: (vec_select: - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND: (vec_duplicate: - (vec_select: - (match_operand: 4 "register_operand" "") - (parallel [(match_operand:SI 5 "immediate_operand" "i")])))) + (ANY_EXTEND: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (ANY_EXTEND: + (vec_select: + (match_operand: 4 "register_operand" "") + (parallel [(match_operand:SI 5 "immediate_operand" "i")])))) )))] "TARGET_SIMD" { @@ -5313,12 +5330,12 @@ (mult: (sign_extend: (match_operand:VD_HSI 2 "register_operand" "w")) - (sign_extend: - (vec_duplicate:VD_HSI + (vec_duplicate: + (sign_extend: (vec_select: (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) - )) + )) (const_int 1)) (match_operand: 1 "register_operand" "0")))] "TARGET_SIMD" @@ -5338,12 +5355,12 @@ (mult: (sign_extend: (match_operand:VD_HSI 2 "register_operand" "w")) - (sign_extend: - (vec_duplicate:VD_HSI + (vec_duplicate: + (sign_extend: (vec_select: (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) - )) + )) (const_int 1))))] "TARGET_SIMD" { @@ -5363,12 +5380,12 @@ (mult: (sign_extend: (match_operand:VD_HSI 2 "register_operand" "w")) - (sign_extend: - (vec_duplicate:VD_HSI + (vec_duplicate: + (sign_extend: (vec_select: (match_operand: 3 "register_operand" "") (parallel 
[(match_operand:SI 4 "immediate_operand" "i")]))) - )) + )) (const_int 1))))] "TARGET_SIMD" { @@ -5386,12 +5403,12 @@ (mult: (sign_extend: (match_operand:VD_HSI 2 "register_operand" "w")) - (sign_extend: - (vec_duplicate:VD_HSI + (vec_duplicate: + (sign_extend: (vec_select: (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) - )) + )) (const_int 1)) (match_operand: 1 "register_operand" "0")))] "TARGET_SIMD" @@ -5507,8 +5524,8 @@ (mult: (sign_extend: (match_operand:VD_HSI 2 "register_operand" "w")) - (sign_extend: - (vec_duplicate:VD_HSI + (vec_duplicate: + (sign_extend: (match_operand: 3 "register_operand" "")))) (const_int 1))))] "TARGET_SIMD" @@ -5523,8 +5540,8 @@ (mult: (sign_extend: (match_operand:VD_HSI 2 "register_operand" "w")) - (sign_extend: - (vec_duplicate:VD_HSI + (vec_duplicate: + (sign_extend: (match_operand: 3 "register_operand" "")))) (const_int 1)) (match_operand: 1 "register_operand" "0")))] @@ -5601,11 +5618,11 @@ (ss_ashift: (mult: (sign_extend: - (vec_select: - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) - (sign_extend: - (vec_duplicate: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (sign_extend: (vec_select: (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]) @@ -5622,15 +5639,15 @@ (define_insn "aarch64_sqdmlal2_lane_internal" [(set (match_operand: 0 "register_operand" "=w") - (ss_plus: + (ss_plus: (ss_ashift: (mult: (sign_extend: - (vec_select: - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) - (sign_extend: - (vec_duplicate: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (sign_extend: (vec_select: (match_operand: 3 "register_operand" "") 
(parallel [(match_operand:SI 4 "immediate_operand" "i")]) @@ -5648,16 +5665,16 @@ (define_insn "aarch64_sqdmlsl2_laneq_internal" [(set (match_operand: 0 "register_operand" "=w") - (ss_minus: + (ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: (sign_extend: - (vec_select: - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) - (sign_extend: - (vec_duplicate: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (sign_extend: (vec_select: (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]) @@ -5674,15 +5691,15 @@ (define_insn "aarch64_sqdmlal2_laneq_internal" [(set (match_operand: 0 "register_operand" "=w") - (ss_plus: + (ss_plus: (ss_ashift: (mult: (sign_extend: - (vec_select: - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) - (sign_extend: - (vec_duplicate: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (sign_extend: (vec_select: (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]) @@ -5734,16 +5751,16 @@ (define_insn "aarch64_sqdmlsl2_n_internal" [(set (match_operand: 0 "register_operand" "=w") - (ss_minus: + (ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: (sign_extend: - (vec_select: - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) - (sign_extend: - (vec_duplicate: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (sign_extend: (match_operand: 3 "register_operand" "")))) (const_int 1))))] "TARGET_SIMD" @@ -5753,15 +5770,15 @@ (define_insn "aarch64_sqdmlal2_n_internal" [(set (match_operand: 0 
"register_operand" "=w") - (ss_plus: + (ss_plus: (ss_ashift: (mult: (sign_extend: - (vec_select: - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) - (sign_extend: - (vec_duplicate: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (sign_extend: (match_operand: 3 "register_operand" "")))) (const_int 1)) (match_operand: 1 "register_operand" "0")))] @@ -5806,13 +5823,13 @@ (define_insn "aarch64_sqdmull_lane" [(set (match_operand: 0 "register_operand" "=w") - (ss_ashift: + (ss_ashift: (mult: (sign_extend: (match_operand:VD_HSI 1 "register_operand" "w")) - (sign_extend: - (vec_duplicate:VD_HSI - (vec_select: + (vec_duplicate: + (sign_extend: + (vec_select: (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) @@ -5827,13 +5844,13 @@ (define_insn "aarch64_sqdmull_laneq" [(set (match_operand: 0 "register_operand" "=w") - (ss_ashift: + (ss_ashift: (mult: (sign_extend: (match_operand:VD_HSI 1 "register_operand" "w")) - (sign_extend: - (vec_duplicate:VD_HSI - (vec_select: + (vec_duplicate: + (sign_extend: + (vec_select: (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) @@ -5890,13 +5907,13 @@ (define_insn "aarch64_sqdmull_n" [(set (match_operand: 0 "register_operand" "=w") - (ss_ashift: + (ss_ashift: (mult: (sign_extend: (match_operand:VD_HSI 1 "register_operand" "w")) - (sign_extend: - (vec_duplicate:VD_HSI - (match_operand: 2 "register_operand" ""))) + (vec_duplicate: + (sign_extend: + (match_operand: 2 "register_operand" ""))) ) (const_int 1)))] "TARGET_SIMD" @@ -5906,8 +5923,6 @@ ;; vqdmull2 - - (define_insn "aarch64_sqdmull2_internal" [(set (match_operand: 0 "register_operand" "=w") (ss_ashift: @@ -5943,15 +5958,15 @@ (define_insn "aarch64_sqdmull2_lane_internal" [(set (match_operand: 0 "register_operand" "=w") - 
(ss_ashift: + (ss_ashift: (mult: (sign_extend: (vec_select: - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) - (sign_extend: - (vec_duplicate: - (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (sign_extend: + (vec_select: (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) @@ -5966,15 +5981,15 @@ (define_insn "aarch64_sqdmull2_laneq_internal" [(set (match_operand: 0 "register_operand" "=w") - (ss_ashift: + (ss_ashift: (mult: (sign_extend: (vec_select: - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) - (sign_extend: - (vec_duplicate: - (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (sign_extend: + (vec_select: (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) @@ -6019,15 +6034,15 @@ (define_insn "aarch64_sqdmull2_n_internal" [(set (match_operand: 0 "register_operand" "=w") - (ss_ashift: + (ss_ashift: (mult: (sign_extend: (vec_select: - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) - (sign_extend: - (vec_duplicate: - (match_operand: 2 "register_operand" ""))) + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate: + (sign_extend: + (match_operand: 2 "register_operand" ""))) ) (const_int 1)))] "TARGET_SIMD" diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index fd306bfbba6d..a719f57870fc 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -1717,22 +1717,35 @@ simplify_context::simplify_unary_operation_1 (rtx_code code, machine_mode mode, && vec_duplicate_p (op, &elt) && code != VEC_DUPLICATE) { - /* Try applying the operator to ELT and 
see if that simplifies. - We can duplicate the result if so. + if (code == SIGN_EXTEND || code == ZERO_EXTEND) + /* Enforce a canonical order of VEC_DUPLICATE wrt other unary + operations by promoting VEC_DUPLICATE to the root of the expression + (as far as possible). */ + temp = simplify_gen_unary (code, GET_MODE_INNER (mode), + elt, GET_MODE_INNER (GET_MODE (op))); + else + /* Try applying the operator to ELT and see if that simplifies. + We can duplicate the result if so. - The reason we don't use simplify_gen_unary is that it isn't - necessarily a win to convert things like: + The reason we traditionally haven't used simplify_gen_unary + for these codes is that it didn't necessarily seem to be a + win to convert things like: - (neg:V (vec_duplicate:V (reg:S R))) + (neg:V (vec_duplicate:V (reg:S R))) - to: + to: - (vec_duplicate:V (neg:S (reg:S R))) + (vec_duplicate:V (neg:S (reg:S R))) - The first might be done entirely in vector registers while the - second might need a move between register files. */ - temp = simplify_unary_operation (code, GET_MODE_INNER (mode), - elt, GET_MODE_INNER (GET_MODE (op))); + The first might be done entirely in vector registers while the + second might need a move between register files. + + However, there are also cases where promoting the vec_duplicate is + more efficient, and there is definite value in having a canonical + form when matching instruction patterns. We should consider + extending the simplify_gen_unary code above to more cases. */ + temp = simplify_unary_operation (code, GET_MODE_INNER (mode), + elt, GET_MODE_INNER (GET_MODE (op))); if (temp) return gen_vec_duplicate (mode, temp); }