From 9a810e57c4e6af54d29c325a013f451ade2b85e8 Mon Sep 17 00:00:00 2001
From: Srinath Parvathaneni <srinath.parvathaneni@arm.com>
Date: Thu, 4 Jun 2020 15:41:29 +0100
Subject: [PATCH] [ARM]: Correct the grouping of operands in MVE vector scatter
 store intrinsics (PR94735).

The operands in the RTL patterns of the MVE vector scatter store intrinsics
are wrongly grouped, because of which a few vector load and store
instructions are wrongly optimized away at -O2.

This patch defines a new predicate, "mve_scatter_memory", which returns TRUE
only when it matches (mem (reg)) for the MVE scatter store intrinsics. The
issue is fixed by converting the affected define_insn patterns into
define_expand patterns that use the "mve_scatter_memory" predicate; each
define_expand extracts the base register from the operand matched by
"mve_scatter_memory" and passes it as the first (register_operand) argument
to a corresponding new define_insn.

gcc/ChangeLog:

2020-06-01  Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

	PR target/94735
	* config/arm/predicates.md (mve_scatter_memory): Define to
	match (mem (reg)) for scatter store memory.
	* config/arm/mve.md (mve_vstrbq_scatter_offset_<mode>): Modify
	define_insn to define_expand.
	(mve_vstrbq_scatter_offset_p_<mode>): Likewise.
	(mve_vstrhq_scatter_offset_<mode>): Likewise.
	(mve_vstrhq_scatter_offset_p_<mode>): Likewise.
	(mve_vstrhq_scatter_shifted_offset_p_<mode>): Likewise.
	(mve_vstrhq_scatter_shifted_offset_<mode>): Likewise.
	(mve_vstrdq_scatter_offset_p_v2di): Likewise.
	(mve_vstrdq_scatter_offset_v2di): Likewise.
	(mve_vstrdq_scatter_shifted_offset_p_v2di): Likewise.
	(mve_vstrdq_scatter_shifted_offset_v2di): Likewise.
	(mve_vstrhq_scatter_offset_fv8hf): Likewise.
	(mve_vstrhq_scatter_offset_p_fv8hf): Likewise.
	(mve_vstrhq_scatter_shifted_offset_fv8hf): Likewise.
	(mve_vstrhq_scatter_shifted_offset_p_fv8hf): Likewise.
	(mve_vstrwq_scatter_offset_fv4sf): Likewise.
	(mve_vstrwq_scatter_offset_p_fv4sf): Likewise.
	(mve_vstrwq_scatter_offset_p_v4si): Likewise.
	(mve_vstrwq_scatter_offset_v4si): Likewise.
	(mve_vstrwq_scatter_shifted_offset_fv4sf): Likewise.
	(mve_vstrwq_scatter_shifted_offset_p_fv4sf): Likewise.
	(mve_vstrwq_scatter_shifted_offset_p_v4si): Likewise.
	(mve_vstrwq_scatter_shifted_offset_v4si): Likewise.
	(mve_vstrbq_scatter_offset_<mode>_insn): Define insn for scatter
	stores.
	(mve_vstrbq_scatter_offset_p_<mode>_insn): Likewise.
	(mve_vstrhq_scatter_offset_<mode>_insn): Likewise.
	(mve_vstrhq_scatter_offset_p_<mode>_insn): Likewise.
	(mve_vstrhq_scatter_shifted_offset_p_<mode>_insn): Likewise.
	(mve_vstrhq_scatter_shifted_offset_<mode>_insn): Likewise.
	(mve_vstrdq_scatter_offset_p_v2di_insn): Likewise.
	(mve_vstrdq_scatter_offset_v2di_insn): Likewise.
	(mve_vstrdq_scatter_shifted_offset_p_v2di_insn): Likewise.
	(mve_vstrdq_scatter_shifted_offset_v2di_insn): Likewise.
	(mve_vstrhq_scatter_offset_fv8hf_insn): Likewise.
	(mve_vstrhq_scatter_offset_p_fv8hf_insn): Likewise.
	(mve_vstrhq_scatter_shifted_offset_fv8hf_insn): Likewise.
	(mve_vstrhq_scatter_shifted_offset_p_fv8hf_insn): Likewise.
	(mve_vstrwq_scatter_offset_fv4sf_insn): Likewise.
	(mve_vstrwq_scatter_offset_p_fv4sf_insn): Likewise.
	(mve_vstrwq_scatter_offset_p_v4si_insn): Likewise.
	(mve_vstrwq_scatter_offset_v4si_insn): Likewise.
	(mve_vstrwq_scatter_shifted_offset_fv4sf_insn): Likewise.
	(mve_vstrwq_scatter_shifted_offset_p_fv4sf_insn): Likewise.
	(mve_vstrwq_scatter_shifted_offset_p_v4si_insn): Likewise.
	(mve_vstrwq_scatter_shifted_offset_v4si_insn): Likewise.

gcc/testsuite/ChangeLog:

2020-06-01  Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

	PR target/94735
	* gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base.c: New test.
	* gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base_p.c: Likewise.
	* gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset.c: Likewise.
* gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p.c: Likewise. * gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset.c: Likewise. * gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset_p.c: Likewise. --- gcc/config/arm/mve.md | 828 +++++++++++------- gcc/config/arm/predicates.md | 6 + .../mve/intrinsics/mve_vstore_scatter_base.c | 67 ++ .../intrinsics/mve_vstore_scatter_base_p.c | 69 ++ .../intrinsics/mve_vstore_scatter_offset.c | 215 +++++ .../intrinsics/mve_vstore_scatter_offset_p.c | 216 +++++ .../mve_vstore_scatter_shifted_offset.c | 141 +++ .../mve_vstore_scatter_shifted_offset_p.c | 142 +++ 8 files changed, 1363 insertions(+), 321 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base_p.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset_p.c diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 986fbfe2abae..3a57901bd5bc 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -8102,22 +8102,29 @@ ;; ;; [vstrbq_scatter_offset_s vstrbq_scatter_offset_u] ;; -(define_insn "mve_vstrbq_scatter_offset_" - [(set (match_operand: 0 "memory_operand" "=Us") - (unspec: - [(match_operand:MVE_2 1 "s_register_operand" "w") - (match_operand:MVE_2 2 "s_register_operand" "w")] - VSTRBSOQ)) - ] +(define_expand "mve_vstrbq_scatter_offset_" + [(match_operand: 0 "mve_scatter_memory") + (match_operand:MVE_2 1 "s_register_operand") + (match_operand:MVE_2 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRBSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn("vstrb.\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrbq_scatter_offset__insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrbq_scatter_offset__insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:MVE_2 1 "s_register_operand" "w") + (match_operand:MVE_2 2 "s_register_operand" "w")] + VSTRBSOQ))] + "TARGET_HAVE_MVE" + "vstrb.\t%q2, [%0, %q1]" [(set_attr "length" "4")]) ;; @@ -8210,23 +8217,33 @@ ;; ;; [vstrbq_scatter_offset_p_s vstrbq_scatter_offset_p_u] ;; -(define_insn "mve_vstrbq_scatter_offset_p_" - [(set (match_operand: 0 "memory_operand" "=Us") - (unspec: - [(match_operand:MVE_2 1 "s_register_operand" "w") - (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRBSOQ)) - ] +(define_expand "mve_vstrbq_scatter_offset_p_" + [(match_operand: 0 "mve_scatter_memory") + (match_operand:MVE_2 1 "s_register_operand") + (match_operand:MVE_2 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand" "Up") + (unspec:V4SI [(const_int 0)] VSTRBSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvstrbt.\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrbq_scatter_offset_p__insn (ind, 
operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrbq_scatter_offset_p__insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:MVE_2 1 "s_register_operand" "w") + (match_operand:MVE_2 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRBSOQ))] + "TARGET_HAVE_MVE" + "vpst\;vstrbt.\t%q2, [%0, %q1]" [(set_attr "length" "8")]) ;; @@ -9097,87 +9114,122 @@ ;; ;; [vstrhq_scatter_offset_p_s vstrhq_scatter_offset_p_u] ;; -(define_insn "mve_vstrhq_scatter_offset_p_" - [(set (match_operand: 0 "memory_operand" "=Us") - (unspec: - [(match_operand:MVE_6 1 "s_register_operand" "w") - (match_operand:MVE_6 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRHSOQ)) - ] +(define_expand "mve_vstrhq_scatter_offset_p_" + [(match_operand: 0 "mve_scatter_memory") + (match_operand:MVE_6 1 "s_register_operand") + (match_operand:MVE_6 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRHSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvstrht.\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrhq_scatter_offset_p__insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_offset_p__insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:MVE_6 1 "s_register_operand" "w") + (match_operand:MVE_6 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRHSOQ))] + "TARGET_HAVE_MVE" + "vpst\;vstrht.\t%q2, [%0, %q1]" [(set_attr "length" "8")]) ;; ;; [vstrhq_scatter_offset_s vstrhq_scatter_offset_u] ;; -(define_insn "mve_vstrhq_scatter_offset_" - [(set (match_operand: 0 "memory_operand" "=Us") - (unspec: - [(match_operand:MVE_6 1 "s_register_operand" "w") - (match_operand:MVE_6 2 "s_register_operand" "w")] - VSTRHSOQ)) - ] +(define_expand "mve_vstrhq_scatter_offset_" + [(match_operand: 0 "mve_scatter_memory") + (match_operand:MVE_6 1 "s_register_operand") + (match_operand:MVE_6 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRHSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrh.\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrhq_scatter_offset__insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_offset__insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:MVE_6 1 "s_register_operand" "w") + (match_operand:MVE_6 2 "s_register_operand" "w")] + VSTRHSOQ))] + "TARGET_HAVE_MVE" + "vstrh.\t%q2, [%0, %q1]" [(set_attr "length" "4")]) ;; ;; [vstrhq_scatter_shifted_offset_p_s vstrhq_scatter_shifted_offset_p_u] ;; -(define_insn "mve_vstrhq_scatter_shifted_offset_p_" - [(set (match_operand: 0 "memory_operand" "=Ux") - (unspec: - [(match_operand:MVE_6 1 "s_register_operand" "w") - (match_operand:MVE_6 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRHSSOQ)) - ] +(define_expand "mve_vstrhq_scatter_shifted_offset_p_" + [(match_operand: 0 "mve_scatter_memory") + (match_operand:MVE_6 1 "s_register_operand") + (match_operand:MVE_6 2 
"s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRHSSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvstrht.\t%q2, [%m0, %q1, uxtw #1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrhq_scatter_shifted_offset_p__insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_shifted_offset_p__insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:MVE_6 1 "s_register_operand" "w") + (match_operand:MVE_6 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRHSSOQ))] + "TARGET_HAVE_MVE" + "vpst\;vstrht.\t%q2, [%0, %q1, uxtw #1]" [(set_attr "length" "8")]) ;; ;; [vstrhq_scatter_shifted_offset_s vstrhq_scatter_shifted_offset_u] ;; -(define_insn "mve_vstrhq_scatter_shifted_offset_" - [(set (match_operand: 0 "memory_operand" "=Us") - (unspec: - [(match_operand:MVE_6 1 "s_register_operand" "w") - (match_operand:MVE_6 2 "s_register_operand" "w")] - VSTRHSSOQ)) - ] +(define_expand "mve_vstrhq_scatter_shifted_offset_" + [(match_operand: 0 "mve_scatter_memory") + (match_operand:MVE_6 1 "s_register_operand") + (match_operand:MVE_6 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRHSSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrh.\t%q2, [%m0, %q1, uxtw #1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrhq_scatter_shifted_offset__insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_shifted_offset__insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:MVE_6 1 "s_register_operand" "w") + (match_operand:MVE_6 2 "s_register_operand" "w")] + VSTRHSSOQ))] + "TARGET_HAVE_MVE" + "vstrh.\t%q2, [%0, %q1, uxtw #1]" [(set_attr "length" "4")]) ;; @@ -9345,173 +9397,240 @@ ;; ;; [vstrdq_scatter_offset_p_s vstrdq_scatter_offset_p_u] ;; -(define_insn "mve_vstrdq_scatter_offset_p_v2di" - [(set (match_operand:V2DI 0 "memory_operand" "=Us") - (unspec:V2DI - [(match_operand:V2DI 1 "s_register_operand" "w") - (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRDSOQ)) - ] +(define_expand "mve_vstrdq_scatter_offset_p_v2di" + [(match_operand:V2DI 0 "mve_scatter_memory") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:V2DI 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRDSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\;\tvstrdt.64\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrdq_scatter_offset_p_v2di_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrdq_scatter_offset_p_v2di_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V2DI 1 "s_register_operand" "w") + (match_operand:V2DI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRDSOQ))] + "TARGET_HAVE_MVE" + "vpst\;vstrdt.64\t%q2, [%0, %q1]" [(set_attr "length" 
"8")]) ;; ;; [vstrdq_scatter_offset_s vstrdq_scatter_offset_u] ;; -(define_insn "mve_vstrdq_scatter_offset_v2di" - [(set (match_operand:V2DI 0 "memory_operand" "=Us") - (unspec:V2DI - [(match_operand:V2DI 1 "s_register_operand" "w") - (match_operand:V2DI 2 "s_register_operand" "w")] - VSTRDSOQ)) - ] +(define_expand "mve_vstrdq_scatter_offset_v2di" + [(match_operand:V2DI 0 "mve_scatter_memory") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:V2DI 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRDSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrd.64\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrdq_scatter_offset_v2di_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrdq_scatter_offset_v2di_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V2DI 1 "s_register_operand" "w") + (match_operand:V2DI 2 "s_register_operand" "w")] + VSTRDSOQ))] + "TARGET_HAVE_MVE" + "vstrd.64\t%q2, [%0, %q1]" [(set_attr "length" "4")]) ;; ;; [vstrdq_scatter_shifted_offset_p_s vstrdq_scatter_shifted_offset_p_u] ;; -(define_insn "mve_vstrdq_scatter_shifted_offset_p_v2di" - [(set (match_operand:V2DI 0 "memory_operand" "=Us") - (unspec:V2DI - [(match_operand:V2DI 1 "s_register_operand" "w") - (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRDSSOQ)) - ] +(define_expand "mve_vstrdq_scatter_shifted_offset_p_v2di" + [(match_operand:V2DI 0 "mve_scatter_memory") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:V2DI 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRDSSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\;\tvstrdt.64\t%q2, [%m0, %q1, UXTW #3]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrdq_scatter_shifted_offset_p_v2di_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrdq_scatter_shifted_offset_p_v2di_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V2DI 1 "s_register_operand" "w") + (match_operand:V2DI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRDSSOQ))] + "TARGET_HAVE_MVE" + "vpst\;vstrdt.64\t%q2, [%0, %q1, UXTW #3]" [(set_attr "length" "8")]) ;; ;; [vstrdq_scatter_shifted_offset_s vstrdq_scatter_shifted_offset_u] ;; -(define_insn "mve_vstrdq_scatter_shifted_offset_v2di" - [(set (match_operand:V2DI 0 "memory_operand" "=Us") - (unspec:V2DI - [(match_operand:V2DI 1 "s_register_operand" "w") - (match_operand:V2DI 2 "s_register_operand" "w")] - VSTRDSSOQ)) - ] +(define_expand "mve_vstrdq_scatter_shifted_offset_v2di" + [(match_operand:V2DI 0 "mve_scatter_memory") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:V2DI 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRDSSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrd.64\t%q2, [%m0, %q1, UXTW #3]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrdq_scatter_shifted_offset_v2di_insn (ind, operands[1], + 
operands[2])); + DONE; +}) + +(define_insn "mve_vstrdq_scatter_shifted_offset_v2di_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V2DI 1 "s_register_operand" "w") + (match_operand:V2DI 2 "s_register_operand" "w")] + VSTRDSSOQ))] + "TARGET_HAVE_MVE" + "vstrd.64\t%q2, [%0, %q1, UXTW #3]" [(set_attr "length" "4")]) ;; ;; [vstrhq_scatter_offset_f] ;; -(define_insn "mve_vstrhq_scatter_offset_fv8hf" - [(set (match_operand:V8HI 0 "memory_operand" "=Us") - (unspec:V8HI - [(match_operand:V8HI 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w")] - VSTRHQSO_F)) - ] +(define_expand "mve_vstrhq_scatter_offset_fv8hf" + [(match_operand:V8HI 0 "mve_scatter_memory") + (match_operand:V8HI 1 "s_register_operand") + (match_operand:V8HF 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRHQSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrh.16\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrhq_scatter_offset_fv8hf_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_offset_fv8hf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V8HI 1 "s_register_operand" "w") + (match_operand:V8HF 2 "s_register_operand" "w")] + VSTRHQSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vstrh.16\t%q2, [%0, %q1]" [(set_attr "length" "4")]) ;; ;; [vstrhq_scatter_offset_p_f] ;; -(define_insn "mve_vstrhq_scatter_offset_p_fv8hf" - [(set (match_operand:V8HI 0 "memory_operand" "=Us") - (unspec:V8HI - [(match_operand:V8HI 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRHQSO_F)) - ] +(define_expand "mve_vstrhq_scatter_offset_p_fv8hf" + [(match_operand:V8HI 0 "mve_scatter_memory") + (match_operand:V8HI 1 "s_register_operand") + (match_operand:V8HF 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRHQSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvstrht.16\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrhq_scatter_offset_p_fv8hf_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_offset_p_fv8hf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V8HI 1 "s_register_operand" "w") + (match_operand:V8HF 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRHQSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vstrht.16\t%q2, [%0, %q1]" [(set_attr "length" "8")]) ;; ;; [vstrhq_scatter_shifted_offset_f] ;; -(define_insn "mve_vstrhq_scatter_shifted_offset_fv8hf" - [(set (match_operand:V8HI 0 "memory_operand" "=Us") - (unspec:V8HI - [(match_operand:V8HI 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w")] - VSTRHQSSO_F)) - ] +(define_expand "mve_vstrhq_scatter_shifted_offset_fv8hf" + [(match_operand:V8HI 0 "memory_operand" "=Us") + (match_operand:V8HI 1 "s_register_operand" "w") + (match_operand:V8HF 2 "s_register_operand" "w") + (unspec:V4SI 
[(const_int 0)] VSTRHQSSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrh.16\t%q2, [%m0, %q1, uxtw #1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrhq_scatter_shifted_offset_fv8hf_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_shifted_offset_fv8hf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V8HI 1 "s_register_operand" "w") + (match_operand:V8HF 2 "s_register_operand" "w")] + VSTRHQSSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vstrh.16\t%q2, [%0, %q1, uxtw #1]" [(set_attr "length" "4")]) ;; ;; [vstrhq_scatter_shifted_offset_p_f] ;; -(define_insn "mve_vstrhq_scatter_shifted_offset_p_fv8hf" - [(set (match_operand:V8HI 0 "memory_operand" "=Us") - (unspec:V8HI - [(match_operand:V8HI 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRHQSSO_F)) - ] +(define_expand "mve_vstrhq_scatter_shifted_offset_p_fv8hf" + [(match_operand:V8HI 0 "memory_operand" "=Us") + (match_operand:V8HI 1 "s_register_operand" "w") + (match_operand:V8HF 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up") + (unspec:V4SI [(const_int 0)] VSTRHQSSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvstrht.16\t%q2, [%m0, %q1, uxtw #1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrhq_scatter_shifted_offset_p_fv8hf_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrhq_scatter_shifted_offset_p_fv8hf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V8HI 1 "s_register_operand" "w") + (match_operand:V8HF 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRHQSSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vstrht.16\t%q2, [%0, %q1, uxtw #1]" [(set_attr "length" "8")]) ;; @@ -9562,173 +9681,240 @@ ;; ;; [vstrwq_scatter_offset_f] ;; -(define_insn "mve_vstrwq_scatter_offset_fv4sf" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SF 2 "s_register_operand" "w")] - VSTRWQSO_F)) - ] +(define_expand "mve_vstrwq_scatter_offset_fv4sf" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SF 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWQSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrw.32\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrwq_scatter_offset_fv4sf_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_offset_fv4sf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SF 2 "s_register_operand" "w")] + VSTRWQSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vstrw.32\t%q2, [%0, %q1]" [(set_attr "length" "4")]) 
;; ;; [vstrwq_scatter_offset_p_f] ;; -(define_insn "mve_vstrwq_scatter_offset_p_fv4sf" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRWQSO_F)) - ] +(define_expand "mve_vstrwq_scatter_offset_p_fv4sf" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SF 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWQSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvstrwt.32\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrwq_scatter_offset_p_fv4sf_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_offset_p_fv4sf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SF 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRWQSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vstrwt.32\t%q2, [%0, %q1]" [(set_attr "length" "8")]) ;; -;; [vstrwq_scatter_offset_p_s vstrwq_scatter_offset_p_u] +;; [vstrwq_scatter_offset_s vstrwq_scatter_offset_u] ;; -(define_insn "mve_vstrwq_scatter_offset_p_v4si" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRWSOQ)) - ] +(define_expand "mve_vstrwq_scatter_offset_p_v4si" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SI 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\n\tvstrwt.32\t%q2, [%m0, %q1]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrwq_scatter_offset_p_v4si_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_offset_p_v4si_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRWSOQ))] + "TARGET_HAVE_MVE" + "vpst\;vstrwt.32\t%q2, [%0, %q1]" [(set_attr "length" "8")]) ;; ;; [vstrwq_scatter_offset_s vstrwq_scatter_offset_u] ;; -(define_insn "mve_vstrwq_scatter_offset_v4si" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SI 2 "s_register_operand" "w")] - VSTRWSOQ)) - ] +(define_expand "mve_vstrwq_scatter_offset_v4si" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SI 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrw.32\t%q2, [%m0, %q1]",ops); - 
return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrwq_scatter_offset_v4si_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_offset_v4si_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SI 2 "s_register_operand" "w")] + VSTRWSOQ))] + "TARGET_HAVE_MVE" + "vstrw.32\t%q2, [%0, %q1]" [(set_attr "length" "4")]) ;; ;; [vstrwq_scatter_shifted_offset_f] ;; -(define_insn "mve_vstrwq_scatter_shifted_offset_fv4sf" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SF 2 "s_register_operand" "w")] - VSTRWQSSO_F)) - ] +(define_expand "mve_vstrwq_scatter_shifted_offset_fv4sf" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SF 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWQSSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrw.32\t%q2, [%m0, %q1, uxtw #2]",ops); - return ""; -} - [(set_attr "length" "4")]) + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn (gen_mve_vstrwq_scatter_shifted_offset_fv4sf_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_shifted_offset_fv4sf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SF 2 "s_register_operand" "w")] + VSTRWQSSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vstrw.32\t%q2, [%0, %q1, uxtw #2]" + [(set_attr "length" "8")]) ;; ;; [vstrwq_scatter_shifted_offset_p_f] ;; -(define_insn "mve_vstrwq_scatter_shifted_offset_p_fv4sf" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRWQSSO_F)) - ] +(define_expand "mve_vstrwq_scatter_shifted_offset_p_fv4sf" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SF 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWQSSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\;\tvstrwt.32\t%q2, [%m0, %q1, uxtw #2]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrwq_scatter_shifted_offset_p_fv4sf_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_shifted_offset_p_fv4sf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SF 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRWQSSO_F))] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" + "vpst\;vstrwt.32\t%q2, [%0, %q1, uxtw #2]" [(set_attr "length" "8")]) ;; ;; [vstrwq_scatter_shifted_offset_p_s vstrwq_scatter_shifted_offset_p_u] ;; -(define_insn "mve_vstrwq_scatter_shifted_offset_p_v4si" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 
"s_register_operand" "w") - (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] - VSTRWSSOQ)) - ] +(define_expand "mve_vstrwq_scatter_shifted_offset_p_v4si" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SI 2 "s_register_operand") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWSSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vpst\;\tvstrwt.32\t%q2, [%m0, %q1, uxtw #2]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrwq_scatter_shifted_offset_p_v4si_insn (ind, operands[1], + operands[2], + operands[3])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_shifted_offset_p_v4si_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRWSSOQ))] + "TARGET_HAVE_MVE" + "vpst\;vstrwt.32\t%q2, [%0, %q1, uxtw #2]" [(set_attr "length" "8")]) ;; ;; [vstrwq_scatter_shifted_offset_s vstrwq_scatter_shifted_offset_u] ;; -(define_insn "mve_vstrwq_scatter_shifted_offset_v4si" - [(set (match_operand:V4SI 0 "memory_operand" "=Us") - (unspec:V4SI - [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SI 2 "s_register_operand" "w")] - VSTRWSSOQ)) - ] +(define_expand "mve_vstrwq_scatter_shifted_offset_v4si" + [(match_operand:V4SI 0 "mve_scatter_memory") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:V4SI 2 "s_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWSSOQ)] "TARGET_HAVE_MVE" { - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[1]; - ops[2] = operands[2]; - output_asm_insn ("vstrw.32\t%q2, [%m0, %q1, uxtw #2]",ops); - return ""; -} + rtx ind = XEXP (operands[0], 0); + gcc_assert (REG_P (ind)); + emit_insn ( + gen_mve_vstrwq_scatter_shifted_offset_v4si_insn (ind, operands[1], + operands[2])); + DONE; +}) + +(define_insn "mve_vstrwq_scatter_shifted_offset_v4si_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:SI 0 "register_operand" "r") + (match_operand:V4SI 1 "s_register_operand" "w") + (match_operand:V4SI 2 "s_register_operand" "w")] + VSTRWSSOQ))] + "TARGET_HAVE_MVE" + "vstrw.32\t%q2, [%0, %q1, uxtw #2]" [(set_attr "length" "4")]) ;; diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md index c57ad73577e1..9e9bca4d87fd 100644 --- a/gcc/config/arm/predicates.md +++ b/gcc/config/arm/predicates.md @@ -37,6 +37,12 @@ && mve_vector_mem_operand (GET_MODE (op), XEXP (op, 0), false)"))) +(define_predicate "mve_scatter_memory" + (and (match_code "mem") + (match_test "TARGET_HAVE_MVE && REG_P (XEXP (op, 0)) + && mve_vector_mem_operand (GET_MODE (op), XEXP (op, 0), + false)"))) + ;; True for immediates in the range of 1 to 16 for MVE. 
(define_predicate "mve_imm_16" (match_test "satisfies_constraint_Rd (op)")) diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base.c new file mode 100644 index 000000000000..21b9e12d57e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base.c @@ -0,0 +1,67 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int +foows32(uint32x4_t pDataDest, int32x4_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrwq_scatter_base_s32 (pDataDest, 4, value); + vstrwq_scatter_base_s32 (pDataDest, 132, value); + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); + return 0; +} + +int +foowu32(uint32x4_t pDataDest, uint32x4_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrwq_scatter_base_u32 (pDataDest, 4, value); + vstrwq_scatter_base_u32 (pDataDest, 132, value); + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); + return 0; +} + +int +foowf32(uint32x4_t pDataDest, float32x4_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrwq_scatter_base_f32 (pDataDest, 4, value); + vstrwq_scatter_base_f32 (pDataDest, 132, value); + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); + return 0; +} + +int +foods64(uint64x2_t pDataDest, int64x2_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrdq_scatter_base_s64 (pDataDest, 256, value); + vstrdq_scatter_base_s64 (pDataDest, 512, value); + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); + return 0; +} + +int +foodu64(uint64x2_t pDataDest, uint64x2_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrdq_scatter_base_u64 (pDataDest, 256, value); + vstrdq_scatter_base_u64 (pDataDest, 512, value); + vstrwq_scatter_offset_s32 (ret, vecOffs1, (int32x4_t) pDataDest); + vstrwq_scatter_offset_s32 (ret, vecOffs2, (int32x4_t) pDataDest); + return 0; +} + +/* { dg-final { scan-assembler-times "vstr\[a-z\]" 20 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base_p.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base_p.c new file mode 100644 index 000000000000..15c6496732a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_base_p.c @@ -0,0 +1,69 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +mve_pred16_t __p; + +int +foows32(uint32x4_t pDataDest, int32x4_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrwq_scatter_base_p_s32 (pDataDest, 4, value, __p); + vstrwq_scatter_base_p_s32 (pDataDest, 132, value, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, 
__p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); + return 0; +} + +int +foowu32(uint32x4_t pDataDest, uint32x4_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrwq_scatter_base_p_u32 (pDataDest, 4, value, __p); + vstrwq_scatter_base_p_u32 (pDataDest, 132, value, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); + return 0; +} + +int +foowf32(uint32x4_t pDataDest, float32x4_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrwq_scatter_base_p_f32 (pDataDest, 4, value, __p); + vstrwq_scatter_base_p_f32 (pDataDest, 132, value, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); + return 0; +} + +int +foods64(uint64x2_t pDataDest, int64x2_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrdq_scatter_base_p_s64 (pDataDest, 256, value, __p); + vstrdq_scatter_base_p_s64 (pDataDest, 512, value, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); + return 0; +} + +int +foodu64(uint64x2_t pDataDest, uint64x2_t value, int32_t * ret) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + vstrdq_scatter_base_p_u64 (pDataDest, 256, value, __p); + vstrdq_scatter_base_p_u64 (pDataDest, 512, value, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs1, (int32x4_t) pDataDest, __p); + vstrwq_scatter_offset_p_s32 (ret, vecOffs2, (int32x4_t) pDataDest, __p); + return 0; +} + +/* { dg-final { scan-assembler-times "vstr\[a-z\]t" 20 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset.c new file mode 100644 index 000000000000..6d123669c13f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset.c @@ -0,0 +1,215 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +int +foobu8( uint8_t * pDataSrc, uint8_t * pDataDest) +{ + const uint8x16_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, 15, 8, 14}; + const uint8x16_t vecOffs2 = { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, 17, 22, 16, 20, 18, 30}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[16]); + vstrbq_scatter_offset_u8 (pDataDest, vecOffs1, (uint8x16_t) vecIn1); + vstrbq_scatter_offset_u8 (pDataDest, vecOffs2, (uint8x16_t) vecIn2); + pDataDest[32] = pDataSrc[32]; + return 0; +} + +int +foobu16( uint8_t * pDataSrc, uint8_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); + vstrbq_scatter_offset_u16 (pDataDest, vecOffs1, (uint16x8_t) vecIn1); + vstrbq_scatter_offset_u16 (pDataDest, vecOffs2, (uint16x8_t) vecIn2); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foobu32( uint8_t * pDataSrc, uint8_t * pDataDest) 
+{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); + vstrbq_scatter_offset_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1); + vstrbq_scatter_offset_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foobs8( int8_t * pDataSrc, int8_t * pDataDest) +{ + const uint8x16_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, 15, 8, 14}; + const uint8x16_t vecOffs2 = { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, 17, 22, 16, 20, 18, 30}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[16]); + vstrbq_scatter_offset_s8 (pDataDest, vecOffs1, (int8x16_t) vecIn1); + vstrbq_scatter_offset_s8 (pDataDest, vecOffs2, (int8x16_t) vecIn2); + pDataDest[32] = pDataSrc[32]; + return 0; +} + +int +foobs16( int8_t * pDataSrc, int8_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[8]); + vstrbq_scatter_offset_s16 (pDataDest, vecOffs1, (int16x8_t) vecIn1); + vstrbq_scatter_offset_s16 (pDataDest, vecOffs2, (int16x8_t) vecIn2); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foobs32( uint8_t * pDataSrc, int8_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]); + vstrbq_scatter_offset_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1); + vstrbq_scatter_offset_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foohu16( uint16_t * pDataSrc, uint16_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); + vstrhq_scatter_offset_u16 (pDataDest, vecOffs1, (uint16x8_t) vecIn1); + vstrhq_scatter_offset_u16 (pDataDest, vecOffs2, (uint16x8_t) vecIn2); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foohu32( uint16_t * pDataSrc, uint16_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); + vstrhq_scatter_offset_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1); + vstrhq_scatter_offset_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foohs16( int16_t * pDataSrc, int16_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[8]); + vstrhq_scatter_offset_s16 (pDataDest, vecOffs1, (int16x8_t) vecIn1); + vstrhq_scatter_offset_s16 (pDataDest, vecOffs2, (int16x8_t) vecIn2); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foohs32( uint16_t * pDataSrc, int16_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 
7, 2, 5}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]); + vstrhq_scatter_offset_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1); + vstrhq_scatter_offset_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foohf16( float16_t * pDataSrc, float16_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); + vstrhq_scatter_offset_f16 (pDataDest, vecOffs1, (float16x8_t) vecIn1); + vstrhq_scatter_offset_f16 (pDataDest, vecOffs2, (float16x8_t) vecIn2); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foowu32( uint32_t * pDataSrc, uint32_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); + vstrwq_scatter_offset_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1); + vstrwq_scatter_offset_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foows32( int32_t * pDataSrc, int32_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); + vstrwq_scatter_offset_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1); + vstrwq_scatter_offset_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foowf32( float32_t * pDataSrc, float32_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); + vstrwq_scatter_offset_f32 (pDataDest, vecOffs1, (float32x4_t) vecIn1); + vstrwq_scatter_offset_f32 (pDataDest, vecOffs2, (float32x4_t) vecIn2); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foowu64( uint64_t * pDataSrc, uint64_t * pDataDest) +{ + const uint64x2_t vecOffs1 = { 0, 3}; + const uint64x2_t vecOffs2 = { 1, 2}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]); + vstrdq_scatter_offset_u64 (pDataDest, vecOffs1, (uint64x2_t) vecIn1); + vstrdq_scatter_offset_u64 (pDataDest, vecOffs2, (uint64x2_t) vecIn2); + pDataDest[4] = pDataSrc[4]; + return 0; +} + +int +foows64( int64_t * pDataSrc, int64_t * pDataDest) +{ + const uint64x2_t vecOffs1 = { 0, 3}; + const uint64x2_t vecOffs2 = { 1, 2}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]); + vstrdq_scatter_offset_s64 (pDataDest, vecOffs1, (int64x2_t) vecIn1); + vstrdq_scatter_offset_s64 (pDataDest, vecOffs2, (int64x2_t) vecIn2); + pDataDest[4] = pDataSrc[4]; + return 0; +} + +/* { dg-final { scan-assembler-times "vstr\[a-z\]" 32 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p.c new file mode 100644 index 000000000000..cd2e1ee80f9d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p.c @@ 
-0,0 +1,216 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +mve_pred16_t __p; +int +foobu8( uint8_t * pDataSrc, uint8_t * pDataDest) +{ + const uint8x16_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, 15, 8, 14}; + const uint8x16_t vecOffs2 = { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, 17, 22, 16, 20, 18, 30}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[16]); + vstrbq_scatter_offset_p_u8(pDataDest, vecOffs1, (uint8x16_t) vecIn1, __p); + vstrbq_scatter_offset_p_u8(pDataDest, vecOffs2, (uint8x16_t) vecIn2, __p); + pDataDest[32] = pDataSrc[32]; + return 0; +} + +int +foobu16( uint8_t * pDataSrc, uint8_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]); + vstrbq_scatter_offset_p_u16 (pDataDest, vecOffs1, (uint16x8_t) vecIn1, __p); + vstrbq_scatter_offset_p_u16 (pDataDest, vecOffs2, (uint16x8_t) vecIn2, __p); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foobu32( uint8_t * pDataSrc, uint8_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc); + uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]); + vstrbq_scatter_offset_p_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1, __p); + vstrbq_scatter_offset_p_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2, __p); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foobs8( int8_t * pDataSrc, int8_t * pDataDest) +{ + const uint8x16_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, 15, 8, 14}; + const uint8x16_t vecOffs2 = { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, 17, 22, 16, 20, 18, 30}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[16]); + vstrbq_scatter_offset_p_s8 (pDataDest, vecOffs1, (int8x16_t) vecIn1, __p); + vstrbq_scatter_offset_p_s8 (pDataDest, vecOffs2, (int8x16_t) vecIn2, __p); + pDataDest[32] = pDataSrc[32]; + return 0; +} + +int +foobs16( int8_t * pDataSrc, int8_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[8]); + vstrbq_scatter_offset_p_s16 (pDataDest, vecOffs1, (int16x8_t) vecIn1, __p); + vstrbq_scatter_offset_p_s16 (pDataDest, vecOffs2, (int16x8_t) vecIn2, __p); + pDataDest[16] = pDataSrc[16]; + return 0; +} + +int +foobs32( uint8_t * pDataSrc, int8_t * pDataDest) +{ + const uint32x4_t vecOffs1 = { 0, 3, 6, 1}; + const uint32x4_t vecOffs2 = { 4, 7, 2, 5}; + int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc); + int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]); + vstrbq_scatter_offset_p_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1, __p); + vstrbq_scatter_offset_p_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2, __p); + pDataDest[8] = pDataSrc[8]; + return 0; +} + +int +foohu16( uint16_t * pDataSrc, uint16_t * pDataDest) +{ + const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5}; + const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9}; + uint32x4_t vecIn1 = 
vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]);
+  vstrhq_scatter_offset_p_u16 (pDataDest, vecOffs1, (uint16x8_t) vecIn1, __p);
+  vstrhq_scatter_offset_p_u16 (pDataDest, vecOffs2, (uint16x8_t) vecIn2, __p);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foohu32( uint16_t * pDataSrc, uint16_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]);
+  vstrhq_scatter_offset_p_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1, __p);
+  vstrhq_scatter_offset_p_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foohs16( int16_t * pDataSrc, int16_t * pDataDest)
+{
+  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
+  const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9};
+  int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
+  int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[8]);
+  vstrhq_scatter_offset_p_s16 (pDataDest, vecOffs1, (int16x8_t) vecIn1, __p);
+  vstrhq_scatter_offset_p_s16 (pDataDest, vecOffs2, (int16x8_t) vecIn2, __p);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foohs32( uint16_t * pDataSrc, int16_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
+  int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]);
+  vstrhq_scatter_offset_p_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1, __p);
+  vstrhq_scatter_offset_p_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foohf16( float16_t * pDataSrc, float16_t * pDataDest)
+{
+  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
+  const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9};
+  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]);
+  vstrhq_scatter_offset_p_f16 (pDataDest, vecOffs1, (float16x8_t) vecIn1, __p);
+  vstrhq_scatter_offset_p_f16 (pDataDest, vecOffs2, (float16x8_t) vecIn2, __p);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foowu32( uint32_t * pDataSrc, uint32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]);
+  vstrwq_scatter_offset_p_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1, __p);
+  vstrwq_scatter_offset_p_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foows32( int32_t * pDataSrc, int32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]);
+  vstrwq_scatter_offset_p_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1, __p);
+  vstrwq_scatter_offset_p_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foowf32( float32_t * pDataSrc, float32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]);
+  vstrwq_scatter_offset_p_f32 (pDataDest, vecOffs1, (float32x4_t) vecIn1, __p);
+  vstrwq_scatter_offset_p_f32 (pDataDest, vecOffs2, (float32x4_t) vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foowu64( uint64_t * pDataSrc, uint64_t * pDataDest)
+{
+  const uint64x2_t vecOffs1 = { 0, 3};
+  const uint64x2_t vecOffs2 = { 1, 2};
+  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]);
+  vstrdq_scatter_offset_p_u64 (pDataDest, vecOffs1, (uint64x2_t) vecIn1, __p);
+  vstrdq_scatter_offset_p_u64 (pDataDest, vecOffs2, (uint64x2_t) vecIn2, __p);
+  pDataDest[4] = pDataSrc[4];
+  return 0;
+}
+
+int
+foows64( int64_t * pDataSrc, int64_t * pDataDest)
+{
+  const uint64x2_t vecOffs1 = { 0, 3};
+  const uint64x2_t vecOffs2 = { 1, 2};
+  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]);
+  vstrdq_scatter_offset_p_s64 (pDataDest, vecOffs1, (int64x2_t) vecIn1, __p);
+  vstrdq_scatter_offset_p_s64 (pDataDest, vecOffs2, (int64x2_t) vecIn2, __p);
+  pDataDest[4] = pDataSrc[4];
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vstr\[a-z\]t" 32 } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset.c
new file mode 100644
index 000000000000..62dfb450a6d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset.c
@@ -0,0 +1,141 @@
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int
+foowu32( uint32_t * pDataSrc, uint32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]);
+  vstrwq_scatter_shifted_offset_u32 (pDataDest, vecOffs1, vecIn1);
+  vstrwq_scatter_shifted_offset_u32 (pDataDest, vecOffs2, vecIn2);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foowf32( float32_t * pDataSrc, float32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  float32x4_t vecIn1 = vldrwq_f32 ((float32_t const *) pDataSrc);
+  float32x4_t vecIn2 = vldrwq_f32 ((float32_t const *) &pDataSrc[4]);
+  vstrwq_scatter_shifted_offset_f32 (pDataDest, vecOffs1, vecIn1);
+  vstrwq_scatter_shifted_offset_f32 (pDataDest, vecOffs2, vecIn2);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foohu16( uint16_t * pDataSrc, uint16_t * pDataDest)
+{
+  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
+  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
+  uint16x8_t vecIn1 = vldrhq_u16 ((uint16_t const *) pDataSrc);
+  uint16x8_t vecIn2 = vldrhq_u16 ((uint16_t const *) &pDataSrc[8]);
+  vstrhq_scatter_shifted_offset_u16 (pDataDest, vecOffs1, vecIn1);
+  vstrhq_scatter_shifted_offset_u16 (pDataDest, vecOffs2, vecIn2);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foohu32( uint32_t * pDataSrc, uint32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  uint32x4_t vecIn1 = vldrhq_u32 ((uint16_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrhq_u32 ((uint16_t const *) &pDataSrc[4]);
+  vstrhq_scatter_shifted_offset_u32 ((uint16_t *)pDataDest, vecOffs1, vecIn1);
+  vstrhq_scatter_shifted_offset_u32 ((uint16_t *)pDataDest, vecOffs2, vecIn2);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foohf16( float16_t * pDataSrc, float16_t * pDataDest)
+{
+  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
+  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
+  float16x8_t vecIn1 = vldrhq_f16 ((float16_t const *) pDataSrc);
+  float16x8_t vecIn2 = vldrhq_f16 ((float16_t const *) &pDataSrc[8]);
+  vstrhq_scatter_shifted_offset_f16 (pDataDest, vecOffs1, vecIn1);
+  vstrhq_scatter_shifted_offset_f16 (pDataDest, vecOffs2, vecIn2);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foodu64( uint64_t * pDataSrc, uint64_t * pDataDest)
+{
+  const uint64x2_t vecOffs1 = { 0, 1};
+  const uint64x2_t vecOffs2 = { 2, 3};
+  uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
+  uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]);
+
+  vstrdq_scatter_shifted_offset_u64 (pDataDest, vecOffs1, (uint64x2_t) vecIn1);
+  vstrdq_scatter_shifted_offset_u64 (pDataDest, vecOffs2, (uint64x2_t) vecIn2);
+
+  pDataDest[2] = pDataSrc[2];
+  return 0;
+}
+
+int
+foows32( int32_t * pDataSrc, int32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
+  int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]);
+  vstrwq_scatter_shifted_offset_s32 (pDataDest, vecOffs1, vecIn1);
+  vstrwq_scatter_shifted_offset_s32 (pDataDest, vecOffs2, vecIn2);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foohs16( int16_t * pDataSrc, int16_t * pDataDest)
+{
+  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
+  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
+  int16x8_t vecIn1 = vldrhq_s16 ((int16_t const *) pDataSrc);
+  int16x8_t vecIn2 = vldrhq_s16 ((int16_t const *) &pDataSrc[8]);
+  vstrhq_scatter_shifted_offset_s16 (pDataDest, vecOffs1, vecIn1);
+  vstrhq_scatter_shifted_offset_s16 (pDataDest, vecOffs2, vecIn2);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foohs32( int32_t * pDataSrc, int32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  int32x4_t vecIn1 = vldrhq_s32 ((int16_t const *) pDataSrc);
+  int32x4_t vecIn2 = vldrhq_s32 ((int16_t const *) &pDataSrc[4]);
+  vstrhq_scatter_shifted_offset_s32 ((int16_t *)pDataDest, vecOffs1, vecIn1);
+  vstrhq_scatter_shifted_offset_s32 ((int16_t *)pDataDest, vecOffs2, vecIn2);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foods64( int64_t * pDataSrc, int64_t * pDataDest)
+{
+  const uint64x2_t vecOffs1 = { 0, 1};
+  const uint64x2_t vecOffs2 = { 2, 3};
+  int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
+  int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[2]);
+
+  vstrdq_scatter_shifted_offset_s64 (pDataDest, vecOffs1, (int64x2_t) vecIn1);
+  vstrdq_scatter_shifted_offset_s64 (pDataDest, vecOffs2, (int64x2_t) vecIn2);
+
+  pDataDest[2] = pDataSrc[2];
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vstr\[a-z\]" 20 } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset_p.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset_p.c
new file mode 100644
index 000000000000..a51d3a211672
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_shifted_offset_p.c
@@ -0,0 +1,142 @@
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+mve_pred16_t __p;
+int
+foowu32( uint32_t * pDataSrc, uint32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  uint32x4_t vecIn1 = vldrwq_z_u32 ((uint32_t const *) pDataSrc, __p);
+  uint32x4_t vecIn2 = vldrwq_z_u32 ((uint32_t const *) &pDataSrc[4], __p);
+  vstrwq_scatter_shifted_offset_p_u32 (pDataDest, vecOffs1, vecIn1, __p);
+  vstrwq_scatter_shifted_offset_p_u32 (pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foowf32( float32_t * pDataSrc, float32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  float32x4_t vecIn1 = vldrwq_z_f32 ((float32_t const *) pDataSrc, __p);
+  float32x4_t vecIn2 = vldrwq_z_f32 ((float32_t const *) &pDataSrc[4], __p);
+  vstrwq_scatter_shifted_offset_p_f32 (pDataDest, vecOffs1, vecIn1, __p);
+  vstrwq_scatter_shifted_offset_p_f32 (pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foohu16( uint16_t * pDataSrc, uint16_t * pDataDest)
+{
+  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
+  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
+  uint16x8_t vecIn1 = vldrhq_z_u16 ((uint16_t const *) pDataSrc, __p);
+  uint16x8_t vecIn2 = vldrhq_z_u16 ((uint16_t const *) &pDataSrc[8], __p);
+  vstrhq_scatter_shifted_offset_p_u16 (pDataDest, vecOffs1, vecIn1, __p);
+  vstrhq_scatter_shifted_offset_p_u16 (pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foohu32( uint32_t * pDataSrc, uint32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  uint32x4_t vecIn1 = vldrhq_z_u32 ((uint16_t const *) pDataSrc, __p);
+  uint32x4_t vecIn2 = vldrhq_z_u32 ((uint16_t const *) &pDataSrc[4], __p);
+  vstrhq_scatter_shifted_offset_p_u32 ((uint16_t *)pDataDest, vecOffs1, vecIn1, __p);
+  vstrhq_scatter_shifted_offset_p_u32 ((uint16_t *)pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foohf16( float16_t * pDataSrc, float16_t * pDataDest)
+{
+  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
+  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
+  float16x8_t vecIn1 = vldrhq_z_f16 ((float16_t const *) pDataSrc, __p);
+  float16x8_t vecIn2 = vldrhq_z_f16 ((float16_t const *) &pDataSrc[8], __p);
+  vstrhq_scatter_shifted_offset_p_f16 (pDataDest, vecOffs1, vecIn1, __p);
+  vstrhq_scatter_shifted_offset_p_f16 (pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foodu64( uint64_t * pDataSrc, uint64_t * pDataDest)
+{
+  const uint64x2_t vecOffs1 = { 0, 1};
+  const uint64x2_t vecOffs2 = { 2, 3};
+  uint32x4_t vecIn1 = vldrwq_z_u32 ((uint32_t const *) pDataSrc, __p);
+  uint32x4_t vecIn2 = vldrwq_z_u32 ((uint32_t const *) &pDataSrc[2], __p);
+
+  vstrdq_scatter_shifted_offset_p_u64 (pDataDest, vecOffs1, (uint64x2_t) vecIn1, __p);
+  vstrdq_scatter_shifted_offset_p_u64 (pDataDest, vecOffs2, (uint64x2_t) vecIn2, __p);
+
+  pDataDest[2] = pDataSrc[2];
+  return 0;
+}
+
+int
+foows32( int32_t * pDataSrc, int32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  int32x4_t vecIn1 = vldrwq_z_s32 ((int32_t const *) pDataSrc, __p);
+  int32x4_t vecIn2 = vldrwq_z_s32 ((int32_t const *) &pDataSrc[4], __p);
+  vstrwq_scatter_shifted_offset_p_s32 (pDataDest, vecOffs1, vecIn1, __p);
+  vstrwq_scatter_shifted_offset_p_s32 (pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foohs16( int16_t * pDataSrc, int16_t * pDataDest)
+{
+  const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
+  const uint16x8_t vecOffs2 = { 9, 11, 13, 10, 12, 15, 8, 14};
+  int16x8_t vecIn1 = vldrhq_z_s16 ((int16_t const *) pDataSrc, __p);
+  int16x8_t vecIn2 = vldrhq_z_s16 ((int16_t const *) &pDataSrc[8], __p);
+  vstrhq_scatter_shifted_offset_p_s16 (pDataDest, vecOffs1, vecIn1, __p);
+  vstrhq_scatter_shifted_offset_p_s16 (pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[16] = pDataSrc[16];
+  return 0;
+}
+
+int
+foohs32( int32_t * pDataSrc, int32_t * pDataDest)
+{
+  const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
+  const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
+  int32x4_t vecIn1 = vldrhq_z_s32 ((int16_t const *) pDataSrc, __p);
+  int32x4_t vecIn2 = vldrhq_z_s32 ((int16_t const *) &pDataSrc[4], __p);
+  vstrhq_scatter_shifted_offset_p_s32 ((int16_t *)pDataDest, vecOffs1, vecIn1, __p);
+  vstrhq_scatter_shifted_offset_p_s32 ((int16_t *)pDataDest, vecOffs2, vecIn2, __p);
+  pDataDest[8] = pDataSrc[8];
+  return 0;
+}
+
+int
+foods64( int64_t * pDataSrc, int64_t * pDataDest)
+{
+  const uint64x2_t vecOffs1 = { 0, 1};
+  const uint64x2_t vecOffs2 = { 2, 3};
+  int32x4_t vecIn1 = vldrwq_z_s32 ((int32_t const *) pDataSrc, __p);
+  int32x4_t vecIn2 = vldrwq_z_s32 ((int32_t const *) &pDataSrc[2], __p);
+
+  vstrdq_scatter_shifted_offset_p_s64 (pDataDest, vecOffs1, (int64x2_t) vecIn1, __p);
+  vstrdq_scatter_shifted_offset_p_s64 (pDataDest, vecOffs2, (int64x2_t) vecIn2, __p);
+
+  pDataDest[2] = pDataSrc[2];
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vstr\[a-z\]t" 20 } } */
-- 
2.39.5