;; Only used for splitting insert_d and copy_{u,s}.d.
(define_mode_iterator LASX_WD [V4DI V4DF V8SI V8SF])
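+;; Only used for vec_cast and vec_insert_{lo,hi}.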
+(define_mode_iterator LASX_PART [V4DI V4DF V8SF])
;; Only used for copy256_{u,s}.w.
(define_mode_iterator LASX_W [V8SI V8SF])
[(set_attr "move_type" "fmove")
(set_attr "mode" "<MODE>")])
+;; Reinterpret a 128-bit LSX register as the low half of the corresponding
+;; 256-bit LASX register (vr0 -> low half of xr0); no instruction is
+;; emitted.
+(define_insn "vec_cast<mode>"
+ [(set (match_operand:LASX_PART 0 "register_operand" "=f")
+ (subreg:LASX_PART
+ (match_operand:<VHMODE256_ALL> 1 "register_operand" "0") 0))]
+ "ISA_HAS_LASX"
+ ""
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "vec_insert_lo_<mode>"
+ [(set (match_operand:LASX_PART 0 "register_operand" "=f")
+ (vec_concat:LASX_PART
+ (match_operand:<VHMODE256_ALL> 2 "register_operand" "f")
+ (vec_select:<VHMODE256_ALL>
+ (match_operand:LASX_PART 1 "register_operand" "0")
+ (match_operand:LASX_PART 3 "vect_par_cnst_high_half"))))]
+ "ISA_HAS_LASX"
+ "xvpermi.q\t%u0,%u2,0x30"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "vec_insert_hi_<mode>"
+ [(set (match_operand:LASX_PART 0 "register_operand" "=f")
+ (vec_concat:LASX_PART
+ (vec_select:<VHMODE256_ALL>
+ (match_operand:LASX_PART 1 "register_operand" "0")
+ (match_operand:LASX_PART 3 "vect_par_cnst_low_half"))
+ (match_operand:<VHMODE256_ALL> 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvpermi.q\t%u0,%u2,0x02"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
(define_expand "vec_perm<mode>"
[(match_operand:LASX 0 "register_operand")
(match_operand:LASX 1 "register_operand")
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
+#include <lsxintrin.h>
+
#ifndef _GCC_LOONGSON_ASXINTRIN_H
#define _GCC_LOONGSON_ASXINTRIN_H 1
#define __lasx_xvrepli_w(/*si10*/ _1) \
((__m256i)__builtin_lasx_xvrepli_w ((_1)))
+#if defined (__loongarch_asx_sx_conv)
+/* Built-in interfaces for conversions between 128-bit and 256-bit vectors.
+   For some of the conversion functions below, the "Assembly instruction
+   format" comment does not exactly match the assembly instruction that is
+   actually generated.  The Rust front end selects built-in functions by
+   parsing these assembly-format comments; the data types listed in each
+   "Data types in instruction templates" comment follow the order of the
+   function interface: output first, then inputs.  */
+/* Assembly instruction format: xd, vj. */
+/* Data types in instruction templates: V8SF, V4SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256 __lasx_cast_128_s (__m128 _1)
+{
+ return (__m256)__builtin_lasx_cast_128_s ((v4f32)_1);
+}
+
+/* Assembly instruction format: xd, vj. */
+/* Data types in instruction templates: V4DF, V2DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256d __lasx_cast_128_d (__m128d _1)
+{
+ return (__m256d)__builtin_lasx_cast_128_d ((v2f64)_1);
+}
+
+/* Assembly instruction format: xd, vj. */
+/* Data types in instruction templates: V4DI, V2DI. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256i __lasx_cast_128 (__m128i _1)
+{
+ return (__m256i)__builtin_lasx_cast_128 ((v2i64)_1);
+}
+
+/* Assembly instruction format: xd, vj, vk. */
+/* Data types in instruction templates: V8SF, V4SF, V4SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256 __lasx_concat_128_s (__m128 _1, __m128 _2)
+{
+ return (__m256)__builtin_lasx_concat_128_s ((v4f32)_1, (v4f32)_2);
+}
+
+/* Assembly instruction format: xd, vj, vk. */
+/* Data types in instruction templates: V4DF, V2DF, V2DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256d __lasx_concat_128_d (__m128d _1, __m128d _2)
+{
+ return (__m256d)__builtin_lasx_concat_128_d ((v2f64)_1, (v2f64)_2);
+}
+
+/* Assembly instruction format: xd, vj, vk. */
+/* Data types in instruction templates: V4DI, V2DI, V2DI. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256i __lasx_concat_128 (__m128i _1, __m128i _2)
+{
+ return (__m256i)__builtin_lasx_concat_128 ((v2i64)_1, (v2i64)_2);
+}
+
+/* Assembly instruction format: vd, xj. */
+/* Data types in instruction templates: V4SF, V8SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128 __lasx_extract_128_lo_s (__m256 _1)
+{
+ return (__m128)__builtin_lasx_extract_128_lo_s ((v8f32)_1);
+}
+
+/* Assembly instruction format: vd, xj. */
+/* Data types in instruction templates: V4SF, V8SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128 __lasx_extract_128_hi_s (__m256 _1)
+{
+ return (__m128)__builtin_lasx_extract_128_hi_s ((v8f32)_1);
+}
+
+/* Assembly instruction format: vd, xj. */
+/* Data types in instruction templates: V2DF, V4DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128d __lasx_extract_128_lo_d (__m256d _1)
+{
+ return (__m128d)__builtin_lasx_extract_128_lo_d ((v4f64)_1);
+}
+
+/* Assembly instruction format: vd, xj. */
+/* Data types in instruction templates: V2DF, V4DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128d __lasx_extract_128_hi_d (__m256d _1)
+{
+ return (__m128d)__builtin_lasx_extract_128_hi_d ((v4f64)_1);
+}
+
+/* Assembly instruction format: vd, xj. */
+/* Data types in instruction templates: V2DI, V4DI. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128i __lasx_extract_128_lo (__m256i _1)
+{
+ return (__m128i)__builtin_lasx_extract_128_lo ((v4i64)_1);
+}
+
+/* Assembly instruction format: vd, xj. */
+/* Data types in instruction templates: V2DI, V4DI. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128i __lasx_extract_128_hi (__m256i _1)
+{
+ return (__m128i)__builtin_lasx_extract_128_hi ((v4i64)_1);
+}
+
+/* Assembly instruction format: xd, xj, vk. */
+/* Data types in instruction templates: V8SF, V8SF, V4SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256 __lasx_insert_128_lo_s (__m256 _1, __m128 _2)
+{
+ return (__m256)__builtin_lasx_insert_128_lo_s ((v8f32)_1, (v4f32)_2);
+}
+
+/* Assembly instruction format: xd, xj, vk. */
+/* Data types in instruction templates: V8SF, V8SF, V4SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256 __lasx_insert_128_hi_s (__m256 _1, __m128 _2)
+{
+ return (__m256)__builtin_lasx_insert_128_hi_s ((v8f32)_1, (v4f32)_2);
+}
+
+/* Assembly instruction format: xd, xj, vk. */
+/* Data types in instruction templates: V4DF, V4DF, V2DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256d __lasx_insert_128_lo_d (__m256d _1, __m128d _2)
+{
+ return (__m256d)__builtin_lasx_insert_128_lo_d ((v4f64)_1, (v2f64)_2);
+}
+
+/* Assembly instruction format: xd, xj, vk. */
+/* Data types in instruction templates: V4DF, V4DF, V2DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256d __lasx_insert_128_hi_d (__m256d _1, __m128d _2)
+{
+ return (__m256d)__builtin_lasx_insert_128_hi_d ((v4f64)_1, (v2f64)_2);
+}
+
+/* Assembly instruction format: xd, xj, vk. */
+/* Data types in instruction templates: V4DI, V4DI, V2DI. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256i __lasx_insert_128_lo (__m256i _1, __m128i _2)
+{
+ return (__m256i)__builtin_lasx_insert_128_lo ((v4i64)_1, (v2i64)_2);
+}
+
+/* Assembly instruction format: xd, xj, vk. */
+/* Data types in instruction templates: V4DI, V4DI, V2DI. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256i __lasx_insert_128_hi (__m256i _1, __m128i _2)
+{
+ return (__m256i)__builtin_lasx_insert_128_hi ((v4i64)_1, (v2i64)_2);
+}
+
+#endif /* defined(__loongarch_asx_sx_conv). */
#endif /* defined(__loongarch_asx). */
#endif /* _GCC_LOONGSON_ASXINTRIN_H. */
#define CODE_FOR_lasx_xvmaddwod_q_du CODE_FOR_lasx_maddwod_q_du_punned
#define CODE_FOR_lasx_xvmaddwod_q_du_d CODE_FOR_lasx_maddwod_q_du_d_punned
+
+/* Conversions between 128-bit and 256-bit vectors.  */
+#define CODE_FOR_lasx_extract_128_lo_s CODE_FOR_vec_extract_lo_v8sf
+#define CODE_FOR_lasx_extract_128_hi_s CODE_FOR_vec_extract_hi_v8sf
+#define CODE_FOR_lasx_extract_128_lo_d CODE_FOR_vec_extract_lo_v4df
+#define CODE_FOR_lasx_extract_128_hi_d CODE_FOR_vec_extract_hi_v4df
+#define CODE_FOR_lasx_extract_128_lo CODE_FOR_vec_extract_lo_v4di
+#define CODE_FOR_lasx_extract_128_hi CODE_FOR_vec_extract_hi_v4di
+#define CODE_FOR_lasx_insert_128_lo_s CODE_FOR_vec_insert_lo_v8sf
+#define CODE_FOR_lasx_insert_128_hi_s CODE_FOR_vec_insert_hi_v8sf
+#define CODE_FOR_lasx_insert_128_lo_d CODE_FOR_vec_insert_lo_v4df
+#define CODE_FOR_lasx_insert_128_hi_d CODE_FOR_vec_insert_hi_v4df
+#define CODE_FOR_lasx_insert_128_lo CODE_FOR_vec_insert_lo_v4di
+#define CODE_FOR_lasx_insert_128_hi CODE_FOR_vec_insert_hi_v4di
+#define CODE_FOR_lasx_concat_128_s CODE_FOR_vec_concatv8sf
+#define CODE_FOR_lasx_concat_128_d CODE_FOR_vec_concatv4df
+#define CODE_FOR_lasx_concat_128 CODE_FOR_vec_concatv4di
+#define CODE_FOR_lasx_cast_128_s CODE_FOR_vec_castv8sf
+#define CODE_FOR_lasx_cast_128_d CODE_FOR_vec_castv4df
+#define CODE_FOR_lasx_cast_128 CODE_FOR_vec_castv4di
+
static const struct loongarch_builtin_description loongarch_builtins[] = {
#define LARCH_MOVFCSR2GR 0
DIRECT_BUILTIN (movfcsr2gr, LARCH_USI_FTYPE_UQI, hard_float),
LASX_BUILTIN (xvssrarni_bu_h, LARCH_UV32QI_FTYPE_UV32QI_V32QI_USI),
LASX_BUILTIN (xvssrarni_hu_w, LARCH_UV16HI_FTYPE_UV16HI_V16HI_USI),
LASX_BUILTIN (xvssrarni_wu_d, LARCH_UV8SI_FTYPE_UV8SI_V8SI_USI),
- LASX_BUILTIN (xvssrarni_du_q, LARCH_UV4DI_FTYPE_UV4DI_V4DI_USI)
+ LASX_BUILTIN (xvssrarni_du_q, LARCH_UV4DI_FTYPE_UV4DI_V4DI_USI),
+ LASX_BUILTIN (extract_128_lo_s, LARCH_V4SF_FTYPE_V8SF),
+ LASX_BUILTIN (extract_128_hi_s, LARCH_V4SF_FTYPE_V8SF),
+ LASX_BUILTIN (extract_128_lo_d, LARCH_V2DF_FTYPE_V4DF),
+ LASX_BUILTIN (extract_128_hi_d, LARCH_V2DF_FTYPE_V4DF),
+ LASX_BUILTIN (extract_128_lo, LARCH_V2DI_FTYPE_V4DI),
+ LASX_BUILTIN (extract_128_hi, LARCH_V2DI_FTYPE_V4DI),
+ LASX_BUILTIN (insert_128_lo_s, LARCH_V8SF_FTYPE_V8SF_V4SF),
+ LASX_BUILTIN (insert_128_hi_s, LARCH_V8SF_FTYPE_V8SF_V4SF),
+ LASX_BUILTIN (insert_128_lo_d, LARCH_V4DF_FTYPE_V4DF_V2DF),
+ LASX_BUILTIN (insert_128_hi_d, LARCH_V4DF_FTYPE_V4DF_V2DF),
+ LASX_BUILTIN (insert_128_lo, LARCH_V4DI_FTYPE_V4DI_V2DI),
+ LASX_BUILTIN (insert_128_hi, LARCH_V4DI_FTYPE_V4DI_V2DI),
+ LASX_BUILTIN (concat_128_s, LARCH_V8SF_FTYPE_V4SF_V4SF),
+ LASX_BUILTIN (concat_128_d, LARCH_V4DF_FTYPE_V2DF_V2DF),
+ LASX_BUILTIN (concat_128, LARCH_V4DI_FTYPE_V2DI_V2DI),
+ LASX_BUILTIN (cast_128_s, LARCH_V8SF_FTYPE_V4SF),
+ LASX_BUILTIN (cast_128_d, LARCH_V4DF_FTYPE_V2DF),
+ LASX_BUILTIN (cast_128, LARCH_V4DI_FTYPE_V2DI)
};
/* Index I is the function declaration for loongarch_builtins[I], or null if
{
struct expand_operand ops[MAX_RECOG_OPERANDS];
int opno, argno;
+ /* For the vector extract/insert cases below, sel_high_p chooses which
+    128-bit half of the 256-bit register the generated parallel refers to:
+    for extracts it is the half that is read, for inserts it is the half of
+    the destination that is kept unchanged.  */
+ bool sel_high_p = true;
/* Map any target to operand 0. */
opno = 0;
create_input_operand (&ops[1], CONST1_RTX (ops[0].mode), ops[0].mode);
return loongarch_expand_builtin_insn (icode, 3, ops, has_target_p);
+ case CODE_FOR_vec_extract_lo_v8sf:
+ case CODE_FOR_vec_extract_lo_v4df:
+ case CODE_FOR_vec_extract_lo_v4di:
+ sel_high_p = false;
+ /* Fall through. */
+ case CODE_FOR_vec_extract_hi_v8sf:
+ case CODE_FOR_vec_extract_hi_v4df:
+ case CODE_FOR_vec_extract_hi_v4di:
+ {
+ /* Build the parallel that selects the high or low half.  */
+ loongarch_prepare_builtin_arg (&ops[1], exp, 0);
+ int nelts = GET_MODE_NUNITS (GET_MODE (ops[1].value));
+ int half_nelts = nelts / 2;
+ int base = sel_high_p ? half_nelts : 0;
+
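+ /* E.g. for extract_128_hi_s (a V8SF source), nelts is 8, half_nelts is 4
+    and base is 4, so the parallel built below is (parallel [4 5 6 7]).  */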
+ rtx pat_rtx
+ = loongarch_gen_stepped_int_parallel (half_nelts, base, 1);
+ create_input_operand (&ops[2], pat_rtx, ops[1].mode);
+
+ return loongarch_expand_builtin_insn (icode, 3, ops, has_target_p);
+ }
+
+ case CODE_FOR_vec_insert_hi_v8sf:
+ case CODE_FOR_vec_insert_hi_v4df:
+ case CODE_FOR_vec_insert_hi_v4di:
+ sel_high_p = false;
+ /* Fall through. */
+ case CODE_FOR_vec_insert_lo_v8sf:
+ case CODE_FOR_vec_insert_lo_v4df:
+ case CODE_FOR_vec_insert_lo_v4di:
+ {
+ /* Build the parallel that selects the high or low half.  */
+ loongarch_prepare_builtin_arg (&ops[1], exp, 0);
+ loongarch_prepare_builtin_arg (&ops[2], exp, 1);
+ int nelts = GET_MODE_NUNITS (GET_MODE (ops[1].value));
+ int half_nelts = nelts / 2;
+ int base = sel_high_p ? half_nelts : 0;
+
+ rtx pat_rtx
+ = loongarch_gen_stepped_int_parallel (half_nelts, base, 1);
+ create_input_operand (&ops[3], pat_rtx, ops[1].mode);
+
+ return loongarch_expand_builtin_insn (icode, 4, ops, has_target_p);
+ }
+
default:
break;
}
loongarch_def_or_undef (ISA_HAS_LSX, "__loongarch_simd", pfile);
loongarch_def_or_undef (ISA_HAS_LSX, "__loongarch_sx", pfile);
loongarch_def_or_undef (ISA_HAS_LASX, "__loongarch_asx", pfile);
+ loongarch_def_or_undef (ISA_HAS_LASX, "__loongarch_asx_sx_conv", pfile);
builtin_undef ("__loongarch_simd_width");
if (ISA_HAS_LSX)
DEF_LARCH_FTYPE (1, (UDI, USI))
DEF_LARCH_FTYPE (1, (USI, UQI))
DEF_LARCH_FTYPE (1, (VOID, USI))
+DEF_LARCH_FTYPE (1, (V4SF, V8SF))
+DEF_LARCH_FTYPE (1, (V2DF, V4DF))
+DEF_LARCH_FTYPE (1, (V2DI, V4DI))
+DEF_LARCH_FTYPE (1, (V8SF, V4SF))
+DEF_LARCH_FTYPE (1, (V4DF, V2DF))
+DEF_LARCH_FTYPE (1, (V4DI, V2DI))
DEF_LARCH_FTYPE (2, (VOID, UQI, USI))
DEF_LARCH_FTYPE (2, (VOID, UHI, USI))
DEF_LARCH_FTYPE (2, (SI, DI, SI))
DEF_LARCH_FTYPE (2, (USI, USI, USI))
DEF_LARCH_FTYPE (2, (UDI, UDI, USI))
+DEF_LARCH_FTYPE (2, (V8SF, V4SF, V4SF))
+DEF_LARCH_FTYPE (2, (V4DF, V2DF, V2DF))
+DEF_LARCH_FTYPE (2, (V4DI, V2DI, V2DI))
+DEF_LARCH_FTYPE (2, (V8SF, V8SF, V4SF))
+DEF_LARCH_FTYPE (2, (V4DF, V4DF, V2DF))
+DEF_LARCH_FTYPE (2, (V4DI, V4DI, V2DI))
DEF_LARCH_FTYPE (3, (VOID, USI, USI, SI))
DEF_LARCH_FTYPE (3, (VOID, USI, UDI, SI))
These built-in functions are available for LoongArch.
-Data Type Description:
+@menu
+* Data Types::
+* Directly-mapped Builtin Functions::
+* Directly-mapped Division Builtin Functions::
+* Other Builtin Functions::
+@end menu
+
+@node Data Types
+@subsubsection Data Types
+
@itemize
@item @code{imm0_31}, a compile-time constant in range 0 to 31;
@item @code{imm0_16383}, a compile-time constant in range 0 to 16383;
@item @code{imm_n2048_2047}, a compile-time constant in range -2048 to 2047;
@end itemize
+@node Directly-mapped Builtin Functions
+@subsubsection Directly-mapped Builtin Functions
+
The intrinsics provided are listed below:
@smallexample
unsigned int __builtin_loongarch_movfcsr2gr (imm0_31)
void __break (imm0_32767)
@end smallexample
+@node Directly-mapped Division Builtin Functions
+@subsubsection Directly-mapped Division Builtin Functions
+
These intrinsic functions are available by including @code{larchintrin.h} and
using @option{-mfrecipe}.
@smallexample
double __frsqrte_d (double);
@end smallexample
+@node Other Builtin Functions
+@subsubsection Other Builtin Functions
+
Additional built-in functions are available for LoongArch family
processors to efficiently use 128-bit floating-point (__float128)
values.
The interface is made available by including @code{<lsxintrin.h>} and using
@option{-mlsx}.
+@menu
+* SX Data Types::
+* Directly-mapped SX Builtin Functions::
+* Directly-mapped SX Division Builtin Functions::
+@end menu
+
+@node SX Data Types
+@subsubsection SX Data Types
+
The following vectors typedefs are included in @code{lsxintrin.h}:
@itemize
@item @code{imm_n2048_2047}, an integer literal in range -2048 to 2047.
@end itemize
+@node Directly-mapped SX Builtin Functions
+@subsubsection Directly-mapped SX Builtin Functions
+
For convenience, GCC defines functions @code{__lsx_vrepli_@{b/h/w/d@}} and
@code{__lsx_b[n]z_@{v/b/h/w/d@}}, which are implemented as follows:
__m128i __lsx_vxor_v (__m128i, __m128i);
@end smallexample
+@node Directly-mapped SX Division Builtin Functions
+@subsubsection Directly-mapped SX Division Builtin Functions
+
These intrinsic functions are available by including @code{lsxintrin.h} and
using @option{-mfrecipe} and @option{-mlsx}.
@smallexample
instructions. The interface is made available by including @code{<lasxintrin.h>}
and using @option{-mlasx}.
+@menu
+* ASX Data Types::
+* Directly-mapped ASX Builtin Functions::
+* Directly-mapped ASX Division Builtin Functions::
+* Directly-mapped SX and ASX Conversion Builtin Functions::
+@end menu
+
+@node ASX Data Types
+@subsubsection ASX Data Types
+
The following vectors typedefs are included in @code{lasxintrin.h}:
@itemize
@item @code{imm_n2048_2047}, an integer literal in range -2048 to 2047.
@end itemize
+@node Directly-mapped ASX Builtin Functions
+@subsubsection Directly-mapped ASX Builtin Functions
+
For convenience, GCC defines functions @code{__lasx_xvrepli_@{b/h/w/d@}} and
@code{__lasx_b[n]z_@{v/b/h/w/d@}}, which are implemented as follows:
__m256i __lasx_xvxor_v (__m256i, __m256i);
@end smallexample
+@node Directly-mapped ASX Division Builtin Functions
+@subsubsection Directly-mapped ASX Division Builtin Functions
+
These intrinsic functions are available by including @code{lasxintrin.h} and
using @option{-mfrecipe} and @option{-mlasx}.
@smallexample
__m256 __lasx_xvfrsqrte_s (__m256);
@end smallexample
+@node Directly-mapped SX and ASX Conversion Builtin Functions
+@subsubsection Directly-mapped SX and ASX Conversion Builtin Functions
+
+These functions are available by including @code{lasxintrin.h} (which now
+also includes @code{lsxintrin.h}) and using @option{-mlasx}.  Eighteen new
+interface functions provide conversions between 128-bit and 256-bit vectors:
+@smallexample
+__m256 __lasx_cast_128_s (__m128);
+__m256d __lasx_cast_128_d (__m128d);
+__m256i __lasx_cast_128 (__m128i);
+__m256 __lasx_concat_128_s (__m128, __m128);
+__m256d __lasx_concat_128_d (__m128d, __m128d);
+__m256i __lasx_concat_128 (__m128i, __m128i);
+__m128 __lasx_extract_128_lo_s (__m256);
+__m128 __lasx_extract_128_hi_s (__m256);
+__m128d __lasx_extract_128_lo_d (__m256d);
+__m128d __lasx_extract_128_hi_d (__m256d);
+__m128i __lasx_extract_128_lo (__m256i);
+__m128i __lasx_extract_128_hi (__m256i);
+__m256 __lasx_insert_128_lo_s (__m256, __m128);
+__m256 __lasx_insert_128_hi_s (__m256, __m128);
+__m256d __lasx_insert_128_lo_d (__m256d, __m128d);
+__m256d __lasx_insert_128_hi_d (__m256d, __m128d);
+__m256i __lasx_insert_128_lo (__m256i, __m128i);
+__m256i __lasx_insert_128_hi (__m256i, __m128i);
+@end smallexample
+
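+For example, a 256-bit vector can be split into its two 128-bit halves,
+processed with LSX intrinsics, and reassembled.  The following is only an
+illustrative sketch; it assumes @option{-mlasx} (which also enables the LSX
+intrinsics) and uses the LSX intrinsic @code{__lsx_vadd_d}.
+
+@smallexample
+#include <lasxintrin.h>
+
+/* Add the low and high 128-bit halves of a 256-bit integer vector and
+   broadcast the 128-bit sum into both halves of the result.  */
+__m256i
+sum_halves (__m256i v)
+@{
+  __m128i lo = __lasx_extract_128_lo (v);
+  __m128i hi = __lasx_extract_128_hi (v);
+  __m128i sum = __lsx_vadd_d (lo, hi);
+  return __lasx_concat_128 (sum, sum);
+@}
+@end smallexample
+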
+When GCC does not provide these conversion interfaces (i.e.
+@code{__loongarch_asx_sx_conv} is not defined), the following code can be
+used as an equivalent substitute.
+
+@smallexample
+
+ #ifndef __loongarch_asx_sx_conv
+
+ #include <lasxintrin.h>
+ #include <lsxintrin.h>
+ __m256 inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_cast_128_s (__m128 src)
+ @{
+ __m256 dest;
+ asm ("" : "=f"(dest) : "0"(src));
+ return dest;
+ @}
+
+ __m256d inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_cast_128_d (__m128d src)
+ @{
+ __m256d dest;
+ asm ("" : "=f"(dest) : "0"(src));
+ return dest;
+ @}
+
+ __m256i inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_cast_128 (__m128i src)
+ @{
+ __m256i dest;
+ asm ("" : "=f"(dest) : "0"(src));
+ return dest;
+ @}
+
+ __m256 inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_concat_128_s (__m128 src1, __m128 src2)
+ @{
+ __m256 dest;
+ asm ("xvpermi.q %u0,%u2,0x02\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m256d inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_concat_128_d (__m128d src1, __m128d src2)
+ @{
+ __m256d dest;
+ asm ("xvpermi.q %u0,%u2,0x02\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m256i inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_concat_128 (__m128i src1, __m128i src2)
+ @{
+ __m256i dest;
+ asm ("xvpermi.q %u0,%u2,0x02\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m128 inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_extract_128_lo_s (__m256 src)
+ @{
+ __m128 dest;
+ asm ("" : "=f"(dest) : "0"(src));
+ return dest;
+ @}
+
+ __m128d inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_extract_128_lo_d (__m256d src)
+ @{
+ __m128d dest;
+ asm ("" : "=f"(dest) : "0"(src));
+ return dest;
+ @}
+
+ __m128i inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_extract_128_lo (__m256i src)
+ @{
+ __m128i dest;
+ asm ("" : "=f"(dest) : "0"(src));
+ return dest;
+ @}
+
+ __m128 inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_extract_128_hi_s (__m256 src)
+ @{
+ __m128 dest;
+ asm ("xvpermi.d %u0,%u1,0xe\n"
+ : "=f"(dest)
+ : "f"(src));
+ return dest;
+ @}
+
+ __m128d inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_extract_128_hi_d (__m256d src)
+ @{
+ __m128d dest;
+ asm ("xvpermi.d %u0,%u1,0xe\n"
+ : "=f"(dest)
+ : "f"(src));
+ return dest;
+ @}
+
+ __m128i inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_extract_128_hi (__m256i src)
+ @{
+ __m128i dest;
+ asm ("xvpermi.d %u0,%u1,0xe\n"
+ : "=f"(dest)
+ : "f"(src));
+ return dest;
+ @}
+
+ __m256 inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_insert_128_lo_s (__m256 src1, __m128 src2)
+ @{
+ __m256 dest;
+ asm ("xvpermi.q %u0,%u2,0x30\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m256d inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_insert_128_lo_d (__m256d src1, __m128d src2)
+ @{
+ __m256d dest;
+ asm ("xvpermi.q %u0,%u2,0x30\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m256i inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_insert_128_lo (__m256i src1, __m128i src2)
+ @{
+ __m256i dest;
+ asm ("xvpermi.q %u0,%u2,0x30\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m256 inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_insert_128_hi_s (__m256 src1, __m128 src2)
+ @{
+ __m256 dest;
+ asm ("xvpermi.q %u0,%u2,0x02\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m256d inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_insert_128_hi_d (__m256d src1, __m128d src2)
+ @{
+ __m256d dest;
+ asm ("xvpermi.q %u0,%u2,0x02\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m256i inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ __lasx_insert_128_hi (__m256i src1, __m128i src2)
+ @{
+ __m256i dest;
+ asm ("xvpermi.q %u0,%u2,0x02\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+ #endif
+
+@end smallexample
+
@node MIPS DSP Built-in Functions
@subsection MIPS DSP Built-in Functions
--- /dev/null
+/* { dg-options "-mabi=lp64d -O2 -mlasx -w -fno-strict-aliasing" } */
+
+#include "../simd_correctness_check.h"
+#include <lasxintrin.h>
+
+int
+main ()
+{
+ __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+ __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+ __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+ __m256i __m256i_op0, __m256i_op1, __m256i_op2, __m256i_out, __m256i_result;
+ __m256 __m256_op0, __m256_op1, __m256_op2, __m256_out, __m256_result;
+ __m256d __m256d_op0, __m256d_op1, __m256d_op2, __m256d_out, __m256d_result;
+
+ //__m128_op0={1,2,3,4},__m128_op1={5,6,7,8};
+ *((int *)&__m128_op0[3]) = 0x40800000;
+ *((int *)&__m128_op0[2]) = 0x40400000;
+ *((int *)&__m128_op0[1]) = 0x40000000;
+ *((int *)&__m128_op0[0]) = 0x3f800000;
+ *((int *)&__m128_op1[3]) = 0x41000000;
+ *((int *)&__m128_op1[2]) = 0x40e00000;
+ *((int *)&__m128_op1[1]) = 0x40c00000;
+ *((int *)&__m128_op1[0]) = 0x40a00000;
+ *((int *)&__m256_result[7]) = 0x41000000;
+ *((int *)&__m256_result[6]) = 0x40e00000;
+ *((int *)&__m256_result[5]) = 0x40c00000;
+ *((int *)&__m256_result[4]) = 0x40a00000;
+ *((int *)&__m256_result[3]) = 0x40800000;
+ *((int *)&__m256_result[2]) = 0x40400000;
+ *((int *)&__m256_result[1]) = 0x40000000;
+ *((int *)&__m256_result[0]) = 0x3f800000;
+ __m256_out = __lasx_concat_128_s (__m128_op0, __m128_op1);
+ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out);
+ __m256_out = __lasx_cast_128_s (__m128_op0);
+ ASSERTEQ_32 (__LINE__, __m256_out, __m128_op0);
+
+ //__m128i_op0={1,2},__m128i_op1={3,4};
+ *((unsigned long *)&__m128i_op0[1]) = 0x2;
+ *((unsigned long *)&__m128i_op0[0]) = 0x1;
+ *((unsigned long *)&__m128i_op1[1]) = 0x4;
+ *((unsigned long *)&__m128i_op1[0]) = 0x3;
+ *((unsigned long *)&__m256i_result[3]) = 0x4;
+ *((unsigned long *)&__m256i_result[2]) = 0x3;
+ *((unsigned long *)&__m256i_result[1]) = 0x2;
+ *((unsigned long *)&__m256i_result[0]) = 0x1;
+ __m256i_out = __lasx_concat_128 (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+ __m256i_out = __lasx_cast_128 (__m128i_op0);
+ ASSERTEQ_64 (__LINE__, __m256i_out, __m128i_op0);
+
+ //__m128d_op0={1,2},__m128d_op1={3,4};
+ *((unsigned long *)&__m128d_op0[1]) = 0x4000000000000000;
+ *((unsigned long *)&__m128d_op0[0]) = 0x3ff0000000000000;
+ *((unsigned long *)&__m128d_op1[1]) = 0x4010000000000000;
+ *((unsigned long *)&__m128d_op1[0]) = 0x4008000000000000;
+ *((unsigned long *)&__m256d_result[3]) = 0x4010000000000000;
+ *((unsigned long *)&__m256d_result[2]) = 0x4008000000000000;
+ *((unsigned long *)&__m256d_result[1]) = 0x4000000000000000;
+ *((unsigned long *)&__m256d_result[0]) = 0x3ff0000000000000;
+ __m256d_out = __lasx_concat_128_d (__m128d_op0, __m128d_op1);
+ ASSERTEQ_64 (__LINE__, __m256d_result, __m256d_out);
+ __m256d_out = __lasx_cast_128_d (__m128d_op0);
+ ASSERTEQ_64 (__LINE__, __m256d_out, __m128d_op0);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-mabi=lp64d -O2 -mlasx" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <lasxintrin.h>
+
+/*
+**foo1:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** vinsgr2vr.d (\$vr[0-9]+),\$r8,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x02
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256
+foo1 (__m128 x, __m128 y)
+{
+ return __builtin_lasx_concat_128_s (x, y);
+}
+
+/*
+**foo2:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** vinsgr2vr.d (\$vr[0-9]+),\$r8,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x02
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256d
+foo2 (__m128d x, __m128d y)
+{
+ return __builtin_lasx_concat_128_d (x, y);
+}
+
+/*
+**foo3:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** vinsgr2vr.d (\$vr[0-9]+),\$r8,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x02
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256i
+foo3 (__m128i x, __m128i y)
+{
+ return __builtin_lasx_concat_128 (x, y);
+}
+
+/*
+**foo4:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256
+foo4 (__m128 x)
+{
+ return __builtin_lasx_cast_128_s (x);
+}
+
+/*
+**foo5:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256d
+foo5 (__m128d x)
+{
+ return __builtin_lasx_cast_128_d (x);
+}
+
+/*
+**foo6:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256i
+foo6 (__m128i x)
+{
+ return __builtin_lasx_cast_128 (x);
+}
--- /dev/null
+/* { dg-options "-mabi=lp64d -O2 -mlasx -w -fno-strict-aliasing" } */
+
+#include "../simd_correctness_check.h"
+#include <lasxintrin.h>
+
+extern void abort (void);
+int
+main ()
+{
+ __m128i __m128i_result0, __m128i_result1, __m128i_out, __m128i_result;
+ __m128 __m128_result0, __m128_result1, __m128_out, __m128_result;
+ __m128d __m128d_result0, __m128d_result1, __m128d_out, __m128d_result;
+
+ __m256i __m256i_op0, __m256i_op1, __m256i_op2, __m256i_out, __m256i_result;
+ __m256 __m256_op0, __m256_op1, __m256_op2, __m256_out, __m256_result;
+ __m256d __m256d_op0, __m256d_op1, __m256d_op2, __m256d_out, __m256d_result;
+
+ //__m256_op0 = {1,2,3,4,5,6,7,8};
+ *((int *)&__m256_op0[7]) = 0x41000000;
+ *((int *)&__m256_op0[6]) = 0x40e00000;
+ *((int *)&__m256_op0[5]) = 0x40c00000;
+ *((int *)&__m256_op0[4]) = 0x40a00000;
+ *((int *)&__m256_op0[3]) = 0x40800000;
+ *((int *)&__m256_op0[2]) = 0x40400000;
+ *((int *)&__m256_op0[1]) = 0x40000000;
+ *((int *)&__m256_op0[0]) = 0x3f800000;
+ *((int *)&__m128_result1[3]) = 0x41000000;
+ *((int *)&__m128_result1[2]) = 0x40e00000;
+ *((int *)&__m128_result1[1]) = 0x40c00000;
+ *((int *)&__m128_result1[0]) = 0x40a00000;
+ *((int *)&__m128_result0[3]) = 0x40800000;
+ *((int *)&__m128_result0[2]) = 0x40400000;
+ *((int *)&__m128_result0[1]) = 0x40000000;
+ *((int *)&__m128_result0[0]) = 0x3f800000;
+ __m128_out = __lasx_extract_128_lo_s (__m256_op0);
+ ASSERTEQ_32 (__LINE__, __m128_result0, __m128_out);
+ __m128_out = __lasx_extract_128_hi_s (__m256_op0);
+ ASSERTEQ_32 (__LINE__, __m128_result1, __m128_out);
+
+ //__m256i_op0 = {1,2,3,4};
+ *((unsigned long *)&__m256i_op0[3]) = 0x4;
+ *((unsigned long *)&__m256i_op0[2]) = 0x3;
+ *((unsigned long *)&__m256i_op0[1]) = 0x2;
+ *((unsigned long *)&__m256i_op0[0]) = 0x1;
+ *((unsigned long *)&__m128i_result0[1]) = 0x2;
+ *((unsigned long *)&__m128i_result0[0]) = 0x1;
+ *((unsigned long *)&__m128i_result1[1]) = 0x4;
+ *((unsigned long *)&__m128i_result1[0]) = 0x3;
+ __m128i_out = __lasx_extract_128_lo (__m256i_op0);
+ ASSERTEQ_64 (__LINE__, __m128i_result0, __m128i_out);
+ __m128i_out = __lasx_extract_128_hi (__m256i_op0);
+ ASSERTEQ_64 (__LINE__, __m128i_result1, __m128i_out);
+
+ //__m256d_op0 = {1,2,3,4};
+ *((unsigned long *)&__m256d_op0[3]) = 0x4010000000000000;
+ *((unsigned long *)&__m256d_op0[2]) = 0x4008000000000000;
+ *((unsigned long *)&__m256d_op0[1]) = 0x4000000000000000;
+ *((unsigned long *)&__m256d_op0[0]) = 0x3ff0000000000000;
+ *((unsigned long *)&__m128d_result0[1]) = 0x4000000000000000;
+ *((unsigned long *)&__m128d_result0[0]) = 0x3ff0000000000000;
+ *((unsigned long *)&__m128d_result1[1]) = 0x4010000000000000;
+ *((unsigned long *)&__m128d_result1[0]) = 0x4008000000000000;
+ __m128d_out = __lasx_extract_128_lo_d (__m256d_op0);
+ ASSERTEQ_64 (__LINE__, __m128d_result0, __m128d_out);
+ __m128d_out = __lasx_extract_128_hi_d (__m256d_op0);
+ ASSERTEQ_64 (__LINE__, __m128d_result1, __m128d_out);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-mabi=lp64d -O2 -mlasx" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <lasxintrin.h>
+
+/*
+**foo1_lo:
+** vld (\$vr[0-9]+),\$r4,0
+** vpickve2gr.du \$r4,(\$vr[0-9]+),0
+** vpickve2gr.du \$r5,(\$vr[0-9]+),1
+** jr \$r1
+*/
+__m128
+foo1_lo (__m256 x)
+{
+ return __lasx_extract_128_lo_s (x);
+}
+
+/*
+**foo1_hi:
+** xvld (\$xr[0-9]+),\$r4,0
+** xvpermi.d (\$xr[0-9]+),(\$xr[0-9]+),0xe
+** vpickve2gr.du \$r4,(\$vr[0-9]+),0
+** vpickve2gr.du \$r5,(\$vr[0-9]+),1
+** jr \$r1
+*/
+__m128
+foo1_hi (__m256 x)
+{
+ return __lasx_extract_128_hi_s (x);
+}
+
+/*
+**foo2_lo:
+** vld (\$vr[0-9]+),\$r4,0
+** vpickve2gr.du \$r4,(\$vr[0-9]+),0
+** vpickve2gr.du \$r5,(\$vr[0-9]+),1
+** jr \$r1
+*/
+__m128d
+foo2_lo (__m256d x)
+{
+ return __lasx_extract_128_lo_d (x);
+}
+
+/*
+**foo2_hi:
+** xvld (\$xr[0-9]+),\$r4,0
+** xvpermi.d (\$xr[0-9]+),(\$xr[0-9]+),0xe
+** vpickve2gr.du \$r4,(\$vr[0-9]+),0
+** vpickve2gr.du \$r5,(\$vr[0-9]+),1
+** jr \$r1
+*/
+__m128d
+foo2_hi (__m256d x)
+{
+ return __lasx_extract_128_hi_d (x);
+}
+
+/*
+**foo3_lo:
+** vld (\$vr[0-9]+),\$r4,0
+** vpickve2gr.du \$r4,(\$vr[0-9]+),0
+** vpickve2gr.du \$r5,(\$vr[0-9]+),1
+** jr \$r1
+*/
+__m128i
+foo3_lo (__m256i x)
+{
+ return __lasx_extract_128_lo (x);
+}
+
+/*
+**foo3_hi:
+** xvld (\$xr[0-9]+),\$r4,0
+** xvpermi.d (\$xr[0-9]+),(\$xr[0-9]+),0xe
+** vpickve2gr.du \$r4,(\$vr[0-9]+),0
+** vpickve2gr.du \$r5,(\$vr[0-9]+),1
+** jr \$r1
+*/
+__m128i
+foo3_hi (__m256i x)
+{
+ return __lasx_extract_128_hi (x);
+}
--- /dev/null
+/* { dg-options "-mabi=lp64d -O2 -mlasx -w -fno-strict-aliasing" } */
+
+#include "../simd_correctness_check.h"
+#include <lasxintrin.h>
+
+extern void abort (void);
+int
+main ()
+{
+ __m128i __m128i_op0, __m128i_op1, __m128i_out;
+ __m128 __m128_op0, __m128_op1, __m128_out;
+ __m128d __m128d_op0, __m128d_op1, __m128d_out;
+
+ __m256i __m256i_op0, __m256i_result0, __m256i_result1, __m256i_out;
+ __m256 __m256_op0, __m256_result0, __m256_result1, __m256_out;
+ __m256d __m256d_op0, __m256d_result0, __m256d_result1, __m256d_out;
+
+ //__m256_op0 = {1,2,3,4,5,6,7,8}, __m128_op0 ={9,9,9,9};
+ *((int *)&__m256_op0[7]) = 0x41000000;
+ *((int *)&__m256_op0[6]) = 0x40e00000;
+ *((int *)&__m256_op0[5]) = 0x40c00000;
+ *((int *)&__m256_op0[4]) = 0x40a00000;
+ *((int *)&__m256_op0[3]) = 0x40800000;
+ *((int *)&__m256_op0[2]) = 0x40400000;
+ *((int *)&__m256_op0[1]) = 0x40000000;
+ *((int *)&__m256_op0[0]) = 0x3f800000;
+ *((int *)&__m128_op0[3]) = 0x41100000;
+ *((int *)&__m128_op0[2]) = 0x41100000;
+ *((int *)&__m128_op0[1]) = 0x41100000;
+ *((int *)&__m128_op0[0]) = 0x41100000;
+ *((int *)&__m256_result0[7]) = 0x41000000;
+ *((int *)&__m256_result0[6]) = 0x40e00000;
+ *((int *)&__m256_result0[5]) = 0x40c00000;
+ *((int *)&__m256_result0[4]) = 0x40a00000;
+ *((int *)&__m256_result0[3]) = 0x41100000;
+ *((int *)&__m256_result0[2]) = 0x41100000;
+ *((int *)&__m256_result0[1]) = 0x41100000;
+ *((int *)&__m256_result0[0]) = 0x41100000;
+ *((int *)&__m256_result1[7]) = 0x41100000;
+ *((int *)&__m256_result1[6]) = 0x41100000;
+ *((int *)&__m256_result1[5]) = 0x41100000;
+ *((int *)&__m256_result1[4]) = 0x41100000;
+ *((int *)&__m256_result1[3]) = 0x40800000;
+ *((int *)&__m256_result1[2]) = 0x40400000;
+ *((int *)&__m256_result1[1]) = 0x40000000;
+ *((int *)&__m256_result1[0]) = 0x3f800000;
+ __m256_out = __lasx_insert_128_lo_s (__m256_op0, __m128_op0);
+ ASSERTEQ_32 (__LINE__, __m256_result0, __m256_out);
+ __m256_out = __lasx_insert_128_hi_s (__m256_op0, __m128_op0);
+ ASSERTEQ_32 (__LINE__, __m256_result1, __m256_out);
+
+ //__m256i_op0 ={1,2,3,4},__m128i_op0={5,6},__m128i_op1={7,8};
+ *((unsigned long *)&__m256i_op0[3]) = 0x4;
+ *((unsigned long *)&__m256i_op0[2]) = 0x3;
+ *((unsigned long *)&__m256i_op0[1]) = 0x2;
+ *((unsigned long *)&__m256i_op0[0]) = 0x1;
+ *((unsigned long *)&__m128i_op0[1]) = 0x6;
+ *((unsigned long *)&__m128i_op0[0]) = 0x5;
+ *((unsigned long *)&__m128i_op1[1]) = 0x8;
+ *((unsigned long *)&__m128i_op1[0]) = 0x7;
+ *((unsigned long *)&__m256i_result0[3]) = 0x4;
+ *((unsigned long *)&__m256i_result0[2]) = 0x3;
+ *((unsigned long *)&__m256i_result0[1]) = 0x6;
+ *((unsigned long *)&__m256i_result0[0]) = 0x5;
+ *((unsigned long *)&__m256i_result1[3]) = 0x8;
+ *((unsigned long *)&__m256i_result1[2]) = 0x7;
+ *((unsigned long *)&__m256i_result1[1]) = 0x2;
+ *((unsigned long *)&__m256i_result1[0]) = 0x1;
+ __m256i_out = __lasx_insert_128_lo (__m256i_op0, __m128i_op0);
+ ASSERTEQ_64 (__LINE__, __m256i_result0, __m256i_out);
+ __m256i_out = __lasx_insert_128_hi (__m256i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m256i_result1, __m256i_out);
+
+ //__m256d_op0 ={1,2,3,4},__m128d_op0={5,6},__m128d_op1={7,8};
+ *((unsigned long *)&__m256d_op0[3]) = 0x4010000000000000;
+ *((unsigned long *)&__m256d_op0[2]) = 0x4008000000000000;
+ *((unsigned long *)&__m256d_op0[1]) = 0x4000000000000000;
+ *((unsigned long *)&__m256d_op0[0]) = 0x3ff0000000000000;
+ *((unsigned long *)&__m128d_op0[1]) = 0x4018000000000000;
+ *((unsigned long *)&__m128d_op0[0]) = 0x4014000000000000;
+ *((unsigned long *)&__m128d_op1[1]) = 0x4020000000000000;
+ *((unsigned long *)&__m128d_op1[0]) = 0x401c000000000000;
+ *((unsigned long *)&__m256d_result0[3]) = 0x4010000000000000;
+ *((unsigned long *)&__m256d_result0[2]) = 0x4008000000000000;
+ *((unsigned long *)&__m256d_result0[1]) = 0x4018000000000000;
+ *((unsigned long *)&__m256d_result0[0]) = 0x4014000000000000;
+ *((unsigned long *)&__m256d_result1[3]) = 0x4020000000000000;
+ *((unsigned long *)&__m256d_result1[2]) = 0x401c000000000000;
+ *((unsigned long *)&__m256d_result1[1]) = 0x4000000000000000;
+ *((unsigned long *)&__m256d_result1[0]) = 0x3ff0000000000000;
+ __m256d_out = __lasx_insert_128_lo_d (__m256d_op0, __m128d_op0);
+ ASSERTEQ_64 (__LINE__, __m256d_result0, __m256d_out);
+ __m256d_out = __lasx_insert_128_hi_d (__m256d_op0, __m128d_op1);
+ ASSERTEQ_64 (__LINE__, __m256d_result1, __m256d_out);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-mabi=lp64d -O2 -mlasx" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <lasxintrin.h>
+
+/*
+**foo1:
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,0
+** xvld (\$xr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x30
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256
+foo1 (__m256 x, __m128 y)
+{
+ return __builtin_lasx_insert_128_lo_s (x, y);
+}
+
+/*
+**foo2:
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,0
+** xvld (\$xr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x02
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256
+foo2 (__m256 x, __m128 y)
+{
+ return __builtin_lasx_insert_128_hi_s (x, y);
+}
+
+/*
+**foo3:
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,0
+** xvld (\$xr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x30
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256d
+foo3 (__m256d x, __m128d y)
+{
+ return __builtin_lasx_insert_128_lo_d (x, y);
+}
+
+/*
+**foo4:
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,0
+** xvld (\$xr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x02
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256d
+foo4 (__m256d x, __m128d y)
+{
+ return __builtin_lasx_insert_128_hi_d (x, y);
+}
+
+/*
+**foo5:
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,0
+** xvld (\$xr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x30
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256i
+foo5 (__m256i x, __m128i y)
+{
+ return __builtin_lasx_insert_128_lo (x, y);
+}
+
+/*
+**foo6:
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,0
+** xvld (\$xr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x02
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256i
+foo6 (__m256i x, __m128i y)
+{
+ return __builtin_lasx_insert_128_hi (x, y);
+}