aarch64: Fix LD1Q and ST1Q failures for big-endian

author Richard Sandiford <richard.sandiford@arm.com>

Thu, 10 Jul 2025 15:54:45 +0000 (16:54 +0100)

committer Richard Sandiford <richard.sandiford@arm.com>

Thu, 10 Jul 2025 15:54:45 +0000 (16:54 +0100)
author Richard Sandiford <richard.sandiford@arm.com>
Thu, 10 Jul 2025 15:54:45 +0000 (16:54 +0100)
committer Richard Sandiford <richard.sandiford@arm.com>
Thu, 10 Jul 2025 15:54:45 +0000 (16:54 +0100)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc

index d9922de7ca5a488af9c2ce9be1fec41edbc06d01..abe21a8b61c6b75090cdff872ed8a74fca52add8 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -316,7 +316,8 @@ public:
    expand (function_expander &e) const override
    {
      e.prepare_gather_address_operands (1, false);
-    return e.use_exact_insn (CODE_FOR_aarch64_gather_ld1q);
+    auto icode = code_for_aarch64_gather_ld1q (e.tuple_mode (0));
+    return e.use_exact_insn (icode);
    }
  };
  
@@ -722,7 +723,7 @@ public:
    expand (function_expander &e) const override
    {
      rtx data = e.args.last ();
-    e.args.last () = force_lowpart_subreg (VNx2DImode, data, GET_MODE (data));
+    e.args.last () = aarch64_sve_reinterpret (VNx2DImode, data);
      e.prepare_gather_address_operands (1, false);
      return e.use_exact_insn (CODE_FOR_aarch64_scatter_st1q);
    }
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md

index 789ec0dd1a3c914deb17e8469680facb11fe7230..660901d4b3f1b14ee10ee504c3c91e993bd892b8 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -334,12 +334,21 @@
  ;; - LD1Q (SVE2p1)
  ;; -------------------------------------------------------------------------
  
-;; Model this as operating on the largest valid element size, which is DI.
-;; This avoids having to define move patterns & more for VNx1TI, which would
-;; be difficult without a non-gather form of LD1Q.
-(define_insn "aarch64_gather_ld1q"
-  [(set (match_operand:VNx2DI 0 "register_operand")
-       (unspec:VNx2DI
+;; For little-endian targets, it would be enough to use a single pattern,
+;; with a subreg to bitcast the result to whatever mode is needed.
+;; However, on big-endian targets, the bitcast would need to be an
+;; aarch64_sve_reinterpret instruction.  That would interact badly
+;; with the "&" and "?" constraints in this pattern: if the result
+;; of the reinterpret needs to be in the same register as the index,
+;; the register allocator would tend to allocate a separate register for the
+;; intermediate (uncast) result, even if the reinterpret prefers tying.
+;;
+;; The index is logically VNx1DI rather than VNx2DI, but introducing
+;; and using VNx1DI would just create more bitcasting.  The ACLE intrinsic
+;; uses svuint64_t, which corresponds to VNx2DI.
+(define_insn "@aarch64_gather_ld1q<mode>"
+  [(set (match_operand:SVE_FULL 0 "register_operand")
+       (unspec:SVE_FULL
           [(match_operand:VNx2BI 1 "register_operand")
            (match_operand:DI 2 "aarch64_reg_or_zero")
            (match_operand:VNx2DI 3 "register_operand")
author	Richard Sandiford <richard.sandiford@arm.com>
	Thu, 10 Jul 2025 15:54:45 +0000 (16:54 +0100)
committer	Richard Sandiford <richard.sandiford@arm.com>
	Thu, 10 Jul 2025 15:54:45 +0000 (16:54 +0100)
gcc/config/aarch64/aarch64-sve-builtins-sve2.cc		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-sve2.md		patch \| blob \| blame \| history