From: Peter Maydell <peter.maydell@linaro.org>
Date: Wed, 23 Jul 2025 16:54:56 +0000 (+0100)
Subject: target/arm: LD1Q, ST1Q are vector + scalar, not scalar + vector
X-Git-Tag: v10.1.0-rc1~9^2~4
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1c6aae5efbd28ac35003dea341364cd63a4515a1;p=thirdparty%2Fqemu.git

target/arm: LD1Q, ST1Q are vector + scalar, not scalar + vector

Unlike the "LD1D (scalar + vector)" etc instructions, LD1Q is
vector + scalar. This means that:
 * the vector and the scalar register are in opposite fields
   in the encoding
 * 31 in the scalar register field is XZR, not XSP

The same applies for ST1Q.

This means we can't reuse the trans_LD1_zprz() and trans_ST1_zprz()
functions for LD1Q and ST1Q. Split them out to use their own
trans functions.

Note that the change made here to sve.decode requires the decodetree
bugfix "decodetree: Infer argument set before inferring format" to
avoid a spurious compile-time error about "dtype".

Fixes: d2aa9a804ee678f ("target/arm: Implement LD1Q, ST1Q for SVE2p1")
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250723165458.3509150-5-peter.maydell@linaro.org
---

diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode
index aea7f519730..ab63cfaa0f0 100644
--- a/target/arm/tcg/sve.decode
+++ b/target/arm/tcg/sve.decode
@@ -1343,9 +1343,9 @@ LD1_zprz        1100010 10 1. ..... 1.. ... ..... ..... \
 LD1_zprz        1100010 11 1. ..... 11. ... ..... ..... \
                 @rprr_g_load_sc esz=3 msz=3 u=1
 
-# LD1Q
-LD1_zprz        1100 0100 000 rm:5 101 pg:3 rn:5 rd:5 \
-                &rprr_gather_load u=1 ff=0 xs=2 esz=4 msz=4 scale=0
+# LD1Q. Note that this is subtly different from LD1_zprz because
+# it is vector + scalar, not scalar + vector.
+LD1Q            1100 0100 000 rm:5 101 pg:3 rn:5 rd:5
 
 # SVE 64-bit gather load (vector plus immediate)
 LD1_zpiz        1100010 .. 01 ..... 1.. ... ..... ..... \
@@ -1450,9 +1450,9 @@ ST1_zprz        1110010 .. 01 ..... 101 ... ..... ..... \
 ST1_zprz        1110010 .. 00 ..... 101 ... ..... ..... \
                 @rprr_scatter_store xs=2 esz=3 scale=0
 
-# ST1Q
-ST1_zprz        1110 0100 001 rm:5 001 pg:3 rn:5 rd:5 \
-                &rprr_scatter_store xs=2 msz=4 esz=4 scale=0
+# ST1Q. Note that this is subtly different from ST1_zprz because
+# it is vector + scalar, not scalar + vector.
+ST1Q            1110 0100 001 rm:5 001 pg:3 rn:5 rd:5
 
 # SVE 64-bit scatter store (vector plus immediate)
 ST1_zpiz        1110010 .. 10 ..... 101 ... ..... ..... \
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 5cba7b87bdc..07b827fa8e8 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -6179,9 +6179,7 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
     bool be = s->be_data == MO_BE;
     bool mte = s->mte_active[0];
 
-    if (a->esz < MO_128
-        ? !dc_isar_feature(aa64_sve, s)
-        : !dc_isar_feature(aa64_sve2p1, s)) {
+    if (!dc_isar_feature(aa64_sve, s)) {
         return false;
     }
     s->is_nonstreaming = true;
@@ -6196,10 +6194,6 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
     case MO_64:
         fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
         break;
-    case MO_128:
-        assert(!a->ff && a->u && a->xs == 2 && a->msz == MO_128);
-        fn = gather_load_fn128[mte][be];
-        break;
     default:
         g_assert_not_reached();
     }
@@ -6210,6 +6204,32 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
     return true;
 }
 
+static bool trans_LD1Q(DisasContext *s, arg_LD1Q *a)
+{
+    gen_helper_gvec_mem_scatter *fn = NULL;
+    bool be = s->be_data == MO_BE;
+    bool mte = s->mte_active[0];
+
+    if (!dc_isar_feature(aa64_sve2p1, s)) {
+        return false;
+    }
+    s->is_nonstreaming = true;
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    fn = gather_load_fn128[mte][be];
+    assert(fn != NULL);
+
+    /*
+     * Unlike LD1_zprz, a->rm is the scalar register and it can be XZR, not XSP.
+     * a->rn is the vector register.
+     */
+    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
+               cpu_reg(s, a->rm), MO_128, false, fn);
+    return true;
+}
+
 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
 {
     gen_helper_gvec_mem_scatter *fn = NULL;
@@ -6386,9 +6406,7 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
     if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
         return false;
     }
-    if (a->esz < MO_128
-        ? !dc_isar_feature(aa64_sve, s)
-        : !dc_isar_feature(aa64_sve2p1, s)) {
+    if (!dc_isar_feature(aa64_sve, s)) {
         return false;
     }
     s->is_nonstreaming = true;
@@ -6402,10 +6420,6 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
     case MO_64:
         fn = scatter_store_fn64[mte][be][a->xs][a->msz];
         break;
-    case MO_128:
-        assert(a->xs == 2 && a->msz == MO_128);
-        fn = scatter_store_fn128[mte][be];
-        break;
     default:
         g_assert_not_reached();
     }
@@ -6414,6 +6428,29 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
     return true;
 }
 
+static bool trans_ST1Q(DisasContext *s, arg_ST1Q *a)
+{
+    gen_helper_gvec_mem_scatter *fn;
+    bool be = s->be_data == MO_BE;
+    bool mte = s->mte_active[0];
+
+    if (!dc_isar_feature(aa64_sve2p1, s)) {
+        return false;
+    }
+    s->is_nonstreaming = true;
+    if (!sve_access_check(s)) {
+        return true;
+    }
+    fn = scatter_store_fn128[mte][be];
+    /*
+     * Unlike ST1_zprz, a->rm is the scalar register, and it
+     * can be XZR, not XSP. a->rn is the vector register.
+     */
+    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
+               cpu_reg(s, a->rm), MO_128, true, fn);
+    return true;
+}
+
 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
 {
     gen_helper_gvec_mem_scatter *fn = NULL;