From: Paul Floyd <pjfloyd@wanadoo.fr>
Date: Sun, 14 Apr 2024 15:59:30 +0000 (+0200)
Subject: Bug 485148 - vfmadd213ss instruction is instrumented incorrectly (the remaining part of the register is cleared instead of kept unmodified)
X-Git-Tag: VALGRIND_3_23_0~44
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=219998aeb64e28391861006e9eb1ea4b96d69083;p=thirdparty%2Fvalgrind.git

Bug 485148 - vfmadd213ss instruction is instrumented incorrectly (the remaining part of the register is cleared instead of kept unmodified)

Initial version contributed by Bruno Lathuilière <bruno.lathuiliere@edf.fr>
Initial test contributed by Petr <kobalicek.petr@gmail.com>
---

diff --git a/.gitignore b/.gitignore
index 1491b8943..b242eb6e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1659,6 +1659,7 @@
 /none/tests/amd64/bug132918
 /none/tests/amd64/bug137714-amd64
 /none/tests/amd64/bug156404-amd64
+/none/tests/amd64/bug485148
 /none/tests/amd64/cet_nops
 /none/tests/amd64/clc
 /none/tests/amd64/cmpxchg
diff --git a/NEWS b/NEWS
index 705e17be2..8f5ac0c9c 100644
--- a/NEWS
+++ b/NEWS
@@ -89,6 +89,8 @@ are not entered into bugzilla tend to get forgotten about or ignored.
 484480  False positives when using sem_trywait
 484935  [patch] Valgrind reports false "Conditional jump or move depends on
         uninitialised value" errors for aarch64 signal handlers
+485148  vfmadd213ss instruction is instrumented incorrectly (the remaining
+        part of the register is cleared instead of kept unmodified)
 n-i-bz  Add redirect for memccpy
 
 To see details of a given bug, visit
diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
index d7c25042d..f0b1c5516 100644
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -27989,8 +27989,8 @@ static Long dis_FMA ( const VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc )
    }
 
    switch (vty) {
-      case Ity_F32:  putYMMRegLane32(rG, 1, mkU32(0)); /*fallthru*/
-      case Ity_F64:  putYMMRegLane64(rG, 1, mkU64(0)); /*fallthru*/
+      case Ity_F32:
+      case Ity_F64:
       case Ity_V128: putYMMRegLane128(rG, 1, mkV128(0)); /*fallthru*/
       case Ity_V256: break;
       default: vassert(0);
diff --git a/none/tests/amd64/Makefile.am b/none/tests/amd64/Makefile.am
index 2e688e3ca..dc0498018 100644
--- a/none/tests/amd64/Makefile.am
+++ b/none/tests/amd64/Makefile.am
@@ -43,6 +43,7 @@ EXTRA_DIST = \
 	bug132918.stdout.exp-older-glibc \
 	bug156404-amd64.vgtest bug156404-amd64.stdout.exp \
 	bug156404-amd64.stderr.exp \
+	bug485148.vgtest bug485148.stdout.exp bug485148.stderr.exp \
 	cet_nops.vgtest cet_nops.stdout.exp cet_nops.stderr.exp \
 	clc.vgtest clc.stdout.exp clc.stderr.exp \
 	crc32.vgtest crc32.stdout.exp crc32.stderr.exp \
@@ -160,7 +161,7 @@ if BUILD_BMI_TESTS
  check_PROGRAMS += bmi
 endif
 if BUILD_FMA_TESTS
- check_PROGRAMS += fma
+ check_PROGRAMS += fma bug485148
 endif
 if BUILD_MPX_TESTS
  check_PROGRAMS += mpx
@@ -201,6 +202,8 @@ allexec_CFLAGS		= $(AM_CFLAGS) @FLAG_W_NO_NONNULL@
 # generic C ones
 amd64locked_CFLAGS	= $(AM_CFLAGS) -O
 bug132918_LDADD		= -lm
+bug485148_CXXFLAGS      = ${AM_CXXFLAGS} -mfma
+bug485148_SOURCES       = bug485148.cpp
 cmpxchg_CFLAGS		= $(AM_CFLAGS) @FLAG_NO_PIE@
 fb_test_amd64_CFLAGS	= $(AM_CFLAGS) -O -fno-strict-aliasing
 fb_test_amd64_LDADD	= -lm
diff --git a/none/tests/amd64/bug485148.cpp b/none/tests/amd64/bug485148.cpp
new file mode 100644
index 000000000..bed8de854
--- /dev/null
+++ b/none/tests/amd64/bug485148.cpp
@@ -0,0 +1,45 @@
+#include <stdio.h>
+#include <x86intrin.h>
+
+static __attribute__((noinline)) void   // noinline: presumably keeps the compiler from folding the FMA away -- confirm
+test_fma_ss(float dst[4], const float a[4], const float b[4], const float c[4])
+{  // Store the scalar single-precision fused multiply-add of a, b and c into dst.
+   __m128 av = _mm_loadu_ps(a);   // unaligned 128-bit loads of all four floats
+   __m128 bv = _mm_loadu_ps(b);
+   __m128 cv = _mm_loadu_ps(c);
+   // Only lane 0 is computed (a[0]*b[0] + c[0]); lanes 1..3 of dv are copied from av.
+   __m128 dv = _mm_fmadd_ss(av, bv, cv);
+   _mm_storeu_ps(dst, dv);
+}
+
+static __attribute__((noinline)) void   // noinline: presumably keeps the compiler from folding the FMA away -- confirm
+test_fma_sd(double dst[2], const double a[2], const double b[2], const double c[2])
+{  // Store the scalar double-precision fused multiply-add of a, b and c into dst.
+   __m128d av = _mm_loadu_pd(a);   // unaligned 128-bit loads of both doubles
+   __m128d bv = _mm_loadu_pd(b);
+   __m128d cv = _mm_loadu_pd(c);
+   // Only element 0 is computed (a[0]*b[0] + c[0]); element 1 of dv is copied from av.
+   __m128d dv = _mm_fmadd_sd(av, bv, cv);
+   _mm_storeu_pd(dst, dv);
+}
+
+int main()
+{  // Run both scalar FMA helpers and print every lane of each result.
+   float a[4] = {1, 2, 3, 4};
+   float b[4] = {3, 11, 35, 1};
+   float c[4] = {-1, -2, -19, 0};
+   // a's upper lanes are non-zero, so a cleared upper lane would print as 0.
+   float dst_f[4];
+   test_fma_ss(dst_f, a, b, c);
+   // Expected: lane 0 = 1*3 + -1 = 2, followed by a[1..3] = 2, 3, 4.
+   printf("[%f %f %f %f]\n", dst_f[0], dst_f[1], dst_f[2], dst_f[3]);
+   // Same check for the double-precision scalar form.
+   double d[2] = {5, 6};
+   double e[2] = {2, 18};
+   double f[2] = {3, 15};
+
+   double dst_d[2];
+   test_fma_sd(dst_d, d, e, f);
+   // Expected: element 0 = 5*2 + 3 = 13, followed by d[1] = 6.
+   printf("[%f %f]\n", dst_d[0], dst_d[1]);
+}
diff --git a/none/tests/amd64/bug485148.stderr.exp b/none/tests/amd64/bug485148.stderr.exp
new file mode 100644
index 000000000..e69de29bb
diff --git a/none/tests/amd64/bug485148.stdout.exp b/none/tests/amd64/bug485148.stdout.exp
new file mode 100644
index 000000000..23d83107e
--- /dev/null
+++ b/none/tests/amd64/bug485148.stdout.exp
@@ -0,0 +1,2 @@
+[2.000000 2.000000 3.000000 4.000000]
+[13.000000 6.000000]
diff --git a/none/tests/amd64/bug485148.vgtest b/none/tests/amd64/bug485148.vgtest
new file mode 100644
index 000000000..dbbedc62b
--- /dev/null
+++ b/none/tests/amd64/bug485148.vgtest
@@ -0,0 +1,3 @@
+prog: bug485148
+prereq: test -x bug485148 && ../../../tests/x86_amd64_features amd64-avx
+vgopts: -q