From: Guinevere Larsen Date: Wed, 26 Feb 2025 19:15:33 +0000 (-0300) Subject: gdb/record: support more AVX arithmetic instructions X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=77a7df4756cd4b07bc1c3efbae774498fcebc77e;p=thirdparty%2Fbinutils-gdb.git gdb/record: support more AVX arithmetic instructions This commit adds support for the following AVX/AVX2 instructions: * VPADD[B|W|D|Q] * VPMUL[LW|LD|HW|HUW|UDQ] * VXORP[S|D] * VPAND[|N] This required some reworking of the loop that processes instruction prefixes, because the opcode for VPMULLD overlapped with a valid instruction prefix. To fix that, rather than using "goto out_prefixes", this commit changes the infinite loop to only run while we don't find another VEX prefix. That should be OK, as the Intel manual (page 526 of the March 2024 edition) says that the VEX prefix is always the last one. --- diff --git a/gdb/i386-tdep.c b/gdb/i386-tdep.c index dcb7e7b6766..67feb81b092 100644 --- a/gdb/i386-tdep.c +++ b/gdb/i386-tdep.c @@ -4800,7 +4800,7 @@ static int i386_record_floats (struct gdbarch *gdbarch, static int i386_record_vex (struct i386_record_s *ir, uint8_t vex_w, uint8_t vex_r, - int opcode, struct gdbarch *gdbarch) + struct gdbarch *gdbarch) { /* We need this to find YMM (and once AVX-512 is supported, ZMM) registers. We should always save the largest available register, since an @@ -4814,6 +4814,11 @@ i386_record_vex (struct i386_record_s *ir, uint8_t vex_w, uint8_t vex_r, SCOPE_EXIT { inferior_thread ()->set_executing (true); }; inferior_thread () -> set_executing (false); + uint8_t opcode; + if (record_read_memory (gdbarch, ir->addr, &opcode, 1)) + return -1; + ir->addr++; + switch (opcode) { case 0x10: /* VMOVS[S|D] XMM, mem. 
*/ @@ -5016,14 +5021,26 @@ i386_record_vex (struct i386_record_s *ir, uint8_t vex_w, uint8_t vex_r, } break; - case 0x78: /* VPBROADCASTB */ - case 0x79: /* VPBROADCASTW */ + case 0x40: /* VPMULLD */ + case 0x57: /* VXORP[S|D] */ case 0x58: /* VPBROADCASTD and VADD[P|S][S|D] */ case 0x59: /* VPBROADCASTQ and VMUL[P|S][S|D] */ case 0x5c: /* VSUB[P|S][S|D] */ case 0x5d: /* VMIN[P|S][S|D] */ case 0x5e: /* VDIV[P|S][S|D] */ case 0x5f: /* VMAX[P|S][S|D] */ + case 0x78: /* VPBROADCASTB */ + case 0x79: /* VPBROADCASTW */ + case 0xd4: /* VPADDQ */ + case 0xd5: /* VPMULLW */ + case 0xdb: /* VPAND */ + case 0xdf: /* VPANDN */ + case 0xe5: /* VPMULHW */ + case 0xe4: /* VPMULHUW */ + case 0xf4: /* VPMULUDQ */ + case 0xfc: /* VPADDB */ + case 0xfd: /* VPADDW */ + case 0xfe: /* VPADDD */ { /* vpbroadcast and arithmetic operations are differentiated by map_select, but it doesn't change the recording mechanics. */ @@ -5127,8 +5144,11 @@ i386_process_record (struct gdbarch *gdbarch, struct regcache *regcache, "addr = %s\n", paddress (gdbarch, ir.addr)); - /* prefixes */ - while (1) + /* Process the prefixes. This used to be an infinite loop, but since + a VEX prefix is always the last one before the opcode, according to + Intel's manual anyway, and some AVX opcodes may conflict with + prefixes, it's safe to leave the loop as soon as we see VEX. */ + while (!vex_prefix) { if (record_read_memory (gdbarch, ir.addr, &opcode8, 1)) return -1; @@ -5268,7 +5288,7 @@ i386_process_record (struct gdbarch *gdbarch, struct regcache *regcache, { /* If we found the VEX prefix, i386 will either record or warn that the instruction isn't supported, so we can return the VEX result. 
*/ - return i386_record_vex (&ir, rex_w, rex_r, opcode, gdbarch); + return i386_record_vex (&ir, rex_w, rex_r, gdbarch); } reswitch: switch (opcode) diff --git a/gdb/testsuite/gdb.reverse/i386-avx-reverse.c b/gdb/testsuite/gdb.reverse/i386-avx-reverse.c index f559d69d8e7..3ebb4ddc48d 100644 --- a/gdb/testsuite/gdb.reverse/i386-avx-reverse.c +++ b/gdb/testsuite/gdb.reverse/i386-avx-reverse.c @@ -372,6 +372,7 @@ arith_test () /* Using GDB, load these values onto registers for testing. ymm0.v8_float = {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5} ymm1.v8_float = {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5} + ymm2.v2_int128 = {0x0, 0x0} ymm15.v2_int128 = {0x0, 0x0} this way it's easy to confirm we're undoing things correctly. */ asm volatile ("vaddps %xmm0, %xmm1, %xmm15"); @@ -416,6 +417,23 @@ arith_test () asm volatile ("vmaxss %xmm0, %xmm1, %xmm15"); asm volatile ("vmaxsd %xmm0, %xmm1, %xmm15"); + /* Some sanity checks for other arithmetic instructions. */ + asm volatile ("vpaddb %xmm0, %xmm1, %xmm2"); + asm volatile ("vpaddw %xmm0, %xmm1, %xmm15"); + asm volatile ("vpaddd %ymm0, %ymm1, %ymm2"); + asm volatile ("vpaddq %ymm0, %ymm1, %ymm15"); + + asm volatile ("vpmullw %xmm0, %xmm1, %xmm2"); + asm volatile ("vpmulld %xmm0, %xmm1, %xmm15"); + asm volatile ("vpmulhw %ymm0, %ymm1, %ymm2"); + asm volatile ("vpmulhuw %ymm0, %ymm1, %ymm15"); + asm volatile ("vpmuludq %ymm0, %ymm1, %ymm15"); + + asm volatile ("vxorps %xmm0, %xmm1, %xmm2"); + asm volatile ("vxorpd %ymm0, %ymm1, %ymm2"); + asm volatile ("vpand %xmm0, %xmm1, %xmm15"); + asm volatile ("vpandn %ymm0, %ymm1, %ymm15"); + return 0; /* end arith_test */ } diff --git a/gdb/testsuite/gdb.reverse/i386-avx-reverse.exp b/gdb/testsuite/gdb.reverse/i386-avx-reverse.exp index fbcff4975f8..c37337574da 100644 --- a/gdb/testsuite/gdb.reverse/i386-avx-reverse.exp +++ b/gdb/testsuite/gdb.reverse/i386-avx-reverse.exp @@ -527,9 +527,39 @@ gdb_test_no_output \ "set \$ymm0.v8_float = {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5}" 
gdb_test_no_output \ "set \$ymm1.v8_float = {0, 1, 2, 3, 4, 5, 6, 7}" +gdb_test_no_output "set \$ymm2.v2_int128 = {0,0}" gdb_test_no_output "set \$ymm15.v2_int128 = {0,0}" if {[record_full_function "arith"] == true} { + test_one_register "vpandn" "ymm15" \ + "0x40400000400000003f80000000000000, 0x0" + test_one_register "vpand" "ymm15" \ + "0x10080000000000000000000000000000, 0x10649c00000000001044480000000000" + test_one_register "vxorpd" "ymm2" \ + "0x20000000200000004000003f000000, 0x0" + test_one_register "vxorps" "ymm2" \ + "0x10280000100800000fd0000000000000, 0x10740000106400001054000010440000" + + test_one_register "vpmuludq" "ymm15" \ + "0x10280000100800000fd0000000000000, 0x10740000106400001054000010440000" + test_one_register "vpmulhuw" "ymm15" \ + "0x0, 0x0" + test_one_register "vpmulhw" "ymm2" \ + "0x18000000000000002000000000000000, 0x0" + test_one_register "vpmulld" "ymm15" \ + "0x80a00000802000007f4000003f000000, 0x81d00000819000008150000081100000" + test_one_register "vpmullw" "ymm2" \ + "0x80a00000802000007f4000003f000000, 0x81d00000819000008150000081100000" + + test_one_register "vpaddq" "ymm15" \ + "0x80a00000802000007f4000003f000000, 0x0" + test_one_register "vpaddd" "ymm2" \ + "0x80a00000802000007e4000003f000000, 0x0" + test_one_register "vpaddw" "ymm15" \ + "0x40400000400000003fc000003f000000, 0x0" + test_one_register "vpaddb" "ymm2" \ + "0x0, 0x0" + test_one_register "vmaxsd" "ymm15" \ "0x40400000400000003f8000003f000000, 0x0" "ymm operation: " test_one_register "vmaxss" "ymm15" \