Prepare struct operand to hold AVX registers. Remove the
existing, incomplete code that placed the Avx flag in the operand
alignment field, and repurpose the name for a separate bit that
indicates:
- after decode, whether an instruction supports the VEX prefix;
- before writeback, that the instruction did have the VEX prefix and
therefore 1) it can have op_bytes == 32; 2) writeback of an XMM
operand should clear the high bytes of the underlying YMM register.
Right now the bit is never set, so the patch has no intended
functional change. However, this is actually more vexing than the
decoder changes themselves, and therefore worth separating.
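For illustration only, a minimal sketch of how the bit is expected to
drive operand sizing once the decoder actually sets it; the vex_l
parameter is a hypothetical stand-in for the decoded VEX.L bit and is
not introduced by this patch:

/*
 * Sketch, not part of the patch: with Avx set, VEX.L selects 32-byte
 * (YMM) operands, which is how op_bytes == 32 becomes reachable.
 */
static void sketch_vector_op_bytes(struct x86_emulate_ctxt *ctxt, bool vex_l)
{
	if (ctxt->d & Avx)
		ctxt->op_bytes = vex_l ? 32 : 16;	/* YMM vs. XMM */
	else if (ctxt->d & Sse)
		ctxt->op_bytes = 16;
	else if (ctxt->d & Mmx)
		ctxt->op_bytes = 8;
}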
Co-developed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Link: https://patch.msgid.link/20251114003633.60689-8-pbonzini@redhat.com
[sean: guard ymm[8-15] accesses with #ifdef CONFIG_X86_64]
Signed-off-by: Sean Christopherson <seanjc@google.com>
#define No64 (1<<28) /* Instruction generates #UD in 64-bit mode */
#define PageTable (1 << 29) /* instruction used to write page table */
#define NotImpl (1 << 30) /* instruction is not implemented */
+#define Avx ((u64)1 << 31) /* Instruction uses VEX prefix */
#define Src2Shift (32) /* Source 2 operand type at bits 32-36 */
#define Src2None (OpNone << Src2Shift)
#define Src2Mem (OpMem << Src2Shift)
#define Src2Mask (OpMask << Src2Shift)
/* free: 37-39 */
#define Mmx ((u64)1 << 40) /* MMX Vector instruction */
-#define AlignMask ((u64)7 << 41) /* Memory alignment requirement at bits 41-43 */
+#define AlignMask ((u64)3 << 41) /* Memory alignment requirement at bits 41-42 */
#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
-#define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
-#define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
-/* free: 44 */
+#define Aligned16 ((u64)3 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
+/* free: 43-44 */
#define NoWrite ((u64)1 << 45) /* No writeback */
#define SrcWrite ((u64)1 << 46) /* Write back src operand */
#define NoMod ((u64)1 << 47) /* Mod field is ignored */
switch (alignment) {
case Unaligned:
- case Avx:
return 1;
case Aligned16:
return 16;
static void __decode_register_operand(struct x86_emulate_ctxt *ctxt,
struct operand *op, int reg)
{
- if (ctxt->d & Sse) {
+ if ((ctxt->d & Avx) && ctxt->op_bytes == 32) {
+ op->type = OP_YMM;
+ op->bytes = 32;
+ op->addr.xmm = reg;
+ kvm_read_avx_reg(reg, &op->vec_val2);
+ return;
+ }
+ if (ctxt->d & (Avx|Sse)) {
op->type = OP_XMM;
op->bytes = 16;
op->addr.xmm = reg;
op->data,
op->bytes * op->count);
case OP_XMM:
- kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
+ if (!(ctxt->d & Avx)) {
+ kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
+ break;
+ }
+ /* full YMM write but with high bytes cleared */
+ memset(op->valptr + 16, 0, 16);
+ fallthrough;
+ case OP_YMM:
+ kvm_write_avx_reg(op->addr.xmm, &op->vec_val2);
break;
case OP_MM:
kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
ctxt->op_bytes = 8; /* REX.W */
/* Opcode byte(s). */
- opcode = opcode_table[ctxt->b];
- /* Two-byte opcode? */
if (ctxt->b == 0x0f) {
+ /* Two- or three-byte opcode */
ctxt->opcode_len = 2;
ctxt->b = insn_fetch(u8, ctxt);
opcode = twobyte_table[ctxt->b];
ctxt->b = insn_fetch(u8, ctxt);
opcode = opcode_map_0f_38[ctxt->b];
}
+ } else {
+ /* Opcode byte(s). */
+ opcode = opcode_table[ctxt->b];
}
ctxt->d = opcode.flags;
ctxt->op_bytes = 4;
if (ctxt->d & Sse)
- ctxt->op_bytes = 16;
+ ctxt->op_bytes = 16, ctxt->d &= ~Avx;
else if (ctxt->d & Mmx)
ctxt->op_bytes = 8;
}
}
if (unlikely(ctxt->d &
- (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
+ (No64|Undefined|Avx|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
(ctxt->d & Undefined)) {
rc = emulate_ud(ctxt);
goto done;
}
- if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
- || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
+ if ((ctxt->d & (Avx|Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) {
rc = emulate_ud(ctxt);
goto done;
}
- if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
+ if (ctxt->d & Avx) {
+ u64 xcr = 0;
+ if (!(ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE)
+ || ops->get_xcr(ctxt, 0, &xcr)
+ || !(xcr & XFEATURE_MASK_YMM)) {
+ rc = emulate_ud(ctxt);
+ goto done;
+ }
+ } else if (ctxt->d & Sse) {
+ if (!(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)) {
+ rc = emulate_ud(ctxt);
+ goto done;
+ }
+ }
+
+ if ((ctxt->d & (Avx|Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
rc = emulate_nm(ctxt);
goto done;
}
#define sse128_l3(x) ({ __sse128_u t; t.vec = x; t.as_u32[3]; })
#define sse128(lo, hi) ({ __sse128_u t; t.as_u64[0] = lo; t.as_u64[1] = hi; t.vec; })
+typedef u32 __attribute__((vector_size(32))) avx256_t;
+
+static inline void _kvm_read_avx_reg(int reg, avx256_t *data)
+{
+ switch (reg) {
+ case 0: asm("vmovdqa %%ymm0, %0" : "=m"(*data)); break;
+ case 1: asm("vmovdqa %%ymm1, %0" : "=m"(*data)); break;
+ case 2: asm("vmovdqa %%ymm2, %0" : "=m"(*data)); break;
+ case 3: asm("vmovdqa %%ymm3, %0" : "=m"(*data)); break;
+ case 4: asm("vmovdqa %%ymm4, %0" : "=m"(*data)); break;
+ case 5: asm("vmovdqa %%ymm5, %0" : "=m"(*data)); break;
+ case 6: asm("vmovdqa %%ymm6, %0" : "=m"(*data)); break;
+ case 7: asm("vmovdqa %%ymm7, %0" : "=m"(*data)); break;
+#ifdef CONFIG_X86_64
+ case 8: asm("vmovdqa %%ymm8, %0" : "=m"(*data)); break;
+ case 9: asm("vmovdqa %%ymm9, %0" : "=m"(*data)); break;
+ case 10: asm("vmovdqa %%ymm10, %0" : "=m"(*data)); break;
+ case 11: asm("vmovdqa %%ymm11, %0" : "=m"(*data)); break;
+ case 12: asm("vmovdqa %%ymm12, %0" : "=m"(*data)); break;
+ case 13: asm("vmovdqa %%ymm13, %0" : "=m"(*data)); break;
+ case 14: asm("vmovdqa %%ymm14, %0" : "=m"(*data)); break;
+ case 15: asm("vmovdqa %%ymm15, %0" : "=m"(*data)); break;
+#endif
+ default: BUG();
+ }
+}
+
+static inline void _kvm_write_avx_reg(int reg, const avx256_t *data)
+{
+ switch (reg) {
+ case 0: asm("vmovdqa %0, %%ymm0" : : "m"(*data)); break;
+ case 1: asm("vmovdqa %0, %%ymm1" : : "m"(*data)); break;
+ case 2: asm("vmovdqa %0, %%ymm2" : : "m"(*data)); break;
+ case 3: asm("vmovdqa %0, %%ymm3" : : "m"(*data)); break;
+ case 4: asm("vmovdqa %0, %%ymm4" : : "m"(*data)); break;
+ case 5: asm("vmovdqa %0, %%ymm5" : : "m"(*data)); break;
+ case 6: asm("vmovdqa %0, %%ymm6" : : "m"(*data)); break;
+ case 7: asm("vmovdqa %0, %%ymm7" : : "m"(*data)); break;
+#ifdef CONFIG_X86_64
+ case 8: asm("vmovdqa %0, %%ymm8" : : "m"(*data)); break;
+ case 9: asm("vmovdqa %0, %%ymm9" : : "m"(*data)); break;
+ case 10: asm("vmovdqa %0, %%ymm10" : : "m"(*data)); break;
+ case 11: asm("vmovdqa %0, %%ymm11" : : "m"(*data)); break;
+ case 12: asm("vmovdqa %0, %%ymm12" : : "m"(*data)); break;
+ case 13: asm("vmovdqa %0, %%ymm13" : : "m"(*data)); break;
+ case 14: asm("vmovdqa %0, %%ymm14" : : "m"(*data)); break;
+ case 15: asm("vmovdqa %0, %%ymm15" : : "m"(*data)); break;
+#endif
+ default: BUG();
+ }
+}
+
static inline void _kvm_read_sse_reg(int reg, sse128_t *data)
{
switch (reg) {
fpregs_unlock();
}
+static inline void kvm_read_avx_reg(int reg, avx256_t *data)
+{
+ kvm_fpu_get();
+ _kvm_read_avx_reg(reg, data);
+ kvm_fpu_put();
+}
+
+static inline void kvm_write_avx_reg(int reg, const avx256_t *data)
+{
+ kvm_fpu_get();
+ _kvm_write_avx_reg(reg, data);
+ kvm_fpu_put();
+}
+
static inline void kvm_read_sse_reg(int reg, sse128_t *data)
{
kvm_fpu_get();
/* Type, address-of, and value of an instruction's operand. */
struct operand {
- enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
+ enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_YMM, OP_MM, OP_NONE } type;
unsigned int bytes;
unsigned int count;
union {
union {
unsigned long val;
u64 val64;
- char valptr[sizeof(sse128_t)];
+ char valptr[sizeof(avx256_t)];
sse128_t vec_val;
+ avx256_t vec_val2;
u64 mm_val;
void *data;
- };
+ } __aligned(32);
};
#define X86_MAX_INSTRUCTION_LENGTH 15