SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
SUBARCH (fred, FRED, ANY_FRED, false),
SUBARCH (lkgs, LKGS, ANY_LKGS, false),
+ SUBARCH (avx_vnni_int16, AVX_VNNI_INT16, ANY_AVX_VNNI_INT16, false),
+ SUBARCH (sha512, SHA512, ANY_SHA512, false),
+ SUBARCH (sm3, SM3, ANY_SM3, false),
+ SUBARCH (sm4, SM4, ANY_SM4, false),
+ SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
};
#undef SUBARCH
i.types[j].bitfield.disp8
= fits_in_disp8 (i.op[j].disps->X_add_number);
}
+ else if (optimize_for_space
+ && i.tm.base_opcode == 0x29
+ && i.tm.opcode_space == SPACE_0F38
+ && i.operands == i.reg_operands
+ && i.op[0].regs == i.op[1].regs
+ && (!i.tm.opcode_modifier.vex
+ || !(i.op[0].regs->reg_flags & RegRex))
+ && !is_evex_encoding (&i.tm))
+ {
+ /* Optimize: -Os:
+ pcmpeqq %xmmN, %xmmN -> pcmpeqd %xmmN, %xmmN
+ vpcmpeqq %xmmN, %xmmN, %xmmM -> vpcmpeqd %xmmN, %xmmN, %xmmM (N < 8)
+ vpcmpeqq %ymmN, %ymmN, %ymmM -> vpcmpeqd %ymmN, %ymmN, %ymmM (N < 8)
+ */
+ i.tm.opcode_space = SPACE_0F;
+ i.tm.base_opcode = 0x76;
+ }
+ else if (((i.tm.base_opcode >= 0x64
+ && i.tm.base_opcode <= 0x66
+ && i.tm.opcode_space == SPACE_0F)
+ || (i.tm.base_opcode == 0x37
+ && i.tm.opcode_space == SPACE_0F38))
+ && i.operands == i.reg_operands
+ && i.op[0].regs == i.op[1].regs
+ && !is_evex_encoding (&i.tm))
+ {
+ /* Optimize: -O:
+ pcmpgt[bwd] %mmN, %mmN -> pxor %mmN, %mmN
+ pcmpgt[bwdq] %xmmN, %xmmN -> pxor %xmmN, %xmmN
+ vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM -> vpxor %xmmN, %xmmN, %xmmM (N < 8)
+ vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM -> vpxor %xmm0, %xmm0, %xmmM (N > 7)
+ vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM -> vpxor %ymmN, %ymmN, %ymmM (N < 8)
+ vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM -> vpxor %ymm0, %ymm0, %ymmM (N > 7)
+ */
+ i.tm.opcode_space = SPACE_0F;
+ i.tm.base_opcode = 0xef;
+ if (i.tm.opcode_modifier.vex && (i.op[0].regs->reg_flags & RegRex))
+ {
+ if (i.operands == 2)
+ {
+ gas_assert (i.tm.opcode_modifier.sse2avx);
+
+ i.operands = 3;
+ i.reg_operands = 3;
+ i.tm.operands = 3;
+
+ i.op[2].regs = i.op[0].regs;
+ i.types[2] = i.types[0];
+ i.flags[2] = i.flags[0];
+ i.tm.operand_types[2] = i.tm.operand_types[0];
+
+ i.tm.opcode_modifier.sse2avx = 0;
+ }
+ i.op[0].regs -= i.op[0].regs->reg_num + 8;
+ i.op[1].regs = i.op[0].regs;
+ }
+ }
+ else if (optimize_for_space
+ && i.tm.base_opcode == 0x59
+ && i.tm.opcode_space == SPACE_0F38
+ && i.operands == i.reg_operands
+ && i.tm.opcode_modifier.vex
+ && !(i.op[0].regs->reg_flags & RegRex)
+ && i.op[0].regs->reg_type.bitfield.xmmword
+ && i.vec_encoding != vex_encoding_vex3)
+ {
+ /* Optimize: -Os:
+ vpbroadcastq %xmmN, %xmmM -> vpunpcklqdq %xmmN, %xmmN, %xmmM (N < 8)
+ */
+ i.tm.opcode_space = SPACE_0F;
+ i.tm.base_opcode = 0x6c;
+ i.tm.opcode_modifier.vexvvvv = 1;
+
+ ++i.operands;
+ ++i.reg_operands;
+ ++i.tm.operands;
+
+ i.op[2].regs = i.op[0].regs;
+ i.types[2] = i.types[0];
+ i.flags[2] = i.flags[0];
+ i.tm.operand_types[2] = i.tm.operand_types[0];
+
+ swap_2_operands (1, 2);
+ }
}
/* Return non-zero for load instruction. */