/* tc-i386.c -- Assemble code for the Intel 80386
- Copyright (C) 1989-2019 Free Software Foundation, Inc.
+ Copyright (C) 1989-2020 Free Software Foundation, Inc.
This file is part of GAS, the GNU Assembler.
#endif
#endif
-#ifndef REGISTER_WARNINGS
-#define REGISTER_WARNINGS 1
-#endif
-
#ifndef INFER_ADDR_PREFIX
#define INFER_ADDR_PREFIX 1
#endif
static char *parse_operands (char *, const char *);
static void swap_operands (void);
static void swap_2_operands (int, int);
+static enum flag_code i386_addressing_mode (void);
static void optimize_imm (void);
static void optimize_disp (void);
static const insn_template *match_template (char);
static const char *default_arch = DEFAULT_ARCH;
+/* parse_register() returns this when a register alias cannot be used. */
+static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
+ { Dw2Inval, Dw2Inval } };
+
/* This struct describes rounding control and SAE in the instruction. */
struct RC_Operation
{
unsupported_with_intel_mnemonic,
unsupported_syntax,
unsupported,
+ invalid_sib_address,
invalid_vsib_address,
invalid_vector_register_set,
+ invalid_tmm_register_set,
unsupported_vector_index_register,
unsupported_broadcast,
broadcast_needed,
unsigned int prefixes;
unsigned char prefix[MAX_PREFIXES];
+ /* Register is in low 3 bits of opcode. */
+ bfd_boolean short_form;
+
/* The operand to a branch insn indicates an absolute branch. */
bfd_boolean jumpabsolute;
- /* Has MMX register operands. */
- bfd_boolean has_regmmx;
-
- /* Has XMM register operands. */
- bfd_boolean has_regxmm;
-
- /* Has YMM register operands. */
- bfd_boolean has_regymm;
-
- /* Has ZMM register operands. */
- bfd_boolean has_regzmm;
+ /* Extended states. */
+ enum
+ {
+ /* Use MMX state. */
+ xstate_mmx = 1 << 0,
+ /* Use XMM state. */
+ xstate_xmm = 1 << 1,
+ /* Use YMM state. */
+ xstate_ymm = 1 << 2 | xstate_xmm,
+ /* Use ZMM state. */
+ xstate_zmm = 1 << 3 | xstate_ymm,
+ /* Use TMM state. */
+ xstate_tmm = 1 << 4
+ } xstate;
+
+ /* Has GOTPC or TLS relocation. */
+ bfd_boolean has_gotpc_tls_reloc;
/* RM and SIB are the modrm byte and the sib byte where the
addressing modes of this insn are encoded. */
dir_encoding_swap
} dir_encoding;
- /* Prefer 8bit or 32bit displacement in encoding. */
+ /* Prefer 8bit, 16bit, 32bit displacement in encoding. */
enum
{
disp_encoding_default = 0,
disp_encoding_8bit,
+ disp_encoding_16bit,
disp_encoding_32bit
} disp_encoding;
enum
{
vex_encoding_default = 0,
- vex_encoding_vex2,
+ vex_encoding_vex,
vex_encoding_vex3,
- vex_encoding_evex
+ vex_encoding_evex,
+ vex_encoding_error
} vec_encoding;
/* REP prefix. */
#endif
;
-#if (defined (TE_I386AIX) \
- || ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
- && !defined (TE_GNU) \
- && !defined (TE_LINUX) \
- && !defined (TE_NACL) \
- && !defined (TE_FreeBSD) \
- && !defined (TE_DragonFly) \
- && !defined (TE_NetBSD)))
+#if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
+ && !defined (TE_GNU) \
+ && !defined (TE_LINUX) \
+ && !defined (TE_FreeBSD) \
+ && !defined (TE_DragonFly) \
+ && !defined (TE_NetBSD))
/* This array holds the chars that always start a comment. If the
pre-processor is disabled, these aren't very useful. The option
--divide will remove '/' from this list. */
static unsigned int object_64bit;
static unsigned int disallow_64bit_reloc;
static int use_rela_relocations = 0;
+/* __tls_get_addr/___tls_get_addr symbol for TLS. */
+static const char *tls_get_addr;
#if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
|| defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
0 if att syntax. */
static int intel_syntax = 0;
-/* 1 for Intel64 ISA,
- 0 if AMD64 ISA. */
-static int intel64;
+static enum x86_64_isa
+{
+ amd64 = 1, /* AMD64 ISA. */
+ intel64 /* Intel64 ISA. */
+} isa64;
/* 1 for intel mnemonic,
0 if att mnemonic. */
"lock addl $0, (%{re}sp)". */
static int avoid_fence = 0;
+/* 1 if lfence should be inserted after every load. */
+static int lfence_after_load = 0;
+
+/* Non-zero if lfence should be inserted before indirect branch. */
+static enum lfence_before_indirect_branch_kind
+ {
+ lfence_branch_none = 0,
+ lfence_branch_register,
+ lfence_branch_memory,
+ lfence_branch_all
+ }
+lfence_before_indirect_branch;
+
+/* Non-zero if lfence should be inserted before ret. */
+static enum lfence_before_ret_kind
+ {
+ lfence_before_ret_none = 0,
+ lfence_before_ret_not,
+ lfence_before_ret_or,
+ lfence_before_ret_shl
+ }
+lfence_before_ret;
+
+/* Types of previous instruction is .byte or prefix. */
+static struct
+ {
+ segT seg;
+ const char *file;
+ const char *name;
+ unsigned int line;
+ enum last_insn_kind
+ {
+ last_insn_other = 0,
+ last_insn_directive,
+ last_insn_prefix
+ } kind;
+ } last_insn;
+
/* 1 if the assembler should generate relax relocations. */
static int generate_relax_relocations
}
sse_check, operand_check = check_warning;
+/* Non-zero if branches should be aligned within power of 2 boundary. */
+static int align_branch_power = 0;
+
+/* Types of branches to align. */
+enum align_branch_kind
+ {
+ align_branch_none = 0,
+ align_branch_jcc = 1,
+ align_branch_fused = 2,
+ align_branch_jmp = 3,
+ align_branch_call = 4,
+ align_branch_indirect = 5,
+ align_branch_ret = 6
+ };
+
+/* Type bits of branches to align. */
+enum align_branch_bit
+ {
+ align_branch_jcc_bit = 1 << align_branch_jcc,
+ align_branch_fused_bit = 1 << align_branch_fused,
+ align_branch_jmp_bit = 1 << align_branch_jmp,
+ align_branch_call_bit = 1 << align_branch_call,
+ align_branch_indirect_bit = 1 << align_branch_indirect,
+ align_branch_ret_bit = 1 << align_branch_ret
+ };
+
+static unsigned int align_branch = (align_branch_jcc_bit
+ | align_branch_fused_bit
+ | align_branch_jmp_bit);
+
+/* Types of condition jump used by macro-fusion. */
+enum mf_jcc_kind
+ {
+ mf_jcc_jo = 0, /* base opcode 0x70 */
+ mf_jcc_jc, /* base opcode 0x72 */
+ mf_jcc_je, /* base opcode 0x74 */
+ mf_jcc_jna, /* base opcode 0x76 */
+ mf_jcc_js, /* base opcode 0x78 */
+ mf_jcc_jp, /* base opcode 0x7a */
+ mf_jcc_jl, /* base opcode 0x7c */
+ mf_jcc_jle, /* base opcode 0x7e */
+ };
+
+/* Types of compare flag-modifying insntructions used by macro-fusion. */
+enum mf_cmp_kind
+ {
+ mf_cmp_test_and, /* test/cmp */
+ mf_cmp_alu_cmp, /* add/sub/cmp */
+ mf_cmp_incdec /* inc/dec */
+ };
+
+/* The maximum padding size for fused jcc. CMP like instruction can
+ be 9 bytes and jcc can be 6 bytes. Leave room just in case for
+ prefixes. */
+#define MAX_FUSED_JCC_PADDING_SIZE 20
+
+/* The maximum number of prefixes added for an instruction. */
+static unsigned int align_branch_prefix_size = 5;
+
/* Optimization:
1. Clear the REX_W bit with register operand if possible.
2. Above plus use 128bit vector instruction to clear the full vector
/* Interface to relax_segment.
There are 3 major relax states for 386 jump insns because the
different types of jumps add different sizes to frags when we're
- figuring out what sort of jump to choose to reach a given label. */
+ figuring out what sort of jump to choose to reach a given label.
+
+ BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
+ branches which are handled by md_estimate_size_before_relax() and
+ i386_generic_table_relax_frag(). */
/* Types. */
#define UNCOND_JUMP 0
#define COND_JUMP 1
#define COND_JUMP86 2
+#define BRANCH_PADDING 3
+#define BRANCH_PREFIX 4
+#define FUSED_JCC_PADDING 5
/* Sizes. */
#define CODE16 1
CPU_SSE2_FLAGS, 0 },
{ STRING_COMMA_LEN (".sse3"), PROCESSOR_UNKNOWN,
CPU_SSE3_FLAGS, 0 },
+ { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
+ CPU_SSE4A_FLAGS, 0 },
{ STRING_COMMA_LEN (".ssse3"), PROCESSOR_UNKNOWN,
CPU_SSSE3_FLAGS, 0 },
{ STRING_COMMA_LEN (".sse4.1"), PROCESSOR_UNKNOWN,
CPU_EPT_FLAGS, 0 },
{ STRING_COMMA_LEN (".lzcnt"), PROCESSOR_UNKNOWN,
CPU_LZCNT_FLAGS, 0 },
+ { STRING_COMMA_LEN (".popcnt"), PROCESSOR_UNKNOWN,
+ CPU_POPCNT_FLAGS, 0 },
{ STRING_COMMA_LEN (".hle"), PROCESSOR_UNKNOWN,
CPU_HLE_FLAGS, 0 },
{ STRING_COMMA_LEN (".rtm"), PROCESSOR_UNKNOWN,
CPU_WAITPKG_FLAGS, 0 },
{ STRING_COMMA_LEN (".cldemote"), PROCESSOR_UNKNOWN,
CPU_CLDEMOTE_FLAGS, 0 },
+ { STRING_COMMA_LEN (".amx_int8"), PROCESSOR_UNKNOWN,
+ CPU_AMX_INT8_FLAGS, 0 },
+ { STRING_COMMA_LEN (".amx_bf16"), PROCESSOR_UNKNOWN,
+ CPU_AMX_BF16_FLAGS, 0 },
+ { STRING_COMMA_LEN (".amx_tile"), PROCESSOR_UNKNOWN,
+ CPU_AMX_TILE_FLAGS, 0 },
{ STRING_COMMA_LEN (".movdiri"), PROCESSOR_UNKNOWN,
CPU_MOVDIRI_FLAGS, 0 },
{ STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
{ STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
CPU_ENQCMD_FLAGS, 0 },
+ { STRING_COMMA_LEN (".serialize"), PROCESSOR_UNKNOWN,
+ CPU_SERIALIZE_FLAGS, 0 },
{ STRING_COMMA_LEN (".rdpru"), PROCESSOR_UNKNOWN,
CPU_RDPRU_FLAGS, 0 },
{ STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN,
CPU_MCOMMIT_FLAGS, 0 },
+ { STRING_COMMA_LEN (".sev_es"), PROCESSOR_UNKNOWN,
+ CPU_SEV_ES_FLAGS, 0 },
+ { STRING_COMMA_LEN (".tsxldtrk"), PROCESSOR_UNKNOWN,
+ CPU_TSXLDTRK_FLAGS, 0 },
+ { STRING_COMMA_LEN (".kl"), PROCESSOR_UNKNOWN,
+ CPU_KL_FLAGS, 0 },
+ { STRING_COMMA_LEN (".widekl"), PROCESSOR_UNKNOWN,
+ CPU_WIDEKL_FLAGS, 0 },
};
static const noarch_entry cpu_noarch[] =
{ STRING_COMMA_LEN ("nosse"), CPU_ANY_SSE_FLAGS },
{ STRING_COMMA_LEN ("nosse2"), CPU_ANY_SSE2_FLAGS },
{ STRING_COMMA_LEN ("nosse3"), CPU_ANY_SSE3_FLAGS },
+ { STRING_COMMA_LEN ("nosse4a"), CPU_ANY_SSE4A_FLAGS },
{ STRING_COMMA_LEN ("nossse3"), CPU_ANY_SSSE3_FLAGS },
{ STRING_COMMA_LEN ("nosse4.1"), CPU_ANY_SSE4_1_FLAGS },
{ STRING_COMMA_LEN ("nosse4.2"), CPU_ANY_SSE4_2_FLAGS },
{ STRING_COMMA_LEN ("noavx512_bitalg"), CPU_ANY_AVX512_BITALG_FLAGS },
{ STRING_COMMA_LEN ("noibt"), CPU_ANY_IBT_FLAGS },
{ STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
+ { STRING_COMMA_LEN ("noamx_int8"), CPU_ANY_AMX_INT8_FLAGS },
+ { STRING_COMMA_LEN ("noamx_bf16"), CPU_ANY_AMX_BF16_FLAGS },
+ { STRING_COMMA_LEN ("noamx_tile"), CPU_ANY_AMX_TILE_FLAGS },
{ STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
{ STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
{ STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
- { STRING_COMMA_LEN ("noavx512_vp2intersect"), CPU_ANY_SHSTK_FLAGS },
+ { STRING_COMMA_LEN ("noavx512_vp2intersect"),
+ CPU_ANY_AVX512_VP2INTERSECT_FLAGS },
{ STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
+ { STRING_COMMA_LEN ("noserialize"), CPU_ANY_SERIALIZE_FLAGS },
+ { STRING_COMMA_LEN ("notsxldtrk"), CPU_ANY_TSXLDTRK_FLAGS },
+ { STRING_COMMA_LEN ("nokl"), CPU_ANY_KL_FLAGS },
+ { STRING_COMMA_LEN ("nowidekl"), CPU_ANY_WIDEKL_FLAGS },
};
#ifdef I386COFF
extern char *input_line_pointer;
/* Hash table for instruction mnemonic lookup. */
-static struct hash_control *op_hash;
+static htab_t op_hash;
/* Hash table for register lookup. */
-static struct hash_control *reg_hash;
+static htab_t reg_hash;
\f
/* Various efficient no-op patterns for aligning code labels.
Note: Don't try to assemble the instructions in the comments.
case rs_fill_nop:
case rs_align_code:
break;
+ case rs_machine_dependent:
+ /* Allow NOP padding for jumps and calls. */
+ if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
+ || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
+ break;
+ /* Fall through. */
default:
return;
}
return;
}
}
- else
+ else if (fragP->fr_type != rs_machine_dependent)
fragP->fr_var = count;
if ((count / max_single_nop_size) > max_number_of_nops)
return x;
}
+static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
+
#define CPU_FLAGS_ARCH_MATCH 0x1
#define CPU_FLAGS_64BIT_MATCH 0x2
{
/* We need to check a few extra flags with AVX. */
if (cpu.bitfield.cpuavx
- && (!t->opcode_modifier.sse2avx || sse2avx)
+ && (!t->opcode_modifier.sse2avx
+ || (sse2avx && !i.prefix[DATA_PREFIX]))
&& (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
&& (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
&& (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
|| (i.types[given].bitfield.ymmword
&& !t->operand_types[wanted].bitfield.ymmword)
|| (i.types[given].bitfield.zmmword
- && !t->operand_types[wanted].bitfield.zmmword));
+ && !t->operand_types[wanted].bitfield.zmmword)
+ || (i.types[given].bitfield.tmmword
+ && !t->operand_types[wanted].bitfield.tmmword));
}
/* Return 1 if there is no conflict in any size between operand GIVEN
here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
down-conversion vpmov*. */
|| ((t->operand_types[wanted].bitfield.class == RegSIMD
- && !t->opcode_modifier.broadcast
- && (t->operand_types[wanted].bitfield.byte
- || t->operand_types[wanted].bitfield.word
- || t->operand_types[wanted].bitfield.dword
- || t->operand_types[wanted].bitfield.qword))
+ && t->operand_types[wanted].bitfield.byte
+ + t->operand_types[wanted].bitfield.word
+ + t->operand_types[wanted].bitfield.dword
+ + t->operand_types[wanted].bitfield.qword
+ > !!t->opcode_modifier.broadcast)
? (i.types[given].bitfield.xmmword
|| i.types[given].bitfield.ymmword
|| i.types[given].bitfield.zmmword)
if (!t->opcode_modifier.d)
{
-mismatch:
+ mismatch:
if (!match)
i.error = operand_size_mismatch;
return match;
temp.bitfield.xmmword = 0;
temp.bitfield.ymmword = 0;
temp.bitfield.zmmword = 0;
+ temp.bitfield.tmmword = 0;
if (operand_type_all_zero (&temp))
goto mismatch;
if (given.bitfield.baseindex == overlap.bitfield.baseindex)
return 1;
-mismatch:
+ mismatch:
i.error = operand_type_mismatch;
return 0;
}
/* If given types g0 and g1 are registers they must be of the same type
unless the expected operand type register overlap is null.
- Memory operand size of certain SIMD instructions is also being checked
- here. */
+ Some Intel syntax memory operand size checking also happens here. */
static INLINE int
operand_type_register_match (i386_operand_type g0,
&& g0.bitfield.class != RegSIMD
&& (!operand_type_check (g0, anymem)
|| g0.bitfield.unspecified
- || t0.bitfield.class != RegSIMD))
+ || (t0.bitfield.class != Reg
+ && t0.bitfield.class != RegSIMD)))
return 1;
if (g1.bitfield.class != Reg
&& g1.bitfield.class != RegSIMD
&& (!operand_type_check (g1, anymem)
|| g1.bitfield.unspecified
- || t1.bitfield.class != RegSIMD))
+ || (t1.bitfield.class != Reg
+ && t1.bitfield.class != RegSIMD)))
return 1;
if (g0.bitfield.byte == g1.bitfield.byte
default: abort ();
}
-#ifdef BFD64
- /* If BFD64, sign extend val for 32bit address mode. */
- if (flag_code != CODE_64BIT
- || i.prefix[ADDR_PREFIX])
- if ((val & ~(((addressT) 2 << 31) - 1)) == 0)
- val = (val ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
-#endif
-
if ((val & ~mask) != 0 && (val & ~mask) != ~mask)
{
char buf1[40], buf2[40];
void
md_begin (void)
{
- const char *hash_err;
-
/* Support pseudo prefixes like {disp32}. */
lex_type ['{'] = LEX_BEGIN_NAME;
/* Initialize op_hash hash table. */
- op_hash = hash_new ();
+ op_hash = str_htab_create ();
{
const insn_template *optab;
/* different name --> ship out current template list;
add to hash table; & begin anew. */
core_optab->end = optab;
- hash_err = hash_insert (op_hash,
- (optab - 1)->name,
- (void *) core_optab);
- if (hash_err)
- {
- as_fatal (_("can't hash %s: %s"),
- (optab - 1)->name,
- hash_err);
- }
+ if (str_hash_insert (op_hash, (optab - 1)->name, core_optab, 0))
+ as_fatal (_("duplicate %s"), (optab - 1)->name);
+
if (optab->name == NULL)
break;
core_optab = XNEW (templates);
}
/* Initialize reg_hash hash table. */
- reg_hash = hash_new ();
+ reg_hash = str_htab_create ();
{
const reg_entry *regtab;
unsigned int regtab_size = i386_regtab_size;
for (regtab = i386_regtab; regtab_size--; regtab++)
- {
- hash_err = hash_insert (reg_hash, regtab->reg_name, (void *) regtab);
- if (hash_err)
- as_fatal (_("can't hash %s: %s"),
- regtab->reg_name,
- hash_err);
- }
+ if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
+ as_fatal (_("duplicate %s"), regtab->reg_name);
}
/* Fill in lexical tables: mnemonic_chars, operand_chars. */
mnemonic_chars[c] = c;
operand_chars[c] = c;
}
+#ifdef SVR4_COMMENT_CHARS
+ else if (c == '\\' && strchr (i386_comment_chars, '/'))
+ operand_chars[c] = c;
+#endif
if (ISALPHA (c) || ISDIGIT (c))
identifier_chars[c] = c;
x86_dwarf2_return_column = 8;
x86_cie_data_alignment = -4;
}
+
+ /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
+ can be turned into BRANCH_PREFIX frag. */
+ if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
+ abort ();
}
void
i386_print_statistics (FILE *file)
{
- hash_print_statistics (file, "i386 opcode", op_hash);
- hash_print_statistics (file, "i386 register", reg_hash);
+ htab_print_statistics (file, "i386 opcode", op_hash);
+ htab_print_statistics (file, "i386 register", reg_hash);
}
\f
#ifdef DEBUG386
if (x->types[j].bitfield.class == Reg
|| x->types[j].bitfield.class == RegMMX
|| x->types[j].bitfield.class == RegSIMD
+ || x->types[j].bitfield.class == RegMask
|| x->types[j].bitfield.class == SReg
|| x->types[j].bitfield.class == RegCR
|| x->types[j].bitfield.class == RegDR
- || x->types[j].bitfield.class == RegTR)
+ || x->types[j].bitfield.class == RegTR
+ || x->types[j].bitfield.class == RegBND)
fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
if (operand_type_check (x->types[j], imm))
pe (x->op[j].imms);
{ OPERAND_TYPE_REGXMM, "rXMM" },
{ OPERAND_TYPE_REGYMM, "rYMM" },
{ OPERAND_TYPE_REGZMM, "rZMM" },
+ { OPERAND_TYPE_REGTMM, "rTMM" },
{ OPERAND_TYPE_REGMASK, "Mask reg" },
};
if (fixP->fx_r_type == BFD_RELOC_SIZE32
|| fixP->fx_r_type == BFD_RELOC_SIZE64
|| fixP->fx_r_type == BFD_RELOC_386_GOTOFF
- || fixP->fx_r_type == BFD_RELOC_386_PLT32
|| fixP->fx_r_type == BFD_RELOC_386_GOT32
|| fixP->fx_r_type == BFD_RELOC_386_GOT32X
|| fixP->fx_r_type == BFD_RELOC_386_TLS_GD
|| fixP->fx_r_type == BFD_RELOC_386_TLS_LE
|| fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
|| fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
- || fixP->fx_r_type == BFD_RELOC_X86_64_PLT32
|| fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
|| fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
|| fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
}
}
- switch ((i.tm.base_opcode >> 8) & 0xff)
+ switch ((i.tm.base_opcode >> (i.tm.opcode_length << 3)) & 0xff)
{
case 0:
implied_prefix = 0;
/* Determine vector length from the last multi-length vector
operand. */
- vec_length = 0;
for (op = i.operands; op--;)
if (i.tm.operand_types[op].bitfield.xmmword
+ i.tm.operand_types[op].bitfield.ymmword
unsigned int j;
if (optimize_for_space
+ && !is_any_vex_encoding (&i.tm)
&& i.reg_operands == 1
&& i.imm_operands == 1
&& !i.types[1].bitfield.byte
&& i.op[0].imms->X_op == O_constant
&& fits_in_imm7 (i.op[0].imms->X_add_number)
- && ((i.tm.base_opcode == 0xa8
- && i.tm.extension_opcode == None)
+ && (i.tm.base_opcode == 0xa8
|| (i.tm.base_opcode == 0xf6
&& i.tm.extension_opcode == 0x0)))
{
i.types[1].bitfield.byte = 1;
/* Ignore the suffix. */
i.suffix = 0;
- if (base_regnum >= 4
- && !(i.op[1].regs->reg_flags & RegRex))
- {
- /* Handle SP, BP, SI and DI registers. */
- if (i.types[1].bitfield.word)
- j = 16;
- else if (i.types[1].bitfield.dword)
- j = 32;
- else
- j = 48;
- i.op[1].regs -= j;
- }
+ /* Convert to byte registers. */
+ if (i.types[1].bitfield.word)
+ j = 16;
+ else if (i.types[1].bitfield.dword)
+ j = 32;
+ else
+ j = 48;
+ if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
+ j += 8;
+ i.op[1].regs -= j;
}
}
else if (flag_code == CODE_64BIT
+ && !is_any_vex_encoding (&i.tm)
&& ((i.types[1].bitfield.qword
&& i.reg_operands == 1
&& i.imm_operands == 1
&& i.tm.extension_opcode == None
&& fits_in_unsigned_long (i.op[0].imms->X_add_number))
|| (fits_in_imm31 (i.op[0].imms->X_add_number)
- && (((i.tm.base_opcode == 0x24
- || i.tm.base_opcode == 0xa8)
- && i.tm.extension_opcode == None)
+ && ((i.tm.base_opcode == 0x24
+ || i.tm.base_opcode == 0xa8)
|| (i.tm.base_opcode == 0x80
&& i.tm.extension_opcode == 0x4)
|| ((i.tm.base_opcode == 0xf6
|| (i.types[0].bitfield.qword
&& ((i.reg_operands == 2
&& i.op[0].regs == i.op[1].regs
- && ((i.tm.base_opcode == 0x30
- || i.tm.base_opcode == 0x28)
- && i.tm.extension_opcode == None))
+ && (i.tm.base_opcode == 0x30
+ || i.tm.base_opcode == 0x28))
|| (i.reg_operands == 1
&& i.operands == 1
- && i.tm.base_opcode == 0x30
- && i.tm.extension_opcode == None)))))
+ && i.tm.base_opcode == 0x30)))))
{
/* Optimize: -O:
andq $imm31, %r64 -> andl $imm31, %r32
i.tm.base_opcode = 0xb8;
i.tm.extension_opcode = None;
i.tm.opcode_modifier.w = 0;
- i.tm.opcode_modifier.shortform = 1;
i.tm.opcode_modifier.modrm = 0;
}
}
}
else if (optimize > 1
&& !optimize_for_space
+ && !is_any_vex_encoding (&i.tm)
&& i.reg_operands == 2
&& i.op[0].regs == i.op[1].regs
&& ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
}
}
+/* Return non-zero for load instruction. */
+
+static int
+load_insn_p (void)
+{
+ unsigned int dest;
+ int any_vex_p = is_any_vex_encoding (&i.tm);
+ unsigned int base_opcode = i.tm.base_opcode | 1;
+
+ if (!any_vex_p)
+ {
+ /* Anysize insns: lea, invlpg, clflush, prefetchnta, prefetcht0,
+ prefetcht1, prefetcht2, prefetchtw, bndmk, bndcl, bndcu, bndcn,
+ bndstx, bndldx, prefetchwt1, clflushopt, clwb, cldemote. */
+ if (i.tm.opcode_modifier.anysize)
+ return 0;
+
+ /* pop, popf, popa. */
+ if (strcmp (i.tm.name, "pop") == 0
+ || i.tm.base_opcode == 0x9d
+ || i.tm.base_opcode == 0x61)
+ return 1;
+
+ /* movs, cmps, lods, scas. */
+ if ((i.tm.base_opcode | 0xb) == 0xaf)
+ return 1;
+
+ /* outs, xlatb. */
+ if (base_opcode == 0x6f
+ || i.tm.base_opcode == 0xd7)
+ return 1;
+ /* NB: For AMD-specific insns with implicit memory operands,
+ they're intentionally not covered. */
+ }
+
+ /* No memory operand. */
+ if (!i.mem_operands)
+ return 0;
+
+ if (any_vex_p)
+ {
+ /* vldmxcsr. */
+ if (i.tm.base_opcode == 0xae
+ && i.tm.opcode_modifier.vex
+ && i.tm.opcode_modifier.vexopcode == VEX0F
+ && i.tm.extension_opcode == 2)
+ return 1;
+ }
+ else
+ {
+ /* test, not, neg, mul, imul, div, idiv. */
+ if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
+ && i.tm.extension_opcode != 1)
+ return 1;
+
+ /* inc, dec. */
+ if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
+ return 1;
+
+ /* add, or, adc, sbb, and, sub, xor, cmp. */
+ if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
+ return 1;
+
+ /* bt, bts, btr, btc. */
+ if (i.tm.base_opcode == 0xfba
+ && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
+ return 1;
+
+ /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
+ if ((base_opcode == 0xc1
+ || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
+ && i.tm.extension_opcode != 6)
+ return 1;
+
+ /* cmpxchg8b, cmpxchg16b, xrstors. */
+ if (i.tm.base_opcode == 0xfc7
+ && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3))
+ return 1;
+
+ /* fxrstor, ldmxcsr, xrstor. */
+ if (i.tm.base_opcode == 0xfae
+ && (i.tm.extension_opcode == 1
+ || i.tm.extension_opcode == 2
+ || i.tm.extension_opcode == 5))
+ return 1;
+
+ /* lgdt, lidt, lmsw. */
+ if (i.tm.base_opcode == 0xf01
+ && (i.tm.extension_opcode == 2
+ || i.tm.extension_opcode == 3
+ || i.tm.extension_opcode == 6))
+ return 1;
+
+ /* vmptrld */
+ if (i.tm.base_opcode == 0xfc7
+ && i.tm.extension_opcode == 6)
+ return 1;
+
+ /* Check for x87 instructions. */
+ if (i.tm.base_opcode >= 0xd8 && i.tm.base_opcode <= 0xdf)
+ {
+ /* Skip fst, fstp, fstenv, fstcw. */
+ if (i.tm.base_opcode == 0xd9
+ && (i.tm.extension_opcode == 2
+ || i.tm.extension_opcode == 3
+ || i.tm.extension_opcode == 6
+ || i.tm.extension_opcode == 7))
+ return 0;
+
+ /* Skip fisttp, fist, fistp, fstp. */
+ if (i.tm.base_opcode == 0xdb
+ && (i.tm.extension_opcode == 1
+ || i.tm.extension_opcode == 2
+ || i.tm.extension_opcode == 3
+ || i.tm.extension_opcode == 7))
+ return 0;
+
+ /* Skip fisttp, fst, fstp, fsave, fstsw. */
+ if (i.tm.base_opcode == 0xdd
+ && (i.tm.extension_opcode == 1
+ || i.tm.extension_opcode == 2
+ || i.tm.extension_opcode == 3
+ || i.tm.extension_opcode == 6
+ || i.tm.extension_opcode == 7))
+ return 0;
+
+ /* Skip fisttp, fist, fistp, fbstp, fistp. */
+ if (i.tm.base_opcode == 0xdf
+ && (i.tm.extension_opcode == 1
+ || i.tm.extension_opcode == 2
+ || i.tm.extension_opcode == 3
+ || i.tm.extension_opcode == 6
+ || i.tm.extension_opcode == 7))
+ return 0;
+
+ return 1;
+ }
+ }
+
+ dest = i.operands - 1;
+
+ /* Check fake imm8 operand and 3 source operands. */
+ if ((i.tm.opcode_modifier.immext
+ || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
+ && i.types[dest].bitfield.imm8)
+ dest--;
+
+ /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg, xadd */
+ if (!any_vex_p
+ && (base_opcode == 0x1
+ || base_opcode == 0x9
+ || base_opcode == 0x11
+ || base_opcode == 0x19
+ || base_opcode == 0x21
+ || base_opcode == 0x29
+ || base_opcode == 0x31
+ || base_opcode == 0x39
+ || (i.tm.base_opcode >= 0x84 && i.tm.base_opcode <= 0x87)
+ || base_opcode == 0xfc1))
+ return 1;
+
+ /* Check for load instruction. */
+ return (i.types[dest].bitfield.class != ClassNone
+ || i.types[dest].bitfield.instance == Accum);
+}
+
+/* Output lfence, 0xfaee8, after instruction. */
+
+static void
+insert_lfence_after (void)
+{
+ if (lfence_after_load && load_insn_p ())
+ {
+ /* There are also two REP string instructions that require
+ special treatment. Specifically, the compare string (CMPS)
+ and scan string (SCAS) instructions set EFLAGS in a manner
+ that depends on the data being compared/scanned. When used
+ with a REP prefix, the number of iterations may therefore
+ vary depending on this data. If the data is a program secret
+ chosen by the adversary using an LVI method,
+ then this data-dependent behavior may leak some aspect
+ of the secret. */
+ if (((i.tm.base_opcode | 0x1) == 0xa7
+ || (i.tm.base_opcode | 0x1) == 0xaf)
+ && i.prefix[REP_PREFIX])
+ {
+ as_warn (_("`%s` changes flags which would affect control flow behavior"),
+ i.tm.name);
+ }
+ char *p = frag_more (3);
+ *p++ = 0xf;
+ *p++ = 0xae;
+ *p = 0xe8;
+ }
+}
+
+/* Output lfence, 0xfaee8, before instruction. */
+
+static void
+insert_lfence_before (void)
+{
+ char *p;
+
+ if (is_any_vex_encoding (&i.tm))
+ return;
+
+ if (i.tm.base_opcode == 0xff
+ && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
+ {
+ /* Insert lfence before indirect branch if needed. */
+
+ if (lfence_before_indirect_branch == lfence_branch_none)
+ return;
+
+ if (i.operands != 1)
+ abort ();
+
+ if (i.reg_operands == 1)
+ {
+ /* Indirect branch via register. Don't insert lfence with
+ -mlfence-after-load=yes. */
+ if (lfence_after_load
+ || lfence_before_indirect_branch == lfence_branch_memory)
+ return;
+ }
+ else if (i.mem_operands == 1
+ && lfence_before_indirect_branch != lfence_branch_register)
+ {
+ as_warn (_("indirect `%s` with memory operand should be avoided"),
+ i.tm.name);
+ return;
+ }
+ else
+ return;
+
+ if (last_insn.kind != last_insn_other
+ && last_insn.seg == now_seg)
+ {
+ as_warn_where (last_insn.file, last_insn.line,
+ _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
+ last_insn.name, i.tm.name);
+ return;
+ }
+
+ p = frag_more (3);
+ *p++ = 0xf;
+ *p++ = 0xae;
+ *p = 0xe8;
+ return;
+ }
+
+ /* Output or/not/shl and lfence before near ret. */
+ if (lfence_before_ret != lfence_before_ret_none
+ && (i.tm.base_opcode == 0xc2
+ || i.tm.base_opcode == 0xc3))
+ {
+ if (last_insn.kind != last_insn_other
+ && last_insn.seg == now_seg)
+ {
+ as_warn_where (last_insn.file, last_insn.line,
+ _("`%s` skips -mlfence-before-ret on `%s`"),
+ last_insn.name, i.tm.name);
+ return;
+ }
+
+ /* Near ret ingore operand size override under CPU64. */
+ char prefix = flag_code == CODE_64BIT
+ ? 0x48
+ : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
+
+ if (lfence_before_ret == lfence_before_ret_not)
+ {
+ /* not: 0xf71424, may add prefix
+ for operand size override or 64-bit code. */
+ p = frag_more ((prefix ? 2 : 0) + 6 + 3);
+ if (prefix)
+ *p++ = prefix;
+ *p++ = 0xf7;
+ *p++ = 0x14;
+ *p++ = 0x24;
+ if (prefix)
+ *p++ = prefix;
+ *p++ = 0xf7;
+ *p++ = 0x14;
+ *p++ = 0x24;
+ }
+ else
+ {
+ p = frag_more ((prefix ? 1 : 0) + 4 + 3);
+ if (prefix)
+ *p++ = prefix;
+ if (lfence_before_ret == lfence_before_ret_or)
+ {
+ /* or: 0x830c2400, may add prefix
+ for operand size override or 64-bit code. */
+ *p++ = 0x83;
+ *p++ = 0x0c;
+ }
+ else
+ {
+ /* shl: 0xc1242400, may add prefix
+ for operand size override or 64-bit code. */
+ *p++ = 0xc1;
+ *p++ = 0x24;
+ }
+
+ *p++ = 0x24;
+ *p++ = 0x0;
+ }
+
+ *p++ = 0xf;
+ *p++ = 0xae;
+ *p = 0xe8;
+ }
+}
+
/* This is the guts of the machine-dependent assembler. LINE points to a
machine dependent instruction. This function is supposed to emit
the frags/bytes it assembles to. */
/* Now we've parsed the mnemonic into a set of templates, and have the
operands at hand. */
- /* All intel opcodes have reversed operands except for "bound" and
- "enter". We also don't reverse intersegment "jmp" and "call"
- instructions with 2 immediate operands so that the immediate segment
- precedes the offset, as it does when in AT&T mode. */
+ /* All Intel opcodes have reversed operands except for "bound", "enter",
+ "monitor*", "mwait*", "tpause", and "umwait". We also don't reverse
+ intersegment "jmp" and "call" instructions with 2 immediate operands so
+ that the immediate segment precedes the offset, as it does when in AT&T
+ mode. */
if (intel_syntax
&& i.operands > 1
&& (strcmp (mnemonic, "bound") != 0)
&& (strcmp (mnemonic, "invlpga") != 0)
+ && (strncmp (mnemonic, "monitor", 7) != 0)
+ && (strncmp (mnemonic, "mwait", 5) != 0)
+ && (strcmp (mnemonic, "tpause") != 0)
+ && (strcmp (mnemonic, "umwait") != 0)
&& !(operand_type_check (i.types[0], imm)
&& operand_type_check (i.types[1], imm)))
swap_operands ();
if (sse_check != check_none
&& !i.tm.opcode_modifier.noavx
&& !i.tm.cpu_flags.bitfield.cpuavx
+ && !i.tm.cpu_flags.bitfield.cpuavx512f
&& (i.tm.cpu_flags.bitfield.cpusse
|| i.tm.cpu_flags.bitfield.cpusse2
|| i.tm.cpu_flags.bitfield.cpusse3
|| i.tm.cpu_flags.bitfield.cpusse4_2
|| i.tm.cpu_flags.bitfield.cpupclmul
|| i.tm.cpu_flags.bitfield.cpuaes
+ || i.tm.cpu_flags.bitfield.cpusha
|| i.tm.cpu_flags.bitfield.cpugfni))
{
(sse_check == check_warning
: as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
}
- /* Zap movzx and movsx suffix. The suffix has been set from
- "word ptr" or "byte ptr" on the source operand in Intel syntax
- or extracted from mnemonic in AT&T syntax. But we'll use
- the destination register to choose the suffix for encoding. */
- if ((i.tm.base_opcode & ~9) == 0x0fb6)
- {
- /* In Intel syntax, there must be a suffix. In AT&T syntax, if
- there is no suffix, the default will be byte extension. */
- if (i.reg_operands != 2
- && !i.suffix
- && intel_syntax)
- as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
-
- i.suffix = 0;
- }
-
if (i.tm.opcode_modifier.fwait)
if (!add_prefix (FWAIT_OPCODE))
return;
return;
}
- /* Check for data size prefix on VEX/XOP/EVEX encoded insns. */
- if (i.prefix[DATA_PREFIX] && is_any_vex_encoding (&i.tm))
+ /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns. */
+ if (i.prefix[DATA_PREFIX]
+ && (is_any_vex_encoding (&i.tm)
+ || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
+ || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX))
{
as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
return;
if (!process_suffix ())
return;
- /* Update operand types. */
+ /* Update operand types and check extended states. */
for (j = 0; j < i.operands; j++)
- i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
+ {
+ i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
+ switch (i.tm.operand_types[j].bitfield.class)
+ {
+ default:
+ break;
+ case RegMMX:
+ i.xstate |= xstate_mmx;
+ break;
+ case RegMask:
+ i.xstate |= xstate_zmm;
+ break;
+ case RegSIMD:
+ if (i.tm.operand_types[j].bitfield.tmmword)
+ i.xstate |= xstate_tmm;
+ else if (i.tm.operand_types[j].bitfield.zmmword)
+ i.xstate |= xstate_zmm;
+ else if (i.tm.operand_types[j].bitfield.ymmword)
+ i.xstate |= xstate_ymm;
+ else if (i.tm.operand_types[j].bitfield.xmmword)
+ i.xstate |= xstate_xmm;
+ break;
+ }
+ }
/* Make still unresolved immediate matches conform to size of immediate
given in i.suffix. */
&& !i.types[j].bitfield.xmmword)
i.reg_operands--;
- /* ImmExt should be processed after SSE2AVX. */
- if (!i.tm.opcode_modifier.sse2avx
- && i.tm.opcode_modifier.immext)
- process_immext ();
-
/* For insns with operands there are more diddles to do to the opcode. */
if (i.operands)
{
return;
}
+ /* Check for explicit REX prefix. */
+ if (i.prefix[REX_PREFIX] || i.rex_encoding)
+ {
+ as_bad (_("REX prefix invalid with `%s'"), i.tm.name);
+ return;
+ }
+
if (i.tm.opcode_modifier.vex)
build_vex_prefix (t);
else
build_evex_prefix ();
+
+ /* The individual REX.RXBW bits got consumed. */
+ i.rex &= REX_OPCODE;
}
/* Handle conversion of 'int $3' --> special int3 insn. XOP or FMA4
i.op[0].disps->X_op = O_symbol;
}
- if (i.tm.opcode_modifier.rex64)
- i.rex |= REX_W;
-
/* For 8 bit registers we need an empty rex prefix. Also if the
instruction already has a prefix, we need to convert old
registers to new ones. */
if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
&& (i.op[x].regs->reg_flags & RegRex64) == 0)
{
+ gas_assert (!(i.op[x].regs->reg_flags & RegRex));
/* In case it is "hi" register, give up. */
if (i.op[x].regs->reg_num > 3)
as_bad (_("can't encode register '%s%s' in an "
if (i.rex == 0 && i.rex_encoding)
{
/* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
- that uses legacy register. If it is "hi" register, don't add
+ that uses legacy register. If it is "hi" register, don't add
the REX_OPCODE byte. */
int x;
for (x = 0; x < 2; x++)
&& (i.op[x].regs->reg_flags & RegRex64) == 0
&& i.op[x].regs->reg_num > 3)
{
+ gas_assert (!(i.op[x].regs->reg_flags & RegRex));
i.rex_encoding = FALSE;
break;
}
if (i.rex != 0)
add_prefix (REX_OPCODE | i.rex);
+ insert_lfence_before ();
+
/* We are ready to output the insn. */
output_insn ();
+
+ insert_lfence_after ();
+
+ last_insn.seg = now_seg;
+
+ if (i.tm.opcode_modifier.isprefix)
+ {
+ last_insn.kind = last_insn_prefix;
+ last_insn.name = i.tm.name;
+ last_insn.file = as_where (&last_insn.line);
+ }
+ else
+ last_insn.kind = last_insn_other;
}
static char *
}
/* Look up instruction (or prefix) via hash table. */
- current_templates = (const templates *) hash_find (op_hash, mnemonic);
+ current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
if (*l != END_OF_INSN
&& (!is_space_char (*l) || l[1] != END_OF_INSN)
/* Handle pseudo prefixes. */
switch (current_templates->start->base_opcode)
{
- case 0x0:
+ case Prefix_Disp8:
/* {disp8} */
i.disp_encoding = disp_encoding_8bit;
break;
- case 0x1:
+ case Prefix_Disp16:
+ /* {disp16} */
+ i.disp_encoding = disp_encoding_16bit;
+ break;
+ case Prefix_Disp32:
/* {disp32} */
i.disp_encoding = disp_encoding_32bit;
break;
- case 0x2:
+ case Prefix_Load:
/* {load} */
i.dir_encoding = dir_encoding_load;
break;
- case 0x3:
+ case Prefix_Store:
/* {store} */
i.dir_encoding = dir_encoding_store;
break;
- case 0x4:
- /* {vex2} */
- i.vec_encoding = vex_encoding_vex2;
+ case Prefix_VEX:
+ /* {vex} */
+ i.vec_encoding = vex_encoding_vex;
break;
- case 0x5:
+ case Prefix_VEX3:
/* {vex3} */
i.vec_encoding = vex_encoding_vex3;
break;
- case 0x6:
+ case Prefix_EVEX:
/* {evex} */
i.vec_encoding = vex_encoding_evex;
break;
- case 0x7:
+ case Prefix_REX:
/* {rex} */
i.rex_encoding = TRUE;
break;
- case 0x8:
+ case Prefix_NoOptimize:
/* {nooptimize} */
i.no_optimize = TRUE;
break;
goto check_suffix;
mnem_p = dot_p;
*dot_p = '\0';
- current_templates = (const templates *) hash_find (op_hash, mnemonic);
+ current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
}
if (!current_templates)
{
-check_suffix:
+ check_suffix:
if (mnem_p > mnemonic)
{
/* See if we can get a match by trimming off a suffix. */
case QWORD_MNEM_SUFFIX:
i.suffix = mnem_p[-1];
mnem_p[-1] = '\0';
- current_templates = (const templates *) hash_find (op_hash,
- mnemonic);
+ current_templates
+ = (const templates *) str_hash_find (op_hash, mnemonic);
break;
case SHORT_MNEM_SUFFIX:
case LONG_MNEM_SUFFIX:
{
i.suffix = mnem_p[-1];
mnem_p[-1] = '\0';
- current_templates = (const templates *) hash_find (op_hash,
- mnemonic);
+ current_templates
+ = (const templates *) str_hash_find (op_hash, mnemonic);
}
break;
else
i.suffix = LONG_MNEM_SUFFIX;
mnem_p[-1] = '\0';
- current_templates = (const templates *) hash_find (op_hash,
- mnemonic);
+ current_templates
+ = (const templates *) str_hash_find (op_hash, mnemonic);
}
break;
}
{
unsigned int op;
i386_cpu_flags cpu;
- static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
/* Templates allowing for ZMMword as well as YMMword and/or XMMword for
any one operand are implicity requiring AVX512VL support if the actual
}
/* Without VSIB byte, we can't have a vector register for index. */
- if (!t->opcode_modifier.vecsib
+ if (!t->opcode_modifier.sib
&& i.index_reg
&& (i.index_reg->reg_type.bitfield.xmmword
|| i.index_reg->reg_type.bitfield.ymmword
/* For VSIB byte, we need a vector register for index, and all vector
registers must be distinct. */
- if (t->opcode_modifier.vecsib)
+ if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
{
if (!i.index_reg
- || !((t->opcode_modifier.vecsib == VecSIB128
+ || !((t->opcode_modifier.sib == VECSIB128
&& i.index_reg->reg_type.bitfield.xmmword)
- || (t->opcode_modifier.vecsib == VecSIB256
+ || (t->opcode_modifier.sib == VECSIB256
&& i.index_reg->reg_type.bitfield.ymmword)
- || (t->opcode_modifier.vecsib == VecSIB512
+ || (t->opcode_modifier.sib == VECSIB512
&& i.index_reg->reg_type.bitfield.zmmword)))
{
i.error = invalid_vsib_address;
}
}
+ /* For AMX instructions with three tmmword operands, all tmmword operand must be
+ distinct */
+ if (t->operand_types[0].bitfield.tmmword
+ && i.reg_operands == 3)
+ {
+ if (register_number (i.op[0].regs)
+ == register_number (i.op[1].regs)
+ || register_number (i.op[0].regs)
+ == register_number (i.op[2].regs)
+ || register_number (i.op[1].regs)
+ == register_number (i.op[2].regs))
+ {
+ i.error = invalid_tmm_register_set;
+ return 1;
+ }
+ }
+
/* Check if broadcast is supported by the instruction and is applied
to the memory operand. */
if (i.broadcast)
}
overlap = operand_type_and (type, t->operand_types[op]);
+ if (t->operand_types[op].bitfield.class == RegSIMD
+ && t->operand_types[op].bitfield.byte
+ + t->operand_types[op].bitfield.word
+ + t->operand_types[op].bitfield.dword
+ + t->operand_types[op].bitfield.qword > 1)
+ {
+ overlap.bitfield.xmmword = 0;
+ overlap.bitfield.ymmword = 0;
+ overlap.bitfield.zmmword = 0;
+ }
if (operand_type_all_zero (&overlap))
goto bad_broadcast;
}
}
+ /* Check the special Imm4 cases; must be the first operand. */
+ if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
+ {
+ if (i.op[0].imms->X_op != O_constant
+ || !fits_in_imm4 (i.op[0].imms->X_add_number))
+ {
+ i.error = bad_imm4;
+ return 1;
+ }
+
+ /* Turn off Imm<N> so that update_imm won't complain. */
+ operand_type_set (&i.types[0], 0);
+ }
+
/* Check vector Disp8 operand. */
if (t->opcode_modifier.disp8memshift
&& i.disp_encoding != disp_encoding_32bit)
return 0;
}
-/* Check if operands are valid for the instruction. Update VEX
- operand types. */
+/* Check if encoding requirements are met by the instruction. */
static int
-VEX_check_operands (const insn_template *t)
+VEX_check_encoding (const insn_template *t)
{
+ if (i.vec_encoding == vex_encoding_error)
+ {
+ i.error = unsupported;
+ return 1;
+ }
+
if (i.vec_encoding == vex_encoding_evex)
{
/* This instruction must be encoded with EVEX prefix. */
return 0;
}
- /* Check the special Imm4 cases; must be the first operand. */
- if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
- {
- if (i.op[0].imms->X_op != O_constant
- || !fits_in_imm4 (i.op[0].imms->X_add_number))
- {
- i.error = bad_imm4;
- return 1;
- }
-
- /* Turn off Imm<N> so that update_imm won't complain. */
- operand_type_set (&i.types[0], 0);
- }
-
return 0;
}
i386_opcode_modifier suffix_check;
i386_operand_type operand_types [MAX_OPERANDS];
int addr_prefix_disp;
- unsigned int j;
- unsigned int found_cpu_match, size_match;
- unsigned int check_register;
+ unsigned int j, size_match, check_register;
enum i386_error specific_error = 0;
#if MAX_OPERANDS != 5
/* Check processor support. */
i.error = unsupported;
- found_cpu_match = (cpu_flags_match (t)
- == CPU_FLAGS_PERFECT_MATCH);
- if (!found_cpu_match)
+ if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
continue;
/* Check AT&T mnemonic. */
if (intel_mnemonic && t->opcode_modifier.attmnemonic)
continue;
- /* Check AT&T/Intel syntax and Intel64/AMD64 ISA. */
+ /* Check AT&T/Intel syntax. */
i.error = unsupported_syntax;
if ((intel_syntax && t->opcode_modifier.attsyntax)
- || (!intel_syntax && t->opcode_modifier.intelsyntax)
- || (intel64 && t->opcode_modifier.amd64)
- || (!intel64 && t->opcode_modifier.intel64))
+ || (!intel_syntax && t->opcode_modifier.intelsyntax))
continue;
- /* Check the suffix. */
- i.error = invalid_instruction_suffix;
+ /* Check Intel64/AMD64 ISA. */
+ switch (isa64)
+ {
+ default:
+ /* Default: Don't accept Intel64. */
+ if (t->opcode_modifier.isa64 == INTEL64)
+ continue;
+ break;
+ case amd64:
+ /* -mamd64: Don't accept Intel64 and Intel64 only. */
+ if (t->opcode_modifier.isa64 >= INTEL64)
+ continue;
+ break;
+ case intel64:
+ /* -mintel64: Don't accept AMD64. */
+ if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
+ continue;
+ break;
+ }
+
+ /* Check the suffix. */
+ i.error = invalid_instruction_suffix;
if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
|| (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf)
|| (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf)
for (j = 0; j < MAX_OPERANDS; j++)
operand_types[j] = t->operand_types[j];
- /* In general, don't allow 64-bit operands in 32-bit mode. */
- if (i.suffix == QWORD_MNEM_SUFFIX
- && flag_code != CODE_64BIT
+ /* In general, don't allow
+ - 64-bit operands outside of 64-bit mode,
+ - 32-bit operands on pre-386. */
+ j = i.imm_operands + (t->operands > i.imm_operands + 1);
+ if (((i.suffix == QWORD_MNEM_SUFFIX
+ && flag_code != CODE_64BIT
+ && (t->base_opcode != 0x0fc7
+ || t->extension_opcode != 1 /* cmpxchg8b */))
+ || (i.suffix == LONG_MNEM_SUFFIX
+ && !cpu_arch_flags.bitfield.cpui386))
&& (intel_syntax
- ? (!t->opcode_modifier.ignoresize
- && !t->opcode_modifier.broadcast
+ ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
&& !intel_float_operand (t->name))
: intel_float_operand (t->name) != 2)
- && ((operand_types[0].bitfield.class != RegMMX
- && operand_types[0].bitfield.class != RegSIMD)
- || (operand_types[t->operands > 1].bitfield.class != RegMMX
- && operand_types[t->operands > 1].bitfield.class != RegSIMD))
- && (t->base_opcode != 0x0fc7
- || t->extension_opcode != 1 /* cmpxchg8b */))
- continue;
-
- /* In general, don't allow 32-bit operands on pre-386. */
- else if (i.suffix == LONG_MNEM_SUFFIX
- && !cpu_arch_flags.bitfield.cpui386
- && (intel_syntax
- ? (!t->opcode_modifier.ignoresize
- && !intel_float_operand (t->name))
- : intel_float_operand (t->name) != 2)
- && ((operand_types[0].bitfield.class != RegMMX
- && operand_types[0].bitfield.class != RegSIMD)
- || (operand_types[t->operands > 1].bitfield.class != RegMMX
- && operand_types[t->operands > 1].bitfield.class
- != RegSIMD)))
+ && (t->operands == i.imm_operands
+ || (operand_types[i.imm_operands].bitfield.class != RegMMX
+ && operand_types[i.imm_operands].bitfield.class != RegSIMD
+ && operand_types[i.imm_operands].bitfield.class != RegMask)
+ || (operand_types[j].bitfield.class != RegMMX
+ && operand_types[j].bitfield.class != RegSIMD
+ && operand_types[j].bitfield.class != RegMask))
+ && !t->opcode_modifier.sib)
continue;
/* Do not verify operands when there are none. */
- else
+ if (!t->operands)
{
- if (!t->operands)
- /* We've found a match; break out of loop. */
- break;
+ if (VEX_check_encoding (t))
+ {
+ specific_error = i.error;
+ continue;
+ }
+
+ /* We've found a match; break out of loop. */
+ break;
}
- /* Address size prefix will turn Disp64/Disp32/Disp16 operand
- into Disp32/Disp16/Disp32 operand. */
- if (i.prefix[ADDR_PREFIX] != 0)
- {
- /* There should be only one Disp operand. */
- switch (flag_code)
- {
- case CODE_16BIT:
- for (j = 0; j < MAX_OPERANDS; j++)
- {
- if (operand_types[j].bitfield.disp16)
- {
- addr_prefix_disp = j;
- operand_types[j].bitfield.disp32 = 1;
- operand_types[j].bitfield.disp16 = 0;
- break;
- }
- }
+ if (!t->opcode_modifier.jump
+ || t->opcode_modifier.jump == JUMP_ABSOLUTE)
+ {
+ /* There should be only one Disp operand. */
+ for (j = 0; j < MAX_OPERANDS; j++)
+ if (operand_type_check (operand_types[j], disp))
break;
- case CODE_32BIT:
- for (j = 0; j < MAX_OPERANDS; j++)
+ if (j < MAX_OPERANDS)
+ {
+ bfd_boolean override = (i.prefix[ADDR_PREFIX] != 0);
+
+ addr_prefix_disp = j;
+
+ /* Address size prefix will turn Disp64/Disp32S/Disp32/Disp16
+ operand into Disp32/Disp32/Disp16/Disp32 operand. */
+ switch (flag_code)
{
- if (operand_types[j].bitfield.disp32)
+ case CODE_16BIT:
+ override = !override;
+ /* Fall through. */
+ case CODE_32BIT:
+ if (operand_types[j].bitfield.disp32
+ && operand_types[j].bitfield.disp16)
{
- addr_prefix_disp = j;
- operand_types[j].bitfield.disp32 = 0;
- operand_types[j].bitfield.disp16 = 1;
- break;
+ operand_types[j].bitfield.disp16 = override;
+ operand_types[j].bitfield.disp32 = !override;
}
- }
- break;
- case CODE_64BIT:
- for (j = 0; j < MAX_OPERANDS; j++)
- {
- if (operand_types[j].bitfield.disp64)
+ operand_types[j].bitfield.disp32s = 0;
+ operand_types[j].bitfield.disp64 = 0;
+ break;
+
+ case CODE_64BIT:
+ if (operand_types[j].bitfield.disp32s
+ || operand_types[j].bitfield.disp64)
{
- addr_prefix_disp = j;
- operand_types[j].bitfield.disp64 = 0;
- operand_types[j].bitfield.disp32 = 1;
- break;
+ operand_types[j].bitfield.disp64 &= !override;
+ operand_types[j].bitfield.disp32s &= !override;
+ operand_types[j].bitfield.disp32 = override;
}
+ operand_types[j].bitfield.disp16 = 0;
+ break;
}
- break;
}
- }
+ }
/* Force 0x8b encoding for "mov foo@GOT, %eax". */
if (i.reloc[0] == BFD_RELOC_386_GOT32 && t->base_opcode == 0xa0)
if (!t->opcode_modifier.d)
continue;
-check_reverse:
+ check_reverse:
if (!(size_match & MATCH_REVERSE))
continue;
/* Try reversing direction of operands. */
/* Found either forward/reverse 2, 3 or 4 operand match here:
slip through to break. */
}
- if (!found_cpu_match)
- continue;
- /* Check if vector and VEX operands are valid. */
- if (check_VecOperands (t) || VEX_check_operands (t))
+ /* Check if vector operands are valid. */
+ if (check_VecOperands (t))
+ {
+ specific_error = i.error;
+ continue;
+ }
+
+ /* Check if VEX/EVEX encoding requirements can be satisfied. */
+ if (VEX_check_encoding (t))
{
specific_error = i.error;
continue;
as_bad (_("unsupported instruction `%s'"),
current_templates->start->name);
return NULL;
+ case invalid_sib_address:
+ err_msg = _("invalid SIB address");
+ break;
case invalid_vsib_address:
err_msg = _("invalid VSIB address");
break;
case invalid_vector_register_set:
err_msg = _("mask, index, and destination registers must be distinct");
break;
+ case invalid_tmm_register_set:
+ err_msg = _("all tmm registers must be distinct");
+ break;
case unsupported_vector_index_register:
err_msg = _("unsupported vector index register");
break;
as_warn (_("indirect %s without `*'"), t->name);
if (t->opcode_modifier.isprefix
- && t->opcode_modifier.ignoresize)
+ && t->opcode_modifier.mnemonicsize == IGNORESIZE)
{
/* Warn them that a data or address size prefix doesn't
affect assembly of the next line of code. */
i.suffix = LONG_MNEM_SUFFIX;
else if (i.tm.opcode_modifier.size == SIZE64)
i.suffix = QWORD_MNEM_SUFFIX;
- else if (i.reg_operands)
+ else if (i.reg_operands
+ && (i.operands > 1 || i.types[0].bitfield.class == Reg)
+ && !i.tm.opcode_modifier.addrprefixopreg)
{
+ unsigned int numop = i.operands;
+
+ /* movsx/movzx want only their source operand considered here, for the
+ ambiguity checking below. The suffix will be replaced afterwards
+ to represent the destination (register). */
+ if (((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w)
+ || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
+ --i.operands;
+
+ /* crc32 needs REX.W set regardless of suffix / source operand size. */
+ if (i.tm.base_opcode == 0xf20f38f0
+ && i.tm.operand_types[1].bitfield.qword)
+ i.rex |= REX_W;
+
/* If there's no instruction mnemonic suffix we try to invent one
- based on register operands. */
+ based on GPR operands. */
if (!i.suffix)
{
/* We take i.suffix from the last register operand specified,
Destination register type is more significant than source
register type. crc32 in SSE4.2 prefers source register
type. */
- if (i.tm.base_opcode == 0xf20f38f0
- && i.types[0].bitfield.class == Reg)
- {
- if (i.types[0].bitfield.byte)
- i.suffix = BYTE_MNEM_SUFFIX;
- else if (i.types[0].bitfield.word)
- i.suffix = WORD_MNEM_SUFFIX;
- else if (i.types[0].bitfield.dword)
- i.suffix = LONG_MNEM_SUFFIX;
- else if (i.types[0].bitfield.qword)
- i.suffix = QWORD_MNEM_SUFFIX;
- }
+ unsigned int op = i.tm.base_opcode != 0xf20f38f0 ? i.operands : 1;
- if (!i.suffix)
- {
- int op;
-
- if (i.tm.base_opcode == 0xf20f38f0)
- {
- /* We have to know the operand size for crc32. */
- as_bad (_("ambiguous memory operand size for `%s`"),
- i.tm.name);
- return 0;
- }
+ while (op--)
+ if (i.tm.operand_types[op].bitfield.instance == InstanceNone
+ || i.tm.operand_types[op].bitfield.instance == Accum)
+ {
+ if (i.types[op].bitfield.class != Reg)
+ continue;
+ if (i.types[op].bitfield.byte)
+ i.suffix = BYTE_MNEM_SUFFIX;
+ else if (i.types[op].bitfield.word)
+ i.suffix = WORD_MNEM_SUFFIX;
+ else if (i.types[op].bitfield.dword)
+ i.suffix = LONG_MNEM_SUFFIX;
+ else if (i.types[op].bitfield.qword)
+ i.suffix = QWORD_MNEM_SUFFIX;
+ else
+ continue;
+ break;
+ }
- for (op = i.operands; --op >= 0;)
- if (i.tm.operand_types[op].bitfield.instance == InstanceNone
- || i.tm.operand_types[op].bitfield.instance == Accum)
- {
- if (i.types[op].bitfield.class != Reg)
- continue;
- if (i.types[op].bitfield.byte)
- i.suffix = BYTE_MNEM_SUFFIX;
- else if (i.types[op].bitfield.word)
- i.suffix = WORD_MNEM_SUFFIX;
- else if (i.types[op].bitfield.dword)
- i.suffix = LONG_MNEM_SUFFIX;
- else if (i.types[op].bitfield.qword)
- i.suffix = QWORD_MNEM_SUFFIX;
- else
- continue;
- break;
- }
- }
+ /* As an exception, movsx/movzx silently default to a byte source
+ in AT&T mode. */
+ if ((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w
+ && !i.suffix && !intel_syntax)
+ i.suffix = BYTE_MNEM_SUFFIX;
}
else if (i.suffix == BYTE_MNEM_SUFFIX)
{
if (intel_syntax
- && i.tm.opcode_modifier.ignoresize
+ && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
&& i.tm.opcode_modifier.no_bsuf)
i.suffix = 0;
else if (!check_byte_reg ())
else if (i.suffix == LONG_MNEM_SUFFIX)
{
if (intel_syntax
- && i.tm.opcode_modifier.ignoresize
+ && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
&& i.tm.opcode_modifier.no_lsuf
&& !i.tm.opcode_modifier.todword
&& !i.tm.opcode_modifier.toqword)
else if (i.suffix == QWORD_MNEM_SUFFIX)
{
if (intel_syntax
- && i.tm.opcode_modifier.ignoresize
+ && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
&& i.tm.opcode_modifier.no_qsuf
&& !i.tm.opcode_modifier.todword
&& !i.tm.opcode_modifier.toqword)
else if (i.suffix == WORD_MNEM_SUFFIX)
{
if (intel_syntax
- && i.tm.opcode_modifier.ignoresize
+ && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
&& i.tm.opcode_modifier.no_wsuf)
i.suffix = 0;
else if (!check_word_reg ())
return 0;
}
- else if (intel_syntax && i.tm.opcode_modifier.ignoresize)
+ else if (intel_syntax
+ && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
/* Do nothing if the instruction is going to ignore the prefix. */
;
else
abort ();
+
+ /* Undo the movsx/movzx change done above. */
+ i.operands = numop;
}
- else if (i.tm.opcode_modifier.defaultsize
- && !i.suffix
- /* exclude fldenv/frstor/fsave/fstenv */
- && i.tm.opcode_modifier.no_ssuf)
+ else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
+ && !i.suffix)
{
- if (stackop_size == LONG_MNEM_SUFFIX
- && i.tm.base_opcode == 0xcf)
+ i.suffix = stackop_size;
+ if (stackop_size == LONG_MNEM_SUFFIX)
{
/* stackop_size is set to LONG_MNEM_SUFFIX for the
.code16gcc directive to support 16-bit mode with
32-bit address. For IRET without a suffix, generate
16-bit IRET (opcode 0xcf) to return from an interrupt
handler. */
- i.suffix = WORD_MNEM_SUFFIX;
- as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
+ if (i.tm.base_opcode == 0xcf)
+ {
+ i.suffix = WORD_MNEM_SUFFIX;
+ as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
+ }
+ /* Warn about changed behavior for segment register push/pop. */
+ else if ((i.tm.base_opcode | 1) == 0x07)
+ as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
+ i.tm.name);
}
- else
- i.suffix = stackop_size;
}
- else if (intel_syntax
- && !i.suffix
+ else if (!i.suffix
&& (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
|| i.tm.opcode_modifier.jump == JUMP_BYTE
|| i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
case CODE_64BIT:
if (!i.tm.opcode_modifier.no_qsuf)
{
- i.suffix = QWORD_MNEM_SUFFIX;
+ if (i.tm.opcode_modifier.jump == JUMP_BYTE
+ || i.tm.opcode_modifier.no_lsuf)
+ i.suffix = QWORD_MNEM_SUFFIX;
break;
}
/* Fall through. */
}
}
- if (!i.suffix)
- {
- if (!intel_syntax)
+ if (!i.suffix
+ && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
+ /* Also cover lret/retf/iret in 64-bit mode. */
+ || (flag_code == CODE_64BIT
+ && !i.tm.opcode_modifier.no_lsuf
+ && !i.tm.opcode_modifier.no_qsuf))
+ && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
+ /* Explicit sizing prefixes are assumed to disambiguate insns. */
+ && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
+ /* Accept FLDENV et al without suffix. */
+ && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
+ {
+ unsigned int suffixes, evex = 0;
+
+ suffixes = !i.tm.opcode_modifier.no_bsuf;
+ if (!i.tm.opcode_modifier.no_wsuf)
+ suffixes |= 1 << 1;
+ if (!i.tm.opcode_modifier.no_lsuf)
+ suffixes |= 1 << 2;
+ if (!i.tm.opcode_modifier.no_ldsuf)
+ suffixes |= 1 << 3;
+ if (!i.tm.opcode_modifier.no_ssuf)
+ suffixes |= 1 << 4;
+ if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
+ suffixes |= 1 << 5;
+
+ /* For [XYZ]MMWORD operands inspect operand sizes. While generally
+ also suitable for AT&T syntax mode, it was requested that this be
+ restricted to just Intel syntax. */
+ if (intel_syntax && is_any_vex_encoding (&i.tm) && !i.broadcast)
{
- if (i.tm.opcode_modifier.w)
+ unsigned int op;
+
+ for (op = 0; op < i.tm.operands; ++op)
{
- as_bad (_("no instruction mnemonic suffix given and "
- "no register operands; can't size instruction"));
- return 0;
+ if (is_evex_encoding (&i.tm)
+ && !cpu_arch_flags.bitfield.cpuavx512vl)
+ {
+ if (i.tm.operand_types[op].bitfield.ymmword)
+ i.tm.operand_types[op].bitfield.xmmword = 0;
+ if (i.tm.operand_types[op].bitfield.zmmword)
+ i.tm.operand_types[op].bitfield.ymmword = 0;
+ if (!i.tm.opcode_modifier.evex
+ || i.tm.opcode_modifier.evex == EVEXDYN)
+ i.tm.opcode_modifier.evex = EVEX512;
+ }
+
+ if (i.tm.operand_types[op].bitfield.xmmword
+ + i.tm.operand_types[op].bitfield.ymmword
+ + i.tm.operand_types[op].bitfield.zmmword < 2)
+ continue;
+
+ /* Any properly sized operand disambiguates the insn. */
+ if (i.types[op].bitfield.xmmword
+ || i.types[op].bitfield.ymmword
+ || i.types[op].bitfield.zmmword)
+ {
+ suffixes &= ~(7 << 6);
+ evex = 0;
+ break;
+ }
+
+ if ((i.flags[op] & Operand_Mem)
+ && i.tm.operand_types[op].bitfield.unspecified)
+ {
+ if (i.tm.operand_types[op].bitfield.xmmword)
+ suffixes |= 1 << 6;
+ if (i.tm.operand_types[op].bitfield.ymmword)
+ suffixes |= 1 << 7;
+ if (i.tm.operand_types[op].bitfield.zmmword)
+ suffixes |= 1 << 8;
+ if (is_evex_encoding (&i.tm))
+ evex = EVEX512;
+ }
}
}
- else
- {
- unsigned int suffixes;
- suffixes = !i.tm.opcode_modifier.no_bsuf;
- if (!i.tm.opcode_modifier.no_wsuf)
- suffixes |= 1 << 1;
- if (!i.tm.opcode_modifier.no_lsuf)
- suffixes |= 1 << 2;
- if (!i.tm.opcode_modifier.no_ldsuf)
- suffixes |= 1 << 3;
- if (!i.tm.opcode_modifier.no_ssuf)
- suffixes |= 1 << 4;
- if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
- suffixes |= 1 << 5;
-
- /* There are more than suffix matches. */
- if (i.tm.opcode_modifier.w
- || ((suffixes & (suffixes - 1))
- && !i.tm.opcode_modifier.defaultsize
- && !i.tm.opcode_modifier.ignoresize))
+ /* Are multiple suffixes / operand sizes allowed? */
+ if (suffixes & (suffixes - 1))
+ {
+ if (intel_syntax
+ && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
+ || operand_check == check_error))
{
as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
return 0;
}
+ if (operand_check == check_error)
+ {
+ as_bad (_("no instruction mnemonic suffix given and "
+ "no register operands; can't size `%s'"), i.tm.name);
+ return 0;
+ }
+ if (operand_check == check_warning)
+ as_warn (_("%s; using default for `%s'"),
+ intel_syntax
+ ? _("ambiguous operand size")
+ : _("no instruction mnemonic suffix given and "
+ "no register operands"),
+ i.tm.name);
+
+ if (i.tm.opcode_modifier.floatmf)
+ i.suffix = SHORT_MNEM_SUFFIX;
+ else if ((i.tm.base_opcode | 8) == 0xfbe
+ || (i.tm.base_opcode == 0x63
+ && i.tm.cpu_flags.bitfield.cpu64))
+ /* handled below */;
+ else if (evex)
+ i.tm.opcode_modifier.evex = evex;
+ else if (flag_code == CODE_16BIT)
+ i.suffix = WORD_MNEM_SUFFIX;
+ else if (!i.tm.opcode_modifier.no_lsuf)
+ i.suffix = LONG_MNEM_SUFFIX;
+ else
+ i.suffix = QWORD_MNEM_SUFFIX;
}
}
+ if ((i.tm.base_opcode | 8) == 0xfbe
+ || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
+ {
+ /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
+ In AT&T syntax, if there is no suffix (warned about above), the default
+ will be byte extension. */
+ if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
+ i.tm.base_opcode |= 1;
+
+ /* For further processing, the suffix should represent the destination
+ (register). This is already the case when one was used with
+ mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
+ no suffix to begin with. */
+ if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
+ {
+ if (i.types[1].bitfield.word)
+ i.suffix = WORD_MNEM_SUFFIX;
+ else if (i.types[1].bitfield.qword)
+ i.suffix = QWORD_MNEM_SUFFIX;
+ else
+ i.suffix = LONG_MNEM_SUFFIX;
+
+ i.tm.opcode_modifier.w = 0;
+ }
+ }
+
+ if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
+ i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
+ != (i.tm.operand_types[1].bitfield.class == Reg);
+
/* Change the opcode based on the operand size given by i.suffix. */
switch (i.suffix)
{
/* It's not a byte, select word/dword operation. */
if (i.tm.opcode_modifier.w)
{
- if (i.tm.opcode_modifier.shortform)
+ if (i.short_form)
i.tm.base_opcode |= 8;
else
i.tm.base_opcode |= 1;
/* Now select between word & dword operations via the operand
size prefix, except for instructions that will ignore this
prefix anyway. */
- if (i.reg_operands > 0
- && i.types[0].bitfield.class == Reg
- && i.tm.opcode_modifier.addrprefixopreg
- && (i.tm.operand_types[0].bitfield.instance == Accum
- || i.operands == 1))
- {
- /* The address size override prefix changes the size of the
- first operand. */
- if ((flag_code == CODE_32BIT
- && i.op[0].regs->reg_type.bitfield.word)
- || (flag_code != CODE_32BIT
- && i.op[0].regs->reg_type.bitfield.dword))
- if (!add_prefix (ADDR_PREFIX_OPCODE))
- return 0;
- }
- else if (i.suffix != QWORD_MNEM_SUFFIX
- && !i.tm.opcode_modifier.ignoresize
- && !i.tm.opcode_modifier.floatmf
- && !is_any_vex_encoding (&i.tm)
- && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
- || (flag_code == CODE_64BIT
- && i.tm.opcode_modifier.jump == JUMP_BYTE)))
+ if (i.suffix != QWORD_MNEM_SUFFIX
+ && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
+ && !i.tm.opcode_modifier.floatmf
+ && !is_any_vex_encoding (&i.tm)
+ && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
+ || (flag_code == CODE_64BIT
+ && i.tm.opcode_modifier.jump == JUMP_BYTE)))
{
unsigned int prefix = DATA_PREFIX_OPCODE;
if (i.suffix == QWORD_MNEM_SUFFIX
&& flag_code == CODE_64BIT
&& !i.tm.opcode_modifier.norex64
+ && !i.tm.opcode_modifier.vexw
/* Special case for xchg %rax,%rax. It is NOP and doesn't
need rex64. */
&& ! (i.operands == 2
i.rex |= REX_W;
break;
+
+ case 0:
+ /* Select word/dword/qword operation with explict data sizing prefix
+ when there are no suitable register operands. */
+ if (i.tm.opcode_modifier.w
+ && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
+ && (!i.reg_operands
+ || (i.reg_operands == 1
+ /* ShiftCount */
+ && (i.tm.operand_types[0].bitfield.instance == RegC
+ /* InOutPortReg */
+ || i.tm.operand_types[0].bitfield.instance == RegD
+ || i.tm.operand_types[1].bitfield.instance == RegD
+ /* CRC32 */
+ || i.tm.base_opcode == 0xf20f38f0))))
+ i.tm.base_opcode |= 1;
+ break;
}
- if (i.reg_operands != 0
- && i.operands > 1
- && i.tm.opcode_modifier.addrprefixopreg
- && i.tm.operand_types[0].bitfield.instance != Accum)
+ if (i.tm.opcode_modifier.addrprefixopreg)
{
- /* Check invalid register operand when the address size override
- prefix changes the size of register operands. */
- unsigned int op;
- enum { need_word, need_dword, need_qword } need;
+ gas_assert (!i.suffix);
+ gas_assert (i.reg_operands);
+
+ if (i.tm.operand_types[0].bitfield.instance == Accum
+ || i.operands == 1)
+ {
+ /* The address size override prefix changes the size of the
+ first operand. */
+ if (flag_code == CODE_64BIT
+ && i.op[0].regs->reg_type.bitfield.word)
+ {
+ as_bad (_("16-bit addressing unavailable for `%s'"),
+ i.tm.name);
+ return 0;
+ }
- if (flag_code == CODE_32BIT)
- need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
+ if ((flag_code == CODE_32BIT
+ ? i.op[0].regs->reg_type.bitfield.word
+ : i.op[0].regs->reg_type.bitfield.dword)
+ && !add_prefix (ADDR_PREFIX_OPCODE))
+ return 0;
+ }
else
{
- if (i.prefix[ADDR_PREFIX])
+ /* Check invalid register operand when the address size override
+ prefix changes the size of register operands. */
+ unsigned int op;
+ enum { need_word, need_dword, need_qword } need;
+
+ if (flag_code == CODE_32BIT)
+ need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
+ else if (i.prefix[ADDR_PREFIX])
need = need_dword;
else
need = flag_code == CODE_64BIT ? need_qword : need_word;
- }
- for (op = 0; op < i.operands; op++)
- if (i.types[op].bitfield.class == Reg
- && ((need == need_word
- && !i.op[op].regs->reg_type.bitfield.word)
- || (need == need_dword
- && !i.op[op].regs->reg_type.bitfield.dword)
- || (need == need_qword
- && !i.op[op].regs->reg_type.bitfield.qword)))
- {
- as_bad (_("invalid register operand size for `%s'"),
- i.tm.name);
- return 0;
- }
+ for (op = 0; op < i.operands; op++)
+ {
+ if (i.types[op].bitfield.class != Reg)
+ continue;
+
+ switch (need)
+ {
+ case need_word:
+ if (i.op[op].regs->reg_type.bitfield.word)
+ continue;
+ break;
+ case need_dword:
+ if (i.op[op].regs->reg_type.bitfield.dword)
+ continue;
+ break;
+ case need_qword:
+ if (i.op[op].regs->reg_type.bitfield.qword)
+ continue;
+ break;
+ }
+
+ as_bad (_("invalid register operand size for `%s'"),
+ i.tm.name);
+ return 0;
+ }
+ }
}
return 1;
&& i.tm.operand_types[op].bitfield.word)
continue;
- /* crc32 doesn't generate this warning. */
- if (i.tm.base_opcode == 0xf20f38f0)
+ /* crc32 only wants its source operand checked here. */
+ if (i.tm.base_opcode == 0xf20f38f0 && op)
continue;
- if ((i.types[op].bitfield.word
- || i.types[op].bitfield.dword
- || i.types[op].bitfield.qword)
- && i.op[op].regs->reg_num < 4
- /* Prohibit these changes in 64bit mode, since the lowering
- would be more complicated. */
- && flag_code != CODE_64BIT)
- {
-#if REGISTER_WARNINGS
- if (!quiet_warnings)
- as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"),
- register_prefix,
- (i.op[op].regs + (i.types[op].bitfield.word
- ? REGNAM_AL - REGNAM_AX
- : REGNAM_AL - REGNAM_EAX))->reg_name,
- register_prefix,
- i.op[op].regs->reg_name,
- i.suffix);
-#endif
- continue;
- }
/* Any other register is bad. */
- if (i.types[op].bitfield.class == Reg
- || i.types[op].bitfield.class == RegMMX
- || i.types[op].bitfield.class == RegSIMD
- || i.types[op].bitfield.class == SReg
- || i.types[op].bitfield.class == RegCR
- || i.types[op].bitfield.class == RegDR
- || i.types[op].bitfield.class == RegTR)
- {
- as_bad (_("`%s%s' not allowed with `%s%c'"),
- register_prefix,
- i.op[op].regs->reg_name,
- i.tm.name,
- i.suffix);
- return 0;
- }
+ as_bad (_("`%s%s' not allowed with `%s%c'"),
+ register_prefix, i.op[op].regs->reg_name,
+ i.tm.name, i.suffix);
+ return 0;
}
return 1;
}
i.suffix);
return 0;
}
- /* Warn if the e prefix on a general reg is missing. */
- else if ((!quiet_warnings || flag_code == CODE_64BIT)
- && i.types[op].bitfield.word
+ /* Error if the e prefix on a general reg is missing. */
+ else if (i.types[op].bitfield.word
&& (i.tm.operand_types[op].bitfield.class == Reg
|| i.tm.operand_types[op].bitfield.instance == Accum)
&& i.tm.operand_types[op].bitfield.dword)
{
- /* Prohibit these changes in the 64bit mode, since the
- lowering is more complicated. */
- if (flag_code == CODE_64BIT)
- {
- as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
- register_prefix, i.op[op].regs->reg_name,
- i.suffix);
- return 0;
- }
-#if REGISTER_WARNINGS
- as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"),
- register_prefix,
- (i.op[op].regs + REGNAM_EAX - REGNAM_AX)->reg_name,
- register_prefix, i.op[op].regs->reg_name, i.suffix);
-#endif
+ as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
+ register_prefix, i.op[op].regs->reg_name,
+ i.suffix);
+ return 0;
}
/* Warn if the r prefix on a general reg is present. */
else if (i.types[op].bitfield.qword
i.suffix);
return 0;
}
- /* Warn if the e or r prefix on a general reg is present. */
- else if ((!quiet_warnings || flag_code == CODE_64BIT)
- && (i.types[op].bitfield.dword
+ /* Error if the e or r prefix on a general reg is present. */
+ else if ((i.types[op].bitfield.dword
|| i.types[op].bitfield.qword)
&& (i.tm.operand_types[op].bitfield.class == Reg
|| i.tm.operand_types[op].bitfield.instance == Accum)
&& i.tm.operand_types[op].bitfield.word)
{
- /* Prohibit these changes in the 64bit mode, since the
- lowering is more complicated. */
- if (flag_code == CODE_64BIT)
- {
- as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
- register_prefix, i.op[op].regs->reg_name,
- i.suffix);
- return 0;
- }
-#if REGISTER_WARNINGS
- as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"),
- register_prefix,
- (i.op[op].regs + REGNAM_AX - REGNAM_EAX)->reg_name,
- register_prefix, i.op[op].regs->reg_name, i.suffix);
-#endif
+ as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
+ register_prefix, i.op[op].regs->reg_name,
+ i.suffix);
+ return 0;
}
return 1;
}
else
overlap = imm32s;
}
+ else if (i.prefix[REX_PREFIX] & REX_W)
+ overlap = operand_type_and (overlap, imm32s);
+ else if (i.prefix[DATA_PREFIX])
+ overlap = operand_type_and (overlap,
+ flag_code != CODE_16BIT ? imm16 : imm32);
if (!operand_type_equal (&overlap, &imm8)
&& !operand_type_equal (&overlap, &imm8s)
&& !operand_type_equal (&overlap, &imm16)
unnecessary segment overrides. */
const seg_entry *default_seg = 0;
+ if (i.tm.opcode_modifier.sse2avx)
+ {
+ /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
+ need converting. */
+ i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
+ i.prefix[REX_PREFIX] = 0;
+ i.rex_encoding = 0;
+ }
+ /* ImmExt should be processed after SSE2AVX. */
+ else if (i.tm.opcode_modifier.immext)
+ process_immext ();
+
if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
{
unsigned int dupl = i.operands;
i.flags[j] = i.flags[j - 1];
}
i.op[0].regs
- = (const reg_entry *) hash_find (reg_hash, "xmm0");
+ = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
i.types[0] = regxmm;
i.tm.operand_types[0] = regxmm;
}
else
{
-duplicate:
+ duplicate:
i.operands++;
i.reg_operands++;
i.tm.operands++;
on one of their operands, the default segment is ds. */
default_seg = &ds;
}
- else if (i.tm.opcode_modifier.shortform)
+ else if (i.short_form)
{
/* The register or float register operand is in operand
0 or 1. */
}
}
- if (i.tm.base_opcode == 0x8d /* lea */
- && i.seg[0]
- && !quiet_warnings)
- as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
+ if ((i.seg[0] || i.prefix[SEG_PREFIX])
+ && i.tm.base_opcode == 0x8d /* lea */
+ && !is_any_vex_encoding(&i.tm))
+ {
+ if (!quiet_warnings)
+ as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
+ if (optimize)
+ {
+ i.seg[0] = NULL;
+ i.prefix[SEG_PREFIX] = 0;
+ }
+ }
/* If a segment was explicitly specified, and the specified segment
- is not the default, use an opcode prefix to select it. If we
- never figured out what the default segment is, then default_seg
- will be zero at this point, and the specified segment prefix will
- always be used. */
- if ((i.seg[0]) && (i.seg[0] != default_seg))
+ is neither the default nor the one already recorded from a prefix,
+ use an opcode prefix to select it. If we never figured out what
+ the default segment is, then default_seg will be zero at this
+ point, and the specified segment prefix will always be used. */
+ if (i.seg[0]
+ && i.seg[0] != default_seg
+ && i.seg[0]->seg_prefix != i.prefix[SEG_PREFIX])
{
if (!add_prefix (i.seg[0]->seg_prefix))
return 0;
return 1;
}
+static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
+ bfd_boolean do_sse2avx)
+{
+ if (r->reg_flags & RegRex)
+ {
+ if (i.rex & rex_bit)
+ as_bad (_("same type of prefix used twice"));
+ i.rex |= rex_bit;
+ }
+ else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
+ {
+ gas_assert (i.vex.register_specifier == r);
+ i.vex.register_specifier += 8;
+ }
+
+ if (r->reg_flags & RegVRex)
+ i.vrex |= rex_bit;
+}
+
static const seg_entry *
build_modrm_byte (void)
{
i386_operand_type op;
unsigned int vvvv;
- /* Check register-only source operand when two source
- operands are swapped. */
- if (!i.tm.operand_types[source].bitfield.baseindex
- && i.tm.operand_types[dest].bitfield.baseindex)
+ /* Swap two source operands if needed. */
+ if (i.tm.opcode_modifier.swapsources)
{
vvvv = source;
source = dest;
{
i.rm.reg = i.op[dest].regs->reg_num;
i.rm.regmem = i.op[source].regs->reg_num;
- if (i.op[dest].regs->reg_type.bitfield.class == RegMMX
- || i.op[source].regs->reg_type.bitfield.class == RegMMX)
- i.has_regmmx = TRUE;
- else if (i.op[dest].regs->reg_type.bitfield.class == RegSIMD
- || i.op[source].regs->reg_type.bitfield.class == RegSIMD)
- {
- if (i.types[dest].bitfield.zmmword
- || i.types[source].bitfield.zmmword)
- i.has_regzmm = TRUE;
- else if (i.types[dest].bitfield.ymmword
- || i.types[source].bitfield.ymmword)
- i.has_regymm = TRUE;
- else
- i.has_regxmm = TRUE;
- }
- if ((i.op[dest].regs->reg_flags & RegRex) != 0)
- i.rex |= REX_R;
- if ((i.op[dest].regs->reg_flags & RegVRex) != 0)
- i.vrex |= REX_R;
- if ((i.op[source].regs->reg_flags & RegRex) != 0)
- i.rex |= REX_B;
- if ((i.op[source].regs->reg_flags & RegVRex) != 0)
- i.vrex |= REX_B;
+ set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
+ set_rex_vrex (i.op[source].regs, REX_B, FALSE);
}
else
{
i.rm.reg = i.op[source].regs->reg_num;
i.rm.regmem = i.op[dest].regs->reg_num;
- if ((i.op[dest].regs->reg_flags & RegRex) != 0)
- i.rex |= REX_B;
- if ((i.op[dest].regs->reg_flags & RegVRex) != 0)
- i.vrex |= REX_B;
- if ((i.op[source].regs->reg_flags & RegRex) != 0)
- i.rex |= REX_R;
- if ((i.op[source].regs->reg_flags & RegVRex) != 0)
- i.vrex |= REX_R;
+ set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
+ set_rex_vrex (i.op[source].regs, REX_R, FALSE);
}
if (flag_code != CODE_64BIT && (i.rex & REX_R))
{
break;
gas_assert (op < i.operands);
- if (i.tm.opcode_modifier.vecsib)
+ if (i.tm.opcode_modifier.sib)
{
- if (i.index_reg->reg_num == RegIZ)
+ /* The index register of VSIB shouldn't be RegIZ. */
+ if (i.tm.opcode_modifier.sib != SIBMEM
+ && i.index_reg->reg_num == RegIZ)
abort ();
i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
i.types[op].bitfield.disp32s = 1;
}
}
- i.sib.index = i.index_reg->reg_num;
- if ((i.index_reg->reg_flags & RegRex) != 0)
- i.rex |= REX_X;
- if ((i.index_reg->reg_flags & RegVRex) != 0)
- i.vrex |= REX_X;
+
+ /* Since the mandatory SIB always has index register, so
+ the code logic remains unchanged. The non-mandatory SIB
+ without index register is allowed and will be handled
+ later. */
+ if (i.index_reg)
+ {
+ if (i.index_reg->reg_num == RegIZ)
+ i.sib.index = NO_INDEX_REGISTER;
+ else
+ i.sib.index = i.index_reg->reg_num;
+ set_rex_vrex (i.index_reg, REX_X, FALSE);
+ }
}
default_seg = &ds;
{
i386_operand_type newdisp;
- gas_assert (!i.tm.opcode_modifier.vecsib);
+ /* Both check for VSIB and mandatory non-vector SIB. */
+ gas_assert (!i.tm.opcode_modifier.sib
+ || i.tm.opcode_modifier.sib == SIBMEM);
/* Operand is just <disp> */
if (flag_code == CODE_64BIT)
{
i.types[op] = operand_type_and_not (i.types[op], anydisp);
i.types[op] = operand_type_or (i.types[op], newdisp);
}
- else if (!i.tm.opcode_modifier.vecsib)
+ else if (!i.tm.opcode_modifier.sib)
{
/* !i.base_reg && i.index_reg */
if (i.index_reg->reg_num == RegIZ)
/* RIP addressing for 64bit mode. */
else if (i.base_reg->reg_num == RegIP)
{
- gas_assert (!i.tm.opcode_modifier.vecsib);
+ gas_assert (!i.tm.opcode_modifier.sib);
i.rm.regmem = NO_BASE_REGISTER;
i.types[op].bitfield.disp8 = 0;
i.types[op].bitfield.disp16 = 0;
}
else if (i.base_reg->reg_type.bitfield.word)
{
- gas_assert (!i.tm.opcode_modifier.vecsib);
+ gas_assert (!i.tm.opcode_modifier.sib);
switch (i.base_reg->reg_num)
{
case 3: /* (%bx) */
if (operand_type_check (i.types[op], disp) == 0)
{
/* fake (%bp) into 0(%bp) */
- i.types[op].bitfield.disp8 = 1;
+ if (i.disp_encoding == disp_encoding_16bit)
+ i.types[op].bitfield.disp16 = 1;
+ else
+ i.types[op].bitfield.disp8 = 1;
fake_zero_displacement = 1;
}
}
default: /* (%si) -> 4 or (%di) -> 5 */
i.rm.regmem = i.base_reg->reg_num - 6 + 4;
}
+ if (!fake_zero_displacement
+ && !i.disp_operands
+ && i.disp_encoding)
+ {
+ fake_zero_displacement = 1;
+ if (i.disp_encoding == disp_encoding_8bit)
+ i.types[op].bitfield.disp8 = 1;
+ else
+ i.types[op].bitfield.disp16 = 1;
+ }
i.rm.mode = mode_from_disp_size (i.types[op]);
}
else /* i.base_reg and 32/64 bit mode */
}
}
- if (!i.tm.opcode_modifier.vecsib)
+ if (!i.tm.opcode_modifier.sib)
i.rm.regmem = i.base_reg->reg_num;
if ((i.base_reg->reg_flags & RegRex) != 0)
i.rex |= REX_B;
if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
{
fake_zero_displacement = 1;
- i.types[op].bitfield.disp8 = 1;
+ if (i.disp_encoding == disp_encoding_32bit)
+ i.types[op].bitfield.disp32 = 1;
+ else
+ i.types[op].bitfield.disp8 = 1;
}
i.sib.scale = i.log2_scale_factor;
if (i.index_reg == 0)
{
- gas_assert (!i.tm.opcode_modifier.vecsib);
+ /* Only check for VSIB. */
+ gas_assert (i.tm.opcode_modifier.sib != VECSIB128
+ && i.tm.opcode_modifier.sib != VECSIB256
+ && i.tm.opcode_modifier.sib != VECSIB512);
+
/* <disp>(%esp) becomes two byte modrm with no index
register. We've already stored the code for esp
in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
extra modrm byte. */
i.sib.index = NO_INDEX_REGISTER;
}
- else if (!i.tm.opcode_modifier.vecsib)
+ else if (!i.tm.opcode_modifier.sib)
{
if (i.index_reg->reg_num == RegIZ)
i.sib.index = NO_INDEX_REGISTER;
unsigned int vex_reg = ~0;
for (op = 0; op < i.operands; op++)
- {
- if (i.types[op].bitfield.class == Reg
- || i.types[op].bitfield.class == RegBND
- || i.types[op].bitfield.class == RegMask
- || i.types[op].bitfield.class == SReg
- || i.types[op].bitfield.class == RegCR
- || i.types[op].bitfield.class == RegDR
- || i.types[op].bitfield.class == RegTR)
- break;
- if (i.types[op].bitfield.class == RegSIMD)
- {
- if (i.types[op].bitfield.zmmword)
- i.has_regzmm = TRUE;
- else if (i.types[op].bitfield.ymmword)
- i.has_regymm = TRUE;
- else
- i.has_regxmm = TRUE;
- break;
- }
- if (i.types[op].bitfield.class == RegMMX)
- {
- i.has_regmmx = TRUE;
- break;
- }
- }
+ if (i.types[op].bitfield.class == Reg
+ || i.types[op].bitfield.class == RegBND
+ || i.types[op].bitfield.class == RegMask
+ || i.types[op].bitfield.class == SReg
+ || i.types[op].bitfield.class == RegCR
+ || i.types[op].bitfield.class == RegDR
+ || i.types[op].bitfield.class == RegTR
+ || i.types[op].bitfield.class == RegSIMD
+ || i.types[op].bitfield.class == RegMMX)
+ break;
if (vex_3_sources)
op = dest;
if (i.tm.extension_opcode != None)
{
i.rm.regmem = i.op[op].regs->reg_num;
- if ((i.op[op].regs->reg_flags & RegRex) != 0)
- i.rex |= REX_B;
- if ((i.op[op].regs->reg_flags & RegVRex) != 0)
- i.vrex |= REX_B;
+ set_rex_vrex (i.op[op].regs, REX_B,
+ i.tm.opcode_modifier.sse2avx);
}
else
{
i.rm.reg = i.op[op].regs->reg_num;
- if ((i.op[op].regs->reg_flags & RegRex) != 0)
- i.rex |= REX_R;
- if ((i.op[op].regs->reg_flags & RegVRex) != 0)
- i.vrex |= REX_R;
+ set_rex_vrex (i.op[op].regs, REX_R,
+ i.tm.opcode_modifier.sse2avx);
}
}
return default_seg;
}
+static INLINE void
+frag_opcode_byte (unsigned char byte)
+{
+ if (now_seg != absolute_section)
+ FRAG_APPEND_1_CHAR (byte);
+ else
+ ++abs_section_offset;
+}
+
+static unsigned int
+flip_code16 (unsigned int code16)
+{
+ gas_assert (i.tm.operands == 1);
+
+ return !(i.prefix[REX_PREFIX] & REX_W)
+ && (code16 ? i.tm.operand_types[0].bitfield.disp32
+ || i.tm.operand_types[0].bitfield.disp32s
+ : i.tm.operand_types[0].bitfield.disp16)
+ ? CODE16 : 0;
+}
+
static void
output_branch (void)
{
symbolS *sym;
offsetT off;
+ if (now_seg == absolute_section)
+ {
+ as_bad (_("relaxable branches not supported in absolute section"));
+ return;
+ }
+
code16 = flag_code == CODE_16BIT ? CODE16 : 0;
size = i.disp_encoding == disp_encoding_32bit ? BIG : SMALL;
{
prefix = 1;
i.prefixes -= 1;
- code16 ^= CODE16;
+ code16 ^= flip_code16(code16);
}
/* Pentium4 branch hints. */
if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
/* BND prefixed jump. */
if (i.prefix[BND_PREFIX] != 0)
{
- FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]);
- i.prefixes -= 1;
+ prefix++;
+ i.prefixes--;
}
- if (i.prefixes != 0 && !intel_syntax)
- as_warn (_("skipping prefixes on this instruction"));
+ if (i.prefixes != 0)
+ as_warn (_("skipping prefixes on `%s'"), i.tm.name);
/* It's always a symbol; End frag & setup for relax.
Make sure there is enough room in this frag for the largest
if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
|| i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
*p++ = i.prefix[SEG_PREFIX];
+ if (i.prefix[BND_PREFIX] != 0)
+ *p++ = BND_PREFIX_OPCODE;
if (i.prefix[REX_PREFIX] != 0)
*p++ = i.prefix[REX_PREFIX];
*p = i.tm.base_opcode;
size = 1;
if (i.prefix[ADDR_PREFIX] != 0)
{
- FRAG_APPEND_1_CHAR (ADDR_PREFIX_OPCODE);
+ frag_opcode_byte (ADDR_PREFIX_OPCODE);
i.prefixes -= 1;
}
/* Pentium4 branch hints. */
if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
|| i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
{
- FRAG_APPEND_1_CHAR (i.prefix[SEG_PREFIX]);
+ frag_opcode_byte (i.prefix[SEG_PREFIX]);
i.prefixes--;
}
}
if (i.prefix[DATA_PREFIX] != 0)
{
- FRAG_APPEND_1_CHAR (DATA_PREFIX_OPCODE);
+ frag_opcode_byte (DATA_PREFIX_OPCODE);
i.prefixes -= 1;
- code16 ^= CODE16;
+ code16 ^= flip_code16(code16);
}
size = 4;
size = 2;
}
- if (i.prefix[REX_PREFIX] != 0)
+ /* BND prefixed jump. */
+ if (i.prefix[BND_PREFIX] != 0)
{
- FRAG_APPEND_1_CHAR (i.prefix[REX_PREFIX]);
+ frag_opcode_byte (i.prefix[BND_PREFIX]);
i.prefixes -= 1;
}
- /* BND prefixed jump. */
- if (i.prefix[BND_PREFIX] != 0)
+ if (i.prefix[REX_PREFIX] != 0)
{
- FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]);
+ frag_opcode_byte (i.prefix[REX_PREFIX]);
i.prefixes -= 1;
}
- if (i.prefixes != 0 && !intel_syntax)
- as_warn (_("skipping prefixes on this instruction"));
+ if (i.prefixes != 0)
+ as_warn (_("skipping prefixes on `%s'"), i.tm.name);
+
+ if (now_seg == absolute_section)
+ {
+ abs_section_offset += i.tm.opcode_length + size;
+ return;
+ }
p = frag_more (i.tm.opcode_length + size);
switch (i.tm.opcode_length)
i.prefixes -= 1;
code16 ^= CODE16;
}
- if (i.prefix[REX_PREFIX] != 0)
- {
- prefix++;
- i.prefixes -= 1;
- }
+
+ gas_assert (!i.prefix[REX_PREFIX]);
size = 4;
if (code16)
size = 2;
- if (i.prefixes != 0 && !intel_syntax)
- as_warn (_("skipping prefixes on this instruction"));
+ if (i.prefixes != 0)
+ as_warn (_("skipping prefixes on `%s'"), i.tm.name);
+
+ if (now_seg == absolute_section)
+ {
+ abs_section_offset += prefix + 1 + 2 + size;
+ return;
+ }
/* 1 opcode; 2 segment; offset */
p = frag_more (prefix + 1 + 2 + size);
return len - start_off + (frag_now_ptr - frag_now->fr_literal);
}
+/* Return 1 for test, and, cmp, add, sub, inc and dec which may
+ be macro-fused with conditional jumps.
+ NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
+ or is one of the following format:
+
+ cmp m, imm
+ add m, imm
+ sub m, imm
+ test m, imm
+ and m, imm
+ inc m
+ dec m
+
+ it is unfusible. */
+
+static int
+maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
+{
+ /* No RIP address. */
+ if (i.base_reg && i.base_reg->reg_num == RegIP)
+ return 0;
+
+ /* No VEX/EVEX encoding. */
+ if (is_any_vex_encoding (&i.tm))
+ return 0;
+
+ /* add, sub without add/sub m, imm. */
+ if (i.tm.base_opcode <= 5
+ || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
+ || ((i.tm.base_opcode | 3) == 0x83
+ && (i.tm.extension_opcode == 0x5
+ || i.tm.extension_opcode == 0x0)))
+ {
+ *mf_cmp_p = mf_cmp_alu_cmp;
+ return !(i.mem_operands && i.imm_operands);
+ }
+
+ /* and without and m, imm. */
+ if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
+ || ((i.tm.base_opcode | 3) == 0x83
+ && i.tm.extension_opcode == 0x4))
+ {
+ *mf_cmp_p = mf_cmp_test_and;
+ return !(i.mem_operands && i.imm_operands);
+ }
+
+ /* test without test m imm. */
+ if ((i.tm.base_opcode | 1) == 0x85
+ || (i.tm.base_opcode | 1) == 0xa9
+ || ((i.tm.base_opcode | 1) == 0xf7
+ && i.tm.extension_opcode == 0))
+ {
+ *mf_cmp_p = mf_cmp_test_and;
+ return !(i.mem_operands && i.imm_operands);
+ }
+
+ /* cmp without cmp m, imm. */
+ if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
+ || ((i.tm.base_opcode | 3) == 0x83
+ && (i.tm.extension_opcode == 0x7)))
+ {
+ *mf_cmp_p = mf_cmp_alu_cmp;
+ return !(i.mem_operands && i.imm_operands);
+ }
+
+ /* inc, dec without inc/dec m. */
+ if ((i.tm.cpu_flags.bitfield.cpuno64
+ && (i.tm.base_opcode | 0xf) == 0x4f)
+ || ((i.tm.base_opcode | 1) == 0xff
+ && i.tm.extension_opcode <= 0x1))
+ {
+ *mf_cmp_p = mf_cmp_incdec;
+ return !i.mem_operands;
+ }
+
+ return 0;
+}
+
+/* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
+
+static int
+add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
+{
+ /* NB: Don't work with COND_JUMP86 without i386. */
+ if (!align_branch_power
+ || now_seg == absolute_section
+ || !cpu_arch_flags.bitfield.cpui386
+ || !(align_branch & align_branch_fused_bit))
+ return 0;
+
+ if (maybe_fused_with_jcc_p (mf_cmp_p))
+ {
+ if (last_insn.kind == last_insn_other
+ || last_insn.seg != now_seg)
+ return 1;
+ if (flag_debug)
+ as_warn_where (last_insn.file, last_insn.line,
+ _("`%s` skips -malign-branch-boundary on `%s`"),
+ last_insn.name, i.tm.name);
+ }
+
+ return 0;
+}
+
+/* Return 1 if a BRANCH_PREFIX frag should be generated. */
+
+static int
+add_branch_prefix_frag_p (void)
+{
+ /* NB: Don't work with COND_JUMP86 without i386. Don't add prefix
+ to PadLock instructions since they include prefixes in opcode. */
+ if (!align_branch_power
+ || !align_branch_prefix_size
+ || now_seg == absolute_section
+ || i.tm.cpu_flags.bitfield.cpupadlock
+ || !cpu_arch_flags.bitfield.cpui386)
+ return 0;
+
+ /* Don't add prefix if it is a prefix or there is no operand in case
+ that segment prefix is special. */
+ if (!i.operands || i.tm.opcode_modifier.isprefix)
+ return 0;
+
+ if (last_insn.kind == last_insn_other
+ || last_insn.seg != now_seg)
+ return 1;
+
+ if (flag_debug)
+ as_warn_where (last_insn.file, last_insn.line,
+ _("`%s` skips -malign-branch-boundary on `%s`"),
+ last_insn.name, i.tm.name);
+
+ return 0;
+}
+
+/* Return 1 if a BRANCH_PADDING frag should be generated. */
+
+static int
+add_branch_padding_frag_p (enum align_branch_kind *branch_p,
+ enum mf_jcc_kind *mf_jcc_p)
+{
+ int add_padding;
+
+ /* NB: Don't work with COND_JUMP86 without i386. */
+ if (!align_branch_power
+ || now_seg == absolute_section
+ || !cpu_arch_flags.bitfield.cpui386)
+ return 0;
+
+ add_padding = 0;
+
+ /* Check for jcc and direct jmp. */
+ if (i.tm.opcode_modifier.jump == JUMP)
+ {
+ if (i.tm.base_opcode == JUMP_PC_RELATIVE)
+ {
+ *branch_p = align_branch_jmp;
+ add_padding = align_branch & align_branch_jmp_bit;
+ }
+ else
+ {
+ /* Because J<cc> and JN<cc> share same group in macro-fusible table,
+ igore the lowest bit. */
+ *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
+ *branch_p = align_branch_jcc;
+ if ((align_branch & align_branch_jcc_bit))
+ add_padding = 1;
+ }
+ }
+ else if (is_any_vex_encoding (&i.tm))
+ return 0;
+ else if ((i.tm.base_opcode | 1) == 0xc3)
+ {
+ /* Near ret. */
+ *branch_p = align_branch_ret;
+ if ((align_branch & align_branch_ret_bit))
+ add_padding = 1;
+ }
+ else
+ {
+ /* Check for indirect jmp, direct and indirect calls. */
+ if (i.tm.base_opcode == 0xe8)
+ {
+ /* Direct call. */
+ *branch_p = align_branch_call;
+ if ((align_branch & align_branch_call_bit))
+ add_padding = 1;
+ }
+ else if (i.tm.base_opcode == 0xff
+ && (i.tm.extension_opcode == 2
+ || i.tm.extension_opcode == 4))
+ {
+ /* Indirect call and jmp. */
+ *branch_p = align_branch_indirect;
+ if ((align_branch & align_branch_indirect_bit))
+ add_padding = 1;
+ }
+
+ if (add_padding
+ && i.disp_operands
+ && tls_get_addr
+ && (i.op[0].disps->X_op == O_symbol
+ || (i.op[0].disps->X_op == O_subtract
+ && i.op[0].disps->X_op_symbol == GOT_symbol)))
+ {
+ symbolS *s = i.op[0].disps->X_add_symbol;
+ /* No padding to call to global or undefined tls_get_addr. */
+ if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
+ && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
+ return 0;
+ }
+ }
+
+ if (add_padding
+ && last_insn.kind != last_insn_other
+ && last_insn.seg == now_seg)
+ {
+ if (flag_debug)
+ as_warn_where (last_insn.file, last_insn.line,
+ _("`%s` skips -malign-branch-boundary on `%s`"),
+ last_insn.name, i.tm.name);
+ return 0;
+ }
+
+ return add_padding;
+}
+
static void
output_insn (void)
{
fragS *insn_start_frag;
offsetT insn_start_off;
+ fragS *fragP = NULL;
+ enum align_branch_kind branch = align_branch_none;
+ /* The initializer is arbitrary just to avoid uninitialized error.
+ it's actually either assigned in add_branch_padding_frag_p
+ or never be used. */
+ enum mf_jcc_kind mf_jcc = mf_jcc_jo;
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
- if (IS_ELF && x86_used_note)
+ if (IS_ELF && x86_used_note && now_seg != absolute_section)
{
if (i.tm.cpu_flags.bitfield.cpucmov)
x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_CMOV;
|| i.tm.cpu_flags.bitfield.cpu687
|| i.tm.cpu_flags.bitfield.cpufisttp)
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
- /* Don't set GNU_PROPERTY_X86_FEATURE_2_MMX for prefetchtXXX nor
- Xfence instructions. */
- if (i.tm.base_opcode != 0xf18
- && i.tm.base_opcode != 0xf0d
- && i.tm.base_opcode != 0xfaef8
- && (i.has_regmmx
- || i.tm.cpu_flags.bitfield.cpummx
- || i.tm.cpu_flags.bitfield.cpua3dnow
- || i.tm.cpu_flags.bitfield.cpua3dnowa))
+ if ((i.xstate & xstate_mmx)
+ || i.tm.base_opcode == 0xf77 /* emms */
+ || i.tm.base_opcode == 0xf0e /* femms */)
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
- if (i.has_regxmm)
+ if ((i.xstate & xstate_xmm)
+ || i.tm.cpu_flags.bitfield.cpuwidekl
+ || i.tm.cpu_flags.bitfield.cpukl)
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
- if (i.has_regymm)
+ if ((i.xstate & xstate_ymm) == xstate_ymm)
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
- if (i.has_regzmm)
+ if ((i.xstate & xstate_zmm) == xstate_zmm)
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
if (i.tm.cpu_flags.bitfield.cpufxsr)
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
if (i.tm.cpu_flags.bitfield.cpuxsavec)
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
+
+ if ((i.xstate & xstate_tmm) == xstate_tmm
+ || i.tm.cpu_flags.bitfield.cpuamx_tile)
+ x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
}
#endif
insn_start_frag = frag_now;
insn_start_off = frag_now_fix ();
+ if (add_branch_padding_frag_p (&branch, &mf_jcc))
+ {
+ char *p;
+ /* Branch can be 8 bytes. Leave some room for prefixes. */
+ unsigned int max_branch_padding_size = 14;
+
+ /* Align section to boundary. */
+ record_alignment (now_seg, align_branch_power);
+
+ /* Make room for padding. */
+ frag_grow (max_branch_padding_size);
+
+ /* Start of the padding. */
+ p = frag_more (0);
+
+ fragP = frag_now;
+
+ frag_var (rs_machine_dependent, max_branch_padding_size, 0,
+ ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
+ NULL, 0, p);
+
+ fragP->tc_frag_data.mf_type = mf_jcc;
+ fragP->tc_frag_data.branch_type = branch;
+ fragP->tc_frag_data.max_bytes = max_branch_padding_size;
+ }
+
/* Output jumps. */
if (i.tm.opcode_modifier.jump == JUMP)
output_branch ();
unsigned char *q;
unsigned int j;
unsigned int prefix;
+ enum mf_cmp_kind mf_cmp;
if (avoid_fence
&& (i.tm.base_opcode == 0xfaee8
|| i.tm.base_opcode == 0xfaef0
|| i.tm.base_opcode == 0xfaef8))
- {
- /* Encode lfence, mfence, and sfence as
- f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
- offsetT val = 0x240483f0ULL;
- p = frag_more (5);
- md_number_to_chars (p, val, 5);
- return;
- }
+ {
+ /* Encode lfence, mfence, and sfence as
+ f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
+ if (now_seg != absolute_section)
+ {
+ offsetT val = 0x240483f0ULL;
+
+ p = frag_more (5);
+ md_number_to_chars (p, val, 5);
+ }
+ else
+ abs_section_offset += 5;
+ return;
+ }
/* Some processors fail on LOCK prefix. This options makes
assembler ignore LOCK prefix and serves as a workaround. */
i.prefix[LOCK_PREFIX] = 0;
}
+ if (branch)
+ /* Skip if this is a branch. */
+ ;
+ else if (add_fused_jcc_padding_frag_p (&mf_cmp))
+ {
+ /* Make room for padding. */
+ frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
+ p = frag_more (0);
+
+ fragP = frag_now;
+
+ frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
+ ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
+ NULL, 0, p);
+
+ fragP->tc_frag_data.mf_type = mf_cmp;
+ fragP->tc_frag_data.branch_type = align_branch_fused;
+ fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
+ }
+ else if (add_branch_prefix_frag_p ())
+ {
+ unsigned int max_prefix_size = align_branch_prefix_size;
+
+ /* Make room for padding. */
+ frag_grow (max_prefix_size);
+ p = frag_more (0);
+
+ fragP = frag_now;
+
+ frag_var (rs_machine_dependent, max_prefix_size, 0,
+ ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
+ NULL, 0, p);
+
+ fragP->tc_frag_data.max_bytes = max_prefix_size;
+ }
+
/* Since the VEX/EVEX prefix contains the implicit prefix, we
don't need the explicit prefix. */
if (!i.tm.opcode_modifier.vex && !i.tm.opcode_modifier.evex)
#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
/* For x32, add a dummy REX_OPCODE prefix for mov/add with
R_X86_64_GOTTPOFF relocation so that linker can safely
- perform IE->LE optimization. */
+ perform IE->LE optimization. A dummy REX_OPCODE prefix
+ is also needed for lea with R_X86_64_GOTPC32_TLSDESC
+ relocation for GDesc -> IE/LE optimization. */
if (x86_elf_abi == X86_64_X32_ABI
&& i.operands == 2
- && i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
+ && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
+ || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
&& i.prefix[REX_PREFIX] == 0)
add_prefix (REX_OPCODE);
#endif
/* The prefix bytes. */
for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
if (*q)
- FRAG_APPEND_1_CHAR (*q);
+ frag_opcode_byte (*q);
}
else
{
if (*q)
switch (j)
{
- case REX_PREFIX:
- /* REX byte is encoded in VEX prefix. */
- break;
case SEG_PREFIX:
case ADDR_PREFIX:
- FRAG_APPEND_1_CHAR (*q);
+ frag_opcode_byte (*q);
break;
default:
/* There should be no other prefixes for instructions
if (i.vrex)
abort ();
/* Now the VEX prefix. */
- p = frag_more (i.vex.length);
- for (j = 0; j < i.vex.length; j++)
- p[j] = i.vex.bytes[j];
+ if (now_seg != absolute_section)
+ {
+ p = frag_more (i.vex.length);
+ for (j = 0; j < i.vex.length; j++)
+ p[j] = i.vex.bytes[j];
+ }
+ else
+ abs_section_offset += i.vex.length;
}
/* Now the opcode; be careful about word order here! */
- if (i.tm.opcode_length == 1)
+ if (now_seg == absolute_section)
+ abs_section_offset += i.tm.opcode_length;
+ else if (i.tm.opcode_length == 1)
{
FRAG_APPEND_1_CHAR (i.tm.base_opcode);
}
/* Now the modrm byte and sib byte (if present). */
if (i.tm.opcode_modifier.modrm)
{
- FRAG_APPEND_1_CHAR ((i.rm.regmem << 0
- | i.rm.reg << 3
- | i.rm.mode << 6));
+ frag_opcode_byte ((i.rm.regmem << 0)
+ | (i.rm.reg << 3)
+ | (i.rm.mode << 6));
/* If i.rm.regmem == ESP (4)
&& i.rm.mode != (Register mode)
&& not 16 bit
if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
&& i.rm.mode != 3
&& !(i.base_reg && i.base_reg->reg_type.bitfield.word))
- FRAG_APPEND_1_CHAR ((i.sib.base << 0
- | i.sib.index << 3
- | i.sib.scale << 6));
+ frag_opcode_byte ((i.sib.base << 0)
+ | (i.sib.index << 3)
+ | (i.sib.scale << 6));
}
if (i.disp_operands)
if (j > 15)
as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
j);
+ else if (fragP)
+ {
+ /* NB: Don't add prefix with GOTPC relocation since
+ output_disp() above depends on the fixed encoding
+ length. Can't add prefix with TLS relocation since
+ it breaks TLS linker optimization. */
+ unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
+ /* Prefix count on the current instruction. */
+ unsigned int count = i.vex.length;
+ unsigned int k;
+ for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
+ /* REX byte is encoded in VEX/EVEX prefix. */
+ if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
+ count++;
+
+ /* Count prefixes for extended opcode maps. */
+ if (!i.vex.length)
+ switch (i.tm.opcode_length)
+ {
+ case 3:
+ if (((i.tm.base_opcode >> 16) & 0xff) == 0xf)
+ {
+ count++;
+ switch ((i.tm.base_opcode >> 8) & 0xff)
+ {
+ case 0x38:
+ case 0x3a:
+ count++;
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+ case 2:
+ if (((i.tm.base_opcode >> 8) & 0xff) == 0xf)
+ count++;
+ break;
+ case 1:
+ break;
+ default:
+ abort ();
+ }
+
+ if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
+ == BRANCH_PREFIX)
+ {
+ /* Set the maximum prefix size in BRANCH_PREFIX
+ frag. */
+ if (fragP->tc_frag_data.max_bytes > max)
+ fragP->tc_frag_data.max_bytes = max;
+ if (fragP->tc_frag_data.max_bytes > count)
+ fragP->tc_frag_data.max_bytes -= count;
+ else
+ fragP->tc_frag_data.max_bytes = 0;
+ }
+ else
+ {
+ /* Remember the maximum prefix size in FUSED_JCC_PADDING
+ frag. */
+ unsigned int max_prefix_size;
+ if (align_branch_prefix_size > max)
+ max_prefix_size = max;
+ else
+ max_prefix_size = align_branch_prefix_size;
+ if (max_prefix_size > count)
+ fragP->tc_frag_data.max_prefix_length
+ = max_prefix_size - count;
+ }
+
+ /* Use existing segment prefix if possible. Use CS
+ segment prefix in 64-bit mode. In 32-bit mode, use SS
+ segment prefix with ESP/EBP base register and use DS
+ segment prefix without ESP/EBP base register. */
+ if (i.prefix[SEG_PREFIX])
+ fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
+ else if (flag_code == CODE_64BIT)
+ fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
+ else if (i.base_reg
+ && (i.base_reg->reg_num == 4
+ || i.base_reg->reg_num == 5))
+ fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
+ else
+ fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
+ }
}
}
+ /* NB: Don't work with COND_JUMP86 without i386. */
+ if (align_branch_power
+ && now_seg != absolute_section
+ && cpu_arch_flags.bitfield.cpui386)
+ {
+ /* Terminate each frag so that we can add prefix and check for
+ fused jcc. */
+ frag_wane (frag_now);
+ frag_new (0);
+ }
+
#ifdef DEBUG386
if (flag_debug)
{
{
if (operand_type_check (i.types[n], disp))
{
- if (i.op[n].disps->X_op == O_constant)
+ int size = disp_size (n);
+
+ if (now_seg == absolute_section)
+ abs_section_offset += size;
+ else if (i.op[n].disps->X_op == O_constant)
{
- int size = disp_size (n);
offsetT val = i.op[n].disps->X_add_number;
val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
else
{
enum bfd_reloc_code_real reloc_type;
- int size = disp_size (n);
int sign = i.types[n].bitfield.disp32s;
int pcrel = (i.flags[n] & Operand_PCrel) != 0;
fixS *fixP;
if (!object_64bit)
{
reloc_type = BFD_RELOC_386_GOTPC;
+ i.has_gotpc_tls_reloc = TRUE;
i.op[n].imms->X_add_number +=
encoding_length (insn_start_frag, insn_start_off, p);
}
insn, and that is taken care of in other code. */
reloc_type = BFD_RELOC_X86_64_GOTPC32;
}
+ else if (align_branch_power)
+ {
+ switch (reloc_type)
+ {
+ case BFD_RELOC_386_TLS_GD:
+ case BFD_RELOC_386_TLS_LDM:
+ case BFD_RELOC_386_TLS_IE:
+ case BFD_RELOC_386_TLS_IE_32:
+ case BFD_RELOC_386_TLS_GOTIE:
+ case BFD_RELOC_386_TLS_GOTDESC:
+ case BFD_RELOC_386_TLS_DESC_CALL:
+ case BFD_RELOC_X86_64_TLSGD:
+ case BFD_RELOC_X86_64_TLSLD:
+ case BFD_RELOC_X86_64_GOTTPOFF:
+ case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
+ case BFD_RELOC_X86_64_TLSDESC_CALL:
+ i.has_gotpc_tls_reloc = TRUE;
+ default:
+ break;
+ }
+ }
fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
size, i.op[n].disps, pcrel,
reloc_type);
&& i.rm.regmem == 5))
&& (i.rm.mode == 2
|| (i.rm.mode == 0 && i.rm.regmem == 5))
+ && !is_any_vex_encoding(&i.tm)
&& ((i.operands == 1
&& i.tm.base_opcode == 0xff
&& (i.rm.reg == 2 || i.rm.reg == 4))
|| (i.operands == 2
&& (i.tm.base_opcode == 0x8b
|| i.tm.base_opcode == 0x85
- || (i.tm.base_opcode & 0xc7) == 0x03))))
+ || (i.tm.base_opcode & ~0x38) == 0x03))))
{
if (object_64bit)
{
if (operand_type_check (i.types[n], imm))
{
- if (i.op[n].imms->X_op == O_constant)
+ int size = imm_size (n);
+
+ if (now_seg == absolute_section)
+ abs_section_offset += size;
+ else if (i.op[n].imms->X_op == O_constant)
{
- int size = imm_size (n);
offsetT val;
val = offset_in_range (i.op[n].imms->X_add_number,
non-absolute imms). Try to support other
sizes ... */
enum bfd_reloc_code_real reloc_type;
- int size = imm_size (n);
int sign;
if (i.types[n].bitfield.imm32s
reloc_type = BFD_RELOC_X86_64_GOTPC32;
else if (size == 8)
reloc_type = BFD_RELOC_X86_64_GOTPC64;
+ i.has_gotpc_tls_reloc = TRUE;
i.op[n].imms->X_add_number +=
encoding_length (insn_start_frag, insn_start_off, p);
}
/* Check masking operation. */
else if ((mask = parse_register (op_string, &end_op)) != NULL)
{
+ if (mask == &bad_reg)
+ return NULL;
+
/* k0 can't be used for write mask. */
if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
{
operand_type_set (&bigdisp, 0);
if (i.jumpabsolute
+ || i.types[this_operand].bitfield.baseindex
|| (current_templates->start->opcode_modifier.jump != JUMP
&& current_templates->start->opcode_modifier.jump != JUMP_DWORD))
{
- bigdisp.bitfield.disp32 = 1;
+ i386_addressing_mode ();
override = (i.prefix[ADDR_PREFIX] != 0);
if (flag_code == CODE_64BIT)
{
bigdisp.bitfield.disp32s = 1;
bigdisp.bitfield.disp64 = 1;
}
+ else
+ bigdisp.bitfield.disp32 = 1;
}
else if ((flag_code == CODE_16BIT) ^ override)
- {
- bigdisp.bitfield.disp32 = 0;
bigdisp.bitfield.disp16 = 1;
- }
+ else
+ bigdisp.bitfield.disp32 = 1;
}
else
{
- /* For PC-relative branches, the width of the displacement
- is dependent upon data size, not address size. */
+ /* For PC-relative branches, the width of the displacement may be
+ dependent upon data size, but is never dependent upon address size.
+ Also make sure to not unintentionally match against a non-PC-relative
+ branch template. */
+ static templates aux_templates;
+ const insn_template *t = current_templates->start;
+ bfd_boolean has_intel64 = FALSE;
+
+ aux_templates.start = t;
+ while (++t < current_templates->end)
+ {
+ if (t->opcode_modifier.jump
+ != current_templates->start->opcode_modifier.jump)
+ break;
+ if ((t->opcode_modifier.isa64 >= INTEL64))
+ has_intel64 = TRUE;
+ }
+ if (t < current_templates->end)
+ {
+ aux_templates.end = t;
+ current_templates = &aux_templates;
+ }
+
override = (i.prefix[DATA_PREFIX] != 0);
if (flag_code == CODE_64BIT)
{
- if (override || i.suffix == WORD_MNEM_SUFFIX)
+ if ((override || i.suffix == WORD_MNEM_SUFFIX)
+ && (!intel64 || !has_intel64))
bigdisp.bitfield.disp16 = 1;
else
- {
- bigdisp.bitfield.disp32 = 1;
- bigdisp.bitfield.disp32s = 1;
- }
+ bigdisp.bitfield.disp32s = 1;
}
else
{
}
#endif
+ if (current_templates->start->opcode_modifier.jump == JUMP_BYTE
+ /* Constants get taken care of by optimize_disp(). */
+ && exp->X_op != O_constant)
+ i.types[this_operand].bitfield.disp8 = 1;
+
/* Check if this is a displacement only operand. */
bigdisp = i.types[this_operand];
bigdisp.bitfield.disp8 = 0;
if (i.prefix[ADDR_PREFIX])
addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
+ else if (flag_code == CODE_16BIT
+ && current_templates->start->cpu_flags.bitfield.cpumpx
+ /* Avoid replacing the "16-bit addressing not allowed" diagnostic
+ from md_assemble() by "is not a valid base/index expression"
+ when there is a base and/or index. */
+ && !i.types[this_operand].bitfield.baseindex)
+ {
+ /* MPX insn memory operands with neither base nor index must be forced
+ to use 32-bit addressing in 16-bit mode. */
+ addr_mode = CODE_32BIT;
+ i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
+ ++i.prefixes;
+ gas_assert (!i.types[this_operand].bitfield.disp16);
+ gas_assert (!i.types[this_operand].bitfield.disp32);
+ }
else
{
addr_mode = flag_code;
&& current_templates->end[-1].operand_types[1]
.bitfield.baseindex))
op = 1;
- expected_reg = hash_find (reg_hash, di_si[addr_mode][op == es_op]);
+ expected_reg
+ = (const reg_entry *) str_hash_find (reg_hash,
+ di_si[addr_mode][op == es_op]);
}
else
- expected_reg = hash_find (reg_hash, bx[addr_mode]);
+ expected_reg
+ = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
if (i.base_reg != expected_reg
|| i.index_reg
else
return 1;
-bad_address:
+ bad_address:
as_bad (_("`%s' is not a valid %s expression"),
operand_string, kind);
return 0;
if (addr_mode != CODE_16BIT)
{
/* 32-bit/64-bit checks. */
+ if (i.disp_encoding == disp_encoding_16bit)
+ {
+ bad_disp:
+ as_bad (_("invalid `%s' prefix"),
+ addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
+ return 0;
+ }
+
if ((i.base_reg
&& ((addr_mode == CODE_64BIT
? !i.base_reg->reg_type.bitfield.qword
|| !i.index_reg->reg_type.bitfield.baseindex)))
goto bad_address;
- /* bndmk, bndldx, and bndstx have special restrictions. */
+ /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
if (current_templates->start->base_opcode == 0xf30f1b
- || (current_templates->start->base_opcode & ~1) == 0x0f1a)
+ || (current_templates->start->base_opcode & ~1) == 0x0f1a
+ || current_templates->start->opcode_modifier.sib == SIBMEM)
{
/* They cannot use RIP-relative addressing. */
if (i.base_reg && i.base_reg->reg_num == RegIP)
}
/* bndldx and bndstx ignore their scale factor. */
- if (current_templates->start->base_opcode != 0xf30f1b
+ if ((current_templates->start->base_opcode & ~1) == 0x0f1a
&& i.log2_scale_factor)
as_warn (_("register scaling is being ignored here"));
}
else
{
/* 16-bit checks. */
+ if (i.disp_encoding == disp_encoding_32bit)
+ goto bad_disp;
+
if ((i.base_reg
&& (!i.base_reg->reg_type.bitfield.word
|| !i.base_reg->reg_type.bitfield.baseindex))
{
i386_operand_type temp;
+ if (r == &bad_reg)
+ return 0;
+
/* Check for a segment override by searching for ':' after a
segment register. */
op_string = end_op;
if (i.base_reg)
{
+ if (i.base_reg == &bad_reg)
+ return 0;
base_string = end_op;
if (is_space_char (*base_string))
++base_string;
if ((i.index_reg = parse_register (base_string, &end_op))
!= NULL)
{
+ if (i.index_reg == &bad_reg)
+ return 0;
base_string = end_op;
if (is_space_char (*base_string))
++base_string;
}
#endif
-/* md_estimate_size_before_relax()
+/* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
+ Note also work for Skylake and Cascadelake.
+---------------------------------------------------------------------
+| JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
+| ------ | ----------- | ------- | -------- |
+| Jo | N | N | Y |
+| Jno | N | N | Y |
+| Jc/Jb | Y | N | Y |
+| Jae/Jnb | Y | N | Y |
+| Je/Jz | Y | Y | Y |
+| Jne/Jnz | Y | Y | Y |
+| Jna/Jbe | Y | N | Y |
+| Ja/Jnbe | Y | N | Y |
+| Js | N | N | Y |
+| Jns | N | N | Y |
+| Jp/Jpe | N | N | Y |
+| Jnp/Jpo | N | N | Y |
+| Jl/Jnge | Y | Y | Y |
+| Jge/Jnl | Y | Y | Y |
+| Jle/Jng | Y | Y | Y |
+| Jg/Jnle | Y | Y | Y |
+--------------------------------------------------------------------- */
+static int
+i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
+{
+ if (mf_cmp == mf_cmp_alu_cmp)
+ return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
+ || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
+ if (mf_cmp == mf_cmp_incdec)
+ return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
+ || mf_jcc == mf_jcc_jle);
+ if (mf_cmp == mf_cmp_test_and)
+ return 1;
+ return 0;
+}
- Called just before relax() for rs_machine_dependent frags. The x86
- assembler uses these frags to handle variable size jump
- instructions.
+/* Return the next non-empty frag. */
- Any symbol that is now undefined will not become defined.
- Return the correct fr_subtype in the frag.
- Return the initial "guess for variable size of frag" to caller.
- The guess is actually the growth beyond the fixed part. Whatever
- we do to grow the fixed or variable part contributes to our
- returned value. */
+static fragS *
+i386_next_non_empty_frag (fragS *fragP)
+{
+ /* There may be a frag with a ".fill 0" when there is no room in
+ the current frag for frag_grow in output_insn. */
+ for (fragP = fragP->fr_next;
+ (fragP != NULL
+ && fragP->fr_type == rs_fill
+ && fragP->fr_fix == 0);
+ fragP = fragP->fr_next)
+ ;
+ return fragP;
+}
-int
-md_estimate_size_before_relax (fragS *fragP, segT segment)
+/* Return the next jcc frag after BRANCH_PADDING. */
+
+static fragS *
+i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
{
- /* We've already got fragP->fr_subtype right; all we have to do is
- check for un-relaxable symbols. On an ELF system, we can't relax
- an externally visible symbol, because it may be overridden by a
- shared library. */
- if (S_GET_SEGMENT (fragP->fr_symbol) != segment
-#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
- || (IS_ELF
- && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
- fragP->fr_var))
-#endif
-#if defined (OBJ_COFF) && defined (TE_PE)
- || (OUTPUT_FLAVOR == bfd_target_coff_flavour
- && S_IS_WEAK (fragP->fr_symbol))
-#endif
- )
- {
- /* Symbol is undefined in this segment, or we need to keep a
- reloc so that weak symbols can be overridden. */
- int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
- enum bfd_reloc_code_real reloc_type;
- unsigned char *opcode;
- int old_fr_fix;
+ fragS *branch_fragP;
+ if (!pad_fragP)
+ return NULL;
- if (fragP->fr_var != NO_RELOC)
+ if (pad_fragP->fr_type == rs_machine_dependent
+ && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
+ == BRANCH_PADDING))
+ {
+ branch_fragP = i386_next_non_empty_frag (pad_fragP);
+ if (branch_fragP->fr_type != rs_machine_dependent)
+ return NULL;
+ if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
+ && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
+ pad_fragP->tc_frag_data.mf_type))
+ return branch_fragP;
+ }
+
+ return NULL;
+}
+
+/* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */
+
+static void
+i386_classify_machine_dependent_frag (fragS *fragP)
+{
+ fragS *cmp_fragP;
+ fragS *pad_fragP;
+ fragS *branch_fragP;
+ fragS *next_fragP;
+ unsigned int max_prefix_length;
+
+ if (fragP->tc_frag_data.classified)
+ return;
+
+ /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert
+ FUSED_JCC_PADDING and merge BRANCH_PADDING. */
+ for (next_fragP = fragP;
+ next_fragP != NULL;
+ next_fragP = next_fragP->fr_next)
+ {
+ next_fragP->tc_frag_data.classified = 1;
+ if (next_fragP->fr_type == rs_machine_dependent)
+ switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
+ {
+ case BRANCH_PADDING:
+ /* The BRANCH_PADDING frag must be followed by a branch
+ frag. */
+ branch_fragP = i386_next_non_empty_frag (next_fragP);
+ next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
+ break;
+ case FUSED_JCC_PADDING:
+ /* Check if this is a fused jcc:
+ FUSED_JCC_PADDING
+ CMP like instruction
+ BRANCH_PADDING
+ COND_JUMP
+ */
+ cmp_fragP = i386_next_non_empty_frag (next_fragP);
+ pad_fragP = i386_next_non_empty_frag (cmp_fragP);
+ branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
+ if (branch_fragP)
+ {
+ /* The BRANCH_PADDING frag is merged with the
+ FUSED_JCC_PADDING frag. */
+ next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
+ /* CMP like instruction size. */
+ next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
+ frag_wane (pad_fragP);
+ /* Skip to branch_fragP. */
+ next_fragP = branch_fragP;
+ }
+ else if (next_fragP->tc_frag_data.max_prefix_length)
+ {
+ /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
+ a fused jcc. */
+ next_fragP->fr_subtype
+ = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
+ next_fragP->tc_frag_data.max_bytes
+ = next_fragP->tc_frag_data.max_prefix_length;
+ /* This will be updated in the BRANCH_PREFIX scan. */
+ next_fragP->tc_frag_data.max_prefix_length = 0;
+ }
+ else
+ frag_wane (next_fragP);
+ break;
+ }
+ }
+
+ /* Stop if there is no BRANCH_PREFIX. */
+ if (!align_branch_prefix_size)
+ return;
+
+ /* Scan for BRANCH_PREFIX. */
+ for (; fragP != NULL; fragP = fragP->fr_next)
+ {
+ if (fragP->fr_type != rs_machine_dependent
+ || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
+ != BRANCH_PREFIX))
+ continue;
+
+ /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
+ COND_JUMP_PREFIX. */
+ max_prefix_length = 0;
+ for (next_fragP = fragP;
+ next_fragP != NULL;
+ next_fragP = next_fragP->fr_next)
+ {
+ if (next_fragP->fr_type == rs_fill)
+ /* Skip rs_fill frags. */
+ continue;
+ else if (next_fragP->fr_type != rs_machine_dependent)
+ /* Stop for all other frags. */
+ break;
+
+ /* rs_machine_dependent frags. */
+ if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
+ == BRANCH_PREFIX)
+ {
+ /* Count BRANCH_PREFIX frags. */
+ if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
+ {
+ max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
+ frag_wane (next_fragP);
+ }
+ else
+ max_prefix_length
+ += next_fragP->tc_frag_data.max_bytes;
+ }
+ else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
+ == BRANCH_PADDING)
+ || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
+ == FUSED_JCC_PADDING))
+ {
+ /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */
+ fragP->tc_frag_data.u.padding_fragP = next_fragP;
+ break;
+ }
+ else
+ /* Stop for other rs_machine_dependent frags. */
+ break;
+ }
+
+ fragP->tc_frag_data.max_prefix_length = max_prefix_length;
+
+ /* Skip to the next frag. */
+ fragP = next_fragP;
+ }
+}
+
+/* Compute padding size for
+
+ FUSED_JCC_PADDING
+ CMP like instruction
+ BRANCH_PADDING
+ COND_JUMP/UNCOND_JUMP
+
+ or
+
+ BRANCH_PADDING
+ COND_JUMP/UNCOND_JUMP
+ */
+
+static int
+i386_branch_padding_size (fragS *fragP, offsetT address)
+{
+ unsigned int offset, size, padding_size;
+ fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
+
+ /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */
+ if (!address)
+ address = fragP->fr_address;
+ address += fragP->fr_fix;
+
+ /* CMP like instrunction size. */
+ size = fragP->tc_frag_data.cmp_size;
+
+ /* The base size of the branch frag. */
+ size += branch_fragP->fr_fix;
+
+ /* Add opcode and displacement bytes for the rs_machine_dependent
+ branch frag. */
+ if (branch_fragP->fr_type == rs_machine_dependent)
+ size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
+
+ /* Check if branch is within boundary and doesn't end at the last
+ byte. */
+ offset = address & ((1U << align_branch_power) - 1);
+ if ((offset + size) >= (1U << align_branch_power))
+ /* Padding needed to avoid crossing boundary. */
+ padding_size = (1U << align_branch_power) - offset;
+ else
+ /* No padding needed. */
+ padding_size = 0;
+
+ /* The return value may be saved in tc_frag_data.length which is
+ unsigned byte. */
+ if (!fits_in_unsigned_byte (padding_size))
+ abort ();
+
+ return padding_size;
+}
+
+/* i386_generic_table_relax_frag()
+
+ Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
+ grow/shrink padding to align branch frags. Hand others to
+ relax_frag(). */
+
+long
+i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
+{
+ if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
+ || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
+ {
+ long padding_size = i386_branch_padding_size (fragP, 0);
+ long grow = padding_size - fragP->tc_frag_data.length;
+
+ /* When the BRANCH_PREFIX frag is used, the computed address
+ must match the actual address and there should be no padding. */
+ if (fragP->tc_frag_data.padding_address
+ && (fragP->tc_frag_data.padding_address != fragP->fr_address
+ || padding_size))
+ abort ();
+
+ /* Update the padding size. */
+ if (grow)
+ fragP->tc_frag_data.length = padding_size;
+
+ return grow;
+ }
+ else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
+ {
+ fragS *padding_fragP, *next_fragP;
+ long padding_size, left_size, last_size;
+
+ padding_fragP = fragP->tc_frag_data.u.padding_fragP;
+ if (!padding_fragP)
+ /* Use the padding set by the leading BRANCH_PREFIX frag. */
+ return (fragP->tc_frag_data.length
+ - fragP->tc_frag_data.last_length);
+
+ /* Compute the relative address of the padding frag in the very
+ first time where the BRANCH_PREFIX frag sizes are zero. */
+ if (!fragP->tc_frag_data.padding_address)
+ fragP->tc_frag_data.padding_address
+ = padding_fragP->fr_address - (fragP->fr_address - stretch);
+
+ /* First update the last length from the previous interation. */
+ left_size = fragP->tc_frag_data.prefix_length;
+ for (next_fragP = fragP;
+ next_fragP != padding_fragP;
+ next_fragP = next_fragP->fr_next)
+ if (next_fragP->fr_type == rs_machine_dependent
+ && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
+ == BRANCH_PREFIX))
+ {
+ if (left_size)
+ {
+ int max = next_fragP->tc_frag_data.max_bytes;
+ if (max)
+ {
+ int size;
+ if (max > left_size)
+ size = left_size;
+ else
+ size = max;
+ left_size -= size;
+ next_fragP->tc_frag_data.last_length = size;
+ }
+ }
+ else
+ next_fragP->tc_frag_data.last_length = 0;
+ }
+
+ /* Check the padding size for the padding frag. */
+ padding_size = i386_branch_padding_size
+ (padding_fragP, (fragP->fr_address
+ + fragP->tc_frag_data.padding_address));
+
+ last_size = fragP->tc_frag_data.prefix_length;
+ /* Check if there is change from the last interation. */
+ if (padding_size == last_size)
+ {
+ /* Update the expected address of the padding frag. */
+ padding_fragP->tc_frag_data.padding_address
+ = (fragP->fr_address + padding_size
+ + fragP->tc_frag_data.padding_address);
+ return 0;
+ }
+
+ if (padding_size > fragP->tc_frag_data.max_prefix_length)
+ {
+ /* No padding if there is no sufficient room. Clear the
+ expected address of the padding frag. */
+ padding_fragP->tc_frag_data.padding_address = 0;
+ padding_size = 0;
+ }
+ else
+ /* Store the expected address of the padding frag. */
+ padding_fragP->tc_frag_data.padding_address
+ = (fragP->fr_address + padding_size
+ + fragP->tc_frag_data.padding_address);
+
+ fragP->tc_frag_data.prefix_length = padding_size;
+
+ /* Update the length for the current interation. */
+ left_size = padding_size;
+ for (next_fragP = fragP;
+ next_fragP != padding_fragP;
+ next_fragP = next_fragP->fr_next)
+ if (next_fragP->fr_type == rs_machine_dependent
+ && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
+ == BRANCH_PREFIX))
+ {
+ if (left_size)
+ {
+ int max = next_fragP->tc_frag_data.max_bytes;
+ if (max)
+ {
+ int size;
+ if (max > left_size)
+ size = left_size;
+ else
+ size = max;
+ left_size -= size;
+ next_fragP->tc_frag_data.length = size;
+ }
+ }
+ else
+ next_fragP->tc_frag_data.length = 0;
+ }
+
+ return (fragP->tc_frag_data.length
+ - fragP->tc_frag_data.last_length);
+ }
+ return relax_frag (segment, fragP, stretch);
+}
+
+/* md_estimate_size_before_relax()
+
+ Called just before relax() for rs_machine_dependent frags. The x86
+ assembler uses these frags to handle variable size jump
+ instructions.
+
+ Any symbol that is now undefined will not become defined.
+ Return the correct fr_subtype in the frag.
+ Return the initial "guess for variable size of frag" to caller.
+ The guess is actually the growth beyond the fixed part. Whatever
+ we do to grow the fixed or variable part contributes to our
+ returned value. */
+
+int
+md_estimate_size_before_relax (fragS *fragP, segT segment)
+{
+ if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
+ || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
+ || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
+ {
+ i386_classify_machine_dependent_frag (fragP);
+ return fragP->tc_frag_data.length;
+ }
+
+ /* We've already got fragP->fr_subtype right; all we have to do is
+ check for un-relaxable symbols. On an ELF system, we can't relax
+ an externally visible symbol, because it may be overridden by a
+ shared library. */
+ if (S_GET_SEGMENT (fragP->fr_symbol) != segment
+#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
+ || (IS_ELF
+ && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
+ fragP->fr_var))
+#endif
+#if defined (OBJ_COFF) && defined (TE_PE)
+ || (OUTPUT_FLAVOR == bfd_target_coff_flavour
+ && S_IS_WEAK (fragP->fr_symbol))
+#endif
+ )
+ {
+ /* Symbol is undefined in this segment, or we need to keep a
+ reloc so that weak symbols can be overridden. */
+ int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
+ enum bfd_reloc_code_real reloc_type;
+ unsigned char *opcode;
+ int old_fr_fix;
+
+ if (fragP->fr_var != NO_RELOC)
reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
else if (size == 2)
reloc_type = BFD_RELOC_16_PCREL;
unsigned int extension = 0;
offsetT displacement_from_opcode_start;
+ if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
+ || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
+ || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
+ {
+ /* Generate nop padding. */
+ unsigned int size = fragP->tc_frag_data.length;
+ if (size)
+ {
+ if (size > fragP->tc_frag_data.max_bytes)
+ abort ();
+
+ if (flag_debug)
+ {
+ const char *msg;
+ const char *branch = "branch";
+ const char *prefix = "";
+ fragS *padding_fragP;
+ if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
+ == BRANCH_PREFIX)
+ {
+ padding_fragP = fragP->tc_frag_data.u.padding_fragP;
+ switch (fragP->tc_frag_data.default_prefix)
+ {
+ default:
+ abort ();
+ break;
+ case CS_PREFIX_OPCODE:
+ prefix = " cs";
+ break;
+ case DS_PREFIX_OPCODE:
+ prefix = " ds";
+ break;
+ case ES_PREFIX_OPCODE:
+ prefix = " es";
+ break;
+ case FS_PREFIX_OPCODE:
+ prefix = " fs";
+ break;
+ case GS_PREFIX_OPCODE:
+ prefix = " gs";
+ break;
+ case SS_PREFIX_OPCODE:
+ prefix = " ss";
+ break;
+ }
+ if (padding_fragP)
+ msg = _("%s:%u: add %d%s at 0x%llx to align "
+ "%s within %d-byte boundary\n");
+ else
+ msg = _("%s:%u: add additional %d%s at 0x%llx to "
+ "align %s within %d-byte boundary\n");
+ }
+ else
+ {
+ padding_fragP = fragP;
+ msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
+ "%s within %d-byte boundary\n");
+ }
+
+ if (padding_fragP)
+ switch (padding_fragP->tc_frag_data.branch_type)
+ {
+ case align_branch_jcc:
+ branch = "jcc";
+ break;
+ case align_branch_fused:
+ branch = "fused jcc";
+ break;
+ case align_branch_jmp:
+ branch = "jmp";
+ break;
+ case align_branch_call:
+ branch = "call";
+ break;
+ case align_branch_indirect:
+ branch = "indiret branch";
+ break;
+ case align_branch_ret:
+ branch = "ret";
+ break;
+ default:
+ break;
+ }
+
+ fprintf (stdout, msg,
+ fragP->fr_file, fragP->fr_line, size, prefix,
+ (long long) fragP->fr_address, branch,
+ 1 << align_branch_power);
+ }
+ if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
+ memset (fragP->fr_opcode,
+ fragP->tc_frag_data.default_prefix, size);
+ else
+ i386_generate_nops (fragP, (char *) fragP->fr_opcode,
+ size, 0);
+ fragP->fr_fix += size;
+ }
+ return;
+ }
+
opcode = (unsigned char *) fragP->fr_opcode;
/* Address we want to reach in file space. */
return output_invalid_buf;
}
+/* Verify that @r can be used in the current context. */
+
+static bfd_boolean check_register (const reg_entry *r)
+{
+ if (allow_pseudo_reg)
+ return TRUE;
+
+ if (operand_type_all_zero (&r->reg_type))
+ return FALSE;
+
+ if ((r->reg_type.bitfield.dword
+ || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
+ || r->reg_type.bitfield.class == RegCR
+ || r->reg_type.bitfield.class == RegDR)
+ && !cpu_arch_flags.bitfield.cpui386)
+ return FALSE;
+
+ if (r->reg_type.bitfield.class == RegTR
+ && (flag_code == CODE_64BIT
+ || !cpu_arch_flags.bitfield.cpui386
+ || cpu_arch_isa_flags.bitfield.cpui586
+ || cpu_arch_isa_flags.bitfield.cpui686))
+ return FALSE;
+
+ if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
+ return FALSE;
+
+ if (!cpu_arch_flags.bitfield.cpuavx512f)
+ {
+ if (r->reg_type.bitfield.zmmword
+ || r->reg_type.bitfield.class == RegMask)
+ return FALSE;
+
+ if (!cpu_arch_flags.bitfield.cpuavx)
+ {
+ if (r->reg_type.bitfield.ymmword)
+ return FALSE;
+
+ if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
+ return FALSE;
+ }
+ }
+
+ if (r->reg_type.bitfield.tmmword
+ && (!cpu_arch_flags.bitfield.cpuamx_tile
+ || flag_code != CODE_64BIT))
+ return FALSE;
+
+ if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
+ return FALSE;
+
+ /* Don't allow fake index register unless allow_index_reg isn't 0. */
+ if (!allow_index_reg && r->reg_num == RegIZ)
+ return FALSE;
+
+ /* Upper 16 vector registers are only available with VREX in 64bit
+ mode, and require EVEX encoding. */
+ if (r->reg_flags & RegVRex)
+ {
+ if (!cpu_arch_flags.bitfield.cpuavx512f
+ || flag_code != CODE_64BIT)
+ return FALSE;
+
+ if (i.vec_encoding == vex_encoding_default)
+ i.vec_encoding = vex_encoding_evex;
+ else if (i.vec_encoding != vex_encoding_evex)
+ i.vec_encoding = vex_encoding_error;
+ }
+
+ if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
+ && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
+ && flag_code != CODE_64BIT)
+ return FALSE;
+
+ if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
+ && !intel_syntax)
+ return FALSE;
+
+ return TRUE;
+}
+
/* REG_STRING starts *before* REGISTER_PREFIX. */
static const reg_entry *
*end_op = s;
- r = (const reg_entry *) hash_find (reg_hash, reg_name_given);
+ r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
/* Handle floating point regs, allowing spaces in the (i) part. */
if (r == i386_regtab /* %st is first entry of table */)
{
if (!cpu_arch_flags.bitfield.cpu8087
&& !cpu_arch_flags.bitfield.cpu287
- && !cpu_arch_flags.bitfield.cpu387)
+ && !cpu_arch_flags.bitfield.cpu387
+ && !allow_pseudo_reg)
return (const reg_entry *) NULL;
if (is_space_char (*s))
if (*s == ')')
{
*end_op = s + 1;
- r = (const reg_entry *) hash_find (reg_hash, "st(0)");
+ r = (const reg_entry *) str_hash_find (reg_hash, "st(0)");
know (r);
return r + fpr;
}
}
}
- if (r == NULL || allow_pseudo_reg)
- return r;
-
- if (operand_type_all_zero (&r->reg_type))
- return (const reg_entry *) NULL;
-
- if ((r->reg_type.bitfield.dword
- || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
- || r->reg_type.bitfield.class == RegCR
- || r->reg_type.bitfield.class == RegDR
- || r->reg_type.bitfield.class == RegTR)
- && !cpu_arch_flags.bitfield.cpui386)
- return (const reg_entry *) NULL;
-
- if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
- return (const reg_entry *) NULL;
-
- if (!cpu_arch_flags.bitfield.cpuavx512f)
- {
- if (r->reg_type.bitfield.zmmword
- || r->reg_type.bitfield.class == RegMask)
- return (const reg_entry *) NULL;
-
- if (!cpu_arch_flags.bitfield.cpuavx)
- {
- if (r->reg_type.bitfield.ymmword)
- return (const reg_entry *) NULL;
-
- if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
- return (const reg_entry *) NULL;
- }
- }
-
- if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
- return (const reg_entry *) NULL;
-
- /* Don't allow fake index register unless allow_index_reg isn't 0. */
- if (!allow_index_reg && r->reg_num == RegIZ)
- return (const reg_entry *) NULL;
-
- /* Upper 16 vector registers are only available with VREX in 64bit
- mode, and require EVEX encoding. */
- if (r->reg_flags & RegVRex)
- {
- if (!cpu_arch_flags.bitfield.cpuavx512f
- || flag_code != CODE_64BIT)
- return (const reg_entry *) NULL;
-
- i.vec_encoding = vex_encoding_evex;
- }
-
- if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
- && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
- && flag_code != CODE_64BIT)
- return (const reg_entry *) NULL;
-
- if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
- && !intel_syntax)
- return (const reg_entry *) NULL;
-
- return r;
+ return r && check_register (r) ? r : NULL;
}
/* REG_STRING starts *before* REGISTER_PREFIX. */
know (e->X_add_number >= 0
&& (valueT) e->X_add_number < i386_regtab_size);
r = i386_regtab + e->X_add_number;
- if ((r->reg_flags & RegVRex))
- i.vec_encoding = vex_encoding_evex;
+ if (!check_register (r))
+ {
+ as_bad (_("register '%s%s' cannot be used here"),
+ register_prefix, r->reg_name);
+ r = &bad_reg;
+ }
*end_op = input_line_pointer;
}
*input_line_pointer = c;
{
*nextcharP = *input_line_pointer;
*input_line_pointer = 0;
- e->X_op = O_register;
- e->X_add_number = r - i386_regtab;
+ if (r != &bad_reg)
+ {
+ e->X_op = O_register;
+ e->X_add_number = r - i386_regtab;
+ }
+ else
+ e->X_op = O_illegal;
return 1;
}
input_line_pointer = end;
#define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
#define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
#define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
+#define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
+#define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
+#define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
+#define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
+#define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
+#define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
+#define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
struct option md_longopts[] =
{
{"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
{"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
{"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
+ {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
+ {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
+ {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
+ {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
+ {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
+ {"mlfence-before-indirect-branch", required_argument, NULL,
+ OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
+ {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
{"mamd64", no_argument, NULL, OPTION_MAMD64},
{"mintel64", no_argument, NULL, OPTION_MINTEL64},
{NULL, no_argument, NULL, 0}
md_parse_option (int c, const char *arg)
{
unsigned int j;
- char *arch, *next, *saved;
+ char *arch, *next, *saved, *type;
switch (c)
{
as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
break;
+ case OPTION_MLFENCE_AFTER_LOAD:
+ if (strcasecmp (arg, "yes") == 0)
+ lfence_after_load = 1;
+ else if (strcasecmp (arg, "no") == 0)
+ lfence_after_load = 0;
+ else
+ as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
+ break;
+
+ case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
+ if (strcasecmp (arg, "all") == 0)
+ {
+ lfence_before_indirect_branch = lfence_branch_all;
+ if (lfence_before_ret == lfence_before_ret_none)
+ lfence_before_ret = lfence_before_ret_shl;
+ }
+ else if (strcasecmp (arg, "memory") == 0)
+ lfence_before_indirect_branch = lfence_branch_memory;
+ else if (strcasecmp (arg, "register") == 0)
+ lfence_before_indirect_branch = lfence_branch_register;
+ else if (strcasecmp (arg, "none") == 0)
+ lfence_before_indirect_branch = lfence_branch_none;
+ else
+ as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
+ arg);
+ break;
+
+ case OPTION_MLFENCE_BEFORE_RET:
+ if (strcasecmp (arg, "or") == 0)
+ lfence_before_ret = lfence_before_ret_or;
+ else if (strcasecmp (arg, "not") == 0)
+ lfence_before_ret = lfence_before_ret_not;
+ else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
+ lfence_before_ret = lfence_before_ret_shl;
+ else if (strcasecmp (arg, "none") == 0)
+ lfence_before_ret = lfence_before_ret_none;
+ else
+ as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
+ arg);
+ break;
+
case OPTION_MRELAX_RELOCATIONS:
if (strcasecmp (arg, "yes") == 0)
generate_relax_relocations = 1;
as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
break;
+ case OPTION_MALIGN_BRANCH_BOUNDARY:
+ {
+ char *end;
+ long int align = strtoul (arg, &end, 0);
+ if (*end == '\0')
+ {
+ if (align == 0)
+ {
+ align_branch_power = 0;
+ break;
+ }
+ else if (align >= 16)
+ {
+ int align_power;
+ for (align_power = 0;
+ (align & 1) == 0;
+ align >>= 1, align_power++)
+ continue;
+ /* Limit alignment power to 31. */
+ if (align == 1 && align_power < 32)
+ {
+ align_branch_power = align_power;
+ break;
+ }
+ }
+ }
+ as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
+ }
+ break;
+
+ case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
+ {
+ char *end;
+ int align = strtoul (arg, &end, 0);
+ /* Some processors only support 5 prefixes. */
+ if (*end == '\0' && align >= 0 && align < 6)
+ {
+ align_branch_prefix_size = align;
+ break;
+ }
+ as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
+ arg);
+ }
+ break;
+
+ case OPTION_MALIGN_BRANCH:
+ align_branch = 0;
+ saved = xstrdup (arg);
+ type = saved;
+ do
+ {
+ next = strchr (type, '+');
+ if (next)
+ *next++ = '\0';
+ if (strcasecmp (type, "jcc") == 0)
+ align_branch |= align_branch_jcc_bit;
+ else if (strcasecmp (type, "fused") == 0)
+ align_branch |= align_branch_fused_bit;
+ else if (strcasecmp (type, "jmp") == 0)
+ align_branch |= align_branch_jmp_bit;
+ else if (strcasecmp (type, "call") == 0)
+ align_branch |= align_branch_call_bit;
+ else if (strcasecmp (type, "ret") == 0)
+ align_branch |= align_branch_ret_bit;
+ else if (strcasecmp (type, "indirect") == 0)
+ align_branch |= align_branch_indirect_bit;
+ else
+ as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
+ type = next;
+ }
+ while (next != NULL);
+ free (saved);
+ break;
+
+ case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
+ align_branch_power = 5;
+ align_branch_prefix_size = 5;
+ align_branch = (align_branch_jcc_bit
+ | align_branch_fused_bit
+ | align_branch_jmp_bit);
+ break;
+
case OPTION_MAMD64:
- intel64 = 0;
+ isa64 = amd64;
break;
case OPTION_MINTEL64:
- intel64 = 1;
+ isa64 = intel64;
break;
case 'O':
fprintf (stream, _("\
generate relax relocations\n"));
fprintf (stream, _("\
+ -malign-branch-boundary=NUM (default: 0)\n\
+ align branches within NUM byte boundary\n"));
+ fprintf (stream, _("\
+ -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
+ TYPE is combination of jcc, fused, jmp, call, ret,\n\
+ indirect\n\
+ specify types of branches to align\n"));
+ fprintf (stream, _("\
+ -malign-branch-prefix-size=NUM (default: 5)\n\
+ align branches with NUM prefixes per instruction\n"));
+ fprintf (stream, _("\
+ -mbranches-within-32B-boundaries\n\
+ align branches within 32 byte boundary\n"));
+ fprintf (stream, _("\
+ -mlfence-after-load=[no|yes] (default: no)\n\
+ generate lfence after load\n"));
+ fprintf (stream, _("\
+ -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
+ generate lfence before indirect near branch\n"));
+ fprintf (stream, _("\
+ -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
+ generate lfence before ret\n"));
+ fprintf (stream, _("\
-mamd64 accept only AMD64 ISA [default]\n"));
fprintf (stream, _("\
-mintel64 accept only Intel64 ISA\n"));
if (flag_code == CODE_64BIT)
return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
else
- return "pe-i386";
+ return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
# elif defined (TE_GO32)
case bfd_target_coff_flavour:
return "coff-go32";
{
default:
format = ELF_TARGET_FORMAT;
+#ifndef TE_SOLARIS
+ tls_get_addr = "___tls_get_addr";
+#endif
break;
case X86_64_ABI:
use_rela_relocations = 1;
object_64bit = 1;
+#ifndef TE_SOLARIS
+ tls_get_addr = "__tls_get_addr";
+#endif
format = ELF_TARGET_FORMAT64;
break;
case X86_64_X32_ABI:
use_rela_relocations = 1;
object_64bit = 1;
+#ifndef TE_SOLARIS
+ tls_get_addr = "__tls_get_addr";
+#endif
disallow_64bit_reloc = 1;
format = ELF_TARGET_FORMAT32;
break;
if (symbol_find (name))
as_bad (_("GOT already in symbol table"));
GOT_symbol = symbol_new (name, undefined_section,
- (valueT) 0, &zero_address_frag);
+ &zero_address_frag, 0);
};
return GOT_symbol;
}
#endif
+/* Remember constant directive. */
+
+void
+i386_cons_align (int ignore ATTRIBUTE_UNUSED)
+{
+ if (last_insn.kind != last_insn_directive
+ && (bfd_section_flags (now_seg) & SEC_CODE))
+ {
+ last_insn.seg = now_seg;
+ last_insn.kind = last_insn_directive;
+ last_insn.name = "constant directive";
+ last_insn.file = as_where (&last_insn.line);
+ if (lfence_before_ret != lfence_before_ret_none)
+ {
+ if (lfence_before_indirect_branch != lfence_branch_none)
+ as_warn (_("constant directive skips -mlfence-before-ret "
+ "and -mlfence-before-indirect-branch"));
+ else
+ as_warn (_("constant directive skips -mlfence-before-ret"));
+ }
+ else if (lfence_before_indirect_branch != lfence_branch_none)
+ as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
+ }
+}
+
void
i386_validate_fix (fixS *fixp)
{
}
}
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
- else if (!object_64bit)
+ else
{
- if (fixp->fx_r_type == BFD_RELOC_386_GOT32
- && fixp->fx_tcbit2)
- fixp->fx_r_type = BFD_RELOC_386_GOT32X;
+ /* NB: Commit 292676c1 resolved PLT32 reloc aganst local symbol
+ to section. Since PLT32 relocation must be against symbols,
+ turn such PLT32 relocation into PC32 relocation. */
+ if (fixp->fx_addsy
+ && (fixp->fx_r_type == BFD_RELOC_386_PLT32
+ || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
+ && symbol_section_p (fixp->fx_addsy))
+ fixp->fx_r_type = BFD_RELOC_32_PCREL;
+ if (!object_64bit)
+ {
+ if (fixp->fx_r_type == BFD_RELOC_386_GOT32
+ && fixp->fx_tcbit2)
+ fixp->fx_r_type = BFD_RELOC_386_GOT32X;
+ }
}
#endif
}