1 /* tc-i386.c -- Assemble code for the Intel 80386
2 Copyright (C) 1989-2023 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* Intel 80386 machine specific gas.
22 Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
23 x86_64 support by Jan Hubicka (jh@suse.cz)
24 VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
25 Bugs & suggestions are completely welcome. This is free software.
26 Please help us make it better. */
27
28 #include "as.h"
29 #include "safe-ctype.h"
30 #include "subsegs.h"
31 #include "dwarf2dbg.h"
32 #include "dw2gencfi.h"
33 #include "gen-sframe.h"
34 #include "sframe.h"
35 #include "elf/x86-64.h"
36 #include "opcodes/i386-init.h"
37 #include "opcodes/i386-mnem.h"
38 #include <limits.h>
39
40 #ifndef INFER_ADDR_PREFIX
41 #define INFER_ADDR_PREFIX 1
42 #endif
43
44 #ifndef DEFAULT_ARCH
45 #define DEFAULT_ARCH "i386"
46 #endif
47
48 #ifndef INLINE
49 #if __GNUC__ >= 2
50 #define INLINE __inline__
51 #else
52 #define INLINE
53 #endif
54 #endif
55
56 /* Prefixes will be emitted in the order defined below.
57      WAIT_PREFIX must be the first prefix since FWAIT really is an
58 instruction, and so must come before any prefixes.
59 The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
60 REP_PREFIX/HLE_PREFIX, LOCK_PREFIX. */
61 #define WAIT_PREFIX 0
62 #define SEG_PREFIX 1
63 #define ADDR_PREFIX 2
64 #define DATA_PREFIX 3
65 #define REP_PREFIX 4
66 #define HLE_PREFIX REP_PREFIX
67 #define BND_PREFIX REP_PREFIX
68 #define LOCK_PREFIX 5
69 #define REX_PREFIX 6 /* must come last. */
70 #define MAX_PREFIXES 7 /* max prefixes per opcode */
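/* Illustration of the slot order above (example, not from the opcode
   table): for "lock xchgw %ax, (%ebx)" in 32-bit mode the operand-size
   prefix 0x66 lands in slot DATA_PREFIX (3) and the lock prefix 0xf0
   in slot LOCK_PREFIX (5), so 0x66 is emitted before 0xf0.  */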
71
72 /* we define the syntax here (modulo base,index,scale syntax) */
73 #define REGISTER_PREFIX '%'
74 #define IMMEDIATE_PREFIX '$'
75 #define ABSOLUTE_PREFIX '*'
76
77 /* these are the instruction mnemonic suffixes in AT&T syntax or
78 memory operand size in Intel syntax. */
79 #define WORD_MNEM_SUFFIX 'w'
80 #define BYTE_MNEM_SUFFIX 'b'
81 #define SHORT_MNEM_SUFFIX 's'
82 #define LONG_MNEM_SUFFIX 'l'
83 #define QWORD_MNEM_SUFFIX 'q'
84
85 #define END_OF_INSN '\0'
86
87 #define OPERAND_TYPE_NONE { .bitfield = { .class = ClassNone } }
88
89 /* This matches the C -> StaticRounding alias in the opcode table. */
90 #define commutative staticrounding
91
92 /*
93 'templates' is for grouping together 'template' structures for opcodes
94 of the same name. This is only used for storing the insns in the grand
95 ole hash table of insns.
96 The templates themselves start at START and range up to (but not including)
97 END.
98 */
99 typedef struct
100 {
101 const insn_template *start;
102 const insn_template *end;
103 }
104 templates;
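/* A mnemonic's templates are then scanned with a loop of the shape
   below (illustrative only; match_template() does the real work):

     for (t = current_templates->start; t < current_templates->end; t++)
       ...try to match T against the parsed operands...  */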
105
106 /* 386 operand encoding bytes: see 386 book for details of this. */
107 typedef struct
108 {
109 unsigned int regmem; /* codes register or memory operand */
110 unsigned int reg; /* codes register operand (or extended opcode) */
111 unsigned int mode; /* how to interpret regmem & reg */
112 }
113 modrm_byte;
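/* Per the IA-32 manuals these fields pack into the encoded byte as
   (mode << 6) | (reg << 3) | regmem: mode 3 selects the register form
   of regmem, while modes 0/1/2 select memory forms with no, 8-bit, or
   16/32-bit displacement.  */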
114
115 /* x86-64 extension prefix. */
116 typedef int rex_byte;
117
118 /* 386 opcode byte to code indirect addressing. */
119 typedef struct
120 {
121 unsigned base;
122 unsigned index;
123 unsigned scale;
124 }
125 sib_byte;
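/* Likewise the encoded SIB byte is (scale << 6) | (index << 3) | base,
   with SCALE holding the log2 of the scale factor (1/2/4/8 -> 0..3);
   cf. log2_scale_factor in struct _i386_insn below.  */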
126
127 /* x86 arch names, types and features */
128 typedef struct
129 {
130 const char *name; /* arch name */
131 unsigned int len:8; /* arch string length */
132 bool skip:1; /* show_arch should skip this. */
133 enum processor_type type; /* arch type */
134 i386_cpu_flags enable; /* cpu feature enable flags */
135 i386_cpu_flags disable; /* cpu feature disable flags */
136 }
137 arch_entry;
138
139 static void update_code_flag (int, int);
140 static void s_insn (int);
141 static void set_code_flag (int);
142 static void set_16bit_gcc_code_flag (int);
143 static void set_intel_syntax (int);
144 static void set_intel_mnemonic (int);
145 static void set_allow_index_reg (int);
146 static void set_check (int);
147 static void set_cpu_arch (int);
148 #ifdef TE_PE
149 static void pe_directive_secrel (int);
150 static void pe_directive_secidx (int);
151 #endif
152 static void signed_cons (int);
153 static char *output_invalid (int c);
154 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
155 const char *);
156 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
157 const char *);
158 static int i386_att_operand (char *);
159 static int i386_intel_operand (char *, int);
160 static int i386_intel_simplify (expressionS *);
161 static int i386_intel_parse_name (const char *, expressionS *);
162 static const reg_entry *parse_register (const char *, char **);
163 static const char *parse_insn (const char *, char *, bool);
164 static char *parse_operands (char *, const char *);
165 static void swap_operands (void);
166 static void swap_2_operands (unsigned int, unsigned int);
167 static enum flag_code i386_addressing_mode (void);
168 static void optimize_imm (void);
169 static bool optimize_disp (const insn_template *t);
170 static const insn_template *match_template (char);
171 static int check_string (void);
172 static int process_suffix (void);
173 static int check_byte_reg (void);
174 static int check_long_reg (void);
175 static int check_qword_reg (void);
176 static int check_word_reg (void);
177 static int finalize_imm (void);
178 static int process_operands (void);
179 static const reg_entry *build_modrm_byte (void);
180 static void output_insn (void);
181 static void output_imm (fragS *, offsetT);
182 static void output_disp (fragS *, offsetT);
183 #ifndef I386COFF
184 static void s_bss (int);
185 #endif
186 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
187 static void handle_large_common (int small ATTRIBUTE_UNUSED);
188
189 /* GNU_PROPERTY_X86_ISA_1_USED. */
190 static unsigned int x86_isa_1_used;
191 /* GNU_PROPERTY_X86_FEATURE_2_USED. */
192 static unsigned int x86_feature_2_used;
193 /* Generate x86 used ISA and feature properties. */
194 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
195 #endif
196
197 static const char *default_arch = DEFAULT_ARCH;
198
199 /* parse_register() returns this when a register alias cannot be used. */
200 static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
201 { Dw2Inval, Dw2Inval } };
202
203 static const reg_entry *reg_eax;
204 static const reg_entry *reg_ds;
205 static const reg_entry *reg_es;
206 static const reg_entry *reg_ss;
207 static const reg_entry *reg_st0;
208 static const reg_entry *reg_k0;
209
210 /* VEX prefix. */
211 typedef struct
212 {
213      /* The VEX prefix is either 2 or 3 bytes; the EVEX prefix is 4 bytes.  */
214 unsigned char bytes[4];
215 unsigned int length;
216 /* Destination or source register specifier. */
217 const reg_entry *register_specifier;
218 } vex_prefix;
219
220 /* 'md_assemble ()' gathers together information and puts it into a
221 i386_insn. */
222
223 union i386_op
224 {
225 expressionS *disps;
226 expressionS *imms;
227 const reg_entry *regs;
228 };
229
230 enum i386_error
231 {
232 no_error, /* Must be first. */
233 operand_size_mismatch,
234 operand_type_mismatch,
235 register_type_mismatch,
236 number_of_operands_mismatch,
237 invalid_instruction_suffix,
238 bad_imm4,
239 unsupported_with_intel_mnemonic,
240 unsupported_syntax,
241 unsupported,
242 unsupported_on_arch,
243 unsupported_64bit,
244 invalid_sib_address,
245 invalid_vsib_address,
246 invalid_vector_register_set,
247 invalid_tmm_register_set,
248 invalid_dest_and_src_register_set,
249 unsupported_vector_index_register,
250 unsupported_broadcast,
251 broadcast_needed,
252 unsupported_masking,
253 mask_not_on_destination,
254 no_default_mask,
255 unsupported_rc_sae,
256 invalid_register_operand,
257 };
258
259 struct _i386_insn
260 {
261      /* TM holds the template for the insn we're currently assembling.  */
262 insn_template tm;
263
264 /* SUFFIX holds the instruction size suffix for byte, word, dword
265 or qword, if given. */
266 char suffix;
267
268 /* OPCODE_LENGTH holds the number of base opcode bytes. */
269 unsigned char opcode_length;
270
271 /* OPERANDS gives the number of given operands. */
272 unsigned int operands;
273
274 /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
275 of given register, displacement, memory operands and immediate
276 operands. */
277 unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
278
279 /* TYPES [i] is the type (see above #defines) which tells us how to
280 use OP[i] for the corresponding operand. */
281 i386_operand_type types[MAX_OPERANDS];
282
283 /* Displacement expression, immediate expression, or register for each
284 operand. */
285 union i386_op op[MAX_OPERANDS];
286
287 /* Flags for operands. */
288 unsigned int flags[MAX_OPERANDS];
289 #define Operand_PCrel 1
290 #define Operand_Mem 2
291 #define Operand_Signed 4 /* .insn only */
292
293 /* Relocation type for operand */
294 enum bfd_reloc_code_real reloc[MAX_OPERANDS];
295
296 /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
297 the base index byte below. */
298 const reg_entry *base_reg;
299 const reg_entry *index_reg;
300 unsigned int log2_scale_factor;
301
302 /* SEG gives the seg_entries of this insn. They are zero unless
303 explicit segment overrides are given. */
304 const reg_entry *seg[2];
305
306 /* PREFIX holds all the given prefix opcodes (usually null).
307 PREFIXES is the number of prefix opcodes. */
308 unsigned int prefixes;
309 unsigned char prefix[MAX_PREFIXES];
310
311 /* .insn allows for reserved opcode spaces. */
312 unsigned char insn_opcode_space;
313
314 /* .insn also allows (requires) specifying immediate size. */
315 unsigned char imm_bits[MAX_OPERANDS];
316
317 /* Register is in low 3 bits of opcode. */
318 bool short_form;
319
320 /* The operand to a branch insn indicates an absolute branch. */
321 bool jumpabsolute;
322
323 /* The operand to a branch insn indicates a far branch. */
324 bool far_branch;
325
326      /* There is a memory operand of (%dx) which should only be used
327         with input/output instructions.  */
328 bool input_output_operand;
329
330 /* Extended states. */
331 enum
332 {
333 /* Use MMX state. */
334 xstate_mmx = 1 << 0,
335 /* Use XMM state. */
336 xstate_xmm = 1 << 1,
337 /* Use YMM state. */
338 xstate_ymm = 1 << 2 | xstate_xmm,
339 /* Use ZMM state. */
340 xstate_zmm = 1 << 3 | xstate_ymm,
341 /* Use TMM state. */
342 xstate_tmm = 1 << 4,
343 /* Use MASK state. */
344 xstate_mask = 1 << 5
345 } xstate;
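/* Worked example of the bit composition: xstate_ymm is
   (1 << 2) | xstate_xmm == 6 and xstate_zmm is (1 << 3) | xstate_ymm
   == 14, so a test like (xstate & xstate_xmm) fires whenever XMM, YMM,
   or ZMM state is in use.  */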
346
347 /* Has GOTPC or TLS relocation. */
348 bool has_gotpc_tls_reloc;
349
350 /* RM and SIB are the modrm byte and the sib byte where the
351 addressing modes of this insn are encoded. */
352 modrm_byte rm;
353 rex_byte rex;
354 rex_byte vrex;
355 sib_byte sib;
356 vex_prefix vex;
357
358 /* Masking attributes.
359
360 The struct describes masking, applied to OPERAND in the instruction.
361 REG is a pointer to the corresponding mask register. ZEROING tells
362 whether merging or zeroing mask is used. */
363 struct Mask_Operation
364 {
365 const reg_entry *reg;
366 unsigned int zeroing;
367 /* The operand where this operation is associated. */
368 unsigned int operand;
369 } mask;
370
371 /* Rounding control and SAE attributes. */
372 struct RC_Operation
373 {
374 enum rc_type
375 {
376 rc_none = -1,
377 rne,
378 rd,
379 ru,
380 rz,
381 saeonly
382 } type;
383 /* In Intel syntax the operand modifier form is supposed to be used, but
384 we continue to accept the immediate forms as well. */
385 bool modifier;
386 } rounding;
387
388 /* Broadcasting attributes.
389
390         The struct describes broadcasting, applied to OPERAND.  TYPE
391         expresses the broadcast factor.  */
392 struct Broadcast_Operation
393 {
394 /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}. */
395 unsigned int type;
396
397 /* Index of broadcasted operand. */
398 unsigned int operand;
399
400 /* Number of bytes to broadcast. */
401 unsigned int bytes;
402 } broadcast;
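/* Plausible reading of the fields (AT&T syntax example): for
   "vaddps (%rax){1to16}, %zmm0, %zmm1", TYPE would be 16, OPERAND the
   index of the memory operand, and BYTES the size of the single
   element being broadcast (4 for a dword element).  */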
403
404 /* Compressed disp8*N attribute. */
405 unsigned int memshift;
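/* This implements the EVEX disp8*N scaling from the SDM: an 8-bit
   displacement is scaled by N == 1 << memshift.  E.g. a full 64-byte
   ZMM access such as "vmovaps 64(%rax), %zmm0" uses memshift 6, so the
   displacement of 64 encodes as disp8 == 1.  */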
406
407 /* Prefer load or store in encoding. */
408 enum
409 {
410 dir_encoding_default = 0,
411 dir_encoding_load,
412 dir_encoding_store,
413 dir_encoding_swap
414 } dir_encoding;
415
416 /* Prefer 8bit, 16bit, 32bit displacement in encoding. */
417 enum
418 {
419 disp_encoding_default = 0,
420 disp_encoding_8bit,
421 disp_encoding_16bit,
422 disp_encoding_32bit
423 } disp_encoding;
424
425 /* Prefer the REX byte in encoding. */
426 bool rex_encoding;
427
428 /* Disable instruction size optimization. */
429 bool no_optimize;
430
431 /* How to encode vector instructions. */
432 enum
433 {
434 vex_encoding_default = 0,
435 vex_encoding_vex,
436 vex_encoding_vex3,
437 vex_encoding_evex,
438 vex_encoding_error
439 } vec_encoding;
440
441 /* REP prefix. */
442 const char *rep_prefix;
443
444 /* HLE prefix. */
445 const char *hle_prefix;
446
447 /* Have BND prefix. */
448 const char *bnd_prefix;
449
450 /* Have NOTRACK prefix. */
451 const char *notrack_prefix;
452
453 /* Error message. */
454 enum i386_error error;
455 };
456
457 typedef struct _i386_insn i386_insn;
458
459 /* Links an RC type with the corresponding string that is looked for in
460    the assembly input.  */
461 struct RC_name
462 {
463 enum rc_type type;
464 const char *name;
465 unsigned int len;
466 };
467
468 static const struct RC_name RC_NamesTable[] =
469 {
470 { rne, STRING_COMMA_LEN ("rn-sae") },
471 { rd, STRING_COMMA_LEN ("rd-sae") },
472 { ru, STRING_COMMA_LEN ("ru-sae") },
473 { rz, STRING_COMMA_LEN ("rz-sae") },
474 { saeonly, STRING_COMMA_LEN ("sae") },
475 };
476
477 /* To be indexed by segment register number. */
478 static const unsigned char i386_seg_prefixes[] = {
479 ES_PREFIX_OPCODE,
480 CS_PREFIX_OPCODE,
481 SS_PREFIX_OPCODE,
482 DS_PREFIX_OPCODE,
483 FS_PREFIX_OPCODE,
484 GS_PREFIX_OPCODE
485 };
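/* Example lookup: %ds has segment register number 3, so
   i386_seg_prefixes[3] == DS_PREFIX_OPCODE (0x3e in the IA-32 manuals);
   likewise %es at index 0 yields 0x26.  */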
486
487 /* List of chars besides those in app.c:symbol_chars that can start an
488 operand. Used to prevent the scrubber eating vital white-space. */
489 const char extra_symbol_chars[] = "*%-([{}"
490 #ifdef LEX_AT
491 "@"
492 #endif
493 #ifdef LEX_QM
494 "?"
495 #endif
496 ;
497
498 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
499 && !defined (TE_GNU) \
500 && !defined (TE_LINUX) \
501 && !defined (TE_Haiku) \
502 && !defined (TE_FreeBSD) \
503 && !defined (TE_DragonFly) \
504 && !defined (TE_NetBSD))
505 /* This array holds the chars that always start a comment. If the
506 pre-processor is disabled, these aren't very useful. The option
507 --divide will remove '/' from this list. */
508 const char *i386_comment_chars = "#/";
509 #define SVR4_COMMENT_CHARS 1
510 #define PREFIX_SEPARATOR '\\'
511
512 #else
513 const char *i386_comment_chars = "#";
514 #define PREFIX_SEPARATOR '/'
515 #endif
516
517 /* This array holds the chars that only start a comment at the beginning of
518 a line. If the line seems to have the form '# 123 filename'
519 .line and .file directives will appear in the pre-processed output.
520 Note that input_file.c hand checks for '#' at the beginning of the
521 first line of the input file. This is because the compiler outputs
522 #NO_APP at the beginning of its output.
523 Also note that comments started like this one will always work if
524 '/' isn't otherwise defined. */
525 const char line_comment_chars[] = "#/";
526
527 const char line_separator_chars[] = ";";
528
529 /* Chars that can be used to separate mant from exp in floating point
530 nums. */
531 const char EXP_CHARS[] = "eE";
532
533 /* Chars that mean this number is a floating point constant
534 As in 0f12.456
535 or 0d1.2345e12. */
536 const char FLT_CHARS[] = "fFdDxXhHbB";
537
538 /* Tables for lexical analysis. */
539 static char mnemonic_chars[256];
540 static char register_chars[256];
541 static char operand_chars[256];
542
543 /* Lexical macros. */
544 #define is_operand_char(x) (operand_chars[(unsigned char) x])
545 #define is_register_char(x) (register_chars[(unsigned char) x])
546 #define is_space_char(x) ((x) == ' ')
547
548 /* All non-digit non-letter characters that may occur in an operand and
549 which aren't already in extra_symbol_chars[]. */
550 static const char operand_special_chars[] = "$+,)._~/<>|&^!=:@]";
551
552 /* md_assemble() always leaves the strings it's passed unaltered. To
553 effect this we maintain a stack of saved characters that we've smashed
554 with '\0's (indicating end of strings for various sub-fields of the
555 assembler instruction). */
556 static char save_stack[32];
557 static char *save_stack_p;
558 #define END_STRING_AND_SAVE(s) \
559 do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
560 #define RESTORE_END_STRING(s) \
561 do { *(s) = *--save_stack_p; } while (0)
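/* Typical usage, LIFO-nested (illustrative):

     END_STRING_AND_SAVE (op_end);    - NUL-terminate at OP_END
     ...parse the now-terminated substring...
     RESTORE_END_STRING (op_end);     - put the saved character back

   save_stack leaves room for 32 outstanding saves.  */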
562
563 /* The instruction we're assembling. */
564 static i386_insn i;
565
566 /* Possible templates for current insn. */
567 static const templates *current_templates;
568
569 /* Per instruction expressionS buffers: max displacements & immediates. */
570 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
571 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
572
573 /* Current operand we are working on. */
574 static int this_operand = -1;
575
576 /* Are we processing a .insn directive? */
577 #define dot_insn() (i.tm.mnem_off == MN__insn)
578
579 /* We support three different modes (16-, 32- and 64-bit code).  The
580    FLAG_CODE variable is used to distinguish these.  */
581
582 enum flag_code {
583 CODE_32BIT,
584 CODE_16BIT,
585 CODE_64BIT };
586
587 static enum flag_code flag_code;
588 static unsigned int object_64bit;
589 static unsigned int disallow_64bit_reloc;
590 static int use_rela_relocations = 0;
591 /* __tls_get_addr/___tls_get_addr symbol for TLS. */
592 static const char *tls_get_addr;
593
594 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
595 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
596 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
597
598 /* The ELF ABI to use. */
599 enum x86_elf_abi
600 {
601 I386_ABI,
602 X86_64_ABI,
603 X86_64_X32_ABI
604 };
605
606 static enum x86_elf_abi x86_elf_abi = I386_ABI;
607 #endif
608
609 #if defined (TE_PE) || defined (TE_PEP)
610 /* Use big object file format. */
611 static int use_big_obj = 0;
612 #endif
613
614 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
615 /* 1 if generating code for a shared library. */
616 static int shared = 0;
617
618 unsigned int x86_sframe_cfa_sp_reg;
619 /* The other CFA base register for SFrame stack trace info. */
620 unsigned int x86_sframe_cfa_fp_reg;
621 unsigned int x86_sframe_cfa_ra_reg;
622
623 #endif
624
625 /* 1 for intel syntax,
626 0 if att syntax. */
627 static int intel_syntax = 0;
628
629 static enum x86_64_isa
630 {
631 amd64 = 1, /* AMD64 ISA. */
632 intel64 /* Intel64 ISA. */
633 } isa64;
634
635 /* 1 for intel mnemonic,
636 0 if att mnemonic. */
637 static int intel_mnemonic = !SYSV386_COMPAT;
638
639 /* 1 if pseudo registers are permitted. */
640 static int allow_pseudo_reg = 0;
641
642 /* 1 if register prefix % not required. */
643 static int allow_naked_reg = 0;
644
645 /* 1 if the assembler should add BND prefix for all control-transferring
646 instructions supporting it, even if this prefix wasn't specified
647 explicitly. */
648 static int add_bnd_prefix = 0;
649
650 /* 1 if pseudo index register, eiz/riz, is allowed.  */
651 static int allow_index_reg = 0;
652
653 /* 1 if the assembler should ignore LOCK prefix, even if it was
654 specified explicitly. */
655 static int omit_lock_prefix = 0;
656
657 /* 1 if the assembler should encode lfence, mfence, and sfence as
658 "lock addl $0, (%{re}sp)". */
659 static int avoid_fence = 0;
660
661 /* 1 if lfence should be inserted after every load. */
662 static int lfence_after_load = 0;
663
664 /* Non-zero if lfence should be inserted before indirect branch. */
665 static enum lfence_before_indirect_branch_kind
666 {
667 lfence_branch_none = 0,
668 lfence_branch_register,
669 lfence_branch_memory,
670 lfence_branch_all
671 }
672 lfence_before_indirect_branch;
673
674 /* Non-zero if lfence should be inserted before ret. */
675 static enum lfence_before_ret_kind
676 {
677 lfence_before_ret_none = 0,
678 lfence_before_ret_not,
679 lfence_before_ret_or,
680 lfence_before_ret_shl
681 }
682 lfence_before_ret;
683
684 /* Kind of the previous instruction: other, directive (e.g. .byte), or prefix.  */
685 static struct
686 {
687 segT seg;
688 const char *file;
689 const char *name;
690 unsigned int line;
691 enum last_insn_kind
692 {
693 last_insn_other = 0,
694 last_insn_directive,
695 last_insn_prefix
696 } kind;
697 } last_insn;
698
699 /* 1 if the assembler should generate relax relocations. */
700
701 static int generate_relax_relocations
702 = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
703
704 static enum check_kind
705 {
706 check_none = 0,
707 check_warning,
708 check_error
709 }
710 sse_check, operand_check = check_warning;
711
712 /* Non-zero if branches should be aligned within power of 2 boundary. */
713 static int align_branch_power = 0;
714
715 /* Types of branches to align. */
716 enum align_branch_kind
717 {
718 align_branch_none = 0,
719 align_branch_jcc = 1,
720 align_branch_fused = 2,
721 align_branch_jmp = 3,
722 align_branch_call = 4,
723 align_branch_indirect = 5,
724 align_branch_ret = 6
725 };
726
727 /* Type bits of branches to align. */
728 enum align_branch_bit
729 {
730 align_branch_jcc_bit = 1 << align_branch_jcc,
731 align_branch_fused_bit = 1 << align_branch_fused,
732 align_branch_jmp_bit = 1 << align_branch_jmp,
733 align_branch_call_bit = 1 << align_branch_call,
734 align_branch_indirect_bit = 1 << align_branch_indirect,
735 align_branch_ret_bit = 1 << align_branch_ret
736 };
737
738 static unsigned int align_branch = (align_branch_jcc_bit
739 | align_branch_fused_bit
740 | align_branch_jmp_bit);
741
742 /* Types of condition jump used by macro-fusion. */
743 enum mf_jcc_kind
744 {
745 mf_jcc_jo = 0, /* base opcode 0x70 */
746 mf_jcc_jc, /* base opcode 0x72 */
747 mf_jcc_je, /* base opcode 0x74 */
748 mf_jcc_jna, /* base opcode 0x76 */
749 mf_jcc_js, /* base opcode 0x78 */
750 mf_jcc_jp, /* base opcode 0x7a */
751 mf_jcc_jl, /* base opcode 0x7c */
752 mf_jcc_jle, /* base opcode 0x7e */
753 };
754
755 /* Types of compare flag-modifying instructions used by macro-fusion.  */
756 enum mf_cmp_kind
757 {
758 mf_cmp_test_and, /* test/cmp */
759 mf_cmp_alu_cmp, /* add/sub/cmp */
760 mf_cmp_incdec /* inc/dec */
761 };
762
763 /* The maximum padding size for fused jcc. CMP like instruction can
764 be 9 bytes and jcc can be 6 bytes. Leave room just in case for
765 prefixes. */
766 #define MAX_FUSED_JCC_PADDING_SIZE 20
767
768 /* The maximum number of prefixes added for an instruction. */
769 static unsigned int align_branch_prefix_size = 5;
770
771 /* Optimization:
772 1. Clear the REX_W bit with register operand if possible.
773 2. Above plus use 128bit vector instruction to clear the full vector
774 register.
775 */
776 static int optimize = 0;
777
778 /* Optimization:
779 1. Clear the REX_W bit with register operand if possible.
780 2. Above plus use 128bit vector instruction to clear the full vector
781 register.
782 3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
783 "testb $imm7,%r8".
784 */
785 static int optimize_for_space = 0;
786
787 /* Register prefix used for error message. */
788 static const char *register_prefix = "%";
789
790 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
791 leave, push, and pop instructions so that gcc has the same stack
792 frame as in 32 bit mode. */
793 static char stackop_size = '\0';
794
795 /* Non-zero to optimize code alignment. */
796 int optimize_align_code = 1;
797
798 /* Non-zero to quieten some warnings. */
799 static int quiet_warnings = 0;
800
801 /* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs. */
802 static bool pre_386_16bit_warned;
803
804 /* CPU name. */
805 static const char *cpu_arch_name = NULL;
806 static char *cpu_sub_arch_name = NULL;
807
808 /* CPU feature flags. */
809 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
810
811 /* If we have selected a cpu we are generating instructions for. */
812 static int cpu_arch_tune_set = 0;
813
814 /* Cpu we are generating instructions for. */
815 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
816
817 /* CPU feature flags of cpu we are generating instructions for. */
818 static i386_cpu_flags cpu_arch_tune_flags;
819
820 /* CPU instruction set architecture used. */
821 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
822
823 /* CPU feature flags of instruction set architecture used. */
824 i386_cpu_flags cpu_arch_isa_flags;
825
826 /* If set, conditional jumps are not automatically promoted to handle
827    offsets larger than a byte.  */
828 static bool no_cond_jump_promotion = false;
829
830 /* This will be set from an expression parser hook if there's any
831 applicable operator involved in an expression. */
832 static enum {
833 expr_operator_none,
834 expr_operator_present,
835 expr_large_value,
836 } expr_mode;
837
838 /* Encode SSE instructions with VEX prefix. */
839 static unsigned int sse2avx;
840
841 /* Encode aligned vector move as unaligned vector move. */
842 static unsigned int use_unaligned_vector_move;
843
844 /* Encode scalar AVX instructions with specific vector length. */
845 static enum
846 {
847 vex128 = 0,
848 vex256
849 } avxscalar;
850
851 /* Encode VEX WIG instructions with specific vex.w. */
852 static enum
853 {
854 vexw0 = 0,
855 vexw1
856 } vexwig;
857
858 /* Encode scalar EVEX LIG instructions with specific vector length. */
859 static enum
860 {
861 evexl128 = 0,
862 evexl256,
863 evexl512
864 } evexlig;
865
866 /* Encode EVEX WIG instructions with specific evex.w. */
867 static enum
868 {
869 evexw0 = 0,
870 evexw1
871 } evexwig;
872
873 /* Value to encode in EVEX RC bits, for SAE-only instructions. */
874 static enum rc_type evexrcig = rne;
875
876 /* Pre-defined "_GLOBAL_OFFSET_TABLE_". */
877 static symbolS *GOT_symbol;
878
879 /* The dwarf2 return column, adjusted for 32 or 64 bit. */
880 unsigned int x86_dwarf2_return_column;
881
882 /* The dwarf2 data alignment, adjusted for 32 or 64 bit. */
883 int x86_cie_data_alignment;
884
885 /* Interface to relax_segment.
886 There are 3 major relax states for 386 jump insns because the
887 different types of jumps add different sizes to frags when we're
888 figuring out what sort of jump to choose to reach a given label.
889
890 BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
891 branches which are handled by md_estimate_size_before_relax() and
892 i386_generic_table_relax_frag(). */
893
894 /* Types. */
895 #define UNCOND_JUMP 0
896 #define COND_JUMP 1
897 #define COND_JUMP86 2
898 #define BRANCH_PADDING 3
899 #define BRANCH_PREFIX 4
900 #define FUSED_JCC_PADDING 5
901
902 /* Sizes. */
903 #define CODE16 1
904 #define SMALL 0
905 #define SMALL16 (SMALL | CODE16)
906 #define BIG 2
907 #define BIG16 (BIG | CODE16)
908
909 #ifndef INLINE
910 #ifdef __GNUC__
911 #define INLINE __inline__
912 #else
913 #define INLINE
914 #endif
915 #endif
916
917 #define ENCODE_RELAX_STATE(type, size) \
918 ((relax_substateT) (((type) << 2) | (size)))
919 #define TYPE_FROM_RELAX_STATE(s) \
920 ((s) >> 2)
921 #define DISP_SIZE_FROM_RELAX_STATE(s) \
922 ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
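/* Worked example: ENCODE_RELAX_STATE (COND_JUMP, BIG) is
   (1 << 2) | 2 == 6; TYPE_FROM_RELAX_STATE (6) recovers COND_JUMP and
   DISP_SIZE_FROM_RELAX_STATE (6) yields 4 displacement bytes.  The
   encoded state also serves as the index into md_relax_table[] below
   (entry 6 is the dword conditional).  */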
923
924 /* This table is used by relax_frag to promote short jumps to long
925 ones where necessary. SMALL (short) jumps may be promoted to BIG
926 (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long). We
927 don't allow a short jump in a 32 bit code segment to be promoted to
928 a 16 bit offset jump because it's slower (requires data size
929 prefix), and doesn't work, unless the destination is in the bottom
930 64k of the code segment (The top 16 bits of eip are zeroed). */
931
932 const relax_typeS md_relax_table[] =
933 {
934 /* The fields are:
935 1) most positive reach of this state,
936 2) most negative reach of this state,
937 3) how many bytes this mode will have in the variable part of the frag
938 4) which index into the table to try if we can't fit into this one. */
939
940 /* UNCOND_JUMP states. */
941 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
942 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
943 /* dword jmp adds 4 bytes to frag:
944 0 extra opcode bytes, 4 displacement bytes. */
945 {0, 0, 4, 0},
946   /* word jmp adds 2 bytes to frag:
947 0 extra opcode bytes, 2 displacement bytes. */
948 {0, 0, 2, 0},
949
950 /* COND_JUMP states. */
951 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
952 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
953   /* dword conditionals add 5 bytes to frag:
954 1 extra opcode byte, 4 displacement bytes. */
955 {0, 0, 5, 0},
956 /* word conditionals add 3 bytes to frag:
957 1 extra opcode byte, 2 displacement bytes. */
958 {0, 0, 3, 0},
959
960 /* COND_JUMP86 states. */
961 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
962 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
963   /* dword conditionals add 5 bytes to frag:
964 1 extra opcode byte, 4 displacement bytes. */
965 {0, 0, 5, 0},
966 /* word conditionals add 4 bytes to frag:
967 1 displacement byte and a 3 byte long branch insn. */
968 {0, 0, 4, 0}
969 };
970
971 #define ARCH(n, t, f, s) \
972 { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, CPU_ ## f ## _FLAGS, \
973 CPU_NONE_FLAGS }
974 #define SUBARCH(n, e, d, s) \
975 { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, CPU_ ## e ## _FLAGS, \
976 CPU_ ## d ## _FLAGS }
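/* For instance, ARCH (i386, I386, 386, false) below expands to
   { "i386", 4, false, PROCESSOR_I386, CPU_386_FLAGS, CPU_NONE_FLAGS },
   i.e. an arch_entry named "i386" (length 4) that show_arch does not
   skip, enabling CPU_386_FLAGS and disabling nothing.  */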
977
978 static const arch_entry cpu_arch[] =
979 {
980 /* Do not replace the first two entries - i386_target_format() and
981 set_cpu_arch() rely on them being there in this order. */
982 ARCH (generic32, GENERIC32, GENERIC32, false),
983 ARCH (generic64, GENERIC64, GENERIC64, false),
984 ARCH (i8086, UNKNOWN, NONE, false),
985 ARCH (i186, UNKNOWN, 186, false),
986 ARCH (i286, UNKNOWN, 286, false),
987 ARCH (i386, I386, 386, false),
988 ARCH (i486, I486, 486, false),
989 ARCH (i586, PENTIUM, 586, false),
990 ARCH (i686, PENTIUMPRO, 686, false),
991 ARCH (pentium, PENTIUM, 586, false),
992 ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
993 ARCH (pentiumii, PENTIUMPRO, P2, false),
994 ARCH (pentiumiii, PENTIUMPRO, P3, false),
995 ARCH (pentium4, PENTIUM4, P4, false),
996 ARCH (prescott, NOCONA, CORE, false),
997 ARCH (nocona, NOCONA, NOCONA, false),
998 ARCH (yonah, CORE, CORE, true),
999 ARCH (core, CORE, CORE, false),
1000 ARCH (merom, CORE2, CORE2, true),
1001 ARCH (core2, CORE2, CORE2, false),
1002 ARCH (corei7, COREI7, COREI7, false),
1003 ARCH (iamcu, IAMCU, IAMCU, false),
1004 ARCH (k6, K6, K6, false),
1005 ARCH (k6_2, K6, K6_2, false),
1006 ARCH (athlon, ATHLON, ATHLON, false),
1007 ARCH (sledgehammer, K8, K8, true),
1008 ARCH (opteron, K8, K8, false),
1009 ARCH (k8, K8, K8, false),
1010 ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
1011 ARCH (bdver1, BD, BDVER1, false),
1012 ARCH (bdver2, BD, BDVER2, false),
1013 ARCH (bdver3, BD, BDVER3, false),
1014 ARCH (bdver4, BD, BDVER4, false),
1015 ARCH (znver1, ZNVER, ZNVER1, false),
1016 ARCH (znver2, ZNVER, ZNVER2, false),
1017 ARCH (znver3, ZNVER, ZNVER3, false),
1018 ARCH (znver4, ZNVER, ZNVER4, false),
1019 ARCH (btver1, BT, BTVER1, false),
1020 ARCH (btver2, BT, BTVER2, false),
1021
1022 SUBARCH (8087, 8087, ANY_8087, false),
1023 SUBARCH (87, NONE, ANY_8087, false), /* Disable only! */
1024 SUBARCH (287, 287, ANY_287, false),
1025 SUBARCH (387, 387, ANY_387, false),
1026 SUBARCH (687, 687, ANY_687, false),
1027 SUBARCH (cmov, CMOV, CMOV, false),
1028 SUBARCH (fxsr, FXSR, ANY_FXSR, false),
1029 SUBARCH (mmx, MMX, ANY_MMX, false),
1030 SUBARCH (sse, SSE, ANY_SSE, false),
1031 SUBARCH (sse2, SSE2, ANY_SSE2, false),
1032 SUBARCH (sse3, SSE3, ANY_SSE3, false),
1033 SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
1034 SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
1035 SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
1036 SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
1037 SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
1038 SUBARCH (avx, AVX, ANY_AVX, false),
1039 SUBARCH (avx2, AVX2, ANY_AVX2, false),
1040 SUBARCH (avx512f, AVX512F, ANY_AVX512F, false),
1041 SUBARCH (avx512cd, AVX512CD, ANY_AVX512CD, false),
1042 SUBARCH (avx512er, AVX512ER, ANY_AVX512ER, false),
1043 SUBARCH (avx512pf, AVX512PF, ANY_AVX512PF, false),
1044 SUBARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, false),
1045 SUBARCH (avx512bw, AVX512BW, ANY_AVX512BW, false),
1046 SUBARCH (avx512vl, AVX512VL, ANY_AVX512VL, false),
1047 SUBARCH (monitor, MONITOR, MONITOR, false),
1048 SUBARCH (vmx, VMX, ANY_VMX, false),
1049 SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
1050 SUBARCH (smx, SMX, SMX, false),
1051 SUBARCH (xsave, XSAVE, ANY_XSAVE, false),
1052 SUBARCH (xsaveopt, XSAVEOPT, ANY_XSAVEOPT, false),
1053 SUBARCH (xsavec, XSAVEC, ANY_XSAVEC, false),
1054 SUBARCH (xsaves, XSAVES, ANY_XSAVES, false),
1055 SUBARCH (aes, AES, ANY_AES, false),
1056 SUBARCH (pclmul, PCLMUL, ANY_PCLMUL, false),
1057 SUBARCH (clmul, PCLMUL, ANY_PCLMUL, true),
1058 SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
1059 SUBARCH (rdrnd, RDRND, RDRND, false),
1060 SUBARCH (f16c, F16C, ANY_F16C, false),
1061 SUBARCH (bmi2, BMI2, BMI2, false),
1062 SUBARCH (fma, FMA, ANY_FMA, false),
1063 SUBARCH (fma4, FMA4, ANY_FMA4, false),
1064 SUBARCH (xop, XOP, ANY_XOP, false),
1065 SUBARCH (lwp, LWP, ANY_LWP, false),
1066 SUBARCH (movbe, MOVBE, MOVBE, false),
1067 SUBARCH (cx16, CX16, CX16, false),
1068 SUBARCH (lahf_sahf, LAHF_SAHF, LAHF_SAHF, false),
1069 SUBARCH (ept, EPT, ANY_EPT, false),
1070 SUBARCH (lzcnt, LZCNT, LZCNT, false),
1071 SUBARCH (popcnt, POPCNT, POPCNT, false),
1072 SUBARCH (hle, HLE, HLE, false),
1073 SUBARCH (rtm, RTM, ANY_RTM, false),
1074 SUBARCH (tsx, TSX, TSX, false),
1075 SUBARCH (invpcid, INVPCID, INVPCID, false),
1076 SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
1077 SUBARCH (nop, NOP, NOP, false),
1078 SUBARCH (syscall, SYSCALL, SYSCALL, false),
1079 SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
1080 SUBARCH (3dnow, 3DNOW, ANY_3DNOW, false),
1081 SUBARCH (3dnowa, 3DNOWA, ANY_3DNOWA, false),
1082 SUBARCH (padlock, PADLOCK, PADLOCK, false),
1083 SUBARCH (pacifica, SVME, ANY_SVME, true),
1084 SUBARCH (svme, SVME, ANY_SVME, false),
1085 SUBARCH (abm, ABM, ABM, false),
1086 SUBARCH (bmi, BMI, BMI, false),
1087 SUBARCH (tbm, TBM, TBM, false),
1088 SUBARCH (adx, ADX, ADX, false),
1089 SUBARCH (rdseed, RDSEED, RDSEED, false),
1090 SUBARCH (prfchw, PRFCHW, PRFCHW, false),
1091 SUBARCH (smap, SMAP, SMAP, false),
1092 SUBARCH (mpx, MPX, ANY_MPX, false),
1093 SUBARCH (sha, SHA, ANY_SHA, false),
1094 SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
1095 SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
1096 SUBARCH (se1, SE1, SE1, false),
1097 SUBARCH (clwb, CLWB, CLWB, false),
1098 SUBARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, false),
1099 SUBARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, false),
1100 SUBARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, false),
1101 SUBARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, false),
1102 SUBARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, false),
1103 SUBARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, false),
1104 SUBARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, false),
1105 SUBARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, false),
1106 SUBARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, false),
1107 SUBARCH (clzero, CLZERO, CLZERO, false),
1108 SUBARCH (mwaitx, MWAITX, MWAITX, false),
1109 SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
1110 SUBARCH (rdpid, RDPID, RDPID, false),
1111 SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
1112 SUBARCH (ibt, IBT, IBT, false),
1113 SUBARCH (shstk, SHSTK, SHSTK, false),
1114 SUBARCH (gfni, GFNI, ANY_GFNI, false),
1115 SUBARCH (vaes, VAES, ANY_VAES, false),
1116 SUBARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, false),
1117 SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
1118 SUBARCH (pconfig, PCONFIG, PCONFIG, false),
1119 SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
1120 SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
1121 SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
1122 SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
1123 SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
1124 SUBARCH (amx_complex, AMX_COMPLEX, ANY_AMX_COMPLEX, false),
1125 SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
1126 SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
1127 SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
1128 SUBARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, false),
1129 SUBARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
1130 ANY_AVX512_VP2INTERSECT, false),
1131 SUBARCH (tdx, TDX, TDX, false),
1132 SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
1133 SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
1134 SUBARCH (rdpru, RDPRU, RDPRU, false),
1135 SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
1136 SUBARCH (sev_es, SEV_ES, ANY_SEV_ES, false),
1137 SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
1138 SUBARCH (kl, KL, ANY_KL, false),
1139 SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
1140 SUBARCH (uintr, UINTR, UINTR, false),
1141 SUBARCH (hreset, HRESET, HRESET, false),
1142 SUBARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, false),
1143 SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
1144 SUBARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, false),
1145 SUBARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, false),
1146 SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
1147 SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
1148 SUBARCH (msrlist, MSRLIST, MSRLIST, false),
1149 SUBARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, false),
1150 SUBARCH (rao_int, RAO_INT, RAO_INT, false),
1151 SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
1152 SUBARCH (fred, FRED, ANY_FRED, false),
1153 SUBARCH (lkgs, LKGS, ANY_LKGS, false),
1154 };
1155
1156 #undef SUBARCH
1157 #undef ARCH
1158
1159 #ifdef I386COFF
1160 /* Like s_lcomm_internal in gas/read.c but the alignment string
1161 is allowed to be optional. */
1162
1163 static symbolS *
1164 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1165 {
1166 addressT align = 0;
1167
1168 SKIP_WHITESPACE ();
1169
1170 if (needs_align
1171 && *input_line_pointer == ',')
1172 {
1173 align = parse_align (needs_align - 1);
1174
1175 if (align == (addressT) -1)
1176 return NULL;
1177 }
1178 else
1179 {
1180 if (size >= 8)
1181 align = 3;
1182 else if (size >= 4)
1183 align = 2;
1184 else if (size >= 2)
1185 align = 1;
1186 else
1187 align = 0;
1188 }
1189
1190 bss_alloc (symbolP, size, align);
1191 return symbolP;
1192 }
1193
1194 static void
1195 pe_lcomm (int needs_align)
1196 {
1197 s_comm_internal (needs_align * 2, pe_lcomm_internal);
1198 }
1199 #endif
1200
1201 const pseudo_typeS md_pseudo_table[] =
1202 {
1203 #if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
1204 {"align", s_align_bytes, 0},
1205 #else
1206 {"align", s_align_ptwo, 0},
1207 #endif
1208 {"arch", set_cpu_arch, 0},
1209 #ifndef I386COFF
1210 {"bss", s_bss, 0},
1211 #else
1212 {"lcomm", pe_lcomm, 1},
1213 #endif
1214 {"ffloat", float_cons, 'f'},
1215 {"dfloat", float_cons, 'd'},
1216 {"tfloat", float_cons, 'x'},
1217 {"hfloat", float_cons, 'h'},
1218 {"bfloat16", float_cons, 'b'},
1219 {"value", cons, 2},
1220 {"slong", signed_cons, 4},
1221 {"insn", s_insn, 0},
1222 {"noopt", s_ignore, 0},
1223 {"optim", s_ignore, 0},
1224 {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
1225 {"code16", set_code_flag, CODE_16BIT},
1226 {"code32", set_code_flag, CODE_32BIT},
1227 #ifdef BFD64
1228 {"code64", set_code_flag, CODE_64BIT},
1229 #endif
1230 {"intel_syntax", set_intel_syntax, 1},
1231 {"att_syntax", set_intel_syntax, 0},
1232 {"intel_mnemonic", set_intel_mnemonic, 1},
1233 {"att_mnemonic", set_intel_mnemonic, 0},
1234 {"allow_index_reg", set_allow_index_reg, 1},
1235 {"disallow_index_reg", set_allow_index_reg, 0},
1236 {"sse_check", set_check, 0},
1237 {"operand_check", set_check, 1},
1238 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
1239 {"largecomm", handle_large_common, 0},
1240 #else
1241 {"file", dwarf2_directive_file, 0},
1242 {"loc", dwarf2_directive_loc, 0},
1243 {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
1244 #endif
1245 #ifdef TE_PE
1246 {"secrel32", pe_directive_secrel, 0},
1247 {"secidx", pe_directive_secidx, 0},
1248 #endif
1249 {0, 0, 0}
1250 };
1251
1252 /* For interface with expression (). */
1253 extern char *input_line_pointer;
1254
1255 /* Hash table for instruction mnemonic lookup. */
1256 static htab_t op_hash;
1257
1258 /* Hash table for register lookup. */
1259 static htab_t reg_hash;
1260 \f
1261 /* Various efficient no-op patterns for aligning code labels.
1262 Note: Don't try to assemble the instructions in the comments.
1263 0L and 0w are not legal. */
1264 static const unsigned char f32_1[] =
1265 {0x90}; /* nop */
1266 static const unsigned char f32_2[] =
1267 {0x66,0x90}; /* xchg %ax,%ax */
1268 static const unsigned char f32_3[] =
1269 {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
1270 static const unsigned char f32_4[] =
1271 {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
1272 static const unsigned char f32_6[] =
1273 {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
1274 static const unsigned char f32_7[] =
1275 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
1276 static const unsigned char f16_3[] =
1277 {0x8d,0x74,0x00}; /* lea 0(%si),%si */
1278 static const unsigned char f16_4[] =
1279 {0x8d,0xb4,0x00,0x00}; /* lea 0W(%si),%si */
1280 static const unsigned char jump_disp8[] =
1281 {0xeb}; /* jmp disp8 */
1282 static const unsigned char jump32_disp32[] =
1283 {0xe9}; /* jmp disp32 */
1284 static const unsigned char jump16_disp32[] =
1285 {0x66,0xe9}; /* jmp disp32 */
1286 /* 32-bit NOPs patterns. */
1287 static const unsigned char *const f32_patt[] = {
1288 f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
1289 };
1290 /* 16-bit NOPs patterns. */
1291 static const unsigned char *const f16_patt[] = {
1292 f32_1, f32_2, f16_3, f16_4
1293 };
1294 /* nopl (%[re]ax) */
1295 static const unsigned char alt_3[] =
1296 {0x0f,0x1f,0x00};
1297 /* nopl 0(%[re]ax) */
1298 static const unsigned char alt_4[] =
1299 {0x0f,0x1f,0x40,0x00};
1300 /* nopl 0(%[re]ax,%[re]ax,1) */
1301 static const unsigned char alt_5[] =
1302 {0x0f,0x1f,0x44,0x00,0x00};
1303 /* nopw 0(%[re]ax,%[re]ax,1) */
1304 static const unsigned char alt_6[] =
1305 {0x66,0x0f,0x1f,0x44,0x00,0x00};
1306 /* nopl 0L(%[re]ax) */
1307 static const unsigned char alt_7[] =
1308 {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
1309 /* nopl 0L(%[re]ax,%[re]ax,1) */
1310 static const unsigned char alt_8[] =
1311 {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1312 /* nopw 0L(%[re]ax,%[re]ax,1) */
1313 static const unsigned char alt_9[] =
1314 {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1315 /* nopw %cs:0L(%[re]ax,%[re]ax,1) */
1316 static const unsigned char alt_10[] =
1317 {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1318 /* data16 nopw %cs:0L(%eax,%eax,1) */
1319 static const unsigned char alt_11[] =
1320 {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1321 /* 32-bit and 64-bit NOPs patterns. */
1322 static const unsigned char *const alt_patt[] = {
1323 f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
1324 alt_9, alt_10, alt_11
1325 };
1326
1327 /* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
1328 size of a single NOP instruction MAX_SINGLE_NOP_SIZE. */
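/* E.g. with alt_patt and MAX_SINGLE_NOP_SIZE == 7, COUNT == 10 splits
   as 10 % 7 == 3: one 7-byte NOP (alt_7) followed by one 3-byte NOP
   (alt_3).  */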
1329
1330 static void
1331 i386_output_nops (char *where, const unsigned char *const *patt,
1332 int count, int max_single_nop_size)
1333
1334 {
1335 /* Place the longer NOP first. */
1336 int last;
1337 int offset;
1338 const unsigned char *nops;
1339
1340 if (max_single_nop_size < 1)
1341 {
1342 as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
1343 max_single_nop_size);
1344 return;
1345 }
1346
1347 nops = patt[max_single_nop_size - 1];
1348
1349   /* Use the smaller one if the requested one isn't available.  */
1350 if (nops == NULL)
1351 {
1352 max_single_nop_size--;
1353 nops = patt[max_single_nop_size - 1];
1354 }
1355
1356 last = count % max_single_nop_size;
1357
1358 count -= last;
1359 for (offset = 0; offset < count; offset += max_single_nop_size)
1360 memcpy (where + offset, nops, max_single_nop_size);
1361
1362 if (last)
1363 {
1364 nops = patt[last - 1];
1365 if (nops == NULL)
1366 {
1367 /* Use the smaller one plus one-byte NOP if the needed one
1368 isn't available. */
1369 last--;
1370 nops = patt[last - 1];
1371 memcpy (where + offset, nops, last);
1372 where[offset + last] = *patt[0];
1373 }
1374 else
1375 memcpy (where + offset, nops, last);
1376 }
1377 }
1378
1379 static INLINE int
1380 fits_in_imm7 (offsetT num)
1381 {
1382 return (num & 0x7f) == num;
1383 }
1384
1385 static INLINE int
1386 fits_in_imm31 (offsetT num)
1387 {
1388 return (num & 0x7fffffff) == num;
1389 }
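/* These are exact-range checks on a signed offsetT: fits_in_imm7 (0x7f)
   is true, while fits_in_imm7 (0x80) and fits_in_imm7 (-1) are false
   (for the latter, (-1 & 0x7f) == 0x7f != -1).  */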
1390
1391 /* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
1392 single NOP instruction LIMIT. */
1393
1394 void
1395 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1396 {
1397 const unsigned char *const *patt = NULL;
1398 int max_single_nop_size;
1399 /* Maximum number of NOPs before switching to jump over NOPs. */
1400 int max_number_of_nops;
1401
1402 switch (fragP->fr_type)
1403 {
1404 case rs_fill_nop:
1405 case rs_align_code:
1406 break;
1407 case rs_machine_dependent:
1408 /* Allow NOP padding for jumps and calls. */
1409 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1410 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1411 break;
1412 /* Fall through. */
1413 default:
1414 return;
1415 }
1416
1417 /* We need to decide which NOP sequence to use for 32bit and
1418 64bit. When -mtune= is used:
1419
1420 1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
1421 PROCESSOR_GENERIC32, f32_patt will be used.
1422 2. For the rest, alt_patt will be used.
1423
1424 When -mtune= isn't used, alt_patt will be used if
1425 cpu_arch_isa_flags has CpuNop. Otherwise, f32_patt will
1426 be used.
1427
1428 When -march= or .arch is used, we can't use anything beyond
1429 cpu_arch_isa_flags. */
1430
1431 if (flag_code == CODE_16BIT)
1432 {
1433 patt = f16_patt;
1434 max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1435 /* Limit number of NOPs to 2 in 16-bit mode. */
1436 max_number_of_nops = 2;
1437 }
1438 else
1439 {
1440 if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1441 {
1442 /* PROCESSOR_UNKNOWN means that all ISAs may be used. */
1443 switch (cpu_arch_tune)
1444 {
1445 case PROCESSOR_UNKNOWN:
1446 /* We use cpu_arch_isa_flags to check if we SHOULD
1447 optimize with nops. */
1448 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1449 patt = alt_patt;
1450 else
1451 patt = f32_patt;
1452 break;
1453 case PROCESSOR_PENTIUM4:
1454 case PROCESSOR_NOCONA:
1455 case PROCESSOR_CORE:
1456 case PROCESSOR_CORE2:
1457 case PROCESSOR_COREI7:
1458 case PROCESSOR_GENERIC64:
1459 case PROCESSOR_K6:
1460 case PROCESSOR_ATHLON:
1461 case PROCESSOR_K8:
1462 case PROCESSOR_AMDFAM10:
1463 case PROCESSOR_BD:
1464 case PROCESSOR_ZNVER:
1465 case PROCESSOR_BT:
1466 patt = alt_patt;
1467 break;
1468 case PROCESSOR_I386:
1469 case PROCESSOR_I486:
1470 case PROCESSOR_PENTIUM:
1471 case PROCESSOR_PENTIUMPRO:
1472 case PROCESSOR_IAMCU:
1473 case PROCESSOR_GENERIC32:
1474 patt = f32_patt;
1475 break;
1476 case PROCESSOR_NONE:
1477 abort ();
1478 }
1479 }
1480 else
1481 {
1482 switch (fragP->tc_frag_data.tune)
1483 {
1484 case PROCESSOR_UNKNOWN:
1485 /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1486 PROCESSOR_UNKNOWN. */
1487 abort ();
1488 break;
1489
1490 case PROCESSOR_I386:
1491 case PROCESSOR_I486:
1492 case PROCESSOR_PENTIUM:
1493 case PROCESSOR_IAMCU:
1494 case PROCESSOR_K6:
1495 case PROCESSOR_ATHLON:
1496 case PROCESSOR_K8:
1497 case PROCESSOR_AMDFAM10:
1498 case PROCESSOR_BD:
1499 case PROCESSOR_ZNVER:
1500 case PROCESSOR_BT:
1501 case PROCESSOR_GENERIC32:
1502 /* We use cpu_arch_isa_flags to check if we CAN optimize
1503 with nops. */
1504 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1505 patt = alt_patt;
1506 else
1507 patt = f32_patt;
1508 break;
1509 case PROCESSOR_PENTIUMPRO:
1510 case PROCESSOR_PENTIUM4:
1511 case PROCESSOR_NOCONA:
1512 case PROCESSOR_CORE:
1513 case PROCESSOR_CORE2:
1514 case PROCESSOR_COREI7:
1515 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1516 patt = alt_patt;
1517 else
1518 patt = f32_patt;
1519 break;
1520 case PROCESSOR_GENERIC64:
1521 patt = alt_patt;
1522 break;
1523 case PROCESSOR_NONE:
1524 abort ();
1525 }
1526 }
1527
1528 if (patt == f32_patt)
1529 {
1530 max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
1531 /* Limit number of NOPs to 2 for older processors. */
1532 max_number_of_nops = 2;
1533 }
1534 else
1535 {
1536 max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
1537 /* Limit number of NOPs to 7 for newer processors. */
1538 max_number_of_nops = 7;
1539 }
1540 }
1541
1542 if (limit == 0)
1543 limit = max_single_nop_size;
1544
1545 if (fragP->fr_type == rs_fill_nop)
1546 {
1547 /* Output NOPs for .nop directive. */
1548 if (limit > max_single_nop_size)
1549 {
1550 as_bad_where (fragP->fr_file, fragP->fr_line,
1551 _("invalid single nop size: %d "
1552 "(expect within [0, %d])"),
1553 limit, max_single_nop_size);
1554 return;
1555 }
1556 }
1557 else if (fragP->fr_type != rs_machine_dependent)
1558 fragP->fr_var = count;
1559
1560 if ((count / max_single_nop_size) > max_number_of_nops)
1561 {
1562 /* Generate jump over NOPs. */
1563 offsetT disp = count - 2;
1564 if (fits_in_imm7 (disp))
1565 {
1566 /* Use "jmp disp8" if possible. */
1567 count = disp;
1568 where[0] = jump_disp8[0];
1569 where[1] = count;
1570 where += 2;
1571 }
1572 else
1573 {
1574 unsigned int size_of_jump;
1575
1576 if (flag_code == CODE_16BIT)
1577 {
1578 where[0] = jump16_disp32[0];
1579 where[1] = jump16_disp32[1];
1580 size_of_jump = 2;
1581 }
1582 else
1583 {
1584 where[0] = jump32_disp32[0];
1585 size_of_jump = 1;
1586 }
1587
1588 count -= size_of_jump + 4;
1589 if (!fits_in_imm31 (count))
1590 {
1591 as_bad_where (fragP->fr_file, fragP->fr_line,
1592 _("jump over nop padding out of range"));
1593 return;
1594 }
1595
1596 md_number_to_chars (where + size_of_jump, count, 4);
1597 where += size_of_jump + 4;
1598 }
1599 }
1600
1601 /* Generate multiple NOPs. */
1602 i386_output_nops (where, patt, count, limit);
1603 }
1604
1605 static INLINE int
1606 operand_type_all_zero (const union i386_operand_type *x)
1607 {
1608 switch (ARRAY_SIZE(x->array))
1609 {
1610 case 3:
1611 if (x->array[2])
1612 return 0;
1613 /* Fall through. */
1614 case 2:
1615 if (x->array[1])
1616 return 0;
1617 /* Fall through. */
1618 case 1:
1619 return !x->array[0];
1620 default:
1621 abort ();
1622 }
1623 }
1624
1625 static INLINE void
1626 operand_type_set (union i386_operand_type *x, unsigned int v)
1627 {
1628 switch (ARRAY_SIZE(x->array))
1629 {
1630 case 3:
1631 x->array[2] = v;
1632 /* Fall through. */
1633 case 2:
1634 x->array[1] = v;
1635 /* Fall through. */
1636 case 1:
1637 x->array[0] = v;
1638 /* Fall through. */
1639 break;
1640 default:
1641 abort ();
1642 }
1643
1644 x->bitfield.class = ClassNone;
1645 x->bitfield.instance = InstanceNone;
1646 }
1647
1648 static INLINE int
1649 operand_type_equal (const union i386_operand_type *x,
1650 const union i386_operand_type *y)
1651 {
1652 switch (ARRAY_SIZE(x->array))
1653 {
1654 case 3:
1655 if (x->array[2] != y->array[2])
1656 return 0;
1657 /* Fall through. */
1658 case 2:
1659 if (x->array[1] != y->array[1])
1660 return 0;
1661 /* Fall through. */
1662 case 1:
1663 return x->array[0] == y->array[0];
1664 break;
1665 default:
1666 abort ();
1667 }
1668 }
1669
1670 static INLINE int
1671 cpu_flags_all_zero (const union i386_cpu_flags *x)
1672 {
1673 switch (ARRAY_SIZE(x->array))
1674 {
1675 case 5:
1676 if (x->array[4])
1677 return 0;
1678 /* Fall through. */
1679 case 4:
1680 if (x->array[3])
1681 return 0;
1682 /* Fall through. */
1683 case 3:
1684 if (x->array[2])
1685 return 0;
1686 /* Fall through. */
1687 case 2:
1688 if (x->array[1])
1689 return 0;
1690 /* Fall through. */
1691 case 1:
1692 return !x->array[0];
1693 default:
1694 abort ();
1695 }
1696 }
1697
1698 static INLINE int
1699 cpu_flags_equal (const union i386_cpu_flags *x,
1700 const union i386_cpu_flags *y)
1701 {
1702 switch (ARRAY_SIZE(x->array))
1703 {
1704 case 5:
1705 if (x->array[4] != y->array[4])
1706 return 0;
1707 /* Fall through. */
1708 case 4:
1709 if (x->array[3] != y->array[3])
1710 return 0;
1711 /* Fall through. */
1712 case 3:
1713 if (x->array[2] != y->array[2])
1714 return 0;
1715 /* Fall through. */
1716 case 2:
1717 if (x->array[1] != y->array[1])
1718 return 0;
1719 /* Fall through. */
1720 case 1:
1721 return x->array[0] == y->array[0];
1722 break;
1723 default:
1724 abort ();
1725 }
1726 }
1727
1728 static INLINE int
1729 cpu_flags_check_cpu64 (i386_cpu_flags f)
1730 {
1731 return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1732 || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1733 }
1734
1735 static INLINE i386_cpu_flags
1736 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1737 {
1738 switch (ARRAY_SIZE (x.array))
1739 {
1740 case 5:
1741 x.array [4] &= y.array [4];
1742 /* Fall through. */
1743 case 4:
1744 x.array [3] &= y.array [3];
1745 /* Fall through. */
1746 case 3:
1747 x.array [2] &= y.array [2];
1748 /* Fall through. */
1749 case 2:
1750 x.array [1] &= y.array [1];
1751 /* Fall through. */
1752 case 1:
1753 x.array [0] &= y.array [0];
1754 break;
1755 default:
1756 abort ();
1757 }
1758 return x;
1759 }
1760
1761 static INLINE i386_cpu_flags
1762 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1763 {
1764 switch (ARRAY_SIZE (x.array))
1765 {
1766 case 5:
1767 x.array [4] |= y.array [4];
1768 /* Fall through. */
1769 case 4:
1770 x.array [3] |= y.array [3];
1771 /* Fall through. */
1772 case 3:
1773 x.array [2] |= y.array [2];
1774 /* Fall through. */
1775 case 2:
1776 x.array [1] |= y.array [1];
1777 /* Fall through. */
1778 case 1:
1779 x.array [0] |= y.array [0];
1780 break;
1781 default:
1782 abort ();
1783 }
1784 return x;
1785 }
1786
1787 static INLINE i386_cpu_flags
1788 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1789 {
1790 switch (ARRAY_SIZE (x.array))
1791 {
1792 case 5:
1793 x.array [4] &= ~y.array [4];
1794 /* Fall through. */
1795 case 4:
1796 x.array [3] &= ~y.array [3];
1797 /* Fall through. */
1798 case 3:
1799 x.array [2] &= ~y.array [2];
1800 /* Fall through. */
1801 case 2:
1802 x.array [1] &= ~y.array [1];
1803 /* Fall through. */
1804 case 1:
1805 x.array [0] &= ~y.array [0];
1806 break;
1807 default:
1808 abort ();
1809 }
1810 return x;
1811 }
1812
1813 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
1814
1815 #define CPU_FLAGS_ARCH_MATCH 0x1
1816 #define CPU_FLAGS_64BIT_MATCH 0x2
1817
1818 #define CPU_FLAGS_PERFECT_MATCH \
1819 (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1820
1821 /* Return CPU flags match bits. */
1822
1823 static int
1824 cpu_flags_match (const insn_template *t)
1825 {
1826 i386_cpu_flags x = t->cpu_flags;
1827 int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1828
1829 x.bitfield.cpu64 = 0;
1830 x.bitfield.cpuno64 = 0;
1831
1832 if (cpu_flags_all_zero (&x))
1833 {
1834 /* This instruction is available on all archs. */
1835 match |= CPU_FLAGS_ARCH_MATCH;
1836 }
1837 else
1838 {
1839 /* This instruction is available only on some archs. */
1840 i386_cpu_flags cpu = cpu_arch_flags;
1841
1842       /* AVX512VL is not a standalone feature - match it and then strip it.  */
1843 if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1844 return match;
1845 x.bitfield.cpuavx512vl = 0;
1846
1847 /* AVX and AVX2 present at the same time express an operand size
1848 dependency - strip AVX2 for the purposes here. The operand size
1849 dependent check occurs in check_vecOperands(). */
1850 if (x.bitfield.cpuavx && x.bitfield.cpuavx2)
1851 x.bitfield.cpuavx2 = 0;
1852
1853 cpu = cpu_flags_and (x, cpu);
1854 if (!cpu_flags_all_zero (&cpu))
1855 {
1856 if (x.bitfield.cpuavx)
1857 {
1858 /* We need to check a few extra flags with AVX. */
1859 if (cpu.bitfield.cpuavx
1860 && (!t->opcode_modifier.sse2avx
1861 || (sse2avx && !i.prefix[DATA_PREFIX]))
1862 && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1863 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1864 && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1865 match |= CPU_FLAGS_ARCH_MATCH;
1866 }
1867 else if (x.bitfield.cpuavx512f)
1868 {
1869 /* We need to check a few extra flags with AVX512F. */
1870 if (cpu.bitfield.cpuavx512f
1871 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1872 && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1873 && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1874 match |= CPU_FLAGS_ARCH_MATCH;
1875 }
1876 else
1877 match |= CPU_FLAGS_ARCH_MATCH;
1878 }
1879 }
1880 return match;
1881 }
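
/* Editor's note, illustrative only: with an .arch that enables SSE2 but
   not AVX2, an AVX2 template returns just CPU_FLAGS_64BIT_MATCH (the arch
   bit is missing), while an SSE2 template returns CPU_FLAGS_PERFECT_MATCH.
   Anything short of a perfect match is treated as "unsupported".  */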
1882
1883 static INLINE i386_operand_type
1884 operand_type_and (i386_operand_type x, i386_operand_type y)
1885 {
1886 if (x.bitfield.class != y.bitfield.class)
1887 x.bitfield.class = ClassNone;
1888 if (x.bitfield.instance != y.bitfield.instance)
1889 x.bitfield.instance = InstanceNone;
1890
1891 switch (ARRAY_SIZE (x.array))
1892 {
1893 case 3:
1894 x.array [2] &= y.array [2];
1895 /* Fall through. */
1896 case 2:
1897 x.array [1] &= y.array [1];
1898 /* Fall through. */
1899 case 1:
1900 x.array [0] &= y.array [0];
1901 break;
1902 default:
1903 abort ();
1904 }
1905 return x;
1906 }
1907
1908 static INLINE i386_operand_type
1909 operand_type_and_not (i386_operand_type x, i386_operand_type y)
1910 {
1911 gas_assert (y.bitfield.class == ClassNone);
1912 gas_assert (y.bitfield.instance == InstanceNone);
1913
1914 switch (ARRAY_SIZE (x.array))
1915 {
1916 case 3:
1917 x.array [2] &= ~y.array [2];
1918 /* Fall through. */
1919 case 2:
1920 x.array [1] &= ~y.array [1];
1921 /* Fall through. */
1922 case 1:
1923 x.array [0] &= ~y.array [0];
1924 break;
1925 default:
1926 abort ();
1927 }
1928 return x;
1929 }
1930
1931 static INLINE i386_operand_type
1932 operand_type_or (i386_operand_type x, i386_operand_type y)
1933 {
1934 gas_assert (x.bitfield.class == ClassNone ||
1935 y.bitfield.class == ClassNone ||
1936 x.bitfield.class == y.bitfield.class);
1937 gas_assert (x.bitfield.instance == InstanceNone ||
1938 y.bitfield.instance == InstanceNone ||
1939 x.bitfield.instance == y.bitfield.instance);
1940
1941 switch (ARRAY_SIZE (x.array))
1942 {
1943 case 3:
1944 x.array [2] |= y.array [2];
1945 /* Fall through. */
1946 case 2:
1947 x.array [1] |= y.array [1];
1948 /* Fall through. */
1949 case 1:
1950 x.array [0] |= y.array [0];
1951 break;
1952 default:
1953 abort ();
1954 }
1955 return x;
1956 }
1957
1958 static INLINE i386_operand_type
1959 operand_type_xor (i386_operand_type x, i386_operand_type y)
1960 {
1961 gas_assert (y.bitfield.class == ClassNone);
1962 gas_assert (y.bitfield.instance == InstanceNone);
1963
1964 switch (ARRAY_SIZE (x.array))
1965 {
1966 case 3:
1967 x.array [2] ^= y.array [2];
1968 /* Fall through. */
1969 case 2:
1970 x.array [1] ^= y.array [1];
1971 /* Fall through. */
1972 case 1:
1973 x.array [0] ^= y.array [0];
1974 break;
1975 default:
1976 abort ();
1977 }
1978 return x;
1979 }
1980
1981 static const i386_operand_type anydisp = {
1982 .bitfield = { .disp8 = 1, .disp16 = 1, .disp32 = 1, .disp64 = 1 }
1983 };
1984
1985 enum operand_type
1986 {
1987 reg,
1988 imm,
1989 disp,
1990 anymem
1991 };
1992
1993 static INLINE int
1994 operand_type_check (i386_operand_type t, enum operand_type c)
1995 {
1996 switch (c)
1997 {
1998 case reg:
1999 return t.bitfield.class == Reg;
2000
2001 case imm:
2002 return (t.bitfield.imm8
2003 || t.bitfield.imm8s
2004 || t.bitfield.imm16
2005 || t.bitfield.imm32
2006 || t.bitfield.imm32s
2007 || t.bitfield.imm64);
2008
2009 case disp:
2010 return (t.bitfield.disp8
2011 || t.bitfield.disp16
2012 || t.bitfield.disp32
2013 || t.bitfield.disp64);
2014
2015 case anymem:
2016 return (t.bitfield.disp8
2017 || t.bitfield.disp16
2018 || t.bitfield.disp32
2019 || t.bitfield.disp64
2020 || t.bitfield.baseindex);
2021
2022 default:
2023 abort ();
2024 }
2025
2026 return 0;
2027 }
2028
2029 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2030 between operand GIVEN and operand WANTED for instruction template T. */
2031
2032 static INLINE int
2033 match_operand_size (const insn_template *t, unsigned int wanted,
2034 unsigned int given)
2035 {
2036 return !((i.types[given].bitfield.byte
2037 && !t->operand_types[wanted].bitfield.byte)
2038 || (i.types[given].bitfield.word
2039 && !t->operand_types[wanted].bitfield.word)
2040 || (i.types[given].bitfield.dword
2041 && !t->operand_types[wanted].bitfield.dword)
2042 || (i.types[given].bitfield.qword
2043 && (!t->operand_types[wanted].bitfield.qword
2044 /* Don't allow 64-bit (memory) operands outside of 64-bit
2045 mode, when they're used where a 64-bit GPR could also
2046 be used. Checking is needed for Intel Syntax only. */
2047 || (intel_syntax
2048 && flag_code != CODE_64BIT
2049 && (t->operand_types[wanted].bitfield.class == Reg
2050 || t->operand_types[wanted].bitfield.class == Accum
2051 || t->opcode_modifier.isstring))))
2052 || (i.types[given].bitfield.tbyte
2053 && !t->operand_types[wanted].bitfield.tbyte));
2054 }
2055
2056 /* Return 1 if there is no conflict in SIMD register between operand
2057 GIVEN and operand WANTED for instruction template T. */
2058
2059 static INLINE int
2060 match_simd_size (const insn_template *t, unsigned int wanted,
2061 unsigned int given)
2062 {
2063 return !((i.types[given].bitfield.xmmword
2064 && !t->operand_types[wanted].bitfield.xmmword)
2065 || (i.types[given].bitfield.ymmword
2066 && !t->operand_types[wanted].bitfield.ymmword)
2067 || (i.types[given].bitfield.zmmword
2068 && !t->operand_types[wanted].bitfield.zmmword)
2069 || (i.types[given].bitfield.tmmword
2070 && !t->operand_types[wanted].bitfield.tmmword));
2071 }
2072
2073 /* Return 1 if there is no conflict in any size between operand GIVEN
2074 and operand WANTED for instruction template T. */
2075
2076 static INLINE int
2077 match_mem_size (const insn_template *t, unsigned int wanted,
2078 unsigned int given)
2079 {
2080 return (match_operand_size (t, wanted, given)
2081 && !((i.types[given].bitfield.unspecified
2082 && !i.broadcast.type
2083 && !i.broadcast.bytes
2084 && !t->operand_types[wanted].bitfield.unspecified)
2085 || (i.types[given].bitfield.fword
2086 && !t->operand_types[wanted].bitfield.fword)
2087 /* For scalar opcode templates to allow register and memory
2088 operands at the same time, some special casing is needed
2089 here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2090 down-conversion vpmov*. */
2091 || ((t->operand_types[wanted].bitfield.class == RegSIMD
2092 && t->operand_types[wanted].bitfield.byte
2093 + t->operand_types[wanted].bitfield.word
2094 + t->operand_types[wanted].bitfield.dword
2095 + t->operand_types[wanted].bitfield.qword
2096 > !!t->opcode_modifier.broadcast)
2097 ? (i.types[given].bitfield.xmmword
2098 || i.types[given].bitfield.ymmword
2099 || i.types[given].bitfield.zmmword)
2100 : !match_simd_size (t, wanted, given))));
2101 }
2102
2103 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2104 operands for instruction template T, and it has MATCH_REVERSE set if there
2105 is no size conflict on any operands for the template with operands reversed
2106 (and the template allows for reversing in the first place). */
2107
2108 #define MATCH_STRAIGHT 1
2109 #define MATCH_REVERSE 2
2110
2111 static INLINE unsigned int
2112 operand_size_match (const insn_template *t)
2113 {
2114 unsigned int j, match = MATCH_STRAIGHT;
2115
2116 /* Don't check non-absolute jump instructions. */
2117 if (t->opcode_modifier.jump
2118 && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2119 return match;
2120
2121 /* Check memory and accumulator operand size. */
2122 for (j = 0; j < i.operands; j++)
2123 {
2124 if (i.types[j].bitfield.class != Reg
2125 && i.types[j].bitfield.class != RegSIMD
2126 && t->opcode_modifier.operandconstraint == ANY_SIZE)
2127 continue;
2128
2129 if (t->operand_types[j].bitfield.class == Reg
2130 && !match_operand_size (t, j, j))
2131 {
2132 match = 0;
2133 break;
2134 }
2135
2136 if (t->operand_types[j].bitfield.class == RegSIMD
2137 && !match_simd_size (t, j, j))
2138 {
2139 match = 0;
2140 break;
2141 }
2142
2143 if (t->operand_types[j].bitfield.instance == Accum
2144 && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2145 {
2146 match = 0;
2147 break;
2148 }
2149
2150 if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2151 {
2152 match = 0;
2153 break;
2154 }
2155 }
2156
2157 if (!t->opcode_modifier.d)
2158 return match;
2159
2160 /* Check reverse. */
2161 gas_assert (i.operands >= 2);
2162
2163 for (j = 0; j < i.operands; j++)
2164 {
2165 unsigned int given = i.operands - j - 1;
2166
2167 /* For FMA4 and XOP insns VEX.W controls just the first two
2168 register operands. */
2169 if (t->cpu_flags.bitfield.cpufma4 || t->cpu_flags.bitfield.cpuxop)
2170 given = j < 2 ? 1 - j : j;
2171
2172 if (t->operand_types[j].bitfield.class == Reg
2173 && !match_operand_size (t, j, given))
2174 return match;
2175
2176 if (t->operand_types[j].bitfield.class == RegSIMD
2177 && !match_simd_size (t, j, given))
2178 return match;
2179
2180 if (t->operand_types[j].bitfield.instance == Accum
2181 && (!match_operand_size (t, j, given)
2182 || !match_simd_size (t, j, given)))
2183 return match;
2184
2185 if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2186 return match;
2187 }
2188
2189 return match | MATCH_REVERSE;
2190 }
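
/* Editor's note, illustrative: for a D-modifier template such as the
   reg/mem forms of "add", both operand orders typically size-match, so
   the function returns MATCH_STRAIGHT | MATCH_REVERSE and later matching
   code is free to encode either direction of the opcode.  */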
2191
2192 static INLINE int
2193 operand_type_match (i386_operand_type overlap,
2194 i386_operand_type given)
2195 {
2196 i386_operand_type temp = overlap;
2197
2198 temp.bitfield.unspecified = 0;
2199 temp.bitfield.byte = 0;
2200 temp.bitfield.word = 0;
2201 temp.bitfield.dword = 0;
2202 temp.bitfield.fword = 0;
2203 temp.bitfield.qword = 0;
2204 temp.bitfield.tbyte = 0;
2205 temp.bitfield.xmmword = 0;
2206 temp.bitfield.ymmword = 0;
2207 temp.bitfield.zmmword = 0;
2208 temp.bitfield.tmmword = 0;
2209 if (operand_type_all_zero (&temp))
2210 goto mismatch;
2211
2212 if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2213 return 1;
2214
2215 mismatch:
2216 i.error = operand_type_mismatch;
2217 return 0;
2218 }
2219
2220 /* If given types g0 and g1 are registers they must be of the same type
2221 unless the expected operand type register overlap is null.
2222 Intel syntax sized memory operands are also checked here. */
2223
2224 static INLINE int
2225 operand_type_register_match (i386_operand_type g0,
2226 i386_operand_type t0,
2227 i386_operand_type g1,
2228 i386_operand_type t1)
2229 {
2230 if (g0.bitfield.class != Reg
2231 && g0.bitfield.class != RegSIMD
2232 && (g0.bitfield.unspecified
2233 || !operand_type_check (g0, anymem)))
2234 return 1;
2235
2236 if (g1.bitfield.class != Reg
2237 && g1.bitfield.class != RegSIMD
2238 && (g1.bitfield.unspecified
2239 || !operand_type_check (g1, anymem)))
2240 return 1;
2241
2242 if (g0.bitfield.byte == g1.bitfield.byte
2243 && g0.bitfield.word == g1.bitfield.word
2244 && g0.bitfield.dword == g1.bitfield.dword
2245 && g0.bitfield.qword == g1.bitfield.qword
2246 && g0.bitfield.xmmword == g1.bitfield.xmmword
2247 && g0.bitfield.ymmword == g1.bitfield.ymmword
2248 && g0.bitfield.zmmword == g1.bitfield.zmmword)
2249 return 1;
2250
2251 /* If expectations overlap in no more than a single size, all is fine. */
2252 g0 = operand_type_and (t0, t1);
2253 if (g0.bitfield.byte
2254 + g0.bitfield.word
2255 + g0.bitfield.dword
2256 + g0.bitfield.qword
2257 + g0.bitfield.xmmword
2258 + g0.bitfield.ymmword
2259 + g0.bitfield.zmmword <= 1)
2260 return 1;
2261
2262 i.error = register_type_mismatch;
2263
2264 return 0;
2265 }
2266
2267 static INLINE unsigned int
2268 register_number (const reg_entry *r)
2269 {
2270 unsigned int nr = r->reg_num;
2271
2272 if (r->reg_flags & RegRex)
2273 nr += 8;
2274
2275 if (r->reg_flags & RegVRex)
2276 nr += 16;
2277
2278 return nr;
2279 }
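
/* Editor's note, worked examples: %r12 is reg_num 4 with RegRex set,
   giving 12; %xmm20 is reg_num 4 with RegVRex set, giving 20.  The result
   is the architectural register number fed into REX/VEX/EVEX fields.  */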
2280
2281 static INLINE unsigned int
2282 mode_from_disp_size (i386_operand_type t)
2283 {
2284 if (t.bitfield.disp8)
2285 return 1;
2286 else if (t.bitfield.disp16
2287 || t.bitfield.disp32)
2288 return 2;
2289 else
2290 return 0;
2291 }
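
/* Editor's note: the value returned above is the ModRM "mod" field for a
   base-register memory operand - 1 selects a disp8, 2 a disp16/disp32
   (depending on address size), and 0 means no displacement bytes.  */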
2292
2293 static INLINE int
2294 fits_in_signed_byte (addressT num)
2295 {
2296 return num + 0x80 <= 0xff;
2297 }
2298
2299 static INLINE int
2300 fits_in_unsigned_byte (addressT num)
2301 {
2302 return num <= 0xff;
2303 }
2304
2305 static INLINE int
2306 fits_in_unsigned_word (addressT num)
2307 {
2308 return num <= 0xffff;
2309 }
2310
2311 static INLINE int
2312 fits_in_signed_word (addressT num)
2313 {
2314 return num + 0x8000 <= 0xffff;
2315 }
2316
2317 static INLINE int
2318 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2319 {
2320 #ifndef BFD64
2321 return 1;
2322 #else
2323 return num + 0x80000000 <= 0xffffffff;
2324 #endif
2325 } /* fits_in_signed_long() */
2326
2327 static INLINE int
2328 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2329 {
2330 #ifndef BFD64
2331 return 1;
2332 #else
2333 return num <= 0xffffffff;
2334 #endif
2335 } /* fits_in_unsigned_long() */
2336
2337 static INLINE valueT extend_to_32bit_address (addressT num)
2338 {
2339 #ifdef BFD64
2340 if (fits_in_unsigned_long (num))
2341 return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2342
2343 if (!fits_in_signed_long (num))
2344 return num & 0xffffffff;
2345 #endif
2346
2347 return num;
2348 }
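
/* Editor's note, worked examples (BFD64): 0xfffffff0 sign-extends to
   0xfffffffffffffff0 because bit 31 is set, while 0x100000001, which
   fits neither 32-bit range, is truncated to 0x1.  */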
2349
2350 static INLINE int
2351 fits_in_disp8 (offsetT num)
2352 {
2353 int shift = i.memshift;
2354 unsigned int mask;
2355
2356 if (shift == -1)
2357 abort ();
2358
2359 mask = (1 << shift) - 1;
2360
2361 /* Return 0 if NUM isn't properly aligned. */
2362 if ((num & mask))
2363 return 0;
2364
2365 /* Check if NUM will fit in 8 bits after the shift. */
2366 return fits_in_signed_byte (num >> shift);
2367 }
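
/* Editor's note: this implements the EVEX disp8*N compressed
   displacement.  Worked example: with i.memshift == 4 (16-byte access),
   a displacement of 0x40 encodes as the disp8 value 4, while 0x48 is
   rejected as misaligned and needs a full disp32 instead.  */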
2368
2369 static INLINE int
2370 fits_in_imm4 (offsetT num)
2371 {
2372 /* Despite the name, check for imm3 if we're dealing with EVEX. */
2373 return (num & (i.vec_encoding != vex_encoding_evex ? 0xf : 7)) == num;
2374 }
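
/* Editor's note, worked example: fits_in_imm4 (11) is true when VEX
   encoding is in use (any value 0-15 fits), but false under EVEX, where
   despite the function's name only the 3-bit range 0-7 passes.  */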
2375
2376 static i386_operand_type
2377 smallest_imm_type (offsetT num)
2378 {
2379 i386_operand_type t;
2380
2381 operand_type_set (&t, 0);
2382 t.bitfield.imm64 = 1;
2383
2384 if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2385 {
2386 /* This code is disabled on the 486 because all the Imm1 forms
2387 in the opcode table are slower on the i486. They're the
2388 versions with the implicitly specified single-position
2389 displacement, which has another syntax if you really want to
2390 use that form. */
2391 t.bitfield.imm1 = 1;
2392 t.bitfield.imm8 = 1;
2393 t.bitfield.imm8s = 1;
2394 t.bitfield.imm16 = 1;
2395 t.bitfield.imm32 = 1;
2396 t.bitfield.imm32s = 1;
2397 }
2398 else if (fits_in_signed_byte (num))
2399 {
2400 if (fits_in_unsigned_byte (num))
2401 t.bitfield.imm8 = 1;
2402 t.bitfield.imm8s = 1;
2403 t.bitfield.imm16 = 1;
2404 t.bitfield.imm32 = 1;
2405 t.bitfield.imm32s = 1;
2406 }
2407 else if (fits_in_unsigned_byte (num))
2408 {
2409 t.bitfield.imm8 = 1;
2410 t.bitfield.imm16 = 1;
2411 t.bitfield.imm32 = 1;
2412 t.bitfield.imm32s = 1;
2413 }
2414 else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2415 {
2416 t.bitfield.imm16 = 1;
2417 t.bitfield.imm32 = 1;
2418 t.bitfield.imm32s = 1;
2419 }
2420 else if (fits_in_signed_long (num))
2421 {
2422 t.bitfield.imm32 = 1;
2423 t.bitfield.imm32s = 1;
2424 }
2425 else if (fits_in_unsigned_long (num))
2426 t.bitfield.imm32 = 1;
2427
2428 return t;
2429 }
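
/* Editor's note, worked examples: smallest_imm_type (0x80) sets
   imm8/imm16/imm32/imm32s but not imm8s (0x80 is not a signed byte),
   while smallest_imm_type (-1) sets imm8s and the wider signed types but
   not imm8.  imm64 is allowed in every case.  */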
2430
2431 static offsetT
2432 offset_in_range (offsetT val, int size)
2433 {
2434 addressT mask;
2435
2436 switch (size)
2437 {
2438 case 1: mask = ((addressT) 1 << 8) - 1; break;
2439 case 2: mask = ((addressT) 1 << 16) - 1; break;
2440 #ifdef BFD64
2441 case 4: mask = ((addressT) 1 << 32) - 1; break;
2442 #endif
2443 case sizeof (val): return val;
2444 default: abort ();
2445 }
2446
2447 if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2448 as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2449 (uint64_t) val, (uint64_t) (val & mask));
2450
2451 return val & mask;
2452 }
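
/* Editor's note, worked example: offset_in_range (0x1234, 1) warns
   "0x1234 shortened to 0x34" and returns 0x34; offset_in_range (-0x80, 1)
   is silent, since the negated value has no bits outside the mask.  */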
2453
2454 static INLINE const char *insn_name (const insn_template *t)
2455 {
2456 return &i386_mnemonics[t->mnem_off];
2457 }
2458
2459 enum PREFIX_GROUP
2460 {
2461 PREFIX_EXIST = 0,
2462 PREFIX_LOCK,
2463 PREFIX_REP,
2464 PREFIX_DS,
2465 PREFIX_OTHER
2466 };
2467
2468 /* Returns
2469 a. PREFIX_EXIST if attempting to add a prefix where one from the
2470 same class already exists.
2471 b. PREFIX_LOCK if lock prefix is added.
2472 c. PREFIX_REP if rep/repne prefix is added.
2473 d. PREFIX_DS if ds prefix is added.
2474 e. PREFIX_OTHER if other prefix is added.
2475 */
2476
2477 static enum PREFIX_GROUP
2478 add_prefix (unsigned int prefix)
2479 {
2480 enum PREFIX_GROUP ret = PREFIX_OTHER;
2481 unsigned int q;
2482
2483 if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2484 && flag_code == CODE_64BIT)
2485 {
2486 if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2487 || (i.prefix[REX_PREFIX] & prefix & REX_R)
2488 || (i.prefix[REX_PREFIX] & prefix & REX_X)
2489 || (i.prefix[REX_PREFIX] & prefix & REX_B))
2490 ret = PREFIX_EXIST;
2491 q = REX_PREFIX;
2492 }
2493 else
2494 {
2495 switch (prefix)
2496 {
2497 default:
2498 abort ();
2499
2500 case DS_PREFIX_OPCODE:
2501 ret = PREFIX_DS;
2502 /* Fall through. */
2503 case CS_PREFIX_OPCODE:
2504 case ES_PREFIX_OPCODE:
2505 case FS_PREFIX_OPCODE:
2506 case GS_PREFIX_OPCODE:
2507 case SS_PREFIX_OPCODE:
2508 q = SEG_PREFIX;
2509 break;
2510
2511 case REPNE_PREFIX_OPCODE:
2512 case REPE_PREFIX_OPCODE:
2513 q = REP_PREFIX;
2514 ret = PREFIX_REP;
2515 break;
2516
2517 case LOCK_PREFIX_OPCODE:
2518 q = LOCK_PREFIX;
2519 ret = PREFIX_LOCK;
2520 break;
2521
2522 case FWAIT_OPCODE:
2523 q = WAIT_PREFIX;
2524 break;
2525
2526 case ADDR_PREFIX_OPCODE:
2527 q = ADDR_PREFIX;
2528 break;
2529
2530 case DATA_PREFIX_OPCODE:
2531 q = DATA_PREFIX;
2532 break;
2533 }
2534 if (i.prefix[q] != 0)
2535 ret = PREFIX_EXIST;
2536 }
2537
2538 if (ret)
2539 {
2540 if (!i.prefix[q])
2541 ++i.prefixes;
2542 i.prefix[q] |= prefix;
2543 }
2544 else
2545 as_bad (_("same type of prefix used twice"));
2546
2547 return ret;
2548 }
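
/* Editor's note, illustrative: a "lock" prefix records LOCK_PREFIX_OPCODE
   here and returns PREFIX_LOCK; a second "lock" on the same insn finds
   i.prefix[LOCK_PREFIX] already set and triggers the PREFIX_EXIST path,
   i.e. the "same type of prefix used twice" diagnostic.  */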
2549
2550 static void
2551 update_code_flag (int value, int check)
2552 {
2553 PRINTF_LIKE ((*as_error));
2554
2555 flag_code = (enum flag_code) value;
2556 if (flag_code == CODE_64BIT)
2557 {
2558 cpu_arch_flags.bitfield.cpu64 = 1;
2559 cpu_arch_flags.bitfield.cpuno64 = 0;
2560 }
2561 else
2562 {
2563 cpu_arch_flags.bitfield.cpu64 = 0;
2564 cpu_arch_flags.bitfield.cpuno64 = 1;
2565 }
2566 if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm)
2567 {
2568 if (check)
2569 as_error = as_fatal;
2570 else
2571 as_error = as_bad;
2572 (*as_error) (_("64bit mode not supported on `%s'."),
2573 cpu_arch_name ? cpu_arch_name : default_arch);
2574 }
2575 if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2576 {
2577 if (check)
2578 as_error = as_fatal;
2579 else
2580 as_error = as_bad;
2581 (*as_error) (_("32bit mode not supported on `%s'."),
2582 cpu_arch_name ? cpu_arch_name : default_arch);
2583 }
2584 stackop_size = '\0';
2585 }
2586
2587 static void
2588 set_code_flag (int value)
2589 {
2590 update_code_flag (value, 0);
2591 }
2592
2593 static void
2594 set_16bit_gcc_code_flag (int new_code_flag)
2595 {
2596 flag_code = (enum flag_code) new_code_flag;
2597 if (flag_code != CODE_16BIT)
2598 abort ();
2599 cpu_arch_flags.bitfield.cpu64 = 0;
2600 cpu_arch_flags.bitfield.cpuno64 = 1;
2601 stackop_size = LONG_MNEM_SUFFIX;
2602 }
2603
2604 static void
2605 set_intel_syntax (int syntax_flag)
2606 {
2607 /* Find out if register prefixing is specified. */
2608 int ask_naked_reg = 0;
2609
2610 SKIP_WHITESPACE ();
2611 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2612 {
2613 char *string;
2614 int e = get_symbol_name (&string);
2615
2616 if (strcmp (string, "prefix") == 0)
2617 ask_naked_reg = 1;
2618 else if (strcmp (string, "noprefix") == 0)
2619 ask_naked_reg = -1;
2620 else
2621 as_bad (_("bad argument to syntax directive."));
2622 (void) restore_line_pointer (e);
2623 }
2624 demand_empty_rest_of_line ();
2625
2626 intel_syntax = syntax_flag;
2627
2628 if (ask_naked_reg == 0)
2629 allow_naked_reg = (intel_syntax
2630 && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2631 else
2632 allow_naked_reg = (ask_naked_reg < 0);
2633
2634 expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2635
2636 register_prefix = allow_naked_reg ? "" : "%";
2637 }
2638
2639 static void
2640 set_intel_mnemonic (int mnemonic_flag)
2641 {
2642 intel_mnemonic = mnemonic_flag;
2643 }
2644
2645 static void
2646 set_allow_index_reg (int flag)
2647 {
2648 allow_index_reg = flag;
2649 }
2650
2651 static void
2652 set_check (int what)
2653 {
2654 enum check_kind *kind;
2655 const char *str;
2656
2657 if (what)
2658 {
2659 kind = &operand_check;
2660 str = "operand";
2661 }
2662 else
2663 {
2664 kind = &sse_check;
2665 str = "sse";
2666 }
2667
2668 SKIP_WHITESPACE ();
2669
2670 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2671 {
2672 char *string;
2673 int e = get_symbol_name (&string);
2674
2675 if (strcmp (string, "none") == 0)
2676 *kind = check_none;
2677 else if (strcmp (string, "warning") == 0)
2678 *kind = check_warning;
2679 else if (strcmp (string, "error") == 0)
2680 *kind = check_error;
2681 else
2682 as_bad (_("bad argument to %s_check directive."), str);
2683 (void) restore_line_pointer (e);
2684 }
2685 else
2686 as_bad (_("missing argument for %s_check directive"), str);
2687
2688 demand_empty_rest_of_line ();
2689 }
2690
2691 static void
2692 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2693 i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2694 {
2695 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2696 static const char *arch;
2697
2698 /* Intel MCU is only supported on ELF. */
2699 if (!IS_ELF)
2700 return;
2701
2702 if (!arch)
2703 {
2704 /* Use cpu_arch_name if it is set in md_parse_option. Otherwise
2705 use default_arch. */
2706 arch = cpu_arch_name;
2707 if (!arch)
2708 arch = default_arch;
2709 }
2710
2711 /* If we are targeting Intel MCU, we must enable it. */
2712 if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2713 == new_flag.bitfield.cpuiamcu)
2714 return;
2715
2716 as_bad (_("`%s' is not supported on `%s'"), name, arch);
2717 #endif
2718 }
2719
2720 static void
2721 extend_cpu_sub_arch_name (const char *name)
2722 {
2723 if (cpu_sub_arch_name)
2724 cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
2725 ".", name, (const char *) NULL);
2726 else
2727 cpu_sub_arch_name = concat (".", name, (const char *) NULL);
2728 }
2729
2730 static void
2731 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2732 {
2733 typedef struct arch_stack_entry
2734 {
2735 const struct arch_stack_entry *prev;
2736 const char *name;
2737 char *sub_name;
2738 i386_cpu_flags flags;
2739 i386_cpu_flags isa_flags;
2740 enum processor_type isa;
2741 enum flag_code flag_code;
2742 char stackop_size;
2743 bool no_cond_jump_promotion;
2744 } arch_stack_entry;
2745 static const arch_stack_entry *arch_stack_top;
2746
2747 SKIP_WHITESPACE ();
2748
2749 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2750 {
2751 char *s;
2752 int e = get_symbol_name (&s);
2753 const char *string = s;
2754 unsigned int j = 0;
2755 i386_cpu_flags flags;
2756
2757 if (strcmp (string, "default") == 0)
2758 {
2759 if (strcmp (default_arch, "iamcu") == 0)
2760 string = default_arch;
2761 else
2762 {
2763 static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
2764
2765 cpu_arch_name = NULL;
2766 free (cpu_sub_arch_name);
2767 cpu_sub_arch_name = NULL;
2768 cpu_arch_flags = cpu_unknown_flags;
2769 if (flag_code == CODE_64BIT)
2770 {
2771 cpu_arch_flags.bitfield.cpu64 = 1;
2772 cpu_arch_flags.bitfield.cpuno64 = 0;
2773 }
2774 else
2775 {
2776 cpu_arch_flags.bitfield.cpu64 = 0;
2777 cpu_arch_flags.bitfield.cpuno64 = 1;
2778 }
2779 cpu_arch_isa = PROCESSOR_UNKNOWN;
2780 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
2781 if (!cpu_arch_tune_set)
2782 {
2783 cpu_arch_tune = cpu_arch_isa;
2784 cpu_arch_tune_flags = cpu_arch_isa_flags;
2785 }
2786
2787 j = ARRAY_SIZE (cpu_arch) + 1;
2788 }
2789 }
2790 else if (strcmp (string, "push") == 0)
2791 {
2792 arch_stack_entry *top = XNEW (arch_stack_entry);
2793
2794 top->name = cpu_arch_name;
2795 if (cpu_sub_arch_name)
2796 top->sub_name = xstrdup (cpu_sub_arch_name);
2797 else
2798 top->sub_name = NULL;
2799 top->flags = cpu_arch_flags;
2800 top->isa = cpu_arch_isa;
2801 top->isa_flags = cpu_arch_isa_flags;
2802 top->flag_code = flag_code;
2803 top->stackop_size = stackop_size;
2804 top->no_cond_jump_promotion = no_cond_jump_promotion;
2805
2806 top->prev = arch_stack_top;
2807 arch_stack_top = top;
2808
2809 (void) restore_line_pointer (e);
2810 demand_empty_rest_of_line ();
2811 return;
2812 }
2813 else if (strcmp (string, "pop") == 0)
2814 {
2815 const arch_stack_entry *top = arch_stack_top;
2816
2817 if (!top)
2818 as_bad (_(".arch stack is empty"));
2819 else if (top->flag_code != flag_code
2820 || top->stackop_size != stackop_size)
2821 {
2822 static const unsigned int bits[] = {
2823 [CODE_16BIT] = 16,
2824 [CODE_32BIT] = 32,
2825 [CODE_64BIT] = 64,
2826 };
2827
2828 as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
2829 bits[top->flag_code],
2830 top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
2831 }
2832 else
2833 {
2834 arch_stack_top = top->prev;
2835
2836 cpu_arch_name = top->name;
2837 free (cpu_sub_arch_name);
2838 cpu_sub_arch_name = top->sub_name;
2839 cpu_arch_flags = top->flags;
2840 cpu_arch_isa = top->isa;
2841 cpu_arch_isa_flags = top->isa_flags;
2842 no_cond_jump_promotion = top->no_cond_jump_promotion;
2843
2844 XDELETE (top);
2845 }
2846
2847 (void) restore_line_pointer (e);
2848 demand_empty_rest_of_line ();
2849 return;
2850 }
2851
2852 for (; j < ARRAY_SIZE (cpu_arch); j++)
2853 {
2854 if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
2855 && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
2856 {
2857 if (*string != '.')
2858 {
2859 check_cpu_arch_compatible (string, cpu_arch[j].enable);
2860
2861 cpu_arch_name = cpu_arch[j].name;
2862 free (cpu_sub_arch_name);
2863 cpu_sub_arch_name = NULL;
2864 cpu_arch_flags = cpu_arch[j].enable;
2865 if (flag_code == CODE_64BIT)
2866 {
2867 cpu_arch_flags.bitfield.cpu64 = 1;
2868 cpu_arch_flags.bitfield.cpuno64 = 0;
2869 }
2870 else
2871 {
2872 cpu_arch_flags.bitfield.cpu64 = 0;
2873 cpu_arch_flags.bitfield.cpuno64 = 1;
2874 }
2875 cpu_arch_isa = cpu_arch[j].type;
2876 cpu_arch_isa_flags = cpu_arch[j].enable;
2877 if (!cpu_arch_tune_set)
2878 {
2879 cpu_arch_tune = cpu_arch_isa;
2880 cpu_arch_tune_flags = cpu_arch_isa_flags;
2881 }
2882 pre_386_16bit_warned = false;
2883 break;
2884 }
2885
2886 if (cpu_flags_all_zero (&cpu_arch[j].enable))
2887 continue;
2888
2889 flags = cpu_flags_or (cpu_arch_flags,
2890 cpu_arch[j].enable);
2891
2892 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2893 {
2894 extend_cpu_sub_arch_name (string + 1);
2895 cpu_arch_flags = flags;
2896 cpu_arch_isa_flags = flags;
2897 }
2898 else
2899 cpu_arch_isa_flags
2900 = cpu_flags_or (cpu_arch_isa_flags,
2901 cpu_arch[j].enable);
2902 (void) restore_line_pointer (e);
2903 demand_empty_rest_of_line ();
2904 return;
2905 }
2906 }
2907
2908 if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
2909 {
2910 /* Disable an ISA extension. */
2911 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2912 if (cpu_arch[j].type == PROCESSOR_NONE
2913 && strcmp (string + 3, cpu_arch[j].name) == 0)
2914 {
2915 flags = cpu_flags_and_not (cpu_arch_flags,
2916 cpu_arch[j].disable);
2917 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2918 {
2919 extend_cpu_sub_arch_name (string + 1);
2920 cpu_arch_flags = flags;
2921 cpu_arch_isa_flags = flags;
2922 }
2923 (void) restore_line_pointer (e);
2924 demand_empty_rest_of_line ();
2925 return;
2926 }
2927 }
2928
2929 if (j == ARRAY_SIZE (cpu_arch))
2930 as_bad (_("no such architecture: `%s'"), string);
2931
2932 *input_line_pointer = e;
2933 }
2934 else
2935 as_bad (_("missing cpu architecture"));
2936
2937 no_cond_jump_promotion = 0;
2938 if (*input_line_pointer == ','
2939 && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2940 {
2941 char *string;
2942 char e;
2943
2944 ++input_line_pointer;
2945 e = get_symbol_name (&string);
2946
2947 if (strcmp (string, "nojumps") == 0)
2948 no_cond_jump_promotion = 1;
2949 else if (strcmp (string, "jumps") == 0)
2950 ;
2951 else
2952 as_bad (_("no such architecture modifier: `%s'"), string);
2953
2954 (void) restore_line_pointer (e);
2955 }
2956
2957 demand_empty_rest_of_line ();
2958 }
2959
2960 enum bfd_architecture
2961 i386_arch (void)
2962 {
2963 if (cpu_arch_isa == PROCESSOR_IAMCU)
2964 {
2965 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2966 || flag_code == CODE_64BIT)
2967 as_fatal (_("Intel MCU is 32bit ELF only"));
2968 return bfd_arch_iamcu;
2969 }
2970 else
2971 return bfd_arch_i386;
2972 }
2973
2974 unsigned long
2975 i386_mach (void)
2976 {
2977 if (startswith (default_arch, "x86_64"))
2978 {
2979 if (default_arch[6] == '\0')
2980 return bfd_mach_x86_64;
2981 else
2982 return bfd_mach_x64_32;
2983 }
2984 else if (!strcmp (default_arch, "i386")
2985 || !strcmp (default_arch, "iamcu"))
2986 {
2987 if (cpu_arch_isa == PROCESSOR_IAMCU)
2988 {
2989 if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
2990 as_fatal (_("Intel MCU is 32bit ELF only"));
2991 return bfd_mach_i386_iamcu;
2992 }
2993 else
2994 return bfd_mach_i386_i386;
2995 }
2996 else
2997 as_fatal (_("unknown architecture"));
2998 }
2999 \f
3000 #include "opcodes/i386-tbl.h"
3001
3002 void
3003 md_begin (void)
3004 {
3005 /* Support pseudo prefixes like {disp32}. */
3006 lex_type ['{'] = LEX_BEGIN_NAME;
3007
3008 /* Initialize op_hash hash table. */
3009 op_hash = str_htab_create ();
3010
3011 {
3012 const insn_template *const *sets = i386_op_sets;
3013 const insn_template *const *end = sets + ARRAY_SIZE (i386_op_sets) - 1;
3014
3015 /* Type checks to compensate for the conversion through void * which
3016 occurs during hash table insertion / lookup. */
3017 (void) sizeof (sets == &current_templates->start);
3018 (void) sizeof (end == &current_templates->end);
3019 for (; sets < end; ++sets)
3020 if (str_hash_insert (op_hash, insn_name (*sets), sets, 0))
3021 as_fatal (_("duplicate %s"), insn_name (*sets));
3022 }
3023
3024 /* Initialize reg_hash hash table. */
3025 reg_hash = str_htab_create ();
3026 {
3027 const reg_entry *regtab;
3028 unsigned int regtab_size = i386_regtab_size;
3029
3030 for (regtab = i386_regtab; regtab_size--; regtab++)
3031 {
3032 switch (regtab->reg_type.bitfield.class)
3033 {
3034 case Reg:
3035 if (regtab->reg_type.bitfield.dword)
3036 {
3037 if (regtab->reg_type.bitfield.instance == Accum)
3038 reg_eax = regtab;
3039 }
3040 else if (regtab->reg_type.bitfield.tbyte)
3041 {
3042 /* There's no point inserting st(<N>) in the hash table, as
3043 parentheses aren't included in register_chars[] anyway. */
3044 if (regtab->reg_type.bitfield.instance != Accum)
3045 continue;
3046 reg_st0 = regtab;
3047 }
3048 break;
3049
3050 case SReg:
3051 switch (regtab->reg_num)
3052 {
3053 case 0: reg_es = regtab; break;
3054 case 2: reg_ss = regtab; break;
3055 case 3: reg_ds = regtab; break;
3056 }
3057 break;
3058
3059 case RegMask:
3060 if (!regtab->reg_num)
3061 reg_k0 = regtab;
3062 break;
3063 }
3064
3065 if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3066 as_fatal (_("duplicate %s"), regtab->reg_name);
3067 }
3068 }
3069
3070 /* Fill in lexical tables: mnemonic_chars, operand_chars. */
3071 {
3072 int c;
3073 const char *p;
3074
3075 for (c = 0; c < 256; c++)
3076 {
3077 if (ISDIGIT (c) || ISLOWER (c))
3078 {
3079 mnemonic_chars[c] = c;
3080 register_chars[c] = c;
3081 operand_chars[c] = c;
3082 }
3083 else if (ISUPPER (c))
3084 {
3085 mnemonic_chars[c] = TOLOWER (c);
3086 register_chars[c] = mnemonic_chars[c];
3087 operand_chars[c] = c;
3088 }
3089 #ifdef SVR4_COMMENT_CHARS
3090 else if (c == '\\' && strchr (i386_comment_chars, '/'))
3091 operand_chars[c] = c;
3092 #endif
3093
3094 if (c >= 128)
3095 operand_chars[c] = c;
3096 }
3097
3098 mnemonic_chars['_'] = '_';
3099 mnemonic_chars['-'] = '-';
3100 mnemonic_chars['.'] = '.';
3101
3102 for (p = extra_symbol_chars; *p != '\0'; p++)
3103 operand_chars[(unsigned char) *p] = *p;
3104 for (p = operand_special_chars; *p != '\0'; p++)
3105 operand_chars[(unsigned char) *p] = *p;
3106 }
3107
3108 if (flag_code == CODE_64BIT)
3109 {
3110 #if defined (OBJ_COFF) && defined (TE_PE)
3111 x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3112 ? 32 : 16);
3113 #else
3114 x86_dwarf2_return_column = 16;
3115 #endif
3116 x86_cie_data_alignment = -8;
3117 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3118 x86_sframe_cfa_sp_reg = 7;
3119 x86_sframe_cfa_fp_reg = 6;
3120 #endif
3121 }
3122 else
3123 {
3124 x86_dwarf2_return_column = 8;
3125 x86_cie_data_alignment = -4;
3126 }
3127
3128 /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3129 can be turned into a BRANCH_PREFIX frag. */
3130 if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3131 abort ();
3132 }
3133
3134 void
3135 i386_print_statistics (FILE *file)
3136 {
3137 htab_print_statistics (file, "i386 opcode", op_hash);
3138 htab_print_statistics (file, "i386 register", reg_hash);
3139 }
3140
3141 void
3142 i386_md_end (void)
3143 {
3144 htab_delete (op_hash);
3145 htab_delete (reg_hash);
3146 }
3147 \f
3148 #ifdef DEBUG386
3149
3150 /* Debugging routines for md_assemble. */
3151 static void pte (insn_template *);
3152 static void pt (i386_operand_type);
3153 static void pe (expressionS *);
3154 static void ps (symbolS *);
3155
3156 static void
3157 pi (const char *line, i386_insn *x)
3158 {
3159 unsigned int j;
3160
3161 fprintf (stdout, "%s: template ", line);
3162 pte (&x->tm);
3163 fprintf (stdout, " address: base %s index %s scale %x\n",
3164 x->base_reg ? x->base_reg->reg_name : "none",
3165 x->index_reg ? x->index_reg->reg_name : "none",
3166 x->log2_scale_factor);
3167 fprintf (stdout, " modrm: mode %x reg %x reg/mem %x\n",
3168 x->rm.mode, x->rm.reg, x->rm.regmem);
3169 fprintf (stdout, " sib: base %x index %x scale %x\n",
3170 x->sib.base, x->sib.index, x->sib.scale);
3171 fprintf (stdout, " rex: 64bit %x extX %x extY %x extZ %x\n",
3172 (x->rex & REX_W) != 0,
3173 (x->rex & REX_R) != 0,
3174 (x->rex & REX_X) != 0,
3175 (x->rex & REX_B) != 0);
3176 for (j = 0; j < x->operands; j++)
3177 {
3178 fprintf (stdout, " #%d: ", j + 1);
3179 pt (x->types[j]);
3180 fprintf (stdout, "\n");
3181 if (x->types[j].bitfield.class == Reg
3182 || x->types[j].bitfield.class == RegMMX
3183 || x->types[j].bitfield.class == RegSIMD
3184 || x->types[j].bitfield.class == RegMask
3185 || x->types[j].bitfield.class == SReg
3186 || x->types[j].bitfield.class == RegCR
3187 || x->types[j].bitfield.class == RegDR
3188 || x->types[j].bitfield.class == RegTR
3189 || x->types[j].bitfield.class == RegBND)
3190 fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3191 if (operand_type_check (x->types[j], imm))
3192 pe (x->op[j].imms);
3193 if (operand_type_check (x->types[j], disp))
3194 pe (x->op[j].disps);
3195 }
3196 }
3197
3198 static void
3199 pte (insn_template *t)
3200 {
3201 static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3202 static const char *const opc_spc[] = {
3203 NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3204 "XOP08", "XOP09", "XOP0A",
3205 };
3206 unsigned int j;
3207
3208 fprintf (stdout, " %d operands ", t->operands);
3209 if (opc_pfx[t->opcode_modifier.opcodeprefix])
3210 fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3211 if (opc_spc[t->opcode_space])
3212 fprintf (stdout, "space %s ", opc_spc[t->opcode_space]);
3213 fprintf (stdout, "opcode %x ", t->base_opcode);
3214 if (t->extension_opcode != None)
3215 fprintf (stdout, "ext %x ", t->extension_opcode);
3216 if (t->opcode_modifier.d)
3217 fprintf (stdout, "D");
3218 if (t->opcode_modifier.w)
3219 fprintf (stdout, "W");
3220 fprintf (stdout, "\n");
3221 for (j = 0; j < t->operands; j++)
3222 {
3223 fprintf (stdout, " #%d type ", j + 1);
3224 pt (t->operand_types[j]);
3225 fprintf (stdout, "\n");
3226 }
3227 }
3228
3229 static void
3230 pe (expressionS *e)
3231 {
3232 fprintf (stdout, " operation %d\n", e->X_op);
3233 fprintf (stdout, " add_number %" PRId64 " (%" PRIx64 ")\n",
3234 (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3235 if (e->X_add_symbol)
3236 {
3237 fprintf (stdout, " add_symbol ");
3238 ps (e->X_add_symbol);
3239 fprintf (stdout, "\n");
3240 }
3241 if (e->X_op_symbol)
3242 {
3243 fprintf (stdout, " op_symbol ");
3244 ps (e->X_op_symbol);
3245 fprintf (stdout, "\n");
3246 }
3247 }
3248
3249 static void
3250 ps (symbolS *s)
3251 {
3252 fprintf (stdout, "%s type %s%s",
3253 S_GET_NAME (s),
3254 S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3255 segment_name (S_GET_SEGMENT (s)));
3256 }
3257
3258 static struct type_name
3259 {
3260 i386_operand_type mask;
3261 const char *name;
3262 }
3263 const type_names[] =
3264 {
3265 { { .bitfield = { .class = Reg, .byte = 1 } }, "r8" },
3266 { { .bitfield = { .class = Reg, .word = 1 } }, "r16" },
3267 { { .bitfield = { .class = Reg, .dword = 1 } }, "r32" },
3268 { { .bitfield = { .class = Reg, .qword = 1 } }, "r64" },
3269 { { .bitfield = { .instance = Accum, .byte = 1 } }, "acc8" },
3270 { { .bitfield = { .instance = Accum, .word = 1 } }, "acc16" },
3271 { { .bitfield = { .instance = Accum, .dword = 1 } }, "acc32" },
3272 { { .bitfield = { .instance = Accum, .qword = 1 } }, "acc64" },
3273 { { .bitfield = { .imm8 = 1 } }, "i8" },
3274 { { .bitfield = { .imm8s = 1 } }, "i8s" },
3275 { { .bitfield = { .imm16 = 1 } }, "i16" },
3276 { { .bitfield = { .imm32 = 1 } }, "i32" },
3277 { { .bitfield = { .imm32s = 1 } }, "i32s" },
3278 { { .bitfield = { .imm64 = 1 } }, "i64" },
3279 { { .bitfield = { .imm1 = 1 } }, "i1" },
3280 { { .bitfield = { .baseindex = 1 } }, "BaseIndex" },
3281 { { .bitfield = { .disp8 = 1 } }, "d8" },
3282 { { .bitfield = { .disp16 = 1 } }, "d16" },
3283 { { .bitfield = { .disp32 = 1 } }, "d32" },
3284 { { .bitfield = { .disp64 = 1 } }, "d64" },
3285 { { .bitfield = { .instance = RegD, .word = 1 } }, "InOutPortReg" },
3286 { { .bitfield = { .instance = RegC, .byte = 1 } }, "ShiftCount" },
3287 { { .bitfield = { .class = RegCR } }, "control reg" },
3288 { { .bitfield = { .class = RegTR } }, "test reg" },
3289 { { .bitfield = { .class = RegDR } }, "debug reg" },
3290 { { .bitfield = { .class = Reg, .tbyte = 1 } }, "FReg" },
3291 { { .bitfield = { .instance = Accum, .tbyte = 1 } }, "FAcc" },
3292 { { .bitfield = { .class = SReg } }, "SReg" },
3293 { { .bitfield = { .class = RegMMX } }, "rMMX" },
3294 { { .bitfield = { .class = RegSIMD, .xmmword = 1 } }, "rXMM" },
3295 { { .bitfield = { .class = RegSIMD, .ymmword = 1 } }, "rYMM" },
3296 { { .bitfield = { .class = RegSIMD, .zmmword = 1 } }, "rZMM" },
3297 { { .bitfield = { .class = RegSIMD, .tmmword = 1 } }, "rTMM" },
3298 { { .bitfield = { .class = RegMask } }, "Mask reg" },
3299 };
3300
3301 static void
3302 pt (i386_operand_type t)
3303 {
3304 unsigned int j;
3305 i386_operand_type a;
3306
3307 for (j = 0; j < ARRAY_SIZE (type_names); j++)
3308 {
3309 a = operand_type_and (t, type_names[j].mask);
3310 if (operand_type_equal (&a, &type_names[j].mask))
3311 fprintf (stdout, "%s, ", type_names[j].name);
3312 }
3313 fflush (stdout);
3314 }
3315
3316 #endif /* DEBUG386 */
3317 \f
3318 static bfd_reloc_code_real_type
3319 reloc (unsigned int size,
3320 int pcrel,
3321 int sign,
3322 bfd_reloc_code_real_type other)
3323 {
3324 if (other != NO_RELOC)
3325 {
3326 reloc_howto_type *rel;
3327
3328 if (size == 8)
3329 switch (other)
3330 {
3331 case BFD_RELOC_X86_64_GOT32:
3332 return BFD_RELOC_X86_64_GOT64;
3333 break;
3334 case BFD_RELOC_X86_64_GOTPLT64:
3335 return BFD_RELOC_X86_64_GOTPLT64;
3336 break;
3337 case BFD_RELOC_X86_64_PLTOFF64:
3338 return BFD_RELOC_X86_64_PLTOFF64;
3339 break;
3340 case BFD_RELOC_X86_64_GOTPC32:
3341 other = BFD_RELOC_X86_64_GOTPC64;
3342 break;
3343 case BFD_RELOC_X86_64_GOTPCREL:
3344 other = BFD_RELOC_X86_64_GOTPCREL64;
3345 break;
3346 case BFD_RELOC_X86_64_TPOFF32:
3347 other = BFD_RELOC_X86_64_TPOFF64;
3348 break;
3349 case BFD_RELOC_X86_64_DTPOFF32:
3350 other = BFD_RELOC_X86_64_DTPOFF64;
3351 break;
3352 default:
3353 break;
3354 }
3355
3356 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3357 if (other == BFD_RELOC_SIZE32)
3358 {
3359 if (size == 8)
3360 other = BFD_RELOC_SIZE64;
3361 if (pcrel)
3362 {
3363 as_bad (_("there are no pc-relative size relocations"));
3364 return NO_RELOC;
3365 }
3366 }
3367 #endif
3368
3369 /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless. */
3370 if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3371 sign = -1;
3372
3373 rel = bfd_reloc_type_lookup (stdoutput, other);
3374 if (!rel)
3375 as_bad (_("unknown relocation (%u)"), other);
3376 else if (size != bfd_get_reloc_size (rel))
3377 as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3378 bfd_get_reloc_size (rel),
3379 size);
3380 else if (pcrel && !rel->pc_relative)
3381 as_bad (_("non-pc-relative relocation for pc-relative field"));
3382 else if ((rel->complain_on_overflow == complain_overflow_signed
3383 && !sign)
3384 || (rel->complain_on_overflow == complain_overflow_unsigned
3385 && sign > 0))
3386 as_bad (_("relocated field and relocation type differ in signedness"));
3387 else
3388 return other;
3389 return NO_RELOC;
3390 }
3391
3392 if (pcrel)
3393 {
3394 if (!sign)
3395 as_bad (_("there are no unsigned pc-relative relocations"));
3396 switch (size)
3397 {
3398 case 1: return BFD_RELOC_8_PCREL;
3399 case 2: return BFD_RELOC_16_PCREL;
3400 case 4: return BFD_RELOC_32_PCREL;
3401 case 8: return BFD_RELOC_64_PCREL;
3402 }
3403 as_bad (_("cannot do %u byte pc-relative relocation"), size);
3404 }
3405 else
3406 {
3407 if (sign > 0)
3408 switch (size)
3409 {
3410 case 4: return BFD_RELOC_X86_64_32S;
3411 }
3412 else
3413 switch (size)
3414 {
3415 case 1: return BFD_RELOC_8;
3416 case 2: return BFD_RELOC_16;
3417 case 4: return BFD_RELOC_32;
3418 case 8: return BFD_RELOC_64;
3419 }
3420 as_bad (_("cannot do %s %u byte relocation"),
3421 sign > 0 ? "signed" : "unsigned", size);
3422 }
3423
3424 return NO_RELOC;
3425 }
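
/* Editor's note, illustrative: reloc (4, 1, 1, NO_RELOC) yields
   BFD_RELOC_32_PCREL (e.g. a near call's displacement), while
   reloc (4, 0, 1, NO_RELOC) yields BFD_RELOC_X86_64_32S, the
   sign-extended 32-bit immediate flavour used in 64-bit code.  */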
3426
3427 /* Here we decide which fixups can be adjusted to make them relative to
3428 the beginning of the section instead of the symbol. Basically we need
3429 to make sure that the dynamic relocations are done correctly, so in
3430 some cases we force the original symbol to be used. */
3431
3432 int
3433 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3434 {
3435 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3436 if (!IS_ELF)
3437 return 1;
3438
3439 /* Don't adjust pc-relative references to merge sections in 64-bit
3440 mode. */
3441 if (use_rela_relocations
3442 && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3443 && fixP->fx_pcrel)
3444 return 0;
3445
3446 /* The x86_64 GOTPCREL relocations are represented as 32bit PCrel
3447 relocations and changed later by validate_fix. */
3448 if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3449 && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3450 return 0;
3451
3452 /* Adjust_reloc_syms doesn't know about the GOT. Need to keep symbol
3453 for size relocations. */
3454 if (fixP->fx_r_type == BFD_RELOC_SIZE32
3455 || fixP->fx_r_type == BFD_RELOC_SIZE64
3456 || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3457 || fixP->fx_r_type == BFD_RELOC_386_GOT32
3458 || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3459 || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3460 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3461 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3462 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3463 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3464 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3465 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3466 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3467 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3468 || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3469 || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3470 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3471 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3472 || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3473 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3474 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3475 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3476 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3477 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3478 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3479 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3480 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3481 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3482 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3483 || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3484 || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3485 return 0;
3486 #endif
3487 return 1;
3488 }
3489
3490 static INLINE bool
3491 want_disp32 (const insn_template *t)
3492 {
3493 return flag_code != CODE_64BIT
3494 || i.prefix[ADDR_PREFIX]
3495 || (t->mnem_off == MN_lea
3496 && (!i.types[1].bitfield.qword
3497 || t->opcode_modifier.size == SIZE32));
3498 }
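
/* Editor's note: e.g. in 64-bit code "lea sym, %eax" takes the disp32
   path above - the 32-bit destination makes the address arithmetic
   32-bit - as does any memory access after an 0x67 address-size prefix;
   otherwise 64-bit code uses sign-extended 32-bit displacements.  */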
3499
3500 static int
3501 intel_float_operand (const char *mnemonic)
3502 {
3503 /* Note that the value returned is meaningful only for opcodes with (memory)
3504 operands, hence the code here is free to improperly handle opcodes that
3505 have no operands (for better performance and smaller code). */
3506
3507 if (mnemonic[0] != 'f')
3508 return 0; /* non-math */
3509
3510 switch (mnemonic[1])
3511 {
3512 /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
3513 the fs segment override prefix are not currently handled, because no
3514 call path can make opcodes without operands get here. */
3515 case 'i':
3516 return 2 /* integer op */;
3517 case 'l':
3518 if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
3519 return 3; /* fldcw/fldenv */
3520 break;
3521 case 'n':
3522 if (mnemonic[2] != 'o' /* fnop */)
3523 return 3; /* non-waiting control op */
3524 break;
3525 case 'r':
3526 if (mnemonic[2] == 's')
3527 return 3; /* frstor/frstpm */
3528 break;
3529 case 's':
3530 if (mnemonic[2] == 'a')
3531 return 3; /* fsave */
3532 if (mnemonic[2] == 't')
3533 {
3534 switch (mnemonic[3])
3535 {
3536 case 'c': /* fstcw */
3537 case 'd': /* fstdw */
3538 case 'e': /* fstenv */
3539 case 's': /* fsts[gw] */
3540 return 3;
3541 }
3542 }
3543 break;
3544 case 'x':
3545 if (mnemonic[2] == 'r' || mnemonic[2] == 's')
3546 return 0; /* fxsave/fxrstor are not really math ops */
3547 break;
3548 }
3549
3550 return 1;
3551 }
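
/* Editor's note, illustrative: intel_float_operand ("fild") returns 2
   (integer operand), intel_float_operand ("fldcw") returns 3 (control
   op), and a non-FPU mnemonic such as "mov" returns 0.  */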
3552
3553 static INLINE void
3554 install_template (const insn_template *t)
3555 {
3556 unsigned int l;
3557
3558 i.tm = *t;
3559
3560 /* Note that for pseudo prefixes this produces a length of 1. But for them
3561 the length isn't interesting at all. */
3562 for (l = 1; l < 4; ++l)
3563 if (!(t->base_opcode >> (8 * l)))
3564 break;
3565
3566 i.opcode_length = l;
3567 }
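
/* Editor's note, worked example: a one-byte opcode such as 0x90 ("nop")
   gives i.opcode_length 1; a two-byte base_opcode such as 0xc7f8 (RTM
   "xbegin", bytes C7 F8) gives 2 - the loop stops at the first byte
   position whose remaining high bits are all zero.  */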
3568
3569 /* Build the VEX prefix. */
3570
3571 static void
3572 build_vex_prefix (const insn_template *t)
3573 {
3574 unsigned int register_specifier;
3575 unsigned int vector_length;
3576 unsigned int w;
3577
3578 /* Check register specifier. */
3579 if (i.vex.register_specifier)
3580 {
3581 register_specifier =
3582 ~register_number (i.vex.register_specifier) & 0xf;
3583 gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3584 }
3585 else
3586 register_specifier = 0xf;
3587
3588 /* Use 2-byte VEX prefix by swapping destination and source operand
3589 if there is more than one register operand. */
3590 if (i.reg_operands > 1
3591 && i.vec_encoding != vex_encoding_vex3
3592 && i.dir_encoding == dir_encoding_default
3593 && i.operands == i.reg_operands
3594 && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3595 && i.tm.opcode_space == SPACE_0F
3596 && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3597 && i.rex == REX_B)
3598 {
3599 unsigned int xchg;
3600
3601 swap_2_operands (0, i.operands - 1);
3602
3603 gas_assert (i.rm.mode == 3);
3604
3605 i.rex = REX_R;
3606 xchg = i.rm.regmem;
3607 i.rm.regmem = i.rm.reg;
3608 i.rm.reg = xchg;
3609
3610 if (i.tm.opcode_modifier.d)
3611 i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3612 ? Opcode_ExtD : Opcode_SIMD_IntD;
3613 else /* Use the next insn. */
3614 install_template (&t[1]);
3615 }
3616
3617 /* Use 2-byte VEX prefix by swapping commutative source operands if there
3618 are no memory operands and at least 3 register ones. */
3619 if (i.reg_operands >= 3
3620 && i.vec_encoding != vex_encoding_vex3
3621 && i.reg_operands == i.operands - i.imm_operands
3622 && i.tm.opcode_modifier.vex
3623 && i.tm.opcode_modifier.commutative
3624 && (i.tm.opcode_modifier.sse2avx
3625 || (optimize > 1 && !i.no_optimize))
3626 && i.rex == REX_B
3627 && i.vex.register_specifier
3628 && !(i.vex.register_specifier->reg_flags & RegRex))
3629 {
3630 unsigned int xchg = i.operands - i.reg_operands;
3631
3632 gas_assert (i.tm.opcode_space == SPACE_0F);
3633 gas_assert (!i.tm.opcode_modifier.sae);
3634 gas_assert (operand_type_equal (&i.types[i.operands - 2],
3635 &i.types[i.operands - 3]));
3636 gas_assert (i.rm.mode == 3);
3637
3638 swap_2_operands (xchg, xchg + 1);
3639
3640 i.rex = 0;
3641 xchg = i.rm.regmem | 8;
3642 i.rm.regmem = ~register_specifier & 0xf;
3643 gas_assert (!(i.rm.regmem & 8));
3644 i.vex.register_specifier += xchg - i.rm.regmem;
3645 register_specifier = ~xchg & 0xf;
3646 }
3647
3648 if (i.tm.opcode_modifier.vex == VEXScalar)
3649 vector_length = avxscalar;
3650 else if (i.tm.opcode_modifier.vex == VEX256)
3651 vector_length = 1;
3652 else if (dot_insn () && i.tm.opcode_modifier.vex == VEX128)
3653 vector_length = 0;
3654 else
3655 {
3656 unsigned int op;
3657
3658 /* Determine vector length from the last multi-length vector
3659 operand. */
3660 vector_length = 0;
3661 for (op = t->operands; op--;)
3662 if (t->operand_types[op].bitfield.xmmword
3663 && t->operand_types[op].bitfield.ymmword
3664 && i.types[op].bitfield.ymmword)
3665 {
3666 vector_length = 1;
3667 break;
3668 }
3669 }
3670
3671 /* Check the REX.W bit and VEXW. */
3672 if (i.tm.opcode_modifier.vexw == VEXWIG)
3673 w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3674 else if (i.tm.opcode_modifier.vexw)
3675 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3676 else
3677 w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3678
3679 /* Use 2-byte VEX prefix if possible. */
3680 if (w == 0
3681 && i.vec_encoding != vex_encoding_vex3
3682 && i.tm.opcode_space == SPACE_0F
3683 && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3684 {
3685 /* 2-byte VEX prefix. */
3686 unsigned int r;
3687
3688 i.vex.length = 2;
3689 i.vex.bytes[0] = 0xc5;
3690
3691 /* Check the REX.R bit. */
3692 r = (i.rex & REX_R) ? 0 : 1;
3693 i.vex.bytes[1] = (r << 7
3694 | register_specifier << 3
3695 | vector_length << 2
3696 | i.tm.opcode_modifier.opcodeprefix);
3697 }
3698 else
3699 {
3700 /* 3-byte VEX prefix. */
3701 i.vex.length = 3;
3702
3703 switch (i.tm.opcode_space)
3704 {
3705 case SPACE_0F:
3706 case SPACE_0F38:
3707 case SPACE_0F3A:
3708 i.vex.bytes[0] = 0xc4;
3709 break;
3710 case SPACE_XOP08:
3711 case SPACE_XOP09:
3712 case SPACE_XOP0A:
3713 i.vex.bytes[0] = 0x8f;
3714 break;
3715 default:
3716 abort ();
3717 }
3718
3719 /* The high 3 bits of the second VEX byte are 1's complement
3720 of RXB bits from REX. */
3721 i.vex.bytes[1] = ((~i.rex & 7) << 5)
3722 | (!dot_insn () ? i.tm.opcode_space
3723 : i.insn_opcode_space);
3724
3725 i.vex.bytes[2] = (w << 7
3726 | register_specifier << 3
3727 | vector_length << 2
3728 | i.tm.opcode_modifier.opcodeprefix);
3729 }
3730 }
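
/* Editor's note, a worked byte layout (generic VEX, for orientation):
   "vaddps %ymm2, %ymm1, %ymm0" fits the 2-byte form C5 F4 58 C2:
   0xf4 = R=1 (<<7) | vvvv=~1=1110b (<<3) | L=1 (<<2) | pp=00.  Needing
   W=1, REX.X/REX.B, or an opcode space other than 0F forces the 3-byte
   C4 form built in the else branch above.  */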
3731
3732 static INLINE bool
3733 is_evex_encoding (const insn_template *t)
3734 {
3735 return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3736 || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3737 || t->opcode_modifier.sae;
3738 }
3739
3740 static INLINE bool
3741 is_any_vex_encoding (const insn_template *t)
3742 {
3743 return t->opcode_modifier.vex || is_evex_encoding (t);
3744 }
3745
3746 static unsigned int
3747 get_broadcast_bytes (const insn_template *t, bool diag)
3748 {
3749 unsigned int op, bytes;
3750 const i386_operand_type *types;
3751
3752 if (i.broadcast.type)
3753 return (1 << (t->opcode_modifier.broadcast - 1)) * i.broadcast.type;
3754
3755 gas_assert (intel_syntax);
3756
3757 for (op = 0; op < t->operands; ++op)
3758 if (t->operand_types[op].bitfield.baseindex)
3759 break;
3760
3761 gas_assert (op < t->operands);
3762
3763 if (t->opcode_modifier.evex
3764 && t->opcode_modifier.evex != EVEXDYN)
3765 switch (i.broadcast.bytes)
3766 {
3767 case 1:
3768 if (t->operand_types[op].bitfield.word)
3769 return 2;
3770 /* Fall through. */
3771 case 2:
3772 if (t->operand_types[op].bitfield.dword)
3773 return 4;
3774 /* Fall through. */
3775 case 4:
3776 if (t->operand_types[op].bitfield.qword)
3777 return 8;
3778 /* Fall through. */
3779 case 8:
3780 if (t->operand_types[op].bitfield.xmmword)
3781 return 16;
3782 if (t->operand_types[op].bitfield.ymmword)
3783 return 32;
3784 if (t->operand_types[op].bitfield.zmmword)
3785 return 64;
3786 /* Fall through. */
3787 default:
3788 abort ();
3789 }
3790
3791 gas_assert (op + 1 < t->operands);
3792
3793 if (t->operand_types[op + 1].bitfield.xmmword
3794 + t->operand_types[op + 1].bitfield.ymmword
3795 + t->operand_types[op + 1].bitfield.zmmword > 1)
3796 {
3797 types = &i.types[op + 1];
3798 diag = false;
3799 }
3800 else /* Ambiguous - guess with a preference to non-AVX512VL forms. */
3801 types = &t->operand_types[op];
3802
3803 if (types->bitfield.zmmword)
3804 bytes = 64;
3805 else if (types->bitfield.ymmword)
3806 bytes = 32;
3807 else
3808 bytes = 16;
3809
3810 if (diag)
3811 as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
3812 insn_name (t), bytes * 8);
3813
3814 return bytes;
3815 }
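
/* Editor's note, worked example (AT&T syntax): for
   "vaddps (%rax){1to16}, %zmm1, %zmm0" the broadcast element is a dword,
   so the early return computes (1 << 2) * 16 == 64 bytes - the full ZMM
   width.  The Intel syntax paths below have to infer this instead.  */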
3816
3817 /* Build the EVEX prefix. */
3818
3819 static void
3820 build_evex_prefix (void)
3821 {
3822 unsigned int register_specifier, w;
3823 rex_byte vrex_used = 0;
3824
3825 /* Check register specifier. */
3826 if (i.vex.register_specifier)
3827 {
3828 gas_assert ((i.vrex & REX_X) == 0);
3829
3830 register_specifier = i.vex.register_specifier->reg_num;
3831 if ((i.vex.register_specifier->reg_flags & RegRex))
3832 register_specifier += 8;
3833 /* The upper 16 registers are encoded in the fourth byte of the
3834 EVEX prefix. */
3835 if (!(i.vex.register_specifier->reg_flags & RegVRex))
3836 i.vex.bytes[3] = 0x8;
3837 register_specifier = ~register_specifier & 0xf;
3838 }
3839 else
3840 {
3841 register_specifier = 0xf;
3842
3843 /* Encode upper 16 vector index register in the fourth byte of
3844 the EVEX prefix. */
3845 if (!(i.vrex & REX_X))
3846 i.vex.bytes[3] = 0x8;
3847 else
3848 vrex_used |= REX_X;
3849 }
3850
3851 /* 4 byte EVEX prefix. */
3852 i.vex.length = 4;
3853 i.vex.bytes[0] = 0x62;
3854
3855 /* The high 3 bits of the second EVEX byte are 1's complement of RXB
3856 bits from REX. */
3857 gas_assert (i.tm.opcode_space >= SPACE_0F);
3858 gas_assert (i.tm.opcode_space <= SPACE_EVEXMAP6);
3859 i.vex.bytes[1] = ((~i.rex & 7) << 5)
3860 | (!dot_insn () ? i.tm.opcode_space
3861 : i.insn_opcode_space);
3862
3863 /* The fifth bit of the second EVEX byte is 1's complement of the
3864 REX_R bit in VREX. */
3865 if (!(i.vrex & REX_R))
3866 i.vex.bytes[1] |= 0x10;
3867 else
3868 vrex_used |= REX_R;
3869
3870 if ((i.reg_operands + i.imm_operands) == i.operands)
3871 {
3872 /* When all operands are registers, the REX_X bit in REX is not
3873 used. We reuse it to encode the upper 16 registers, which is
3874 indicated by the REX_B bit in VREX. The REX_X bit is encoded
3875 as 1's complement. */
3876 if ((i.vrex & REX_B))
3877 {
3878 vrex_used |= REX_B;
3879 i.vex.bytes[1] &= ~0x40;
3880 }
3881 }
3882
3883 /* EVEX instructions shouldn't need the REX prefix. */
3884 i.vrex &= ~vrex_used;
3885 gas_assert (i.vrex == 0);
3886
3887 /* Check the REX.W bit and VEXW. */
3888 if (i.tm.opcode_modifier.vexw == VEXWIG)
3889 w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3890 else if (i.tm.opcode_modifier.vexw)
3891 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3892 else
3893 w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3894
3895 /* The third byte of the EVEX prefix. */
3896 i.vex.bytes[2] = ((w << 7)
3897 | (register_specifier << 3)
3898 | 4 /* Encode the U bit. */
3899 | i.tm.opcode_modifier.opcodeprefix);
3900
3901 /* The fourth byte of the EVEX prefix. */
3902 /* The zeroing-masking bit. */
3903 if (i.mask.reg && i.mask.zeroing)
3904 i.vex.bytes[3] |= 0x80;
3905
3906 /* Don't always set the broadcast bit if there is no RC. */
3907 if (i.rounding.type == rc_none)
3908 {
3909 /* Encode the vector length. */
3910 unsigned int vec_length;
3911
3912 if (!i.tm.opcode_modifier.evex
3913 || i.tm.opcode_modifier.evex == EVEXDYN)
3914 {
3915 unsigned int op;
3916
3917 /* Determine vector length from the last multi-length vector
3918 operand. */
3919 for (op = i.operands; op--;)
3920 if (i.tm.operand_types[op].bitfield.xmmword
3921 + i.tm.operand_types[op].bitfield.ymmword
3922 + i.tm.operand_types[op].bitfield.zmmword > 1)
3923 {
3924 if (i.types[op].bitfield.zmmword)
3925 {
3926 i.tm.opcode_modifier.evex = EVEX512;
3927 break;
3928 }
3929 else if (i.types[op].bitfield.ymmword)
3930 {
3931 i.tm.opcode_modifier.evex = EVEX256;
3932 break;
3933 }
3934 else if (i.types[op].bitfield.xmmword)
3935 {
3936 i.tm.opcode_modifier.evex = EVEX128;
3937 break;
3938 }
3939 else if ((i.broadcast.type || i.broadcast.bytes)
3940 && op == i.broadcast.operand)
3941 {
3942 switch (get_broadcast_bytes (&i.tm, true))
3943 {
3944 case 64:
3945 i.tm.opcode_modifier.evex = EVEX512;
3946 break;
3947 case 32:
3948 i.tm.opcode_modifier.evex = EVEX256;
3949 break;
3950 case 16:
3951 i.tm.opcode_modifier.evex = EVEX128;
3952 break;
3953 default:
3954 abort ();
3955 }
3956 break;
3957 }
3958 }
3959
3960 if (op >= MAX_OPERANDS)
3961 abort ();
3962 }
3963
3964 switch (i.tm.opcode_modifier.evex)
3965 {
3966 case EVEXLIG: /* LL' is ignored */
3967 vec_length = evexlig << 5;
3968 break;
3969 case EVEX128:
3970 vec_length = 0 << 5;
3971 break;
3972 case EVEX256:
3973 vec_length = 1 << 5;
3974 break;
3975 case EVEX512:
3976 vec_length = 2 << 5;
3977 break;
3978 case EVEX_L3:
3979 if (dot_insn ())
3980 {
3981 vec_length = 3 << 5;
3982 break;
3983 }
3984 /* Fall through. */
3985 default:
3986 abort ();
3987 break;
3988 }
3989 i.vex.bytes[3] |= vec_length;
3990 /* Encode the broadcast bit. */
3991 if (i.broadcast.type || i.broadcast.bytes)
3992 i.vex.bytes[3] |= 0x10;
3993 }
3994 else if (i.rounding.type != saeonly)
3995 i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
3996 else
3997 i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3998
3999 if (i.mask.reg)
4000 i.vex.bytes[3] |= i.mask.reg->reg_num;
4001 }
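/* Worked example (illustrative): for `vaddps %zmm1, %zmm2, %zmm3' the
   logic above should produce the prefix bytes 62 f1 6c 48:
     0x62 - constant leading byte;
     0xf1 - ~R ~X ~B all set (registers below 16), R' set, mmm = 1 (0F);
     0x6c - W = 0, vvvv = ~2 (%zmm2), the constant U bit, pp = 0;
     0x48 - L'L = 10 (512-bit), V' set, no masking / broadcast / RC;
   followed by the opcode byte 0x58 and ModR/M 0xd9.  */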
4002
4003 static void
4004 process_immext (void)
4005 {
4006 expressionS *exp;
4007
4008 /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4009 which is coded in the same place as an 8-bit immediate field
4010 would be. Here we fake an 8-bit immediate operand from the
4011 opcode suffix stored in tm.extension_opcode.
4012
4013 AVX instructions also use this encoding for some
4014 three-operand instructions. */
4015
4016 gas_assert (i.imm_operands <= 1
4017 && (i.operands <= 2
4018 || (is_any_vex_encoding (&i.tm)
4019 && i.operands <= 4)));
4020
4021 exp = &im_expressions[i.imm_operands++];
4022 i.op[i.operands].imms = exp;
4023 i.types[i.operands].bitfield.imm8 = 1;
4024 i.operands++;
4025 exp->X_op = O_constant;
4026 exp->X_add_number = i.tm.extension_opcode;
4027 i.tm.extension_opcode = None;
4028 }
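/* E.g. (illustrative): 3DNow!'s `pfadd %mm1, %mm0' is "0f 0f /r" with
   opcode suffix 0x9e; that suffix is turned into the faked Imm8 operand
   here so the ordinary immediate output path emits it after ModR/M.  */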
4029
4030
4031 static int
4032 check_hle (void)
4033 {
4034 switch (i.tm.opcode_modifier.prefixok)
4035 {
4036 default:
4037 abort ();
4038 case PrefixLock:
4039 case PrefixNone:
4040 case PrefixNoTrack:
4041 case PrefixRep:
4042 as_bad (_("invalid instruction `%s' after `%s'"),
4043 insn_name (&i.tm), i.hle_prefix);
4044 return 0;
4045 case PrefixHLELock:
4046 if (i.prefix[LOCK_PREFIX])
4047 return 1;
4048 as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4049 return 0;
4050 case PrefixHLEAny:
4051 return 1;
4052 case PrefixHLERelease:
4053 if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4054 {
4055 as_bad (_("instruction `%s' after `xacquire' not allowed"),
4056 insn_name (&i.tm));
4057 return 0;
4058 }
4059 if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4060 {
4061 as_bad (_("memory destination needed for instruction `%s'"
4062 " after `xrelease'"), insn_name (&i.tm));
4063 return 0;
4064 }
4065 return 1;
4066 }
4067 }
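/* Examples (illustrative): `xacquire lock addl $1, (%rax)' satisfies
   PrefixHLELock; plain `xacquire addl $1, (%rax)' fails it for lack of
   `lock'; and `xrelease movl %eax, %ebx' fails PrefixHLERelease since
   that form requires a memory destination.  */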
4068
4069 /* Encode aligned vector move as unaligned vector move. */
4070
4071 static void
4072 encode_with_unaligned_vector_move (void)
4073 {
4074 switch (i.tm.base_opcode)
4075 {
4076 case 0x28: /* Load instructions. */
4077 case 0x29: /* Store instructions. */
4078 /* movaps/movapd/vmovaps/vmovapd. */
4079 if (i.tm.opcode_space == SPACE_0F
4080 && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4081 i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4082 break;
4083 case 0x6f: /* Load instructions. */
4084 case 0x7f: /* Store instructions. */
4085 /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4086 if (i.tm.opcode_space == SPACE_0F
4087 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4088 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4089 break;
4090 default:
4091 break;
4092 }
4093 }
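/* E.g. (illustrative): with -muse-unaligned-vector-move the store
   `movaps %xmm0, (%rax)' (0f 29) is emitted as movups (0f 11), and
   `vmovdqa %xmm0, (%rax)' (66 0f 7f) as vmovdqu (f3 0f 7f); only the
   opcode byte resp. prefix changes, the operands stay untouched.  */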
4094
4095 /* Try the shortest encoding by shortening operand size. */
4096
4097 static void
4098 optimize_encoding (void)
4099 {
4100 unsigned int j;
4101
4102 if (i.tm.mnem_off == MN_lea)
4103 {
4104 /* Optimize: -O:
4105 lea symbol, %rN -> mov $symbol, %rN
4106 lea (%rM), %rN -> mov %rM, %rN
4107 lea (,%rM,1), %rN -> mov %rM, %rN
4108
4109 and in 32-bit mode for 16-bit addressing
4110
4111 lea (%rM), %rN -> movzx %rM, %rN
4112
4113 and in 64-bit mode zap 32-bit addressing in favor of using a
4114 32-bit (or less) destination.
4115 */
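      /* E.g. (illustrative byte sequences): `lea (%rdi), %rax'
	 (48 8d 07) becomes `mov %rdi, %rax' (48 89 f8), and
	 `lea sym, %eax' (8d 04 25 disp32) becomes `mov $sym, %eax'
	 (b8 imm32), two bytes shorter.  */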
4116 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4117 {
4118 if (!i.op[1].regs->reg_type.bitfield.word)
4119 i.tm.opcode_modifier.size = SIZE32;
4120 i.prefix[ADDR_PREFIX] = 0;
4121 }
4122
4123 if (!i.index_reg && !i.base_reg)
4124 {
4125 /* Handle:
4126 lea symbol, %rN -> mov $symbol, %rN
4127 */
4128 if (flag_code == CODE_64BIT)
4129 {
4130 /* Don't transform a relocation to a 16-bit one. */
4131 if (i.op[0].disps
4132 && i.op[0].disps->X_op != O_constant
4133 && i.op[1].regs->reg_type.bitfield.word)
4134 return;
4135
4136 if (!i.op[1].regs->reg_type.bitfield.qword
4137 || i.tm.opcode_modifier.size == SIZE32)
4138 {
4139 i.tm.base_opcode = 0xb8;
4140 i.tm.opcode_modifier.modrm = 0;
4141 if (!i.op[1].regs->reg_type.bitfield.word)
4142 i.types[0].bitfield.imm32 = 1;
4143 else
4144 {
4145 i.tm.opcode_modifier.size = SIZE16;
4146 i.types[0].bitfield.imm16 = 1;
4147 }
4148 }
4149 else
4150 {
4151 /* Subject to further optimization below. */
4152 i.tm.base_opcode = 0xc7;
4153 i.tm.extension_opcode = 0;
4154 i.types[0].bitfield.imm32s = 1;
4155 i.types[0].bitfield.baseindex = 0;
4156 }
4157 }
4158 /* Outside of 64-bit mode address and operand sizes have to match if
4159 a relocation is involved, as otherwise we wouldn't (currently) or
4160 even couldn't express the relocation correctly. */
4161 else if (i.op[0].disps
4162 && i.op[0].disps->X_op != O_constant
4163 && ((!i.prefix[ADDR_PREFIX])
4164 != (flag_code == CODE_32BIT
4165 ? i.op[1].regs->reg_type.bitfield.dword
4166 : i.op[1].regs->reg_type.bitfield.word)))
4167 return;
4168 /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4169 destination is going to grow encoding size. */
4170 else if (flag_code == CODE_16BIT
4171 && (optimize <= 1 || optimize_for_space)
4172 && !i.prefix[ADDR_PREFIX]
4173 && i.op[1].regs->reg_type.bitfield.dword)
4174 return;
4175 else
4176 {
4177 i.tm.base_opcode = 0xb8;
4178 i.tm.opcode_modifier.modrm = 0;
4179 if (i.op[1].regs->reg_type.bitfield.dword)
4180 i.types[0].bitfield.imm32 = 1;
4181 else
4182 i.types[0].bitfield.imm16 = 1;
4183
4184 if (i.op[0].disps
4185 && i.op[0].disps->X_op == O_constant
4186 && i.op[1].regs->reg_type.bitfield.dword
4187 /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4188 GCC 5. */
4189 && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4190 i.op[0].disps->X_add_number &= 0xffff;
4191 }
4192
4193 i.tm.operand_types[0] = i.types[0];
4194 i.imm_operands = 1;
4195 if (!i.op[0].imms)
4196 {
4197 i.op[0].imms = &im_expressions[0];
4198 i.op[0].imms->X_op = O_absent;
4199 }
4200 }
4201 else if (i.op[0].disps
4202 && (i.op[0].disps->X_op != O_constant
4203 || i.op[0].disps->X_add_number))
4204 return;
4205 else
4206 {
4207 /* Handle:
4208 lea (%rM), %rN -> mov %rM, %rN
4209 lea (,%rM,1), %rN -> mov %rM, %rN
4210 lea (%rM), %rN -> movzx %rM, %rN
4211 */
4212 const reg_entry *addr_reg;
4213
4214 if (!i.index_reg && i.base_reg->reg_num != RegIP)
4215 addr_reg = i.base_reg;
4216 else if (!i.base_reg
4217 && i.index_reg->reg_num != RegIZ
4218 && !i.log2_scale_factor)
4219 addr_reg = i.index_reg;
4220 else
4221 return;
4222
4223 if (addr_reg->reg_type.bitfield.word
4224 && i.op[1].regs->reg_type.bitfield.dword)
4225 {
4226 if (flag_code != CODE_32BIT)
4227 return;
4228 i.tm.opcode_space = SPACE_0F;
4229 i.tm.base_opcode = 0xb7;
4230 }
4231 else
4232 i.tm.base_opcode = 0x8b;
4233
4234 if (addr_reg->reg_type.bitfield.dword
4235 && i.op[1].regs->reg_type.bitfield.qword)
4236 i.tm.opcode_modifier.size = SIZE32;
4237
4238 i.op[0].regs = addr_reg;
4239 i.reg_operands = 2;
4240 }
4241
4242 i.mem_operands = 0;
4243 i.disp_operands = 0;
4244 i.prefix[ADDR_PREFIX] = 0;
4245 i.prefix[SEG_PREFIX] = 0;
4246 i.seg[0] = NULL;
4247 }
4248
4249 if (optimize_for_space
4250 && i.tm.mnem_off == MN_test
4251 && i.reg_operands == 1
4252 && i.imm_operands == 1
4253 && !i.types[1].bitfield.byte
4254 && i.op[0].imms->X_op == O_constant
4255 && fits_in_imm7 (i.op[0].imms->X_add_number))
4256 {
4257 /* Optimize: -Os:
4258 test $imm7, %r64/%r32/%r16 -> test $imm7, %r8
4259 */
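      /* E.g. (illustrative): `testl $0x40, %edx' (f7 c2 40 00 00 00)
	 shrinks to `testb $0x40, %dl' (f6 c2 40); with an imm7 the
	 byte-sized test sets the very same flags.  */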
4260 unsigned int base_regnum = i.op[1].regs->reg_num;
4261 if (flag_code == CODE_64BIT || base_regnum < 4)
4262 {
4263 i.types[1].bitfield.byte = 1;
4264 /* Ignore the suffix. */
4265 i.suffix = 0;
4266 /* Convert to byte registers. */
4267 if (i.types[1].bitfield.word)
4268 j = 16;
4269 else if (i.types[1].bitfield.dword)
4270 j = 32;
4271 else
4272 j = 48;
4273 if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4274 j += 8;
4275 i.op[1].regs -= j;
4276 }
4277 }
4278 else if (flag_code == CODE_64BIT
4279 && i.tm.opcode_space == SPACE_BASE
4280 && ((i.types[1].bitfield.qword
4281 && i.reg_operands == 1
4282 && i.imm_operands == 1
4283 && i.op[0].imms->X_op == O_constant
4284 && ((i.tm.base_opcode == 0xb8
4285 && i.tm.extension_opcode == None
4286 && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4287 || (fits_in_imm31 (i.op[0].imms->X_add_number)
4288 && (i.tm.base_opcode == 0x24
4289 || (i.tm.base_opcode == 0x80
4290 && i.tm.extension_opcode == 0x4)
4291 || i.tm.mnem_off == MN_test
4292 || ((i.tm.base_opcode | 1) == 0xc7
4293 && i.tm.extension_opcode == 0x0)))
4294 || (fits_in_imm7 (i.op[0].imms->X_add_number)
4295 && i.tm.base_opcode == 0x83
4296 && i.tm.extension_opcode == 0x4)))
4297 || (i.types[0].bitfield.qword
4298 && ((i.reg_operands == 2
4299 && i.op[0].regs == i.op[1].regs
4300 && (i.tm.mnem_off == MN_xor
4301 || i.tm.mnem_off == MN_sub))
4302 || i.tm.mnem_off == MN_clr))))
4303 {
4304 /* Optimize: -O:
4305 andq $imm31, %r64 -> andl $imm31, %r32
4306 andq $imm7, %r64 -> andl $imm7, %r32
4307 testq $imm31, %r64 -> testl $imm31, %r32
4308 xorq %r64, %r64 -> xorl %r32, %r32
4309 subq %r64, %r64 -> subl %r32, %r32
4310 movq $imm31, %r64 -> movl $imm31, %r32
4311 movq $imm32, %r64 -> movl $imm32, %r32
4312 */
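      /* E.g. (illustrative): `xorq %rax, %rax' (48 31 c0) becomes
	 `xorl %eax, %eax' (31 c0) and `movq $1, %rax' (48 c7 c0 imm32)
	 becomes `movl $1, %eax' (b8 imm32); writing the 32-bit register
	 zero-extends, so the 64-bit result is unchanged.  */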
4313 i.tm.opcode_modifier.size = SIZE32;
4314 if (i.imm_operands)
4315 {
4316 i.types[0].bitfield.imm32 = 1;
4317 i.types[0].bitfield.imm32s = 0;
4318 i.types[0].bitfield.imm64 = 0;
4319 }
4320 else
4321 {
4322 i.types[0].bitfield.dword = 1;
4323 i.types[0].bitfield.qword = 0;
4324 }
4325 i.types[1].bitfield.dword = 1;
4326 i.types[1].bitfield.qword = 0;
4327 if (i.tm.mnem_off == MN_mov || i.tm.mnem_off == MN_lea)
4328 {
4329 /* Handle
4330 movq $imm31, %r64 -> movl $imm31, %r32
4331 movq $imm32, %r64 -> movl $imm32, %r32
4332 */
4333 i.tm.operand_types[0].bitfield.imm32 = 1;
4334 i.tm.operand_types[0].bitfield.imm32s = 0;
4335 i.tm.operand_types[0].bitfield.imm64 = 0;
4336 if ((i.tm.base_opcode | 1) == 0xc7)
4337 {
4338 /* Handle
4339 movq $imm31, %r64 -> movl $imm31, %r32
4340 */
4341 i.tm.base_opcode = 0xb8;
4342 i.tm.extension_opcode = None;
4343 i.tm.opcode_modifier.w = 0;
4344 i.tm.opcode_modifier.modrm = 0;
4345 }
4346 }
4347 }
4348 else if (optimize > 1
4349 && !optimize_for_space
4350 && i.reg_operands == 2
4351 && i.op[0].regs == i.op[1].regs
4352 && (i.tm.mnem_off == MN_and || i.tm.mnem_off == MN_or)
4353 && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4354 {
4355 /* Optimize: -O2:
4356 andb %rN, %rN -> testb %rN, %rN
4357 andw %rN, %rN -> testw %rN, %rN
4358 andq %rN, %rN -> testq %rN, %rN
4359 orb %rN, %rN -> testb %rN, %rN
4360 orw %rN, %rN -> testw %rN, %rN
4361 orq %rN, %rN -> testq %rN, %rN
4362
4363 and outside of 64-bit mode
4364
4365 andl %rN, %rN -> testl %rN, %rN
4366 orl %rN, %rN -> testl %rN, %rN
4367 */
4368 i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4369 }
4370 else if (i.tm.base_opcode == 0xba
4371 && i.tm.opcode_space == SPACE_0F
4372 && i.reg_operands == 1
4373 && i.op[0].imms->X_op == O_constant
4374 && i.op[0].imms->X_add_number >= 0)
4375 {
4376 /* Optimize: -O:
4377 btw $n, %rN -> btl $n, %rN (outside of 16-bit mode, n < 16)
4378 btq $n, %rN -> btl $n, %rN (in 64-bit mode, n < 32, N < 8)
4379 btl $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
4380
4381 With <BT> one of bts, btr, and btc also:
4382 <BT>w $n, %rN -> btl $n, %rN (in 32-bit mode, n < 16)
4383 <BT>l $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
4384 */
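      /* E.g. (illustrative): in 64-bit mode `btq $1, %rax'
	 (48 0f ba e0 01) becomes `btl $1, %eax' (0f ba e0 01); for bit
	 numbers below 32 and registers below %r8 the two forms set CF
	 identically.  */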
4385 switch (flag_code)
4386 {
4387 case CODE_64BIT:
4388 if (i.tm.extension_opcode != 4)
4389 break;
4390 if (i.types[1].bitfield.qword
4391 && i.op[0].imms->X_add_number < 32
4392 && !(i.op[1].regs->reg_flags & RegRex))
4393 i.tm.opcode_modifier.size = SIZE32;
4394 /* Fall through. */
4395 case CODE_32BIT:
4396 if (i.types[1].bitfield.word
4397 && i.op[0].imms->X_add_number < 16)
4398 i.tm.opcode_modifier.size = SIZE32;
4399 break;
4400 case CODE_16BIT:
4401 if (i.op[0].imms->X_add_number < 16)
4402 i.tm.opcode_modifier.size = SIZE16;
4403 break;
4404 }
4405 }
4406 else if (i.reg_operands == 3
4407 && i.op[0].regs == i.op[1].regs
4408 && !i.types[2].bitfield.xmmword
4409 && (i.tm.opcode_modifier.vex
4410 || ((!i.mask.reg || i.mask.zeroing)
4411 && is_evex_encoding (&i.tm)
4412 && (i.vec_encoding != vex_encoding_evex
4413 || cpu_arch_isa_flags.bitfield.cpuavx512vl
4414 || i.tm.cpu_flags.bitfield.cpuavx512vl
4415 || (i.tm.operand_types[2].bitfield.zmmword
4416 && i.types[2].bitfield.ymmword))))
4417 && i.tm.opcode_space == SPACE_0F
4418 && ((i.tm.base_opcode | 2) == 0x57
4419 || i.tm.base_opcode == 0xdf
4420 || i.tm.base_opcode == 0xef
4421 || (i.tm.base_opcode | 3) == 0xfb
4422 || i.tm.base_opcode == 0x42
4423 || i.tm.base_opcode == 0x47))
4424 {
4425 /* Optimize: -O1:
4426 VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4427 vpsubq and vpsubw:
4428 EVEX VOP %zmmM, %zmmM, %zmmN
4429 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4430 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4431 EVEX VOP %ymmM, %ymmM, %ymmN
4432 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4433 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4434 VEX VOP %ymmM, %ymmM, %ymmN
4435 -> VEX VOP %xmmM, %xmmM, %xmmN
4436 VOP, one of vpandn and vpxor:
4437 VEX VOP %ymmM, %ymmM, %ymmN
4438 -> VEX VOP %xmmM, %xmmM, %xmmN
4439 VOP, one of vpandnd and vpandnq:
4440 EVEX VOP %zmmM, %zmmM, %zmmN
4441 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4442 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4443 EVEX VOP %ymmM, %ymmM, %ymmN
4444 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4445 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4446 VOP, one of vpxord and vpxorq:
4447 EVEX VOP %zmmM, %zmmM, %zmmN
4448 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4449 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4450 EVEX VOP %ymmM, %ymmM, %ymmN
4451 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4452 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4453 VOP, one of kxord and kxorq:
4454 VEX VOP %kM, %kM, %kN
4455 -> VEX kxorw %kM, %kM, %kN
4456 VOP, one of kandnd and kandnq:
4457 VEX VOP %kM, %kM, %kN
4458 -> VEX kandnw %kM, %kM, %kN
4459 */
4460 if (is_evex_encoding (&i.tm))
4461 {
4462 if (i.vec_encoding != vex_encoding_evex)
4463 {
4464 i.tm.opcode_modifier.vex = VEX128;
4465 i.tm.opcode_modifier.vexw = VEXW0;
4466 i.tm.opcode_modifier.evex = 0;
4467 }
4468 else if (optimize > 1)
4469 i.tm.opcode_modifier.evex = EVEX128;
4470 else
4471 return;
4472 }
4473 else if (i.tm.operand_types[0].bitfield.class == RegMask)
4474 {
4475 i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4476 i.tm.opcode_modifier.vexw = VEXW0;
4477 }
4478 else
4479 i.tm.opcode_modifier.vex = VEX128;
4480
4481 if (i.tm.opcode_modifier.vex)
4482 for (j = 0; j < 3; j++)
4483 {
4484 i.types[j].bitfield.xmmword = 1;
4485 i.types[j].bitfield.ymmword = 0;
4486 }
4487 }
4488 else if (i.vec_encoding != vex_encoding_evex
4489 && !i.types[0].bitfield.zmmword
4490 && !i.types[1].bitfield.zmmword
4491 && !i.mask.reg
4492 && !i.broadcast.type
4493 && !i.broadcast.bytes
4494 && is_evex_encoding (&i.tm)
4495 && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4496 || (i.tm.base_opcode & ~4) == 0xdb
4497 || (i.tm.base_opcode & ~4) == 0xeb)
4498 && i.tm.extension_opcode == None)
4499 {
4500 /* Optimize: -O1:
4501 VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4502 vmovdqu32 and vmovdqu64:
4503 EVEX VOP %xmmM, %xmmN
4504 -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4505 EVEX VOP %ymmM, %ymmN
4506 -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4507 EVEX VOP %xmmM, mem
4508 -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4509 EVEX VOP %ymmM, mem
4510 -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4511 EVEX VOP mem, %xmmN
4512 -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4513 EVEX VOP mem, %ymmN
4514 -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4515 VOP, one of vpand, vpandn, vpor, vpxor:
4516 EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4517 -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4518 EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4519 -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4520 EVEX VOP{d,q} mem, %xmmM, %xmmN
4521 -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4522 EVEX VOP{d,q} mem, %ymmM, %ymmN
4523 -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4524 */
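      /* E.g. (illustrative): `vmovdqa64 %xmm1, %xmm2' takes a 4-byte
	 EVEX prefix (6 bytes overall) but, with no masking/broadcast
	 and registers below 16, is re-encoded as VEX `vmovdqa'
	 (c5 f9 6f d1, 4 bytes overall).  */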
4525 for (j = 0; j < i.operands; j++)
4526 if (operand_type_check (i.types[j], disp)
4527 && i.op[j].disps->X_op == O_constant)
4528 {
4529 /* While the VEX prefix has 2 or 3 bytes and the EVEX prefix
4530 has 4, EVEX Disp8 is 1 byte where VEX Disp32 is 4 bytes, so
4531 we choose EVEX Disp8 over VEX Disp32. */
4532 int evex_disp8, vex_disp8;
4533 unsigned int memshift = i.memshift;
4534 offsetT n = i.op[j].disps->X_add_number;
4535
4536 evex_disp8 = fits_in_disp8 (n);
4537 i.memshift = 0;
4538 vex_disp8 = fits_in_disp8 (n);
4539 if (evex_disp8 != vex_disp8)
4540 {
4541 i.memshift = memshift;
4542 return;
4543 }
4544
4545 i.types[j].bitfield.disp8 = vex_disp8;
4546 break;
4547 }
4548 if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4549 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4550 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4551 i.tm.opcode_modifier.vex
4552 = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4553 i.tm.opcode_modifier.vexw = VEXW0;
4554 /* VPAND, VPOR, and VPXOR are commutative. */
4555 if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4556 i.tm.opcode_modifier.commutative = 1;
4557 i.tm.opcode_modifier.evex = 0;
4558 i.tm.opcode_modifier.masking = 0;
4559 i.tm.opcode_modifier.broadcast = 0;
4560 i.tm.opcode_modifier.disp8memshift = 0;
4561 i.memshift = 0;
4562 if (j < i.operands)
4563 i.types[j].bitfield.disp8
4564 = fits_in_disp8 (i.op[j].disps->X_add_number);
4565 }
4566 }
4567
4568 /* Return non-zero for load instruction. */
4569
4570 static int
4571 load_insn_p (void)
4572 {
4573 unsigned int dest;
4574 int any_vex_p = is_any_vex_encoding (&i.tm);
4575 unsigned int base_opcode = i.tm.base_opcode | 1;
4576
4577 if (!any_vex_p)
4578 {
4579 /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
4580 bndcn, bndstx, bndldx, clflushopt, clwb, cldemote. */
4581 if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
4582 return 0;
4583
4584 /* pop. */
4585 if (i.tm.mnem_off == MN_pop)
4586 return 1;
4587 }
4588
4589 if (i.tm.opcode_space == SPACE_BASE)
4590 {
4591 /* popf, popa. */
4592 if (i.tm.base_opcode == 0x9d
4593 || i.tm.base_opcode == 0x61)
4594 return 1;
4595
4596 /* movs, cmps, lods, scas. */
4597 if ((i.tm.base_opcode | 0xb) == 0xaf)
4598 return 1;
4599
4600 /* outs, xlatb. */
4601 if (base_opcode == 0x6f
4602 || i.tm.base_opcode == 0xd7)
4603 return 1;
4604 /* NB: AMD-specific insns with implicit memory operands are
4605 intentionally not covered. */
4606 }
4607
4608 /* No memory operand. */
4609 if (!i.mem_operands)
4610 return 0;
4611
4612 if (any_vex_p)
4613 {
4614 if (i.tm.mnem_off == MN_vldmxcsr)
4615 return 1;
4616 }
4617 else if (i.tm.opcode_space == SPACE_BASE)
4618 {
4619 /* test, not, neg, mul, imul, div, idiv. */
4620 if (base_opcode == 0xf7 && i.tm.extension_opcode != 1)
4621 return 1;
4622
4623 /* inc, dec. */
4624 if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4625 return 1;
4626
4627 /* add, or, adc, sbb, and, sub, xor, cmp. */
4628 if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4629 return 1;
4630
4631 /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4632 if ((base_opcode == 0xc1 || (base_opcode | 2) == 0xd3)
4633 && i.tm.extension_opcode != 6)
4634 return 1;
4635
4636 /* Check for x87 instructions. */
4637 if ((base_opcode | 6) == 0xdf)
4638 {
4639 /* Skip fst, fstp, fstenv, fstcw. */
4640 if (i.tm.base_opcode == 0xd9
4641 && (i.tm.extension_opcode == 2
4642 || i.tm.extension_opcode == 3
4643 || i.tm.extension_opcode == 6
4644 || i.tm.extension_opcode == 7))
4645 return 0;
4646
4647 /* Skip fisttp, fist, fistp, fstp. */
4648 if (i.tm.base_opcode == 0xdb
4649 && (i.tm.extension_opcode == 1
4650 || i.tm.extension_opcode == 2
4651 || i.tm.extension_opcode == 3
4652 || i.tm.extension_opcode == 7))
4653 return 0;
4654
4655 /* Skip fisttp, fst, fstp, fsave, fstsw. */
4656 if (i.tm.base_opcode == 0xdd
4657 && (i.tm.extension_opcode == 1
4658 || i.tm.extension_opcode == 2
4659 || i.tm.extension_opcode == 3
4660 || i.tm.extension_opcode == 6
4661 || i.tm.extension_opcode == 7))
4662 return 0;
4663
4664 /* Skip fisttp, fist, fistp, fbstp, fistp. */
4665 if (i.tm.base_opcode == 0xdf
4666 && (i.tm.extension_opcode == 1
4667 || i.tm.extension_opcode == 2
4668 || i.tm.extension_opcode == 3
4669 || i.tm.extension_opcode == 6
4670 || i.tm.extension_opcode == 7))
4671 return 0;
4672
4673 return 1;
4674 }
4675 }
4676 else if (i.tm.opcode_space == SPACE_0F)
4677 {
4678 /* bt, bts, btr, btc. */
4679 if (i.tm.base_opcode == 0xba
4680 && (i.tm.extension_opcode | 3) == 7)
4681 return 1;
4682
4683 /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld. */
4684 if (i.tm.base_opcode == 0xc7
4685 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4686 && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
4687 || i.tm.extension_opcode == 6))
4688 return 1;
4689
4690 /* fxrstor, ldmxcsr, xrstor. */
4691 if (i.tm.base_opcode == 0xae
4692 && (i.tm.extension_opcode == 1
4693 || i.tm.extension_opcode == 2
4694 || i.tm.extension_opcode == 5))
4695 return 1;
4696
4697 /* lgdt, lidt, lmsw. */
4698 if (i.tm.base_opcode == 0x01
4699 && (i.tm.extension_opcode == 2
4700 || i.tm.extension_opcode == 3
4701 || i.tm.extension_opcode == 6))
4702 return 1;
4703 }
4704
4705 dest = i.operands - 1;
4706
4707 /* Check fake imm8 operand and 3 source operands. */
4708 if ((i.tm.opcode_modifier.immext
4709 || i.reg_operands + i.mem_operands == 4)
4710 && i.types[dest].bitfield.imm8)
4711 dest--;
4712
4713 /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg. */
4714 if (i.tm.opcode_space == SPACE_BASE
4715 && ((base_opcode | 0x38) == 0x39
4716 || (base_opcode | 2) == 0x87))
4717 return 1;
4718
4719 if (i.tm.mnem_off == MN_xadd)
4720 return 1;
4721
4722 /* Check for load instruction. */
4723 return (i.types[dest].bitfield.class != ClassNone
4724 || i.types[dest].bitfield.instance == Accum);
4725 }
4726
4727 /* Output lfence (0x0f 0xae 0xe8) after the instruction. */
4728
4729 static void
4730 insert_lfence_after (void)
4731 {
4732 if (lfence_after_load && load_insn_p ())
4733 {
4734 /* There are also two REP string instructions that require
4735 special treatment. Specifically, the compare string (CMPS)
4736 and scan string (SCAS) instructions set EFLAGS in a manner
4737 that depends on the data being compared/scanned. When used
4738 with a REP prefix, the number of iterations may therefore
4739 vary depending on this data. If the data is a program secret
4740 chosen by the adversary using an LVI method,
4741 then this data-dependent behavior may leak some aspect
4742 of the secret. */
4743 if (((i.tm.base_opcode | 0x9) == 0xaf)
4744 && i.prefix[REP_PREFIX])
4745 {
4746 as_warn (_("`%s` changes flags which would affect control flow behavior"),
4747 insn_name (&i.tm));
4748 }
4749 char *p = frag_more (3);
4750 *p++ = 0xf;
4751 *p++ = 0xae;
4752 *p = 0xe8;
4753 }
4754 }
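/* E.g. (illustrative): with -mlfence-after-load=yes a load such as
   `mov (%rax), %rbx' is directly followed by the 3-byte lfence,
   fencing off transient consumers of the loaded value.  */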
4755
4756 /* Output lfence (0x0f 0xae 0xe8) before the instruction. */
4757
4758 static void
4759 insert_lfence_before (void)
4760 {
4761 char *p;
4762
4763 if (i.tm.opcode_space != SPACE_BASE)
4764 return;
4765
4766 if (i.tm.base_opcode == 0xff
4767 && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4768 {
4769 /* Insert lfence before indirect branch if needed. */
4770
4771 if (lfence_before_indirect_branch == lfence_branch_none)
4772 return;
4773
4774 if (i.operands != 1)
4775 abort ();
4776
4777 if (i.reg_operands == 1)
4778 {
4779 /* Indirect branch via register. Don't insert lfence with
4780 -mlfence-after-load=yes. */
4781 if (lfence_after_load
4782 || lfence_before_indirect_branch == lfence_branch_memory)
4783 return;
4784 }
4785 else if (i.mem_operands == 1
4786 && lfence_before_indirect_branch != lfence_branch_register)
4787 {
4788 as_warn (_("indirect `%s` with memory operand should be avoided"),
4789 insn_name (&i.tm));
4790 return;
4791 }
4792 else
4793 return;
4794
4795 if (last_insn.kind != last_insn_other
4796 && last_insn.seg == now_seg)
4797 {
4798 as_warn_where (last_insn.file, last_insn.line,
4799 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4800 last_insn.name, insn_name (&i.tm));
4801 return;
4802 }
4803
4804 p = frag_more (3);
4805 *p++ = 0xf;
4806 *p++ = 0xae;
4807 *p = 0xe8;
4808 return;
4809 }
4810
4811 /* Output or/not/shl and lfence before near ret. */
4812 if (lfence_before_ret != lfence_before_ret_none
4813 && (i.tm.base_opcode | 1) == 0xc3)
4814 {
4815 if (last_insn.kind != last_insn_other
4816 && last_insn.seg == now_seg)
4817 {
4818 as_warn_where (last_insn.file, last_insn.line,
4819 _("`%s` skips -mlfence-before-ret on `%s`"),
4820 last_insn.name, insn_name (&i.tm));
4821 return;
4822 }
4823
4824 /* A near ret ignores the operand size override in 64-bit code. */
4825 char prefix = flag_code == CODE_64BIT
4826 ? 0x48
4827 : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
4828
4829 if (lfence_before_ret == lfence_before_ret_not)
4830 {
4831 /* not: 0xf7 0x14 0x24 (notl (%rsp)), emitted twice; a prefix may be
4832 added for operand size override or 64-bit code. */
4833 p = frag_more ((prefix ? 2 : 0) + 6 + 3);
4834 if (prefix)
4835 *p++ = prefix;
4836 *p++ = 0xf7;
4837 *p++ = 0x14;
4838 *p++ = 0x24;
4839 if (prefix)
4840 *p++ = prefix;
4841 *p++ = 0xf7;
4842 *p++ = 0x14;
4843 *p++ = 0x24;
4844 }
4845 else
4846 {
4847 p = frag_more ((prefix ? 1 : 0) + 4 + 3);
4848 if (prefix)
4849 *p++ = prefix;
4850 if (lfence_before_ret == lfence_before_ret_or)
4851 {
4852 /* or: 0x83 0x0c 0x24 0x00 (orl $0, (%rsp)); a prefix may be
4853 added for operand size override or 64-bit code. */
4854 *p++ = 0x83;
4855 *p++ = 0x0c;
4856 }
4857 else
4858 {
4859 /* shl: 0xc1 0x24 0x24 0x00 (shll $0, (%rsp)); a prefix may be
4860 added for operand size override or 64-bit code. */
4861 *p++ = 0xc1;
4862 *p++ = 0x24;
4863 }
4864
4865 *p++ = 0x24;
4866 *p++ = 0x0;
4867 }
4868
4869 *p++ = 0xf;
4870 *p++ = 0xae;
4871 *p = 0xe8;
4872 }
4873 }
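/* E.g. (illustrative): with -mlfence-before-ret=or in 64-bit code,
   `ret' is preceded by `orq $0, (%rsp)' (48 83 0c 24 00) plus lfence,
   re-writing the return address with a dependency-carrying no-op
   before the ret can consume it.  */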
4874
4875 /* Shared helper for md_assemble() and s_insn(). */
4876 static void init_globals (void)
4877 {
4878 unsigned int j;
4879
4880 memset (&i, '\0', sizeof (i));
4881 i.rounding.type = rc_none;
4882 for (j = 0; j < MAX_OPERANDS; j++)
4883 i.reloc[j] = NO_RELOC;
4884 memset (disp_expressions, '\0', sizeof (disp_expressions));
4885 memset (im_expressions, '\0', sizeof (im_expressions));
4886 save_stack_p = save_stack;
4887 }
4888
4889 /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
4890 parsing pass. Instead of introducing a rarely used new insn attribute this
4891 utilizes a common pattern between affected templates. It is deemed
4892 acceptable that this will lead to unnecessary pass 2 preparations in a
4893 limited set of cases. */
4894 static INLINE bool may_need_pass2 (const insn_template *t)
4895 {
4896 return t->opcode_modifier.sse2avx
4897 /* Note that all SSE2AVX templates have at least one operand. */
4898 ? t->operand_types[t->operands - 1].bitfield.class == RegSIMD
4899 : (t->opcode_space == SPACE_0F
4900 && (t->base_opcode | 1) == 0xbf)
4901 || (t->opcode_space == SPACE_BASE
4902 && t->base_opcode == 0x63);
4903 }
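/* Commentary (an observation, not authoritative): the non-SSE2AVX
   patterns above appear to cover the movsx (0f be/bf) and the
   arpl/movsxd (63) template groups, where the suffix learned during
   matching may call for re-parsing the line.  */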
4904
4905 /* This is the guts of the machine-dependent assembler. LINE points to a
4906 machine dependent instruction. This function is supposed to emit
4907 the frags/bytes it assembles to. */
4908
4909 void
4910 md_assemble (char *line)
4911 {
4912 unsigned int j;
4913 char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
4914 const char *end, *pass1_mnem = NULL;
4915 enum i386_error pass1_err = 0;
4916 const insn_template *t;
4917
4918 /* Initialize globals. */
4919 current_templates = NULL;
4920 retry:
4921 init_globals ();
4922
4923 /* First parse an instruction mnemonic & call i386_operand for the operands.
4924 We assume that the scrubber has arranged it so that line[0] is the valid
4925 start of a (possibly prefixed) mnemonic. */
4926
4927 end = parse_insn (line, mnemonic, false);
4928 if (end == NULL)
4929 {
4930 if (pass1_mnem != NULL)
4931 goto match_error;
4932 if (i.error != no_error)
4933 {
4934 gas_assert (current_templates != NULL);
4935 if (may_need_pass2 (current_templates->start) && !i.suffix)
4936 goto no_match;
4937 /* No point in trying a 2nd pass - it'll only find the same suffix
4938 again. */
4939 mnem_suffix = i.suffix;
4940 goto match_error;
4941 }
4942 return;
4943 }
4944 t = current_templates->start;
4945 if (may_need_pass2 (t))
4946 {
4947 /* Make a copy of the full line in case we need to retry. */
4948 copy = xstrdup (line);
4949 }
4950 line += end - line;
4951 mnem_suffix = i.suffix;
4952
4953 line = parse_operands (line, mnemonic);
4954 this_operand = -1;
4955 if (line == NULL)
4956 {
4957 free (copy);
4958 return;
4959 }
4960
4961 /* Now we've parsed the mnemonic into a set of templates, and have the
4962 operands at hand. */
4963
4964 /* All Intel opcodes have reversed operands except for "bound", "enter",
4965 "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
4966 "rmpadjust", "rmpupdate", and "rmpquery". We also don't reverse
4967 intersegment "jmp" and "call" instructions with 2 immediate operands so
4968 that the immediate segment precedes the offset consistently in Intel and
4969 AT&T modes. */
4970 if (intel_syntax
4971 && i.operands > 1
4972 && (t->mnem_off != MN_bound)
4973 && !startswith (mnemonic, "invlpg")
4974 && !startswith (mnemonic, "monitor")
4975 && !startswith (mnemonic, "mwait")
4976 && (t->mnem_off != MN_pvalidate)
4977 && !startswith (mnemonic, "rmp")
4978 && (t->mnem_off != MN_tpause)
4979 && (t->mnem_off != MN_umwait)
4980 && !(i.operands == 2
4981 && operand_type_check (i.types[0], imm)
4982 && operand_type_check (i.types[1], imm)))
4983 swap_operands ();
4984
4985 /* The order of the immediates should be reversed for the
4986 two-immediate extrq and insertq instructions. */
4987 if (i.imm_operands == 2
4988 && (t->mnem_off == MN_extrq || t->mnem_off == MN_insertq))
4989 swap_2_operands (0, 1);
4990
4991 if (i.imm_operands)
4992 optimize_imm ();
4993
4994 if (i.disp_operands && !optimize_disp (t))
4995 return;
4996
4997 /* Next, we find a template that matches the given insn,
4998 making sure the overlap of the given operands types is consistent
4999 with the template operand types. */
5000
5001 if (!(t = match_template (mnem_suffix)))
5002 {
5003 const char *err_msg;
5004
5005 if (copy && !mnem_suffix)
5006 {
5007 line = copy;
5008 copy = NULL;
5009 no_match:
5010 pass1_err = i.error;
5011 pass1_mnem = insn_name (current_templates->start);
5012 goto retry;
5013 }
5014
5015 /* If a non-/only-64bit template (group) was found in pass 1, and if
5016 _some_ template (group) was found in pass 2, squash pass 1's
5017 error. */
5018 if (pass1_err == unsupported_64bit)
5019 pass1_mnem = NULL;
5020
5021 match_error:
5022 free (copy);
5023
5024 switch (pass1_mnem ? pass1_err : i.error)
5025 {
5026 default:
5027 abort ();
5028 case operand_size_mismatch:
5029 err_msg = _("operand size mismatch");
5030 break;
5031 case operand_type_mismatch:
5032 err_msg = _("operand type mismatch");
5033 break;
5034 case register_type_mismatch:
5035 err_msg = _("register type mismatch");
5036 break;
5037 case number_of_operands_mismatch:
5038 err_msg = _("number of operands mismatch");
5039 break;
5040 case invalid_instruction_suffix:
5041 err_msg = _("invalid instruction suffix");
5042 break;
5043 case bad_imm4:
5044 err_msg = _("constant doesn't fit in 4 bits");
5045 break;
5046 case unsupported_with_intel_mnemonic:
5047 err_msg = _("unsupported with Intel mnemonic");
5048 break;
5049 case unsupported_syntax:
5050 err_msg = _("unsupported syntax");
5051 break;
5052 case unsupported:
5053 as_bad (_("unsupported instruction `%s'"),
5054 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5055 return;
5056 case unsupported_on_arch:
5057 as_bad (_("`%s' is not supported on `%s%s'"),
5058 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5059 cpu_arch_name ? cpu_arch_name : default_arch,
5060 cpu_sub_arch_name ? cpu_sub_arch_name : "");
5061 return;
5062 case unsupported_64bit:
5063 if (ISLOWER (mnem_suffix))
5064 {
5065 if (flag_code == CODE_64BIT)
5066 as_bad (_("`%s%c' is not supported in 64-bit mode"),
5067 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5068 mnem_suffix);
5069 else
5070 as_bad (_("`%s%c' is only supported in 64-bit mode"),
5071 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5072 mnem_suffix);
5073 }
5074 else
5075 {
5076 if (flag_code == CODE_64BIT)
5077 as_bad (_("`%s' is not supported in 64-bit mode"),
5078 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5079 else
5080 as_bad (_("`%s' is only supported in 64-bit mode"),
5081 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5082 }
5083 return;
5084 case invalid_sib_address:
5085 err_msg = _("invalid SIB address");
5086 break;
5087 case invalid_vsib_address:
5088 err_msg = _("invalid VSIB address");
5089 break;
5090 case invalid_vector_register_set:
5091 err_msg = _("mask, index, and destination registers must be distinct");
5092 break;
5093 case invalid_tmm_register_set:
5094 err_msg = _("all tmm registers must be distinct");
5095 break;
5096 case invalid_dest_and_src_register_set:
5097 err_msg = _("destination and source registers must be distinct");
5098 break;
5099 case unsupported_vector_index_register:
5100 err_msg = _("unsupported vector index register");
5101 break;
5102 case unsupported_broadcast:
5103 err_msg = _("unsupported broadcast");
5104 break;
5105 case broadcast_needed:
5106 err_msg = _("broadcast is needed for operand of such type");
5107 break;
5108 case unsupported_masking:
5109 err_msg = _("unsupported masking");
5110 break;
5111 case mask_not_on_destination:
5112 err_msg = _("mask not on destination operand");
5113 break;
5114 case no_default_mask:
5115 err_msg = _("default mask isn't allowed");
5116 break;
5117 case unsupported_rc_sae:
5118 err_msg = _("unsupported static rounding/sae");
5119 break;
5120 case invalid_register_operand:
5121 err_msg = _("invalid register operand");
5122 break;
5123 }
5124 as_bad (_("%s for `%s'"), err_msg,
5125 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5126 return;
5127 }
5128
5129 free (copy);
5130
5131 if (sse_check != check_none
5132 /* The opcode space check isn't strictly needed; it's there only to
5133 bypass the logic below when easily possible. */
5134 && t->opcode_space >= SPACE_0F
5135 && t->opcode_space <= SPACE_0F3A
5136 && !i.tm.cpu_flags.bitfield.cpusse4a
5137 && !is_any_vex_encoding (t))
5138 {
5139 bool simd = false;
5140
5141 for (j = 0; j < t->operands; ++j)
5142 {
5143 if (t->operand_types[j].bitfield.class == RegMMX)
5144 break;
5145 if (t->operand_types[j].bitfield.class == RegSIMD)
5146 simd = true;
5147 }
5148
5149 if (j >= t->operands && simd)
5150 (sse_check == check_warning
5151 ? as_warn
5152 : as_bad) (_("SSE instruction `%s' is used"), insn_name (&i.tm));
5153 }
5154
5155 if (i.tm.opcode_modifier.fwait)
5156 if (!add_prefix (FWAIT_OPCODE))
5157 return;
5158
5159 /* Check if REP prefix is OK. */
5160 if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
5161 {
5162 as_bad (_("invalid instruction `%s' after `%s'"),
5163 insn_name (&i.tm), i.rep_prefix);
5164 return;
5165 }
5166
5167 /* Check for lock without a lockable instruction. Destination operand
5168 must be memory unless it is xchg (0x86). */
5169 if (i.prefix[LOCK_PREFIX])
5170 {
5171 if (i.tm.opcode_modifier.prefixok < PrefixLock
5172 || i.mem_operands == 0
5173 || (i.tm.base_opcode != 0x86
5174 && !(i.flags[i.operands - 1] & Operand_Mem)))
5175 {
5176 as_bad (_("expecting lockable instruction after `lock'"));
5177 return;
5178 }
5179
5180 /* Zap the redundant prefix from XCHG when optimizing. */
5181 if (i.tm.base_opcode == 0x86 && optimize && !i.no_optimize)
5182 i.prefix[LOCK_PREFIX] = 0;
5183 }
5184
5185 if (is_any_vex_encoding (&i.tm)
5186 || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
5187 || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
5188 {
5189 /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns. */
5190 if (i.prefix[DATA_PREFIX])
5191 {
5192 as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
5193 return;
5194 }
5195
5196 /* Don't allow e.g. KMOV in TLS code sequences. */
5197 for (j = i.imm_operands; j < i.operands; ++j)
5198 switch (i.reloc[j])
5199 {
5200 case BFD_RELOC_386_TLS_GOTIE:
5201 case BFD_RELOC_386_TLS_LE_32:
5202 case BFD_RELOC_X86_64_GOTTPOFF:
5203 case BFD_RELOC_X86_64_TLSLD:
5204 as_bad (_("TLS relocation cannot be used with `%s'"), insn_name (&i.tm));
5205 return;
5206 default:
5207 break;
5208 }
5209 }
5210
5211 /* Check if HLE prefix is OK. */
5212 if (i.hle_prefix && !check_hle ())
5213 return;
5214
5215 /* Check BND prefix. */
5216 if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
5217 as_bad (_("expecting valid branch instruction after `bnd'"));
5218
5219 /* Check NOTRACK prefix. */
5220 if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
5221 as_bad (_("expecting indirect branch instruction after `notrack'"));
5222
5223 if (i.tm.cpu_flags.bitfield.cpumpx)
5224 {
5225 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
5226 as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
5227 else if (flag_code != CODE_16BIT
5228 ? i.prefix[ADDR_PREFIX]
5229 : i.mem_operands && !i.prefix[ADDR_PREFIX])
5230 as_bad (_("16-bit address isn't allowed in MPX instructions"));
5231 }
5232
5233 /* Insert BND prefix. */
5234 if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
5235 {
5236 if (!i.prefix[BND_PREFIX])
5237 add_prefix (BND_PREFIX_OPCODE);
5238 else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
5239 {
5240 as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
5241 i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
5242 }
5243 }
5244
5245 /* Check string instruction segment overrides. */
5246 if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
5247 {
5248 gas_assert (i.mem_operands);
5249 if (!check_string ())
5250 return;
5251 i.disp_operands = 0;
5252 }
5253
5254 /* The memory operand of (%dx) should be only used with input/output
5255 instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee). */
5256 if (i.input_output_operand
5257 && ((i.tm.base_opcode | 0x82) != 0xee
5258 || i.tm.opcode_space != SPACE_BASE))
5259 {
5260 as_bad (_("input/output port address isn't allowed with `%s'"),
5261 insn_name (&i.tm));
5262 return;
5263 }
5264
5265 if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
5266 optimize_encoding ();
5267
5268 if (use_unaligned_vector_move)
5269 encode_with_unaligned_vector_move ();
5270
5271 if (!process_suffix ())
5272 return;
5273
5274 /* Check if IP-relative addressing requirements can be satisfied. */
5275 if (i.tm.cpu_flags.bitfield.cpuprefetchi
5276 && !(i.base_reg && i.base_reg->reg_num == RegIP))
5277 as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
5278
5279 /* Update operand types and check extended states. */
5280 for (j = 0; j < i.operands; j++)
5281 {
5282 i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
5283 switch (i.tm.operand_types[j].bitfield.class)
5284 {
5285 default:
5286 break;
5287 case RegMMX:
5288 i.xstate |= xstate_mmx;
5289 break;
5290 case RegMask:
5291 i.xstate |= xstate_mask;
5292 break;
5293 case RegSIMD:
5294 if (i.tm.operand_types[j].bitfield.tmmword)
5295 i.xstate |= xstate_tmm;
5296 else if (i.tm.operand_types[j].bitfield.zmmword)
5297 i.xstate |= xstate_zmm;
5298 else if (i.tm.operand_types[j].bitfield.ymmword)
5299 i.xstate |= xstate_ymm;
5300 else if (i.tm.operand_types[j].bitfield.xmmword)
5301 i.xstate |= xstate_xmm;
5302 break;
5303 }
5304 }
5305
5306 /* Make still unresolved immediate matches conform to size of immediate
5307 given in i.suffix. */
5308 if (!finalize_imm ())
5309 return;
5310
5311 if (i.types[0].bitfield.imm1)
5312 i.imm_operands = 0; /* kludge for shift insns. */
5313
5314 /* For insns with operands there are more diddles to do to the opcode. */
5315 if (i.operands)
5316 {
5317 if (!process_operands ())
5318 return;
5319 }
5320 else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
5321 {
5322 /* UnixWare: `fsub' with no args is an alias for `fsubp', `fadd' -> `faddp', etc. */
5323 as_warn (_("translating to `%sp'"), insn_name (&i.tm));
5324 }
5325
5326 if (is_any_vex_encoding (&i.tm))
5327 {
5328 if (!cpu_arch_flags.bitfield.cpui286)
5329 {
5330 as_bad (_("instruction `%s' isn't supported outside of protected mode."),
5331 insn_name (&i.tm));
5332 return;
5333 }
5334
5335 /* Check for explicit REX prefix. */
5336 if (i.prefix[REX_PREFIX] || i.rex_encoding)
5337 {
5338 as_bad (_("REX prefix invalid with `%s'"), insn_name (&i.tm));
5339 return;
5340 }
5341
5342 if (i.tm.opcode_modifier.vex)
5343 build_vex_prefix (t);
5344 else
5345 build_evex_prefix ();
5346
5347 /* The individual REX.RXBW bits got consumed. */
5348 i.rex &= REX_OPCODE;
5349 }
5350
5351 /* Handle conversion of 'int $3' --> special int3 insn. */
5352 if (i.tm.mnem_off == MN_int
5353 && i.op[0].imms->X_add_number == 3)
5354 {
5355 i.tm.base_opcode = INT3_OPCODE;
5356 i.imm_operands = 0;
5357 }
5358
5359 if ((i.tm.opcode_modifier.jump == JUMP
5360 || i.tm.opcode_modifier.jump == JUMP_BYTE
5361 || i.tm.opcode_modifier.jump == JUMP_DWORD)
5362 && i.op[0].disps->X_op == O_constant)
5363 {
5364 /* Convert "jmp constant" (and "call constant") to a jump (call) to
5365 the absolute address given by the constant. Since ix86 jumps and
5366 calls are pc relative, we need to generate a reloc. */
5367 i.op[0].disps->X_add_symbol = &abs_symbol;
5368 i.op[0].disps->X_op = O_symbol;
5369 }
5370
5371 /* For 8 bit registers we need an empty rex prefix. Also if the
5372 instruction already has a REX prefix, we need to convert old
5373 registers to new ones. */
5374
5375 if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
5376 && (i.op[0].regs->reg_flags & RegRex64) != 0)
5377 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
5378 && (i.op[1].regs->reg_flags & RegRex64) != 0)
5379 || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
5380 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
5381 && i.rex != 0))
5382 {
5383 int x;
5384
5385 i.rex |= REX_OPCODE;
5386 for (x = 0; x < 2; x++)
5387 {
5388 /* Look for 8 bit operand that uses old registers. */
5389 if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
5390 && (i.op[x].regs->reg_flags & RegRex64) == 0)
5391 {
5392 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5393 /* In case it is "hi" register, give up. */
5394 if (i.op[x].regs->reg_num > 3)
5395 as_bad (_("can't encode register '%s%s' in an "
5396 "instruction requiring REX prefix."),
5397 register_prefix, i.op[x].regs->reg_name);
5398
5399 /* Otherwise it is equivalent to the extended register.
5400 Since the encoding doesn't change this is merely
5401 cosmetic cleanup for debug output. */
5402
5403 i.op[x].regs = i.op[x].regs + 8;
5404 }
5405 }
5406 }
5407
5408 if (i.rex == 0 && i.rex_encoding)
5409 {
5410 /* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
5411 that uses legacy register. If it is "hi" register, don't add
5412 the REX_OPCODE byte. */
5413 int x;
5414 for (x = 0; x < 2; x++)
5415 if (i.types[x].bitfield.class == Reg
5416 && i.types[x].bitfield.byte
5417 && (i.op[x].regs->reg_flags & RegRex64) == 0
5418 && i.op[x].regs->reg_num > 3)
5419 {
5420 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5421 i.rex_encoding = false;
5422 break;
5423 }
5424
5425 if (i.rex_encoding)
5426 i.rex = REX_OPCODE;
5427 }
5428
5429 if (i.rex != 0)
5430 add_prefix (REX_OPCODE | i.rex);
5431
5432 insert_lfence_before ();
5433
5434 /* We are ready to output the insn. */
5435 output_insn ();
5436
5437 insert_lfence_after ();
5438
5439 last_insn.seg = now_seg;
5440
5441 if (i.tm.opcode_modifier.isprefix)
5442 {
5443 last_insn.kind = last_insn_prefix;
5444 last_insn.name = insn_name (&i.tm);
5445 last_insn.file = as_where (&last_insn.line);
5446 }
5447 else
5448 last_insn.kind = last_insn_other;
5449 }
5450
5451 /* The Q suffix is generally valid only in 64-bit mode, with very few
5452 exceptions: fild, fistp, fisttp, and cmpxchg8b. Note that for fild
5453 and fisttp only one of their two templates is matched below: That's
5454 sufficient since other relevant attributes are the same between both
5455 respective templates. */
5456 static INLINE bool q_suffix_allowed(const insn_template *t)
5457 {
5458 return flag_code == CODE_64BIT
5459 || (t->opcode_space == SPACE_BASE
5460 && t->base_opcode == 0xdf
5461 && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
5462 || t->mnem_off == MN_cmpxchg8b;
5463 }
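/* E.g. (illustrative): in 32-bit mode `fildq (%esp)' stays valid
   (an x87 64-bit integer load), while e.g. `addq' is rejected
   outside 64-bit mode.  */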
5464
5465 static const char *
5466 parse_insn (const char *line, char *mnemonic, bool prefix_only)
5467 {
5468 const char *l = line, *token_start = l;
5469 char *mnem_p;
5470 bool pass1 = !current_templates;
5471 int supported;
5472 const insn_template *t;
5473 char *dot_p = NULL;
5474
5475 while (1)
5476 {
5477 mnem_p = mnemonic;
5478 /* Pseudo-prefixes start with an opening figure brace. */
5479 if ((*mnem_p = *l) == '{')
5480 {
5481 ++mnem_p;
5482 ++l;
5483 }
5484 while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5485 {
5486 if (*mnem_p == '.')
5487 dot_p = mnem_p;
5488 mnem_p++;
5489 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5490 {
5491 too_long:
5492 as_bad (_("no such instruction: `%s'"), token_start);
5493 return NULL;
5494 }
5495 l++;
5496 }
5497 /* Pseudo-prefixes end with a closing figure brace. */
5498 if (*mnemonic == '{' && *l == '}')
5499 {
5500 *mnem_p++ = *l++;
5501 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5502 goto too_long;
5503 *mnem_p = '\0';
5504
5505 /* Point l at the closing brace if there's no other separator. */
5506 if (*l != END_OF_INSN && !is_space_char (*l)
5507 && *l != PREFIX_SEPARATOR)
5508 --l;
5509 }
5510 else if (!is_space_char (*l)
5511 && *l != END_OF_INSN
5512 && (intel_syntax
5513 || (*l != PREFIX_SEPARATOR && *l != ',')))
5514 {
5515 if (prefix_only)
5516 break;
5517 as_bad (_("invalid character %s in mnemonic"),
5518 output_invalid (*l));
5519 return NULL;
5520 }
5521 if (token_start == l)
5522 {
5523 if (!intel_syntax && *l == PREFIX_SEPARATOR)
5524 as_bad (_("expecting prefix; got nothing"));
5525 else
5526 as_bad (_("expecting mnemonic; got nothing"));
5527 return NULL;
5528 }
5529
5530 /* Look up instruction (or prefix) via hash table. */
5531 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5532
5533 if (*l != END_OF_INSN
5534 && (!is_space_char (*l) || l[1] != END_OF_INSN)
5535 && current_templates
5536 && current_templates->start->opcode_modifier.isprefix)
5537 {
5538 if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
5539 {
5540 as_bad ((flag_code != CODE_64BIT
5541 ? _("`%s' is only supported in 64-bit mode")
5542 : _("`%s' is not supported in 64-bit mode")),
5543 insn_name (current_templates->start));
5544 return NULL;
5545 }
5546 /* If we are in 16-bit mode, do not allow addr16 or data16.
5547 Similarly, in 32-bit mode, do not allow addr32 or data32. */
5548 if ((current_templates->start->opcode_modifier.size == SIZE16
5549 || current_templates->start->opcode_modifier.size == SIZE32)
5550 && flag_code != CODE_64BIT
5551 && ((current_templates->start->opcode_modifier.size == SIZE32)
5552 ^ (flag_code == CODE_16BIT)))
5553 {
5554 as_bad (_("redundant %s prefix"),
5555 insn_name (current_templates->start));
5556 return NULL;
5557 }
5558
5559 if (current_templates->start->base_opcode == PSEUDO_PREFIX)
5560 {
5561 /* Handle pseudo prefixes. */
5562 switch (current_templates->start->extension_opcode)
5563 {
5564 case Prefix_Disp8:
5565 /* {disp8} */
5566 i.disp_encoding = disp_encoding_8bit;
5567 break;
5568 case Prefix_Disp16:
5569 /* {disp16} */
5570 i.disp_encoding = disp_encoding_16bit;
5571 break;
5572 case Prefix_Disp32:
5573 /* {disp32} */
5574 i.disp_encoding = disp_encoding_32bit;
5575 break;
5576 case Prefix_Load:
5577 /* {load} */
5578 i.dir_encoding = dir_encoding_load;
5579 break;
5580 case Prefix_Store:
5581 /* {store} */
5582 i.dir_encoding = dir_encoding_store;
5583 break;
5584 case Prefix_VEX:
5585 /* {vex} */
5586 i.vec_encoding = vex_encoding_vex;
5587 break;
5588 case Prefix_VEX3:
5589 /* {vex3} */
5590 i.vec_encoding = vex_encoding_vex3;
5591 break;
5592 case Prefix_EVEX:
5593 /* {evex} */
5594 i.vec_encoding = vex_encoding_evex;
5595 break;
5596 case Prefix_REX:
5597 /* {rex} */
5598 i.rex_encoding = true;
5599 break;
5600 case Prefix_NoOptimize:
5601 /* {nooptimize} */
5602 i.no_optimize = true;
5603 break;
5604 default:
5605 abort ();
5606 }
5607 }
5608 else
5609 {
5610 /* Add prefix, checking for repeated prefixes. */
5611 switch (add_prefix (current_templates->start->base_opcode))
5612 {
5613 case PREFIX_EXIST:
5614 return NULL;
5615 case PREFIX_DS:
5616 if (current_templates->start->cpu_flags.bitfield.cpuibt)
5617 i.notrack_prefix = insn_name (current_templates->start);
5618 break;
5619 case PREFIX_REP:
5620 if (current_templates->start->cpu_flags.bitfield.cpuhle)
5621 i.hle_prefix = insn_name (current_templates->start);
5622 else if (current_templates->start->cpu_flags.bitfield.cpumpx)
5623 i.bnd_prefix = insn_name (current_templates->start);
5624 else
5625 i.rep_prefix = insn_name (current_templates->start);
5626 break;
5627 default:
5628 break;
5629 }
5630 }
5631 /* Skip past PREFIX_SEPARATOR and reset token_start. */
5632 token_start = ++l;
5633 }
5634 else
5635 break;
5636 }
5637
5638 if (prefix_only)
5639 return token_start;
5640
5641 if (!current_templates)
5642 {
5643 /* Deprecated functionality (new code should use pseudo-prefixes instead):
5644 Check if we should swap operand or force 32bit displacement in
5645 encoding. */
5646 if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5647 i.dir_encoding = dir_encoding_swap;
5648 else if (mnem_p - 3 == dot_p
5649 && dot_p[1] == 'd'
5650 && dot_p[2] == '8')
5651 i.disp_encoding = disp_encoding_8bit;
5652 else if (mnem_p - 4 == dot_p
5653 && dot_p[1] == 'd'
5654 && dot_p[2] == '3'
5655 && dot_p[3] == '2')
5656 i.disp_encoding = disp_encoding_32bit;
5657 else
5658 goto check_suffix;
5659 mnem_p = dot_p;
5660 *dot_p = '\0';
5661 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5662 }
5663
5664 if (!current_templates || !pass1)
5665 {
5666 current_templates = NULL;
5667
5668 check_suffix:
5669 if (mnem_p > mnemonic)
5670 {
5671 /* See if we can get a match by trimming off a suffix. */
5672 switch (mnem_p[-1])
5673 {
5674 case WORD_MNEM_SUFFIX:
5675 if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5676 i.suffix = SHORT_MNEM_SUFFIX;
5677 else
5678 /* Fall through. */
5679 case BYTE_MNEM_SUFFIX:
5680 case QWORD_MNEM_SUFFIX:
5681 i.suffix = mnem_p[-1];
5682 mnem_p[-1] = '\0';
5683 current_templates
5684 = (const templates *) str_hash_find (op_hash, mnemonic);
5685 break;
5686 case SHORT_MNEM_SUFFIX:
5687 case LONG_MNEM_SUFFIX:
5688 if (!intel_syntax)
5689 {
5690 i.suffix = mnem_p[-1];
5691 mnem_p[-1] = '\0';
5692 current_templates
5693 = (const templates *) str_hash_find (op_hash, mnemonic);
5694 }
5695 break;
5696
5697 /* Intel Syntax. */
5698 case 'd':
5699 if (intel_syntax)
5700 {
5701 if (intel_float_operand (mnemonic) == 1)
5702 i.suffix = SHORT_MNEM_SUFFIX;
5703 else
5704 i.suffix = LONG_MNEM_SUFFIX;
5705 mnem_p[-1] = '\0';
5706 current_templates
5707 = (const templates *) str_hash_find (op_hash, mnemonic);
5708 }
5709 /* For compatibility reasons accept MOVSD and CMPSD without
5710 operands even in AT&T mode. */
5711 else if (*l == END_OF_INSN
5712 || (is_space_char (*l) && l[1] == END_OF_INSN))
5713 {
5714 mnem_p[-1] = '\0';
5715 current_templates
5716 = (const templates *) str_hash_find (op_hash, mnemonic);
5717 if (current_templates != NULL
5718 /* MOVS or CMPS */
5719 && (current_templates->start->base_opcode | 2) == 0xa6
5720 && current_templates->start->opcode_space
5721 == SPACE_BASE
5722 && mnem_p[-2] == 's')
5723 {
5724 as_warn (_("found `%sd'; assuming `%sl' was meant"),
5725 mnemonic, mnemonic);
5726 i.suffix = LONG_MNEM_SUFFIX;
5727 }
5728 else
5729 {
5730 current_templates = NULL;
5731 mnem_p[-1] = 'd';
5732 }
5733 }
5734 break;
5735 }
5736 }
5737
5738 if (!current_templates)
5739 {
5740 if (pass1)
5741 as_bad (_("no such instruction: `%s'"), token_start);
5742 return NULL;
5743 }
5744 }
5745
5746 if (current_templates->start->opcode_modifier.jump == JUMP
5747 || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5748 {
5749 /* Check for a branch hint. We allow ",pt" and ",pn" for
5750 predict taken and predict not taken respectively.
5751 I'm not sure that branch hints actually do anything on loop
5752 and jcxz insns (JumpByte) for current Pentium4 chips. They
5753 may work in the future and it doesn't hurt to accept them
5754 now. */
5755 if (l[0] == ',' && l[1] == 'p')
5756 {
5757 if (l[2] == 't')
5758 {
5759 if (!add_prefix (DS_PREFIX_OPCODE))
5760 return NULL;
5761 l += 3;
5762 }
5763 else if (l[2] == 'n')
5764 {
5765 if (!add_prefix (CS_PREFIX_OPCODE))
5766 return NULL;
5767 l += 3;
5768 }
5769 }
5770 }
5771 /* Any other comma loses. */
5772 if (*l == ',')
5773 {
5774 as_bad (_("invalid character %s in mnemonic"),
5775 output_invalid (*l));
5776 return NULL;
5777 }
5778
5779 /* Check if instruction is supported on specified architecture. */
5780 supported = 0;
5781 for (t = current_templates->start; t < current_templates->end; ++t)
5782 {
5783 supported |= cpu_flags_match (t);
5784
5785 if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
5786 supported &= ~CPU_FLAGS_64BIT_MATCH;
5787
5788 if (supported == CPU_FLAGS_PERFECT_MATCH)
5789 return l;
5790 }
5791
5792 if (pass1)
5793 {
5794 if (supported & CPU_FLAGS_64BIT_MATCH)
5795 i.error = unsupported_on_arch;
5796 else
5797 i.error = unsupported_64bit;
5798 }
5799
5800 return NULL;
5801 }
5802
5803 static char *
5804 parse_operands (char *l, const char *mnemonic)
5805 {
5806 char *token_start;
5807
5808 /* 1 if operand is pending after ','. */
5809 unsigned int expecting_operand = 0;
5810
5811 while (*l != END_OF_INSN)
5812 {
5813 /* Non-zero if operand parens not balanced. */
5814 unsigned int paren_not_balanced = 0;
5815 /* True if inside double quotes. */
5816 bool in_quotes = false;
5817
5818 /* Skip optional white space before operand. */
5819 if (is_space_char (*l))
5820 ++l;
5821 if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5822 {
5823 as_bad (_("invalid character %s before operand %d"),
5824 output_invalid (*l),
5825 i.operands + 1);
5826 return NULL;
5827 }
5828 token_start = l; /* After white space. */
5829 while (in_quotes || paren_not_balanced || *l != ',')
5830 {
5831 if (*l == END_OF_INSN)
5832 {
5833 if (in_quotes)
5834 {
5835 as_bad (_("unbalanced double quotes in operand %d."),
5836 i.operands + 1);
5837 return NULL;
5838 }
5839 if (paren_not_balanced)
5840 {
5841 know (!intel_syntax);
5842 as_bad (_("unbalanced parenthesis in operand %d."),
5843 i.operands + 1);
5844 return NULL;
5845 }
5846 else
5847 break; /* we are done */
5848 }
5849 else if (*l == '\\' && l[1] == '"')
5850 ++l;
5851 else if (*l == '"')
5852 in_quotes = !in_quotes;
5853 else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
5854 {
5855 as_bad (_("invalid character %s in operand %d"),
5856 output_invalid (*l),
5857 i.operands + 1);
5858 return NULL;
5859 }
5860 if (!intel_syntax && !in_quotes)
5861 {
5862 if (*l == '(')
5863 ++paren_not_balanced;
5864 if (*l == ')')
5865 --paren_not_balanced;
5866 }
5867 l++;
5868 }
5869 if (l != token_start)
5870 { /* Yes, we've read in another operand. */
5871 unsigned int operand_ok;
5872 this_operand = i.operands++;
5873 if (i.operands > MAX_OPERANDS)
5874 {
5875 as_bad (_("spurious operands; (%d operands/instruction max)"),
5876 MAX_OPERANDS);
5877 return NULL;
5878 }
5879 i.types[this_operand].bitfield.unspecified = 1;
5880 /* Now parse operand adding info to 'i' as we go along. */
5881 END_STRING_AND_SAVE (l);
5882
5883 if (i.mem_operands > 1)
5884 {
5885 as_bad (_("too many memory references for `%s'"),
5886 mnemonic);
5887 return NULL;
5888 }
5889
5890 if (intel_syntax)
5891 operand_ok =
5892 i386_intel_operand (token_start,
5893 intel_float_operand (mnemonic));
5894 else
5895 operand_ok = i386_att_operand (token_start);
5896
5897 RESTORE_END_STRING (l);
5898 if (!operand_ok)
5899 return NULL;
5900 }
5901 else
5902 {
5903 if (expecting_operand)
5904 {
5905 expecting_operand_after_comma:
5906 as_bad (_("expecting operand after ','; got nothing"));
5907 return NULL;
5908 }
5909 if (*l == ',')
5910 {
5911 as_bad (_("expecting operand before ','; got nothing"));
5912 return NULL;
5913 }
5914 }
5915
5916 /* Now *l must be either ',' or END_OF_INSN. */
5917 if (*l == ',')
5918 {
5919 if (*++l == END_OF_INSN)
5920 {
5921 /* Just skip it; if it's \n, complain. */
5922 goto expecting_operand_after_comma;
5923 }
5924 expecting_operand = 1;
5925 }
5926 }
5927 return l;
5928 }
5929
5930 static void
5931 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
5932 {
5933 union i386_op temp_op;
5934 i386_operand_type temp_type;
5935 unsigned int temp_flags;
5936 enum bfd_reloc_code_real temp_reloc;
5937
5938 temp_type = i.types[xchg2];
5939 i.types[xchg2] = i.types[xchg1];
5940 i.types[xchg1] = temp_type;
5941
5942 temp_flags = i.flags[xchg2];
5943 i.flags[xchg2] = i.flags[xchg1];
5944 i.flags[xchg1] = temp_flags;
5945
5946 temp_op = i.op[xchg2];
5947 i.op[xchg2] = i.op[xchg1];
5948 i.op[xchg1] = temp_op;
5949
5950 temp_reloc = i.reloc[xchg2];
5951 i.reloc[xchg2] = i.reloc[xchg1];
5952 i.reloc[xchg1] = temp_reloc;
5953
5954 temp_flags = i.imm_bits[xchg2];
5955 i.imm_bits[xchg2] = i.imm_bits[xchg1];
5956 i.imm_bits[xchg1] = temp_flags;
5957
5958 if (i.mask.reg)
5959 {
5960 if (i.mask.operand == xchg1)
5961 i.mask.operand = xchg2;
5962 else if (i.mask.operand == xchg2)
5963 i.mask.operand = xchg1;
5964 }
5965 if (i.broadcast.type || i.broadcast.bytes)
5966 {
5967 if (i.broadcast.operand == xchg1)
5968 i.broadcast.operand = xchg2;
5969 else if (i.broadcast.operand == xchg2)
5970 i.broadcast.operand = xchg1;
5971 }
5972 }
5973
5974 static void
5975 swap_operands (void)
5976 {
5977 switch (i.operands)
5978 {
5979 case 5:
5980 case 4:
5981 swap_2_operands (1, i.operands - 2);
5982 /* Fall through. */
5983 case 3:
5984 case 2:
5985 swap_2_operands (0, i.operands - 1);
5986 break;
5987 default:
5988 abort ();
5989 }
5990
5991 if (i.mem_operands == 2)
5992 {
5993 const reg_entry *temp_seg;
5994 temp_seg = i.seg[0];
5995 i.seg[0] = i.seg[1];
5996 i.seg[1] = temp_seg;
5997 }
5998 }
5999
6000 /* Try to ensure constant immediates are represented in the smallest
6001 opcode possible. */
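/* For instance, this lets "addl $1, %eax" use the sign-extended Imm8
form (opcode 0x83 /0) instead of the 4-byte-immediate form (0x81 /0). */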
6002 static void
6003 optimize_imm (void)
6004 {
6005 char guess_suffix = 0;
6006 int op;
6007
6008 if (i.suffix)
6009 guess_suffix = i.suffix;
6010 else if (i.reg_operands)
6011 {
6012 /* Figure out a suffix from the last register operand specified.
6013 We can't do this properly yet, i.e. excluding special register
6014 instances, but the following works for instructions with
6015 immediates. In any case, we can't set i.suffix yet. */
6016 for (op = i.operands; --op >= 0;)
6017 if (i.types[op].bitfield.class != Reg)
6018 continue;
6019 else if (i.types[op].bitfield.byte)
6020 {
6021 guess_suffix = BYTE_MNEM_SUFFIX;
6022 break;
6023 }
6024 else if (i.types[op].bitfield.word)
6025 {
6026 guess_suffix = WORD_MNEM_SUFFIX;
6027 break;
6028 }
6029 else if (i.types[op].bitfield.dword)
6030 {
6031 guess_suffix = LONG_MNEM_SUFFIX;
6032 break;
6033 }
6034 else if (i.types[op].bitfield.qword)
6035 {
6036 guess_suffix = QWORD_MNEM_SUFFIX;
6037 break;
6038 }
6039 }
6040 else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
6041 guess_suffix = WORD_MNEM_SUFFIX;
6042 else if (flag_code != CODE_64BIT || !(i.prefix[REX_PREFIX] & REX_W))
6043 guess_suffix = LONG_MNEM_SUFFIX;
6044
6045 for (op = i.operands; --op >= 0;)
6046 if (operand_type_check (i.types[op], imm))
6047 {
6048 switch (i.op[op].imms->X_op)
6049 {
6050 case O_constant:
6051 /* If a suffix is given, this operand may be shortened. */
6052 switch (guess_suffix)
6053 {
6054 case LONG_MNEM_SUFFIX:
6055 i.types[op].bitfield.imm32 = 1;
6056 i.types[op].bitfield.imm64 = 1;
6057 break;
6058 case WORD_MNEM_SUFFIX:
6059 i.types[op].bitfield.imm16 = 1;
6060 i.types[op].bitfield.imm32 = 1;
6061 i.types[op].bitfield.imm32s = 1;
6062 i.types[op].bitfield.imm64 = 1;
6063 break;
6064 case BYTE_MNEM_SUFFIX:
6065 i.types[op].bitfield.imm8 = 1;
6066 i.types[op].bitfield.imm8s = 1;
6067 i.types[op].bitfield.imm16 = 1;
6068 i.types[op].bitfield.imm32 = 1;
6069 i.types[op].bitfield.imm32s = 1;
6070 i.types[op].bitfield.imm64 = 1;
6071 break;
6072 }
6073
6074 /* If this operand is at most 16 bits, convert it
6075 to a signed 16 bit number before trying to see
6076 whether it will fit in an even smaller size.
6077 This allows a 16-bit operand such as $0xffe0 to
6078 be recognised as within Imm8S range. */
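/* Worked example: $0xffe0 with a 'w' suffix becomes
(0xffe0 ^ 0x8000) - 0x8000 = 0x7fe0 - 0x8000 = -0x20,
which fits in Imm8S. */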
6079 if ((i.types[op].bitfield.imm16)
6080 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
6081 {
6082 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6083 ^ 0x8000) - 0x8000);
6084 }
6085 #ifdef BFD64
6086 /* Store 32-bit immediate in 64-bit for 64-bit BFD. */
6087 if ((i.types[op].bitfield.imm32)
6088 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
6089 {
6090 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6091 ^ ((offsetT) 1 << 31))
6092 - ((offsetT) 1 << 31));
6093 }
6094 #endif
6095 i.types[op]
6096 = operand_type_or (i.types[op],
6097 smallest_imm_type (i.op[op].imms->X_add_number));
6098
6099 /* We must avoid matching Imm32 templates when only a
6100 64bit immediate is available. */
6101 if (guess_suffix == QWORD_MNEM_SUFFIX)
6102 i.types[op].bitfield.imm32 = 0;
6103 break;
6104
6105 case O_absent:
6106 case O_register:
6107 abort ();
6108
6109 /* Symbols and expressions. */
6110 default:
6111 /* Convert symbolic operand to proper sizes for matching, but don't
6112 prevent matching a set of insns that only supports sizes other
6113 than those matching the insn suffix. */
6114 {
6115 i386_operand_type mask, allowed;
6116 const insn_template *t = current_templates->start;
6117
6118 operand_type_set (&mask, 0);
6119 switch (guess_suffix)
6120 {
6121 case QWORD_MNEM_SUFFIX:
6122 mask.bitfield.imm64 = 1;
6123 mask.bitfield.imm32s = 1;
6124 break;
6125 case LONG_MNEM_SUFFIX:
6126 mask.bitfield.imm32 = 1;
6127 break;
6128 case WORD_MNEM_SUFFIX:
6129 mask.bitfield.imm16 = 1;
6130 break;
6131 case BYTE_MNEM_SUFFIX:
6132 mask.bitfield.imm8 = 1;
6133 break;
6134 default:
6135 break;
6136 }
6137
6138 allowed = operand_type_and (t->operand_types[op], mask);
6139 while (++t < current_templates->end)
6140 {
6141 allowed = operand_type_or (allowed, t->operand_types[op]);
6142 allowed = operand_type_and (allowed, mask);
6143 }
6144
6145 if (!operand_type_all_zero (&allowed))
6146 i.types[op] = operand_type_and (i.types[op], mask);
6147 }
6148 break;
6149 }
6150 }
6151 }
6152
6153 /* Try to use the smallest displacement type too. */
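/* E.g. a redundant zero displacement, as in "movl 0(%eax), %ecx", is
dropped altogether, while other small constants are narrowed to Disp8
where the templates permit. */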
6154 static bool
6155 optimize_disp (const insn_template *t)
6156 {
6157 unsigned int op;
6158
6159 if (!want_disp32 (t)
6160 && (!t->opcode_modifier.jump
6161 || i.jumpabsolute || i.types[0].bitfield.baseindex))
6162 {
6163 for (op = 0; op < i.operands; ++op)
6164 {
6165 const expressionS *exp = i.op[op].disps;
6166
6167 if (!operand_type_check (i.types[op], disp))
6168 continue;
6169
6170 if (exp->X_op != O_constant)
6171 continue;
6172
6173 /* Since the displacement is sign-extended to 64bit, don't allow
6174 disp32 if it is out of range. */
6175 if (fits_in_signed_long (exp->X_add_number))
6176 continue;
6177
6178 i.types[op].bitfield.disp32 = 0;
6179 if (i.types[op].bitfield.baseindex)
6180 {
6181 as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
6182 (uint64_t) exp->X_add_number);
6183 return false;
6184 }
6185 }
6186 }
6187
6188 /* Don't optimize the displacement for movabs, since it only
6189 takes a 64bit displacement. */
6190 if (i.disp_encoding > disp_encoding_8bit
6191 || (flag_code == CODE_64BIT && t->mnem_off == MN_movabs))
6192 return true;
6193
6194 for (op = i.operands; op-- > 0;)
6195 if (operand_type_check (i.types[op], disp))
6196 {
6197 if (i.op[op].disps->X_op == O_constant)
6198 {
6199 offsetT op_disp = i.op[op].disps->X_add_number;
6200
6201 if (!op_disp && i.types[op].bitfield.baseindex)
6202 {
6203 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6204 i.op[op].disps = NULL;
6205 i.disp_operands--;
6206 continue;
6207 }
6208
6209 if (i.types[op].bitfield.disp16
6210 && fits_in_unsigned_word (op_disp))
6211 {
6212 /* If this operand is at most 16 bits, convert
6213 to a signed 16 bit number and don't use 64bit
6214 displacement. */
6215 op_disp = ((op_disp ^ 0x8000) - 0x8000);
6216 i.types[op].bitfield.disp64 = 0;
6217 }
6218
6219 #ifdef BFD64
6220 /* Optimize 64-bit displacement to 32-bit for 64-bit BFD. */
6221 if ((flag_code != CODE_64BIT
6222 ? i.types[op].bitfield.disp32
6223 : want_disp32 (t)
6224 && (!t->opcode_modifier.jump
6225 || i.jumpabsolute || i.types[op].bitfield.baseindex))
6226 && fits_in_unsigned_long (op_disp))
6227 {
6228 /* If this operand is at most 32 bits, convert
6229 to a signed 32 bit number and don't use 64bit
6230 displacement. */
6231 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
6232 i.types[op].bitfield.disp64 = 0;
6233 i.types[op].bitfield.disp32 = 1;
6234 }
6235
6236 if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
6237 {
6238 i.types[op].bitfield.disp64 = 0;
6239 i.types[op].bitfield.disp32 = 1;
6240 }
6241 #endif
6242 if ((i.types[op].bitfield.disp32
6243 || i.types[op].bitfield.disp16)
6244 && fits_in_disp8 (op_disp))
6245 i.types[op].bitfield.disp8 = 1;
6246
6247 i.op[op].disps->X_add_number = op_disp;
6248 }
6249 else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
6250 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
6251 {
6252 fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
6253 i.op[op].disps, 0, i.reloc[op]);
6254 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6255 }
6256 else
6257 /* We only support 64bit displacements for constants. */
6258 i.types[op].bitfield.disp64 = 0;
6259 }
6260
6261 return true;
6262 }
6263
6264 /* Return 1 if there is a match in broadcast bytes between operand
6265 GIVEN and instruction template T. */
6266
6267 static INLINE int
6268 match_broadcast_size (const insn_template *t, unsigned int given)
6269 {
6270 return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
6271 && i.types[given].bitfield.byte)
6272 || (t->opcode_modifier.broadcast == WORD_BROADCAST
6273 && i.types[given].bitfield.word)
6274 || (t->opcode_modifier.broadcast == DWORD_BROADCAST
6275 && i.types[given].bitfield.dword)
6276 || (t->opcode_modifier.broadcast == QWORD_BROADCAST
6277 && i.types[given].bitfield.qword));
6278 }
6279
6280 /* Check if operands are valid for the instruction. */
6281
6282 static int
6283 check_VecOperands (const insn_template *t)
6284 {
6285 unsigned int op;
6286 i386_cpu_flags cpu;
6287
6288 /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
6289 any one operand implicitly require AVX512VL support if the actual
6290 operand size is YMMword or XMMword. Since this function runs after
6291 template matching, there's no need to check for YMMword/XMMword in
6292 the template. */
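/* For example, vpabsq (an AVX512F insn) used on %ymm operands is
architecturally valid only with AVX512VL. */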
6293 cpu = cpu_flags_and (t->cpu_flags, avx512);
6294 if (!cpu_flags_all_zero (&cpu)
6295 && !t->cpu_flags.bitfield.cpuavx512vl
6296 && !cpu_arch_flags.bitfield.cpuavx512vl)
6297 {
6298 for (op = 0; op < t->operands; ++op)
6299 {
6300 if (t->operand_types[op].bitfield.zmmword
6301 && (i.types[op].bitfield.ymmword
6302 || i.types[op].bitfield.xmmword))
6303 {
6304 i.error = unsupported;
6305 return 1;
6306 }
6307 }
6308 }
6309
6310 /* Somewhat similarly, templates specifying both AVX and AVX2
6311 require AVX2 support if the actual operand size is YMMword. */
6312 if (t->cpu_flags.bitfield.cpuavx
6313 && t->cpu_flags.bitfield.cpuavx2
6314 && !cpu_arch_flags.bitfield.cpuavx2)
6315 {
6316 for (op = 0; op < t->operands; ++op)
6317 {
6318 if (t->operand_types[op].bitfield.xmmword
6319 && i.types[op].bitfield.ymmword)
6320 {
6321 i.error = unsupported;
6322 return 1;
6323 }
6324 }
6325 }
6326
6327 /* Without VSIB byte, we can't have a vector register for index. */
6328 if (!t->opcode_modifier.sib
6329 && i.index_reg
6330 && (i.index_reg->reg_type.bitfield.xmmword
6331 || i.index_reg->reg_type.bitfield.ymmword
6332 || i.index_reg->reg_type.bitfield.zmmword))
6333 {
6334 i.error = unsupported_vector_index_register;
6335 return 1;
6336 }
6337
6338 /* Check if default mask is allowed. */
6339 if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
6340 && (!i.mask.reg || i.mask.reg->reg_num == 0))
6341 {
6342 i.error = no_default_mask;
6343 return 1;
6344 }
6345
6346 /* For VSIB byte, we need a vector register for index, and all vector
6347 registers must be distinct. */
6348 if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
6349 {
6350 if (!i.index_reg
6351 || !((t->opcode_modifier.sib == VECSIB128
6352 && i.index_reg->reg_type.bitfield.xmmword)
6353 || (t->opcode_modifier.sib == VECSIB256
6354 && i.index_reg->reg_type.bitfield.ymmword)
6355 || (t->opcode_modifier.sib == VECSIB512
6356 && i.index_reg->reg_type.bitfield.zmmword)))
6357 {
6358 i.error = invalid_vsib_address;
6359 return 1;
6360 }
6361
6362 gas_assert (i.reg_operands == 2 || i.mask.reg);
6363 if (i.reg_operands == 2 && !i.mask.reg)
6364 {
6365 gas_assert (i.types[0].bitfield.class == RegSIMD);
6366 gas_assert (i.types[0].bitfield.xmmword
6367 || i.types[0].bitfield.ymmword);
6368 gas_assert (i.types[2].bitfield.class == RegSIMD);
6369 gas_assert (i.types[2].bitfield.xmmword
6370 || i.types[2].bitfield.ymmword);
6371 if (operand_check == check_none)
6372 return 0;
6373 if (register_number (i.op[0].regs)
6374 != register_number (i.index_reg)
6375 && register_number (i.op[2].regs)
6376 != register_number (i.index_reg)
6377 && register_number (i.op[0].regs)
6378 != register_number (i.op[2].regs))
6379 return 0;
6380 if (operand_check == check_error)
6381 {
6382 i.error = invalid_vector_register_set;
6383 return 1;
6384 }
6385 as_warn (_("mask, index, and destination registers should be distinct"));
6386 }
6387 else if (i.reg_operands == 1 && i.mask.reg)
6388 {
6389 if (i.types[1].bitfield.class == RegSIMD
6390 && (i.types[1].bitfield.xmmword
6391 || i.types[1].bitfield.ymmword
6392 || i.types[1].bitfield.zmmword)
6393 && (register_number (i.op[1].regs)
6394 == register_number (i.index_reg)))
6395 {
6396 if (operand_check == check_error)
6397 {
6398 i.error = invalid_vector_register_set;
6399 return 1;
6400 }
6401 if (operand_check != check_none)
6402 as_warn (_("index and destination registers should be distinct"));
6403 }
6404 }
6405 }
6406
6407 /* For AMX instructions with 3 TMM register operands, all operands
6408 must be distinct. */
6409 if (i.reg_operands == 3
6410 && t->operand_types[0].bitfield.tmmword
6411 && (i.op[0].regs == i.op[1].regs
6412 || i.op[0].regs == i.op[2].regs
6413 || i.op[1].regs == i.op[2].regs))
6414 {
6415 i.error = invalid_tmm_register_set;
6416 return 1;
6417 }
6418
6419 /* Some special instructions require that the destination be
6420 distinct from the source registers. */
6421 if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
6422 {
6423 unsigned int dest_reg = i.operands - 1;
6424
6425 know (i.operands >= 3);
6426
6427 /* #UD if dest_reg == src1_reg or dest_reg == src2_reg. */
6428 if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
6429 || (i.reg_operands > 2
6430 && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
6431 {
6432 i.error = invalid_dest_and_src_register_set;
6433 return 1;
6434 }
6435 }
6436
6437 /* Check if broadcast is supported by the instruction and is applied
6438 to the memory operand. */
6439 if (i.broadcast.type || i.broadcast.bytes)
6440 {
6441 i386_operand_type type, overlap;
6442
6443 /* Check if specified broadcast is supported in this instruction,
6444 and its broadcast bytes match the memory operand. */
6445 op = i.broadcast.operand;
6446 if (!t->opcode_modifier.broadcast
6447 || !(i.flags[op] & Operand_Mem)
6448 || (!i.types[op].bitfield.unspecified
6449 && !match_broadcast_size (t, op)))
6450 {
6451 bad_broadcast:
6452 i.error = unsupported_broadcast;
6453 return 1;
6454 }
6455
6456 operand_type_set (&type, 0);
6457 switch (get_broadcast_bytes (t, false))
6458 {
6459 case 2:
6460 type.bitfield.word = 1;
6461 break;
6462 case 4:
6463 type.bitfield.dword = 1;
6464 break;
6465 case 8:
6466 type.bitfield.qword = 1;
6467 break;
6468 case 16:
6469 type.bitfield.xmmword = 1;
6470 break;
6471 case 32:
6472 type.bitfield.ymmword = 1;
6473 break;
6474 case 64:
6475 type.bitfield.zmmword = 1;
6476 break;
6477 default:
6478 goto bad_broadcast;
6479 }
6480
6481 overlap = operand_type_and (type, t->operand_types[op]);
6482 if (t->operand_types[op].bitfield.class == RegSIMD
6483 && t->operand_types[op].bitfield.byte
6484 + t->operand_types[op].bitfield.word
6485 + t->operand_types[op].bitfield.dword
6486 + t->operand_types[op].bitfield.qword > 1)
6487 {
6488 overlap.bitfield.xmmword = 0;
6489 overlap.bitfield.ymmword = 0;
6490 overlap.bitfield.zmmword = 0;
6491 }
6492 if (operand_type_all_zero (&overlap))
6493 goto bad_broadcast;
6494
6495 if (t->opcode_modifier.checkoperandsize)
6496 {
6497 unsigned int j;
6498
6499 type.bitfield.baseindex = 1;
6500 for (j = 0; j < i.operands; ++j)
6501 {
6502 if (j != op
6503 && !operand_type_register_match(i.types[j],
6504 t->operand_types[j],
6505 type,
6506 t->operand_types[op]))
6507 goto bad_broadcast;
6508 }
6509 }
6510 }
6511 /* If broadcast is supported in this instruction, we need to check that
6512 an operand of one-element size isn't specified without broadcast. */
6513 else if (t->opcode_modifier.broadcast && i.mem_operands)
6514 {
6515 /* Find memory operand. */
6516 for (op = 0; op < i.operands; op++)
6517 if (i.flags[op] & Operand_Mem)
6518 break;
6519 gas_assert (op < i.operands);
6520 /* Check size of the memory operand. */
6521 if (match_broadcast_size (t, op))
6522 {
6523 i.error = broadcast_needed;
6524 return 1;
6525 }
6526 }
6527 else
6528 op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning. */
6529
6530 /* Check if requested masking is supported. */
6531 if (i.mask.reg)
6532 {
6533 if (!t->opcode_modifier.masking)
6534 {
6535 i.error = unsupported_masking;
6536 return 1;
6537 }
6538
6539 /* Common rules for masking:
6540 - mask register destinations permit only zeroing-masking, without
6541 that actually being expressed by a {z} operand suffix or EVEX.z,
6542 - memory destinations allow only merging-masking,
6543 - scatter/gather insns (i.e. ones using vSIB) only allow merging-
6544 masking. */
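/* E.g. "vmovdqa32 %zmm0, (%rax){%k1}{z}" is rejected here: its
memory destination permits only merging-masking. */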
6545 if (i.mask.zeroing
6546 && (t->operand_types[t->operands - 1].bitfield.class == RegMask
6547 || (i.flags[t->operands - 1] & Operand_Mem)
6548 || t->opcode_modifier.sib))
6549 {
6550 i.error = unsupported_masking;
6551 return 1;
6552 }
6553 }
6554
6555 /* Check if masking is applied to dest operand. */
6556 if (i.mask.reg && (i.mask.operand != i.operands - 1))
6557 {
6558 i.error = mask_not_on_destination;
6559 return 1;
6560 }
6561
6562 /* Check RC/SAE. */
6563 if (i.rounding.type != rc_none)
6564 {
6565 if (!t->opcode_modifier.sae
6566 || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
6567 || i.mem_operands)
6568 {
6569 i.error = unsupported_rc_sae;
6570 return 1;
6571 }
6572
6573 /* Non-EVEX.LIG forms need to have a ZMM register as at least one
6574 operand. */
6575 if (t->opcode_modifier.evex != EVEXLIG)
6576 {
6577 for (op = 0; op < t->operands; ++op)
6578 if (i.types[op].bitfield.zmmword)
6579 break;
6580 if (op >= t->operands)
6581 {
6582 i.error = operand_size_mismatch;
6583 return 1;
6584 }
6585 }
6586 }
6587
6588 /* Check the special Imm4 cases; must be the first operand. */
6589 if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
6590 {
6591 if (i.op[0].imms->X_op != O_constant
6592 || !fits_in_imm4 (i.op[0].imms->X_add_number))
6593 {
6594 i.error = bad_imm4;
6595 return 1;
6596 }
6597
6598 /* Turn off Imm<N> so that update_imm won't complain. */
6599 operand_type_set (&i.types[0], 0);
6600 }
6601
6602 /* Check vector Disp8 operand. */
6603 if (t->opcode_modifier.disp8memshift
6604 && i.disp_encoding <= disp_encoding_8bit)
6605 {
6606 if (i.broadcast.type || i.broadcast.bytes)
6607 i.memshift = t->opcode_modifier.broadcast - 1;
6608 else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6609 i.memshift = t->opcode_modifier.disp8memshift;
6610 else
6611 {
6612 const i386_operand_type *type = NULL, *fallback = NULL;
6613
6614 i.memshift = 0;
6615 for (op = 0; op < i.operands; op++)
6616 if (i.flags[op] & Operand_Mem)
6617 {
6618 if (t->opcode_modifier.evex == EVEXLIG)
6619 i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6620 else if (t->operand_types[op].bitfield.xmmword
6621 + t->operand_types[op].bitfield.ymmword
6622 + t->operand_types[op].bitfield.zmmword <= 1)
6623 type = &t->operand_types[op];
6624 else if (!i.types[op].bitfield.unspecified)
6625 type = &i.types[op];
6626 else /* Ambiguities get resolved elsewhere. */
6627 fallback = &t->operand_types[op];
6628 }
6629 else if (i.types[op].bitfield.class == RegSIMD
6630 && t->opcode_modifier.evex != EVEXLIG)
6631 {
6632 if (i.types[op].bitfield.zmmword)
6633 i.memshift = 6;
6634 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6635 i.memshift = 5;
6636 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6637 i.memshift = 4;
6638 }
6639
6640 if (!type && !i.memshift)
6641 type = fallback;
6642 if (type)
6643 {
6644 if (type->bitfield.zmmword)
6645 i.memshift = 6;
6646 else if (type->bitfield.ymmword)
6647 i.memshift = 5;
6648 else if (type->bitfield.xmmword)
6649 i.memshift = 4;
6650 }
6651
6652 /* For the check in fits_in_disp8(). */
6653 if (i.memshift == 0)
6654 i.memshift = -1;
6655 }
6656
6657 for (op = 0; op < i.operands; op++)
6658 if (operand_type_check (i.types[op], disp)
6659 && i.op[op].disps->X_op == O_constant)
6660 {
6661 if (fits_in_disp8 (i.op[op].disps->X_add_number))
6662 {
6663 i.types[op].bitfield.disp8 = 1;
6664 return 0;
6665 }
6666 i.types[op].bitfield.disp8 = 0;
6667 }
6668 }
6669
6670 i.memshift = 0;
6671
6672 return 0;
6673 }
6674
6675 /* Check if encoding requirements are met by the instruction. */
6676
6677 static int
6678 VEX_check_encoding (const insn_template *t)
6679 {
6680 if (i.vec_encoding == vex_encoding_error)
6681 {
6682 i.error = unsupported;
6683 return 1;
6684 }
6685
6686 if (i.vec_encoding == vex_encoding_evex)
6687 {
6688 /* This instruction must be encoded with EVEX prefix. */
6689 if (!is_evex_encoding (t))
6690 {
6691 i.error = unsupported;
6692 return 1;
6693 }
6694 return 0;
6695 }
6696
6697 if (!t->opcode_modifier.vex)
6698 {
6699 /* This instruction template doesn't have VEX prefix. */
6700 if (i.vec_encoding != vex_encoding_default)
6701 {
6702 i.error = unsupported;
6703 return 1;
6704 }
6705 return 0;
6706 }
6707
6708 return 0;
6709 }
6710
6711 /* Helper function for the progress() macro in match_template(). */
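/* Checks in match_template() run from generic to specific, so keeping
the error recorded at the highest source line reached means the most
specific diagnostic across all templates is the one reported. */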
6712 static INLINE enum i386_error progress (enum i386_error new,
6713 enum i386_error last,
6714 unsigned int line, unsigned int *line_p)
6715 {
6716 if (line <= *line_p)
6717 return last;
6718 *line_p = line;
6719 return new;
6720 }
6721
6722 static const insn_template *
6723 match_template (char mnem_suffix)
6724 {
6725 /* Points to template once we've found it. */
6726 const insn_template *t;
6727 i386_operand_type overlap0, overlap1, overlap2, overlap3;
6728 i386_operand_type overlap4;
6729 unsigned int found_reverse_match;
6730 i386_operand_type operand_types [MAX_OPERANDS];
6731 int addr_prefix_disp;
6732 unsigned int j, size_match, check_register, errline = __LINE__;
6733 enum i386_error specific_error = number_of_operands_mismatch;
6734 #define progress(err) progress (err, specific_error, __LINE__, &errline)
6735
6736 #if MAX_OPERANDS != 5
6737 # error "MAX_OPERANDS must be 5."
6738 #endif
6739
6740 found_reverse_match = 0;
6741 addr_prefix_disp = -1;
6742
6743 for (t = current_templates->start; t < current_templates->end; t++)
6744 {
6745 addr_prefix_disp = -1;
6746 found_reverse_match = 0;
6747
6748 /* Must have right number of operands. */
6749 if (i.operands != t->operands)
6750 continue;
6751
6752 /* Check processor support. */
6753 specific_error = progress (unsupported);
6754 if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6755 continue;
6756
6757 /* Check AT&T mnemonic. */
6758 specific_error = progress (unsupported_with_intel_mnemonic);
6759 if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6760 continue;
6761
6762 /* Check AT&T/Intel syntax. */
6763 specific_error = progress (unsupported_syntax);
6764 if ((intel_syntax && t->opcode_modifier.attsyntax)
6765 || (!intel_syntax && t->opcode_modifier.intelsyntax))
6766 continue;
6767
6768 /* Check Intel64/AMD64 ISA. */
6769 switch (isa64)
6770 {
6771 default:
6772 /* Default: Don't accept Intel64. */
6773 if (t->opcode_modifier.isa64 == INTEL64)
6774 continue;
6775 break;
6776 case amd64:
6777 /* -mamd64: Don't accept Intel64 and Intel64-only insns. */
6778 if (t->opcode_modifier.isa64 >= INTEL64)
6779 continue;
6780 break;
6781 case intel64:
6782 /* -mintel64: Don't accept AMD64. */
6783 if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6784 continue;
6785 break;
6786 }
6787
6788 /* Check the suffix. */
6789 specific_error = progress (invalid_instruction_suffix);
6790 if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
6791 || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
6792 || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
6793 || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
6794 || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
6795 continue;
6796
6797 specific_error = progress (operand_size_mismatch);
6798 size_match = operand_size_match (t);
6799 if (!size_match)
6800 continue;
6801
6802 /* This is intentionally not
6803
6804 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6805
6806 as the case of a missing * on the operand is accepted (perhaps with
6807 a warning, issued further down). */
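/* E.g. "call %eax" is thus accepted as if "call *%eax" had been
written, with the "indirect call without `*'" warning issued near
the end of this function. */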
6808 specific_error = progress (operand_type_mismatch);
6809 if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6810 continue;
6811
6812 /* In Intel syntax, normally we can check for memory operand size when
6813 there is no mnemonic suffix. But jmp and call have 2 different
6814 encodings with Dword memory operand size. Skip the "near" one
6815 (permitting a register operand) when "far" was requested. */
6816 if (i.far_branch
6817 && t->opcode_modifier.jump == JUMP_ABSOLUTE
6818 && t->operand_types[0].bitfield.class == Reg)
6819 continue;
6820
6821 for (j = 0; j < MAX_OPERANDS; j++)
6822 operand_types[j] = t->operand_types[j];
6823
6824 /* In general, don't allow 32-bit operands on pre-386. */
6825 specific_error = progress (mnem_suffix ? invalid_instruction_suffix
6826 : operand_size_mismatch);
6827 j = i.imm_operands + (t->operands > i.imm_operands + 1);
6828 if (i.suffix == LONG_MNEM_SUFFIX
6829 && !cpu_arch_flags.bitfield.cpui386
6830 && (intel_syntax
6831 ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6832 && !intel_float_operand (insn_name (t)))
6833 : intel_float_operand (insn_name (t)) != 2)
6834 && (t->operands == i.imm_operands
6835 || (operand_types[i.imm_operands].bitfield.class != RegMMX
6836 && operand_types[i.imm_operands].bitfield.class != RegSIMD
6837 && operand_types[i.imm_operands].bitfield.class != RegMask)
6838 || (operand_types[j].bitfield.class != RegMMX
6839 && operand_types[j].bitfield.class != RegSIMD
6840 && operand_types[j].bitfield.class != RegMask))
6841 && !t->opcode_modifier.sib)
6842 continue;
6843
6844 /* Do not verify operands when there are none. */
6845 if (!t->operands)
6846 {
6847 if (VEX_check_encoding (t))
6848 {
6849 specific_error = progress (i.error);
6850 continue;
6851 }
6852
6853 /* We've found a match; break out of loop. */
6854 break;
6855 }
6856
6857 if (!t->opcode_modifier.jump
6858 || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6859 {
6860 /* There should be only one Disp operand. */
6861 for (j = 0; j < MAX_OPERANDS; j++)
6862 if (operand_type_check (operand_types[j], disp))
6863 break;
6864 if (j < MAX_OPERANDS)
6865 {
6866 bool override = (i.prefix[ADDR_PREFIX] != 0);
6867
6868 addr_prefix_disp = j;
6869
6870 /* Address size prefix will turn Disp64 operand into Disp32 and
6871 Disp32/Disp16 one into Disp16/Disp32 respectively. */
6872 switch (flag_code)
6873 {
6874 case CODE_16BIT:
6875 override = !override;
6876 /* Fall through. */
6877 case CODE_32BIT:
6878 if (operand_types[j].bitfield.disp32
6879 && operand_types[j].bitfield.disp16)
6880 {
6881 operand_types[j].bitfield.disp16 = override;
6882 operand_types[j].bitfield.disp32 = !override;
6883 }
6884 gas_assert (!operand_types[j].bitfield.disp64);
6885 break;
6886
6887 case CODE_64BIT:
6888 if (operand_types[j].bitfield.disp64)
6889 {
6890 gas_assert (!operand_types[j].bitfield.disp32);
6891 operand_types[j].bitfield.disp32 = override;
6892 operand_types[j].bitfield.disp64 = !override;
6893 }
6894 operand_types[j].bitfield.disp16 = 0;
6895 break;
6896 }
6897 }
6898 }
6899
6900 /* We check register size if needed. */
6901 if (t->opcode_modifier.checkoperandsize)
6902 {
6903 check_register = (1 << t->operands) - 1;
6904 if (i.broadcast.type || i.broadcast.bytes)
6905 check_register &= ~(1 << i.broadcast.operand);
6906 }
6907 else
6908 check_register = 0;
6909
6910 overlap0 = operand_type_and (i.types[0], operand_types[0]);
6911 switch (t->operands)
6912 {
6913 case 1:
6914 if (!operand_type_match (overlap0, i.types[0]))
6915 continue;
6916
6917 /* Allow the ModR/M encoding to be requested by using the {load} or
6918 {store} pseudo prefix on an applicable insn. */
6919 if (!t->opcode_modifier.modrm
6920 && i.reg_operands == 1
6921 && ((i.dir_encoding == dir_encoding_load
6922 && t->mnem_off != MN_pop)
6923 || (i.dir_encoding == dir_encoding_store
6924 && t->mnem_off != MN_push))
6925 /* Avoid BSWAP. */
6926 && t->mnem_off != MN_bswap)
6927 continue;
6928 break;
6929
6930 case 2:
6931 /* xchg %eax, %eax is a special case. It is an alias for nop
6932 only in 32bit mode and we can use opcode 0x90. In 64bit
6933 mode, we can't use 0x90 for xchg %eax, %eax since it should
6934 zero-extend %eax to %rax. */
6935 if (t->base_opcode == 0x90
6936 && t->opcode_space == SPACE_BASE)
6937 {
6938 if (flag_code == CODE_64BIT
6939 && i.types[0].bitfield.instance == Accum
6940 && i.types[0].bitfield.dword
6941 && i.types[1].bitfield.instance == Accum)
6942 continue;
6943
6944 /* Allow the ModR/M encoding to be requested by using the
6945 {load} or {store} pseudo prefix. */
6946 if (i.dir_encoding == dir_encoding_load
6947 || i.dir_encoding == dir_encoding_store)
6948 continue;
6949 }
6950
6951 if (t->base_opcode == MOV_AX_DISP32
6952 && t->opcode_space == SPACE_BASE
6953 && t->mnem_off != MN_movabs)
6954 {
6955 /* Force 0x8b encoding for "mov foo@GOT, %eax". */
6956 if (i.reloc[0] == BFD_RELOC_386_GOT32)
6957 continue;
6958
6959 /* xrelease mov %eax, <disp> is another special case. It must not
6960 match the accumulator-only encoding of mov. */
6961 if (i.hle_prefix)
6962 continue;
6963
6964 /* Allow the ModR/M encoding to be requested by using a suitable
6965 {load} or {store} pseudo prefix. */
6966 if (i.dir_encoding == (i.types[0].bitfield.instance == Accum
6967 ? dir_encoding_store
6968 : dir_encoding_load)
6969 && !i.types[0].bitfield.disp64
6970 && !i.types[1].bitfield.disp64)
6971 continue;
6972 }
6973
6974 /* Allow the ModR/M encoding to be requested by using the {load} or
6975 {store} pseudo prefix on an applicable insn. */
6976 if (!t->opcode_modifier.modrm
6977 && i.reg_operands == 1
6978 && i.imm_operands == 1
6979 && (i.dir_encoding == dir_encoding_load
6980 || i.dir_encoding == dir_encoding_store)
6981 && t->opcode_space == SPACE_BASE)
6982 {
6983 if (t->base_opcode == 0xb0 /* mov $imm, %reg */
6984 && i.dir_encoding == dir_encoding_store)
6985 continue;
6986
6987 if ((t->base_opcode | 0x38) == 0x3c /* <alu> $imm, %acc */
6988 && (t->base_opcode != 0x3c /* cmp $imm, %acc */
6989 || i.dir_encoding == dir_encoding_load))
6990 continue;
6991
6992 if (t->base_opcode == 0xa8 /* test $imm, %acc */
6993 && i.dir_encoding == dir_encoding_load)
6994 continue;
6995 }
6996 /* Fall through. */
6997
6998 case 3:
6999 if (!(size_match & MATCH_STRAIGHT))
7000 goto check_reverse;
7001 /* Reverse direction of operands if swapping is possible in the first
7002 place (operands need to be symmetric) and
7003 - the load form is requested, and the template is a store form,
7004 - the store form is requested, and the template is a load form,
7005 - the non-default (swapped) form is requested. */
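/* E.g. "mov %ecx, %edx" defaults to the store form (opcode 0x89);
"{load} mov %ecx, %edx" makes the reversal below select the 0x8b
encoding instead. */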
7006 overlap1 = operand_type_and (operand_types[0], operand_types[1]);
7007 if (t->opcode_modifier.d && i.reg_operands == i.operands
7008 && !operand_type_all_zero (&overlap1))
7009 switch (i.dir_encoding)
7010 {
7011 case dir_encoding_load:
7012 if (operand_type_check (operand_types[i.operands - 1], anymem)
7013 || t->opcode_modifier.regmem)
7014 goto check_reverse;
7015 break;
7016
7017 case dir_encoding_store:
7018 if (!operand_type_check (operand_types[i.operands - 1], anymem)
7019 && !t->opcode_modifier.regmem)
7020 goto check_reverse;
7021 break;
7022
7023 case dir_encoding_swap:
7024 goto check_reverse;
7025
7026 case dir_encoding_default:
7027 break;
7028 }
7029 /* If we want store form, we skip the current load. */
7030 if ((i.dir_encoding == dir_encoding_store
7031 || i.dir_encoding == dir_encoding_swap)
7032 && i.mem_operands == 0
7033 && t->opcode_modifier.load)
7034 continue;
7035 /* Fall through. */
7036 case 4:
7037 case 5:
7038 overlap1 = operand_type_and (i.types[1], operand_types[1]);
7039 if (!operand_type_match (overlap0, i.types[0])
7040 || !operand_type_match (overlap1, i.types[1])
7041 || ((check_register & 3) == 3
7042 && !operand_type_register_match (i.types[0],
7043 operand_types[0],
7044 i.types[1],
7045 operand_types[1])))
7046 {
7047 specific_error = progress (i.error);
7048
7049 /* Check if other direction is valid ... */
7050 if (!t->opcode_modifier.d)
7051 continue;
7052
7053 check_reverse:
7054 if (!(size_match & MATCH_REVERSE))
7055 continue;
7056 /* Try reversing direction of operands. */
7057 j = t->cpu_flags.bitfield.cpufma4
7058 || t->cpu_flags.bitfield.cpuxop ? 1 : i.operands - 1;
7059 overlap0 = operand_type_and (i.types[0], operand_types[j]);
7060 overlap1 = operand_type_and (i.types[j], operand_types[0]);
7061 overlap2 = operand_type_and (i.types[1], operand_types[1]);
7062 gas_assert (t->operands != 3 || !check_register);
7063 if (!operand_type_match (overlap0, i.types[0])
7064 || !operand_type_match (overlap1, i.types[j])
7065 || (t->operands == 3
7066 && !operand_type_match (overlap2, i.types[1]))
7067 || (check_register
7068 && !operand_type_register_match (i.types[0],
7069 operand_types[j],
7070 i.types[j],
7071 operand_types[0])))
7072 {
7073 /* Does not match either direction. */
7074 specific_error = progress (i.error);
7075 continue;
7076 }
7077 /* found_reverse_match holds which variant of D
7078 we've found. */
7079 if (!t->opcode_modifier.d)
7080 found_reverse_match = 0;
7081 else if (operand_types[0].bitfield.tbyte)
7082 {
7083 if (t->opcode_modifier.operandconstraint != UGH)
7084 found_reverse_match = Opcode_FloatD;
7085 else
7086 found_reverse_match = ~0;
7087 /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped. */
7088 if ((t->extension_opcode & 4)
7089 && (intel_syntax || intel_mnemonic))
7090 found_reverse_match |= Opcode_FloatR;
7091 }
7092 else if (t->cpu_flags.bitfield.cpufma4
7093 || t->cpu_flags.bitfield.cpuxop)
7094 {
7095 found_reverse_match = Opcode_VexW;
7096 goto check_operands_345;
7097 }
7098 else if (t->opcode_space != SPACE_BASE
7099 && (t->opcode_space != SPACE_0F
7100 /* MOV to/from CR/DR/TR, as an exception, follow
7101 the base opcode space encoding model. */
7102 || (t->base_opcode | 7) != 0x27))
7103 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
7104 ? Opcode_ExtD : Opcode_SIMD_IntD;
7105 else if (!t->opcode_modifier.commutative)
7106 found_reverse_match = Opcode_D;
7107 else
7108 found_reverse_match = ~0;
7109 }
7110 else
7111 {
7112 /* Found a forward 2 operand match here. */
7113 check_operands_345:
7114 switch (t->operands)
7115 {
7116 case 5:
7117 overlap4 = operand_type_and (i.types[4], operand_types[4]);
7118 if (!operand_type_match (overlap4, i.types[4])
7119 || !operand_type_register_match (i.types[3],
7120 operand_types[3],
7121 i.types[4],
7122 operand_types[4]))
7123 {
7124 specific_error = progress (i.error);
7125 continue;
7126 }
7127 /* Fall through. */
7128 case 4:
7129 overlap3 = operand_type_and (i.types[3], operand_types[3]);
7130 if (!operand_type_match (overlap3, i.types[3])
7131 || ((check_register & 0xa) == 0xa
7132 && !operand_type_register_match (i.types[1],
7133 operand_types[1],
7134 i.types[3],
7135 operand_types[3]))
7136 || ((check_register & 0xc) == 0xc
7137 && !operand_type_register_match (i.types[2],
7138 operand_types[2],
7139 i.types[3],
7140 operand_types[3])))
7141 {
7142 specific_error = progress (i.error);
7143 continue;
7144 }
7145 /* Fall through. */
7146 case 3:
7147 overlap2 = operand_type_and (i.types[2], operand_types[2]);
7148 if (!operand_type_match (overlap2, i.types[2])
7149 || ((check_register & 5) == 5
7150 && !operand_type_register_match (i.types[0],
7151 operand_types[0],
7152 i.types[2],
7153 operand_types[2]))
7154 || ((check_register & 6) == 6
7155 && !operand_type_register_match (i.types[1],
7156 operand_types[1],
7157 i.types[2],
7158 operand_types[2])))
7159 {
7160 specific_error = progress (i.error);
7161 continue;
7162 }
7163 break;
7164 }
7165 }
7166 /* Found either forward/reverse 2, 3 or 4 operand match here:
7167 slip through to break. */
7168 }
7169
7170 /* Check if VEX/EVEX encoding requirements can be satisfied. */
7171 if (VEX_check_encoding (t))
7172 {
7173 specific_error = progress (i.error);
7174 continue;
7175 }
7176
7177 /* Check if vector operands are valid. */
7178 if (check_VecOperands (t))
7179 {
7180 specific_error = progress (i.error);
7181 continue;
7182 }
7183
7184 /* We've found a match; break out of loop. */
7185 break;
7186 }
7187
7188 #undef progress
7189
7190 if (t == current_templates->end)
7191 {
7192 /* We found no match. */
7193 i.error = specific_error;
7194 return NULL;
7195 }
7196
7197 if (!quiet_warnings)
7198 {
7199 if (!intel_syntax
7200 && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
7201 as_warn (_("indirect %s without `*'"), insn_name (t));
7202
7203 if (t->opcode_modifier.isprefix
7204 && t->opcode_modifier.mnemonicsize == IGNORESIZE)
7205 {
7206 /* Warn them that a data or address size prefix doesn't
7207 affect assembly of the next line of code. */
7208 as_warn (_("stand-alone `%s' prefix"), insn_name (t));
7209 }
7210 }
7211
7212 /* Copy the template we found. */
7213 install_template (t);
7214
7215 if (addr_prefix_disp != -1)
7216 i.tm.operand_types[addr_prefix_disp]
7217 = operand_types[addr_prefix_disp];
7218
7219 switch (found_reverse_match)
7220 {
7221 case 0:
7222 break;
7223
7224 case Opcode_FloatR:
7225 case Opcode_FloatR | Opcode_FloatD:
7226 i.tm.extension_opcode ^= Opcode_FloatR >> 3;
7227 found_reverse_match &= Opcode_FloatD;
7228
7229 /* Fall through. */
7230 default:
7231 /* If we found a reverse match we must alter the opcode direction
7232 bit and clear/flip the regmem modifier one. found_reverse_match
7233 holds bits to change (different for int & float insns). */
7234
7235 i.tm.base_opcode ^= found_reverse_match;
7236
7237 /* Certain SIMD insns have their load forms specified in the opcode
7238 table, and hence we need to _set_ RegMem instead of clearing it.
7239 We need to avoid setting the bit though on insns like KMOVW. */
7240 i.tm.opcode_modifier.regmem
7241 = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
7242 && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
7243 && !i.tm.opcode_modifier.regmem;
7244
7245 /* Fall through. */
7246 case ~0:
7247 i.tm.operand_types[0] = operand_types[i.operands - 1];
7248 i.tm.operand_types[i.operands - 1] = operand_types[0];
7249 break;
7250
7251 case Opcode_VexW:
7252 /* Only the first two register operands need reversing, alongside
7253 flipping VEX.W. */
7254 i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
7255
7256 j = i.tm.operand_types[0].bitfield.imm8;
7257 i.tm.operand_types[j] = operand_types[j + 1];
7258 i.tm.operand_types[j + 1] = operand_types[j];
7259 break;
7260 }
7261
7262 return t;
7263 }
7264
7265 static int
7266 check_string (void)
7267 {
7268 unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
7269 unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
7270
7271 if (i.seg[op] != NULL && i.seg[op] != reg_es)
7272 {
7273 as_bad (_("`%s' operand %u must use `%ses' segment"),
7274 insn_name (&i.tm),
7275 intel_syntax ? i.tm.operands - es_op : es_op + 1,
7276 register_prefix);
7277 return 0;
7278 }
7279
7280 /* There's only ever one segment override allowed per instruction.
7281 This instruction possibly has a legal segment override on the
7282 second operand, so copy the segment to where non-string
7283 instructions store it, allowing common code. */
7284 i.seg[op] = i.seg[1];
7285
7286 return 1;
7287 }
7288
7289 static int
7290 process_suffix (void)
7291 {
7292 bool is_movx = false;
7293
7294 /* If matched instruction specifies an explicit instruction mnemonic
7295 suffix, use it. */
7296 if (i.tm.opcode_modifier.size == SIZE16)
7297 i.suffix = WORD_MNEM_SUFFIX;
7298 else if (i.tm.opcode_modifier.size == SIZE32)
7299 i.suffix = LONG_MNEM_SUFFIX;
7300 else if (i.tm.opcode_modifier.size == SIZE64)
7301 i.suffix = QWORD_MNEM_SUFFIX;
7302 else if (i.reg_operands
7303 && (i.operands > 1 || i.types[0].bitfield.class == Reg)
7304 && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
7305 {
7306 unsigned int numop = i.operands;
7307
7308 /* MOVSX/MOVZX */
7309 is_movx = (i.tm.opcode_space == SPACE_0F
7310 && (i.tm.base_opcode | 8) == 0xbe)
7311 || (i.tm.opcode_space == SPACE_BASE
7312 && i.tm.base_opcode == 0x63
7313 && i.tm.cpu_flags.bitfield.cpu64);
7314
7315 /* movsx/movzx want only their source operand considered here, for the
7316 ambiguity checking below. The suffix will be replaced afterwards
7317 to represent the destination (register). */
7318 if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
7319 --i.operands;
7320
7321 /* crc32 needs REX.W set regardless of suffix / source operand size. */
7322 if (i.tm.mnem_off == MN_crc32 && i.tm.operand_types[1].bitfield.qword)
7323 i.rex |= REX_W;
7324
7325 /* If there's no instruction mnemonic suffix we try to invent one
7326 based on GPR operands. */
7327 if (!i.suffix)
7328 {
7329 /* We take i.suffix from the last register operand specified.
7330 The destination register type is more significant than the
7331 source register type. crc32 in SSE4.2 prefers the source
7332 register type. */
7333 unsigned int op = i.tm.mnem_off == MN_crc32 ? 1 : i.operands;
7334
7335 while (op--)
7336 if (i.tm.operand_types[op].bitfield.instance == InstanceNone
7337 || i.tm.operand_types[op].bitfield.instance == Accum)
7338 {
7339 if (i.types[op].bitfield.class != Reg)
7340 continue;
7341 if (i.types[op].bitfield.byte)
7342 i.suffix = BYTE_MNEM_SUFFIX;
7343 else if (i.types[op].bitfield.word)
7344 i.suffix = WORD_MNEM_SUFFIX;
7345 else if (i.types[op].bitfield.dword)
7346 i.suffix = LONG_MNEM_SUFFIX;
7347 else if (i.types[op].bitfield.qword)
7348 i.suffix = QWORD_MNEM_SUFFIX;
7349 else
7350 continue;
7351 break;
7352 }
7353
7354 /* As an exception, movsx/movzx silently default to a byte source
7355 in AT&T mode. */
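/* E.g. "movzx (%eax), %edx" is assembled as if "movzbl" had
been written. */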
7356 if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
7357 i.suffix = BYTE_MNEM_SUFFIX;
7358 }
7359 else if (i.suffix == BYTE_MNEM_SUFFIX)
7360 {
7361 if (!check_byte_reg ())
7362 return 0;
7363 }
7364 else if (i.suffix == LONG_MNEM_SUFFIX)
7365 {
7366 if (!check_long_reg ())
7367 return 0;
7368 }
7369 else if (i.suffix == QWORD_MNEM_SUFFIX)
7370 {
7371 if (!check_qword_reg ())
7372 return 0;
7373 }
7374 else if (i.suffix == WORD_MNEM_SUFFIX)
7375 {
7376 if (!check_word_reg ())
7377 return 0;
7378 }
7379 else if (intel_syntax
7380 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
7381 /* Do nothing if the instruction is going to ignore the prefix. */
7382 ;
7383 else
7384 abort ();
7385
7386 /* Undo the movsx/movzx change done above. */
7387 i.operands = numop;
7388 }
7389 else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
7390 && !i.suffix)
7391 {
7392 i.suffix = stackop_size;
7393 if (stackop_size == LONG_MNEM_SUFFIX)
7394 {
7395 /* stackop_size is set to LONG_MNEM_SUFFIX for the
7396 .code16gcc directive to support 16-bit mode with
7397 32-bit address. For IRET without a suffix, generate
7398 16-bit IRET (opcode 0xcf) to return from an interrupt
7399 handler. */
7400 if (i.tm.base_opcode == 0xcf)
7401 {
7402 i.suffix = WORD_MNEM_SUFFIX;
7403 as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
7404 }
7405 /* Warn about changed behavior for segment register push/pop. */
7406 else if ((i.tm.base_opcode | 1) == 0x07)
7407 as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
7408 insn_name (&i.tm));
7409 }
7410 }
7411 else if (!i.suffix
7412 && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
7413 || i.tm.opcode_modifier.jump == JUMP_BYTE
7414 || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
7415 || (i.tm.opcode_space == SPACE_0F
7416 && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
7417 && i.tm.extension_opcode <= 3)))
7418 {
7419 switch (flag_code)
7420 {
7421 case CODE_64BIT:
7422 if (!i.tm.opcode_modifier.no_qsuf)
7423 {
7424 if (i.tm.opcode_modifier.jump == JUMP_BYTE
7425 || i.tm.opcode_modifier.no_lsuf)
7426 i.suffix = QWORD_MNEM_SUFFIX;
7427 break;
7428 }
7429 /* Fall through. */
7430 case CODE_32BIT:
7431 if (!i.tm.opcode_modifier.no_lsuf)
7432 i.suffix = LONG_MNEM_SUFFIX;
7433 break;
7434 case CODE_16BIT:
7435 if (!i.tm.opcode_modifier.no_wsuf)
7436 i.suffix = WORD_MNEM_SUFFIX;
7437 break;
7438 }
7439 }
7440
7441 if (!i.suffix
7442 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7443 /* Also cover lret/retf/iret in 64-bit mode. */
7444 || (flag_code == CODE_64BIT
7445 && !i.tm.opcode_modifier.no_lsuf
7446 && !i.tm.opcode_modifier.no_qsuf))
7447 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7448 /* Explicit sizing prefixes are assumed to disambiguate insns. */
7449 && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
7450 /* Accept FLDENV et al without suffix. */
7451 && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
7452 {
7453 unsigned int suffixes, evex = 0;
7454
7455 suffixes = !i.tm.opcode_modifier.no_bsuf;
7456 if (!i.tm.opcode_modifier.no_wsuf)
7457 suffixes |= 1 << 1;
7458 if (!i.tm.opcode_modifier.no_lsuf)
7459 suffixes |= 1 << 2;
7460 if (!i.tm.opcode_modifier.no_ssuf)
7461 suffixes |= 1 << 4;
7462 if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
7463 suffixes |= 1 << 5;
7464
7465 /* For [XYZ]MMWORD operands inspect operand sizes. While generally
7466 also suitable for AT&T syntax mode, it was requested that this be
7467 restricted to just Intel syntax. */
7468 if (intel_syntax && is_any_vex_encoding (&i.tm)
7469 && !i.broadcast.type && !i.broadcast.bytes)
7470 {
7471 unsigned int op;
7472
7473 for (op = 0; op < i.tm.operands; ++op)
7474 {
7475 if (is_evex_encoding (&i.tm)
7476 && !cpu_arch_flags.bitfield.cpuavx512vl)
7477 {
7478 if (i.tm.operand_types[op].bitfield.ymmword)
7479 i.tm.operand_types[op].bitfield.xmmword = 0;
7480 if (i.tm.operand_types[op].bitfield.zmmword)
7481 i.tm.operand_types[op].bitfield.ymmword = 0;
7482 if (!i.tm.opcode_modifier.evex
7483 || i.tm.opcode_modifier.evex == EVEXDYN)
7484 i.tm.opcode_modifier.evex = EVEX512;
7485 }
7486
7487 if (i.tm.operand_types[op].bitfield.xmmword
7488 + i.tm.operand_types[op].bitfield.ymmword
7489 + i.tm.operand_types[op].bitfield.zmmword < 2)
7490 continue;
7491
7492 /* Any properly sized operand disambiguates the insn. */
7493 if (i.types[op].bitfield.xmmword
7494 || i.types[op].bitfield.ymmword
7495 || i.types[op].bitfield.zmmword)
7496 {
7497 suffixes &= ~(7 << 6);
7498 evex = 0;
7499 break;
7500 }
7501
7502 if ((i.flags[op] & Operand_Mem)
7503 && i.tm.operand_types[op].bitfield.unspecified)
7504 {
7505 if (i.tm.operand_types[op].bitfield.xmmword)
7506 suffixes |= 1 << 6;
7507 if (i.tm.operand_types[op].bitfield.ymmword)
7508 suffixes |= 1 << 7;
7509 if (i.tm.operand_types[op].bitfield.zmmword)
7510 suffixes |= 1 << 8;
7511 if (is_evex_encoding (&i.tm))
7512 evex = EVEX512;
7513 }
7514 }
7515 }
7516
7517 /* Are multiple suffixes / operand sizes allowed? */
7518 if (suffixes & (suffixes - 1))
7519 {
7520 if (intel_syntax
7521 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7522 || operand_check == check_error))
7523 {
7524 as_bad (_("ambiguous operand size for `%s'"), insn_name (&i.tm));
7525 return 0;
7526 }
7527 if (operand_check == check_error)
7528 {
7529 as_bad (_("no instruction mnemonic suffix given and "
7530 "no register operands; can't size `%s'"), insn_name (&i.tm));
7531 return 0;
7532 }
7533 if (operand_check == check_warning)
7534 as_warn (_("%s; using default for `%s'"),
7535 intel_syntax
7536 ? _("ambiguous operand size")
7537 : _("no instruction mnemonic suffix given and "
7538 "no register operands"),
7539 insn_name (&i.tm));
7540
7541 if (i.tm.opcode_modifier.floatmf)
7542 i.suffix = SHORT_MNEM_SUFFIX;
7543 else if (is_movx)
7544 /* handled below */;
7545 else if (evex)
7546 i.tm.opcode_modifier.evex = evex;
7547 else if (flag_code == CODE_16BIT)
7548 i.suffix = WORD_MNEM_SUFFIX;
7549 else if (!i.tm.opcode_modifier.no_lsuf)
7550 i.suffix = LONG_MNEM_SUFFIX;
7551 else
7552 i.suffix = QWORD_MNEM_SUFFIX;
7553 }
7554 }
7555
7556 if (is_movx)
7557 {
7558 /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7559 In AT&T syntax, if there is no suffix (warned about above), the default
7560 will be byte extension. */
7561 if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7562 i.tm.base_opcode |= 1;
7563
7564 /* For further processing, the suffix should represent the destination
7565 (register). This is already the case when one was used with
7566 mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7567 no suffix to begin with. */
7568 if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7569 {
7570 if (i.types[1].bitfield.word)
7571 i.suffix = WORD_MNEM_SUFFIX;
7572 else if (i.types[1].bitfield.qword)
7573 i.suffix = QWORD_MNEM_SUFFIX;
7574 else
7575 i.suffix = LONG_MNEM_SUFFIX;
7576
7577 i.tm.opcode_modifier.w = 0;
7578 }
7579 }
7580
7581 if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7582 i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7583 != (i.tm.operand_types[1].bitfield.class == Reg);
7584
7585 /* Change the opcode based on the operand size given by i.suffix. */
7586 switch (i.suffix)
7587 {
7588 /* Size floating point instruction. */
7589 case LONG_MNEM_SUFFIX:
7590 if (i.tm.opcode_modifier.floatmf)
7591 {
7592 i.tm.base_opcode ^= 4;
7593 break;
7594 }
7595 /* fall through */
7596 case WORD_MNEM_SUFFIX:
7597 case QWORD_MNEM_SUFFIX:
7598 /* It's not a byte, select word/dword operation. */
7599 if (i.tm.opcode_modifier.w)
7600 {
7601 if (i.short_form)
7602 i.tm.base_opcode |= 8;
7603 else
7604 i.tm.base_opcode |= 1;
7605 }
7606 /* fall through */
7607 case SHORT_MNEM_SUFFIX:
7608 /* Now select between word & dword operations via the operand
7609 size prefix, except for instructions that will ignore this
7610 prefix anyway. */
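/* E.g. a 'w'-suffixed insn in 32-bit mode gets the 0x66 data size
prefix; jcxz/loop instead need the 0x67 address size prefix. */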
7611 if (i.suffix != QWORD_MNEM_SUFFIX
7612 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7613 && !i.tm.opcode_modifier.floatmf
7614 && !is_any_vex_encoding (&i.tm)
7615 && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7616 || (flag_code == CODE_64BIT
7617 && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7618 {
7619 unsigned int prefix = DATA_PREFIX_OPCODE;
7620
7621 if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7622 prefix = ADDR_PREFIX_OPCODE;
7623
7624 if (!add_prefix (prefix))
7625 return 0;
7626 }
7627
7628 /* Set mode64 for an operand. */
7629 if (i.suffix == QWORD_MNEM_SUFFIX
7630 && flag_code == CODE_64BIT
7631 && !i.tm.opcode_modifier.norex64
7632 && !i.tm.opcode_modifier.vexw
7633 /* Special case for xchg %rax,%rax. It is NOP and doesn't
7634 need rex64. */
7635 && ! (i.operands == 2
7636 && i.tm.base_opcode == 0x90
7637 && i.tm.opcode_space == SPACE_BASE
7638 && i.types[0].bitfield.instance == Accum
7639 && i.types[0].bitfield.qword
7640 && i.types[1].bitfield.instance == Accum))
7641 i.rex |= REX_W;
7642
7643 break;
7644
7645 case 0:
7646 /* Select word/dword/qword operation with explicit data sizing prefix
7647 when there are no suitable register operands. */
7648 if (i.tm.opcode_modifier.w
7649 && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7650 && (!i.reg_operands
7651 || (i.reg_operands == 1
7652 /* ShiftCount */
7653 && (i.tm.operand_types[0].bitfield.instance == RegC
7654 /* InOutPortReg */
7655 || i.tm.operand_types[0].bitfield.instance == RegD
7656 || i.tm.operand_types[1].bitfield.instance == RegD
7657 || i.tm.mnem_off == MN_crc32))))
7658 i.tm.base_opcode |= 1;
7659 break;
7660 }
7661
7662 if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
7663 {
7664 gas_assert (!i.suffix);
7665 gas_assert (i.reg_operands);
7666
7667 if (i.tm.operand_types[0].bitfield.instance == Accum
7668 || i.operands == 1)
7669 {
7670 /* The address size override prefix changes the size of the
7671 first operand. */
7672 if (flag_code == CODE_64BIT
7673 && i.op[0].regs->reg_type.bitfield.word)
7674 {
7675 as_bad (_("16-bit addressing unavailable for `%s'"),
7676 insn_name (&i.tm));
7677 return 0;
7678 }
7679
7680 if ((flag_code == CODE_32BIT
7681 ? i.op[0].regs->reg_type.bitfield.word
7682 : i.op[0].regs->reg_type.bitfield.dword)
7683 && !add_prefix (ADDR_PREFIX_OPCODE))
7684 return 0;
7685 }
7686 else
7687 {
7688 /* Check for an invalid register operand when the address size
7689 override prefix changes the size of register operands. */
7690 unsigned int op;
7691 enum { need_word, need_dword, need_qword } need;
7692
7693 /* Check the register operand for the address size prefix if
7694 the memory operand has no real registers, i.e. is a bare symbol, a
7695 plain DISP, or a bogus (x32-only) symbol(%rip) when symbol(%eip) is meant. */
7696 if (i.mem_operands == 1
7697 && i.reg_operands == 1
7698 && i.operands == 2
7699 && i.types[1].bitfield.class == Reg
7700 && (flag_code == CODE_32BIT
7701 ? i.op[1].regs->reg_type.bitfield.word
7702 : i.op[1].regs->reg_type.bitfield.dword)
7703 && ((i.base_reg == NULL && i.index_reg == NULL)
7704 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7705 || (x86_elf_abi == X86_64_X32_ABI
7706 && i.base_reg
7707 && i.base_reg->reg_num == RegIP
7708 && i.base_reg->reg_type.bitfield.qword))
7709 #else
7710 || 0)
7711 #endif
7712 && !add_prefix (ADDR_PREFIX_OPCODE))
7713 return 0;
7714
7715 if (flag_code == CODE_32BIT)
7716 need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7717 else if (i.prefix[ADDR_PREFIX])
7718 need = need_dword;
7719 else
7720 need = flag_code == CODE_64BIT ? need_qword : need_word;
7721
7722 for (op = 0; op < i.operands; op++)
7723 {
7724 if (i.types[op].bitfield.class != Reg)
7725 continue;
7726
7727 switch (need)
7728 {
7729 case need_word:
7730 if (i.op[op].regs->reg_type.bitfield.word)
7731 continue;
7732 break;
7733 case need_dword:
7734 if (i.op[op].regs->reg_type.bitfield.dword)
7735 continue;
7736 break;
7737 case need_qword:
7738 if (i.op[op].regs->reg_type.bitfield.qword)
7739 continue;
7740 break;
7741 }
7742
7743 as_bad (_("invalid register operand size for `%s'"),
7744 insn_name (&i.tm));
7745 return 0;
7746 }
7747 }
7748 }
7749
7750 return 1;
7751 }
7752
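/* The check_*_reg helpers below validate register operands against the
   mnemonic suffix.  A rough sketch of typical outcomes:

   movb %al, (%esi)      accepted
   movb %eax, (%esi)     rejected by check_byte_reg
   movzbl %al, %ecx      accepted: the template wants a byte source
   movw %eax, (%esi)     rejected by check_word_reg  */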
7753 static int
7754 check_byte_reg (void)
7755 {
7756 int op;
7757
7758 for (op = i.operands; --op >= 0;)
7759 {
7760 /* Skip non-register operands. */
7761 if (i.types[op].bitfield.class != Reg)
7762 continue;
7763
7764 /* If this is an eight bit register, it's OK. If it's the 16 or
7765 32 bit version of an eight bit register, we will just use the
7766 low portion, and that's OK too. */
7767 if (i.types[op].bitfield.byte)
7768 continue;
7769
7770 /* I/O port address operands are OK too. */
7771 if (i.tm.operand_types[op].bitfield.instance == RegD
7772 && i.tm.operand_types[op].bitfield.word)
7773 continue;
7774
7775 /* crc32 only wants its source operand checked here. */
7776 if (i.tm.mnem_off == MN_crc32 && op != 0)
7777 continue;
7778
7779 /* Any other register is bad. */
7780 as_bad (_("`%s%s' not allowed with `%s%c'"),
7781 register_prefix, i.op[op].regs->reg_name,
7782 insn_name (&i.tm), i.suffix);
7783 return 0;
7784 }
7785 return 1;
7786 }
7787
7788 static int
7789 check_long_reg (void)
7790 {
7791 int op;
7792
7793 for (op = i.operands; --op >= 0;)
7794 /* Skip non-register operands. */
7795 if (i.types[op].bitfield.class != Reg)
7796 continue;
7797 /* Reject eight bit registers, except where the template requires
7798 them. (e.g. movzb) */
7799 else if (i.types[op].bitfield.byte
7800 && (i.tm.operand_types[op].bitfield.class == Reg
7801 || i.tm.operand_types[op].bitfield.instance == Accum)
7802 && (i.tm.operand_types[op].bitfield.word
7803 || i.tm.operand_types[op].bitfield.dword))
7804 {
7805 as_bad (_("`%s%s' not allowed with `%s%c'"),
7806 register_prefix,
7807 i.op[op].regs->reg_name,
7808 insn_name (&i.tm),
7809 i.suffix);
7810 return 0;
7811 }
7812 /* Error if the e prefix on a general reg is missing. */
7813 else if (i.types[op].bitfield.word
7814 && (i.tm.operand_types[op].bitfield.class == Reg
7815 || i.tm.operand_types[op].bitfield.instance == Accum)
7816 && i.tm.operand_types[op].bitfield.dword)
7817 {
7818 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7819 register_prefix, i.op[op].regs->reg_name,
7820 i.suffix);
7821 return 0;
7822 }
7823 /* Error if the r prefix on a general reg is present. */
7824 else if (i.types[op].bitfield.qword
7825 && (i.tm.operand_types[op].bitfield.class == Reg
7826 || i.tm.operand_types[op].bitfield.instance == Accum)
7827 && i.tm.operand_types[op].bitfield.dword)
7828 {
7829 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7830 register_prefix, i.op[op].regs->reg_name, i.suffix);
7831 return 0;
7832 }
7833 return 1;
7834 }
7835
7836 static int
7837 check_qword_reg (void)
7838 {
7839 int op;
7840
7841 for (op = i.operands; --op >= 0; )
7842 /* Skip non-register operands. */
7843 if (i.types[op].bitfield.class != Reg)
7844 continue;
7845 /* Reject eight bit registers, except where the template requires
7846 them. (e.g. movzb) */
7847 else if (i.types[op].bitfield.byte
7848 && (i.tm.operand_types[op].bitfield.class == Reg
7849 || i.tm.operand_types[op].bitfield.instance == Accum)
7850 && (i.tm.operand_types[op].bitfield.word
7851 || i.tm.operand_types[op].bitfield.dword))
7852 {
7853 as_bad (_("`%s%s' not allowed with `%s%c'"),
7854 register_prefix,
7855 i.op[op].regs->reg_name,
7856 insn_name (&i.tm),
7857 i.suffix);
7858 return 0;
7859 }
7860 /* Error if the r prefix on a general reg is missing. */
7861 else if ((i.types[op].bitfield.word
7862 || i.types[op].bitfield.dword)
7863 && (i.tm.operand_types[op].bitfield.class == Reg
7864 || i.tm.operand_types[op].bitfield.instance == Accum)
7865 && i.tm.operand_types[op].bitfield.qword)
7866 {
7867 /* Prohibit these changes in 64-bit mode, since the
7868 lowering would be more complicated. */
7869 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7870 register_prefix, i.op[op].regs->reg_name, i.suffix);
7871 return 0;
7872 }
7873 return 1;
7874 }
7875
7876 static int
7877 check_word_reg (void)
7878 {
7879 int op;
7880 for (op = i.operands; --op >= 0;)
7881 /* Skip non-register operands. */
7882 if (i.types[op].bitfield.class != Reg)
7883 continue;
7884 /* Reject eight bit registers, except where the template requires
7885 them. (e.g. movzb) */
7886 else if (i.types[op].bitfield.byte
7887 && (i.tm.operand_types[op].bitfield.class == Reg
7888 || i.tm.operand_types[op].bitfield.instance == Accum)
7889 && (i.tm.operand_types[op].bitfield.word
7890 || i.tm.operand_types[op].bitfield.dword))
7891 {
7892 as_bad (_("`%s%s' not allowed with `%s%c'"),
7893 register_prefix,
7894 i.op[op].regs->reg_name,
7895 insn_name (&i.tm),
7896 i.suffix);
7897 return 0;
7898 }
7899 /* Error if the e or r prefix on a general reg is present. */
7900 else if ((i.types[op].bitfield.dword
7901 || i.types[op].bitfield.qword)
7902 && (i.tm.operand_types[op].bitfield.class == Reg
7903 || i.tm.operand_types[op].bitfield.instance == Accum)
7904 && i.tm.operand_types[op].bitfield.word)
7905 {
7906 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7907 register_prefix, i.op[op].regs->reg_name,
7908 i.suffix);
7909 return 0;
7910 }
7911 return 1;
7912 }
7913
7914 static int
7915 update_imm (unsigned int j)
7916 {
7917 i386_operand_type overlap = i.types[j];
7918
7919 if (i.tm.operand_types[j].bitfield.imm8
7920 && i.tm.operand_types[j].bitfield.imm8s
7921 && overlap.bitfield.imm8 && overlap.bitfield.imm8s)
7922 {
7923 /* This combination is used on 8-bit immediates where it is desirable
7924 to permit e.g. $~0. We're past operand type matching, so simply
7925 put things back in the shape they were in before introducing the
7926 distinction between Imm8, Imm8S, and Imm8|Imm8S. */
7927 overlap.bitfield.imm8s = 0;
7928 }
7929
7930 if (overlap.bitfield.imm8
7931 + overlap.bitfield.imm8s
7932 + overlap.bitfield.imm16
7933 + overlap.bitfield.imm32
7934 + overlap.bitfield.imm32s
7935 + overlap.bitfield.imm64 > 1)
7936 {
7937 static const i386_operand_type imm16 = { .bitfield = { .imm16 = 1 } };
7938 static const i386_operand_type imm32 = { .bitfield = { .imm32 = 1 } };
7939 static const i386_operand_type imm32s = { .bitfield = { .imm32s = 1 } };
7940 static const i386_operand_type imm16_32 = { .bitfield =
7941 { .imm16 = 1, .imm32 = 1 }
7942 };
7943 static const i386_operand_type imm16_32s = { .bitfield =
7944 { .imm16 = 1, .imm32s = 1 }
7945 };
7946 static const i386_operand_type imm16_32_32s = { .bitfield =
7947 { .imm16 = 1, .imm32 = 1, .imm32s = 1 }
7948 };
7949
7950 if (i.suffix)
7951 {
7952 i386_operand_type temp;
7953
7954 operand_type_set (&temp, 0);
7955 if (i.suffix == BYTE_MNEM_SUFFIX)
7956 {
7957 temp.bitfield.imm8 = overlap.bitfield.imm8;
7958 temp.bitfield.imm8s = overlap.bitfield.imm8s;
7959 }
7960 else if (i.suffix == WORD_MNEM_SUFFIX)
7961 temp.bitfield.imm16 = overlap.bitfield.imm16;
7962 else if (i.suffix == QWORD_MNEM_SUFFIX)
7963 {
7964 temp.bitfield.imm64 = overlap.bitfield.imm64;
7965 temp.bitfield.imm32s = overlap.bitfield.imm32s;
7966 }
7967 else
7968 temp.bitfield.imm32 = overlap.bitfield.imm32;
7969 overlap = temp;
7970 }
7971 else if (operand_type_equal (&overlap, &imm16_32_32s)
7972 || operand_type_equal (&overlap, &imm16_32)
7973 || operand_type_equal (&overlap, &imm16_32s))
7974 {
7975 if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
7976 overlap = imm16;
7977 else
7978 overlap = imm32s;
7979 }
7980 else if (i.prefix[REX_PREFIX] & REX_W)
7981 overlap = operand_type_and (overlap, imm32s);
7982 else if (i.prefix[DATA_PREFIX])
7983 overlap = operand_type_and (overlap,
7984 flag_code != CODE_16BIT ? imm16 : imm32);
7985 if (overlap.bitfield.imm8
7986 + overlap.bitfield.imm8s
7987 + overlap.bitfield.imm16
7988 + overlap.bitfield.imm32
7989 + overlap.bitfield.imm32s
7990 + overlap.bitfield.imm64 != 1)
7991 {
7992 as_bad (_("no instruction mnemonic suffix given; "
7993 "can't determine immediate size"));
7994 return 0;
7995 }
7996 }
7997 i.types[j] = overlap;
7998
7999 return 1;
8000 }
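/* A worked example of the narrowing above (illustrative): for
   `add $0x1234, %ax' the derived `w' suffix reduces the remaining
   Imm16|Imm32 overlap to Imm16.  With no suffix at all, the mode/prefix
   pair decides: 16-bit code without a data size prefix (or 32/64-bit
   code with one) yields Imm16, else Imm32S.  */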
8001
8002 static int
8003 finalize_imm (void)
8004 {
8005 unsigned int j, n;
8006
8007 /* Update the first 2 immediate operands. */
8008 n = i.operands > 2 ? 2 : i.operands;
8009 if (n)
8010 {
8011 for (j = 0; j < n; j++)
8012 if (update_imm (j) == 0)
8013 return 0;
8014
8015 /* The 3rd operand can't be an immediate operand. */
8016 gas_assert (operand_type_check (i.types[2], imm) == 0);
8017 }
8018
8019 return 1;
8020 }
8021
8022 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
8023 bool do_sse2avx)
8024 {
8025 if (r->reg_flags & RegRex)
8026 {
8027 if (i.rex & rex_bit)
8028 as_bad (_("same type of prefix used twice"));
8029 i.rex |= rex_bit;
8030 }
8031 else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
8032 {
8033 gas_assert (i.vex.register_specifier == r);
8034 i.vex.register_specifier += 8;
8035 }
8036
8037 if (r->reg_flags & RegVRex)
8038 i.vrex |= rex_bit;
8039 }
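/* A sketch of the flag mapping handled above: %r10 (or %xmm10) carries
   RegRex and sets the requested REX bit (REX_B/REX_X/REX_R), while e.g.
   %xmm26 additionally carries RegVRex, recorded in i.vrex for the
   corresponding EVEX extension bit.  */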
8040
8041 static int
8042 process_operands (void)
8043 {
8044 /* Default segment register this instruction will use for memory
8045 accesses. 0 means unknown. This is only for optimizing out
8046 unnecessary segment overrides. */
8047 const reg_entry *default_seg = NULL;
8048
8049 /* We only need to check those implicit registers for instructions
8050 with 3 operands or fewer. */
8051 if (i.operands <= 3)
8052 for (unsigned int j = 0; j < i.operands; j++)
8053 if (i.types[j].bitfield.instance != InstanceNone)
8054 i.reg_operands--;
8055
8056 if (i.tm.opcode_modifier.sse2avx)
8057 {
8058 /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
8059 need converting. */
8060 i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
8061 i.prefix[REX_PREFIX] = 0;
8062 i.rex_encoding = 0;
8063 }
8064 /* ImmExt should be processed after SSE2AVX. */
8065 else if (i.tm.opcode_modifier.immext)
8066 process_immext ();
8067
8068 /* TILEZERO is unusual in that it has a single operand encoded in ModR/M.reg,
8069 not ModR/M.rm. To avoid special casing this in build_modrm_byte(), fake a
8070 new destination operand here, while converting the source one to register
8071 number 0. */
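/* E.g. `tilezero %tmm3' is rewritten as if it were the pair
   (%tmm0, %tmm3): ModR/M.reg then receives register 3 while ModR/M.rm
   gets the dummy register 0 (a sketch of the fixup described above).  */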
8072 if (i.tm.mnem_off == MN_tilezero)
8073 {
8074 i.op[1].regs = i.op[0].regs;
8075 i.op[0].regs -= i.op[0].regs->reg_num;
8076 i.types[1] = i.types[0];
8077 i.tm.operand_types[1] = i.tm.operand_types[0];
8078 i.flags[1] = i.flags[0];
8079 i.operands++;
8080 i.reg_operands++;
8081 i.tm.operands++;
8082 }
8083
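/* A sketch of the SSE2AVX duplication performed below: with -msse2avx,
   `addps %xmm1, %xmm0' becomes `vaddps %xmm1, %xmm0, %xmm0' -- the
   destination is duplicated to fill the extra VEX.vvvv operand -- while
   templates naming %xmm0 as a source go through the Accum /
   IMPLICIT_1ST_XMM0 handling just below.  */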
8084 if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
8085 {
8086 static const i386_operand_type regxmm = {
8087 .bitfield = { .class = RegSIMD, .xmmword = 1 }
8088 };
8089 unsigned int dupl = i.operands;
8090 unsigned int dest = dupl - 1;
8091 unsigned int j;
8092
8093 /* The destination must be an xmm register. */
8094 gas_assert (i.reg_operands
8095 && MAX_OPERANDS > dupl
8096 && operand_type_equal (&i.types[dest], &regxmm));
8097
8098 if (i.tm.operand_types[0].bitfield.instance == Accum
8099 && i.tm.operand_types[0].bitfield.xmmword)
8100 {
8101 /* Keep xmm0 for instructions with VEX prefix and 3
8102 sources. */
8103 i.tm.operand_types[0].bitfield.instance = InstanceNone;
8104 i.tm.operand_types[0].bitfield.class = RegSIMD;
8105 i.reg_operands++;
8106 goto duplicate;
8107 }
8108
8109 if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
8110 {
8111 gas_assert ((MAX_OPERANDS - 1) > dupl);
8112
8113 /* Add the implicit xmm0 for instructions with VEX prefix
8114 and 3 sources. */
8115 for (j = i.operands; j > 0; j--)
8116 {
8117 i.op[j] = i.op[j - 1];
8118 i.types[j] = i.types[j - 1];
8119 i.tm.operand_types[j] = i.tm.operand_types[j - 1];
8120 i.flags[j] = i.flags[j - 1];
8121 }
8122 i.op[0].regs
8123 = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
8124 i.types[0] = regxmm;
8125 i.tm.operand_types[0] = regxmm;
8126
8127 i.operands += 2;
8128 i.reg_operands += 2;
8129 i.tm.operands += 2;
8130
8131 dupl++;
8132 dest++;
8133 i.op[dupl] = i.op[dest];
8134 i.types[dupl] = i.types[dest];
8135 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8136 i.flags[dupl] = i.flags[dest];
8137 }
8138 else
8139 {
8140 duplicate:
8141 i.operands++;
8142 i.reg_operands++;
8143 i.tm.operands++;
8144
8145 i.op[dupl] = i.op[dest];
8146 i.types[dupl] = i.types[dest];
8147 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8148 i.flags[dupl] = i.flags[dest];
8149 }
8150
8151 if (i.tm.opcode_modifier.immext)
8152 process_immext ();
8153 }
8154 else if (i.tm.operand_types[0].bitfield.instance == Accum
8155 && i.tm.opcode_modifier.modrm)
8156 {
8157 unsigned int j;
8158
8159 for (j = 1; j < i.operands; j++)
8160 {
8161 i.op[j - 1] = i.op[j];
8162 i.types[j - 1] = i.types[j];
8163
8164 /* We need to adjust fields in i.tm since they are used by
8165 build_modrm_byte. */
8166 i.tm.operand_types [j - 1] = i.tm.operand_types [j];
8167
8168 i.flags[j - 1] = i.flags[j];
8169 }
8170
8171 /* No adjustment to i.reg_operands: This was already done at the top
8172 of the function. */
8173 i.operands--;
8174 i.tm.operands--;
8175 }
8176 else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_QUAD_GROUP)
8177 {
8178 unsigned int regnum, first_reg_in_group, last_reg_in_group;
8179
8180 /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
8181 gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
8182 regnum = register_number (i.op[1].regs);
8183 first_reg_in_group = regnum & ~3;
8184 last_reg_in_group = first_reg_in_group + 3;
8185 if (regnum != first_reg_in_group)
8186 as_warn (_("source register `%s%s' implicitly denotes"
8187 " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
8188 register_prefix, i.op[1].regs->reg_name,
8189 register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
8190 register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
8191 insn_name (&i.tm));
8192 }
8193 else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
8194 {
8195 /* The imul $imm, %reg instruction is converted into
8196 imul $imm, %reg, %reg, and the clr %reg instruction
8197 is converted into xor %reg, %reg. */
8198
8199 unsigned int first_reg_op;
8200
8201 if (operand_type_check (i.types[0], reg))
8202 first_reg_op = 0;
8203 else
8204 first_reg_op = 1;
8205 /* Pretend we saw the extra register operand. */
8206 gas_assert (i.reg_operands == 1
8207 && i.op[first_reg_op + 1].regs == 0);
8208 i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
8209 i.types[first_reg_op + 1] = i.types[first_reg_op];
8210 i.operands++;
8211 i.reg_operands++;
8212 }
8213
8214 if (i.tm.opcode_modifier.modrm)
8215 {
8216 /* The opcode is completed (modulo i.tm.extension_opcode which
8217 must be put into the modrm byte). Now, we make the modrm and
8218 index base bytes based on all the info we've collected. */
8219
8220 default_seg = build_modrm_byte ();
8221
8222 if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
8223 {
8224 /* Warn about some common errors, but press on regardless. */
8225 if (i.operands == 2)
8226 {
8227 /* Reversed arguments on faddp or fmulp. */
8228 as_warn (_("translating to `%s %s%s,%s%s'"), insn_name (&i.tm),
8229 register_prefix, i.op[!intel_syntax].regs->reg_name,
8230 register_prefix, i.op[intel_syntax].regs->reg_name);
8231 }
8232 else if (i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
8233 {
8234 /* Extraneous `l' suffix on fp insn. */
8235 as_warn (_("translating to `%s %s%s'"), insn_name (&i.tm),
8236 register_prefix, i.op[0].regs->reg_name);
8237 }
8238 }
8239 }
8240 else if (i.types[0].bitfield.class == SReg && !dot_insn ())
8241 {
8242 if (flag_code != CODE_64BIT
8243 ? i.tm.base_opcode == POP_SEG_SHORT
8244 && i.op[0].regs->reg_num == 1
8245 : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
8246 && i.op[0].regs->reg_num < 4)
8247 {
8248 as_bad (_("you can't `%s %s%s'"),
8249 insn_name (&i.tm), register_prefix, i.op[0].regs->reg_name);
8250 return 0;
8251 }
8252 if (i.op[0].regs->reg_num > 3
8253 && i.tm.opcode_space == SPACE_BASE )
8254 {
8255 i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
8256 i.tm.opcode_space = SPACE_0F;
8257 }
8258 i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
8259 }
8260 else if (i.tm.opcode_space == SPACE_BASE
8261 && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
8262 {
8263 default_seg = reg_ds;
8264 }
8265 else if (i.tm.opcode_modifier.isstring)
8266 {
8267 /* For the string instructions that allow a segment override
8268 on one of their operands, the default segment is ds. */
8269 default_seg = reg_ds;
8270 }
8271 else if (i.short_form)
8272 {
8273 /* The register operand is in the 1st or 2nd non-immediate operand. */
8274 const reg_entry *r = i.op[i.imm_operands].regs;
8275
8276 if (!dot_insn ()
8277 && r->reg_type.bitfield.instance == Accum
8278 && i.op[i.imm_operands + 1].regs)
8279 r = i.op[i.imm_operands + 1].regs;
8280 /* Register goes in low 3 bits of opcode. */
8281 i.tm.base_opcode |= r->reg_num;
8282 set_rex_vrex (r, REX_B, false);
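/* E.g. `push %rbp' encodes as 0x50 | 5 = 0x55; `push %r12' becomes
   0x41 0x54, with REX.B carrying the high bit of the register number.  */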
8283
8284 if (dot_insn () && i.reg_operands == 2)
8285 {
8286 gas_assert (is_any_vex_encoding (&i.tm)
8287 || i.vec_encoding != vex_encoding_default);
8288 i.vex.register_specifier = i.op[i.operands - 1].regs;
8289 }
8290 }
8291 else if (i.reg_operands == 1
8292 && !i.flags[i.operands - 1]
8293 && i.tm.operand_types[i.operands - 1].bitfield.instance
8294 == InstanceNone)
8295 {
8296 gas_assert (is_any_vex_encoding (&i.tm)
8297 || i.vec_encoding != vex_encoding_default);
8298 i.vex.register_specifier = i.op[i.operands - 1].regs;
8299 }
8300
8301 if ((i.seg[0] || i.prefix[SEG_PREFIX])
8302 && i.tm.mnem_off == MN_lea)
8303 {
8304 if (!quiet_warnings)
8305 as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
8306 if (optimize && !i.no_optimize)
8307 {
8308 i.seg[0] = NULL;
8309 i.prefix[SEG_PREFIX] = 0;
8310 }
8311 }
8312
8313 /* If a segment was explicitly specified, and the specified segment
8314 is neither the default nor the one already recorded from a prefix,
8315 use an opcode prefix to select it. If we never figured out what
8316 the default segment is, then default_seg will be zero at this
8317 point, and the specified segment prefix will always be used. */
8318 if (i.seg[0]
8319 && i.seg[0] != default_seg
8320 && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
8321 {
8322 if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
8323 return 0;
8324 }
8325 return 1;
8326 }
8327
8328 static const reg_entry *
8329 build_modrm_byte (void)
8330 {
8331 const reg_entry *default_seg = NULL;
8332 unsigned int source = i.imm_operands - i.tm.opcode_modifier.immext
8333 /* Compensate for kludge in md_assemble(). */
8334 + i.tm.operand_types[0].bitfield.imm1;
8335 unsigned int dest = i.operands - 1 - i.tm.opcode_modifier.immext;
8336 unsigned int v, op, reg_slot = ~0;
8337
8338 /* Accumulator (in particular %st), shift count (%cl), and the like need
8339 to be skipped just like immediate operands are. */
8340 if (i.tm.operand_types[source].bitfield.instance)
8341 ++source;
8342 while (i.tm.operand_types[dest].bitfield.instance)
8343 --dest;
8344
8345 for (op = source; op < i.operands; ++op)
8346 if (i.tm.operand_types[op].bitfield.baseindex)
8347 break;
8348
8349 if (i.reg_operands + i.mem_operands + (i.tm.extension_opcode != None) == 4)
8350 {
8351 expressionS *exp;
8352
8353 /* There are 3 kinds of instructions:
8354 1. 5 operands: 4 register operands or 3 register operands
8355 plus 1 memory operand plus one Imm4 operand, VexXDS, and
8356 VexW0 or VexW1. The destination must be an XMM, YMM or
8357 ZMM register.
8358 2. 4 operands: 4 register operands or 3 register operands
8359 plus 1 memory operand, with VexXDS.
8360 3. Other equivalent combinations when coming from s_insn(). */
8361 gas_assert (i.tm.opcode_modifier.vexvvvv
8362 && i.tm.opcode_modifier.vexw);
8363 gas_assert (dot_insn ()
8364 || i.tm.operand_types[dest].bitfield.class == RegSIMD);
8365
8366 /* Of the first two non-immediate operands, the one whose template
8367 doesn't allow for a memory operand is encoded in the immediate operand. */
8368 if (source == op)
8369 reg_slot = source + 1;
8370 else
8371 reg_slot = source++;
8372
8373 if (!dot_insn ())
8374 {
8375 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8376 gas_assert (!(i.op[reg_slot].regs->reg_flags & RegVRex));
8377 }
8378 else
8379 gas_assert (i.tm.operand_types[reg_slot].bitfield.class != ClassNone);
8380
8381 if (i.imm_operands == 0)
8382 {
8383 /* When there is no immediate operand, generate an 8bit
8384 immediate operand to encode the first operand. */
8385 exp = &im_expressions[i.imm_operands++];
8386 i.op[i.operands].imms = exp;
8387 i.types[i.operands].bitfield.imm8 = 1;
8388 i.operands++;
8389
8390 exp->X_op = O_constant;
8391 }
8392 else
8393 {
8394 gas_assert (i.imm_operands == 1);
8395 gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
8396 gas_assert (!i.tm.opcode_modifier.immext);
8397
8398 /* Turn on Imm8 again so that output_imm will generate it. */
8399 i.types[0].bitfield.imm8 = 1;
8400
8401 exp = i.op[0].imms;
8402 }
8403 exp->X_add_number |= register_number (i.op[reg_slot].regs)
8404 << (3 + !(is_evex_encoding (&i.tm)
8405 || i.vec_encoding == vex_encoding_evex));
8406 }
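/* E.g. for the VEX-encoded `vblendvps %xmm4, %xmm3, %xmm2, %xmm1' the
   register %xmm4 ends up in imm8[7:4] (shift of 4 above); for EVEX
   encodings the expression shifts by 3 instead.  */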
8407
8408 for (v = source + 1; v < dest; ++v)
8409 if (v != reg_slot)
8410 break;
8411 if (v >= dest)
8412 v = ~0;
8413 if (i.tm.extension_opcode != None)
8414 {
8415 if (dest != source)
8416 v = dest;
8417 dest = ~0;
8418 }
8419 gas_assert (source < dest);
8420 if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
8421 && source != op)
8422 {
8423 unsigned int tmp = source;
8424
8425 source = v;
8426 v = tmp;
8427 }
8428
8429 if (v < MAX_OPERANDS)
8430 {
8431 gas_assert (i.tm.opcode_modifier.vexvvvv);
8432 i.vex.register_specifier = i.op[v].regs;
8433 }
8434
8435 if (op < i.operands)
8436 {
8437 if (i.mem_operands)
8438 {
8439 unsigned int fake_zero_displacement = 0;
8440
8441 gas_assert (i.flags[op] & Operand_Mem);
8442
8443 if (i.tm.opcode_modifier.sib)
8444 {
8445 /* The index register of VSIB shouldn't be RegIZ. */
8446 if (i.tm.opcode_modifier.sib != SIBMEM
8447 && i.index_reg->reg_num == RegIZ)
8448 abort ();
8449
8450 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8451 if (!i.base_reg)
8452 {
8453 i.sib.base = NO_BASE_REGISTER;
8454 i.sib.scale = i.log2_scale_factor;
8455 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8456 i.types[op].bitfield.disp32 = 1;
8457 }
8458
8459 /* Since a mandatory SIB always has an index register, the
8460 code logic here remains unchanged. A non-mandatory SIB
8461 without an index register is allowed and will be handled
8462 later. */
8463 if (i.index_reg)
8464 {
8465 if (i.index_reg->reg_num == RegIZ)
8466 i.sib.index = NO_INDEX_REGISTER;
8467 else
8468 i.sib.index = i.index_reg->reg_num;
8469 set_rex_vrex (i.index_reg, REX_X, false);
8470 }
8471 }
8472
8473 default_seg = reg_ds;
8474
8475 if (i.base_reg == 0)
8476 {
8477 i.rm.mode = 0;
8478 if (!i.disp_operands)
8479 fake_zero_displacement = 1;
8480 if (i.index_reg == 0)
8481 {
8482 /* Both check for VSIB and mandatory non-vector SIB. */
8483 gas_assert (!i.tm.opcode_modifier.sib
8484 || i.tm.opcode_modifier.sib == SIBMEM);
8485 /* Operand is just <disp> */
8486 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8487 if (flag_code == CODE_64BIT)
8488 {
8489 /* In 64-bit mode RIP-relative addressing replaces
8490 32-bit absolute addressing, so absolute addressing
8491 has to be encoded by one of the redundant SIB
8492 forms. */
8493 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8494 i.sib.base = NO_BASE_REGISTER;
8495 i.sib.index = NO_INDEX_REGISTER;
8496 i.types[op].bitfield.disp32 = 1;
8497 }
8498 else if ((flag_code == CODE_16BIT)
8499 ^ (i.prefix[ADDR_PREFIX] != 0))
8500 {
8501 i.rm.regmem = NO_BASE_REGISTER_16;
8502 i.types[op].bitfield.disp16 = 1;
8503 }
8504 else
8505 {
8506 i.rm.regmem = NO_BASE_REGISTER;
8507 i.types[op].bitfield.disp32 = 1;
8508 }
8509 }
8510 else if (!i.tm.opcode_modifier.sib)
8511 {
8512 /* !i.base_reg && i.index_reg */
8513 if (i.index_reg->reg_num == RegIZ)
8514 i.sib.index = NO_INDEX_REGISTER;
8515 else
8516 i.sib.index = i.index_reg->reg_num;
8517 i.sib.base = NO_BASE_REGISTER;
8518 i.sib.scale = i.log2_scale_factor;
8519 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8520 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8521 i.types[op].bitfield.disp32 = 1;
8522 if ((i.index_reg->reg_flags & RegRex) != 0)
8523 i.rex |= REX_X;
8524 }
8525 }
8526 /* RIP addressing for 64bit mode. */
8527 else if (i.base_reg->reg_num == RegIP)
8528 {
8529 gas_assert (!i.tm.opcode_modifier.sib);
8530 i.rm.regmem = NO_BASE_REGISTER;
8531 i.types[op].bitfield.disp8 = 0;
8532 i.types[op].bitfield.disp16 = 0;
8533 i.types[op].bitfield.disp32 = 1;
8534 i.types[op].bitfield.disp64 = 0;
8535 i.flags[op] |= Operand_PCrel;
8536 if (! i.disp_operands)
8537 fake_zero_displacement = 1;
8538 }
8539 else if (i.base_reg->reg_type.bitfield.word)
8540 {
8541 gas_assert (!i.tm.opcode_modifier.sib);
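/* For reference, the 16-bit ModR/M r/m encodings produced below:
   0 (%bx,%si)   1 (%bx,%di)   2 (%bp,%si)   3 (%bp,%di)
   4 (%si)       5 (%di)       6 (%bp) -- or disp16 when mod == 0
   7 (%bx)  */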
8542 switch (i.base_reg->reg_num)
8543 {
8544 case 3: /* (%bx) */
8545 if (i.index_reg == 0)
8546 i.rm.regmem = 7;
8547 else /* (%bx,%si) -> 0, or (%bx,%di) -> 1 */
8548 i.rm.regmem = i.index_reg->reg_num - 6;
8549 break;
8550 case 5: /* (%bp) */
8551 default_seg = reg_ss;
8552 if (i.index_reg == 0)
8553 {
8554 i.rm.regmem = 6;
8555 if (operand_type_check (i.types[op], disp) == 0)
8556 {
8557 /* fake (%bp) into 0(%bp) */
8558 if (i.disp_encoding == disp_encoding_16bit)
8559 i.types[op].bitfield.disp16 = 1;
8560 else
8561 i.types[op].bitfield.disp8 = 1;
8562 fake_zero_displacement = 1;
8563 }
8564 }
8565 else /* (%bp,%si) -> 2, or (%bp,%di) -> 3 */
8566 i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8567 break;
8568 default: /* (%si) -> 4 or (%di) -> 5 */
8569 i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8570 }
8571 if (!fake_zero_displacement
8572 && !i.disp_operands
8573 && i.disp_encoding)
8574 {
8575 fake_zero_displacement = 1;
8576 if (i.disp_encoding == disp_encoding_8bit)
8577 i.types[op].bitfield.disp8 = 1;
8578 else
8579 i.types[op].bitfield.disp16 = 1;
8580 }
8581 i.rm.mode = mode_from_disp_size (i.types[op]);
8582 }
8583 else /* i.base_reg and 32/64 bit mode */
8584 {
8585 if (operand_type_check (i.types[op], disp))
8586 {
8587 i.types[op].bitfield.disp16 = 0;
8588 i.types[op].bitfield.disp64 = 0;
8589 i.types[op].bitfield.disp32 = 1;
8590 }
8591
8592 if (!i.tm.opcode_modifier.sib)
8593 i.rm.regmem = i.base_reg->reg_num;
8594 if ((i.base_reg->reg_flags & RegRex) != 0)
8595 i.rex |= REX_B;
8596 i.sib.base = i.base_reg->reg_num;
8597 /* x86-64 ignores REX prefix bit here to avoid decoder
8598 complications. */
8599 if (!(i.base_reg->reg_flags & RegRex)
8600 && (i.base_reg->reg_num == EBP_REG_NUM
8601 || i.base_reg->reg_num == ESP_REG_NUM))
8602 default_seg = reg_ss;
8603 if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8604 {
8605 fake_zero_displacement = 1;
8606 if (i.disp_encoding == disp_encoding_32bit)
8607 i.types[op].bitfield.disp32 = 1;
8608 else
8609 i.types[op].bitfield.disp8 = 1;
8610 }
8611 i.sib.scale = i.log2_scale_factor;
8612 if (i.index_reg == 0)
8613 {
8614 /* Only check for VSIB. */
8615 gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8616 && i.tm.opcode_modifier.sib != VECSIB256
8617 && i.tm.opcode_modifier.sib != VECSIB512);
8618
8619 /* <disp>(%esp) becomes a two-byte ModR/M (i.e. with a
8620 SIB byte) and no index register. We've already stored
8621 the code for esp in i.rm.regmem, i.e.
8622 ESCAPE_TO_TWO_BYTE_ADDRESSING. Any base register
8623 besides %esp will not use the extra byte. */
8624 i.sib.index = NO_INDEX_REGISTER;
8625 }
8626 else if (!i.tm.opcode_modifier.sib)
8627 {
8628 if (i.index_reg->reg_num == RegIZ)
8629 i.sib.index = NO_INDEX_REGISTER;
8630 else
8631 i.sib.index = i.index_reg->reg_num;
8632 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8633 if ((i.index_reg->reg_flags & RegRex) != 0)
8634 i.rex |= REX_X;
8635 }
8636
8637 if (i.disp_operands
8638 && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8639 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8640 i.rm.mode = 0;
8641 else
8642 {
8643 if (!fake_zero_displacement
8644 && !i.disp_operands
8645 && i.disp_encoding)
8646 {
8647 fake_zero_displacement = 1;
8648 if (i.disp_encoding == disp_encoding_8bit)
8649 i.types[op].bitfield.disp8 = 1;
8650 else
8651 i.types[op].bitfield.disp32 = 1;
8652 }
8653 i.rm.mode = mode_from_disp_size (i.types[op]);
8654 }
8655 }
8656
8657 if (fake_zero_displacement)
8658 {
8659 /* Fakes a zero displacement assuming that i.types[op]
8660 holds the correct displacement size. */
8661 expressionS *exp;
8662
8663 gas_assert (i.op[op].disps == 0);
8664 exp = &disp_expressions[i.disp_operands++];
8665 i.op[op].disps = exp;
8666 exp->X_op = O_constant;
8667 exp->X_add_number = 0;
8668 exp->X_add_symbol = (symbolS *) 0;
8669 exp->X_op_symbol = (symbolS *) 0;
8670 }
8671 }
8672 else
8673 {
8674 i.rm.mode = 3;
8675 i.rm.regmem = i.op[op].regs->reg_num;
8676 set_rex_vrex (i.op[op].regs, REX_B, false);
8677 }
8678
8679 if (op == dest)
8680 dest = ~0;
8681 if (op == source)
8682 source = ~0;
8683 }
8684 else
8685 {
8686 i.rm.mode = 3;
8687 if (!i.tm.opcode_modifier.regmem)
8688 {
8689 gas_assert (source < MAX_OPERANDS);
8690 i.rm.regmem = i.op[source].regs->reg_num;
8691 set_rex_vrex (i.op[source].regs, REX_B,
8692 dest >= MAX_OPERANDS && i.tm.opcode_modifier.sse2avx);
8693 source = ~0;
8694 }
8695 else
8696 {
8697 gas_assert (dest < MAX_OPERANDS);
8698 i.rm.regmem = i.op[dest].regs->reg_num;
8699 set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8700 dest = ~0;
8701 }
8702 }
8703
8704 /* Fill in i.rm.reg field with extension opcode (if any) or the
8705 appropriate register. */
8706 if (i.tm.extension_opcode != None)
8707 i.rm.reg = i.tm.extension_opcode;
8708 else if (!i.tm.opcode_modifier.regmem && dest < MAX_OPERANDS)
8709 {
8710 i.rm.reg = i.op[dest].regs->reg_num;
8711 set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8712 }
8713 else
8714 {
8715 gas_assert (source < MAX_OPERANDS);
8716 i.rm.reg = i.op[source].regs->reg_num;
8717 set_rex_vrex (i.op[source].regs, REX_R, false);
8718 }
8719
8720 if (flag_code != CODE_64BIT && (i.rex & REX_R))
8721 {
8722 gas_assert (i.types[!i.tm.opcode_modifier.regmem].bitfield.class == RegCR);
8723 i.rex &= ~REX_R;
8724 add_prefix (LOCK_PREFIX_OPCODE);
8725 }
8726
8727 return default_seg;
8728 }
8729
8730 static INLINE void
8731 frag_opcode_byte (unsigned char byte)
8732 {
8733 if (now_seg != absolute_section)
8734 FRAG_APPEND_1_CHAR (byte);
8735 else
8736 ++abs_section_offset;
8737 }
8738
8739 static unsigned int
8740 flip_code16 (unsigned int code16)
8741 {
8742 gas_assert (i.tm.operands == 1);
8743
8744 return !(i.prefix[REX_PREFIX] & REX_W)
8745 && (code16 ? i.tm.operand_types[0].bitfield.disp32
8746 : i.tm.operand_types[0].bitfield.disp16)
8747 ? CODE16 : 0;
8748 }
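/* A sketch of when this flips: in .code16, `calll foo' (foo being any
   symbol) carries a 0x66 prefix and matches a disp32 template, so CODE16
   is flipped off and a 32-bit displacement is emitted; the converse holds
   for `callw' in .code32.  REX.W suppresses the flip.  */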
8749
8750 static void
8751 output_branch (void)
8752 {
8753 char *p;
8754 int size;
8755 int code16;
8756 int prefix;
8757 relax_substateT subtype;
8758 symbolS *sym;
8759 offsetT off;
8760
8761 if (now_seg == absolute_section)
8762 {
8763 as_bad (_("relaxable branches not supported in absolute section"));
8764 return;
8765 }
8766
8767 code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8768 size = i.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
8769
8770 prefix = 0;
8771 if (i.prefix[DATA_PREFIX] != 0)
8772 {
8773 prefix = 1;
8774 i.prefixes -= 1;
8775 code16 ^= flip_code16(code16);
8776 }
8777 /* Pentium4 branch hints. */
8778 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8779 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8780 {
8781 prefix++;
8782 i.prefixes--;
8783 }
8784 if (i.prefix[REX_PREFIX] != 0)
8785 {
8786 prefix++;
8787 i.prefixes--;
8788 }
8789
8790 /* BND prefixed jump. */
8791 if (i.prefix[BND_PREFIX] != 0)
8792 {
8793 prefix++;
8794 i.prefixes--;
8795 }
8796
8797 if (i.prefixes != 0)
8798 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
8799
8800 /* It's always a symbol; end the frag & set up for relax.
8801 Make sure there is enough room in this frag for the largest
8802 instruction we may generate in md_convert_frag. This is 2
8803 bytes for the opcode and room for the prefix and largest
8804 displacement. */
8805 frag_grow (prefix + 2 + 4);
8806 /* Prefix and 1 opcode byte go in fr_fix. */
8807 p = frag_more (prefix + 1);
8808 if (i.prefix[DATA_PREFIX] != 0)
8809 *p++ = DATA_PREFIX_OPCODE;
8810 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8811 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8812 *p++ = i.prefix[SEG_PREFIX];
8813 if (i.prefix[BND_PREFIX] != 0)
8814 *p++ = BND_PREFIX_OPCODE;
8815 if (i.prefix[REX_PREFIX] != 0)
8816 *p++ = i.prefix[REX_PREFIX];
8817 *p = i.tm.base_opcode;
8818
8819 if ((unsigned char) *p == JUMP_PC_RELATIVE)
8820 subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8821 else if (cpu_arch_flags.bitfield.cpui386)
8822 subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8823 else
8824 subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8825 subtype |= code16;
8826
8827 sym = i.op[0].disps->X_add_symbol;
8828 off = i.op[0].disps->X_add_number;
8829
8830 if (i.op[0].disps->X_op != O_constant
8831 && i.op[0].disps->X_op != O_symbol)
8832 {
8833 /* Handle complex expressions. */
8834 sym = make_expr_symbol (i.op[0].disps);
8835 off = 0;
8836 }
8837
8838 frag_now->tc_frag_data.code64 = flag_code == CODE_64BIT;
8839
8840 /* 1 possible extra opcode + 4 byte displacement go in var part.
8841 Pass reloc in fr_var. */
8842 frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8843 }
8844
8845 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8846 /* Return TRUE iff PLT32 relocation should be used for branching to
8847 symbol S. */
8848
8849 static bool
8850 need_plt32_p (symbolS *s)
8851 {
8852 /* PLT32 relocation is ELF only. */
8853 if (!IS_ELF)
8854 return false;
8855
8856 #ifdef TE_SOLARIS
8857 /* Don't emit PLT32 relocation on Solaris: neither native linker nor
8858 krtld support it. */
8859 return false;
8860 #endif
8861
8862 /* Since there is no need to prepare for PLT branch on x86-64, we
8863 can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
8864 be used as a marker for 32-bit PC-relative branches. */
8865 if (!object_64bit)
8866 return false;
8867
8868 if (s == NULL)
8869 return false;
8870
8871 /* Weak or undefined symbols need a PLT32 relocation. */
8872 if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
8873 return true;
8874
8875 /* A non-global symbol doesn't need a PLT32 relocation. */
8876 if (! S_IS_EXTERNAL (s))
8877 return false;
8878
8879 /* Other global symbols need a PLT32 relocation. NB: Symbols with
8880 non-default visibility are treated as normal global symbols
8881 so that the PLT32 relocation can be used as a marker for 32-bit
8882 PC-relative branches. This is useful for linker relaxation. */
8883 return true;
8884 }
8885 #endif
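/* E.g. `call foo' in 64-bit mode, foo not yet defined, gets
   R_X86_64_PLT32 rather than R_X86_64_PC32; if foo later turns out to be
   local, the linker can still resolve the call directly, so no PLT entry
   is forced (a sketch of the policy above; `foo' is illustrative).  */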
8886
8887 static void
8888 output_jump (void)
8889 {
8890 char *p;
8891 int size;
8892 fixS *fixP;
8893 bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8894
8895 if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8896 {
8897 /* This is a loop or jecxz type instruction. */
8898 size = 1;
8899 if (i.prefix[ADDR_PREFIX] != 0)
8900 {
8901 frag_opcode_byte (ADDR_PREFIX_OPCODE);
8902 i.prefixes -= 1;
8903 }
8904 /* Pentium4 branch hints. */
8905 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8906 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8907 {
8908 frag_opcode_byte (i.prefix[SEG_PREFIX]);
8909 i.prefixes--;
8910 }
8911 }
8912 else
8913 {
8914 int code16;
8915
8916 code16 = 0;
8917 if (flag_code == CODE_16BIT)
8918 code16 = CODE16;
8919
8920 if (i.prefix[DATA_PREFIX] != 0)
8921 {
8922 frag_opcode_byte (DATA_PREFIX_OPCODE);
8923 i.prefixes -= 1;
8924 code16 ^= flip_code16(code16);
8925 }
8926
8927 size = 4;
8928 if (code16)
8929 size = 2;
8930 }
8931
8932 /* BND prefixed jump. */
8933 if (i.prefix[BND_PREFIX] != 0)
8934 {
8935 frag_opcode_byte (i.prefix[BND_PREFIX]);
8936 i.prefixes -= 1;
8937 }
8938
8939 if (i.prefix[REX_PREFIX] != 0)
8940 {
8941 frag_opcode_byte (i.prefix[REX_PREFIX]);
8942 i.prefixes -= 1;
8943 }
8944
8945 if (i.prefixes != 0)
8946 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
8947
8948 if (now_seg == absolute_section)
8949 {
8950 abs_section_offset += i.opcode_length + size;
8951 return;
8952 }
8953
8954 p = frag_more (i.opcode_length + size);
8955 switch (i.opcode_length)
8956 {
8957 case 2:
8958 *p++ = i.tm.base_opcode >> 8;
8959 /* Fall through. */
8960 case 1:
8961 *p++ = i.tm.base_opcode;
8962 break;
8963 default:
8964 abort ();
8965 }
8966
8967 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8968 if (flag_code == CODE_64BIT && size == 4
8969 && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
8970 && need_plt32_p (i.op[0].disps->X_add_symbol))
8971 jump_reloc = BFD_RELOC_X86_64_PLT32;
8972 #endif
8973
8974 jump_reloc = reloc (size, 1, 1, jump_reloc);
8975
8976 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8977 i.op[0].disps, 1, jump_reloc);
8978
8979 /* All jumps handled here are signed, but don't unconditionally use a
8980 signed limit check for 32 and 16 bit jumps as we want to allow wrap
8981 around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
8982 respectively. */
8983 switch (size)
8984 {
8985 case 1:
8986 fixP->fx_signed = 1;
8987 break;
8988
8989 case 2:
8990 if (i.tm.mnem_off == MN_xbegin)
8991 fixP->fx_signed = 1;
8992 break;
8993
8994 case 4:
8995 if (flag_code == CODE_64BIT)
8996 fixP->fx_signed = 1;
8997 break;
8998 }
8999 }
9000
9001 static void
9002 output_interseg_jump (void)
9003 {
9004 char *p;
9005 int size;
9006 int prefix;
9007 int code16;
9008
9009 code16 = 0;
9010 if (flag_code == CODE_16BIT)
9011 code16 = CODE16;
9012
9013 prefix = 0;
9014 if (i.prefix[DATA_PREFIX] != 0)
9015 {
9016 prefix = 1;
9017 i.prefixes -= 1;
9018 code16 ^= CODE16;
9019 }
9020
9021 gas_assert (!i.prefix[REX_PREFIX]);
9022
9023 size = 4;
9024 if (code16)
9025 size = 2;
9026
9027 if (i.prefixes != 0)
9028 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
9029
9030 if (now_seg == absolute_section)
9031 {
9032 abs_section_offset += prefix + 1 + 2 + size;
9033 return;
9034 }
9035
9036 /* 1 opcode; 2 segment; offset */
9037 p = frag_more (prefix + 1 + 2 + size);
9038
9039 if (i.prefix[DATA_PREFIX] != 0)
9040 *p++ = DATA_PREFIX_OPCODE;
9041
9042 if (i.prefix[REX_PREFIX] != 0)
9043 *p++ = i.prefix[REX_PREFIX];
9044
9045 *p++ = i.tm.base_opcode;
9046 if (i.op[1].imms->X_op == O_constant)
9047 {
9048 offsetT n = i.op[1].imms->X_add_number;
9049
9050 if (size == 2
9051 && !fits_in_unsigned_word (n)
9052 && !fits_in_signed_word (n))
9053 {
9054 as_bad (_("16-bit jump out of range"));
9055 return;
9056 }
9057 md_number_to_chars (p, n, size);
9058 }
9059 else
9060 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9061 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
9062
9063 p += size;
9064 if (i.op[0].imms->X_op == O_constant)
9065 md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
9066 else
9067 fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
9068 i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
9069 }
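/* Layout sketch: in 32-bit code, `ljmp $0x08, $target' (selector and
   target illustrative) emits 0xea, a 4-byte offset, then the 2-byte
   segment selector; with a data size prefix (or in 16-bit code) the
   offset shrinks to 2 bytes.  */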
9070
9071 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9072 void
9073 x86_cleanup (void)
9074 {
9075 char *p;
9076 asection *seg = now_seg;
9077 subsegT subseg = now_subseg;
9078 asection *sec;
9079 unsigned int alignment, align_size_1;
9080 unsigned int isa_1_descsz, feature_2_descsz, descsz;
9081 unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
9082 unsigned int padding;
9083
9084 if (!IS_ELF || !x86_used_note)
9085 return;
9086
9087 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
9088
9089 /* The .note.gnu.property section layout:
9090
9091 Field Length Contents
9092 ---- ---- ----
9093 n_namsz 4 4
9094 n_descsz 4 The note descriptor size
9095 n_type 4 NT_GNU_PROPERTY_TYPE_0
9096 n_name 4 "GNU"
9097 n_desc n_descsz The program property array
9098 .... .... ....
9099 */
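/* A worked sizing example, assuming ELFCLASS64 (8-byte alignment):
   isa_1_descsz_raw = 4 + 4 + 4 = 12, padded to isa_1_descsz = 16;
   feature_2_descsz_raw = 16 + 12 = 28, padded to descsz = 32; the frag
   below then reserves 4 * 4 + 32 = 48 bytes.  */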
9100
9101 /* Create the .note.gnu.property section. */
9102 sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
9103 bfd_set_section_flags (sec,
9104 (SEC_ALLOC
9105 | SEC_LOAD
9106 | SEC_DATA
9107 | SEC_HAS_CONTENTS
9108 | SEC_READONLY));
9109
9110 if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
9111 {
9112 align_size_1 = 7;
9113 alignment = 3;
9114 }
9115 else
9116 {
9117 align_size_1 = 3;
9118 alignment = 2;
9119 }
9120
9121 bfd_set_section_alignment (sec, alignment);
9122 elf_section_type (sec) = SHT_NOTE;
9123
9124 /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
9125 + 4-byte data */
9126 isa_1_descsz_raw = 4 + 4 + 4;
9127 /* Align GNU_PROPERTY_X86_ISA_1_USED. */
9128 isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
9129
9130 feature_2_descsz_raw = isa_1_descsz;
9131 /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
9132 + 4-byte data */
9133 feature_2_descsz_raw += 4 + 4 + 4;
9134 /* Align GNU_PROPERTY_X86_FEATURE_2_USED. */
9135 feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
9136 & ~align_size_1);
9137
9138 descsz = feature_2_descsz;
9139 /* Section size: n_namsz + n_descsz + n_type + n_name + the descriptor (descsz bytes). */
9140 p = frag_more (4 + 4 + 4 + 4 + descsz);
9141
9142 /* Write n_namsz. */
9143 md_number_to_chars (p, (valueT) 4, 4);
9144
9145 /* Write n_descsz. */
9146 md_number_to_chars (p + 4, (valueT) descsz, 4);
9147
9148 /* Write n_type. */
9149 md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
9150
9151 /* Write n_name. */
9152 memcpy (p + 4 * 3, "GNU", 4);
9153
9154 /* Write 4-byte type. */
9155 md_number_to_chars (p + 4 * 4,
9156 (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
9157
9158 /* Write 4-byte data size. */
9159 md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
9160
9161 /* Write 4-byte data. */
9162 md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
9163
9164 /* Zero out the padding. */
9165 padding = isa_1_descsz - isa_1_descsz_raw;
9166 if (padding)
9167 memset (p + 4 * 7, 0, padding);
9168
9169 /* Write 4-byte type. */
9170 md_number_to_chars (p + isa_1_descsz + 4 * 4,
9171 (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
9172
9173 /* Write 4-byte data size. */
9174 md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
9175
9176 /* Write 4-byte data. */
9177 md_number_to_chars (p + isa_1_descsz + 4 * 6,
9178 (valueT) x86_feature_2_used, 4);
9179
9180 /* Zero out the padding. */
9181 padding = feature_2_descsz - feature_2_descsz_raw;
9182 if (padding)
9183 memset (p + isa_1_descsz + 4 * 7, 0, padding);
9184
9185 /* We probably can't restore the current segment, for there likely
9186 isn't one yet... */
9187 if (seg && subseg)
9188 subseg_set (seg, subseg);
9189 }
9190
9191 bool
9192 x86_support_sframe_p (void)
9193 {
9194 /* At this time, SFrame stack trace is supported for AMD64 ABI only. */
9195 return (x86_elf_abi == X86_64_ABI);
9196 }
9197
9198 bool
9199 x86_sframe_ra_tracking_p (void)
9200 {
9201 /* On AMD64, the return address is always stored on the stack at a fixed
9202 offset from the CFA (provided via x86_sframe_cfa_ra_offset ()), so it
9203 is not tracked explicitly via an SFrame Frame Row Entry. */
9204 return false;
9205 }
9206
9207 offsetT
9208 x86_sframe_cfa_ra_offset (void)
9209 {
9210 gas_assert (x86_elf_abi == X86_64_ABI);
9211 return (offsetT) -8;
9212 }
9213
9214 unsigned char
9215 x86_sframe_get_abi_arch (void)
9216 {
9217 unsigned char sframe_abi_arch = 0;
9218
9219 if (x86_support_sframe_p ())
9220 {
9221 gas_assert (!target_big_endian);
9222 sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
9223 }
9224
9225 return sframe_abi_arch;
9226 }
9227
9228 #endif
9229
9230 static unsigned int
9231 encoding_length (const fragS *start_frag, offsetT start_off,
9232 const char *frag_now_ptr)
9233 {
9234 unsigned int len = 0;
9235
9236 if (start_frag != frag_now)
9237 {
9238 const fragS *fr = start_frag;
9239
9240 do {
9241 len += fr->fr_fix;
9242 fr = fr->fr_next;
9243 } while (fr && fr != frag_now);
9244 }
9245
9246 return len - start_off + (frag_now_ptr - frag_now->fr_literal);
9247 }
9248
9249 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
9250 be macro-fused with conditional jumps.
9251 NB: If TEST/AND/CMP/ADD/SUB/INC/DEC uses a RIP-relative address,
9252 or is in one of the following forms:
9253
9254 cmp m, imm
9255 add m, imm
9256 sub m, imm
9257 test m, imm
9258 and m, imm
9259 inc m
9260 dec m
9261
9262 it cannot be fused. */
9263
9264 static int
9265 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
9266 {
9267 /* No RIP address. */
9268 if (i.base_reg && i.base_reg->reg_num == RegIP)
9269 return 0;
9270
9271 /* No opcodes outside of base encoding space. */
9272 if (i.tm.opcode_space != SPACE_BASE)
9273 return 0;
9274
9275 /* add, sub without add/sub m, imm. */
9276 if (i.tm.base_opcode <= 5
9277 || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
9278 || ((i.tm.base_opcode | 3) == 0x83
9279 && (i.tm.extension_opcode == 0x5
9280 || i.tm.extension_opcode == 0x0)))
9281 {
9282 *mf_cmp_p = mf_cmp_alu_cmp;
9283 return !(i.mem_operands && i.imm_operands);
9284 }
9285
9286 /* and without and m, imm. */
9287 if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
9288 || ((i.tm.base_opcode | 3) == 0x83
9289 && i.tm.extension_opcode == 0x4))
9290 {
9291 *mf_cmp_p = mf_cmp_test_and;
9292 return !(i.mem_operands && i.imm_operands);
9293 }
9294
9295 /* test without test m imm. */
9296 if ((i.tm.base_opcode | 1) == 0x85
9297 || (i.tm.base_opcode | 1) == 0xa9
9298 || ((i.tm.base_opcode | 1) == 0xf7
9299 && i.tm.extension_opcode == 0))
9300 {
9301 *mf_cmp_p = mf_cmp_test_and;
9302 return !(i.mem_operands && i.imm_operands);
9303 }
9304
9305 /* cmp without cmp m, imm. */
9306 if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
9307 || ((i.tm.base_opcode | 3) == 0x83
9308 && (i.tm.extension_opcode == 0x7)))
9309 {
9310 *mf_cmp_p = mf_cmp_alu_cmp;
9311 return !(i.mem_operands && i.imm_operands);
9312 }
9313
9314 /* inc, dec without inc/dec m. */
9315 if ((i.tm.cpu_flags.bitfield.cpuno64
9316 && (i.tm.base_opcode | 0xf) == 0x4f)
9317 || ((i.tm.base_opcode | 1) == 0xff
9318 && i.tm.extension_opcode <= 0x1))
9319 {
9320 *mf_cmp_p = mf_cmp_incdec;
9321 return !i.mem_operands;
9322 }
9323
9324 return 0;
9325 }
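/* E.g. `cmp %rax, %rbx' followed by `jne out' is a macro-fusion
   candidate, while `cmpl $1, (%rbx)' is not (memory plus immediate),
   matching the checks above (label illustrative).  */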
9326
9327 /* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
9328
9329 static int
9330 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9331 {
9332 /* NB: Doesn't work with COND_JUMP86 without i386. */
9333 if (!align_branch_power
9334 || now_seg == absolute_section
9335 || !cpu_arch_flags.bitfield.cpui386
9336 || !(align_branch & align_branch_fused_bit))
9337 return 0;
9338
9339 if (maybe_fused_with_jcc_p (mf_cmp_p))
9340 {
9341 if (last_insn.kind == last_insn_other
9342 || last_insn.seg != now_seg)
9343 return 1;
9344 if (flag_debug)
9345 as_warn_where (last_insn.file, last_insn.line,
9346 _("`%s` skips -malign-branch-boundary on `%s`"),
9347 last_insn.name, insn_name (&i.tm));
9348 }
9349
9350 return 0;
9351 }
9352
9353 /* Return 1 if a BRANCH_PREFIX frag should be generated. */
9354
9355 static int
9356 add_branch_prefix_frag_p (void)
9357 {
9358 /* NB: Doesn't work with COND_JUMP86 without i386. Don't add a prefix
9359 to PadLock instructions since they include prefixes in their opcode. */
9360 if (!align_branch_power
9361 || !align_branch_prefix_size
9362 || now_seg == absolute_section
9363 || i.tm.cpu_flags.bitfield.cpupadlock
9364 || !cpu_arch_flags.bitfield.cpui386)
9365 return 0;
9366
9367 /* Don't add a prefix if the insn itself is a prefix, or if there is
9368 no operand, in which case a segment prefix would be special. */
9369 if (!i.operands || i.tm.opcode_modifier.isprefix)
9370 return 0;
9371
9372 if (last_insn.kind == last_insn_other
9373 || last_insn.seg != now_seg)
9374 return 1;
9375
9376 if (flag_debug)
9377 as_warn_where (last_insn.file, last_insn.line,
9378 _("`%s` skips -malign-branch-boundary on `%s`"),
9379 last_insn.name, insn_name (&i.tm));
9380
9381 return 0;
9382 }
9383
9384 /* Return 1 if a BRANCH_PADDING frag should be generated. */
9385
9386 static int
9387 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9388 enum mf_jcc_kind *mf_jcc_p)
9389 {
9390 int add_padding;
9391
9392 /* NB: Doesn't work with COND_JUMP86 without i386. */
9393 if (!align_branch_power
9394 || now_seg == absolute_section
9395 || !cpu_arch_flags.bitfield.cpui386
9396 || i.tm.opcode_space != SPACE_BASE)
9397 return 0;
9398
9399 add_padding = 0;
9400
9401 /* Check for jcc and direct jmp. */
9402 if (i.tm.opcode_modifier.jump == JUMP)
9403 {
9404 if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9405 {
9406 *branch_p = align_branch_jmp;
9407 add_padding = align_branch & align_branch_jmp_bit;
9408 }
9409 else
9410 {
9411 /* Because J<cc> and JN<cc> share the same group in the macro-fusible
9412 table, ignore the lowest bit. */
9413 *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9414 *branch_p = align_branch_jcc;
9415 if ((align_branch & align_branch_jcc_bit))
9416 add_padding = 1;
9417 }
9418 }
9419 else if ((i.tm.base_opcode | 1) == 0xc3)
9420 {
9421 /* Near ret. */
9422 *branch_p = align_branch_ret;
9423 if ((align_branch & align_branch_ret_bit))
9424 add_padding = 1;
9425 }
9426 else
9427 {
9428 /* Check for indirect jmp, direct and indirect calls. */
9429 if (i.tm.base_opcode == 0xe8)
9430 {
9431 /* Direct call. */
9432 *branch_p = align_branch_call;
9433 if ((align_branch & align_branch_call_bit))
9434 add_padding = 1;
9435 }
9436 else if (i.tm.base_opcode == 0xff
9437 && (i.tm.extension_opcode == 2
9438 || i.tm.extension_opcode == 4))
9439 {
9440 /* Indirect call and jmp. */
9441 *branch_p = align_branch_indirect;
9442 if ((align_branch & align_branch_indirect_bit))
9443 add_padding = 1;
9444 }
9445
9446 if (add_padding
9447 && i.disp_operands
9448 && tls_get_addr
9449 && (i.op[0].disps->X_op == O_symbol
9450 || (i.op[0].disps->X_op == O_subtract
9451 && i.op[0].disps->X_op_symbol == GOT_symbol)))
9452 {
9453 symbolS *s = i.op[0].disps->X_add_symbol;
9454 /* No padding to call to global or undefined tls_get_addr. */
9455 if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9456 && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9457 return 0;
9458 }
9459 }
9460
9461 if (add_padding
9462 && last_insn.kind != last_insn_other
9463 && last_insn.seg == now_seg)
9464 {
9465 if (flag_debug)
9466 as_warn_where (last_insn.file, last_insn.line,
9467 _("`%s` skips -malign-branch-boundary on `%s`"),
9468 last_insn.name, insn_name (&i.tm));
9469 return 0;
9470 }
9471
9472 return add_padding;
9473 }
9474
9475 static void
9476 output_insn (void)
9477 {
9478 fragS *insn_start_frag;
9479 offsetT insn_start_off;
9480 fragS *fragP = NULL;
9481 enum align_branch_kind branch = align_branch_none;
9482 /* The initializer is arbitrary, just to avoid an uninitialized error;
9483 it's actually either assigned in add_branch_padding_frag_p
9484 or never used. */
9485 enum mf_jcc_kind mf_jcc = mf_jcc_jo;
9486
9487 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9488 if (IS_ELF && x86_used_note && now_seg != absolute_section)
9489 {
9490 if ((i.xstate & xstate_tmm) == xstate_tmm
9491 || i.tm.cpu_flags.bitfield.cpuamx_tile)
9492 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
9493
9494 if (i.tm.cpu_flags.bitfield.cpu8087
9495 || i.tm.cpu_flags.bitfield.cpu287
9496 || i.tm.cpu_flags.bitfield.cpu387
9497 || i.tm.cpu_flags.bitfield.cpu687
9498 || i.tm.cpu_flags.bitfield.cpufisttp)
9499 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
9500
9501 if ((i.xstate & xstate_mmx)
9502 || i.tm.mnem_off == MN_emms
9503 || i.tm.mnem_off == MN_femms)
9504 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
9505
9506 if (i.index_reg)
9507 {
9508 if (i.index_reg->reg_type.bitfield.zmmword)
9509 i.xstate |= xstate_zmm;
9510 else if (i.index_reg->reg_type.bitfield.ymmword)
9511 i.xstate |= xstate_ymm;
9512 else if (i.index_reg->reg_type.bitfield.xmmword)
9513 i.xstate |= xstate_xmm;
9514 }
9515
9516 /* vzeroall / vzeroupper */
9517 if (i.tm.base_opcode == 0x77 && i.tm.cpu_flags.bitfield.cpuavx)
9518 i.xstate |= xstate_ymm;
9519
9520 if ((i.xstate & xstate_xmm)
9521 /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
9522 || (i.tm.base_opcode == 0xae
9523 && (i.tm.cpu_flags.bitfield.cpusse
9524 || i.tm.cpu_flags.bitfield.cpuavx))
9525 || i.tm.cpu_flags.bitfield.cpuwidekl
9526 || i.tm.cpu_flags.bitfield.cpukl)
9527 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
9528
9529 if ((i.xstate & xstate_ymm) == xstate_ymm)
9530 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
9531 if ((i.xstate & xstate_zmm) == xstate_zmm)
9532 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
9533 if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
9534 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
9535 if (i.tm.cpu_flags.bitfield.cpufxsr)
9536 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
9537 if (i.tm.cpu_flags.bitfield.cpuxsave)
9538 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
9539 if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
9540 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
9541 if (i.tm.cpu_flags.bitfield.cpuxsavec)
9542 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
9543
9544 if (x86_feature_2_used
9545 || i.tm.cpu_flags.bitfield.cpucmov
9546 || i.tm.cpu_flags.bitfield.cpusyscall
9547 || i.tm.mnem_off == MN_cmpxchg8b)
9548 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
9549 if (i.tm.cpu_flags.bitfield.cpusse3
9550 || i.tm.cpu_flags.bitfield.cpussse3
9551 || i.tm.cpu_flags.bitfield.cpusse4_1
9552 || i.tm.cpu_flags.bitfield.cpusse4_2
9553 || i.tm.cpu_flags.bitfield.cpucx16
9554 || i.tm.cpu_flags.bitfield.cpupopcnt
9555 /* LAHF-SAHF insns in 64-bit mode. */
9556 || (flag_code == CODE_64BIT
9557 && (i.tm.base_opcode | 1) == 0x9f
9558 && i.tm.opcode_space == SPACE_BASE))
9559 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
9560 if (i.tm.cpu_flags.bitfield.cpuavx
9561 || i.tm.cpu_flags.bitfield.cpuavx2
9562 /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
9563 XOP, FMA4, LWP, TBM, and AMX. */
9564 || (i.tm.opcode_modifier.vex
9565 && !i.tm.cpu_flags.bitfield.cpuavx512f
9566 && !i.tm.cpu_flags.bitfield.cpuavx512bw
9567 && !i.tm.cpu_flags.bitfield.cpuavx512dq
9568 && !i.tm.cpu_flags.bitfield.cpuxop
9569 && !i.tm.cpu_flags.bitfield.cpufma4
9570 && !i.tm.cpu_flags.bitfield.cpulwp
9571 && !i.tm.cpu_flags.bitfield.cputbm
9572 && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
9573 || i.tm.cpu_flags.bitfield.cpuf16c
9574 || i.tm.cpu_flags.bitfield.cpufma
9575 || i.tm.cpu_flags.bitfield.cpulzcnt
9576 || i.tm.cpu_flags.bitfield.cpumovbe
9577 || i.tm.cpu_flags.bitfield.cpuxsaves
9578 || (x86_feature_2_used
9579 & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
9580 | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
9581 | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
9582 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
9583 if (i.tm.cpu_flags.bitfield.cpuavx512f
9584 || i.tm.cpu_flags.bitfield.cpuavx512bw
9585 || i.tm.cpu_flags.bitfield.cpuavx512dq
9586 || i.tm.cpu_flags.bitfield.cpuavx512vl
9587 /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
9588 AVX512-4FMAPS, and AVX512-4VNNIW. */
9589 || (i.tm.opcode_modifier.evex
9590 && !i.tm.cpu_flags.bitfield.cpuavx512er
9591 && !i.tm.cpu_flags.bitfield.cpuavx512pf
9592 && !i.tm.cpu_flags.bitfield.cpuavx512_4fmaps
9593 && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
9594 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
9595 }
9596 #endif
9597
9598 /* Tie dwarf2 debug info to the address at the start of the insn.
9599 We can't do this after the insn has been output as the current
9600 frag may have been closed off. eg. by frag_var. */
9601 dwarf2_emit_insn (0);
9602
9603 insn_start_frag = frag_now;
9604 insn_start_off = frag_now_fix ();
9605
9606 if (add_branch_padding_frag_p (&branch, &mf_jcc))
9607 {
9608 char *p;
9609 /* Branch can be 8 bytes. Leave some room for prefixes. */
9610 unsigned int max_branch_padding_size = 14;
9611
9612 /* Align section to boundary. */
9613 record_alignment (now_seg, align_branch_power);
9614
9615 /* Make room for padding. */
9616 frag_grow (max_branch_padding_size);
9617
9618 /* Start of the padding. */
9619 p = frag_more (0);
9620
9621 fragP = frag_now;
9622
9623 frag_var (rs_machine_dependent, max_branch_padding_size, 0,
9624 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
9625 NULL, 0, p);
9626
9627 fragP->tc_frag_data.mf_type = mf_jcc;
9628 fragP->tc_frag_data.branch_type = branch;
9629 fragP->tc_frag_data.max_bytes = max_branch_padding_size;
9630 }
9631
9632 if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
9633 && !pre_386_16bit_warned)
9634 {
9635 as_warn (_("use .code16 to ensure correct addressing mode"));
9636 pre_386_16bit_warned = true;
9637 }
9638
9639 /* Output jumps. */
9640 if (i.tm.opcode_modifier.jump == JUMP)
9641 output_branch ();
9642 else if (i.tm.opcode_modifier.jump == JUMP_BYTE
9643 || i.tm.opcode_modifier.jump == JUMP_DWORD)
9644 output_jump ();
9645 else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
9646 output_interseg_jump ();
9647 else
9648 {
9649 /* Output normal instructions here. */
9650 char *p;
9651 unsigned char *q;
9652 unsigned int j;
9653 enum mf_cmp_kind mf_cmp;
9654
9655 if (avoid_fence
9656 && (i.tm.base_opcode == 0xaee8
9657 || i.tm.base_opcode == 0xaef0
9658 || i.tm.base_opcode == 0xaef8))
9659 {
9660 /* Encode lfence, mfence, and sfence as
9661 f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
9662 if (flag_code == CODE_16BIT)
9663 as_bad (_("Cannot convert `%s' in 16-bit mode"), insn_name (&i.tm));
9664 else if (omit_lock_prefix)
9665 as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
9666 insn_name (&i.tm));
9667 else if (now_seg != absolute_section)
9668 {
9669 offsetT val = 0x240483f0ULL;
9670
9671 p = frag_more (5);
9672 md_number_to_chars (p, val, 5);
9673 }
9674 else
9675 abs_section_offset += 5;
9676 return;
9677 }
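/* Byte-level view of the 5-byte replacement emitted above:

	f0	lock
	83	grp1 r/m32, imm8 (/0 = add)
	04	modrm: mod=00 reg=000 rm=100 (SIB follows)
	24	sib: scale=1 index=none base=%esp/%rsp
	00	imm8 0

   i.e. "lock addl $0x0, (%esp)" resp. "lock addl $0x0, (%rsp)".  */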
9678
9679 /* Some processors fail on the LOCK prefix. This option makes the
9680 assembler ignore the LOCK prefix and serves as a workaround. */
9681 if (omit_lock_prefix)
9682 {
9683 if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
9684 && i.tm.opcode_modifier.isprefix)
9685 return;
9686 i.prefix[LOCK_PREFIX] = 0;
9687 }
9688
9689 if (branch)
9690 /* Skip if this is a branch. */
9691 ;
9692 else if (add_fused_jcc_padding_frag_p (&mf_cmp))
9693 {
9694 /* Make room for padding. */
9695 frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
9696 p = frag_more (0);
9697
9698 fragP = frag_now;
9699
9700 frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
9701 ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
9702 NULL, 0, p);
9703
9704 fragP->tc_frag_data.mf_type = mf_cmp;
9705 fragP->tc_frag_data.branch_type = align_branch_fused;
9706 fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
9707 }
9708 else if (add_branch_prefix_frag_p ())
9709 {
9710 unsigned int max_prefix_size = align_branch_prefix_size;
9711
9712 /* Make room for padding. */
9713 frag_grow (max_prefix_size);
9714 p = frag_more (0);
9715
9716 fragP = frag_now;
9717
9718 frag_var (rs_machine_dependent, max_prefix_size, 0,
9719 ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
9720 NULL, 0, p);
9721
9722 fragP->tc_frag_data.max_bytes = max_prefix_size;
9723 }
9724
9725 /* Since the VEX/EVEX prefix contains the implicit prefix, we
9726 don't need the explicit prefix. */
9727 if (!is_any_vex_encoding (&i.tm))
9728 {
9729 switch (i.tm.opcode_modifier.opcodeprefix)
9730 {
9731 case PREFIX_0X66:
9732 add_prefix (0x66);
9733 break;
9734 case PREFIX_0XF2:
9735 add_prefix (0xf2);
9736 break;
9737 case PREFIX_0XF3:
9738 if (!i.tm.cpu_flags.bitfield.cpupadlock
9739 || (i.prefix[REP_PREFIX] != 0xf3))
9740 add_prefix (0xf3);
9741 break;
9742 case PREFIX_NONE:
9743 switch (i.opcode_length)
9744 {
9745 case 2:
9746 break;
9747 case 1:
9748 /* Check for pseudo prefixes. */
9749 if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
9750 break;
9751 as_bad_where (insn_start_frag->fr_file,
9752 insn_start_frag->fr_line,
9753 _("pseudo prefix without instruction"));
9754 return;
9755 default:
9756 abort ();
9757 }
9758 break;
9759 default:
9760 abort ();
9761 }
9762
9763 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9764 /* For x32, add a dummy REX_OPCODE prefix for mov/add with
9765 R_X86_64_GOTTPOFF relocation so that linker can safely
9766 perform IE->LE optimization. A dummy REX_OPCODE prefix
9767 is also needed for lea with R_X86_64_GOTPC32_TLSDESC
9768 relocation for GDesc -> IE/LE optimization. */
9769 if (x86_elf_abi == X86_64_X32_ABI
9770 && i.operands == 2
9771 && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
9772 || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
9773 && i.prefix[REX_PREFIX] == 0)
9774 add_prefix (REX_OPCODE);
9775 #endif
9776
9777 /* The prefix bytes. */
9778 for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
9779 if (*q)
9780 frag_opcode_byte (*q);
9781 }
9782 else
9783 {
9784 for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
9785 if (*q)
9786 switch (j)
9787 {
9788 case SEG_PREFIX:
9789 case ADDR_PREFIX:
9790 frag_opcode_byte (*q);
9791 break;
9792 default:
9793 /* There should be no other prefixes for instructions
9794 with VEX prefix. */
9795 abort ();
9796 }
9797
9798 /* For EVEX instructions i.vrex should become 0 after
9799 build_evex_prefix. For VEX instructions upper 16 registers
9800 aren't available, so VREX should be 0. */
9801 if (i.vrex)
9802 abort ();
9803 /* Now the VEX prefix. */
9804 if (now_seg != absolute_section)
9805 {
9806 p = frag_more (i.vex.length);
9807 for (j = 0; j < i.vex.length; j++)
9808 p[j] = i.vex.bytes[j];
9809 }
9810 else
9811 abs_section_offset += i.vex.length;
9812 }
9813
9814 /* Now the opcode; be careful about word order here! */
9815 j = i.opcode_length;
9816 if (!i.vex.length)
9817 switch (i.tm.opcode_space)
9818 {
9819 case SPACE_BASE:
9820 break;
9821 case SPACE_0F:
9822 ++j;
9823 break;
9824 case SPACE_0F38:
9825 case SPACE_0F3A:
9826 j += 2;
9827 break;
9828 default:
9829 abort ();
9830 }
9831
9832 if (now_seg == absolute_section)
9833 abs_section_offset += j;
9834 else if (j == 1)
9835 {
9836 FRAG_APPEND_1_CHAR (i.tm.base_opcode);
9837 }
9838 else
9839 {
9840 p = frag_more (j);
9841 if (!i.vex.length
9842 && i.tm.opcode_space != SPACE_BASE)
9843 {
9844 *p++ = 0x0f;
9845 if (i.tm.opcode_space != SPACE_0F)
9846 *p++ = i.tm.opcode_space == SPACE_0F38
9847 ? 0x38 : 0x3a;
9848 }
9849
9850 switch (i.opcode_length)
9851 {
9852 case 2:
9853 /* Put out high byte first: can't use md_number_to_chars! */
9854 *p++ = (i.tm.base_opcode >> 8) & 0xff;
9855 /* Fall through. */
9856 case 1:
9857 *p = i.tm.base_opcode & 0xff;
9858 break;
9859 default:
9860 abort ();
9861 break;
9862 }
9863
9864 }
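/* For example (a sketch): a legacy-encoded insn from opcode map 0F38,
   such as "pshufb %mm1, %mm0" (0F 38 00 /r), arrives here with
   i.tm.opcode_space == SPACE_0F38 and i.tm.base_opcode == 0x00; the
   0x0f 0x38 escape bytes are emitted ahead of the base opcode.  */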
9865
9866 /* Now the modrm byte and sib byte (if present). */
9867 if (i.tm.opcode_modifier.modrm)
9868 {
9869 frag_opcode_byte ((i.rm.regmem << 0)
9870 | (i.rm.reg << 3)
9871 | (i.rm.mode << 6));
9872 /* If i.rm.regmem == ESP (4)
9873 && i.rm.mode != (Register mode)
9874 && not 16 bit
9875 ==> need the SIB byte. */
9876 if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
9877 && i.rm.mode != 3
9878 && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
9879 frag_opcode_byte ((i.sib.base << 0)
9880 | (i.sib.index << 3)
9881 | (i.sib.scale << 6));
9882 }
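/* Concrete sketch of the two bytes built above, e.g. for
   "movl %eax, 8(%ebx,%esi,4)" -> 89 44 b3 08:
	44 = modrm: mod=01 (disp8) reg=000 (%eax) rm=100 (SIB follows)
	b3 = sib: scale=10 (*4) index=110 (%esi) base=011 (%ebx).  */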
9883
9884 if (i.disp_operands)
9885 output_disp (insn_start_frag, insn_start_off);
9886
9887 if (i.imm_operands)
9888 output_imm (insn_start_frag, insn_start_off);
9889
9890 /*
9891 * frag_now_fix () returning plain abs_section_offset when we're in the
9892 * absolute section, and abs_section_offset not getting updated as data
9893 * gets added to the frag breaks the logic below.
9894 */
9895 if (now_seg != absolute_section)
9896 {
9897 j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
9898 if (j > 15)
9899 as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
9900 j);
9901 else if (fragP)
9902 {
9903 /* NB: Don't add prefix with GOTPC relocation since
9904 output_disp() above depends on the fixed encoding
9905 length. Can't add prefix with TLS relocation since
9906 it breaks TLS linker optimization. */
9907 unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
9908 /* Prefix count on the current instruction. */
9909 unsigned int count = i.vex.length;
9910 unsigned int k;
9911 for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
9912 /* REX byte is encoded in VEX/EVEX prefix. */
9913 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
9914 count++;
9915
9916 /* Count prefixes for extended opcode maps. */
9917 if (!i.vex.length)
9918 switch (i.tm.opcode_space)
9919 {
9920 case SPACE_BASE:
9921 break;
9922 case SPACE_0F:
9923 count++;
9924 break;
9925 case SPACE_0F38:
9926 case SPACE_0F3A:
9927 count += 2;
9928 break;
9929 default:
9930 abort ();
9931 }
9932
9933 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
9934 == BRANCH_PREFIX)
9935 {
9936 /* Set the maximum prefix size in BRANCH_PREFIX
9937 frag. */
9938 if (fragP->tc_frag_data.max_bytes > max)
9939 fragP->tc_frag_data.max_bytes = max;
9940 if (fragP->tc_frag_data.max_bytes > count)
9941 fragP->tc_frag_data.max_bytes -= count;
9942 else
9943 fragP->tc_frag_data.max_bytes = 0;
9944 }
9945 else
9946 {
9947 /* Remember the maximum prefix size in FUSED_JCC_PADDING
9948 frag. */
9949 unsigned int max_prefix_size;
9950 if (align_branch_prefix_size > max)
9951 max_prefix_size = max;
9952 else
9953 max_prefix_size = align_branch_prefix_size;
9954 if (max_prefix_size > count)
9955 fragP->tc_frag_data.max_prefix_length
9956 = max_prefix_size - count;
9957 }
9958
9959 /* Use existing segment prefix if possible. Use CS
9960 segment prefix in 64-bit mode. In 32-bit mode, use SS
9961 segment prefix with ESP/EBP base register and use DS
9962 segment prefix without ESP/EBP base register. */
9963 if (i.prefix[SEG_PREFIX])
9964 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
9965 else if (flag_code == CODE_64BIT)
9966 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
9967 else if (i.base_reg
9968 && (i.base_reg->reg_num == 4
9969 || i.base_reg->reg_num == 5))
9970 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
9971 else
9972 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
9973 }
9974 }
9975 }
9976
9977 /* NB: Don't work with COND_JUMP86 without i386. */
9978 if (align_branch_power
9979 && now_seg != absolute_section
9980 && cpu_arch_flags.bitfield.cpui386)
9981 {
9982 /* Terminate each frag so that we can add prefix and check for
9983 fused jcc. */
9984 frag_wane (frag_now);
9985 frag_new (0);
9986 }
9987
9988 #ifdef DEBUG386
9989 if (flag_debug)
9990 {
9991 pi ("" /*line*/, &i);
9992 }
9993 #endif /* DEBUG386 */
9994 }
9995
9996 /* Return the size of the displacement operand N. */
9997
9998 static int
9999 disp_size (unsigned int n)
10000 {
10001 int size = 4;
10002
10003 if (i.types[n].bitfield.disp64)
10004 size = 8;
10005 else if (i.types[n].bitfield.disp8)
10006 size = 1;
10007 else if (i.types[n].bitfield.disp16)
10008 size = 2;
10009 return size;
10010 }
10011
10012 /* Return the size of the immediate operand N. */
10013
10014 static int
10015 imm_size (unsigned int n)
10016 {
10017 int size = 4;
10018 if (i.types[n].bitfield.imm64)
10019 size = 8;
10020 else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
10021 size = 1;
10022 else if (i.types[n].bitfield.imm16)
10023 size = 2;
10024 return size;
10025 }
10026
10027 static void
10028 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
10029 {
10030 char *p;
10031 unsigned int n;
10032
10033 for (n = 0; n < i.operands; n++)
10034 {
10035 if (operand_type_check (i.types[n], disp))
10036 {
10037 int size = disp_size (n);
10038
10039 if (now_seg == absolute_section)
10040 abs_section_offset += size;
10041 else if (i.op[n].disps->X_op == O_constant)
10042 {
10043 offsetT val = i.op[n].disps->X_add_number;
10044
10045 val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
10046 size);
10047 p = frag_more (size);
10048 md_number_to_chars (p, val, size);
10049 }
10050 else
10051 {
10052 enum bfd_reloc_code_real reloc_type;
10053 bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
10054 bool sign = (flag_code == CODE_64BIT && size == 4
10055 && (!want_disp32 (&i.tm)
10056 || (i.tm.opcode_modifier.jump && !i.jumpabsolute
10057 && !i.types[n].bitfield.baseindex)))
10058 || pcrel;
10059 fixS *fixP;
10060
10061 /* We can't have 8 bit displacement here. */
10062 gas_assert (!i.types[n].bitfield.disp8);
10063
10064 /* The PC relative address is computed relative
10065 to the instruction boundary, so in case immediate
10066 fields follow, we need to adjust the value. */
10067 if (pcrel && i.imm_operands)
10068 {
10069 unsigned int n1;
10070 int sz = 0;
10071
10072 for (n1 = 0; n1 < i.operands; n1++)
10073 if (operand_type_check (i.types[n1], imm))
10074 {
10075 /* Only one immediate is allowed for PC
10076 relative address, except with .insn. */
10077 gas_assert (sz == 0 || dot_insn ());
10078 sz += imm_size (n1);
10079 }
10080 /* We should find at least one immediate. */
10081 gas_assert (sz != 0);
10082 i.op[n].disps->X_add_number -= sz;
10083 }
10084
10085 p = frag_more (size);
10086 reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
10087 if (GOT_symbol
10088 && GOT_symbol == i.op[n].disps->X_add_symbol
10089 && (((reloc_type == BFD_RELOC_32
10090 || reloc_type == BFD_RELOC_X86_64_32S
10091 || (reloc_type == BFD_RELOC_64
10092 && object_64bit))
10093 && (i.op[n].disps->X_op == O_symbol
10094 || (i.op[n].disps->X_op == O_add
10095 && ((symbol_get_value_expression
10096 (i.op[n].disps->X_op_symbol)->X_op)
10097 == O_subtract))))
10098 || reloc_type == BFD_RELOC_32_PCREL))
10099 {
10100 if (!object_64bit)
10101 {
10102 reloc_type = BFD_RELOC_386_GOTPC;
10103 i.has_gotpc_tls_reloc = true;
10104 i.op[n].disps->X_add_number +=
10105 encoding_length (insn_start_frag, insn_start_off, p);
10106 }
10107 else if (reloc_type == BFD_RELOC_64)
10108 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10109 else
10110 /* Don't do the adjustment for x86-64, as there
10111 the pcrel addressing is relative to the _next_
10112 insn, and that is taken care of in other code. */
10113 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10114 }
10115 else if (align_branch_power)
10116 {
10117 switch (reloc_type)
10118 {
10119 case BFD_RELOC_386_TLS_GD:
10120 case BFD_RELOC_386_TLS_LDM:
10121 case BFD_RELOC_386_TLS_IE:
10122 case BFD_RELOC_386_TLS_IE_32:
10123 case BFD_RELOC_386_TLS_GOTIE:
10124 case BFD_RELOC_386_TLS_GOTDESC:
10125 case BFD_RELOC_386_TLS_DESC_CALL:
10126 case BFD_RELOC_X86_64_TLSGD:
10127 case BFD_RELOC_X86_64_TLSLD:
10128 case BFD_RELOC_X86_64_GOTTPOFF:
10129 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
10130 case BFD_RELOC_X86_64_TLSDESC_CALL:
10131 i.has_gotpc_tls_reloc = true;
10132 default:
10133 break;
10134 }
10135 }
10136 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
10137 size, i.op[n].disps, pcrel,
10138 reloc_type);
10139
10140 if (flag_code == CODE_64BIT && size == 4 && pcrel
10141 && !i.prefix[ADDR_PREFIX])
10142 fixP->fx_signed = 1;
10143
10144 /* Check for "call/jmp *mem", "mov mem, %reg",
10145 "test %reg, mem" and "binop mem, %reg" where binop
10146 is one of adc, add, and, cmp, or, sbb, sub, xor
10147 instructions without data prefix. Always generate
10148 R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
10149 if (i.prefix[DATA_PREFIX] == 0
10150 && (generate_relax_relocations
10151 || (!object_64bit
10152 && i.rm.mode == 0
10153 && i.rm.regmem == 5))
10154 && (i.rm.mode == 2
10155 || (i.rm.mode == 0 && i.rm.regmem == 5))
10156 && i.tm.opcode_space == SPACE_BASE
10157 && ((i.operands == 1
10158 && i.tm.base_opcode == 0xff
10159 && (i.rm.reg == 2 || i.rm.reg == 4))
10160 || (i.operands == 2
10161 && (i.tm.base_opcode == 0x8b
10162 || i.tm.base_opcode == 0x85
10163 || (i.tm.base_opcode & ~0x38) == 0x03))))
10164 {
10165 if (object_64bit)
10166 {
10167 fixP->fx_tcbit = i.rex != 0;
10168 if (i.base_reg
10169 && (i.base_reg->reg_num == RegIP))
10170 fixP->fx_tcbit2 = 1;
10171 }
10172 else
10173 fixP->fx_tcbit2 = 1;
10174 }
10175 }
10176 }
10177 }
10178 }
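/* Sketch of the adjustment above: for "cmpl $0x12345678, foo(%rip)" the
   displacement fixup precedes the 4-byte immediate, while the CPU forms
   %rip from the end of the insn.  The addend is therefore biased by the
   immediate size, giving R_X86_64_PC32 against foo with addend -8
   rather than the usual -4.  */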
10179
10180 static void
10181 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
10182 {
10183 char *p;
10184 unsigned int n;
10185
10186 for (n = 0; n < i.operands; n++)
10187 {
10188 if (operand_type_check (i.types[n], imm))
10189 {
10190 int size = imm_size (n);
10191
10192 if (now_seg == absolute_section)
10193 abs_section_offset += size;
10194 else if (i.op[n].imms->X_op == O_constant)
10195 {
10196 offsetT val;
10197
10198 val = offset_in_range (i.op[n].imms->X_add_number,
10199 size);
10200 p = frag_more (size);
10201 md_number_to_chars (p, val, size);
10202 }
10203 else
10204 {
10205 /* Not absolute_section.
10206 Need a 32-bit fixup (don't support 8bit
10207 non-absolute imms). Try to support other
10208 sizes ... */
10209 enum bfd_reloc_code_real reloc_type;
10210 int sign;
10211
10212 if (i.types[n].bitfield.imm32s
10213 && (i.suffix == QWORD_MNEM_SUFFIX
10214 || (!i.suffix && i.tm.opcode_modifier.no_lsuf)
10215 || dot_insn ()))
10216 sign = 1;
10217 else
10218 sign = 0;
10219
10220 p = frag_more (size);
10221 reloc_type = reloc (size, 0, sign, i.reloc[n]);
10222
10223 /* This is tough to explain. We end up with this one if we
10224 * have operands that look like
10225 * "_GLOBAL_OFFSET_TABLE_+[.-.L284]". The goal here is to
10226 * obtain the absolute address of the GOT, and it is strongly
10227 * preferable from a performance point of view to avoid using
10228 * a runtime relocation for this. The actual sequence of
10229 * instructions often looks something like:
10230 *
10231 * call .L66
10232 * .L66:
10233 * popl %ebx
10234 * addl $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
10235 *
10236 * The call and pop essentially return the absolute address
10237 * of the label .L66 and store it in %ebx. The linker itself
10238 * will ultimately change the first operand of the addl so
10239 * that %ebx points to the GOT, but to keep things simple, the
10240 * .o file must have this operand set so that it generates not
10241 * the absolute address of .L66, but the absolute address of
10242 * itself. This allows the linker to simply treat a GOTPC
10243 * relocation as asking for a pcrel offset to the GOT to be
10244 * added in, and the addend of the relocation is stored in the
10245 * operand field for the instruction itself.
10246 *
10247 * Our job here is to fix the operand so that it would add
10248 * the correct offset so that %ebx would point to itself. The
10249 * thing that is tricky is that .-.L66 will point to the
10250 * beginning of the instruction, so we need to further modify
10251 * the operand so that it will point to itself. There are
10252 * other cases where you have something like:
10253 *
10254 * .long $_GLOBAL_OFFSET_TABLE_+[.-.L66]
10255 *
10256 * and here no correction would be required. Internally in
10257 * the assembler we treat operands of this form as not being
10258 * pcrel since the '.' is explicitly mentioned, and I wonder
10259 * whether it would simplify matters to do it this way. Who
10260 * knows. In earlier versions of the PIC patches, the
10261 * pcrel_adjust field was used to store the correction, but
10262 * since the expression is not pcrel, I felt it would be
10263 * confusing to do it this way. */
10264
10265 if ((reloc_type == BFD_RELOC_32
10266 || reloc_type == BFD_RELOC_X86_64_32S
10267 || reloc_type == BFD_RELOC_64)
10268 && GOT_symbol
10269 && GOT_symbol == i.op[n].imms->X_add_symbol
10270 && (i.op[n].imms->X_op == O_symbol
10271 || (i.op[n].imms->X_op == O_add
10272 && ((symbol_get_value_expression
10273 (i.op[n].imms->X_op_symbol)->X_op)
10274 == O_subtract))))
10275 {
10276 if (!object_64bit)
10277 reloc_type = BFD_RELOC_386_GOTPC;
10278 else if (size == 4)
10279 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10280 else if (size == 8)
10281 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10282 i.has_gotpc_tls_reloc = true;
10283 i.op[n].imms->X_add_number +=
10284 encoding_length (insn_start_frag, insn_start_off, p);
10285 }
10286 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10287 i.op[n].imms, 0, reloc_type);
10288 }
10289 }
10290 }
10291 }
10292 \f
10293 /* x86_cons_fix_new is called via the expression parsing code when a
10294 reloc is needed. We use this hook to get the correct .got reloc. */
10295 static int cons_sign = -1;
10296
10297 void
10298 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
10299 expressionS *exp, bfd_reloc_code_real_type r)
10300 {
10301 r = reloc (len, 0, cons_sign, r);
10302
10303 #ifdef TE_PE
10304 if (exp->X_op == O_secrel)
10305 {
10306 exp->X_op = O_symbol;
10307 r = BFD_RELOC_32_SECREL;
10308 }
10309 else if (exp->X_op == O_secidx)
10310 r = BFD_RELOC_16_SECIDX;
10311 #endif
10312
10313 fix_new_exp (frag, off, len, exp, 0, r);
10314 }
10315
10316 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
10317 purpose of the `.dc.a' internal pseudo-op. */
10318
10319 int
10320 x86_address_bytes (void)
10321 {
10322 if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
10323 return 4;
10324 return stdoutput->arch_info->bits_per_address / 8;
10325 }
10326
10327 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
10328 || defined (LEX_AT)) && !defined (TE_PE)
10329 # define lex_got(reloc, adjust, types) NULL
10330 #else
10331 /* Parse operands of the form
10332 <symbol>@GOTOFF+<nnn>
10333 and similar .plt or .got references.
10334
10335 If we find one, set up the correct relocation in RELOC and copy the
10336 input string, minus the `@GOTOFF', into a malloc'd buffer for
10337 parsing by the calling routine. Return this buffer, and if ADJUST
10338 is non-null set it to the length of the string we removed from the
10339 input line. Otherwise return NULL. */
10340 static char *
10341 lex_got (enum bfd_reloc_code_real *rel,
10342 int *adjust,
10343 i386_operand_type *types)
10344 {
10345 /* Some of the relocations depend on the size of the field to be
10346 relocated. But in our callers i386_immediate and i386_displacement
10347 we don't yet know the operand size (this will be set by insn
10348 matching). Hence we record the word32 relocation here,
10349 and adjust the reloc according to the real size in reloc(). */
10350 static const struct
10351 {
10352 const char *str;
10353 int len;
10354 const enum bfd_reloc_code_real rel[2];
10355 const i386_operand_type types64;
10356 bool need_GOT_symbol;
10357 }
10358 gotrel[] =
10359 {
10360
10361 #define OPERAND_TYPE_IMM32_32S_DISP32 { .bitfield = \
10362 { .imm32 = 1, .imm32s = 1, .disp32 = 1 } }
10363 #define OPERAND_TYPE_IMM32_32S_64_DISP32 { .bitfield = \
10364 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1 } }
10365 #define OPERAND_TYPE_IMM32_32S_64_DISP32_64 { .bitfield = \
10366 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1, .disp64 = 1 } }
10367 #define OPERAND_TYPE_IMM64_DISP64 { .bitfield = \
10368 { .imm64 = 1, .disp64 = 1 } }
10369
10370 #ifndef TE_PE
10371 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10372 { STRING_COMMA_LEN ("SIZE"), { BFD_RELOC_SIZE32,
10373 BFD_RELOC_SIZE32 },
10374 { .bitfield = { .imm32 = 1, .imm64 = 1 } }, false },
10375 #endif
10376 { STRING_COMMA_LEN ("PLTOFF"), { _dummy_first_bfd_reloc_code_real,
10377 BFD_RELOC_X86_64_PLTOFF64 },
10378 { .bitfield = { .imm64 = 1 } }, true },
10379 { STRING_COMMA_LEN ("PLT"), { BFD_RELOC_386_PLT32,
10380 BFD_RELOC_X86_64_PLT32 },
10381 OPERAND_TYPE_IMM32_32S_DISP32, false },
10382 { STRING_COMMA_LEN ("GOTPLT"), { _dummy_first_bfd_reloc_code_real,
10383 BFD_RELOC_X86_64_GOTPLT64 },
10384 OPERAND_TYPE_IMM64_DISP64, true },
10385 { STRING_COMMA_LEN ("GOTOFF"), { BFD_RELOC_386_GOTOFF,
10386 BFD_RELOC_X86_64_GOTOFF64 },
10387 OPERAND_TYPE_IMM64_DISP64, true },
10388 { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
10389 BFD_RELOC_X86_64_GOTPCREL },
10390 OPERAND_TYPE_IMM32_32S_DISP32, true },
10391 { STRING_COMMA_LEN ("TLSGD"), { BFD_RELOC_386_TLS_GD,
10392 BFD_RELOC_X86_64_TLSGD },
10393 OPERAND_TYPE_IMM32_32S_DISP32, true },
10394 { STRING_COMMA_LEN ("TLSLDM"), { BFD_RELOC_386_TLS_LDM,
10395 _dummy_first_bfd_reloc_code_real },
10396 OPERAND_TYPE_NONE, true },
10397 { STRING_COMMA_LEN ("TLSLD"), { _dummy_first_bfd_reloc_code_real,
10398 BFD_RELOC_X86_64_TLSLD },
10399 OPERAND_TYPE_IMM32_32S_DISP32, true },
10400 { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
10401 BFD_RELOC_X86_64_GOTTPOFF },
10402 OPERAND_TYPE_IMM32_32S_DISP32, true },
10403 { STRING_COMMA_LEN ("TPOFF"), { BFD_RELOC_386_TLS_LE_32,
10404 BFD_RELOC_X86_64_TPOFF32 },
10405 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10406 { STRING_COMMA_LEN ("NTPOFF"), { BFD_RELOC_386_TLS_LE,
10407 _dummy_first_bfd_reloc_code_real },
10408 OPERAND_TYPE_NONE, true },
10409 { STRING_COMMA_LEN ("DTPOFF"), { BFD_RELOC_386_TLS_LDO_32,
10410 BFD_RELOC_X86_64_DTPOFF32 },
10411 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10412 { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
10413 _dummy_first_bfd_reloc_code_real },
10414 OPERAND_TYPE_NONE, true },
10415 { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
10416 _dummy_first_bfd_reloc_code_real },
10417 OPERAND_TYPE_NONE, true },
10418 { STRING_COMMA_LEN ("GOT"), { BFD_RELOC_386_GOT32,
10419 BFD_RELOC_X86_64_GOT32 },
10420 OPERAND_TYPE_IMM32_32S_64_DISP32, true },
10421 { STRING_COMMA_LEN ("TLSDESC"), { BFD_RELOC_386_TLS_GOTDESC,
10422 BFD_RELOC_X86_64_GOTPC32_TLSDESC },
10423 OPERAND_TYPE_IMM32_32S_DISP32, true },
10424 { STRING_COMMA_LEN ("TLSCALL"), { BFD_RELOC_386_TLS_DESC_CALL,
10425 BFD_RELOC_X86_64_TLSDESC_CALL },
10426 OPERAND_TYPE_IMM32_32S_DISP32, true },
10427 #else /* TE_PE */
10428 { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
10429 BFD_RELOC_32_SECREL },
10430 OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
10431 #endif
10432
10433 #undef OPERAND_TYPE_IMM32_32S_DISP32
10434 #undef OPERAND_TYPE_IMM32_32S_64_DISP32
10435 #undef OPERAND_TYPE_IMM32_32S_64_DISP32_64
10436 #undef OPERAND_TYPE_IMM64_DISP64
10437
10438 };
10439 char *cp;
10440 unsigned int j;
10441
10442 #if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
10443 if (!IS_ELF)
10444 return NULL;
10445 #endif
10446
10447 for (cp = input_line_pointer; *cp != '@'; cp++)
10448 if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10449 return NULL;
10450
10451 for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10452 {
10453 int len = gotrel[j].len;
10454 if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10455 {
10456 if (gotrel[j].rel[object_64bit] != 0)
10457 {
10458 int first, second;
10459 char *tmpbuf, *past_reloc;
10460
10461 *rel = gotrel[j].rel[object_64bit];
10462
10463 if (types)
10464 {
10465 if (flag_code != CODE_64BIT)
10466 {
10467 types->bitfield.imm32 = 1;
10468 types->bitfield.disp32 = 1;
10469 }
10470 else
10471 *types = gotrel[j].types64;
10472 }
10473
10474 if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
10475 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
10476
10477 /* The length of the first part of our input line. */
10478 first = cp - input_line_pointer;
10479
10480 /* The second part goes from after the reloc token until
10481 (and including) an end_of_line char or comma. */
10482 past_reloc = cp + 1 + len;
10483 cp = past_reloc;
10484 while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10485 ++cp;
10486 second = cp + 1 - past_reloc;
10487
10488 /* Allocate and copy string. The trailing NUL shouldn't
10489 be necessary, but be safe. */
10490 tmpbuf = XNEWVEC (char, first + second + 2);
10491 memcpy (tmpbuf, input_line_pointer, first);
10492 if (second != 0 && *past_reloc != ' ')
10493 /* Replace the relocation token with ' ', so that
10494 errors like foo@GOTOFF1 will be detected. */
10495 tmpbuf[first++] = ' ';
10496 else
10497 /* Increment length by 1 if the relocation token is
10498 removed. */
10499 len++;
10500 if (adjust)
10501 *adjust = len;
10502 memcpy (tmpbuf + first, past_reloc, second);
10503 tmpbuf[first + second] = '\0';
10504 return tmpbuf;
10505 }
10506
10507 as_bad (_("@%s reloc is not supported with %d-bit output format"),
10508 gotrel[j].str, 1 << (5 + object_64bit));
10509 return NULL;
10510 }
10511 }
10512
10513 /* Might be a symbol version string. Don't as_bad here. */
10514 return NULL;
10515 }
10516 #endif
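/* For instance (an illustrative sketch): given the operand text
   "foo@GOTOFF+4" in 32-bit mode, lex_got() above sets *rel to
   BFD_RELOC_386_GOTOFF and returns a buffer holding "foo +4"; the reloc
   token is replaced by a space so that junk like "foo@GOTOFF1" is still
   diagnosed.  */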
10517
10518 bfd_reloc_code_real_type
10519 x86_cons (expressionS *exp, int size)
10520 {
10521 bfd_reloc_code_real_type got_reloc = NO_RELOC;
10522
10523 intel_syntax = -intel_syntax;
10524 exp->X_md = 0;
10525 expr_mode = expr_operator_none;
10526
10527 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
10528 && !defined (LEX_AT)) \
10529 || defined (TE_PE)
10530 if (size == 4 || (object_64bit && size == 8))
10531 {
10532 /* Handle @GOTOFF and the like in an expression. */
10533 char *save;
10534 char *gotfree_input_line;
10535 int adjust = 0;
10536
10537 save = input_line_pointer;
10538 gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10539 if (gotfree_input_line)
10540 input_line_pointer = gotfree_input_line;
10541
10542 expression (exp);
10543
10544 if (gotfree_input_line)
10545 {
10546 /* expression () has merrily parsed up to the end of line,
10547 or a comma - in the wrong buffer. Transfer how far
10548 input_line_pointer has moved to the right buffer. */
10549 input_line_pointer = (save
10550 + (input_line_pointer - gotfree_input_line)
10551 + adjust);
10552 free (gotfree_input_line);
10553 if (exp->X_op == O_constant
10554 || exp->X_op == O_absent
10555 || exp->X_op == O_illegal
10556 || exp->X_op == O_register
10557 || exp->X_op == O_big)
10558 {
10559 char c = *input_line_pointer;
10560 *input_line_pointer = 0;
10561 as_bad (_("missing or invalid expression `%s'"), save);
10562 *input_line_pointer = c;
10563 }
10564 else if ((got_reloc == BFD_RELOC_386_PLT32
10565 || got_reloc == BFD_RELOC_X86_64_PLT32)
10566 && exp->X_op != O_symbol)
10567 {
10568 char c = *input_line_pointer;
10569 *input_line_pointer = 0;
10570 as_bad (_("invalid PLT expression `%s'"), save);
10571 *input_line_pointer = c;
10572 }
10573 }
10574 }
10575 else
10576 #endif
10577 expression (exp);
10578
10579 intel_syntax = -intel_syntax;
10580
10581 if (intel_syntax)
10582 i386_intel_simplify (exp);
10583
10584 /* If not 64bit, massage the value to account for wraparound when !BFD64. */
10585 if (size <= 4 && expr_mode == expr_operator_present
10586 && exp->X_op == O_constant && !object_64bit)
10587 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10588
10589 return got_reloc;
10590 }
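/* E.g. (a sketch) ".long foo@GOTOFF" in 32-bit code takes the lex_got()
   path above and produces a BFD_RELOC_386_GOTOFF fixup instead of a
   plain BFD_RELOC_32.  */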
10591
10592 static void
10593 signed_cons (int size)
10594 {
10595 if (object_64bit)
10596 cons_sign = 1;
10597 cons (size);
10598 cons_sign = -1;
10599 }
10600
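/* Usage sketch for the ".insn" directive implemented below (AT&T
   syntax assumed; operand order follows the active syntax's rules):

	.insn 0x90				# nop
	.insn 0xf6/0, $0x12, (%ecx)		# testb $0x12, (%ecx)
	.insn VEX.128.66.0F38.W0 0x98, %xmm0, %xmm1, %xmm2
						# vfmadd132ps equivalent

   The optional VEX/XOP<nn>/EVEX encoding specifier (with .L, .pp,
   .mmmm and .W components) and the trailing "+r" / "/<digit>" opcode
   suffixes are parsed piecemeal by the code below.  */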
10601 static void
10602 s_insn (int dummy ATTRIBUTE_UNUSED)
10603 {
10604 char mnemonic[MAX_MNEM_SIZE], *line = input_line_pointer, *ptr;
10605 char *saved_ilp = find_end_of_line (line, false), saved_char;
10606 const char *end;
10607 unsigned int j;
10608 valueT val;
10609 bool vex = false, xop = false, evex = false;
10610 static const templates tt = { &i.tm, &i.tm + 1 };
10611
10612 init_globals ();
10613
10614 saved_char = *saved_ilp;
10615 *saved_ilp = 0;
10616
10617 end = parse_insn (line, mnemonic, true);
10618 if (end == NULL)
10619 {
10620 bad:
10621 *saved_ilp = saved_char;
10622 ignore_rest_of_line ();
10623 i.tm.mnem_off = 0;
10624 return;
10625 }
10626 line += end - line;
10627
10628 current_templates = &tt;
10629 i.tm.mnem_off = MN__insn;
10630 i.tm.extension_opcode = None;
10631
10632 if (startswith (line, "VEX")
10633 && (line[3] == '.' || is_space_char (line[3])))
10634 {
10635 vex = true;
10636 line += 3;
10637 }
10638 else if (startswith (line, "XOP") && ISDIGIT (line[3]))
10639 {
10640 char *e;
10641 unsigned long n = strtoul (line + 3, &e, 16);
10642
10643 if (e == line + 5 && n >= 0x08 && n <= 0x1f
10644 && (*e == '.' || is_space_char (*e)))
10645 {
10646 xop = true;
10647 /* Arrange for build_vex_prefix() to emit 0x8f. */
10648 i.tm.opcode_space = SPACE_XOP08;
10649 i.insn_opcode_space = n;
10650 line = e;
10651 }
10652 }
10653 else if (startswith (line, "EVEX")
10654 && (line[4] == '.' || is_space_char (line[4])))
10655 {
10656 evex = true;
10657 line += 4;
10658 }
10659
10660 if (vex || xop
10661 ? i.vec_encoding == vex_encoding_evex
10662 : evex
10663 ? i.vec_encoding == vex_encoding_vex
10664 || i.vec_encoding == vex_encoding_vex3
10665 : i.vec_encoding != vex_encoding_default)
10666 {
10667 as_bad (_("pseudo-prefix conflicts with encoding specifier"));
10668 goto bad;
10669 }
10670
10671 if (line > end && i.vec_encoding == vex_encoding_default)
10672 i.vec_encoding = evex ? vex_encoding_evex : vex_encoding_vex;
10673
10674 if (line > end && *line == '.')
10675 {
10676 /* Length specifier (VEX.L, XOP.L, EVEX.L'L). */
10677 switch (line[1])
10678 {
10679 case 'L':
10680 switch (line[2])
10681 {
10682 case '0':
10683 if (evex)
10684 i.tm.opcode_modifier.evex = EVEX128;
10685 else
10686 i.tm.opcode_modifier.vex = VEX128;
10687 break;
10688
10689 case '1':
10690 if (evex)
10691 i.tm.opcode_modifier.evex = EVEX256;
10692 else
10693 i.tm.opcode_modifier.vex = VEX256;
10694 break;
10695
10696 case '2':
10697 if (evex)
10698 i.tm.opcode_modifier.evex = EVEX512;
10699 break;
10700
10701 case '3':
10702 if (evex)
10703 i.tm.opcode_modifier.evex = EVEX_L3;
10704 break;
10705
10706 case 'I':
10707 if (line[3] == 'G')
10708 {
10709 if (evex)
10710 i.tm.opcode_modifier.evex = EVEXLIG;
10711 else
10712 i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
10713 ++line;
10714 }
10715 break;
10716 }
10717
10718 if (i.tm.opcode_modifier.vex || i.tm.opcode_modifier.evex)
10719 line += 3;
10720 break;
10721
10722 case '1':
10723 if (line[2] == '2' && line[3] == '8')
10724 {
10725 if (evex)
10726 i.tm.opcode_modifier.evex = EVEX128;
10727 else
10728 i.tm.opcode_modifier.vex = VEX128;
10729 line += 4;
10730 }
10731 break;
10732
10733 case '2':
10734 if (line[2] == '5' && line[3] == '6')
10735 {
10736 if (evex)
10737 i.tm.opcode_modifier.evex = EVEX256;
10738 else
10739 i.tm.opcode_modifier.vex = VEX256;
10740 line += 4;
10741 }
10742 break;
10743
10744 case '5':
10745 if (evex && line[2] == '1' && line[3] == '2')
10746 {
10747 i.tm.opcode_modifier.evex = EVEX512;
10748 line += 4;
10749 }
10750 break;
10751 }
10752 }
10753
10754 if (line > end && *line == '.')
10755 {
10756 /* embedded prefix (VEX.pp, XOP.pp, EVEX.pp). */
10757 switch (line[1])
10758 {
10759 case 'N':
10760 if (line[2] == 'P')
10761 line += 3;
10762 break;
10763
10764 case '6':
10765 if (line[2] == '6')
10766 {
10767 i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
10768 line += 3;
10769 }
10770 break;
10771
10772 case 'F': case 'f':
10773 if (line[2] == '3')
10774 {
10775 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
10776 line += 3;
10777 }
10778 else if (line[2] == '2')
10779 {
10780 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
10781 line += 3;
10782 }
10783 break;
10784 }
10785 }
10786
10787 if (line > end && !xop && *line == '.')
10788 {
10789 /* Encoding space (VEX.mmmmm, EVEX.mmmm). */
10790 switch (line[1])
10791 {
10792 case '0':
10793 if (TOUPPER (line[2]) != 'F')
10794 break;
10795 if (line[3] == '.' || is_space_char (line[3]))
10796 {
10797 i.insn_opcode_space = SPACE_0F;
10798 line += 3;
10799 }
10800 else if (line[3] == '3'
10801 && (line[4] == '8' || TOUPPER (line[4]) == 'A')
10802 && (line[5] == '.' || is_space_char (line[5])))
10803 {
10804 i.insn_opcode_space = line[4] == '8' ? SPACE_0F38 : SPACE_0F3A;
10805 line += 5;
10806 }
10807 break;
10808
10809 case 'M':
10810 if (ISDIGIT (line[2]) && line[2] != '0')
10811 {
10812 char *e;
10813 unsigned long n = strtoul (line + 2, &e, 10);
10814
10815 if (n <= (evex ? 15 : 31)
10816 && (*e == '.' || is_space_char (*e)))
10817 {
10818 i.insn_opcode_space = n;
10819 line = e;
10820 }
10821 }
10822 break;
10823 }
10824 }
10825
10826 if (line > end && *line == '.' && line[1] == 'W')
10827 {
10828 /* VEX.W, XOP.W, EVEX.W */
10829 switch (line[2])
10830 {
10831 case '0':
10832 i.tm.opcode_modifier.vexw = VEXW0;
10833 break;
10834
10835 case '1':
10836 i.tm.opcode_modifier.vexw = VEXW1;
10837 break;
10838
10839 case 'I':
10840 if (line[3] == 'G')
10841 {
10842 i.tm.opcode_modifier.vexw = VEXWIG;
10843 ++line;
10844 }
10845 break;
10846 }
10847
10848 if (i.tm.opcode_modifier.vexw)
10849 line += 3;
10850 }
10851
10852 if (line > end && *line && !is_space_char (*line))
10853 {
10854 /* Improve diagnostic a little. */
10855 if (*line == '.' && line[1] && !is_space_char (line[1]))
10856 ++line;
10857 goto done;
10858 }
10859
10860 /* Before processing the opcode expression, find trailing "+r" or
10861 "/<digit>" specifiers. */
10862 for (ptr = line; ; ++ptr)
10863 {
10864 unsigned long n;
10865 char *e;
10866
10867 ptr = strpbrk (ptr, "+/,");
10868 if (ptr == NULL || *ptr == ',')
10869 break;
10870
10871 if (*ptr == '+' && ptr[1] == 'r'
10872 && (ptr[2] == ',' || (is_space_char (ptr[2]) && ptr[3] == ',')))
10873 {
10874 *ptr = ' ';
10875 ptr[1] = ' ';
10876 i.short_form = true;
10877 break;
10878 }
10879
10880 if (*ptr == '/' && ISDIGIT (ptr[1])
10881 && (n = strtoul (ptr + 1, &e, 8)) < 8
10882 && e == ptr + 2
10883 && (ptr[2] == ',' || (is_space_char (ptr[2]) && ptr[3] == ',')))
10884 {
10885 *ptr = ' ';
10886 ptr[1] = ' ';
10887 i.tm.extension_opcode = n;
10888 i.tm.opcode_modifier.modrm = 1;
10889 break;
10890 }
10891 }
10892
10893 input_line_pointer = line;
10894 val = get_absolute_expression ();
10895 line = input_line_pointer;
10896
10897 if (i.short_form && (val & 7))
10898 as_warn ("`+r' assumes low three opcode bits to be clear");
10899
10900 for (j = 1; j < sizeof(val); ++j)
10901 if (!(val >> (j * 8)))
10902 break;
10903
10904 /* Trim off a prefix if present. */
10905 if (j > 1 && !vex && !xop && !evex)
10906 {
10907 uint8_t byte = val >> ((j - 1) * 8);
10908
10909 switch (byte)
10910 {
10911 case DATA_PREFIX_OPCODE:
10912 case REPE_PREFIX_OPCODE:
10913 case REPNE_PREFIX_OPCODE:
10914 if (!add_prefix (byte))
10915 goto bad;
10916 val &= ((uint64_t)1 << (--j * 8)) - 1;
10917 break;
10918 }
10919 }
10920
10921 /* Trim off encoding space. */
10922 if (j > 1 && !i.insn_opcode_space && (val >> ((j - 1) * 8)) == 0x0f)
10923 {
10924 uint8_t byte = val >> ((--j - 1) * 8);
10925
10926 i.insn_opcode_space = SPACE_0F;
10927 switch (byte & -(j > 1))
10928 {
10929 case 0x38:
10930 i.insn_opcode_space = SPACE_0F38;
10931 --j;
10932 break;
10933 case 0x3a:
10934 i.insn_opcode_space = SPACE_0F3A;
10935 --j;
10936 break;
10937 }
10938 i.tm.opcode_space = i.insn_opcode_space;
10939 val &= ((uint64_t)1 << (j * 8)) - 1;
10940 }
10941 if (!i.tm.opcode_space && (vex || evex))
10942 /* Arrange for build_vex_prefix() to properly emit 0xC4/0xC5.
10943 Also avoid hitting abort() there or in build_evex_prefix(). */
10944 i.tm.opcode_space = i.insn_opcode_space == SPACE_0F ? SPACE_0F
10945 : SPACE_0F38;
10946
10947 if (j > 2)
10948 {
10949 as_bad (_("opcode residual (%#"PRIx64") too wide"), (uint64_t) val);
10950 goto bad;
10951 }
10952 i.opcode_length = j;
10953
10954 /* Handle operands, if any. */
10955 if (*line == ',')
10956 {
10957 i386_operand_type combined;
10958 expressionS *disp_exp = NULL;
10959 bool changed;
10960
10961 i.memshift = -1;
10962
10963 ptr = parse_operands (line + 1, &i386_mnemonics[MN__insn]);
10964 this_operand = -1;
10965 if (!ptr)
10966 goto bad;
10967 line = ptr;
10968
10969 if (!i.operands)
10970 {
10971 as_bad (_("expecting operand after ','; got nothing"));
10972 goto done;
10973 }
10974
10975 if (i.mem_operands > 1)
10976 {
10977 as_bad (_("too many memory references for `%s'"),
10978 &i386_mnemonics[MN__insn]);
10979 goto done;
10980 }
10981
10982 /* Are we to emit ModR/M encoding? */
10983 if (!i.short_form
10984 && (i.mem_operands
10985 || i.reg_operands > (i.vec_encoding != vex_encoding_default)
10986 || i.tm.extension_opcode != None))
10987 i.tm.opcode_modifier.modrm = 1;
10988
10989 if (!i.tm.opcode_modifier.modrm
10990 && (i.reg_operands
10991 > i.short_form + 0U + (i.vec_encoding != vex_encoding_default)
10992 || i.mem_operands))
10993 {
10994 as_bad (_("too many register/memory operands"));
10995 goto done;
10996 }
10997
10998 /* Enforce certain constraints on operands. */
10999 switch (i.reg_operands + i.mem_operands
11000 + (i.tm.extension_opcode != None))
11001 {
11002 case 0:
11003 if (i.short_form)
11004 {
11005 as_bad (_("too few register/memory operands"));
11006 goto done;
11007 }
11008 /* Fall through. */
11009 case 1:
11010 if (i.tm.opcode_modifier.modrm)
11011 {
11012 as_bad (_("too few register/memory operands"));
11013 goto done;
11014 }
11015 break;
11016
11017 case 2:
11018 break;
11019
11020 case 4:
11021 if (i.imm_operands
11022 && (i.op[0].imms->X_op != O_constant
11023 || !fits_in_imm4 (i.op[0].imms->X_add_number)))
11024 {
11025 as_bad (_("constant doesn't fit in %d bits"), evex ? 3 : 4);
11026 goto done;
11027 }
11028 /* Fall through. */
11029 case 3:
11030 if (i.vec_encoding != vex_encoding_default)
11031 {
11032 i.tm.opcode_modifier.vexvvvv = 1;
11033 break;
11034 }
11035 /* Fall through. */
11036 default:
11037 as_bad (_("too many register/memory operands"));
11038 goto done;
11039 }
11040
11041 /* Bring operands into canonical order (imm, mem, reg). */
11042 do
11043 {
11044 changed = false;
11045
11046 for (j = 1; j < i.operands; ++j)
11047 {
11048 if ((!operand_type_check (i.types[j - 1], imm)
11049 && operand_type_check (i.types[j], imm))
11050 || (i.types[j - 1].bitfield.class != ClassNone
11051 && i.types[j].bitfield.class == ClassNone))
11052 {
11053 swap_2_operands (j - 1, j);
11054 changed = true;
11055 }
11056 }
11057 }
11058 while (changed);
11059
11060 /* For Intel syntax swap the order of register operands. */
11061 if (intel_syntax)
11062 switch (i.reg_operands)
11063 {
11064 case 0:
11065 case 1:
11066 break;
11067
11068 case 4:
11069 swap_2_operands (i.imm_operands + i.mem_operands + 1, i.operands - 2);
11070 /* Fall through. */
11071 case 3:
11072 case 2:
11073 swap_2_operands (i.imm_operands + i.mem_operands, i.operands - 1);
11074 break;
11075
11076 default:
11077 abort ();
11078 }
11079
11080 /* Enforce constraints when using VSIB. */
11081 if (i.index_reg
11082 && (i.index_reg->reg_type.bitfield.xmmword
11083 || i.index_reg->reg_type.bitfield.ymmword
11084 || i.index_reg->reg_type.bitfield.zmmword))
11085 {
11086 if (i.vec_encoding == vex_encoding_default)
11087 {
11088 as_bad (_("VSIB unavailable with legacy encoding"));
11089 goto done;
11090 }
11091
11092 if (i.vec_encoding == vex_encoding_evex
11093 && i.reg_operands > 1)
11094 {
11095 /* We could allow two register operands, encoding the 2nd one in
11096 an 8-bit immediate like for 4-register-operand insns, but that
11097 would require ugly fiddling with process_operands() and/or
11098 build_modrm_byte(). */
11099 as_bad (_("too many register operands with VSIB"));
11100 goto done;
11101 }
11102
11103 i.tm.opcode_modifier.sib = 1;
11104 }
11105
11106 /* Establish operand size encoding. */
11107 operand_type_set (&combined, 0);
11108
11109 for (j = i.imm_operands; j < i.operands; ++j)
11110 {
11111 i.types[j].bitfield.instance = InstanceNone;
11112
11113 if (operand_type_check (i.types[j], disp))
11114 {
11115 i.types[j].bitfield.baseindex = 1;
11116 disp_exp = i.op[j].disps;
11117 }
11118
11119 if (evex && i.types[j].bitfield.baseindex)
11120 {
11121 unsigned int n = i.memshift;
11122
11123 if (i.types[j].bitfield.byte)
11124 n = 0;
11125 else if (i.types[j].bitfield.word)
11126 n = 1;
11127 else if (i.types[j].bitfield.dword)
11128 n = 2;
11129 else if (i.types[j].bitfield.qword)
11130 n = 3;
11131 else if (i.types[j].bitfield.xmmword)
11132 n = 4;
11133 else if (i.types[j].bitfield.ymmword)
11134 n = 5;
11135 else if (i.types[j].bitfield.zmmword)
11136 n = 6;
11137
11138 if (i.memshift < 32 && n != i.memshift)
11139 as_warn ("conflicting memory operand size specifiers");
11140 i.memshift = n;
11141 }
11142
11143 if ((i.broadcast.type || i.broadcast.bytes)
11144 && j == i.broadcast.operand)
11145 continue;
11146
11147 combined = operand_type_or (combined, i.types[j]);
11148 combined.bitfield.class = ClassNone;
11149 }
11150
11151 switch ((i.broadcast.type ? i.broadcast.type : 1)
11152 << (i.memshift < 32 ? i.memshift : 0))
11153 {
11154 case 64: combined.bitfield.zmmword = 1; break;
11155 case 32: combined.bitfield.ymmword = 1; break;
11156 case 16: combined.bitfield.xmmword = 1; break;
11157 case 8: combined.bitfield.qword = 1; break;
11158 case 4: combined.bitfield.dword = 1; break;
11159 }
11160
11161 if (i.vec_encoding == vex_encoding_default)
11162 {
11163 if (flag_code == CODE_64BIT && combined.bitfield.qword)
11164 i.rex |= REX_W;
11165 else if ((flag_code == CODE_16BIT ? combined.bitfield.dword
11166 : combined.bitfield.word)
11167 && !add_prefix (DATA_PREFIX_OPCODE))
11168 goto done;
11169 }
11170 else if (!i.tm.opcode_modifier.vexw)
11171 {
11172 if (flag_code == CODE_64BIT)
11173 {
11174 if (combined.bitfield.qword)
11175 i.tm.opcode_modifier.vexw = VEXW1;
11176 else if (combined.bitfield.dword)
11177 i.tm.opcode_modifier.vexw = VEXW0;
11178 }
11179
11180 if (!i.tm.opcode_modifier.vexw)
11181 i.tm.opcode_modifier.vexw = VEXWIG;
11182 }
11183
11184 if (vex || xop)
11185 {
11186 if (!i.tm.opcode_modifier.vex)
11187 {
11188 if (combined.bitfield.ymmword)
11189 i.tm.opcode_modifier.vex = VEX256;
11190 else if (combined.bitfield.xmmword)
11191 i.tm.opcode_modifier.vex = VEX128;
11192 }
11193 }
11194 else if (evex)
11195 {
11196 if (!i.tm.opcode_modifier.evex)
11197 {
11198 /* Do _not_ consider AVX512VL here. */
11199 if (i.rounding.type != rc_none || combined.bitfield.zmmword)
11200 i.tm.opcode_modifier.evex = EVEX512;
11201 else if (combined.bitfield.ymmword)
11202 i.tm.opcode_modifier.evex = EVEX256;
11203 else if (combined.bitfield.xmmword)
11204 i.tm.opcode_modifier.evex = EVEX128;
11205 }
11206
11207 if (i.memshift >= 32)
11208 {
11209 unsigned int n = 0;
11210
11211 switch (i.tm.opcode_modifier.evex)
11212 {
11213 case EVEX512: n = 64; break;
11214 case EVEX256: n = 32; break;
11215 case EVEX128: n = 16; break;
11216 }
11217
11218 if (i.broadcast.type)
11219 n /= i.broadcast.type;
11220
11221 if (n > 0)
11222 for (i.memshift = 0; !(n & 1); n >>= 1)
11223 ++i.memshift;
11224 else if (disp_exp != NULL && disp_exp->X_op == O_constant
11225 && disp_exp->X_add_number != 0
11226 && i.disp_encoding != disp_encoding_32bit)
11227 {
11228 if (!quiet_warnings)
11229 as_warn ("cannot determine memory operand size");
11230 i.disp_encoding = disp_encoding_32bit;
11231 }
11232 }
11233 }
11234
11235 if (i.memshift >= 32)
11236 i.memshift = 0;
11237 else if (!evex)
11238 i.vec_encoding = vex_encoding_error;
11239
11240 if (i.disp_operands && !optimize_disp (&i.tm))
11241 goto done;
11242
11243 /* Establish size for immediate operands. */
11244 for (j = 0; j < i.imm_operands; ++j)
11245 {
11246 expressionS *expP = i.op[j].imms;
11247
11248 gas_assert (operand_type_check (i.types[j], imm));
11249 operand_type_set (&i.types[j], 0);
11250
11251 if (i.imm_bits[j] > 32)
11252 i.types[j].bitfield.imm64 = 1;
11253 else if (i.imm_bits[j] > 16)
11254 {
11255 if (flag_code == CODE_64BIT && (i.flags[j] & Operand_Signed))
11256 i.types[j].bitfield.imm32s = 1;
11257 else
11258 i.types[j].bitfield.imm32 = 1;
11259 }
11260 else if (i.imm_bits[j] > 8)
11261 i.types[j].bitfield.imm16 = 1;
11262 else if (i.imm_bits[j] > 0)
11263 {
11264 if (i.flags[j] & Operand_Signed)
11265 i.types[j].bitfield.imm8s = 1;
11266 else
11267 i.types[j].bitfield.imm8 = 1;
11268 }
11269 else if (expP->X_op == O_constant)
11270 {
11271 i.types[j] = smallest_imm_type (expP->X_add_number);
11272 i.types[j].bitfield.imm1 = 0;
11273 /* Oddly enough imm_size() checks imm64 first, so the bit needs
11274 zapping since smallest_imm_type() sets it unconditionally. */
11275 if (flag_code != CODE_64BIT)
11276 {
11277 i.types[j].bitfield.imm64 = 0;
11278 i.types[j].bitfield.imm32s = 0;
11279 i.types[j].bitfield.imm32 = 1;
11280 }
11281 else if (i.types[j].bitfield.imm32 || i.types[j].bitfield.imm32s)
11282 i.types[j].bitfield.imm64 = 0;
11283 }
11284 else
11285 /* Non-constant expressions are sized heuristically. */
11286 switch (flag_code)
11287 {
11288 case CODE_64BIT: i.types[j].bitfield.imm32s = 1; break;
11289 case CODE_32BIT: i.types[j].bitfield.imm32 = 1; break;
11290 case CODE_16BIT: i.types[j].bitfield.imm16 = 1; break;
11291 }
11292 }
11293
11294 for (j = 0; j < i.operands; ++j)
11295 i.tm.operand_types[j] = i.types[j];
11296
11297 process_operands ();
11298 }
11299
11300 /* Don't set opcode until after processing operands, to avoid any
11301 potential special casing there. */
11302 i.tm.base_opcode |= val;
11303
11304 if (i.vec_encoding == vex_encoding_error
11305 || (i.vec_encoding != vex_encoding_evex
11306 ? i.broadcast.type || i.broadcast.bytes
11307 || i.rounding.type != rc_none
11308 || i.mask.reg
11309 : (i.broadcast.type || i.broadcast.bytes)
11310 && i.rounding.type != rc_none))
11311 {
11312 as_bad (_("conflicting .insn operands"));
11313 goto done;
11314 }
11315
11316 if (vex || xop)
11317 {
11318 if (!i.tm.opcode_modifier.vex)
11319 i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
11320
11321 build_vex_prefix (NULL);
11322 i.rex &= REX_OPCODE;
11323 }
11324 else if (evex)
11325 {
11326 if (!i.tm.opcode_modifier.evex)
11327 i.tm.opcode_modifier.evex = EVEXLIG;
11328
11329 build_evex_prefix ();
11330 i.rex &= REX_OPCODE;
11331 }
11332 else if (i.rex != 0)
11333 add_prefix (REX_OPCODE | i.rex);
11334
11335 output_insn ();
11336
11337 done:
11338 *saved_ilp = saved_char;
11339 input_line_pointer = line;
11340
11341 demand_empty_rest_of_line ();
11342
11343 /* Make sure dot_insn() won't yield "true" anymore. */
11344 i.tm.mnem_off = 0;
11345 }
11346
11347 #ifdef TE_PE
11348 static void
11349 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
11350 {
11351 expressionS exp;
11352
11353 do
11354 {
11355 expression (&exp);
11356 if (exp.X_op == O_symbol)
11357 exp.X_op = O_secrel;
11358
11359 emit_expr (&exp, 4);
11360 }
11361 while (*input_line_pointer++ == ',');
11362
11363 input_line_pointer--;
11364 demand_empty_rest_of_line ();
11365 }
11366
11367 static void
11368 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
11369 {
11370 expressionS exp;
11371
11372 do
11373 {
11374 expression (&exp);
11375 if (exp.X_op == O_symbol)
11376 exp.X_op = O_secidx;
11377
11378 emit_expr (&exp, 2);
11379 }
11380 while (*input_line_pointer++ == ',');
11381
11382 input_line_pointer--;
11383 demand_empty_rest_of_line ();
11384 }
11385 #endif
11386
11387 /* Handle Rounding Control / SAE specifiers. */
11388
11389 static char *
11390 RC_SAE_specifier (const char *pstr)
11391 {
11392 unsigned int j;
11393
11394 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
11395 {
11396 if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
11397 {
11398 if (i.rounding.type != rc_none)
11399 {
11400 as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
11401 return NULL;
11402 }
11403
11404 i.rounding.type = RC_NamesTable[j].type;
11405
11406 return (char *)(pstr + RC_NamesTable[j].len);
11407 }
11408 }
11409
11410 return NULL;
11411 }
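/* The names recognized here are "rn-sae", "rd-sae", "ru-sae", "rz-sae"
   and "sae"; e.g. (a sketch, Intel syntax):

	vaddps zmm0, zmm1, zmm2, {rn-sae}

   requests round-to-nearest with suppress-all-exceptions.  */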
11412
11413 /* Handle Vector operations. */
11414
11415 static char *
11416 check_VecOperations (char *op_string)
11417 {
11418 const reg_entry *mask;
11419 const char *saved;
11420 char *end_op;
11421
11422 while (*op_string)
11423 {
11424 saved = op_string;
11425 if (*op_string == '{')
11426 {
11427 op_string++;
11428
11429 /* Check broadcasts. */
11430 if (startswith (op_string, "1to"))
11431 {
11432 unsigned int bcst_type;
11433
11434 if (i.broadcast.type)
11435 goto duplicated_vec_op;
11436
11437 op_string += 3;
11438 if (*op_string == '8')
11439 bcst_type = 8;
11440 else if (*op_string == '4')
11441 bcst_type = 4;
11442 else if (*op_string == '2')
11443 bcst_type = 2;
11444 else if (*op_string == '1'
11445 && *(op_string+1) == '6')
11446 {
11447 bcst_type = 16;
11448 op_string++;
11449 }
11450 else if (*op_string == '3'
11451 && *(op_string+1) == '2')
11452 {
11453 bcst_type = 32;
11454 op_string++;
11455 }
11456 else
11457 {
11458 as_bad (_("Unsupported broadcast: `%s'"), saved);
11459 return NULL;
11460 }
11461 op_string++;
11462
11463 i.broadcast.type = bcst_type;
11464 i.broadcast.operand = this_operand;
11465
11466 /* For .insn a data size specifier may be appended. */
11467 if (dot_insn () && *op_string == ':')
11468 goto dot_insn_modifier;
11469 }
11470 /* Check .insn special cases. */
11471 else if (dot_insn () && *op_string == ':')
11472 {
11473 dot_insn_modifier:
11474 switch (op_string[1])
11475 {
11476 unsigned long n;
11477
11478 case 'd':
11479 if (i.memshift < 32)
11480 goto duplicated_vec_op;
11481
11482 n = strtoul (op_string + 2, &end_op, 0);
11483 if (n)
11484 for (i.memshift = 0; !(n & 1); n >>= 1)
11485 ++i.memshift;
11486 if (i.memshift < 32 && n == 1)
11487 op_string = end_op;
11488 break;
11489
11490 case 's': case 'u':
11491 /* This isn't really a "vector" operation, but a sign/size
11492 specifier for immediate operands of .insn. Note that AT&T
11493 syntax handles the same in i386_immediate(). */
11494 if (!intel_syntax)
11495 break;
11496
11497 if (i.imm_bits[this_operand])
11498 goto duplicated_vec_op;
11499
11500 n = strtoul (op_string + 2, &end_op, 0);
11501 if (n && n <= (flag_code == CODE_64BIT ? 64 : 32))
11502 {
11503 i.imm_bits[this_operand] = n;
11504 if (op_string[1] == 's')
11505 i.flags[this_operand] |= Operand_Signed;
11506 op_string = end_op;
11507 }
11508 break;
11509 }
11510 }
11511 /* Check masking operation. */
11512 else if ((mask = parse_register (op_string, &end_op)) != NULL)
11513 {
11514 if (mask == &bad_reg)
11515 return NULL;
11516
11517 /* k0 can't be used for write mask. */
11518 if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
11519 {
11520 as_bad (_("`%s%s' can't be used for write mask"),
11521 register_prefix, mask->reg_name);
11522 return NULL;
11523 }
11524
11525 if (!i.mask.reg)
11526 {
11527 i.mask.reg = mask;
11528 i.mask.operand = this_operand;
11529 }
11530 else if (i.mask.reg->reg_num)
11531 goto duplicated_vec_op;
11532 else
11533 {
11534 i.mask.reg = mask;
11535
11536 /* Only "{z}" is allowed here. No need to check
11537 zeroing mask explicitly. */
11538 if (i.mask.operand != (unsigned int) this_operand)
11539 {
11540 as_bad (_("invalid write mask `%s'"), saved);
11541 return NULL;
11542 }
11543 }
11544
11545 op_string = end_op;
11546 }
11547 /* Check zeroing-flag for masking operation. */
11548 else if (*op_string == 'z')
11549 {
11550 if (!i.mask.reg)
11551 {
11552 i.mask.reg = reg_k0;
11553 i.mask.zeroing = 1;
11554 i.mask.operand = this_operand;
11555 }
11556 else
11557 {
11558 if (i.mask.zeroing)
11559 {
11560 duplicated_vec_op:
11561 as_bad (_("duplicated `%s'"), saved);
11562 return NULL;
11563 }
11564
11565 i.mask.zeroing = 1;
11566
11567 /* Only "{%k}" is allowed here. No need to check mask
11568 register explicitly. */
11569 if (i.mask.operand != (unsigned int) this_operand)
11570 {
11571 as_bad (_("invalid zeroing-masking `%s'"),
11572 saved);
11573 return NULL;
11574 }
11575 }
11576
11577 op_string++;
11578 }
11579 else if (intel_syntax
11580 && (op_string = RC_SAE_specifier (op_string)) != NULL)
11581 i.rounding.modifier = true;
11582 else
11583 goto unknown_vec_op;
11584
11585 if (*op_string != '}')
11586 {
11587 as_bad (_("missing `}' in `%s'"), saved);
11588 return NULL;
11589 }
11590 op_string++;
11591
11592 /* Strip whitespace since the addition of pseudo prefixes
11593 changed how the scrubber treats '{'. */
11594 if (is_space_char (*op_string))
11595 ++op_string;
11596
11597 continue;
11598 }
11599 unknown_vec_op:
11600 /* We don't know this one. */
11601 as_bad (_("unknown vector operation: `%s'"), saved);
11602 return NULL;
11603 }
11604
11605 if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
11606 {
11607 as_bad (_("zeroing-masking only allowed with write mask"));
11608 return NULL;
11609 }
11610
11611 return op_string;
11612 }
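/* Illustrative sketch of the decorations parsed above (AT&T syntax):

	vaddps (%rax){1to16}, %zmm1, %zmm0{%k1}{z}

   "{1to16}" requests an embedded broadcast of the memory operand,
   "{%k1}" selects write-mask register %k1, and "{z}" switches from
   merging- to zeroing-masking.  */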
11613
11614 static int
11615 i386_immediate (char *imm_start)
11616 {
11617 char *save_input_line_pointer;
11618 char *gotfree_input_line;
11619 segT exp_seg = 0;
11620 expressionS *exp;
11621 i386_operand_type types;
11622
11623 operand_type_set (&types, ~0);
11624
11625 if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
11626 {
11627 as_bad (_("at most %d immediate operands are allowed"),
11628 MAX_IMMEDIATE_OPERANDS);
11629 return 0;
11630 }
11631
11632 exp = &im_expressions[i.imm_operands++];
11633 i.op[this_operand].imms = exp;
11634
11635 if (is_space_char (*imm_start))
11636 ++imm_start;
11637
11638 save_input_line_pointer = input_line_pointer;
11639 input_line_pointer = imm_start;
11640
11641 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
11642 if (gotfree_input_line)
11643 input_line_pointer = gotfree_input_line;
11644
11645 expr_mode = expr_operator_none;
11646 exp_seg = expression (exp);
11647
11648 /* For .insn immediates there may be a size specifier. */
11649 if (dot_insn () && *input_line_pointer == '{' && input_line_pointer[1] == ':'
11650 && (input_line_pointer[2] == 's' || input_line_pointer[2] == 'u'))
11651 {
11652 char *e;
11653 unsigned long n = strtoul (input_line_pointer + 3, &e, 0);
11654
11655 if (*e == '}' && n && n <= (flag_code == CODE_64BIT ? 64 : 32))
11656 {
11657 i.imm_bits[this_operand] = n;
11658 if (input_line_pointer[2] == 's')
11659 i.flags[this_operand] |= Operand_Signed;
11660 input_line_pointer = e + 1;
11661 }
11662 }
11663
11664 SKIP_WHITESPACE ();
11665 if (*input_line_pointer)
11666 as_bad (_("junk `%s' after expression"), input_line_pointer);
11667
11668 input_line_pointer = save_input_line_pointer;
11669 if (gotfree_input_line)
11670 {
11671 free (gotfree_input_line);
11672
11673 if (exp->X_op == O_constant)
11674 exp->X_op = O_illegal;
11675 }
11676
11677 if (exp_seg == reg_section)
11678 {
11679 as_bad (_("illegal immediate register operand %s"), imm_start);
11680 return 0;
11681 }
11682
11683 return i386_finalize_immediate (exp_seg, exp, types, imm_start);
11684 }
11685
11686 static int
11687 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
11688 i386_operand_type types, const char *imm_start)
11689 {
11690 if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
11691 {
11692 if (imm_start)
11693 as_bad (_("missing or invalid immediate expression `%s'"),
11694 imm_start);
11695 return 0;
11696 }
11697 else if (exp->X_op == O_constant)
11698 {
11699 /* Size it properly later. */
11700 i.types[this_operand].bitfield.imm64 = 1;
11701
11702 /* If not 64bit, sign/zero extend val, to account for wraparound
11703 when !BFD64. */
11704 if (expr_mode == expr_operator_present
11705 && flag_code != CODE_64BIT && !object_64bit)
11706 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11707 }
11708 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11709 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11710 && exp_seg != absolute_section
11711 && exp_seg != text_section
11712 && exp_seg != data_section
11713 && exp_seg != bss_section
11714 && exp_seg != undefined_section
11715 && !bfd_is_com_section (exp_seg))
11716 {
11717 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11718 return 0;
11719 }
11720 #endif
11721 else
11722 {
11723 /* This is an address. The size of the address will be
11724 determined later, depending on destination register,
11725 suffix, or the default for the section. */
11726 i.types[this_operand].bitfield.imm8 = 1;
11727 i.types[this_operand].bitfield.imm16 = 1;
11728 i.types[this_operand].bitfield.imm32 = 1;
11729 i.types[this_operand].bitfield.imm32s = 1;
11730 i.types[this_operand].bitfield.imm64 = 1;
11731 i.types[this_operand] = operand_type_and (i.types[this_operand],
11732 types);
11733 }
11734
11735 return 1;
11736 }
11737
11738 static char *
11739 i386_scale (char *scale)
11740 {
11741 offsetT val;
11742 char *save = input_line_pointer;
11743
11744 input_line_pointer = scale;
11745 val = get_absolute_expression ();
11746
11747 switch (val)
11748 {
11749 case 1:
11750 i.log2_scale_factor = 0;
11751 break;
11752 case 2:
11753 i.log2_scale_factor = 1;
11754 break;
11755 case 4:
11756 i.log2_scale_factor = 2;
11757 break;
11758 case 8:
11759 i.log2_scale_factor = 3;
11760 break;
11761 default:
11762 {
11763 char sep = *input_line_pointer;
11764
11765 *input_line_pointer = '\0';
11766 as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
11767 scale);
11768 *input_line_pointer = sep;
11769 input_line_pointer = save;
11770 return NULL;
11771 }
11772 }
11773 if (i.log2_scale_factor != 0 && i.index_reg == 0)
11774 {
11775 as_warn (_("scale factor of %d without an index register"),
11776 1 << i.log2_scale_factor);
11777 i.log2_scale_factor = 0;
11778 }
11779 scale = input_line_pointer;
11780 input_line_pointer = save;
11781 return scale;
11782 }
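
/* Example (illustrative): for `lea 0x10(%ebx,%esi,4),%eax' the scale
string "4" parses to log2_scale_factor == 2. A scale without an index
register, as in `mov (%eax,2),%ebx', only draws the warning above and
the factor is dropped. */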
11783
11784 static int
11785 i386_displacement (char *disp_start, char *disp_end)
11786 {
11787 expressionS *exp;
11788 segT exp_seg = 0;
11789 char *save_input_line_pointer;
11790 char *gotfree_input_line;
11791 int override;
11792 i386_operand_type bigdisp, types = anydisp;
11793 int ret;
11794
11795 if (i.disp_operands == MAX_MEMORY_OPERANDS)
11796 {
11797 as_bad (_("at most %d displacement operands are allowed"),
11798 MAX_MEMORY_OPERANDS);
11799 return 0;
11800 }
11801
11802 operand_type_set (&bigdisp, 0);
11803 if (i.jumpabsolute
11804 || i.types[this_operand].bitfield.baseindex
11805 || (current_templates->start->opcode_modifier.jump != JUMP
11806 && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
11807 {
11808 i386_addressing_mode ();
11809 override = (i.prefix[ADDR_PREFIX] != 0);
11810 if (flag_code == CODE_64BIT)
11811 {
11812 bigdisp.bitfield.disp32 = 1;
11813 if (!override)
11814 bigdisp.bitfield.disp64 = 1;
11815 }
11816 else if ((flag_code == CODE_16BIT) ^ override)
11817 bigdisp.bitfield.disp16 = 1;
11818 else
11819 bigdisp.bitfield.disp32 = 1;
11820 }
11821 else
11822 {
11823 /* For PC-relative branches, the width of the displacement may be
11824 dependent upon data size, but is never dependent upon address size.
11825 Also make sure to not unintentionally match against a non-PC-relative
11826 branch template. */
11827 static templates aux_templates;
11828 const insn_template *t = current_templates->start;
11829 bool has_intel64 = false;
11830
11831 aux_templates.start = t;
11832 while (++t < current_templates->end)
11833 {
11834 if (t->opcode_modifier.jump
11835 != current_templates->start->opcode_modifier.jump)
11836 break;
11837 if ((t->opcode_modifier.isa64 >= INTEL64))
11838 has_intel64 = true;
11839 }
11840 if (t < current_templates->end)
11841 {
11842 aux_templates.end = t;
11843 current_templates = &aux_templates;
11844 }
11845
11846 override = (i.prefix[DATA_PREFIX] != 0);
11847 if (flag_code == CODE_64BIT)
11848 {
11849 if ((override || i.suffix == WORD_MNEM_SUFFIX)
11850 && (!intel64 || !has_intel64))
11851 bigdisp.bitfield.disp16 = 1;
11852 else
11853 bigdisp.bitfield.disp32 = 1;
11854 }
11855 else
11856 {
11857 if (!override)
11858 override = (i.suffix == (flag_code != CODE_16BIT
11859 ? WORD_MNEM_SUFFIX
11860 : LONG_MNEM_SUFFIX));
11861 bigdisp.bitfield.disp32 = 1;
11862 if ((flag_code == CODE_16BIT) ^ override)
11863 {
11864 bigdisp.bitfield.disp32 = 0;
11865 bigdisp.bitfield.disp16 = 1;
11866 }
11867 }
11868 }
11869 i.types[this_operand] = operand_type_or (i.types[this_operand],
11870 bigdisp);
11871
11872 exp = &disp_expressions[i.disp_operands];
11873 i.op[this_operand].disps = exp;
11874 i.disp_operands++;
11875 save_input_line_pointer = input_line_pointer;
11876 input_line_pointer = disp_start;
11877 END_STRING_AND_SAVE (disp_end);
11878
11879 #ifndef GCC_ASM_O_HACK
11880 #define GCC_ASM_O_HACK 0
11881 #endif
11882 #if GCC_ASM_O_HACK
11883 END_STRING_AND_SAVE (disp_end + 1);
11884 if (i.types[this_operand].bitfield.baseindex
11885 && displacement_string_end[-1] == '+')
11886 {
11887 /* This hack is to avoid a warning when using the "o"
11888 constraint within gcc asm statements.
11889 For instance:
11890
11891 #define _set_tssldt_desc(n,addr,limit,type) \
11892 __asm__ __volatile__ ( \
11893 "movw %w2,%0\n\t" \
11894 "movw %w1,2+%0\n\t" \
11895 "rorl $16,%1\n\t" \
11896 "movb %b1,4+%0\n\t" \
11897 "movb %4,5+%0\n\t" \
11898 "movb $0,6+%0\n\t" \
11899 "movb %h1,7+%0\n\t" \
11900 "rorl $16,%1" \
11901 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
11902
11903 This works great except that the output assembler ends
11904 up looking a bit weird if it turns out that there is
11905 no offset. You end up producing code that looks like:
11906
11907 #APP
11908 movw $235,(%eax)
11909 movw %dx,2+(%eax)
11910 rorl $16,%edx
11911 movb %dl,4+(%eax)
11912 movb $137,5+(%eax)
11913 movb $0,6+(%eax)
11914 movb %dh,7+(%eax)
11915 rorl $16,%edx
11916 #NO_APP
11917
11918 So here we provide the missing zero. */
11919
11920 *displacement_string_end = '0';
11921 }
11922 #endif
11923 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
11924 if (gotfree_input_line)
11925 input_line_pointer = gotfree_input_line;
11926
11927 expr_mode = expr_operator_none;
11928 exp_seg = expression (exp);
11929
11930 SKIP_WHITESPACE ();
11931 if (*input_line_pointer)
11932 as_bad (_("junk `%s' after expression"), input_line_pointer);
11933 #if GCC_ASM_O_HACK
11934 RESTORE_END_STRING (disp_end + 1);
11935 #endif
11936 input_line_pointer = save_input_line_pointer;
11937 if (gotfree_input_line)
11938 {
11939 free (gotfree_input_line);
11940
11941 if (exp->X_op == O_constant || exp->X_op == O_register)
11942 exp->X_op = O_illegal;
11943 }
11944
11945 ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
11946
11947 RESTORE_END_STRING (disp_end);
11948
11949 return ret;
11950 }
11951
11952 static int
11953 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
11954 i386_operand_type types, const char *disp_start)
11955 {
11956 int ret = 1;
11957
11958 /* We do this to make sure that the section symbol is in
11959 the symbol table. We will ultimately change the relocation
11960 to be relative to the beginning of the section. */
11961 if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
11962 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
11963 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11964 {
11965 if (exp->X_op != O_symbol)
11966 goto inv_disp;
11967
11968 if (S_IS_LOCAL (exp->X_add_symbol)
11969 && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
11970 && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
11971 section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
11972 exp->X_op = O_subtract;
11973 exp->X_op_symbol = GOT_symbol;
11974 if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
11975 i.reloc[this_operand] = BFD_RELOC_32_PCREL;
11976 else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11977 i.reloc[this_operand] = BFD_RELOC_64;
11978 else
11979 i.reloc[this_operand] = BFD_RELOC_32;
11980 }
11981
11982 else if (exp->X_op == O_absent
11983 || exp->X_op == O_illegal
11984 || exp->X_op == O_big)
11985 {
11986 inv_disp:
11987 as_bad (_("missing or invalid displacement expression `%s'"),
11988 disp_start);
11989 ret = 0;
11990 }
11991
11992 else if (exp->X_op == O_constant)
11993 {
11994 /* Sizing gets taken care of by optimize_disp().
11995
11996 If not 64bit, sign/zero extend val, to account for wraparound
11997 when !BFD64. */
11998 if (expr_mode == expr_operator_present
11999 && flag_code != CODE_64BIT && !object_64bit)
12000 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
12001 }
12002
12003 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
12004 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
12005 && exp_seg != absolute_section
12006 && exp_seg != text_section
12007 && exp_seg != data_section
12008 && exp_seg != bss_section
12009 && exp_seg != undefined_section
12010 && !bfd_is_com_section (exp_seg))
12011 {
12012 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
12013 ret = 0;
12014 }
12015 #endif
12016
12017 else if (current_templates->start->opcode_modifier.jump == JUMP_BYTE)
12018 i.types[this_operand].bitfield.disp8 = 1;
12019
12020 /* Check if this is a displacement only operand. */
12021 if (!i.types[this_operand].bitfield.baseindex)
12022 i.types[this_operand] =
12023 operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
12024 operand_type_and (i.types[this_operand], types));
12025
12026 return ret;
12027 }
12028
12029 /* Return the active addressing mode, taking address override and
12030 registers forming the address into consideration. Update the
12031 address override prefix if necessary. */
12032
12033 static enum flag_code
12034 i386_addressing_mode (void)
12035 {
12036 enum flag_code addr_mode;
12037
12038 if (i.prefix[ADDR_PREFIX])
12039 addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
12040 else if (flag_code == CODE_16BIT
12041 && current_templates->start->cpu_flags.bitfield.cpumpx
12042 /* Avoid replacing the "16-bit addressing not allowed" diagnostic
12043 from md_assemble() by "is not a valid base/index expression"
12044 when there is a base and/or index. */
12045 && !i.types[this_operand].bitfield.baseindex)
12046 {
12047 /* MPX insn memory operands with neither base nor index must be forced
12048 to use 32-bit addressing in 16-bit mode. */
12049 addr_mode = CODE_32BIT;
12050 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
12051 ++i.prefixes;
12052 gas_assert (!i.types[this_operand].bitfield.disp16);
12053 gas_assert (!i.types[this_operand].bitfield.disp32);
12054 }
12055 else
12056 {
12057 addr_mode = flag_code;
12058
12059 #if INFER_ADDR_PREFIX
12060 if (i.mem_operands == 0)
12061 {
12062 /* Infer address prefix from the first memory operand. */
12063 const reg_entry *addr_reg = i.base_reg;
12064
12065 if (addr_reg == NULL)
12066 addr_reg = i.index_reg;
12067
12068 if (addr_reg)
12069 {
12070 if (addr_reg->reg_type.bitfield.dword)
12071 addr_mode = CODE_32BIT;
12072 else if (flag_code != CODE_64BIT
12073 && addr_reg->reg_type.bitfield.word)
12074 addr_mode = CODE_16BIT;
12075
12076 if (addr_mode != flag_code)
12077 {
12078 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
12079 i.prefixes += 1;
12080 /* Change the size of any displacement too. At most one
12081 of Disp16 or Disp32 is set.
12082 FIXME. There doesn't seem to be any real need for
12083 separate Disp16 and Disp32 flags. The same goes for
12084 Imm16 and Imm32. Removing them would probably clean
12085 up the code quite a lot. */
12086 if (flag_code != CODE_64BIT
12087 && (i.types[this_operand].bitfield.disp16
12088 || i.types[this_operand].bitfield.disp32))
12089 {
12090 static const i386_operand_type disp16_32 = {
12091 .bitfield = { .disp16 = 1, .disp32 = 1 }
12092 };
12093
12094 i.types[this_operand]
12095 = operand_type_xor (i.types[this_operand], disp16_32);
12096 }
12097 }
12098 }
12099 }
12100 #endif
12101 }
12102
12103 return addr_mode;
12104 }
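
/* Example (illustrative): assembling `mov (%bx,%si),%ax' in 32-bit code
makes the inference above pick CODE_16BIT from the word-sized base
register and adds the 0x67 address-size prefix; conversely,
`mov (%eax),%ax' in 16-bit code gets the prefix for 32-bit addressing. */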
12105
12106 /* Make sure the memory operand we've been dealt is valid.
12107 Return 1 on success, 0 on failure. */
12108
12109 static int
12110 i386_index_check (const char *operand_string)
12111 {
12112 const char *kind = "base/index";
12113 enum flag_code addr_mode = i386_addressing_mode ();
12114 const insn_template *t = current_templates->end - 1;
12115
12116 if (t->opcode_modifier.isstring)
12117 {
12118 /* Memory operands of string insns are special in that they only allow
12119 a single register (rDI, rSI, or rBX) as their memory address. */
12120 const reg_entry *expected_reg;
12121 static const char *di_si[][2] =
12122 {
12123 { "esi", "edi" },
12124 { "si", "di" },
12125 { "rsi", "rdi" }
12126 };
12127 static const char *bx[] = { "ebx", "bx", "rbx" };
12128
12129 kind = "string address";
12130
12131 if (t->opcode_modifier.prefixok == PrefixRep)
12132 {
12133 int es_op = t->opcode_modifier.isstring - IS_STRING_ES_OP0;
12134 int op = 0;
12135
12136 if (!t->operand_types[0].bitfield.baseindex
12137 || ((!i.mem_operands != !intel_syntax)
12138 && t->operand_types[1].bitfield.baseindex))
12139 op = 1;
12140 expected_reg
12141 = (const reg_entry *) str_hash_find (reg_hash,
12142 di_si[addr_mode][op == es_op]);
12143 }
12144 else
12145 expected_reg
12146 = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
12147
12148 if (i.base_reg != expected_reg
12149 || i.index_reg
12150 || operand_type_check (i.types[this_operand], disp))
12151 {
12152 /* The second memory operand must have the same size as
12153 the first one. */
12154 if (i.mem_operands
12155 && i.base_reg
12156 && !((addr_mode == CODE_64BIT
12157 && i.base_reg->reg_type.bitfield.qword)
12158 || (addr_mode == CODE_32BIT
12159 ? i.base_reg->reg_type.bitfield.dword
12160 : i.base_reg->reg_type.bitfield.word)))
12161 goto bad_address;
12162
12163 as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
12164 operand_string,
12165 intel_syntax ? '[' : '(',
12166 register_prefix,
12167 expected_reg->reg_name,
12168 intel_syntax ? ']' : ')');
12169 return 1;
12170 }
12171 else
12172 return 1;
12173
12174 bad_address:
12175 as_bad (_("`%s' is not a valid %s expression"),
12176 operand_string, kind);
12177 return 0;
12178 }
12179 else
12180 {
12181 t = current_templates->start;
12182
12183 if (addr_mode != CODE_16BIT)
12184 {
12185 /* 32-bit/64-bit checks. */
12186 if (i.disp_encoding == disp_encoding_16bit)
12187 {
12188 bad_disp:
12189 as_bad (_("invalid `%s' prefix"),
12190 addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
12191 return 0;
12192 }
12193
12194 if ((i.base_reg
12195 && ((addr_mode == CODE_64BIT
12196 ? !i.base_reg->reg_type.bitfield.qword
12197 : !i.base_reg->reg_type.bitfield.dword)
12198 || (i.index_reg && i.base_reg->reg_num == RegIP)
12199 || i.base_reg->reg_num == RegIZ))
12200 || (i.index_reg
12201 && !i.index_reg->reg_type.bitfield.xmmword
12202 && !i.index_reg->reg_type.bitfield.ymmword
12203 && !i.index_reg->reg_type.bitfield.zmmword
12204 && ((addr_mode == CODE_64BIT
12205 ? !i.index_reg->reg_type.bitfield.qword
12206 : !i.index_reg->reg_type.bitfield.dword)
12207 || !i.index_reg->reg_type.bitfield.baseindex)))
12208 goto bad_address;
12209
12210 /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
12211 if (t->mnem_off == MN_bndmk
12212 || t->mnem_off == MN_bndldx
12213 || t->mnem_off == MN_bndstx
12214 || t->opcode_modifier.sib == SIBMEM)
12215 {
12216 /* They cannot use RIP-relative addressing. */
12217 if (i.base_reg && i.base_reg->reg_num == RegIP)
12218 {
12219 as_bad (_("`%s' cannot be used here"), operand_string);
12220 return 0;
12221 }
12222
12223 /* bndldx and bndstx ignore their scale factor. */
12224 if ((t->mnem_off == MN_bndldx || t->mnem_off == MN_bndstx)
12225 && i.log2_scale_factor)
12226 as_warn (_("register scaling is being ignored here"));
12227 }
12228 }
12229 else
12230 {
12231 /* 16-bit checks. */
12232 if (i.disp_encoding == disp_encoding_32bit)
12233 goto bad_disp;
12234
12235 if ((i.base_reg
12236 && (!i.base_reg->reg_type.bitfield.word
12237 || !i.base_reg->reg_type.bitfield.baseindex))
12238 || (i.index_reg
12239 && (!i.index_reg->reg_type.bitfield.word
12240 || !i.index_reg->reg_type.bitfield.baseindex
12241 || !(i.base_reg
12242 && i.base_reg->reg_num < 6
12243 && i.index_reg->reg_num >= 6
12244 && i.log2_scale_factor == 0))))
12245 goto bad_address;
12246 }
12247 }
12248 return 1;
12249 }
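
/* Example (illustrative): `movsl %ds:(%esi), %es:(%edi)' in 32-bit mode
matches the expected rSI/rDI pair; `movsl %ds:(%ebx), %es:(%edi)' only
draws the "not valid here" warning naming the expected register, while
`xlat' expects (%ebx) (or (%bx)/(%rbx), depending on the addressing
mode). */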
12250
12251 /* Handle vector immediates. */
12252
12253 static int
12254 RC_SAE_immediate (const char *imm_start)
12255 {
12256 const char *pstr = imm_start;
12257
12258 if (*pstr != '{')
12259 return 0;
12260
12261 pstr = RC_SAE_specifier (pstr + 1);
12262 if (pstr == NULL)
12263 return 0;
12264
12265 if (*pstr++ != '}')
12266 {
12267 as_bad (_("Missing '}': '%s'"), imm_start);
12268 return 0;
12269 }
12270 /* RC/SAE immediate string should contain nothing more. */
12271 if (*pstr != 0)
12272 {
12273 as_bad (_("Junk after '}': '%s'"), imm_start);
12274 return 0;
12275 }
12276
12277 /* Internally this doesn't count as an operand. */
12278 --i.operands;
12279
12280 return 1;
12281 }
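
/* Example (illustrative): for `vaddps {rd-sae}, %zmm1, %zmm2, %zmm3' the
`{rd-sae}' pseudo-operand is consumed here: RC_SAE_specifier() records
the round-down mode in i.rounding, and the operand count is decremented
since no machine operand is produced. */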
12282
12283 static INLINE bool starts_memory_operand (char c)
12284 {
12285 return ISDIGIT (c)
12286 || is_name_beginner (c)
12287 || strchr ("([\"+-!~", c);
12288 }
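
/* Hence `4(%eax)', `foo(%rip)', `(%esp)', a quoted symbol name, and
expressions starting with `+', `-', `!', `~' or `[' all qualify as the
start of a memory operand. */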
12289
12290 /* Parse OPERAND_STRING into the i386_insn structure I. Returns zero
12291 on error. */
12292
12293 static int
12294 i386_att_operand (char *operand_string)
12295 {
12296 const reg_entry *r;
12297 char *end_op;
12298 char *op_string = operand_string;
12299
12300 if (is_space_char (*op_string))
12301 ++op_string;
12302
12303 /* We check for an absolute prefix (differentiating,
12304 for example, 'jmp pc_relative_label' from 'jmp *absolute_label'). */
12305 if (*op_string == ABSOLUTE_PREFIX
12306 && current_templates->start->opcode_modifier.jump)
12307 {
12308 ++op_string;
12309 if (is_space_char (*op_string))
12310 ++op_string;
12311 i.jumpabsolute = true;
12312 }
12313
12314 /* Check if operand is a register. */
12315 if ((r = parse_register (op_string, &end_op)) != NULL)
12316 {
12317 i386_operand_type temp;
12318
12319 if (r == &bad_reg)
12320 return 0;
12321
12322 /* Check for a segment override by searching for ':' after a
12323 segment register. */
12324 op_string = end_op;
12325 if (is_space_char (*op_string))
12326 ++op_string;
12327 if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
12328 {
12329 i.seg[i.mem_operands] = r;
12330
12331 /* Skip the ':' and whitespace. */
12332 ++op_string;
12333 if (is_space_char (*op_string))
12334 ++op_string;
12335
12336 /* Handle case of %es:*foo. */
12337 if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX
12338 && current_templates->start->opcode_modifier.jump)
12339 {
12340 ++op_string;
12341 if (is_space_char (*op_string))
12342 ++op_string;
12343 i.jumpabsolute = true;
12344 }
12345
12346 if (!starts_memory_operand (*op_string))
12347 {
12348 as_bad (_("bad memory operand `%s'"), op_string);
12349 return 0;
12350 }
12351 goto do_memory_reference;
12352 }
12353
12354 /* Handle vector operations. */
12355 if (*op_string == '{')
12356 {
12357 op_string = check_VecOperations (op_string);
12358 if (op_string == NULL)
12359 return 0;
12360 }
12361
12362 if (*op_string)
12363 {
12364 as_bad (_("junk `%s' after register"), op_string);
12365 return 0;
12366 }
12367
12368 /* Reject pseudo registers for .insn. */
12369 if (dot_insn () && r->reg_type.bitfield.class == ClassNone)
12370 {
12371 as_bad (_("`%s%s' cannot be used here"),
12372 register_prefix, r->reg_name);
12373 return 0;
12374 }
12375
12376 temp = r->reg_type;
12377 temp.bitfield.baseindex = 0;
12378 i.types[this_operand] = operand_type_or (i.types[this_operand],
12379 temp);
12380 i.types[this_operand].bitfield.unspecified = 0;
12381 i.op[this_operand].regs = r;
12382 i.reg_operands++;
12383
12384 /* A GPR may follow an RC or SAE immediate only if a (vector) register
12385 operand was also present earlier on. */
12386 if (i.rounding.type != rc_none && temp.bitfield.class == Reg
12387 && i.reg_operands == 1)
12388 {
12389 unsigned int j;
12390
12391 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
12392 if (i.rounding.type == RC_NamesTable[j].type)
12393 break;
12394 as_bad (_("`%s': misplaced `{%s}'"),
12395 insn_name (current_templates->start), RC_NamesTable[j].name);
12396 return 0;
12397 }
12398 }
12399 else if (*op_string == REGISTER_PREFIX)
12400 {
12401 as_bad (_("bad register name `%s'"), op_string);
12402 return 0;
12403 }
12404 else if (*op_string == IMMEDIATE_PREFIX)
12405 {
12406 ++op_string;
12407 if (i.jumpabsolute)
12408 {
12409 as_bad (_("immediate operand illegal with absolute jump"));
12410 return 0;
12411 }
12412 if (!i386_immediate (op_string))
12413 return 0;
12414 if (i.rounding.type != rc_none)
12415 {
12416 as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
12417 insn_name (current_templates->start));
12418 return 0;
12419 }
12420 }
12421 else if (RC_SAE_immediate (operand_string))
12422 {
12423 /* If it is a RC or SAE immediate, do the necessary placement check:
12424 Only another immediate or a GPR may precede it. */
12425 if (i.mem_operands || i.reg_operands + i.imm_operands > 1
12426 || (i.reg_operands == 1
12427 && i.op[0].regs->reg_type.bitfield.class != Reg))
12428 {
12429 as_bad (_("`%s': misplaced `%s'"),
12430 insn_name (current_templates->start), operand_string);
12431 return 0;
12432 }
12433 }
12434 else if (starts_memory_operand (*op_string))
12435 {
12436 /* This is a memory reference of some sort. */
12437 char *base_string;
12438
12439 /* Start and end of displacement string expression (if found). */
12440 char *displacement_string_start;
12441 char *displacement_string_end;
12442
12443 do_memory_reference:
12444 /* Check for base index form. We detect the base index form by
12445 looking for an ')' at the end of the operand, searching
12446 for the '(' matching it, and finding a REGISTER_PREFIX or ','
12447 after the '('. */
12448 base_string = op_string + strlen (op_string);
12449
12450 /* Handle vector operations. */
12451 --base_string;
12452 if (is_space_char (*base_string))
12453 --base_string;
12454
12455 if (*base_string == '}')
12456 {
12457 char *vop_start = NULL;
12458
12459 while (base_string-- > op_string)
12460 {
12461 if (*base_string == '"')
12462 break;
12463 if (*base_string != '{')
12464 continue;
12465
12466 vop_start = base_string;
12467
12468 --base_string;
12469 if (is_space_char (*base_string))
12470 --base_string;
12471
12472 if (*base_string != '}')
12473 break;
12474
12475 vop_start = NULL;
12476 }
12477
12478 if (!vop_start)
12479 {
12480 as_bad (_("unbalanced curly braces"));
12481 return 0;
12482 }
12483
12484 if (check_VecOperations (vop_start) == NULL)
12485 return 0;
12486 }
12487
12488 /* If we only have a displacement, set up for it to be parsed later. */
12489 displacement_string_start = op_string;
12490 displacement_string_end = base_string + 1;
12491
12492 if (*base_string == ')')
12493 {
12494 char *temp_string;
12495 unsigned int parens_not_balanced = 0;
12496 bool in_quotes = false;
12497
12498 /* We've already checked that the numbers of left & right ()'s are
12499 equal, and that there's a matching set of double quotes. */
12500 end_op = base_string;
12501 for (temp_string = op_string; temp_string < end_op; temp_string++)
12502 {
12503 if (*temp_string == '\\' && temp_string[1] == '"')
12504 ++temp_string;
12505 else if (*temp_string == '"')
12506 in_quotes = !in_quotes;
12507 else if (!in_quotes)
12508 {
12509 if (*temp_string == '(' && !parens_not_balanced++)
12510 base_string = temp_string;
12511 if (*temp_string == ')')
12512 --parens_not_balanced;
12513 }
12514 }
12515
12516 temp_string = base_string;
12517
12518 /* Skip past '(' and whitespace. */
12519 gas_assert (*base_string == '(');
12520 ++base_string;
12521 if (is_space_char (*base_string))
12522 ++base_string;
12523
12524 if (*base_string == ','
12525 || ((i.base_reg = parse_register (base_string, &end_op))
12526 != NULL))
12527 {
12528 displacement_string_end = temp_string;
12529
12530 i.types[this_operand].bitfield.baseindex = 1;
12531
12532 if (i.base_reg)
12533 {
12534 if (i.base_reg == &bad_reg)
12535 return 0;
12536 base_string = end_op;
12537 if (is_space_char (*base_string))
12538 ++base_string;
12539 }
12540
12541 /* There may be an index reg or scale factor here. */
12542 if (*base_string == ',')
12543 {
12544 ++base_string;
12545 if (is_space_char (*base_string))
12546 ++base_string;
12547
12548 if ((i.index_reg = parse_register (base_string, &end_op))
12549 != NULL)
12550 {
12551 if (i.index_reg == &bad_reg)
12552 return 0;
12553 base_string = end_op;
12554 if (is_space_char (*base_string))
12555 ++base_string;
12556 if (*base_string == ',')
12557 {
12558 ++base_string;
12559 if (is_space_char (*base_string))
12560 ++base_string;
12561 }
12562 else if (*base_string != ')')
12563 {
12564 as_bad (_("expecting `,' or `)' "
12565 "after index register in `%s'"),
12566 operand_string);
12567 return 0;
12568 }
12569 }
12570 else if (*base_string == REGISTER_PREFIX)
12571 {
12572 end_op = strchr (base_string, ',');
12573 if (end_op)
12574 *end_op = '\0';
12575 as_bad (_("bad register name `%s'"), base_string);
12576 return 0;
12577 }
12578
12579 /* Check for scale factor. */
12580 if (*base_string != ')')
12581 {
12582 char *end_scale = i386_scale (base_string);
12583
12584 if (!end_scale)
12585 return 0;
12586
12587 base_string = end_scale;
12588 if (is_space_char (*base_string))
12589 ++base_string;
12590 if (*base_string != ')')
12591 {
12592 as_bad (_("expecting `)' "
12593 "after scale factor in `%s'"),
12594 operand_string);
12595 return 0;
12596 }
12597 }
12598 else if (!i.index_reg)
12599 {
12600 as_bad (_("expecting index register or scale factor "
12601 "after `,'; got '%c'"),
12602 *base_string);
12603 return 0;
12604 }
12605 }
12606 else if (*base_string != ')')
12607 {
12608 as_bad (_("expecting `,' or `)' "
12609 "after base register in `%s'"),
12610 operand_string);
12611 return 0;
12612 }
12613 }
12614 else if (*base_string == REGISTER_PREFIX)
12615 {
12616 end_op = strchr (base_string, ',');
12617 if (end_op)
12618 *end_op = '\0';
12619 as_bad (_("bad register name `%s'"), base_string);
12620 return 0;
12621 }
12622 }
12623
12624 /* If there's an expression beginning the operand, parse it,
12625 assuming displacement_string_start and
12626 displacement_string_end are meaningful. */
12627 if (displacement_string_start != displacement_string_end)
12628 {
12629 if (!i386_displacement (displacement_string_start,
12630 displacement_string_end))
12631 return 0;
12632 }
12633
12634 /* Special case for (%dx) while doing input/output op. */
12635 if (i.base_reg
12636 && i.base_reg->reg_type.bitfield.instance == RegD
12637 && i.base_reg->reg_type.bitfield.word
12638 && i.index_reg == 0
12639 && i.log2_scale_factor == 0
12640 && i.seg[i.mem_operands] == 0
12641 && !operand_type_check (i.types[this_operand], disp))
12642 {
12643 i.types[this_operand] = i.base_reg->reg_type;
12644 i.input_output_operand = true;
12645 return 1;
12646 }
12647
12648 if (i386_index_check (operand_string) == 0)
12649 return 0;
12650 i.flags[this_operand] |= Operand_Mem;
12651 i.mem_operands++;
12652 }
12653 else
12654 {
12655 /* It's not a memory operand; argh! */
12656 as_bad (_("invalid char %s beginning operand %d `%s'"),
12657 output_invalid (*op_string),
12658 this_operand + 1,
12659 op_string);
12660 return 0;
12661 }
12662 return 1; /* Normal return. */
12663 }
12664 \f
12665 /* Calculate the maximum variable size (i.e., excluding fr_fix)
12666 that an rs_machine_dependent frag may reach. */
12667
12668 unsigned int
12669 i386_frag_max_var (fragS *frag)
12670 {
12671 /* The only relaxable frags are for jumps.
12672 Unconditional jumps can grow by 4 bytes and others by 5 bytes. */
12673 gas_assert (frag->fr_type == rs_machine_dependent);
12674 return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
12675 }
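
/* I.e. a conditional jump may grow into the two-byte 0F 8x opcode plus
a 4-byte displacement (5 bytes beyond the fixed part), while an
unconditional jump keeps a one-byte opcode (E9) and only the 4-byte
displacement is variable. */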
12676
12677 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12678 static int
12679 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
12680 {
12681 /* STT_GNU_IFUNC symbol must go through PLT. */
12682 if ((symbol_get_bfdsym (fr_symbol)->flags
12683 & BSF_GNU_INDIRECT_FUNCTION) != 0)
12684 return 0;
12685
12686 if (!S_IS_EXTERNAL (fr_symbol))
12687 /* Symbol may be weak or local. */
12688 return !S_IS_WEAK (fr_symbol);
12689
12690 /* Global symbols with non-default visibility can't be preempted. */
12691 if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
12692 return 1;
12693
12694 if (fr_var != NO_RELOC)
12695 switch ((enum bfd_reloc_code_real) fr_var)
12696 {
12697 case BFD_RELOC_386_PLT32:
12698 case BFD_RELOC_X86_64_PLT32:
12699 /* Symbol with PLT relocation may be preempted. */
12700 return 0;
12701 default:
12702 abort ();
12703 }
12704
12705 /* Global symbols with default visibility in a shared library may be
12706 preempted by another definition. */
12707 return !shared;
12708 }
12709 #endif
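
/* Example (illustrative): a branch to a local label or to a global with
hidden visibility resolves within the link unit and may be relaxed,
whereas a default-visibility global assembled with -mshared (the
`shared' flag above) may be preempted at run time and must keep its
reloc. */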
12710
12711 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
12712 Note: this also holds for Skylake and Cascadelake.
12713 ---------------------------------------------------------------------
12714 | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
12715 | ------ | ----------- | ------- | -------- |
12716 | Jo | N | N | Y |
12717 | Jno | N | N | Y |
12718 | Jc/Jb | Y | N | Y |
12719 | Jae/Jnb | Y | N | Y |
12720 | Je/Jz | Y | Y | Y |
12721 | Jne/Jnz | Y | Y | Y |
12722 | Jna/Jbe | Y | N | Y |
12723 | Ja/Jnbe | Y | N | Y |
12724 | Js | N | N | Y |
12725 | Jns | N | N | Y |
12726 | Jp/Jpe | N | N | Y |
12727 | Jnp/Jpo | N | N | Y |
12728 | Jl/Jnge | Y | Y | Y |
12729 | Jge/Jnl | Y | Y | Y |
12730 | Jle/Jng | Y | Y | Y |
12731 | Jg/Jnle | Y | Y | Y |
12732 --------------------------------------------------------------------- */
12733 static int
12734 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
12735 {
12736 if (mf_cmp == mf_cmp_alu_cmp)
12737 return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
12738 || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
12739 if (mf_cmp == mf_cmp_incdec)
12740 return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
12741 || mf_jcc == mf_jcc_jle);
12742 if (mf_cmp == mf_cmp_test_and)
12743 return 1;
12744 return 0;
12745 }
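
/* Example (illustrative): `cmp %eax, %ebx' followed by `jz' is fusible
(ADD/SUB/CMP with Je/Jz is a Y in the table), whereas `inc %eax'
followed by `jc' is not: INC/DEC leave CF untouched, so the carry-based
Jcc cannot fuse with them. */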
12746
12747 /* Return the next non-empty frag. */
12748
12749 static fragS *
12750 i386_next_non_empty_frag (fragS *fragP)
12751 {
12752 /* There may be a frag with a ".fill 0" when there is no room in
12753 the current frag for frag_grow in output_insn. */
12754 for (fragP = fragP->fr_next;
12755 (fragP != NULL
12756 && fragP->fr_type == rs_fill
12757 && fragP->fr_fix == 0);
12758 fragP = fragP->fr_next)
12759 ;
12760 return fragP;
12761 }
12762
12763 /* Return the next jcc frag after BRANCH_PADDING. */
12764
12765 static fragS *
12766 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
12767 {
12768 fragS *branch_fragP;
12769 if (!pad_fragP)
12770 return NULL;
12771
12772 if (pad_fragP->fr_type == rs_machine_dependent
12773 && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
12774 == BRANCH_PADDING))
12775 {
12776 branch_fragP = i386_next_non_empty_frag (pad_fragP);
12777 if (branch_fragP->fr_type != rs_machine_dependent)
12778 return NULL;
12779 if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
12780 && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
12781 pad_fragP->tc_frag_data.mf_type))
12782 return branch_fragP;
12783 }
12784
12785 return NULL;
12786 }
12787
12788 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */
12789
12790 static void
12791 i386_classify_machine_dependent_frag (fragS *fragP)
12792 {
12793 fragS *cmp_fragP;
12794 fragS *pad_fragP;
12795 fragS *branch_fragP;
12796 fragS *next_fragP;
12797 unsigned int max_prefix_length;
12798
12799 if (fragP->tc_frag_data.classified)
12800 return;
12801
12802 /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert
12803 FUSED_JCC_PADDING and merge BRANCH_PADDING. */
12804 for (next_fragP = fragP;
12805 next_fragP != NULL;
12806 next_fragP = next_fragP->fr_next)
12807 {
12808 next_fragP->tc_frag_data.classified = 1;
12809 if (next_fragP->fr_type == rs_machine_dependent)
12810 switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
12811 {
12812 case BRANCH_PADDING:
12813 /* The BRANCH_PADDING frag must be followed by a branch
12814 frag. */
12815 branch_fragP = i386_next_non_empty_frag (next_fragP);
12816 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12817 break;
12818 case FUSED_JCC_PADDING:
12819 /* Check if this is a fused jcc:
12820 FUSED_JCC_PADDING
12821 CMP like instruction
12822 BRANCH_PADDING
12823 COND_JUMP
12824 */
12825 cmp_fragP = i386_next_non_empty_frag (next_fragP);
12826 pad_fragP = i386_next_non_empty_frag (cmp_fragP);
12827 branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
12828 if (branch_fragP)
12829 {
12830 /* The BRANCH_PADDING frag is merged with the
12831 FUSED_JCC_PADDING frag. */
12832 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12833 /* CMP like instruction size. */
12834 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
12835 frag_wane (pad_fragP);
12836 /* Skip to branch_fragP. */
12837 next_fragP = branch_fragP;
12838 }
12839 else if (next_fragP->tc_frag_data.max_prefix_length)
12840 {
12841 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
12842 a fused jcc. */
12843 next_fragP->fr_subtype
12844 = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
12845 next_fragP->tc_frag_data.max_bytes
12846 = next_fragP->tc_frag_data.max_prefix_length;
12847 /* This will be updated in the BRANCH_PREFIX scan. */
12848 next_fragP->tc_frag_data.max_prefix_length = 0;
12849 }
12850 else
12851 frag_wane (next_fragP);
12852 break;
12853 }
12854 }
12855
12856 /* Stop if there is no BRANCH_PREFIX. */
12857 if (!align_branch_prefix_size)
12858 return;
12859
12860 /* Scan for BRANCH_PREFIX. */
12861 for (; fragP != NULL; fragP = fragP->fr_next)
12862 {
12863 if (fragP->fr_type != rs_machine_dependent
12864 || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12865 != BRANCH_PREFIX))
12866 continue;
12867
12868 /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
12869 COND_JUMP_PREFIX. */
12870 max_prefix_length = 0;
12871 for (next_fragP = fragP;
12872 next_fragP != NULL;
12873 next_fragP = next_fragP->fr_next)
12874 {
12875 if (next_fragP->fr_type == rs_fill)
12876 /* Skip rs_fill frags. */
12877 continue;
12878 else if (next_fragP->fr_type != rs_machine_dependent)
12879 /* Stop for all other frags. */
12880 break;
12881
12882 /* rs_machine_dependent frags. */
12883 if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12884 == BRANCH_PREFIX)
12885 {
12886 /* Count BRANCH_PREFIX frags. */
12887 if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
12888 {
12889 max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
12890 frag_wane (next_fragP);
12891 }
12892 else
12893 max_prefix_length
12894 += next_fragP->tc_frag_data.max_bytes;
12895 }
12896 else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12897 == BRANCH_PADDING)
12898 || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12899 == FUSED_JCC_PADDING))
12900 {
12901 /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */
12902 fragP->tc_frag_data.u.padding_fragP = next_fragP;
12903 break;
12904 }
12905 else
12906 /* Stop for other rs_machine_dependent frags. */
12907 break;
12908 }
12909
12910 fragP->tc_frag_data.max_prefix_length = max_prefix_length;
12911
12912 /* Skip to the next frag. */
12913 fragP = next_fragP;
12914 }
12915 }
12916
12917 /* Compute padding size for
12918
12919 FUSED_JCC_PADDING
12920 CMP like instruction
12921 BRANCH_PADDING
12922 COND_JUMP/UNCOND_JUMP
12923
12924 or
12925
12926 BRANCH_PADDING
12927 COND_JUMP/UNCOND_JUMP
12928 */
12929
12930 static int
12931 i386_branch_padding_size (fragS *fragP, offsetT address)
12932 {
12933 unsigned int offset, size, padding_size;
12934 fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
12935
12936 /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */
12937 if (!address)
12938 address = fragP->fr_address;
12939 address += fragP->fr_fix;
12940
12941 /* CMP like instruction size. */
12942 size = fragP->tc_frag_data.cmp_size;
12943
12944 /* The base size of the branch frag. */
12945 size += branch_fragP->fr_fix;
12946
12947 /* Add opcode and displacement bytes for the rs_machine_dependent
12948 branch frag. */
12949 if (branch_fragP->fr_type == rs_machine_dependent)
12950 size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
12951
12952 /* Check if branch is within boundary and doesn't end at the last
12953 byte. */
12954 offset = address & ((1U << align_branch_power) - 1);
12955 if ((offset + size) >= (1U << align_branch_power))
12956 /* Padding needed to avoid crossing boundary. */
12957 padding_size = (1U << align_branch_power) - offset;
12958 else
12959 /* No padding needed. */
12960 padding_size = 0;
12961
12962 /* The return value may be saved in tc_frag_data.length, which is
12963 an unsigned byte. */
12964 if (!fits_in_unsigned_byte (padding_size))
12965 abort ();
12966
12967 return padding_size;
12968 }
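
/* Worked example (illustrative): with align_branch_power == 5 (32-byte
boundaries), a fused pair whose total size comes to 9 bytes and which
starts at offset 0x1b within its boundary has 0x1b + 9 >= 0x20, so
padding_size == 0x20 - 0x1b == 5 bytes are requested to move the pair
past the boundary. */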
12969
12970 /* i386_generic_table_relax_frag()
12971
12972 Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
12973 grow/shrink padding to align branch frags. Hand others to
12974 relax_frag(). */
12975
12976 long
12977 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
12978 {
12979 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12980 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12981 {
12982 long padding_size = i386_branch_padding_size (fragP, 0);
12983 long grow = padding_size - fragP->tc_frag_data.length;
12984
12985 /* When the BRANCH_PREFIX frag is used, the computed address
12986 must match the actual address and there should be no padding. */
12987 if (fragP->tc_frag_data.padding_address
12988 && (fragP->tc_frag_data.padding_address != fragP->fr_address
12989 || padding_size))
12990 abort ();
12991
12992 /* Update the padding size. */
12993 if (grow)
12994 fragP->tc_frag_data.length = padding_size;
12995
12996 return grow;
12997 }
12998 else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12999 {
13000 fragS *padding_fragP, *next_fragP;
13001 long padding_size, left_size, last_size;
13002
13003 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
13004 if (!padding_fragP)
13005 /* Use the padding set by the leading BRANCH_PREFIX frag. */
13006 return (fragP->tc_frag_data.length
13007 - fragP->tc_frag_data.last_length);
13008
13009 /* Compute the relative address of the padding frag the very first
13010 time through, when the BRANCH_PREFIX frag sizes are zero. */
13011 if (!fragP->tc_frag_data.padding_address)
13012 fragP->tc_frag_data.padding_address
13013 = padding_fragP->fr_address - (fragP->fr_address - stretch);
13014
13015 /* First update the last length from the previous iteration. */
13016 left_size = fragP->tc_frag_data.prefix_length;
13017 for (next_fragP = fragP;
13018 next_fragP != padding_fragP;
13019 next_fragP = next_fragP->fr_next)
13020 if (next_fragP->fr_type == rs_machine_dependent
13021 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
13022 == BRANCH_PREFIX))
13023 {
13024 if (left_size)
13025 {
13026 int max = next_fragP->tc_frag_data.max_bytes;
13027 if (max)
13028 {
13029 int size;
13030 if (max > left_size)
13031 size = left_size;
13032 else
13033 size = max;
13034 left_size -= size;
13035 next_fragP->tc_frag_data.last_length = size;
13036 }
13037 }
13038 else
13039 next_fragP->tc_frag_data.last_length = 0;
13040 }
13041
13042 /* Check the padding size for the padding frag. */
13043 padding_size = i386_branch_padding_size
13044 (padding_fragP, (fragP->fr_address
13045 + fragP->tc_frag_data.padding_address));
13046
13047 last_size = fragP->tc_frag_data.prefix_length;
13048 /* Check if there is a change from the last iteration. */
13049 if (padding_size == last_size)
13050 {
13051 /* Update the expected address of the padding frag. */
13052 padding_fragP->tc_frag_data.padding_address
13053 = (fragP->fr_address + padding_size
13054 + fragP->tc_frag_data.padding_address);
13055 return 0;
13056 }
13057
13058 if (padding_size > fragP->tc_frag_data.max_prefix_length)
13059 {
13060 /* No padding if there is insufficient room. Clear the
13061 expected address of the padding frag. */
13062 padding_fragP->tc_frag_data.padding_address = 0;
13063 padding_size = 0;
13064 }
13065 else
13066 /* Store the expected address of the padding frag. */
13067 padding_fragP->tc_frag_data.padding_address
13068 = (fragP->fr_address + padding_size
13069 + fragP->tc_frag_data.padding_address);
13070
13071 fragP->tc_frag_data.prefix_length = padding_size;
13072
13073 /* Update the length for the current iteration. */
13074 left_size = padding_size;
13075 for (next_fragP = fragP;
13076 next_fragP != padding_fragP;
13077 next_fragP = next_fragP->fr_next)
13078 if (next_fragP->fr_type == rs_machine_dependent
13079 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
13080 == BRANCH_PREFIX))
13081 {
13082 if (left_size)
13083 {
13084 int max = next_fragP->tc_frag_data.max_bytes;
13085 if (max)
13086 {
13087 int size;
13088 if (max > left_size)
13089 size = left_size;
13090 else
13091 size = max;
13092 left_size -= size;
13093 next_fragP->tc_frag_data.length = size;
13094 }
13095 }
13096 else
13097 next_fragP->tc_frag_data.length = 0;
13098 }
13099
13100 return (fragP->tc_frag_data.length
13101 - fragP->tc_frag_data.last_length);
13102 }
13103 return relax_frag (segment, fragP, stretch);
13104 }
13105
13106 /* md_estimate_size_before_relax()
13107
13108 Called just before relax() for rs_machine_dependent frags. The x86
13109 assembler uses these frags to handle variable size jump
13110 instructions.
13111
13112 Any symbol that is now undefined will not become defined.
13113 Return the correct fr_subtype in the frag.
13114 Return the initial "guess for variable size of frag" to caller.
13115 The guess is actually the growth beyond the fixed part. Whatever
13116 we do to grow the fixed or variable part contributes to our
13117 returned value. */
13118
13119 int
13120 md_estimate_size_before_relax (fragS *fragP, segT segment)
13121 {
13122 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
13123 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
13124 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
13125 {
13126 i386_classify_machine_dependent_frag (fragP);
13127 return fragP->tc_frag_data.length;
13128 }
13129
13130 /* We've already got fragP->fr_subtype right; all we have to do is
13131 check for un-relaxable symbols. On an ELF system, we can't relax
13132 an externally visible symbol, because it may be overridden by a
13133 shared library. */
13134 if (S_GET_SEGMENT (fragP->fr_symbol) != segment
13135 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13136 || (IS_ELF
13137 && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
13138 fragP->fr_var))
13139 #endif
13140 #if defined (OBJ_COFF) && defined (TE_PE)
13141 || (OUTPUT_FLAVOR == bfd_target_coff_flavour
13142 && S_IS_WEAK (fragP->fr_symbol))
13143 #endif
13144 )
13145 {
13146 /* Symbol is undefined in this segment, or we need to keep a
13147 reloc so that weak symbols can be overridden. */
13148 int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
13149 enum bfd_reloc_code_real reloc_type;
13150 unsigned char *opcode;
13151 int old_fr_fix;
13152 fixS *fixP = NULL;
13153
13154 if (fragP->fr_var != NO_RELOC)
13155 reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
13156 else if (size == 2)
13157 reloc_type = BFD_RELOC_16_PCREL;
13158 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13159 else if (fragP->tc_frag_data.code64 && fragP->fr_offset == 0
13160 && need_plt32_p (fragP->fr_symbol))
13161 reloc_type = BFD_RELOC_X86_64_PLT32;
13162 #endif
13163 else
13164 reloc_type = BFD_RELOC_32_PCREL;
13165
13166 old_fr_fix = fragP->fr_fix;
13167 opcode = (unsigned char *) fragP->fr_opcode;
13168
13169 switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
13170 {
13171 case UNCOND_JUMP:
13172 /* Make jmp (0xeb) a (d)word displacement jump. */
13173 opcode[0] = 0xe9;
13174 fragP->fr_fix += size;
13175 fixP = fix_new (fragP, old_fr_fix, size,
13176 fragP->fr_symbol,
13177 fragP->fr_offset, 1,
13178 reloc_type);
13179 break;
13180
13181 case COND_JUMP86:
13182 if (size == 2
13183 && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
13184 {
13185 /* Negate the condition, and branch past an
13186 unconditional jump. */
13187 opcode[0] ^= 1;
13188 opcode[1] = 3;
13189 /* Insert an unconditional jump. */
13190 opcode[2] = 0xe9;
13191 /* We added two extra opcode bytes, and have a two byte
13192 offset. */
13193 fragP->fr_fix += 2 + 2;
13194 fix_new (fragP, old_fr_fix + 2, 2,
13195 fragP->fr_symbol,
13196 fragP->fr_offset, 1,
13197 reloc_type);
13198 break;
13199 }
13200 /* Fall through. */
13201
13202 case COND_JUMP:
13203 if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
13204 {
13205 fragP->fr_fix += 1;
13206 fixP = fix_new (fragP, old_fr_fix, 1,
13207 fragP->fr_symbol,
13208 fragP->fr_offset, 1,
13209 BFD_RELOC_8_PCREL);
13210 fixP->fx_signed = 1;
13211 break;
13212 }
13213
13214 /* This changes the byte-displacement jump 0x7N
13215 to the (d)word-displacement jump 0x0f,0x8N. */
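/* E.g. `je' (0x74) becomes 0x0f 0x84 followed by the (d)word
displacement. */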
13216 opcode[1] = opcode[0] + 0x10;
13217 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
13218 /* We've added an opcode byte. */
13219 fragP->fr_fix += 1 + size;
13220 fixP = fix_new (fragP, old_fr_fix + 1, size,
13221 fragP->fr_symbol,
13222 fragP->fr_offset, 1,
13223 reloc_type);
13224 break;
13225
13226 default:
13227 BAD_CASE (fragP->fr_subtype);
13228 break;
13229 }
13230
13231 /* All jumps handled here are signed, but don't unconditionally use a
13232 signed limit check for 32 and 16 bit jumps as we want to allow wrap
13233 around at 4G (outside of 64-bit mode) and 64k. */
13234 if (size == 4 && flag_code == CODE_64BIT)
13235 fixP->fx_signed = 1;
13236
13237 frag_wane (fragP);
13238 return fragP->fr_fix - old_fr_fix;
13239 }
13240
13241 /* Guess size depending on current relax state. Initially the relax
13242 state will correspond to a short jump and we return 1, because
13243 the variable part of the frag (the branch offset) is one byte
13244 long. However, we can relax a section more than once and in that
13245 case we must either set fr_subtype back to the unrelaxed state,
13246 or return the value for the appropriate branch. */
13247 return md_relax_table[fragP->fr_subtype].rlx_length;
13248 }
13249
13250 /* Called after relax() is finished.
13251
13252 In: Address of frag.
13253 fr_type == rs_machine_dependent.
13254 fr_subtype is what the address relaxed to.
13255
13256 Out: Any fixSs and constants are set up.
13257 Caller will turn frag into a ".space 0". */
13258
13259 void
13260 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
13261 fragS *fragP)
13262 {
13263 unsigned char *opcode;
13264 unsigned char *where_to_put_displacement = NULL;
13265 offsetT target_address;
13266 offsetT opcode_address;
13267 unsigned int extension = 0;
13268 offsetT displacement_from_opcode_start;
13269
13270 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
13271 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
13272 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
13273 {
13274 /* Generate nop padding. */
13275 unsigned int size = fragP->tc_frag_data.length;
13276 if (size)
13277 {
13278 if (size > fragP->tc_frag_data.max_bytes)
13279 abort ();
13280
13281 if (flag_debug)
13282 {
13283 const char *msg;
13284 const char *branch = "branch";
13285 const char *prefix = "";
13286 fragS *padding_fragP;
13287 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
13288 == BRANCH_PREFIX)
13289 {
13290 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
13291 switch (fragP->tc_frag_data.default_prefix)
13292 {
13293 default:
13294 abort ();
13295 break;
13296 case CS_PREFIX_OPCODE:
13297 prefix = " cs";
13298 break;
13299 case DS_PREFIX_OPCODE:
13300 prefix = " ds";
13301 break;
13302 case ES_PREFIX_OPCODE:
13303 prefix = " es";
13304 break;
13305 case FS_PREFIX_OPCODE:
13306 prefix = " fs";
13307 break;
13308 case GS_PREFIX_OPCODE:
13309 prefix = " gs";
13310 break;
13311 case SS_PREFIX_OPCODE:
13312 prefix = " ss";
13313 break;
13314 }
13315 if (padding_fragP)
13316 msg = _("%s:%u: add %d%s at 0x%llx to align "
13317 "%s within %d-byte boundary\n");
13318 else
13319 msg = _("%s:%u: add additional %d%s at 0x%llx to "
13320 "align %s within %d-byte boundary\n");
13321 }
13322 else
13323 {
13324 padding_fragP = fragP;
13325 msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
13326 "%s within %d-byte boundary\n");
13327 }
13328
13329 if (padding_fragP)
13330 switch (padding_fragP->tc_frag_data.branch_type)
13331 {
13332 case align_branch_jcc:
13333 branch = "jcc";
13334 break;
13335 case align_branch_fused:
13336 branch = "fused jcc";
13337 break;
13338 case align_branch_jmp:
13339 branch = "jmp";
13340 break;
13341 case align_branch_call:
13342 branch = "call";
13343 break;
13344 case align_branch_indirect:
13345 branch = "indirect branch";
13346 break;
13347 case align_branch_ret:
13348 branch = "ret";
13349 break;
13350 default:
13351 break;
13352 }
13353
13354 fprintf (stdout, msg,
13355 fragP->fr_file, fragP->fr_line, size, prefix,
13356 (long long) fragP->fr_address, branch,
13357 1 << align_branch_power);
13358 }
13359 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
13360 memset (fragP->fr_opcode,
13361 fragP->tc_frag_data.default_prefix, size);
13362 else
13363 i386_generate_nops (fragP, (char *) fragP->fr_opcode,
13364 size, 0);
13365 fragP->fr_fix += size;
13366 }
13367 return;
13368 }
13369
13370 opcode = (unsigned char *) fragP->fr_opcode;
13371
13372 /* Address we want to reach in file space. */
13373 target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
13374
13375 /* Address opcode resides at in file space. */
13376 opcode_address = fragP->fr_address + fragP->fr_fix;
13377
13378 /* Displacement from opcode start to fill into instruction. */
13379 displacement_from_opcode_start = target_address - opcode_address;
13380
13381 if ((fragP->fr_subtype & BIG) == 0)
13382 {
13383 /* Don't have to change opcode. */
13384 extension = 1; /* 1 opcode + 1 displacement */
13385 where_to_put_displacement = &opcode[1];
13386 }
13387 else
13388 {
13389 if (no_cond_jump_promotion
13390 && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
13391 as_warn_where (fragP->fr_file, fragP->fr_line,
13392 _("long jump required"));
13393
13394 switch (fragP->fr_subtype)
13395 {
13396 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
13397 extension = 4; /* 1 opcode + 4 displacement */
13398 opcode[0] = 0xe9;
13399 where_to_put_displacement = &opcode[1];
13400 break;
13401
13402 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
13403 extension = 2; /* 1 opcode + 2 displacement */
13404 opcode[0] = 0xe9;
13405 where_to_put_displacement = &opcode[1];
13406 break;
13407
13408 case ENCODE_RELAX_STATE (COND_JUMP, BIG):
13409 case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
13410 extension = 5; /* 2 opcode + 4 displacement */
13411 opcode[1] = opcode[0] + 0x10;
13412 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
13413 where_to_put_displacement = &opcode[2];
13414 break;
13415
13416 case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
13417 extension = 3; /* 2 opcode + 2 displacement */
13418 opcode[1] = opcode[0] + 0x10;
13419 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
13420 where_to_put_displacement = &opcode[2];
13421 break;
13422
13423 case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
13424 extension = 4;
13425 opcode[0] ^= 1;
13426 opcode[1] = 3;
13427 opcode[2] = 0xe9;
13428 where_to_put_displacement = &opcode[3];
13429 break;
13430
13431 default:
13432 BAD_CASE (fragP->fr_subtype);
13433 break;
13434 }
13435 }
13436
13437 /* If the size is less than four we are sure that the operand fits,
13438 but if it's 4, then the displacement could be larger than
13439 +/- 2GB. */
13440 if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
13441 && object_64bit
13442 && ((addressT) (displacement_from_opcode_start - extension
13443 + ((addressT) 1 << 31))
13444 > (((addressT) 2 << 31) - 1)))
13445 {
13446 as_bad_where (fragP->fr_file, fragP->fr_line,
13447 _("jump target out of range"));
13448 /* Make us emit 0. */
13449 displacement_from_opcode_start = extension;
13450 }
13451 /* Now put displacement after opcode. */
13452 md_number_to_chars ((char *) where_to_put_displacement,
13453 (valueT) (displacement_from_opcode_start - extension),
13454 DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
13455 fragP->fr_fix += extension;
13456 }
13457 \f
13458 /* Apply a fixup (fixP) to segment data, once it has been determined
13459 by our caller that we have all the info we need to fix it up.
13460
13461 Parameter valP is the pointer to the value of the bits.
13462
13463 On the 386, immediates, displacements, and data pointers are all in
13464 the same (little-endian) format, so we don't need to care about which
13465 we are handling. */
13466
13467 void
13468 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
13469 {
13470 char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
13471 valueT value = *valP;
13472
13473 #if !defined (TE_Mach)
13474 if (fixP->fx_pcrel)
13475 {
13476 switch (fixP->fx_r_type)
13477 {
13478 default:
13479 break;
13480
13481 case BFD_RELOC_64:
13482 fixP->fx_r_type = BFD_RELOC_64_PCREL;
13483 break;
13484 case BFD_RELOC_32:
13485 case BFD_RELOC_X86_64_32S:
13486 fixP->fx_r_type = BFD_RELOC_32_PCREL;
13487 break;
13488 case BFD_RELOC_16:
13489 fixP->fx_r_type = BFD_RELOC_16_PCREL;
13490 break;
13491 case BFD_RELOC_8:
13492 fixP->fx_r_type = BFD_RELOC_8_PCREL;
13493 break;
13494 }
13495 }
13496
13497 if (fixP->fx_addsy != NULL
13498 && (fixP->fx_r_type == BFD_RELOC_32_PCREL
13499 || fixP->fx_r_type == BFD_RELOC_64_PCREL
13500 || fixP->fx_r_type == BFD_RELOC_16_PCREL
13501 || fixP->fx_r_type == BFD_RELOC_8_PCREL)
13502 && !use_rela_relocations)
13503 {
13504 /* This is a hack. There should be a better way to handle this.
13505 This covers for the fact that bfd_install_relocation will
13506 subtract the current location (for partial_inplace, PC relative
13507 relocations); see more below. */
13508 #ifndef OBJ_AOUT
13509 if (IS_ELF
13510 #ifdef TE_PE
13511 || OUTPUT_FLAVOR == bfd_target_coff_flavour
13512 #endif
13513 )
13514 value += fixP->fx_where + fixP->fx_frag->fr_address;
13515 #endif
13516 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13517 if (IS_ELF)
13518 {
13519 segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
13520
13521 if ((sym_seg == seg
13522 || (symbol_section_p (fixP->fx_addsy)
13523 && sym_seg != absolute_section))
13524 && !generic_force_reloc (fixP))
13525 {
13526 /* Yes, we add the values in twice. This is because
13527 bfd_install_relocation subtracts them out again. I think
13528 bfd_install_relocation is broken, but I don't dare change
13529 it. FIXME. */
13530 value += fixP->fx_where + fixP->fx_frag->fr_address;
13531 }
13532 }
13533 #endif
13534 #if defined (OBJ_COFF) && defined (TE_PE)
13535 /* For some reason, the PE format does not store a
13536 section address offset for a PC relative symbol. */
13537 if (S_GET_SEGMENT (fixP->fx_addsy) != seg
13538 || S_IS_WEAK (fixP->fx_addsy))
13539 value += md_pcrel_from (fixP);
13540 #endif
13541 }
13542 #if defined (OBJ_COFF) && defined (TE_PE)
13543 if (fixP->fx_addsy != NULL
13544 && S_IS_WEAK (fixP->fx_addsy)
13545 /* PR 16858: Do not modify weak function references. */
13546 && ! fixP->fx_pcrel)
13547 {
13548 #if !defined (TE_PEP)
13549 /* For x86 PE weak function symbols are neither PC-relative
13550 nor do they set S_IS_FUNCTION. So the only reliable way
13551 to detect them is to check the flags of their containing
13552 section. */
13553 if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
13554 && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
13555 ;
13556 else
13557 #endif
13558 value -= S_GET_VALUE (fixP->fx_addsy);
13559 }
13560 #endif
13561
13562 /* Fix a few things - the dynamic linker expects certain values here,
13563 and we must not disappoint it. */
13564 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13565 if (IS_ELF && fixP->fx_addsy)
13566 switch (fixP->fx_r_type)
13567 {
13568 case BFD_RELOC_386_PLT32:
13569 case BFD_RELOC_X86_64_PLT32:
13570 /* Make the jump instruction point to the address of the operand.
13571 At runtime we merely add the offset to the actual PLT entry.
13572 NB: Subtract the offset size only for jump instructions. */
13573 if (fixP->fx_pcrel)
13574 value = -4;
13575 break;
13576
13577 case BFD_RELOC_386_TLS_GD:
13578 case BFD_RELOC_386_TLS_LDM:
13579 case BFD_RELOC_386_TLS_IE_32:
13580 case BFD_RELOC_386_TLS_IE:
13581 case BFD_RELOC_386_TLS_GOTIE:
13582 case BFD_RELOC_386_TLS_GOTDESC:
13583 case BFD_RELOC_X86_64_TLSGD:
13584 case BFD_RELOC_X86_64_TLSLD:
13585 case BFD_RELOC_X86_64_GOTTPOFF:
13586 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
13587 value = 0; /* Fully resolved at runtime. No addend. */
13588 /* Fallthrough */
13589 case BFD_RELOC_386_TLS_LE:
13590 case BFD_RELOC_386_TLS_LDO_32:
13591 case BFD_RELOC_386_TLS_LE_32:
13592 case BFD_RELOC_X86_64_DTPOFF32:
13593 case BFD_RELOC_X86_64_DTPOFF64:
13594 case BFD_RELOC_X86_64_TPOFF32:
13595 case BFD_RELOC_X86_64_TPOFF64:
13596 S_SET_THREAD_LOCAL (fixP->fx_addsy);
13597 break;
13598
13599 case BFD_RELOC_386_TLS_DESC_CALL:
13600 case BFD_RELOC_X86_64_TLSDESC_CALL:
13601 value = 0; /* Fully resolved at runtime. No addend. */
13602 S_SET_THREAD_LOCAL (fixP->fx_addsy);
13603 fixP->fx_done = 0;
13604 return;
13605
13606 case BFD_RELOC_VTABLE_INHERIT:
13607 case BFD_RELOC_VTABLE_ENTRY:
13608 fixP->fx_done = 0;
13609 return;
13610
13611 default:
13612 break;
13613 }
13614 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) */
13615
13616 /* If not 64-bit, massage the value to account for wraparound when !BFD64. */
13617 if (!object_64bit)
13618 value = extend_to_32bit_address (value);
13619
13620 *valP = value;
13621 #endif /* !defined (TE_Mach) */
13622
13623 /* Are we finished with this relocation now? */
13624 if (fixP->fx_addsy == NULL)
13625 {
13626 fixP->fx_done = 1;
13627 switch (fixP->fx_r_type)
13628 {
13629 case BFD_RELOC_X86_64_32S:
13630 fixP->fx_signed = 1;
13631 break;
13632
13633 default:
13634 break;
13635 }
13636 }
13637 #if defined (OBJ_COFF) && defined (TE_PE)
13638 else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
13639 {
13640 fixP->fx_done = 0;
13641 /* Remember value for tc_gen_reloc. */
13642 fixP->fx_addnumber = value;
13643 /* Clear out the frag for now. */
13644 value = 0;
13645 }
13646 #endif
13647 else if (use_rela_relocations)
13648 {
13649 if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
13650 fixP->fx_no_overflow = 1;
13651 /* Remember value for tc_gen_reloc. */
13652 fixP->fx_addnumber = value;
13653 value = 0;
13654 }
13655
13656 md_number_to_chars (p, value, fixP->fx_size);
13657 }
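
/* Editorial sketch, not in the original source: the final
   md_number_to_chars call above patches the frag in the target's
   little-endian byte order.  E.g. with fx_size == 4 and
   value == 0x12345678, the bytes written at P are 78 56 34 12.  */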
13658 \f
13659 const char *
13660 md_atof (int type, char *litP, int *sizeP)
13661 {
13662 /* This outputs the LITTLENUMs in REVERSE order,
13663 as needed by the little-endian 386. */
13664 return ieee_md_atof (type, litP, sizeP, false);
13665 }
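
/* Editorial example, assuming the default IEEE-754 encoding: for
   ".double 1.0" the bit image is 0x3ff0000000000000, which
   ieee_md_atof stores little-endian as 00 00 00 00 00 00 f0 3f.  */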
13666 \f
13667 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
13668
13669 static char *
13670 output_invalid (int c)
13671 {
13672 if (ISPRINT (c))
13673 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
13674 "'%c'", c);
13675 else
13676 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
13677 "(0x%x)", (unsigned char) c);
13678 return output_invalid_buf;
13679 }
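
/* Editorial examples of the routine above: output_invalid ('!')
   returns "'!'", while output_invalid (0x9f) returns "(0x9f)"
   because the character is not printable.  */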
13680
13681 /* Verify that @r can be used in the current context. */
13682
13683 static bool check_register (const reg_entry *r)
13684 {
13685 if (allow_pseudo_reg)
13686 return true;
13687
13688 if (operand_type_all_zero (&r->reg_type))
13689 return false;
13690
13691 if ((r->reg_type.bitfield.dword
13692 || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
13693 || r->reg_type.bitfield.class == RegCR
13694 || r->reg_type.bitfield.class == RegDR)
13695 && !cpu_arch_flags.bitfield.cpui386)
13696 return false;
13697
13698 if (r->reg_type.bitfield.class == RegTR
13699 && (flag_code == CODE_64BIT
13700 || !cpu_arch_flags.bitfield.cpui386
13701 || cpu_arch_isa_flags.bitfield.cpui586
13702 || cpu_arch_isa_flags.bitfield.cpui686))
13703 return false;
13704
13705 if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
13706 return false;
13707
13708 if (!cpu_arch_flags.bitfield.cpuavx512f)
13709 {
13710 if (r->reg_type.bitfield.zmmword
13711 || r->reg_type.bitfield.class == RegMask)
13712 return false;
13713
13714 if (!cpu_arch_flags.bitfield.cpuavx)
13715 {
13716 if (r->reg_type.bitfield.ymmword)
13717 return false;
13718
13719 if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
13720 return false;
13721 }
13722 }
13723
13724 if (r->reg_type.bitfield.tmmword
13725 && (!cpu_arch_flags.bitfield.cpuamx_tile
13726 || flag_code != CODE_64BIT))
13727 return false;
13728
13729 if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
13730 return false;
13731
13732 /* Allow the fake index register only when allow_index_reg is non-zero. */
13733 if (!allow_index_reg && r->reg_num == RegIZ)
13734 return false;
13735
13736 /* Upper 16 vector registers are only available with VREX in 64bit
13737 mode, and require EVEX encoding. */
13738 if (r->reg_flags & RegVRex)
13739 {
13740 if (!cpu_arch_flags.bitfield.cpuavx512f
13741 || flag_code != CODE_64BIT)
13742 return false;
13743
13744 if (i.vec_encoding == vex_encoding_default)
13745 i.vec_encoding = vex_encoding_evex;
13746 else if (i.vec_encoding != vex_encoding_evex)
13747 i.vec_encoding = vex_encoding_error;
13748 }
13749
13750 if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
13751 && (!cpu_arch_flags.bitfield.cpulm
13752 || r->reg_type.bitfield.class != RegCR
13753 || dot_insn ())
13754 && flag_code != CODE_64BIT)
13755 return false;
13756
13757 if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
13758 && !intel_syntax)
13759 return false;
13760
13761 return true;
13762 }
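
/* Editorial sketch of the checks above, using assumed directives:
   after ".arch i8086", "%cr0" is rejected (control registers need
   cpui386); "%xmm16" carries RegVRex, so it is accepted only in
   64-bit mode with AVX512F, and its use switches i.vec_encoding to
   EVEX (when no encoding was specified explicitly).  */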
13763
13764 /* REG_STRING starts *before* REGISTER_PREFIX. */
13765
13766 static const reg_entry *
13767 parse_real_register (const char *reg_string, char **end_op)
13768 {
13769 const char *s = reg_string;
13770 char *p;
13771 char reg_name_given[MAX_REG_NAME_SIZE + 1];
13772 const reg_entry *r;
13773
13774 /* Skip possible REGISTER_PREFIX and possible whitespace. */
13775 if (*s == REGISTER_PREFIX)
13776 ++s;
13777
13778 if (is_space_char (*s))
13779 ++s;
13780
13781 p = reg_name_given;
13782 while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
13783 {
13784 if (p >= reg_name_given + MAX_REG_NAME_SIZE)
13785 return (const reg_entry *) NULL;
13786 s++;
13787 }
13788
13789 if (is_part_of_name (*s))
13790 return (const reg_entry *) NULL;
13791
13792 *end_op = (char *) s;
13793
13794 r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
13795
13796 /* Handle floating point regs, allowing spaces in the (i) part. */
13797 if (r == reg_st0)
13798 {
13799 if (!cpu_arch_flags.bitfield.cpu8087
13800 && !cpu_arch_flags.bitfield.cpu287
13801 && !cpu_arch_flags.bitfield.cpu387
13802 && !allow_pseudo_reg)
13803 return (const reg_entry *) NULL;
13804
13805 if (is_space_char (*s))
13806 ++s;
13807 if (*s == '(')
13808 {
13809 ++s;
13810 if (is_space_char (*s))
13811 ++s;
13812 if (*s >= '0' && *s <= '7')
13813 {
13814 int fpr = *s - '0';
13815 ++s;
13816 if (is_space_char (*s))
13817 ++s;
13818 if (*s == ')')
13819 {
13820 *end_op = (char *) s + 1;
13821 know (r[fpr].reg_num == fpr);
13822 return r + fpr;
13823 }
13824 }
13825 /* We have "%st(" then garbage. */
13826 return (const reg_entry *) NULL;
13827 }
13828 }
13829
13830 return r && check_register (r) ? r : NULL;
13831 }
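
/* Editorial examples for the FP-register handling above: "%st",
   "%st(3)" and "% st ( 3 )" all parse (the latter two to the same
   register), while "%st(8)" and "%st(3" hit the "garbage" path and
   return NULL.  */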
13832
13833 /* REG_STRING starts *before* REGISTER_PREFIX. */
13834
13835 static const reg_entry *
13836 parse_register (const char *reg_string, char **end_op)
13837 {
13838 const reg_entry *r;
13839
13840 if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
13841 r = parse_real_register (reg_string, end_op);
13842 else
13843 r = NULL;
13844 if (!r)
13845 {
13846 char *save = input_line_pointer;
13847 char *buf = xstrdup (reg_string), *name;
13848 symbolS *symbolP;
13849
13850 input_line_pointer = buf;
13851 get_symbol_name (&name);
13852 symbolP = symbol_find (name);
13853 while (symbolP && symbol_equated_p (symbolP))
13854 {
13855 const expressionS *e = symbol_get_value_expression(symbolP);
13856
13857 if (e->X_add_number)
13858 break;
13859 symbolP = e->X_add_symbol;
13860 }
13861 if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
13862 {
13863 const expressionS *e = symbol_get_value_expression (symbolP);
13864
13865 if (e->X_op == O_register)
13866 {
13867 know (e->X_add_number >= 0
13868 && (valueT) e->X_add_number < i386_regtab_size);
13869 r = i386_regtab + e->X_add_number;
13870 *end_op = (char *) reg_string + (input_line_pointer - buf);
13871 }
13872 if (r && !check_register (r))
13873 {
13874 as_bad (_("register '%s%s' cannot be used here"),
13875 register_prefix, r->reg_name);
13876 r = &bad_reg;
13877 }
13878 }
13879 input_line_pointer = save;
13880 free (buf);
13881 }
13882 return r;
13883 }
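
/* Editorial sketch, with assumed input: the equated-symbol loop
   above lets a plain symbol stand for a register, e.g.

	myreg = %eax
	mov $1, myreg

   symbol_find ("myreg") resolves through the O_register expression
   to %eax, and check_register still vets the result.  */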
13884
13885 int
13886 i386_parse_name (char *name, expressionS *e, char *nextcharP)
13887 {
13888 const reg_entry *r = NULL;
13889 char *end = input_line_pointer;
13890
13891 /* We only know the terminating character here. If it is a double quote, it
13892 could be the closing one of a quoted symbol name, or an opening one from
13893 a following string (or another quoted symbol name). Since the latter can't
13894 be valid syntax for anything, bailing out in either case is good enough. */
13895 if (*nextcharP == '"')
13896 return 0;
13897
13898 *end = *nextcharP;
13899 if (*name == REGISTER_PREFIX || allow_naked_reg)
13900 r = parse_real_register (name, &input_line_pointer);
13901 if (r && end <= input_line_pointer)
13902 {
13903 *nextcharP = *input_line_pointer;
13904 *input_line_pointer = 0;
13905 e->X_op = O_register;
13906 e->X_add_number = r - i386_regtab;
13907 return 1;
13908 }
13909 input_line_pointer = end;
13910 *end = 0;
13911 return intel_syntax ? i386_intel_parse_name (name, e) : 0;
13912 }
13913
13914 void
13915 md_operand (expressionS *e)
13916 {
13917 char *end;
13918 const reg_entry *r;
13919
13920 switch (*input_line_pointer)
13921 {
13922 case REGISTER_PREFIX:
13923 r = parse_real_register (input_line_pointer, &end);
13924 if (r)
13925 {
13926 e->X_op = O_register;
13927 e->X_add_number = r - i386_regtab;
13928 input_line_pointer = end;
13929 }
13930 break;
13931
13932 case '[':
13933 gas_assert (intel_syntax);
13934 end = input_line_pointer++;
13935 expression (e);
13936 if (*input_line_pointer == ']')
13937 {
13938 ++input_line_pointer;
13939 e->X_op_symbol = make_expr_symbol (e);
13940 e->X_add_symbol = NULL;
13941 e->X_add_number = 0;
13942 e->X_op = O_index;
13943 }
13944 else
13945 {
13946 e->X_op = O_absent;
13947 input_line_pointer = end;
13948 }
13949 break;
13950 }
13951 }
13952
13953 #ifdef BFD64
13954 /* To maintain consistency with !BFD64 builds of gas, record whether any
13955 (binary) operator was involved in an expression. As expressions are
13956 evaluated in only 32 bits when !BFD64, we use this to decide whether to
13957 truncate results. */
13958 bool i386_record_operator (operatorT op,
13959 const expressionS *left,
13960 const expressionS *right)
13961 {
13962 if (op == O_absent)
13963 return false;
13964
13965 if (!left)
13966 {
13967 /* Since the expression parser applies unary operators to bignum
13968 operands just fine, we don't need to be concerned about such
13969 operands not fitting in 32 bits. */
13970 if (right->X_op == O_constant && right->X_unsigned
13971 && !fits_in_unsigned_long (right->X_add_number))
13972 return false;
13973 }
13974 /* This isn't entirely right: The pattern can also result when constant
13975 expressions are folded (e.g. 0xffffffff + 1). */
13976 else if ((left->X_op == O_constant && left->X_unsigned
13977 && !fits_in_unsigned_long (left->X_add_number))
13978 || (right->X_op == O_constant && right->X_unsigned
13979 && !fits_in_unsigned_long (right->X_add_number)))
13980 expr_mode = expr_large_value;
13981
13982 if (expr_mode != expr_large_value)
13983 expr_mode = expr_operator_present;
13984
13985 return false;
13986 }
13987 #endif
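
/* Editorial example: in an expression such as "0x123456789 + 1" the
   left operand does not fit in 32 bits, so expr_mode becomes
   expr_large_value and, per the comment above, results are then not
   truncated to 32 bits.  */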
13988 \f
13989 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13990 const char *md_shortopts = "kVQ:sqnO::";
13991 #else
13992 const char *md_shortopts = "qnO::";
13993 #endif
13994
13995 #define OPTION_32 (OPTION_MD_BASE + 0)
13996 #define OPTION_64 (OPTION_MD_BASE + 1)
13997 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
13998 #define OPTION_MARCH (OPTION_MD_BASE + 3)
13999 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
14000 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
14001 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
14002 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
14003 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
14004 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
14005 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
14006 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
14007 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
14008 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
14009 #define OPTION_X32 (OPTION_MD_BASE + 14)
14010 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
14011 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
14012 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
14013 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
14014 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
14015 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
14016 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
14017 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
14018 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
14019 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
14020 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
14021 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
14022 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
14023 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
14024 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
14025 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
14026 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
14027 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
14028 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
14029 #define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
14030
14031 struct option md_longopts[] =
14032 {
14033 {"32", no_argument, NULL, OPTION_32},
14034 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
14035 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
14036 {"64", no_argument, NULL, OPTION_64},
14037 #endif
14038 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14039 {"x32", no_argument, NULL, OPTION_X32},
14040 {"mshared", no_argument, NULL, OPTION_MSHARED},
14041 {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
14042 #endif
14043 {"divide", no_argument, NULL, OPTION_DIVIDE},
14044 {"march", required_argument, NULL, OPTION_MARCH},
14045 {"mtune", required_argument, NULL, OPTION_MTUNE},
14046 {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
14047 {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
14048 {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
14049 {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
14050 {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
14051 {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
14052 {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
14053 {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
14054 {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
14055 {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
14056 {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
14057 {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
14058 {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
14059 # if defined (TE_PE) || defined (TE_PEP)
14060 {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
14061 #endif
14062 {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
14063 {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
14064 {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
14065 {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
14066 {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
14067 {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
14068 {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
14069 {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
14070 {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
14071 {"mlfence-before-indirect-branch", required_argument, NULL,
14072 OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
14073 {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
14074 {"mamd64", no_argument, NULL, OPTION_MAMD64},
14075 {"mintel64", no_argument, NULL, OPTION_MINTEL64},
14076 {NULL, no_argument, NULL, 0}
14077 };
14078 size_t md_longopts_size = sizeof (md_longopts);
14079
14080 int
14081 md_parse_option (int c, const char *arg)
14082 {
14083 unsigned int j;
14084 char *arch, *next, *saved, *type;
14085
14086 switch (c)
14087 {
14088 case 'n':
14089 optimize_align_code = 0;
14090 break;
14091
14092 case 'q':
14093 quiet_warnings = 1;
14094 break;
14095
14096 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14097 /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
14098 should be emitted or not. FIXME: Not implemented. */
14099 case 'Q':
14100 if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
14101 return 0;
14102 break;
14103
14104 /* -V: SVR4 argument to print version ID. */
14105 case 'V':
14106 print_version_id ();
14107 break;
14108
14109 /* -k: Ignore for FreeBSD compatibility. */
14110 case 'k':
14111 break;
14112
14113 case 's':
14114 /* -s: On i386 Solaris, this tells the native assembler to use
14115 .stab instead of .stab.excl. We always use .stab anyhow. */
14116 break;
14117
14118 case OPTION_MSHARED:
14119 shared = 1;
14120 break;
14121
14122 case OPTION_X86_USED_NOTE:
14123 if (strcasecmp (arg, "yes") == 0)
14124 x86_used_note = 1;
14125 else if (strcasecmp (arg, "no") == 0)
14126 x86_used_note = 0;
14127 else
14128 as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
14129 break;
14130
14131
14132 #endif
14133 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
14134 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
14135 case OPTION_64:
14136 {
14137 const char **list, **l;
14138
14139 list = bfd_target_list ();
14140 for (l = list; *l != NULL; l++)
14141 if (startswith (*l, "elf64-x86-64")
14142 || strcmp (*l, "coff-x86-64") == 0
14143 || strcmp (*l, "pe-x86-64") == 0
14144 || strcmp (*l, "pei-x86-64") == 0
14145 || strcmp (*l, "mach-o-x86-64") == 0)
14146 {
14147 default_arch = "x86_64";
14148 break;
14149 }
14150 if (*l == NULL)
14151 as_fatal (_("no compiled in support for x86_64"));
14152 free (list);
14153 }
14154 break;
14155 #endif
14156
14157 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14158 case OPTION_X32:
14159 if (IS_ELF)
14160 {
14161 const char **list, **l;
14162
14163 list = bfd_target_list ();
14164 for (l = list; *l != NULL; l++)
14165 if (startswith (*l, "elf32-x86-64"))
14166 {
14167 default_arch = "x86_64:32";
14168 break;
14169 }
14170 if (*l == NULL)
14171 as_fatal (_("no compiled in support for 32bit x86_64"));
14172 free (list);
14173 }
14174 else
14175 as_fatal (_("32bit x86_64 is only supported for ELF"));
14176 break;
14177 #endif
14178
14179 case OPTION_32:
14180 {
14181 const char **list, **l;
14182
14183 list = bfd_target_list ();
14184 for (l = list; *l != NULL; l++)
14185 if (strstr (*l, "-i386")
14186 || strstr (*l, "-go32"))
14187 {
14188 default_arch = "i386";
14189 break;
14190 }
14191 if (*l == NULL)
14192 as_fatal (_("no compiled in support for ix86"));
14193 free (list);
14194 }
14195 break;
14196
14197 case OPTION_DIVIDE:
14198 #ifdef SVR4_COMMENT_CHARS
14199 {
14200 char *n, *t;
14201 const char *s;
14202
14203 n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
14204 t = n;
14205 for (s = i386_comment_chars; *s != '\0'; s++)
14206 if (*s != '/')
14207 *t++ = *s;
14208 *t = '\0';
14209 i386_comment_chars = n;
14210 }
14211 #endif
14212 break;
14213
14214 case OPTION_MARCH:
14215 saved = xstrdup (arg);
14216 arch = saved;
14217 /* Allow -march=+nosse. */
14218 if (*arch == '+')
14219 arch++;
14220 do
14221 {
14222 if (*arch == '.')
14223 as_fatal (_("invalid -march= option: `%s'"), arg);
14224 next = strchr (arch, '+');
14225 if (next)
14226 *next++ = '\0';
14227 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
14228 {
14229 if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
14230 && strcmp (arch, cpu_arch[j].name) == 0)
14231 {
14232 /* Processor. */
14233 if (! cpu_arch[j].enable.bitfield.cpui386)
14234 continue;
14235
14236 cpu_arch_name = cpu_arch[j].name;
14237 free (cpu_sub_arch_name);
14238 cpu_sub_arch_name = NULL;
14239 cpu_arch_flags = cpu_arch[j].enable;
14240 cpu_arch_isa = cpu_arch[j].type;
14241 cpu_arch_isa_flags = cpu_arch[j].enable;
14242 if (!cpu_arch_tune_set)
14243 {
14244 cpu_arch_tune = cpu_arch_isa;
14245 cpu_arch_tune_flags = cpu_arch_isa_flags;
14246 }
14247 break;
14248 }
14249 else if (cpu_arch[j].type == PROCESSOR_NONE
14250 && strcmp (arch, cpu_arch[j].name) == 0
14251 && !cpu_flags_all_zero (&cpu_arch[j].enable))
14252 {
14253 /* ISA extension. */
14254 i386_cpu_flags flags;
14255
14256 flags = cpu_flags_or (cpu_arch_flags,
14257 cpu_arch[j].enable);
14258
14259 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
14260 {
14261 extend_cpu_sub_arch_name (arch);
14262 cpu_arch_flags = flags;
14263 cpu_arch_isa_flags = flags;
14264 }
14265 else
14266 cpu_arch_isa_flags
14267 = cpu_flags_or (cpu_arch_isa_flags,
14268 cpu_arch[j].enable);
14269 break;
14270 }
14271 }
14272
14273 if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
14274 {
14275 /* Disable an ISA extension. */
14276 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
14277 if (cpu_arch[j].type == PROCESSOR_NONE
14278 && strcmp (arch + 2, cpu_arch[j].name) == 0)
14279 {
14280 i386_cpu_flags flags;
14281
14282 flags = cpu_flags_and_not (cpu_arch_flags,
14283 cpu_arch[j].disable);
14284 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
14285 {
14286 extend_cpu_sub_arch_name (arch);
14287 cpu_arch_flags = flags;
14288 cpu_arch_isa_flags = flags;
14289 }
14290 break;
14291 }
14292 }
14293
14294 if (j >= ARRAY_SIZE (cpu_arch))
14295 as_fatal (_("invalid -march= option: `%s'"), arg);
14296
14297 arch = next;
14298 }
14299 while (next != NULL);
14300 free (saved);
14301 break;
14302
14303 case OPTION_MTUNE:
14304 if (*arg == '.')
14305 as_fatal (_("invalid -mtune= option: `%s'"), arg);
14306 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
14307 {
14308 if (cpu_arch[j].type != PROCESSOR_NONE
14309 && strcmp (arg, cpu_arch[j].name) == 0)
14310 {
14311 cpu_arch_tune_set = 1;
14312 cpu_arch_tune = cpu_arch [j].type;
14313 cpu_arch_tune_flags = cpu_arch[j].enable;
14314 break;
14315 }
14316 }
14317 if (j >= ARRAY_SIZE (cpu_arch))
14318 as_fatal (_("invalid -mtune= option: `%s'"), arg);
14319 break;
14320
14321 case OPTION_MMNEMONIC:
14322 if (strcasecmp (arg, "att") == 0)
14323 intel_mnemonic = 0;
14324 else if (strcasecmp (arg, "intel") == 0)
14325 intel_mnemonic = 1;
14326 else
14327 as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
14328 break;
14329
14330 case OPTION_MSYNTAX:
14331 if (strcasecmp (arg, "att") == 0)
14332 intel_syntax = 0;
14333 else if (strcasecmp (arg, "intel") == 0)
14334 intel_syntax = 1;
14335 else
14336 as_fatal (_("invalid -msyntax= option: `%s'"), arg);
14337 break;
14338
14339 case OPTION_MINDEX_REG:
14340 allow_index_reg = 1;
14341 break;
14342
14343 case OPTION_MNAKED_REG:
14344 allow_naked_reg = 1;
14345 break;
14346
14347 case OPTION_MSSE2AVX:
14348 sse2avx = 1;
14349 break;
14350
14351 case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
14352 use_unaligned_vector_move = 1;
14353 break;
14354
14355 case OPTION_MSSE_CHECK:
14356 if (strcasecmp (arg, "error") == 0)
14357 sse_check = check_error;
14358 else if (strcasecmp (arg, "warning") == 0)
14359 sse_check = check_warning;
14360 else if (strcasecmp (arg, "none") == 0)
14361 sse_check = check_none;
14362 else
14363 as_fatal (_("invalid -msse-check= option: `%s'"), arg);
14364 break;
14365
14366 case OPTION_MOPERAND_CHECK:
14367 if (strcasecmp (arg, "error") == 0)
14368 operand_check = check_error;
14369 else if (strcasecmp (arg, "warning") == 0)
14370 operand_check = check_warning;
14371 else if (strcasecmp (arg, "none") == 0)
14372 operand_check = check_none;
14373 else
14374 as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
14375 break;
14376
14377 case OPTION_MAVXSCALAR:
14378 if (strcasecmp (arg, "128") == 0)
14379 avxscalar = vex128;
14380 else if (strcasecmp (arg, "256") == 0)
14381 avxscalar = vex256;
14382 else
14383 as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
14384 break;
14385
14386 case OPTION_MVEXWIG:
14387 if (strcmp (arg, "0") == 0)
14388 vexwig = vexw0;
14389 else if (strcmp (arg, "1") == 0)
14390 vexwig = vexw1;
14391 else
14392 as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
14393 break;
14394
14395 case OPTION_MADD_BND_PREFIX:
14396 add_bnd_prefix = 1;
14397 break;
14398
14399 case OPTION_MEVEXLIG:
14400 if (strcmp (arg, "128") == 0)
14401 evexlig = evexl128;
14402 else if (strcmp (arg, "256") == 0)
14403 evexlig = evexl256;
14404 else if (strcmp (arg, "512") == 0)
14405 evexlig = evexl512;
14406 else
14407 as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
14408 break;
14409
14410 case OPTION_MEVEXRCIG:
14411 if (strcmp (arg, "rne") == 0)
14412 evexrcig = rne;
14413 else if (strcmp (arg, "rd") == 0)
14414 evexrcig = rd;
14415 else if (strcmp (arg, "ru") == 0)
14416 evexrcig = ru;
14417 else if (strcmp (arg, "rz") == 0)
14418 evexrcig = rz;
14419 else
14420 as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
14421 break;
14422
14423 case OPTION_MEVEXWIG:
14424 if (strcmp (arg, "0") == 0)
14425 evexwig = evexw0;
14426 else if (strcmp (arg, "1") == 0)
14427 evexwig = evexw1;
14428 else
14429 as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
14430 break;
14431
14432 # if defined (TE_PE) || defined (TE_PEP)
14433 case OPTION_MBIG_OBJ:
14434 use_big_obj = 1;
14435 break;
14436 #endif
14437
14438 case OPTION_MOMIT_LOCK_PREFIX:
14439 if (strcasecmp (arg, "yes") == 0)
14440 omit_lock_prefix = 1;
14441 else if (strcasecmp (arg, "no") == 0)
14442 omit_lock_prefix = 0;
14443 else
14444 as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
14445 break;
14446
14447 case OPTION_MFENCE_AS_LOCK_ADD:
14448 if (strcasecmp (arg, "yes") == 0)
14449 avoid_fence = 1;
14450 else if (strcasecmp (arg, "no") == 0)
14451 avoid_fence = 0;
14452 else
14453 as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
14454 break;
14455
14456 case OPTION_MLFENCE_AFTER_LOAD:
14457 if (strcasecmp (arg, "yes") == 0)
14458 lfence_after_load = 1;
14459 else if (strcasecmp (arg, "no") == 0)
14460 lfence_after_load = 0;
14461 else
14462 as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
14463 break;
14464
14465 case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
14466 if (strcasecmp (arg, "all") == 0)
14467 {
14468 lfence_before_indirect_branch = lfence_branch_all;
14469 if (lfence_before_ret == lfence_before_ret_none)
14470 lfence_before_ret = lfence_before_ret_shl;
14471 }
14472 else if (strcasecmp (arg, "memory") == 0)
14473 lfence_before_indirect_branch = lfence_branch_memory;
14474 else if (strcasecmp (arg, "register") == 0)
14475 lfence_before_indirect_branch = lfence_branch_register;
14476 else if (strcasecmp (arg, "none") == 0)
14477 lfence_before_indirect_branch = lfence_branch_none;
14478 else
14479 as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
14480 arg);
14481 break;
14482
14483 case OPTION_MLFENCE_BEFORE_RET:
14484 if (strcasecmp (arg, "or") == 0)
14485 lfence_before_ret = lfence_before_ret_or;
14486 else if (strcasecmp (arg, "not") == 0)
14487 lfence_before_ret = lfence_before_ret_not;
14488 else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
14489 lfence_before_ret = lfence_before_ret_shl;
14490 else if (strcasecmp (arg, "none") == 0)
14491 lfence_before_ret = lfence_before_ret_none;
14492 else
14493 as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
14494 arg);
14495 break;
14496
14497 case OPTION_MRELAX_RELOCATIONS:
14498 if (strcasecmp (arg, "yes") == 0)
14499 generate_relax_relocations = 1;
14500 else if (strcasecmp (arg, "no") == 0)
14501 generate_relax_relocations = 0;
14502 else
14503 as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
14504 break;
14505
14506 case OPTION_MALIGN_BRANCH_BOUNDARY:
14507 {
14508 char *end;
14509 long int align = strtoul (arg, &end, 0);
14510 if (*end == '\0')
14511 {
14512 if (align == 0)
14513 {
14514 align_branch_power = 0;
14515 break;
14516 }
14517 else if (align >= 16)
14518 {
14519 int align_power;
14520 for (align_power = 0;
14521 (align & 1) == 0;
14522 align >>= 1, align_power++)
14523 continue;
14524 /* Limit alignment power to 31. */
14525 if (align == 1 && align_power < 32)
14526 {
14527 align_branch_power = align_power;
14528 break;
14529 }
14530 }
14531 }
14532 as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
14533 }
14534 break;
14535
14536 case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
14537 {
14538 char *end;
14539 int align = strtoul (arg, &end, 0);
14540 /* Some processors only support 5 prefixes. */
14541 if (*end == '\0' && align >= 0 && align < 6)
14542 {
14543 align_branch_prefix_size = align;
14544 break;
14545 }
14546 as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
14547 arg);
14548 }
14549 break;
14550
14551 case OPTION_MALIGN_BRANCH:
14552 align_branch = 0;
14553 saved = xstrdup (arg);
14554 type = saved;
14555 do
14556 {
14557 next = strchr (type, '+');
14558 if (next)
14559 *next++ = '\0';
14560 if (strcasecmp (type, "jcc") == 0)
14561 align_branch |= align_branch_jcc_bit;
14562 else if (strcasecmp (type, "fused") == 0)
14563 align_branch |= align_branch_fused_bit;
14564 else if (strcasecmp (type, "jmp") == 0)
14565 align_branch |= align_branch_jmp_bit;
14566 else if (strcasecmp (type, "call") == 0)
14567 align_branch |= align_branch_call_bit;
14568 else if (strcasecmp (type, "ret") == 0)
14569 align_branch |= align_branch_ret_bit;
14570 else if (strcasecmp (type, "indirect") == 0)
14571 align_branch |= align_branch_indirect_bit;
14572 else
14573 as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
14574 type = next;
14575 }
14576 while (next != NULL);
14577 free (saved);
14578 break;
14579
14580 case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
14581 align_branch_power = 5;
14582 align_branch_prefix_size = 5;
14583 align_branch = (align_branch_jcc_bit
14584 | align_branch_fused_bit
14585 | align_branch_jmp_bit);
14586 break;
14587
14588 case OPTION_MAMD64:
14589 isa64 = amd64;
14590 break;
14591
14592 case OPTION_MINTEL64:
14593 isa64 = intel64;
14594 break;
14595
14596 case 'O':
14597 if (arg == NULL)
14598 {
14599 optimize = 1;
14600 /* Turn off -Os. */
14601 optimize_for_space = 0;
14602 }
14603 else if (*arg == 's')
14604 {
14605 optimize_for_space = 1;
14606 /* Turn on all encoding optimizations. */
14607 optimize = INT_MAX;
14608 }
14609 else
14610 {
14611 optimize = atoi (arg);
14612 /* Turn off -Os. */
14613 optimize_for_space = 0;
14614 }
14615 break;
14616
14617 default:
14618 return 0;
14619 }
14620 return 1;
14621 }
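
/* Editorial examples of option parsing, using assumed command lines:

     as --64 -march=corei7+avx2+nosse4.2 -O2 foo.s

   The -march= value is split on '+': "corei7" selects a processor
   entry, "avx2" enables an extension, and the "no" prefix on
   "nosse4.2" disables one via cpu_flags_and_not.  "-Os" instead sets
   optimize_for_space and enables all encoding optimizations.  */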
14622
14623 #define MESSAGE_TEMPLATE \
14624 " "
14625
14626 static char *
14627 output_message (FILE *stream, char *p, char *message, char *start,
14628 int *left_p, const char *name, int len)
14629 {
14630 int size = sizeof (MESSAGE_TEMPLATE);
14631 int left = *left_p;
14632
14633 /* Reserve 2 chars for ", " or ",\0". */
14634 left -= len + 2;
14635
14636 /* Check if there is any room. */
14637 if (left >= 0)
14638 {
14639 if (p != start)
14640 {
14641 *p++ = ',';
14642 *p++ = ' ';
14643 }
14644 p = mempcpy (p, name, len);
14645 }
14646 else
14647 {
14648 /* Output the current message now and start a new one. */
14649 *p++ = ',';
14650 *p = '\0';
14651 fprintf (stream, "%s\n", message);
14652 p = start;
14653 left = size - (start - message) - len - 2;
14654
14655 gas_assert (left >= 0);
14656
14657 p = mempcpy (p, name, len);
14658 }
14659
14660 *left_p = left;
14661 return p;
14662 }
14663
14664 static void
14665 show_arch (FILE *stream, int ext, int check)
14666 {
14667 static char message[] = MESSAGE_TEMPLATE;
14668 char *start = message + 27;
14669 char *p;
14670 int size = sizeof (MESSAGE_TEMPLATE);
14671 int left;
14672 const char *name;
14673 int len;
14674 unsigned int j;
14675
14676 p = start;
14677 left = size - (start - message);
14678
14679 if (!ext && check)
14680 {
14681 p = output_message (stream, p, message, start, &left,
14682 STRING_COMMA_LEN ("default"));
14683 p = output_message (stream, p, message, start, &left,
14684 STRING_COMMA_LEN ("push"));
14685 p = output_message (stream, p, message, start, &left,
14686 STRING_COMMA_LEN ("pop"));
14687 }
14688
14689 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
14690 {
14691 /* Should it be skipped? */
14692 if (cpu_arch [j].skip)
14693 continue;
14694
14695 name = cpu_arch [j].name;
14696 len = cpu_arch [j].len;
14697 if (cpu_arch[j].type == PROCESSOR_NONE)
14698 {
14699 /* It is an extension. Skip if we aren't asked to show it. */
14700 if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
14701 continue;
14702 }
14703 else if (ext)
14704 {
14705 /* It is a processor. Skip if we are only showing extensions. */
14706 continue;
14707 }
14708 else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
14709 {
14710 /* It is an impossible processor, so skip it. */
14711 continue;
14712 }
14713
14714 p = output_message (stream, p, message, start, &left, name, len);
14715 }
14716
14717 /* Display disabled extensions. */
14718 if (ext)
14719 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
14720 {
14721 char *str;
14722
14723 if (cpu_arch[j].type != PROCESSOR_NONE
14724 || !cpu_flags_all_zero (&cpu_arch[j].enable))
14725 continue;
14726 str = xasprintf ("no%s", cpu_arch[j].name);
14727 p = output_message (stream, p, message, start, &left, str,
14728 strlen (str));
14729 free (str);
14730 }
14731
14732 *p = '\0';
14733 fprintf (stream, "%s\n", message);
14734 }
14735
14736 void
14737 md_show_usage (FILE *stream)
14738 {
14739 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14740 fprintf (stream, _("\
14741 -Qy, -Qn ignored\n\
14742 -V print assembler version number\n\
14743 -k ignored\n"));
14744 #endif
14745 fprintf (stream, _("\
14746 -n do not optimize code alignment\n\
14747 -O{012s} attempt some code optimizations\n\
14748 -q quieten some warnings\n"));
14749 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14750 fprintf (stream, _("\
14751 -s ignored\n"));
14752 #endif
14753 #ifdef BFD64
14754 # if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14755 fprintf (stream, _("\
14756 --32/--64/--x32 generate 32bit/64bit/x32 object\n"));
14757 # elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
14758 fprintf (stream, _("\
14759 --32/--64 generate 32bit/64bit object\n"));
14760 # endif
14761 #endif
14762 #ifdef SVR4_COMMENT_CHARS
14763 fprintf (stream, _("\
14764 --divide do not treat `/' as a comment character\n"));
14765 #else
14766 fprintf (stream, _("\
14767 --divide ignored\n"));
14768 #endif
14769 fprintf (stream, _("\
14770 -march=CPU[,+EXTENSION...]\n\
14771 generate code for CPU and EXTENSION, CPU is one of:\n"));
14772 show_arch (stream, 0, 1);
14773 fprintf (stream, _("\
14774 EXTENSION is combination of (possibly \"no\"-prefixed):\n"));
14775 show_arch (stream, 1, 0);
14776 fprintf (stream, _("\
14777 -mtune=CPU optimize for CPU, CPU is one of:\n"));
14778 show_arch (stream, 0, 0);
14779 fprintf (stream, _("\
14780 -msse2avx encode SSE instructions with VEX prefix\n"));
14781 fprintf (stream, _("\
14782 -muse-unaligned-vector-move\n\
14783 encode aligned vector move as unaligned vector move\n"));
14784 fprintf (stream, _("\
14785 -msse-check=[none|error|warning] (default: warning)\n\
14786 check SSE instructions\n"));
14787 fprintf (stream, _("\
14788 -moperand-check=[none|error|warning] (default: warning)\n\
14789 check operand combinations for validity\n"));
14790 fprintf (stream, _("\
14791 -mavxscalar=[128|256] (default: 128)\n\
14792 encode scalar AVX instructions with specific vector\n\
14793 length\n"));
14794 fprintf (stream, _("\
14795 -mvexwig=[0|1] (default: 0)\n\
14796 encode VEX instructions with specific VEX.W value\n\
14797 for VEX.W bit ignored instructions\n"));
14798 fprintf (stream, _("\
14799 -mevexlig=[128|256|512] (default: 128)\n\
14800 encode scalar EVEX instructions with specific vector\n\
14801 length\n"));
14802 fprintf (stream, _("\
14803 -mevexwig=[0|1] (default: 0)\n\
14804 encode EVEX instructions with specific EVEX.W value\n\
14805 for EVEX.W bit ignored instructions\n"));
14806 fprintf (stream, _("\
14807 -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
14808 encode EVEX instructions with specific EVEX.RC value\n\
14809 for SAE-only ignored instructions\n"));
14810 fprintf (stream, _("\
14811 -mmnemonic=[att|intel] "));
14812 if (SYSV386_COMPAT)
14813 fprintf (stream, _("(default: att)\n"));
14814 else
14815 fprintf (stream, _("(default: intel)\n"));
14816 fprintf (stream, _("\
14817 use AT&T/Intel mnemonic\n"));
14818 fprintf (stream, _("\
14819 -msyntax=[att|intel] (default: att)\n\
14820 use AT&T/Intel syntax\n"));
14821 fprintf (stream, _("\
14822 -mindex-reg support pseudo index registers\n"));
14823 fprintf (stream, _("\
14824 -mnaked-reg don't require `%%' prefix for registers\n"));
14825 fprintf (stream, _("\
14826 -madd-bnd-prefix add BND prefix for all valid branches\n"));
14827 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14828 fprintf (stream, _("\
14829 -mshared disable branch optimization for shared code\n"));
14830 fprintf (stream, _("\
14831 -mx86-used-note=[no|yes] "));
14832 if (DEFAULT_X86_USED_NOTE)
14833 fprintf (stream, _("(default: yes)\n"));
14834 else
14835 fprintf (stream, _("(default: no)\n"));
14836 fprintf (stream, _("\
14837 generate x86 used ISA and feature properties\n"));
14838 #endif
14839 #if defined (TE_PE) || defined (TE_PEP)
14840 fprintf (stream, _("\
14841 -mbig-obj generate big object files\n"));
14842 #endif
14843 fprintf (stream, _("\
14844 -momit-lock-prefix=[no|yes] (default: no)\n\
14845 strip all lock prefixes\n"));
14846 fprintf (stream, _("\
14847 -mfence-as-lock-add=[no|yes] (default: no)\n\
14848 encode lfence, mfence and sfence as\n\
14849 lock addl $0x0, (%%{re}sp)\n"));
14850 fprintf (stream, _("\
14851 -mrelax-relocations=[no|yes] "));
14852 if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
14853 fprintf (stream, _("(default: yes)\n"));
14854 else
14855 fprintf (stream, _("(default: no)\n"));
14856 fprintf (stream, _("\
14857 generate relax relocations\n"));
14858 fprintf (stream, _("\
14859 -malign-branch-boundary=NUM (default: 0)\n\
14860 align branches within NUM byte boundary\n"));
14861 fprintf (stream, _("\
14862 -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
14863 TYPE is combination of jcc, fused, jmp, call, ret,\n\
14864 indirect\n\
14865 specify types of branches to align\n"));
14866 fprintf (stream, _("\
14867 -malign-branch-prefix-size=NUM (default: 5)\n\
14868 align branches with NUM prefixes per instruction\n"));
14869 fprintf (stream, _("\
14870 -mbranches-within-32B-boundaries\n\
14871 align branches within 32 byte boundary\n"));
14872 fprintf (stream, _("\
14873 -mlfence-after-load=[no|yes] (default: no)\n\
14874 generate lfence after load\n"));
14875 fprintf (stream, _("\
14876 -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
14877 generate lfence before indirect near branch\n"));
14878 fprintf (stream, _("\
14879 -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
14880 generate lfence before ret\n"));
14881 fprintf (stream, _("\
14882 -mamd64 accept only AMD64 ISA [default]\n"));
14883 fprintf (stream, _("\
14884 -mintel64 accept only Intel64 ISA\n"));
14885 }
14886
14887 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
14888 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
14889 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
14890
14891 /* Pick the target format to use. */
14892
14893 const char *
14894 i386_target_format (void)
14895 {
14896 if (startswith (default_arch, "x86_64"))
14897 {
14898 update_code_flag (CODE_64BIT, 1);
14899 if (default_arch[6] == '\0')
14900 x86_elf_abi = X86_64_ABI;
14901 else
14902 x86_elf_abi = X86_64_X32_ABI;
14903 }
14904 else if (!strcmp (default_arch, "i386"))
14905 update_code_flag (CODE_32BIT, 1);
14906 else if (!strcmp (default_arch, "iamcu"))
14907 {
14908 update_code_flag (CODE_32BIT, 1);
14909 if (cpu_arch_isa == PROCESSOR_UNKNOWN)
14910 {
14911 static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
14912 cpu_arch_name = "iamcu";
14913 free (cpu_sub_arch_name);
14914 cpu_sub_arch_name = NULL;
14915 cpu_arch_flags = iamcu_flags;
14916 cpu_arch_isa = PROCESSOR_IAMCU;
14917 cpu_arch_isa_flags = iamcu_flags;
14918 if (!cpu_arch_tune_set)
14919 {
14920 cpu_arch_tune = cpu_arch_isa;
14921 cpu_arch_tune_flags = cpu_arch_isa_flags;
14922 }
14923 }
14924 else if (cpu_arch_isa != PROCESSOR_IAMCU)
14925 as_fatal (_("Intel MCU doesn't support `%s' architecture"),
14926 cpu_arch_name);
14927 }
14928 else
14929 as_fatal (_("unknown architecture"));
14930
14931 if (cpu_flags_all_zero (&cpu_arch_isa_flags))
14932 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
14933 if (cpu_flags_all_zero (&cpu_arch_tune_flags))
14934 cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].enable;
14935
14936 switch (OUTPUT_FLAVOR)
14937 {
14938 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
14939 case bfd_target_aout_flavour:
14940 return AOUT_TARGET_FORMAT;
14941 #endif
14942 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
14943 # if defined (TE_PE) || defined (TE_PEP)
14944 case bfd_target_coff_flavour:
14945 if (flag_code == CODE_64BIT)
14946 {
14947 object_64bit = 1;
14948 return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
14949 }
14950 return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
14951 # elif defined (TE_GO32)
14952 case bfd_target_coff_flavour:
14953 return "coff-go32";
14954 # else
14955 case bfd_target_coff_flavour:
14956 return "coff-i386";
14957 # endif
14958 #endif
14959 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14960 case bfd_target_elf_flavour:
14961 {
14962 const char *format;
14963
14964 switch (x86_elf_abi)
14965 {
14966 default:
14967 format = ELF_TARGET_FORMAT;
14968 #ifndef TE_SOLARIS
14969 tls_get_addr = "___tls_get_addr";
14970 #endif
14971 break;
14972 case X86_64_ABI:
14973 use_rela_relocations = 1;
14974 object_64bit = 1;
14975 #ifndef TE_SOLARIS
14976 tls_get_addr = "__tls_get_addr";
14977 #endif
14978 format = ELF_TARGET_FORMAT64;
14979 break;
14980 case X86_64_X32_ABI:
14981 use_rela_relocations = 1;
14982 object_64bit = 1;
14983 #ifndef TE_SOLARIS
14984 tls_get_addr = "__tls_get_addr";
14985 #endif
14986 disallow_64bit_reloc = 1;
14987 format = ELF_TARGET_FORMAT32;
14988 break;
14989 }
14990 if (cpu_arch_isa == PROCESSOR_IAMCU)
14991 {
14992 if (x86_elf_abi != I386_ABI)
14993 as_fatal (_("Intel MCU is 32bit only"));
14994 return ELF_TARGET_IAMCU_FORMAT;
14995 }
14996 else
14997 return format;
14998 }
14999 #endif
15000 #if defined (OBJ_MACH_O)
15001 case bfd_target_mach_o_flavour:
15002 if (flag_code == CODE_64BIT)
15003 {
15004 use_rela_relocations = 1;
15005 object_64bit = 1;
15006 return "mach-o-x86-64";
15007 }
15008 else
15009 return "mach-o-i386";
15010 #endif
15011 default:
15012 abort ();
15013 return NULL;
15014 }
15015 }
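
/* Editorial sketch: with "--x32", default_arch is "x86_64:32", so
   the code above picks X86_64_X32_ABI and returns
   ELF_TARGET_FORMAT32 (typically "elf32-x86-64"), with RELA
   relocations enabled and 64-bit-only relocations disallowed.  */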
15016
15017 #endif /* OBJ_MAYBE_ more than one */
15018 \f
15019 symbolS *
15020 md_undefined_symbol (char *name)
15021 {
15022 if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
15023 && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
15024 && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
15025 && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
15026 {
15027 if (!GOT_symbol)
15028 {
15029 if (symbol_find (name))
15030 as_bad (_("GOT already in symbol table"));
15031 GOT_symbol = symbol_new (name, undefined_section,
15032 &zero_address_frag, 0);
15033 }
15034 return GOT_symbol;
15035 }
15036 return 0;
15037 }
15038
15039 /* Round up a section size to the appropriate boundary. */
15040
15041 valueT
15042 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
15043 {
15044 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
15045 if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
15046 {
15047 /* For a.out, force the section size to be aligned. If we don't do
15048 this, BFD will align it for us, but it will not write out the
15049 final bytes of the section. This may be a bug in BFD, but it is
15050 easier to fix it here since that is how the other a.out targets
15051 work. */
15052 int align;
15053
15054 align = bfd_section_alignment (segment);
15055 size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
15056 }
15057 #endif
15058
15059 return size;
15060 }
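
/* Editorial example of the rounding above: for an a.out section
   with alignment power 2 (4-byte alignment), a size of 10 becomes
   ((10 + 3) & -4) == 12.  */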
15061
15062 /* On the i386, PC-relative offsets are relative to the start of the
15063 next instruction. That is, the address of the offset, plus its
15064 size, since the offset is always the last part of the insn. */
15065
15066 long
15067 md_pcrel_from (fixS *fixP)
15068 {
15069 return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
15070 }
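
/* Editorial example: for a 5-byte "call foo" (e8 xx xx xx xx)
   starting a frag, the fixup covers the 4-byte displacement at
   offset 1, so md_pcrel_from returns fr_address + 1 + 4, the
   address of the next instruction, which is the base the CPU uses
   for the relative displacement.  */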
15071
15072 #ifndef I386COFF
15073
15074 static void
15075 s_bss (int ignore ATTRIBUTE_UNUSED)
15076 {
15077 int temp;
15078
15079 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15080 if (IS_ELF)
15081 obj_elf_section_change_hook ();
15082 #endif
15083 temp = get_absolute_expression ();
15084 subseg_set (bss_section, (subsegT) temp);
15085 demand_empty_rest_of_line ();
15086 }
15087
15088 #endif
15089
15090 /* Remember constant directive. */
15091
15092 void
15093 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
15094 {
15095 if (last_insn.kind != last_insn_directive
15096 && (bfd_section_flags (now_seg) & SEC_CODE))
15097 {
15098 last_insn.seg = now_seg;
15099 last_insn.kind = last_insn_directive;
15100 last_insn.name = "constant directive";
15101 last_insn.file = as_where (&last_insn.line);
15102 if (lfence_before_ret != lfence_before_ret_none)
15103 {
15104 if (lfence_before_indirect_branch != lfence_branch_none)
15105 as_warn (_("constant directive skips -mlfence-before-ret "
15106 "and -mlfence-before-indirect-branch"));
15107 else
15108 as_warn (_("constant directive skips -mlfence-before-ret"));
15109 }
15110 else if (lfence_before_indirect_branch != lfence_branch_none)
15111 as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
15112 }
15113 }
15114
15115 int
15116 i386_validate_fix (fixS *fixp)
15117 {
15118 if (fixp->fx_addsy && S_GET_SEGMENT(fixp->fx_addsy) == reg_section)
15119 {
15120 reloc_howto_type *howto;
15121
15122 howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
15123 as_bad_where (fixp->fx_file, fixp->fx_line,
15124 _("invalid %s relocation against register"),
15125 howto ? howto->name : "<unknown>");
15126 return 0;
15127 }
15128
15129 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15130 if (fixp->fx_r_type == BFD_RELOC_SIZE32
15131 || fixp->fx_r_type == BFD_RELOC_SIZE64)
15132 return IS_ELF && fixp->fx_addsy
15133 && (!S_IS_DEFINED (fixp->fx_addsy)
15134 || S_IS_EXTERNAL (fixp->fx_addsy));
15135 #endif
15136
15137 if (fixp->fx_subsy)
15138 {
15139 if (fixp->fx_subsy == GOT_symbol)
15140 {
15141 if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
15142 {
15143 if (!object_64bit)
15144 abort ();
15145 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15146 if (fixp->fx_tcbit2)
15147 fixp->fx_r_type = (fixp->fx_tcbit
15148 ? BFD_RELOC_X86_64_REX_GOTPCRELX
15149 : BFD_RELOC_X86_64_GOTPCRELX);
15150 else
15151 #endif
15152 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
15153 }
15154 else
15155 {
15156 if (!object_64bit)
15157 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
15158 else
15159 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
15160 }
15161 fixp->fx_subsy = 0;
15162 }
15163 }
15164 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15165 else
15166 {
15167 /* NB: Commit 292676c1 resolved a PLT32 reloc against a local
15168 symbol to its section. Since PLT32 relocations must be against
15169 symbols, turn such a PLT32 relocation into a PC32 relocation. */
15170 if (fixp->fx_addsy
15171 && (fixp->fx_r_type == BFD_RELOC_386_PLT32
15172 || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
15173 && symbol_section_p (fixp->fx_addsy))
15174 fixp->fx_r_type = BFD_RELOC_32_PCREL;
15175 if (!object_64bit)
15176 {
15177 if (fixp->fx_r_type == BFD_RELOC_386_GOT32
15178 && fixp->fx_tcbit2)
15179 fixp->fx_r_type = BFD_RELOC_386_GOT32X;
15180 }
15181 }
15182 #endif
15183
15184 return 1;
15185 }
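
/* Editorial sketch, assumed input: an expression that subtracts
   _GLOBAL_OFFSET_TABLE_, e.g. "foo - _GLOBAL_OFFSET_TABLE_",
   reaches this point with fx_subsy == GOT_symbol; in 32-bit mode
   it becomes BFD_RELOC_386_GOTOFF, while a 32-bit PC-relative fix
   in 64-bit mode is turned into one of the GOTPCREL flavours.  */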
15186
15187 arelent *
15188 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
15189 {
15190 arelent *rel;
15191 bfd_reloc_code_real_type code;
15192
15193 switch (fixp->fx_r_type)
15194 {
15195 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15196 symbolS *sym;
15197
15198 case BFD_RELOC_SIZE32:
15199 case BFD_RELOC_SIZE64:
15200 if (fixp->fx_addsy
15201 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
15202 && (!fixp->fx_subsy
15203 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
15204 sym = fixp->fx_addsy;
15205 else if (fixp->fx_subsy
15206 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
15207 && (!fixp->fx_addsy
15208 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
15209 sym = fixp->fx_subsy;
15210 else
15211 sym = NULL;
15212 if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
15213 {
15214 /* Resolve size relocation against local symbol to size of
15215 the symbol plus addend. */
15216 valueT value = S_GET_SIZE (sym);
15217
15218 if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
15219 value = bfd_section_size (S_GET_SEGMENT (sym));
15220 if (sym == fixp->fx_subsy)
15221 {
15222 value = -value;
15223 if (fixp->fx_addsy)
15224 value += S_GET_VALUE (fixp->fx_addsy);
15225 }
15226 else if (fixp->fx_subsy)
15227 value -= S_GET_VALUE (fixp->fx_subsy);
15228 value += fixp->fx_offset;
15229 if (fixp->fx_r_type == BFD_RELOC_SIZE32
15230 && object_64bit
15231 && !fits_in_unsigned_long (value))
15232 as_bad_where (fixp->fx_file, fixp->fx_line,
15233 _("symbol size computation overflow"));
15234 fixp->fx_addsy = NULL;
15235 fixp->fx_subsy = NULL;
15236 md_apply_fix (fixp, (valueT *) &value, NULL);
15237 return NULL;
15238 }
15239 if (!fixp->fx_addsy || fixp->fx_subsy)
15240 {
15241 as_bad_where (fixp->fx_file, fixp->fx_line,
15242 "unsupported expression involving @size");
15243 return NULL;
15244 }
15245 #endif
15246 /* Fall through. */
15247
15248 case BFD_RELOC_X86_64_PLT32:
15249 case BFD_RELOC_X86_64_GOT32:
15250 case BFD_RELOC_X86_64_GOTPCREL:
15251 case BFD_RELOC_X86_64_GOTPCRELX:
15252 case BFD_RELOC_X86_64_REX_GOTPCRELX:
15253 case BFD_RELOC_386_PLT32:
15254 case BFD_RELOC_386_GOT32:
15255 case BFD_RELOC_386_GOT32X:
15256 case BFD_RELOC_386_GOTOFF:
15257 case BFD_RELOC_386_GOTPC:
15258 case BFD_RELOC_386_TLS_GD:
15259 case BFD_RELOC_386_TLS_LDM:
15260 case BFD_RELOC_386_TLS_LDO_32:
15261 case BFD_RELOC_386_TLS_IE_32:
15262 case BFD_RELOC_386_TLS_IE:
15263 case BFD_RELOC_386_TLS_GOTIE:
15264 case BFD_RELOC_386_TLS_LE_32:
15265 case BFD_RELOC_386_TLS_LE:
15266 case BFD_RELOC_386_TLS_GOTDESC:
15267 case BFD_RELOC_386_TLS_DESC_CALL:
15268 case BFD_RELOC_X86_64_TLSGD:
15269 case BFD_RELOC_X86_64_TLSLD:
15270 case BFD_RELOC_X86_64_DTPOFF32:
15271 case BFD_RELOC_X86_64_DTPOFF64:
15272 case BFD_RELOC_X86_64_GOTTPOFF:
15273 case BFD_RELOC_X86_64_TPOFF32:
15274 case BFD_RELOC_X86_64_TPOFF64:
15275 case BFD_RELOC_X86_64_GOTOFF64:
15276 case BFD_RELOC_X86_64_GOTPC32:
15277 case BFD_RELOC_X86_64_GOT64:
15278 case BFD_RELOC_X86_64_GOTPCREL64:
15279 case BFD_RELOC_X86_64_GOTPC64:
15280 case BFD_RELOC_X86_64_GOTPLT64:
15281 case BFD_RELOC_X86_64_PLTOFF64:
15282 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
15283 case BFD_RELOC_X86_64_TLSDESC_CALL:
15284 case BFD_RELOC_RVA:
15285 case BFD_RELOC_VTABLE_ENTRY:
15286 case BFD_RELOC_VTABLE_INHERIT:
15287 #ifdef TE_PE
15288 case BFD_RELOC_32_SECREL:
15289 case BFD_RELOC_16_SECIDX:
15290 #endif
15291 code = fixp->fx_r_type;
15292 break;
15293 case BFD_RELOC_X86_64_32S:
15294 if (!fixp->fx_pcrel)
15295 {
15296 /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32. */
15297 code = fixp->fx_r_type;
15298 break;
15299 }
15300 /* Fall through. */
15301 default:
15302 if (fixp->fx_pcrel)
15303 {
15304 switch (fixp->fx_size)
15305 {
15306 default:
15307 as_bad_where (fixp->fx_file, fixp->fx_line,
15308 _("can not do %d byte pc-relative relocation"),
15309 fixp->fx_size);
15310 code = BFD_RELOC_32_PCREL;
15311 break;
15312 case 1: code = BFD_RELOC_8_PCREL; break;
15313 case 2: code = BFD_RELOC_16_PCREL; break;
15314 case 4: code = BFD_RELOC_32_PCREL; break;
15315 #ifdef BFD64
15316 case 8: code = BFD_RELOC_64_PCREL; break;
15317 #endif
15318 }
15319 }
15320 else
15321 {
15322 switch (fixp->fx_size)
15323 {
15324 default:
15325 as_bad_where (fixp->fx_file, fixp->fx_line,
15326 _("can not do %d byte relocation"),
15327 fixp->fx_size);
15328 code = BFD_RELOC_32;
15329 break;
15330 case 1: code = BFD_RELOC_8; break;
15331 case 2: code = BFD_RELOC_16; break;
15332 case 4: code = BFD_RELOC_32; break;
15333 #ifdef BFD64
15334 case 8: code = BFD_RELOC_64; break;
15335 #endif
15336 }
15337 }
15338 break;
15339 }
15340
15341 if ((code == BFD_RELOC_32
15342 || code == BFD_RELOC_32_PCREL
15343 || code == BFD_RELOC_X86_64_32S)
15344 && GOT_symbol
15345 && fixp->fx_addsy == GOT_symbol)
15346 {
15347 if (!object_64bit)
15348 code = BFD_RELOC_386_GOTPC;
15349 else
15350 code = BFD_RELOC_X86_64_GOTPC32;
15351 }
15352 if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
15353 && GOT_symbol
15354 && fixp->fx_addsy == GOT_symbol)
15355 {
15356 code = BFD_RELOC_X86_64_GOTPC64;
15357 }
15358
15359 rel = XNEW (arelent);
15360 rel->sym_ptr_ptr = XNEW (asymbol *);
15361 *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
15362
15363 rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
15364
15365 if (!use_rela_relocations)
15366 {
15367 /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
15368 vtable entry to be used in the relocation's section offset. */
15369 if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
15370 rel->address = fixp->fx_offset;
15371 #if defined (OBJ_COFF) && defined (TE_PE)
15372 else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
15373 rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
15374 else
15375 #endif
15376 rel->addend = 0;
15377 }
15378 /* Use RELA in 64-bit mode. */
15379 else
15380 {
15381 if (disallow_64bit_reloc)
15382 switch (code)
15383 {
15384 case BFD_RELOC_X86_64_DTPOFF64:
15385 case BFD_RELOC_X86_64_TPOFF64:
15386 case BFD_RELOC_64_PCREL:
15387 case BFD_RELOC_X86_64_GOTOFF64:
15388 case BFD_RELOC_X86_64_GOT64:
15389 case BFD_RELOC_X86_64_GOTPCREL64:
15390 case BFD_RELOC_X86_64_GOTPC64:
15391 case BFD_RELOC_X86_64_GOTPLT64:
15392 case BFD_RELOC_X86_64_PLTOFF64:
15393 as_bad_where (fixp->fx_file, fixp->fx_line,
15394 _("cannot represent relocation type %s in x32 mode"),
15395 bfd_get_reloc_code_name (code));
15396 break;
15397 default:
15398 break;
15399 }
15400
      if (!fixp->fx_pcrel)
	rel->addend = fixp->fx_offset;
      else
	switch (code)
	  {
	  case BFD_RELOC_X86_64_PLT32:
	  case BFD_RELOC_X86_64_GOT32:
	  case BFD_RELOC_X86_64_GOTPCREL:
	  case BFD_RELOC_X86_64_GOTPCRELX:
	  case BFD_RELOC_X86_64_REX_GOTPCRELX:
	  case BFD_RELOC_X86_64_TLSGD:
	  case BFD_RELOC_X86_64_TLSLD:
	  case BFD_RELOC_X86_64_GOTTPOFF:
	  case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
	  case BFD_RELOC_X86_64_TLSDESC_CALL:
	    rel->addend = fixp->fx_offset - fixp->fx_size;
	    break;
	  default:
	    rel->addend = (section->vma
			   - fixp->fx_size
			   + fixp->fx_addnumber
			   + md_pcrel_from (fixp));
	    break;
	  }
    }

  rel->howto = bfd_reloc_type_lookup (stdoutput, code);
  if (rel->howto == NULL)
    {
      as_bad_where (fixp->fx_file, fixp->fx_line,
		    _("cannot represent relocation type %s"),
		    bfd_get_reloc_code_name (code));
      /* Set howto to a garbage value so that we can keep going.  */
      rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
      gas_assert (rel->howto != NULL);
    }

  return rel;
}

#include "tc-i386-intel.c"

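/* Parse a register name, as it appears in CFI directives, and replace
   EXP with the register's DWARF number.  Naked-register parsing is
   enabled temporarily so that names without the '%' prefix are
   accepted too.  For example (illustrative only), in

	.cfi_offset %rbp, -16

   the operand "%rbp" parses to DWARF register number 6 in 64-bit
   mode.  */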
void
tc_x86_parse_to_dw2regnum (expressionS *exp)
{
  int saved_naked_reg;
  char saved_register_dot;

  saved_naked_reg = allow_naked_reg;
  allow_naked_reg = 1;
  saved_register_dot = register_chars['.'];
  register_chars['.'] = '.';
  allow_pseudo_reg = 1;
  expression_and_evaluate (exp);
  allow_pseudo_reg = 0;
  register_chars['.'] = saved_register_dot;
  allow_naked_reg = saved_naked_reg;

  if (exp->X_op == O_register && exp->X_add_number >= 0)
    {
      if ((addressT) exp->X_add_number < i386_regtab_size)
	{
	  exp->X_op = O_constant;
	  exp->X_add_number = i386_regtab[exp->X_add_number]
			      .dw2_regnum[flag_code >> 1];
	}
      else
	exp->X_op = O_illegal;
    }
}

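/* Emit the initial CFI instructions of a Frame Description Entry: on
   function entry the CFA is the stack pointer plus the address size
   (accounting for the return address just pushed by CALL), and the
   return address is saved at CFA minus the address size.  Here
   -x86_cie_data_alignment is 4 in 32-bit mode and 8 in 64-bit mode.  */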
void
tc_x86_frame_initial_instructions (void)
{
  static unsigned int sp_regno[2];

  if (!sp_regno[flag_code >> 1])
    {
      char *saved_input = input_line_pointer;
      char sp[][4] = {"esp", "rsp"};
      expressionS exp;

      input_line_pointer = sp[flag_code >> 1];
      tc_x86_parse_to_dw2regnum (&exp);
      gas_assert (exp.X_op == O_constant);
      sp_regno[flag_code >> 1] = exp.X_add_number;
      input_line_pointer = saved_input;
    }

  cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
  cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
}

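/* Return the size of a DWARF address in bytes.  The x32 ABI runs the
   processor in 64-bit mode but uses 32-bit addresses and 32-bit ELF
   files, so it is special-cased here.  */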
int
x86_dwarf2_addr_size (void)
{
#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
  if (x86_elf_abi == X86_64_X32_ABI)
    return 4;
#endif
  return bfd_arch_bits_per_address (stdoutput) / 8;
}

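/* Map a .section type string onto an ELF section type.  For example
   (illustrative only), in 64-bit mode

	.section .eh_frame,"a",@unwind

   gives the section the SHT_X86_64_UNWIND type.  */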
int
i386_elf_section_type (const char *str, size_t len)
{
  if (flag_code == CODE_64BIT
      && len == sizeof ("unwind") - 1
      && startswith (str, "unwind"))
    return SHT_X86_64_UNWIND;

  return -1;
}

#ifdef TE_SOLARIS
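/* On Solaris, .eh_frame must carry the SHT_X86_64_UNWIND section type
   in 64-bit mode instead of the generic SHT_PROGBITS.  */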
void
i386_solaris_fix_up_eh_frame (segT sec)
{
  if (flag_code == CODE_64BIT)
    elf_section_type (sec) = SHT_X86_64_UNWIND;
}
#endif

#ifdef TE_PE
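/* Emit a DWARF reference to SYMBOL as a section-relative (secrel)
   offset rather than an absolute address, as PE/COFF debug info
   consumers expect.  */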
void
tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
{
  expressionS exp;

  exp.X_op = O_secrel;
  exp.X_add_symbol = symbol;
  exp.X_add_number = 0;
  emit_expr (&exp, size);
}
#endif

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */
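/* For example (illustrative usage only), the "l" flag character
   requests the attribute:

	.section .ldata,"awl",@progbits

   as does the "large" keyword recognized by x86_64_section_word
   below.  */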

bfd_vma
x86_64_section_letter (int letter, const char **ptr_msg)
{
  if (flag_code == CODE_64BIT)
    {
      if (letter == 'l')
	return SHF_X86_64_LARGE;

      *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
    }
  else
    *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
  return -1;
}

bfd_vma
x86_64_section_word (char *str, size_t len)
{
  if (len == 5 && flag_code == CODE_64BIT && startswith (str, "large"))
    return SHF_X86_64_LARGE;

  return -1;
}

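/* Handle the .largecomm directive: like .comm, but the symbol goes
   into the large common section (for local symbols, .lbss) so that it
   need not be reachable through a 32-bit displacement.  A hypothetical,
   illustrative use:

	.largecomm big_buffer, 0x200000, 32  */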
static void
handle_large_common (int small ATTRIBUTE_UNUSED)
{
  if (flag_code != CODE_64BIT)
    {
      s_comm_internal (0, elf_common_parse);
      as_warn (_(".largecomm supported only in 64-bit mode, producing .comm"));
    }
  else
    {
      static segT lbss_section;
      asection *saved_com_section_ptr = elf_com_section_ptr;
      asection *saved_bss_section = bss_section;

      if (lbss_section == NULL)
	{
	  flagword applicable;
	  segT seg = now_seg;
	  subsegT subseg = now_subseg;

	  /* The .lbss section is for local .largecomm symbols.  */
	  lbss_section = subseg_new (".lbss", 0);
	  applicable = bfd_applicable_section_flags (stdoutput);
	  bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
	  seg_info (lbss_section)->bss = 1;

	  subseg_set (seg, subseg);
	}

      elf_com_section_ptr = &_bfd_elf_large_com_section;
      bss_section = lbss_section;

      s_comm_internal (0, elf_common_parse);

      elf_com_section_ptr = saved_com_section_ptr;
      bss_section = saved_bss_section;
    }
}
#endif /* OBJ_ELF || OBJ_MAYBE_ELF */