/* tc-i386.c -- Assemble code for the Intel 80386
   Copyright (C) 1989-2024 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* Intel 80386 machine specific gas.
   Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
   x86_64 support by Jan Hubicka (jh@suse.cz)
   VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
   Bugs & suggestions are completely welcome.  This is free software.
   Please help us make it better.  */

#include "as.h"
#include "safe-ctype.h"
#include "subsegs.h"
#include "dwarf2dbg.h"
#include "dw2gencfi.h"
#include "scfi.h"
#include "gen-sframe.h"
#include "sframe.h"
#include "elf/x86-64.h"
#include "opcodes/i386-init.h"
#include "opcodes/i386-mnem.h"
#include <limits.h>

#ifndef INFER_ADDR_PREFIX
#define INFER_ADDR_PREFIX 1
#endif

#ifndef DEFAULT_ARCH
#define DEFAULT_ARCH "i386"
#endif

#ifndef INLINE
#if __GNUC__ >= 2
#define INLINE __inline__
#else
#define INLINE
#endif
#endif
/* Prefixes will be emitted in the order defined below.
   WAIT_PREFIX must be the first prefix since FWAIT really is an
   instruction, and so must come before any prefixes.
   The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
   REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
#define WAIT_PREFIX	0
#define SEG_PREFIX	1
#define ADDR_PREFIX	2
#define DATA_PREFIX	3
#define REP_PREFIX	4
#define HLE_PREFIX	REP_PREFIX
#define BND_PREFIX	REP_PREFIX
#define LOCK_PREFIX	5
#define REX_PREFIX	6	/* must come last.  */
#define MAX_PREFIXES	7	/* max prefixes per opcode */
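
/* Illustrative note (not in the original source): i.prefix[] is indexed by
   the slots above, and prefixes are emitted in increasing slot order.  So
   for, say, "lock addl $1, %fs:(%eax)" the 0x64 segment override occupies
   slot SEG_PREFIX (1) and 0xf0 occupies slot LOCK_PREFIX (5), giving the
   preferred SEG-before-LOCK byte order described above.  */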

/* we define the syntax here (modulo base,index,scale syntax) */
#define REGISTER_PREFIX '%'
#define IMMEDIATE_PREFIX '$'
#define ABSOLUTE_PREFIX '*'

/* these are the instruction mnemonic suffixes in AT&T syntax or
   memory operand size in Intel syntax.  */
#define WORD_MNEM_SUFFIX  'w'
#define BYTE_MNEM_SUFFIX  'b'
#define SHORT_MNEM_SUFFIX 's'
#define LONG_MNEM_SUFFIX  'l'
#define QWORD_MNEM_SUFFIX 'q'

#define END_OF_INSN '\0'

#define OPERAND_TYPE_NONE { .bitfield = { .class = ClassNone } }

/* This matches the C -> StaticRounding alias in the opcode table.  */
#define commutative staticrounding

/*
  'templates' is for grouping together 'template' structures for opcodes
  of the same name.  This is only used for storing the insns in the grand
  ole hash table of insns.
  The templates themselves start at START and range up to (but not including)
  END.
  */
typedef struct
{
  const insn_template *start;
  const insn_template *end;
}
templates;

/* 386 operand encoding bytes:  see 386 book for details of this.  */
typedef struct
{
  unsigned int regmem;	/* codes register or memory operand */
  unsigned int reg;	/* codes register operand (or extended opcode) */
  unsigned int mode;	/* how to interpret regmem & reg */
}
modrm_byte;

/* x86-64 extension prefix.  */
typedef int rex_byte;

/* 386 opcode byte to code indirect addressing.  */
typedef struct
{
  unsigned base;
  unsigned index;
  unsigned scale;
}
sib_byte;

/* x86 arch names, types and features */
typedef struct
{
  const char *name;		/* arch name */
  unsigned int len:8;		/* arch string length */
  bool skip:1;			/* show_arch should skip this. */
  enum processor_type type;	/* arch type */
  enum { vsz_none, vsz_set, vsz_reset } vsz; /* vector size control */
  i386_cpu_flags enable;	/* cpu feature enable flags */
  i386_cpu_flags disable;	/* cpu feature disable flags */
}
arch_entry;

static void update_code_flag (int, int);
static void s_insn (int);
static void s_noopt (int);
static void set_code_flag (int);
static void set_16bit_gcc_code_flag (int);
static void set_intel_syntax (int);
static void set_intel_mnemonic (int);
static void set_allow_index_reg (int);
static void set_check (int);
static void set_cpu_arch (int);
#ifdef TE_PE
static void pe_directive_secrel (int);
static void pe_directive_secidx (int);
#endif
static void signed_cons (int);
static char *output_invalid (int c);
static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
				    const char *);
static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
				       const char *);
static int i386_att_operand (char *);
static int i386_intel_operand (char *, int);
static int i386_intel_simplify (expressionS *);
static int i386_intel_parse_name (const char *, expressionS *);
static const reg_entry *parse_register (const char *, char **);
static const char *parse_insn (const char *, char *, bool);
static char *parse_operands (char *, const char *);
static void swap_operands (void);
static void swap_2_operands (unsigned int, unsigned int);
static enum i386_flag_code i386_addressing_mode (void);
static void optimize_imm (void);
static bool optimize_disp (const insn_template *t);
static const insn_template *match_template (char);
static int check_string (void);
static int process_suffix (void);
static int check_byte_reg (void);
static int check_long_reg (void);
static int check_qword_reg (void);
static int check_word_reg (void);
static int finalize_imm (void);
static int process_operands (void);
static const reg_entry *build_modrm_byte (void);
static void output_insn (const struct last_insn *);
static void output_imm (fragS *, offsetT);
static void output_disp (fragS *, offsetT);
#ifdef OBJ_AOUT
static void s_bss (int);
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
static void handle_large_common (int small ATTRIBUTE_UNUSED);

/* GNU_PROPERTY_X86_ISA_1_USED.  */
static unsigned int x86_isa_1_used;
/* GNU_PROPERTY_X86_FEATURE_2_USED.  */
static unsigned int x86_feature_2_used;
/* Generate x86 used ISA and feature properties.  */
static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
#endif

static const char *default_arch = DEFAULT_ARCH;

/* parse_register() returns this when a register alias cannot be used.  */
static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
				   { Dw2Inval, Dw2Inval } };

static const reg_entry *reg_eax;
static const reg_entry *reg_ds;
static const reg_entry *reg_es;
static const reg_entry *reg_ss;
static const reg_entry *reg_st0;
static const reg_entry *reg_k0;

/* VEX prefix.  */
typedef struct
{
  /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
  unsigned char bytes[4];
  unsigned int length;
  /* Destination or source register specifier.  */
  const reg_entry *register_specifier;
} vex_prefix;

/* 'md_assemble ()' gathers together information and puts it into an
   i386_insn.  */

union i386_op
{
  expressionS *disps;
  expressionS *imms;
  const reg_entry *regs;
};

enum i386_error
{
  no_error, /* Must be first.  */
  operand_size_mismatch,
  operand_type_mismatch,
  register_type_mismatch,
  number_of_operands_mismatch,
  invalid_instruction_suffix,
  bad_imm4,
  unsupported_with_intel_mnemonic,
  unsupported_syntax,
  unsupported_EGPR_for_addressing,
  unsupported,
  unsupported_on_arch,
  unsupported_64bit,
  no_vex_encoding,
  no_evex_encoding,
  invalid_sib_address,
  invalid_vsib_address,
  invalid_vector_register_set,
  invalid_tmm_register_set,
  invalid_dest_and_src_register_set,
  invalid_dest_register_set,
  invalid_pseudo_prefix,
  unsupported_vector_index_register,
  unsupported_broadcast,
  broadcast_needed,
  unsupported_masking,
  mask_not_on_destination,
  no_default_mask,
  unsupported_rc_sae,
  unsupported_vector_size,
  unsupported_rsp_register,
  internal_error,
};

struct _i386_insn
{
  /* TM holds the template for the insn we're currently assembling.  */
  insn_template tm;

  /* SUFFIX holds the instruction size suffix for byte, word, dword
     or qword, if given.  */
  char suffix;

  /* OPCODE_LENGTH holds the number of base opcode bytes.  */
  unsigned char opcode_length;

  /* OPERANDS gives the number of given operands.  */
  unsigned int operands;

  /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
     of given register, displacement, memory operands and immediate
     operands.  */
  unsigned int reg_operands, disp_operands, mem_operands, imm_operands;

  /* TYPES [i] is the type (see above #defines) which tells us how to
     use OP[i] for the corresponding operand.  */
  i386_operand_type types[MAX_OPERANDS];

  /* Displacement expression, immediate expression, or register for each
     operand.  */
  union i386_op op[MAX_OPERANDS];

  /* Flags for operands.  */
  unsigned int flags[MAX_OPERANDS];
#define Operand_PCrel 1
#define Operand_Mem   2
#define Operand_Signed 4 /* .insn only */

  /* Relocation type for operand */
  enum bfd_reloc_code_real reloc[MAX_OPERANDS];

  /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
     the base index byte below.  */
  const reg_entry *base_reg;
  const reg_entry *index_reg;
  unsigned int log2_scale_factor;

  /* SEG gives the seg_entries of this insn.  They are zero unless
     explicit segment overrides are given.  */
  const reg_entry *seg[2];

  /* PREFIX holds all the given prefix opcodes (usually null).
     PREFIXES is the number of prefix opcodes.  */
  unsigned int prefixes;
  unsigned char prefix[MAX_PREFIXES];

  /* .insn allows for reserved opcode spaces.  */
  unsigned char insn_opcode_space;

  /* .insn also allows (requires) specifying immediate size.  */
  unsigned char imm_bits[MAX_OPERANDS];

  /* Register is in low 3 bits of opcode.  */
  bool short_form;

  /* The operand to a branch insn indicates an absolute branch.  */
  bool jumpabsolute;

  /* The operand to a branch insn indicates a far branch.  */
  bool far_branch;

  /* There is a memory operand of (%dx) which should only be used
     with input/output instructions.  */
  bool input_output_operand;

  /* Extended states.  */
  enum
    {
      /* Use MMX state.  */
      xstate_mmx = 1 << 0,
      /* Use XMM state.  */
      xstate_xmm = 1 << 1,
      /* Use YMM state.  */
      xstate_ymm = 1 << 2 | xstate_xmm,
      /* Use ZMM state.  */
      xstate_zmm = 1 << 3 | xstate_ymm,
      /* Use TMM state.  */
      xstate_tmm = 1 << 4,
      /* Use MASK state.  */
      xstate_mask = 1 << 5
    } xstate;

  /* Has GOTPC or TLS relocation.  */
  bool has_gotpc_tls_reloc;

  /* RM and SIB are the modrm byte and the sib byte where the
     addressing modes of this insn are encoded.  */
  modrm_byte rm;
  rex_byte rex;
  rex_byte vrex;
  rex_byte rex2;
  sib_byte sib;
  vex_prefix vex;

  /* Masking attributes.

     The struct describes masking, applied to OPERAND in the instruction.
     REG is a pointer to the corresponding mask register.  ZEROING tells
     whether merging or zeroing mask is used.  */
  struct Mask_Operation
  {
    const reg_entry *reg;
    unsigned int zeroing;
    /* The operand where this operation is associated.  */
    unsigned int operand;
  } mask;
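
  /* Example (illustrative, not from the original source): for
     "vmovaps %zmm0, %zmm1{%k2}{z}" REG points at %k2, ZEROING is non-zero,
     and OPERAND is the index of the masked %zmm1 operand.  */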

  /* Rounding control and SAE attributes.  */
  struct RC_Operation
  {
    enum rc_type
      {
	rc_none = -1,
	rne,
	rd,
	ru,
	rz,
	saeonly
      } type;
    /* In Intel syntax the operand modifier form is supposed to be used, but
       we continue to accept the immediate forms as well.  */
    bool modifier;
  } rounding;

  /* Broadcasting attributes.

     The struct describes broadcasting, applied to OPERAND.  TYPE
     expresses the broadcast factor.  */
  struct Broadcast_Operation
  {
    /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}.  */
    unsigned int type;

    /* Index of broadcasted operand.  */
    unsigned int operand;

    /* Number of bytes to broadcast.  */
    unsigned int bytes;
  } broadcast;
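
  /* Example (illustrative, not from the original source): for
     "vaddps (%rax){1to16}, %zmm1, %zmm2" TYPE is 16, OPERAND is the index
     of the memory operand, and BYTES follows from the 4-byte element
     size.  */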

  /* Compressed disp8*N attribute.  */
  unsigned int memshift;

  /* Prefer load or store in encoding.  */
  enum
    {
      dir_encoding_default = 0,
      dir_encoding_load,
      dir_encoding_store,
      dir_encoding_swap
    } dir_encoding;

  /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
  enum
    {
      disp_encoding_default = 0,
      disp_encoding_8bit,
      disp_encoding_16bit,
      disp_encoding_32bit
    } disp_encoding;

  /* Prefer the REX byte in encoding.  */
  bool rex_encoding;

  /* Prefer the REX2 prefix in encoding.  */
  bool rex2_encoding;

  /* Disable instruction size optimization.  */
  bool no_optimize;

  /* How to encode instructions.  */
  enum
    {
      encoding_default = 0,
      encoding_vex,
      encoding_vex3,
      encoding_egpr, /* REX2 or EVEX.  */
      encoding_evex,
      encoding_evex512,
      encoding_error
    } encoding;

  /* REP prefix.  */
  const char *rep_prefix;

  /* HLE prefix.  */
  const char *hle_prefix;

  /* Have BND prefix.  */
  const char *bnd_prefix;

  /* Have NOTRACK prefix.  */
  const char *notrack_prefix;

  /* Error message.  */
  enum i386_error error;
};

typedef struct _i386_insn i386_insn;

/* Link RC type with corresponding string, that'll be looked for in
   asm.  */
struct RC_name
{
  enum rc_type type;
  const char *name;
  unsigned int len;
};

static const struct RC_name RC_NamesTable[] =
{
  { rne, STRING_COMMA_LEN ("rn-sae") },
  { rd,  STRING_COMMA_LEN ("rd-sae") },
  { ru,  STRING_COMMA_LEN ("ru-sae") },
  { rz,  STRING_COMMA_LEN ("rz-sae") },
  { saeonly, STRING_COMMA_LEN ("sae") },
};
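
/* Example (illustrative, not from the original source): in AT&T syntax
   "vaddpd {rz-sae}, %zmm1, %zmm2, %zmm3" selects round-toward-zero; the
   table above maps the "rz-sae" token to rz.  */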

/* To be indexed by segment register number.  */
static const unsigned char i386_seg_prefixes[] = {
  ES_PREFIX_OPCODE,
  CS_PREFIX_OPCODE,
  SS_PREFIX_OPCODE,
  DS_PREFIX_OPCODE,
  FS_PREFIX_OPCODE,
  GS_PREFIX_OPCODE
};
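
/* (Added note) These are the override bytes 0x26, 0x2e, 0x36, 0x3e, 0x64
   and 0x65 for ES, CS, SS, DS, FS and GS respectively.  */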

/* List of chars besides those in app.c:symbol_chars that can start an
   operand.  Used to prevent the scrubber eating vital white-space.  */
const char extra_symbol_chars[] = "*%-([{}"
#ifdef LEX_AT
	"@"
#endif
#ifdef LEX_QM
	"?"
#endif
	;

#if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF))	\
     && !defined (TE_GNU)				\
     && !defined (TE_LINUX)				\
     && !defined (TE_Haiku)				\
     && !defined (TE_FreeBSD)				\
     && !defined (TE_DragonFly)				\
     && !defined (TE_NetBSD))
/* This array holds the chars that always start a comment.  If the
   pre-processor is disabled, these aren't very useful.  The option
   --divide will remove '/' from this list.  */
const char *i386_comment_chars = "#/";
#define SVR4_COMMENT_CHARS 1
#define PREFIX_SEPARATOR '\\'

#else
const char *i386_comment_chars = "#";
#define PREFIX_SEPARATOR '/'
#endif

/* This array holds the chars that only start a comment at the beginning of
   a line.  If the line seems to have the form '# 123 filename'
   .line and .file directives will appear in the pre-processed output.
   Note that input_file.c hand checks for '#' at the beginning of the
   first line of the input file.  This is because the compiler outputs
   #NO_APP at the beginning of its output.
   Also note that comments started like this one will always work if
   '/' isn't otherwise defined.  */
const char line_comment_chars[] = "#/";

const char line_separator_chars[] = ";";

/* Chars that can be used to separate mant from exp in floating point
   nums.  */
const char EXP_CHARS[] = "eE";

/* Chars that mean this number is a floating point constant
   As in 0f12.456
   or    0d1.2345e12.  */
const char FLT_CHARS[] = "fFdDxXhHbB";

/* Tables for lexical analysis.  */
static char mnemonic_chars[256];
static char register_chars[256];
static char operand_chars[256];

/* Lexical macros.  */
#define is_operand_char(x) (operand_chars[(unsigned char) x])
#define is_register_char(x) (register_chars[(unsigned char) x])
#define is_space_char(x) ((x) == ' ')

/* All non-digit non-letter characters that may occur in an operand and
   which aren't already in extra_symbol_chars[].  */
static const char operand_special_chars[] = "$+,)._~/<>|&^!=:@]";

/* md_assemble() always leaves the strings it's passed unaltered.  To
   effect this we maintain a stack of saved characters that we've smashed
   with '\0's (indicating end of strings for various sub-fields of the
   assembler instruction).  */
static char save_stack[32];
static char *save_stack_p;
#define END_STRING_AND_SAVE(s) \
	do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
#define RESTORE_END_STRING(s) \
	do { *(s) = *--save_stack_p; } while (0)
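
/* Illustrative usage sketch (not in the original source):

     END_STRING_AND_SAVE (op_end);    -- save *op_end, write '\0'
     ... operate on the now NUL-terminated text before OP_END ...
     RESTORE_END_STRING (op_end);     -- put the saved character back

   so the caller's string is left unaltered overall.  */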

/* The instruction we're assembling.  */
static i386_insn i;

/* Possible templates for current insn.  */
static templates current_templates;

/* Per instruction expressionS buffers: max displacements & immediates.  */
static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];

/* Current operand we are working on.  */
static int this_operand = -1;

/* Are we processing a .insn directive?  */
#define dot_insn() (i.tm.mnem_off == MN__insn)

enum i386_flag_code i386_flag_code;
#define flag_code i386_flag_code /* Permit to continue using original name.  */
static unsigned int object_64bit;
static unsigned int disallow_64bit_reloc;
static int use_rela_relocations = 0;
/* __tls_get_addr/___tls_get_addr symbol for TLS.  */
static const char *tls_get_addr;

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)

/* The ELF ABI to use.  */
enum x86_elf_abi
{
  I386_ABI,
  X86_64_ABI,
  X86_64_X32_ABI
};

static enum x86_elf_abi x86_elf_abi = I386_ABI;
#endif

#if defined (TE_PE) || defined (TE_PEP)
/* Use big object file format.  */
static int use_big_obj = 0;
#endif

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* 1 if generating code for a shared library.  */
static int shared = 0;

unsigned int x86_sframe_cfa_sp_reg;
/* The other CFA base register for SFrame stack trace info.  */
unsigned int x86_sframe_cfa_fp_reg;
unsigned int x86_sframe_cfa_ra_reg;

#endif

/* 1 for intel syntax,
   0 if att syntax.  */
static int intel_syntax = 0;

static enum x86_64_isa
{
  amd64 = 1,	/* AMD64 ISA.  */
  intel64	/* Intel64 ISA.  */
} isa64;

/* 1 for intel mnemonic,
   0 if att mnemonic.  */
static int intel_mnemonic = !SYSV386_COMPAT;

/* 1 if pseudo registers are permitted.  */
static int allow_pseudo_reg = 0;

/* 1 if register prefix % not required.  */
static int allow_naked_reg = 0;

/* 1 if the assembler should add BND prefix for all control-transferring
   instructions supporting it, even if this prefix wasn't specified
   explicitly.  */
static int add_bnd_prefix = 0;

/* 1 if pseudo index register, eiz/riz, is allowed.  */
static int allow_index_reg = 0;

/* 1 if the assembler should ignore LOCK prefix, even if it was
   specified explicitly.  */
static int omit_lock_prefix = 0;

/* 1 if the assembler should encode lfence, mfence, and sfence as
   "lock addl $0, (%{re}sp)".  */
static int avoid_fence = 0;

/* 1 if lfence should be inserted after every load.  */
static int lfence_after_load = 0;

/* Non-zero if lfence should be inserted before indirect branch.  */
static enum lfence_before_indirect_branch_kind
  {
    lfence_branch_none = 0,
    lfence_branch_register,
    lfence_branch_memory,
    lfence_branch_all
  }
lfence_before_indirect_branch;

/* Non-zero if lfence should be inserted before ret.  */
static enum lfence_before_ret_kind
  {
    lfence_before_ret_none = 0,
    lfence_before_ret_not,
    lfence_before_ret_or,
    lfence_before_ret_shl
  }
lfence_before_ret;

/* 1 if the assembler should generate relax relocations.  */

static int generate_relax_relocations
  = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;

static enum check_kind
  {
    check_none = 0,
    check_warning,
    check_error
  }
sse_check, operand_check = check_warning;

/* Non-zero if branches should be aligned within power of 2 boundary.  */
static int align_branch_power = 0;

/* Types of branches to align.  */
enum align_branch_kind
  {
    align_branch_none = 0,
    align_branch_jcc = 1,
    align_branch_fused = 2,
    align_branch_jmp = 3,
    align_branch_call = 4,
    align_branch_indirect = 5,
    align_branch_ret = 6
  };

/* Type bits of branches to align.  */
enum align_branch_bit
  {
    align_branch_jcc_bit = 1 << align_branch_jcc,
    align_branch_fused_bit = 1 << align_branch_fused,
    align_branch_jmp_bit = 1 << align_branch_jmp,
    align_branch_call_bit = 1 << align_branch_call,
    align_branch_indirect_bit = 1 << align_branch_indirect,
    align_branch_ret_bit = 1 << align_branch_ret
  };

static unsigned int align_branch = (align_branch_jcc_bit
				    | align_branch_fused_bit
				    | align_branch_jmp_bit);

/* Types of condition jump used by macro-fusion.  */
enum mf_jcc_kind
  {
    mf_jcc_jo = 0,  /* base opcode 0x70  */
    mf_jcc_jc,      /* base opcode 0x72  */
    mf_jcc_je,      /* base opcode 0x74  */
    mf_jcc_jna,     /* base opcode 0x76  */
    mf_jcc_js,      /* base opcode 0x78  */
    mf_jcc_jp,      /* base opcode 0x7a  */
    mf_jcc_jl,      /* base opcode 0x7c  */
    mf_jcc_jle,     /* base opcode 0x7e  */
  };

/* Types of compare flag-modifying instructions used by macro-fusion.  */
enum mf_cmp_kind
  {
    mf_cmp_test_and, /* test/cmp */
    mf_cmp_alu_cmp,  /* add/sub/cmp */
    mf_cmp_incdec    /* inc/dec */
  };

/* The maximum padding size for fused jcc.  CMP like instruction can
   be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
   prefixes.  */
#define MAX_FUSED_JCC_PADDING_SIZE 20

/* The maximum number of prefixes added for an instruction.  */
static unsigned int align_branch_prefix_size = 5;

/* Optimization:
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
 */
static int optimize = 0;

/* Optimization:
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
   3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
      "testb $imm7,%r8".
 */
static int optimize_for_space = 0;

/* Register prefix used for error message.  */
static const char *register_prefix = "%";

/* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
   leave, push, and pop instructions so that gcc has the same stack
   frame as in 32 bit mode.  */
static char stackop_size = '\0';

/* Non-zero to optimize code alignment.  */
int optimize_align_code = 1;

/* Non-zero to quieten some warnings.  */
static int quiet_warnings = 0;

/* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs.  */
static bool pre_386_16bit_warned;

/* CPU name.  */
static const char *cpu_arch_name = NULL;
static char *cpu_sub_arch_name = NULL;

/* CPU feature flags.  */
i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;

/* ISA extensions available in 64-bit mode only.  */
static const i386_cpu_flags cpu_64_flags = CPU_ANY_64_FLAGS;

/* If we have selected a cpu we are generating instructions for.  */
static int cpu_arch_tune_set = 0;

/* Cpu we are generating instructions for.  */
enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;

/* CPU instruction set architecture used.  */
enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;

/* CPU feature flags of instruction set architecture used.  */
i386_cpu_flags cpu_arch_isa_flags;

/* If set, conditional jumps are not automatically promoted to handle
   larger than a byte offset.  */
static bool no_cond_jump_promotion = false;

/* This will be set from an expression parser hook if there's any
   applicable operator involved in an expression.  */
static enum {
  expr_operator_none,
  expr_operator_present,
  expr_large_value,
} expr_mode;

/* Encode SSE instructions with VEX prefix.  */
static unsigned int sse2avx;

/* Encode aligned vector move as unaligned vector move.  */
static unsigned int use_unaligned_vector_move;

/* Maximum permitted vector size.  */
#define VSZ128 0
#define VSZ256 1
#define VSZ512 2
#define VSZ_DEFAULT VSZ512
static unsigned int vector_size = VSZ_DEFAULT;

/* Encode scalar AVX instructions with specific vector length.  */
static enum
  {
    vex128 = 0,
    vex256
  } avxscalar;

/* Encode VEX WIG instructions with specific vex.w.  */
static enum
  {
    vexw0 = 0,
    vexw1
  } vexwig;

/* Encode scalar EVEX LIG instructions with specific vector length.  */
static enum
  {
    evexl128 = 0,
    evexl256,
    evexl512
  } evexlig;

/* Encode EVEX WIG instructions with specific evex.w.  */
static enum
  {
    evexw0 = 0,
    evexw1
  } evexwig;

/* Value to encode in EVEX RC bits, for SAE-only instructions.  */
static enum rc_type evexrcig = rne;

/* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
static symbolS *GOT_symbol;

/* The dwarf2 return column, adjusted for 32 or 64 bit.  */
unsigned int x86_dwarf2_return_column;

/* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
int x86_cie_data_alignment;

/* Interface to relax_segment.
   There are 3 major relax states for 386 jump insns because the
   different types of jumps add different sizes to frags when we're
   figuring out what sort of jump to choose to reach a given label.

   BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
   branches which are handled by md_estimate_size_before_relax() and
   i386_generic_table_relax_frag().  */

/* Types.  */
#define UNCOND_JUMP 0
#define COND_JUMP 1
#define COND_JUMP86 2
#define BRANCH_PADDING 3
#define BRANCH_PREFIX 4
#define FUSED_JCC_PADDING 5

/* Sizes.  */
#define CODE16	1
#define SMALL	0
#define SMALL16 (SMALL | CODE16)
#define BIG	2
#define BIG16	(BIG | CODE16)

#ifndef INLINE
#ifdef __GNUC__
#define INLINE __inline__
#else
#define INLINE
#endif
#endif

#define ENCODE_RELAX_STATE(type, size) \
  ((relax_substateT) (((type) << 2) | (size)))
#define TYPE_FROM_RELAX_STATE(s) \
  ((s) >> 2)
#define DISP_SIZE_FROM_RELAX_STATE(s) \
    ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
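
/* Worked example (added for clarity): ENCODE_RELAX_STATE (COND_JUMP, BIG)
   is (1 << 2) | 2 == 6, from which TYPE_FROM_RELAX_STATE (6) == COND_JUMP
   and DISP_SIZE_FROM_RELAX_STATE (6) == 4 (a dword displacement).  */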

/* This table is used by relax_frag to promote short jumps to long
   ones where necessary.  SMALL (short) jumps may be promoted to BIG
   (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
   don't allow a short jump in a 32 bit code segment to be promoted to
   a 16 bit offset jump because it's slower (requires data size
   prefix), and doesn't work, unless the destination is in the bottom
   64k of the code segment (the top 16 bits of eip are zeroed).  */

const relax_typeS md_relax_table[] =
{
  /* The fields are:
     1) most positive reach of this state,
     2) most negative reach of this state,
     3) how many bytes this mode will have in the variable part of the frag
     4) which index into the table to try if we can't fit into this one.  */

  /* UNCOND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
  /* dword jmp adds 4 bytes to frag:
     0 extra opcode bytes, 4 displacement bytes.  */
  {0, 0, 4, 0},
  /* word jmp adds 2 bytes to frag:
     0 extra opcode bytes, 2 displacement bytes.  */
  {0, 0, 2, 0},

  /* COND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
  /* dword conditionals add 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 3 bytes to frag:
     1 extra opcode byte, 2 displacement bytes.  */
  {0, 0, 3, 0},

  /* COND_JUMP86 states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
  /* dword conditionals add 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 4 bytes to frag:
     1 displacement byte and a 3 byte long branch insn.  */
  {0, 0, 4, 0}
};

#define ARCH(n, t, f, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, vsz_none, CPU_ ## f ## _FLAGS, \
    CPU_NONE_FLAGS }
#define SUBARCH(n, e, d, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, vsz_none, CPU_ ## e ## _FLAGS, \
    CPU_ ## d ## _FLAGS }
#define VECARCH(n, e, d, v) \
  { STRING_COMMA_LEN (#n), false, PROCESSOR_NONE, vsz_ ## v, \
    CPU_ ## e ## _FLAGS, CPU_ ## d ## _FLAGS }

static const arch_entry cpu_arch[] =
{
  /* Do not replace the first two entries - i386_target_format() and
     set_cpu_arch() rely on them being there in this order.  */
  ARCH (generic32, GENERIC32, GENERIC32, false),
  ARCH (generic64, GENERIC64, GENERIC64, false),
  ARCH (i8086, UNKNOWN, NONE, false),
  ARCH (i186, UNKNOWN, 186, false),
  ARCH (i286, UNKNOWN, 286, false),
  ARCH (i386, I386, 386, false),
  ARCH (i486, I486, 486, false),
  ARCH (i586, PENTIUM, 586, false),
  ARCH (pentium, PENTIUM, 586, false),
  ARCH (i686, I686, 686, false),
  ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
  ARCH (pentiumii, PENTIUMPRO, P2, false),
  ARCH (pentiumiii, PENTIUMPRO, P3, false),
  ARCH (pentium4, PENTIUM4, P4, false),
  ARCH (prescott, NOCONA, CORE, false),
  ARCH (nocona, NOCONA, NOCONA, false),
  ARCH (yonah, CORE, CORE, true),
  ARCH (core, CORE, CORE, false),
  ARCH (merom, CORE2, CORE2, true),
  ARCH (core2, CORE2, CORE2, false),
  ARCH (corei7, COREI7, COREI7, false),
  ARCH (iamcu, IAMCU, IAMCU, false),
  ARCH (k6, K6, K6, false),
  ARCH (k6_2, K6, K6_2, false),
  ARCH (athlon, ATHLON, ATHLON, false),
  ARCH (sledgehammer, K8, K8, true),
  ARCH (opteron, K8, K8, false),
  ARCH (k8, K8, K8, false),
  ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
  ARCH (bdver1, BD, BDVER1, false),
  ARCH (bdver2, BD, BDVER2, false),
  ARCH (bdver3, BD, BDVER3, false),
  ARCH (bdver4, BD, BDVER4, false),
  ARCH (znver1, ZNVER, ZNVER1, false),
  ARCH (znver2, ZNVER, ZNVER2, false),
  ARCH (znver3, ZNVER, ZNVER3, false),
  ARCH (znver4, ZNVER, ZNVER4, false),
  ARCH (znver5, ZNVER, ZNVER5, false),
  ARCH (btver1, BT, BTVER1, false),
  ARCH (btver2, BT, BTVER2, false),

  SUBARCH (8087, 8087, ANY_8087, false),
  SUBARCH (87, NONE, ANY_8087, false), /* Disable only!  */
  SUBARCH (287, 287, ANY_287, false),
  SUBARCH (387, 387, ANY_387, false),
  SUBARCH (687, 687, ANY_687, false),
  SUBARCH (cmov, CMOV, CMOV, false),
  SUBARCH (fxsr, FXSR, ANY_FXSR, false),
  SUBARCH (mmx, MMX, ANY_MMX, false),
  SUBARCH (sse, SSE, ANY_SSE, false),
  SUBARCH (sse2, SSE2, ANY_SSE2, false),
  SUBARCH (sse3, SSE3, ANY_SSE3, false),
  SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
  SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
  SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
  SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
  SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
  VECARCH (avx, AVX, ANY_AVX, reset),
  VECARCH (avx2, AVX2, ANY_AVX2, reset),
  VECARCH (avx512f, AVX512F, ANY_AVX512F, reset),
  VECARCH (avx512cd, AVX512CD, ANY_AVX512CD, reset),
  VECARCH (avx512er, AVX512ER, ANY_AVX512ER, reset),
  VECARCH (avx512pf, AVX512PF, ANY_AVX512PF, reset),
  VECARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, reset),
  VECARCH (avx512bw, AVX512BW, ANY_AVX512BW, reset),
  VECARCH (avx512vl, AVX512VL, ANY_AVX512VL, reset),
  SUBARCH (monitor, MONITOR, MONITOR, false),
  SUBARCH (vmx, VMX, ANY_VMX, false),
  SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
  SUBARCH (smx, SMX, SMX, false),
  SUBARCH (xsave, XSAVE, ANY_XSAVE, false),
  SUBARCH (xsaveopt, XSAVEOPT, ANY_XSAVEOPT, false),
  SUBARCH (xsavec, XSAVEC, ANY_XSAVEC, false),
  SUBARCH (xsaves, XSAVES, ANY_XSAVES, false),
  SUBARCH (aes, AES, ANY_AES, false),
  SUBARCH (pclmul, PCLMULQDQ, ANY_PCLMULQDQ, false),
  SUBARCH (clmul, PCLMULQDQ, ANY_PCLMULQDQ, true),
  SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
  SUBARCH (rdrnd, RDRND, RDRND, false),
  SUBARCH (f16c, F16C, ANY_F16C, false),
  SUBARCH (bmi2, BMI2, BMI2, false),
  SUBARCH (fma, FMA, ANY_FMA, false),
  SUBARCH (fma4, FMA4, ANY_FMA4, false),
  SUBARCH (xop, XOP, ANY_XOP, false),
  SUBARCH (lwp, LWP, ANY_LWP, false),
  SUBARCH (movbe, MOVBE, MOVBE, false),
  SUBARCH (cx16, CX16, CX16, false),
  SUBARCH (lahf_sahf, LAHF_SAHF, LAHF_SAHF, false),
  SUBARCH (ept, EPT, ANY_EPT, false),
  SUBARCH (lzcnt, LZCNT, LZCNT, false),
  SUBARCH (popcnt, POPCNT, POPCNT, false),
  SUBARCH (hle, HLE, HLE, false),
  SUBARCH (rtm, RTM, ANY_RTM, false),
  SUBARCH (tsx, TSX, TSX, false),
  SUBARCH (invpcid, INVPCID, INVPCID, false),
  SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
  SUBARCH (nop, NOP, NOP, false),
  SUBARCH (syscall, SYSCALL, SYSCALL, false),
  SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
  SUBARCH (3dnow, 3DNOW, ANY_3DNOW, false),
  SUBARCH (3dnowa, 3DNOWA, ANY_3DNOWA, false),
  SUBARCH (padlock, PADLOCK, PADLOCK, false),
  SUBARCH (pacifica, SVME, ANY_SVME, true),
  SUBARCH (svme, SVME, ANY_SVME, false),
  SUBARCH (abm, ABM, ABM, false),
  SUBARCH (bmi, BMI, BMI, false),
  SUBARCH (tbm, TBM, TBM, false),
  SUBARCH (adx, ADX, ADX, false),
  SUBARCH (rdseed, RDSEED, RDSEED, false),
  SUBARCH (prfchw, PRFCHW, PRFCHW, false),
  SUBARCH (smap, SMAP, SMAP, false),
  SUBARCH (mpx, MPX, ANY_MPX, false),
  SUBARCH (sha, SHA, ANY_SHA, false),
  SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
  SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
  SUBARCH (se1, SE1, SE1, false),
  SUBARCH (clwb, CLWB, CLWB, false),
  VECARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, reset),
  VECARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, reset),
  VECARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, reset),
  VECARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, reset),
  VECARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, reset),
  VECARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, reset),
  VECARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, reset),
  VECARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, reset),
  VECARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, reset),
  SUBARCH (clzero, CLZERO, CLZERO, false),
  SUBARCH (mwaitx, MWAITX, MWAITX, false),
  SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
  SUBARCH (rdpid, RDPID, RDPID, false),
  SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
  SUBARCH (ibt, IBT, IBT, false),
  SUBARCH (shstk, SHSTK, SHSTK, false),
  SUBARCH (gfni, GFNI, ANY_GFNI, false),
  VECARCH (vaes, VAES, ANY_VAES, reset),
  VECARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, reset),
  SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
  SUBARCH (pconfig, PCONFIG, PCONFIG, false),
  SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
  SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
  SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
  SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
  SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
  SUBARCH (amx_complex, AMX_COMPLEX, ANY_AMX_COMPLEX, false),
  SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
  SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
  SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
  VECARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, reset),
  VECARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
	   ANY_AVX512_VP2INTERSECT, reset),
  SUBARCH (tdx, TDX, TDX, false),
  SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
  SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
  SUBARCH (rdpru, RDPRU, RDPRU, false),
  SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
  SUBARCH (sev_es, SEV_ES, ANY_SEV_ES, false),
  SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
  SUBARCH (kl, KL, ANY_KL, false),
  SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
  SUBARCH (uintr, UINTR, UINTR, false),
  SUBARCH (hreset, HRESET, HRESET, false),
  VECARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, reset),
  SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
  VECARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, reset),
  VECARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, reset),
  SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
  SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
  SUBARCH (msrlist, MSRLIST, MSRLIST, false),
  VECARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, reset),
  SUBARCH (rao_int, RAO_INT, RAO_INT, false),
  SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
  SUBARCH (fred, FRED, ANY_FRED, false),
  SUBARCH (lkgs, LKGS, ANY_LKGS, false),
  VECARCH (avx_vnni_int16, AVX_VNNI_INT16, ANY_AVX_VNNI_INT16, reset),
  VECARCH (sha512, SHA512, ANY_SHA512, reset),
  VECARCH (sm3, SM3, ANY_SM3, reset),
  VECARCH (sm4, SM4, ANY_SM4, reset),
  SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
  VECARCH (avx10.1, AVX10_1, ANY_AVX512F, set),
  SUBARCH (user_msr, USER_MSR, USER_MSR, false),
  SUBARCH (apx_f, APX_F, APX_F, false),
};
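
/* Example (illustrative, not from the original source): ".arch i686"
   selects the PROCESSOR_I686 entry above, while extensions use a leading
   dot, e.g. ".arch .sse4.1" to enable and ".arch .nosse4.1" to disable
   the corresponding feature flags.  */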

#undef SUBARCH
#undef ARCH

#ifdef I386COFF
/* Like s_lcomm_internal in gas/read.c but the alignment string
   is allowed to be optional.  */

static symbolS *
pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
{
  addressT align = 0;

  SKIP_WHITESPACE ();

  if (needs_align
      && *input_line_pointer == ',')
    {
      align = parse_align (needs_align - 1);

      if (align == (addressT) -1)
	return NULL;
    }
  else
    {
      if (size >= 8)
	align = 3;
      else if (size >= 4)
	align = 2;
      else if (size >= 2)
	align = 1;
      else
	align = 0;
    }

  bss_alloc (symbolP, size, align);
  return symbolP;
}

static void
pe_lcomm (int needs_align)
{
  s_comm_internal (needs_align * 2, pe_lcomm_internal);
}
#endif

const pseudo_typeS md_pseudo_table[] =
{
#if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
  {"align", s_align_bytes, 0},
#else
  {"align", s_align_ptwo, 0},
#endif
  {"arch", set_cpu_arch, 0},
#ifdef OBJ_AOUT
  {"bss", s_bss, 0},
#endif
#ifdef I386COFF
  {"lcomm", pe_lcomm, 1},
#endif
  {"ffloat", float_cons, 'f'},
  {"dfloat", float_cons, 'd'},
  {"tfloat", float_cons, 'x'},
  {"hfloat", float_cons, 'h'},
  {"bfloat16", float_cons, 'b'},
  {"value", cons, 2},
  {"slong", signed_cons, 4},
  {"insn", s_insn, 0},
  {"noopt", s_noopt, 0},
  {"optim", s_ignore, 0},
  {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
  {"code16", set_code_flag, CODE_16BIT},
  {"code32", set_code_flag, CODE_32BIT},
#ifdef BFD64
  {"code64", set_code_flag, CODE_64BIT},
#endif
  {"intel_syntax", set_intel_syntax, 1},
  {"att_syntax", set_intel_syntax, 0},
  {"intel_mnemonic", set_intel_mnemonic, 1},
  {"att_mnemonic", set_intel_mnemonic, 0},
  {"allow_index_reg", set_allow_index_reg, 1},
  {"disallow_index_reg", set_allow_index_reg, 0},
  {"sse_check", set_check, 0},
  {"operand_check", set_check, 1},
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"largecomm", handle_large_common, 0},
#else
  {"file", dwarf2_directive_file, 0},
  {"loc", dwarf2_directive_loc, 0},
  {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
#endif
#ifdef TE_PE
  {"secrel32", pe_directive_secrel, 0},
  {"secidx", pe_directive_secidx, 0},
#endif
  {0, 0, 0}
};

/* For interface with expression ().  */
extern char *input_line_pointer;

/* Hash table for instruction mnemonic lookup.  */
static htab_t op_hash;

/* Hash table for register lookup.  */
static htab_t reg_hash;

/* Various efficient no-op patterns for aligning code labels.
   Note: Don't try to assemble the instructions in the comments.
   0L and 0w are not legal.  */
static const unsigned char f32_1[] =
  {0x90};			/* nop */
static const unsigned char f32_2[] =
  {0x66,0x90};			/* xchg %ax,%ax */
static const unsigned char f32_3[] =
  {0x8d,0x76,0x00};		/* leal 0(%esi),%esi */
#define f32_4 (f32_5 + 1)	/* leal 0(%esi,%eiz),%esi */
static const unsigned char f32_5[] =
  {0x2e,0x8d,0x74,0x26,0x00};	/* leal %cs:0(%esi,%eiz),%esi */
static const unsigned char f32_6[] =
  {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
#define f32_7 (f32_8 + 1)	/* leal 0L(%esi,%eiz),%esi */
static const unsigned char f32_8[] =
  {0x2e,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal %cs:0L(%esi,%eiz),%esi */
static const unsigned char f64_3[] =
  {0x48,0x89,0xf6};		/* mov %rsi,%rsi */
static const unsigned char f64_4[] =
  {0x48,0x8d,0x76,0x00};	/* lea 0(%rsi),%rsi */
#define f64_5 (f64_6 + 1)	/* lea 0(%rsi,%riz),%rsi */
static const unsigned char f64_6[] =
  {0x2e,0x48,0x8d,0x74,0x26,0x00}; /* lea %cs:0(%rsi,%riz),%rsi */
static const unsigned char f64_7[] =
  {0x48,0x8d,0xb6,0x00,0x00,0x00,0x00}; /* lea 0L(%rsi),%rsi */
#define f64_8 (f64_9 + 1)	/* lea 0L(%rsi,%riz),%rsi */
static const unsigned char f64_9[] =
  {0x2e,0x48,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* lea %cs:0L(%rsi,%riz),%rsi */
#define f16_2 (f64_3 + 1)	/* mov %si,%si */
static const unsigned char f16_3[] =
  {0x8d,0x74,0x00};		/* lea 0(%si),%si */
#define f16_4 (f16_5 + 1)	/* lea 0W(%si),%si */
static const unsigned char f16_5[] =
  {0x2e,0x8d,0xb4,0x00,0x00};	/* lea %cs:0W(%si),%si */
static const unsigned char jump_disp8[] =
  {0xeb};			/* jmp disp8 */
static const unsigned char jump32_disp32[] =
  {0xe9};			/* jmp disp32 */
static const unsigned char jump16_disp32[] =
  {0x66,0xe9};			/* jmp disp32 */
/* 32-bit NOPs patterns.  */
static const unsigned char *const f32_patt[] = {
  f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8
};
/* 64-bit NOPs patterns.  */
static const unsigned char *const f64_patt[] = {
  f32_1, f32_2, f64_3, f64_4, f64_5, f64_6, f64_7, f64_8, f64_9
};
/* 16-bit NOPs patterns.  */
static const unsigned char *const f16_patt[] = {
  f32_1, f16_2, f16_3, f16_4, f16_5
};
/* nopl (%[re]ax) */
static const unsigned char alt_3[] =
  {0x0f,0x1f,0x00};
/* nopl 0(%[re]ax) */
static const unsigned char alt_4[] =
  {0x0f,0x1f,0x40,0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
#define alt_5 (alt_6 + 1)
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] =
  {0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopl 0L(%[re]ax) */
static const unsigned char alt_7[] =
  {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
#define alt_8 (alt_9 + 1)
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
  {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
#define alt_10 (alt_11 + 1)
/* data16 nopw %cs:0L(%eax,%eax,1) */
static const unsigned char alt_11[] =
  {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* 32-bit and 64-bit NOPs patterns.  */
static const unsigned char *const alt_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt_9, alt_10, alt_11
};
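
/* Note (added): the #define'd patterns above alias into the tail of a
   longer pattern -- e.g. f32_4 is f32_5 + 1, since dropping f32_5's
   leading %cs segment prefix byte leaves a valid one-byte-shorter
   instruction, so the shorter encodings need no arrays of their own.  */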

/* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
   size of a single NOP instruction MAX_SINGLE_NOP_SIZE.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
		  int count, int max_single_nop_size)

{
  /* Place the longer NOP first.  */
  int last;
  int offset;
  const unsigned char *nops;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
		max_single_nop_size);
      return;
    }

  nops = patt[max_single_nop_size - 1];
  last = count % max_single_nop_size;

  count -= last;
  for (offset = 0; offset < count; offset += max_single_nop_size)
    memcpy (where + offset, nops, max_single_nop_size);

  if (last)
    {
      nops = patt[last - 1];
      memcpy (where + offset, nops, last);
    }
}
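
/* For instance (added for clarity): COUNT == 11 with
   MAX_SINGLE_NOP_SIZE == 4 emits two 4-byte NOPs from the loop above plus
   one 3-byte NOP for the remainder (11 % 4 == 3).  */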

static INLINE int
fits_in_imm7 (offsetT num)
{
  return (num & 0x7f) == num;
}

static INLINE int
fits_in_imm31 (offsetT num)
{
  return (num & 0x7fffffff) == num;
}
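
/* E.g. fits_in_imm7 (0x7f) and fits_in_imm31 (0x7fffffff) are non-zero,
   while negative values always fail, since the masked value must compare
   equal to the original.  */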

/* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
   single NOP instruction LIMIT.  */

void
i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
{
  const unsigned char *const *patt = NULL;
  int max_single_nop_size;
  /* Maximum number of NOPs before switching to jump over NOPs.  */
  int max_number_of_nops;

  switch (fragP->fr_type)
    {
    case rs_fill_nop:
    case rs_align_code:
      break;
    case rs_machine_dependent:
      /* Allow NOP padding for jumps and calls.  */
      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
	  || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
	break;
      /* Fall through.  */
    default:
      return;
    }

  /* We need to decide which NOP sequence to use for 32bit and
     64bit.  When -mtune= is used:

     1. For PROCESSOR_I?86, PROCESSOR_PENTIUM, PROCESSOR_IAMCU, and
	PROCESSOR_GENERIC32, f32_patt will be used.
     2. For the rest, alt_patt will be used.

     When -mtune= isn't used, alt_patt will be used if
     cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt/f64_patt will
     be used.

     When -march= or .arch is used, we can't use anything beyond
     cpu_arch_isa_flags.  */

  if (fragP->tc_frag_data.code == CODE_16BIT)
    {
      patt = f16_patt;
      max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
      /* Limit number of NOPs to 2 in 16-bit mode.  */
      max_number_of_nops = 2;
    }
  else
    {
      patt = fragP->tc_frag_data.code == CODE_64BIT ? f64_patt : f32_patt;
      if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
	{
	  /* PROCESSOR_UNKNOWN means that all ISAs may be used, unless
	     explicitly disabled.  */
	  switch (fragP->tc_frag_data.tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* We use cpu_arch_isa_flags to check if we SHOULD
		 optimize with nops.  */
	      if (fragP->tc_frag_data.isanop)
		patt = alt_patt;
	      break;

	    case PROCESSOR_PENTIUMPRO:
	    case PROCESSOR_PENTIUM4:
	    case PROCESSOR_NOCONA:
	    case PROCESSOR_CORE:
	    case PROCESSOR_CORE2:
	    case PROCESSOR_COREI7:
	    case PROCESSOR_GENERIC64:
	    case PROCESSOR_K6:
	    case PROCESSOR_ATHLON:
	    case PROCESSOR_K8:
	    case PROCESSOR_AMDFAM10:
	    case PROCESSOR_BD:
	    case PROCESSOR_ZNVER:
	    case PROCESSOR_BT:
	      if (fragP->tc_frag_data.cpunop)
		patt = alt_patt;
	      break;

	    case PROCESSOR_I386:
	    case PROCESSOR_I486:
	    case PROCESSOR_PENTIUM:
	    case PROCESSOR_I686:
	    case PROCESSOR_IAMCU:
	    case PROCESSOR_GENERIC32:
	      break;
	    case PROCESSOR_NONE:
	      abort ();
	    }
	}
      else
	{
	  switch (fragP->tc_frag_data.tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
		 PROCESSOR_UNKNOWN.  */
	      abort ();
	      break;

	    default:
	      /* We use cpu_arch_isa_flags to check if we CAN optimize
		 with nops.  */
	      if (fragP->tc_frag_data.isanop)
		patt = alt_patt;
	      break;

	    case PROCESSOR_NONE:
	      abort ();
	    }
	}

      if (patt != alt_patt)
	{
	  max_single_nop_size = patt == f32_patt ? ARRAY_SIZE (f32_patt)
						 : ARRAY_SIZE (f64_patt);
	  /* Limit number of NOPs to 2 for older processors.  */
	  max_number_of_nops = 2;
	}
      else
	{
	  max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
	  /* Limit number of NOPs to 7 for newer processors.  */
	  max_number_of_nops = 7;
	}
    }

  if (limit == 0)
    limit = max_single_nop_size;

  if (fragP->fr_type == rs_fill_nop)
    {
      /* Output NOPs for .nop directive.  */
      if (limit > max_single_nop_size)
	{
	  as_bad_where (fragP->fr_file, fragP->fr_line,
			_("invalid single nop size: %d "
			  "(expect within [0, %d])"),
			limit, max_single_nop_size);
	  return;
	}
    }
  else if (fragP->fr_type != rs_machine_dependent)
    fragP->fr_var = count;

  /* Emit a plain NOP first when the last thing we saw may not have been
     a proper instruction (e.g. a stand-alone prefix or .byte).  */
  if (!fragP->tc_frag_data.last_insn_normal)
    {
      *where++ = 0x90;
      --count;
    }

  if ((count / max_single_nop_size) > max_number_of_nops)
    {
      /* Generate jump over NOPs.  */
      offsetT disp = count - 2;
      if (fits_in_imm7 (disp))
	{
	  /* Use "jmp disp8" if possible.  */
	  count = disp;
	  where[0] = jump_disp8[0];
	  where[1] = count;
	  where += 2;
	}
      else
	{
	  unsigned int size_of_jump;

	  if (flag_code == CODE_16BIT)
	    {
	      where[0] = jump16_disp32[0];
	      where[1] = jump16_disp32[1];
	      size_of_jump = 2;
	    }
	  else
	    {
	      where[0] = jump32_disp32[0];
	      size_of_jump = 1;
	    }

	  count -= size_of_jump + 4;
	  if (!fits_in_imm31 (count))
	    {
	      as_bad_where (fragP->fr_file, fragP->fr_line,
			    _("jump over nop padding out of range"));
	      return;
	    }

	  md_number_to_chars (where + size_of_jump, count, 4);
	  where += size_of_jump + 4;
	}
    }

  /* Generate multiple NOPs.  */
  i386_output_nops (where, patt, count, limit);
}
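
/* For example (illustrative, not from the original source): a request for
   40 bytes of padding with the f32_patt table (8-byte maximum NOP, 2-NOP
   cap) exceeds the cap, so a two-byte "jmp disp8" over the remaining 38
   bytes is emitted instead of five consecutive NOPs.  */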

static INLINE int
operand_type_all_zero (const union i386_operand_type *x)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      if (x->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return !x->array[0];
    default:
      abort ();
    }
}

static INLINE void
operand_type_set (union i386_operand_type *x, unsigned int v)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      x->array[2] = v;
      /* Fall through.  */
    case 2:
      x->array[1] = v;
      /* Fall through.  */
    case 1:
      x->array[0] = v;
      /* Fall through.  */
      break;
    default:
      abort ();
    }

  x->bitfield.class = ClassNone;
  x->bitfield.instance = InstanceNone;
}

static INLINE int
operand_type_equal (const union i386_operand_type *x,
		    const union i386_operand_type *y)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      if (x->array[2] != y->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1] != y->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return x->array[0] == y->array[0];
      break;
    default:
      abort ();
    }
}

static INLINE bool
_is_cpu (const i386_cpu_attr *a, enum i386_cpu cpu)
{
  switch (cpu)
    {
    case Cpu287:      return a->bitfield.cpu287;
    case Cpu387:      return a->bitfield.cpu387;
    case Cpu3dnow:    return a->bitfield.cpu3dnow;
    case Cpu3dnowA:   return a->bitfield.cpu3dnowa;
    case CpuAVX:      return a->bitfield.cpuavx;
    case CpuHLE:      return a->bitfield.cpuhle;
    case CpuAVX512F:  return a->bitfield.cpuavx512f;
    case CpuAVX512VL: return a->bitfield.cpuavx512vl;
    case CpuAPX_F:    return a->bitfield.cpuapx_f;
    case Cpu64:       return a->bitfield.cpu64;
    case CpuNo64:     return a->bitfield.cpuno64;
    default:
      gas_assert (cpu < CpuAttrEnums);
    }
  return a->bitfield.isa == cpu + 1u;
}

static INLINE bool
is_cpu (const insn_template *t, enum i386_cpu cpu)
{
  return _is_cpu(&t->cpu, cpu);
}

static INLINE bool
maybe_cpu (const insn_template *t, enum i386_cpu cpu)
{
  return _is_cpu(&t->cpu_any, cpu);
}

static i386_cpu_flags cpu_flags_from_attr (i386_cpu_attr a)
{
  const unsigned int bps = sizeof (a.array[0]) * CHAR_BIT;
  i386_cpu_flags f = { .array[0] = 0 };

  switch (ARRAY_SIZE (a.array))
    {
    case 1:
      f.array[CpuAttrEnums / bps]
#ifndef WORDS_BIGENDIAN
	|= (a.array[0] >> CpuIsaBits) << (CpuAttrEnums % bps);
#else
	|= (a.array[0] << CpuIsaBits) >> (CpuAttrEnums % bps);
#endif
      if (CpuMax / bps > CpuAttrEnums / bps)
	f.array[CpuAttrEnums / bps + 1]
#ifndef WORDS_BIGENDIAN
	  = (a.array[0] >> CpuIsaBits) >> (bps - CpuAttrEnums % bps);
#else
	  = (a.array[0] << CpuIsaBits) << (bps - CpuAttrEnums % bps);
#endif
      break;

    default:
      abort ();
    }

  if (a.bitfield.isa)
#ifndef WORDS_BIGENDIAN
    f.array[(a.bitfield.isa - 1) / bps] |= 1u << ((a.bitfield.isa - 1) % bps);
#else
    f.array[(a.bitfield.isa - 1) / bps] |= 1u << (~(a.bitfield.isa - 1) % bps);
#endif

  return f;
}

static INLINE int
cpu_flags_all_zero (const union i386_cpu_flags *x)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 5:
      if (x->array[4])
	return 0;
      /* Fall through.  */
    case 4:
      if (x->array[3])
	return 0;
      /* Fall through.  */
    case 3:
      if (x->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return !x->array[0];
    default:
      abort ();
    }
}

static INLINE int
cpu_flags_equal (const union i386_cpu_flags *x,
		 const union i386_cpu_flags *y)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 5:
      if (x->array[4] != y->array[4])
	return 0;
      /* Fall through.  */
    case 4:
      if (x->array[3] != y->array[3])
	return 0;
      /* Fall through.  */
    case 3:
      if (x->array[2] != y->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1] != y->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return x->array[0] == y->array[0];
      break;
    default:
      abort ();
    }
}

static INLINE int
cpu_flags_check_cpu64 (const insn_template *t)
{
  return flag_code == CODE_64BIT
	 ? !t->cpu.bitfield.cpuno64
	 : !t->cpu.bitfield.cpu64;
}
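
/* E.g. (illustrative): a template carrying CpuNo64 (such as "pusha")
   fails this check in 64-bit mode, while one carrying Cpu64 (such as
   "swapgs") fails it in 16- and 32-bit modes.  */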

static INLINE i386_cpu_flags
cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
{
  switch (ARRAY_SIZE (x.array))
    {
    case 5:
      x.array [4] &= y.array [4];
      /* Fall through.  */
    case 4:
      x.array [3] &= y.array [3];
      /* Fall through.  */
    case 3:
      x.array [2] &= y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] &= y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] &= y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static INLINE i386_cpu_flags
cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
{
  switch (ARRAY_SIZE (x.array))
    {
    case 5:
      x.array [4] |= y.array [4];
      /* Fall through.  */
    case 4:
      x.array [3] |= y.array [3];
      /* Fall through.  */
    case 3:
      x.array [2] |= y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] |= y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] |= y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static INLINE i386_cpu_flags
cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
{
  switch (ARRAY_SIZE (x.array))
    {
    case 5:
      x.array [4] &= ~y.array [4];
      /* Fall through.  */
    case 4:
      x.array [3] &= ~y.array [3];
      /* Fall through.  */
    case 3:
      x.array [2] &= ~y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] &= ~y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] &= ~y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;

static INLINE bool need_evex_encoding (const insn_template *t)
{
  return i.encoding == encoding_evex
	 || i.encoding == encoding_evex512
	 || (t->opcode_modifier.vex && i.encoding == encoding_egpr)
	 || i.mask.reg;
}
1891
1892 #define CPU_FLAGS_ARCH_MATCH 0x1
1893 #define CPU_FLAGS_64BIT_MATCH 0x2
1894
1895 #define CPU_FLAGS_PERFECT_MATCH \
1896 (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1897
1898 /* Return CPU flags match bits. */
1899
1900 static int
1901 cpu_flags_match (const insn_template *t)
1902 {
1903 i386_cpu_flags cpu, active, all = cpu_flags_from_attr (t->cpu);
1904 i386_cpu_flags any = cpu_flags_from_attr (t->cpu_any);
1905 int match = cpu_flags_check_cpu64 (t) ? CPU_FLAGS_64BIT_MATCH : 0;
1906
1907 all.bitfield.cpu64 = 0;
1908 all.bitfield.cpuno64 = 0;
1909 gas_assert (!any.bitfield.cpu64);
1910 gas_assert (!any.bitfield.cpuno64);
1911
1912 if (cpu_flags_all_zero (&all) && cpu_flags_all_zero (&any))
1913 {
1914 /* This instruction is available on all archs. */
1915 return match | CPU_FLAGS_ARCH_MATCH;
1916 }
1917
1918 /* This instruction is available only on some archs. */
1919
1920 /* Dual VEX/EVEX templates may need stripping of one of the flags. */
1921 if (t->opcode_modifier.vex && t->opcode_modifier.evex)
1922 {
1923 /* Dual AVX/AVX512 templates need to retain AVX512* only if we already
1924 know that EVEX encoding will be needed. */
1925 if ((any.bitfield.cpuavx || any.bitfield.cpuavx2 || any.bitfield.cpufma)
1926 && (any.bitfield.cpuavx512f || any.bitfield.cpuavx512vl))
1927 {
1928 if (need_evex_encoding (t))
1929 {
1930 any.bitfield.cpuavx = 0;
1931 any.bitfield.cpuavx2 = 0;
1932 any.bitfield.cpufma = 0;
1933 }
1934 /* need_evex_encoding(t) isn't reliable until operands have
1935 been parsed. */
1936 else if (i.operands)
1937 {
1938 any.bitfield.cpuavx512f = 0;
1939 any.bitfield.cpuavx512vl = 0;
1940 }
1941 }
1942
1943 /* Dual non-APX/APX templates need massaging from what APX_F() in the
1944 opcode table has produced. While the direct transformation of the
1945 incoming cpuid&(cpuid|APX_F) would be to cpuid&(cpuid) / cpuid&(APX_F)
1946 respectively, it's cheaper to move to just cpuid / cpuid&APX_F
1947 instead. */
1948 if (any.bitfield.cpuapx_f
1949 && (any.bitfield.cpubmi || any.bitfield.cpubmi2
1950 || any.bitfield.cpuavx512f || any.bitfield.cpuavx512bw
1951 || any.bitfield.cpuavx512dq || any.bitfield.cpuamx_tile
1952 || any.bitfield.cpucmpccxadd || any.bitfield.cpuuser_msr))
1953 {
1954 /* These checks (verifying that APX_F() was properly used in the
1955 opcode table entry) make sure there's no need for an "else" to
1956 the "if()" below. */
1957 gas_assert (!cpu_flags_all_zero (&all));
1958 cpu = cpu_flags_and (all, any);
1959 gas_assert (cpu_flags_equal (&cpu, &all));
1960
1961 if (need_evex_encoding (t))
1962 all = any;
1963
1964 memset (&any, 0, sizeof (any));
1965 }
1966 }
1967
1968 if (flag_code != CODE_64BIT)
1969 active = cpu_flags_and_not (cpu_arch_flags, cpu_64_flags);
1970 else
1971 active = cpu_arch_flags;
1972 cpu = cpu_flags_and (all, active);
1973 if (cpu_flags_equal (&cpu, &all))
1974 {
1975 /* AVX and AVX2 present at the same time express an operand size
1976 dependency - strip AVX2 for the purposes here. The operand size
1977 dependent check occurs in check_vecOperands(). */
1978 if (any.bitfield.cpuavx && any.bitfield.cpuavx2)
1979 any.bitfield.cpuavx2 = 0;
1980
1981 cpu = cpu_flags_and (any, active);
1982 if (cpu_flags_all_zero (&any) || !cpu_flags_all_zero (&cpu))
1983 match |= CPU_FLAGS_ARCH_MATCH;
1984 }
1985 return match;
1986 }
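
/* Illustrative example (template made up for this comment): a dual
   VEX/EVEX template carrying AVX|AVX512F in its "any" set, assembled
   with only AVX enabled, yields CPU_FLAGS_PERFECT_MATCH while VEX
   encoding is in use (the AVX512 bits are stripped above), but loses
   CPU_FLAGS_ARCH_MATCH once EVEX encoding is required and the AVX
   bits are stripped instead.  */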
1987
1988 static INLINE i386_operand_type
1989 operand_type_and (i386_operand_type x, i386_operand_type y)
1990 {
1991 if (x.bitfield.class != y.bitfield.class)
1992 x.bitfield.class = ClassNone;
1993 if (x.bitfield.instance != y.bitfield.instance)
1994 x.bitfield.instance = InstanceNone;
1995
1996 switch (ARRAY_SIZE (x.array))
1997 {
1998 case 3:
1999 x.array [2] &= y.array [2];
2000 /* Fall through. */
2001 case 2:
2002 x.array [1] &= y.array [1];
2003 /* Fall through. */
2004 case 1:
2005 x.array [0] &= y.array [0];
2006 break;
2007 default:
2008 abort ();
2009 }
2010 return x;
2011 }
2012
2013 static INLINE i386_operand_type
2014 operand_type_and_not (i386_operand_type x, i386_operand_type y)
2015 {
2016 gas_assert (y.bitfield.class == ClassNone);
2017 gas_assert (y.bitfield.instance == InstanceNone);
2018
2019 switch (ARRAY_SIZE (x.array))
2020 {
2021 case 3:
2022 x.array [2] &= ~y.array [2];
2023 /* Fall through. */
2024 case 2:
2025 x.array [1] &= ~y.array [1];
2026 /* Fall through. */
2027 case 1:
2028 x.array [0] &= ~y.array [0];
2029 break;
2030 default:
2031 abort ();
2032 }
2033 return x;
2034 }
2035
2036 static INLINE i386_operand_type
2037 operand_type_or (i386_operand_type x, i386_operand_type y)
2038 {
2039 gas_assert (x.bitfield.class == ClassNone ||
2040 y.bitfield.class == ClassNone ||
2041 x.bitfield.class == y.bitfield.class);
2042 gas_assert (x.bitfield.instance == InstanceNone ||
2043 y.bitfield.instance == InstanceNone ||
2044 x.bitfield.instance == y.bitfield.instance);
2045
2046 switch (ARRAY_SIZE (x.array))
2047 {
2048 case 3:
2049 x.array [2] |= y.array [2];
2050 /* Fall through. */
2051 case 2:
2052 x.array [1] |= y.array [1];
2053 /* Fall through. */
2054 case 1:
2055 x.array [0] |= y.array [0];
2056 break;
2057 default:
2058 abort ();
2059 }
2060 return x;
2061 }
2062
2063 static INLINE i386_operand_type
2064 operand_type_xor (i386_operand_type x, i386_operand_type y)
2065 {
2066 gas_assert (y.bitfield.class == ClassNone);
2067 gas_assert (y.bitfield.instance == InstanceNone);
2068
2069 switch (ARRAY_SIZE (x.array))
2070 {
2071 case 3:
2072 x.array [2] ^= y.array [2];
2073 /* Fall through. */
2074 case 2:
2075 x.array [1] ^= y.array [1];
2076 /* Fall through. */
2077 case 1:
2078 x.array [0] ^= y.array [0];
2079 break;
2080 default:
2081 abort ();
2082 }
2083 return x;
2084 }
2085
2086 static const i386_operand_type anydisp = {
2087 .bitfield = { .disp8 = 1, .disp16 = 1, .disp32 = 1, .disp64 = 1 }
2088 };
2089
2090 enum operand_type
2091 {
2092 reg,
2093 imm,
2094 disp,
2095 anymem
2096 };
2097
2098 static INLINE int
2099 operand_type_check (i386_operand_type t, enum operand_type c)
2100 {
2101 switch (c)
2102 {
2103 case reg:
2104 return t.bitfield.class == Reg;
2105
2106 case imm:
2107 return (t.bitfield.imm8
2108 || t.bitfield.imm8s
2109 || t.bitfield.imm16
2110 || t.bitfield.imm32
2111 || t.bitfield.imm32s
2112 || t.bitfield.imm64);
2113
2114 case disp:
2115 return (t.bitfield.disp8
2116 || t.bitfield.disp16
2117 || t.bitfield.disp32
2118 || t.bitfield.disp64);
2119
2120 case anymem:
2121 return (t.bitfield.disp8
2122 || t.bitfield.disp16
2123 || t.bitfield.disp32
2124 || t.bitfield.disp64
2125 || t.bitfield.baseindex);
2126
2127 default:
2128 abort ();
2129 }
2130
2131 return 0;
2132 }
2133
2134 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2135 between operand GIVEN and operand WANTED for instruction template T. */
2136
2137 static INLINE int
2138 match_operand_size (const insn_template *t, unsigned int wanted,
2139 unsigned int given)
2140 {
2141 return !((i.types[given].bitfield.byte
2142 && !t->operand_types[wanted].bitfield.byte)
2143 || (i.types[given].bitfield.word
2144 && !t->operand_types[wanted].bitfield.word)
2145 || (i.types[given].bitfield.dword
2146 && !t->operand_types[wanted].bitfield.dword)
2147 || (i.types[given].bitfield.qword
2148 && (!t->operand_types[wanted].bitfield.qword
2149 /* Don't allow 64-bit (memory) operands outside of 64-bit
2150 mode, when they're used where a 64-bit GPR could also
2151 be used. Checking is needed for Intel Syntax only. */
2152 || (intel_syntax
2153 && flag_code != CODE_64BIT
2154 && (t->operand_types[wanted].bitfield.class == Reg
2155 || t->operand_types[wanted].bitfield.class == Accum
2156 || t->opcode_modifier.isstring))))
2157 || (i.types[given].bitfield.tbyte
2158 && !t->operand_types[wanted].bitfield.tbyte));
2159 }
2160
2161 /* Return 1 if there is no conflict in SIMD register between operand
2162 GIVEN and operand WANTED for instruction template T. */
2163
2164 static INLINE int
2165 match_simd_size (const insn_template *t, unsigned int wanted,
2166 unsigned int given)
2167 {
2168 return !((i.types[given].bitfield.xmmword
2169 && !t->operand_types[wanted].bitfield.xmmword)
2170 || (i.types[given].bitfield.ymmword
2171 && !t->operand_types[wanted].bitfield.ymmword)
2172 || (i.types[given].bitfield.zmmword
2173 && !t->operand_types[wanted].bitfield.zmmword)
2174 || (i.types[given].bitfield.tmmword
2175 && !t->operand_types[wanted].bitfield.tmmword));
2176 }
2177
2178 /* Return 1 if there is no conflict in any size between operand GIVEN
2179 and operand WANTED for instruction template T. */
2180
2181 static INLINE int
2182 match_mem_size (const insn_template *t, unsigned int wanted,
2183 unsigned int given)
2184 {
2185 return (match_operand_size (t, wanted, given)
2186 && !((i.types[given].bitfield.unspecified
2187 && !i.broadcast.type
2188 && !i.broadcast.bytes
2189 && !t->operand_types[wanted].bitfield.unspecified)
2190 || (i.types[given].bitfield.fword
2191 && !t->operand_types[wanted].bitfield.fword)
2192 /* For scalar opcode templates to allow register and memory
2193 operands at the same time, some special casing is needed
2194 here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2195 down-conversion vpmov*. */
2196 || ((t->operand_types[wanted].bitfield.class == RegSIMD
2197 && t->operand_types[wanted].bitfield.byte
2198 + t->operand_types[wanted].bitfield.word
2199 + t->operand_types[wanted].bitfield.dword
2200 + t->operand_types[wanted].bitfield.qword
2201 > !!t->opcode_modifier.broadcast)
2202 ? (i.types[given].bitfield.xmmword
2203 || i.types[given].bitfield.ymmword
2204 || i.types[given].bitfield.zmmword)
2205 : !match_simd_size(t, wanted, given))));
2206 }
2207
2208 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2209 operands for instruction template T, and it has MATCH_REVERSE set if there
2210 is no size conflict on any operands for the template with operands reversed
2211 (and the template allows for reversing in the first place). */
2212
2213 #define MATCH_STRAIGHT 1
2214 #define MATCH_REVERSE 2
2215
2216 static INLINE unsigned int
2217 operand_size_match (const insn_template *t)
2218 {
2219 unsigned int j, match = MATCH_STRAIGHT;
2220
2221 /* Don't check non-absolute jump instructions. */
2222 if (t->opcode_modifier.jump
2223 && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2224 return match;
2225
2226 /* Check memory and accumulator operand size. */
2227 for (j = 0; j < i.operands; j++)
2228 {
2229 if (i.types[j].bitfield.class != Reg
2230 && i.types[j].bitfield.class != RegSIMD
2231 && t->opcode_modifier.operandconstraint == ANY_SIZE)
2232 continue;
2233
2234 if (t->operand_types[j].bitfield.class == Reg
2235 && !match_operand_size (t, j, j))
2236 {
2237 match = 0;
2238 break;
2239 }
2240
2241 if (t->operand_types[j].bitfield.class == RegSIMD
2242 && !match_simd_size (t, j, j))
2243 {
2244 match = 0;
2245 break;
2246 }
2247
2248 if (t->operand_types[j].bitfield.instance == Accum
2249 && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2250 {
2251 match = 0;
2252 break;
2253 }
2254
2255 if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2256 {
2257 match = 0;
2258 break;
2259 }
2260 }
2261
2262 if (!t->opcode_modifier.d)
2263 return match;
2264
2265 /* Check reverse. */
2266 gas_assert (i.operands >= 2);
2267
2268 for (j = 0; j < i.operands; j++)
2269 {
2270 unsigned int given = i.operands - j - 1;
2271
2272 /* For FMA4 and XOP insns VEX.W controls just the first two
2273 register operands. And APX_F insns just swap the two source operands,
2274 with the 3rd one being the destination. */
2275 if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP)
2276 || is_cpu (t, CpuAPX_F))
2277 given = j < 2 ? 1 - j : j;
2278
2279 if (t->operand_types[j].bitfield.class == Reg
2280 && !match_operand_size (t, j, given))
2281 return match;
2282
2283 if (t->operand_types[j].bitfield.class == RegSIMD
2284 && !match_simd_size (t, j, given))
2285 return match;
2286
2287 if (t->operand_types[j].bitfield.instance == Accum
2288 && (!match_operand_size (t, j, given)
2289 || !match_simd_size (t, j, given)))
2290 return match;
2291
2292 if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2293 return match;
2294 }
2295
2296 return match | MATCH_REVERSE;
2297 }
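
/* Example of the result (illustrative): a register-to-register "mov"
   matched against a template with the D modifier size-matches in both
   operand orders, giving MATCH_STRAIGHT | MATCH_REVERSE; templates
   without .d can only ever yield MATCH_STRAIGHT or 0.  */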
2298
2299 static INLINE int
2300 operand_type_match (i386_operand_type overlap,
2301 i386_operand_type given)
2302 {
2303 i386_operand_type temp = overlap;
2304
2305 temp.bitfield.unspecified = 0;
2306 temp.bitfield.byte = 0;
2307 temp.bitfield.word = 0;
2308 temp.bitfield.dword = 0;
2309 temp.bitfield.fword = 0;
2310 temp.bitfield.qword = 0;
2311 temp.bitfield.tbyte = 0;
2312 temp.bitfield.xmmword = 0;
2313 temp.bitfield.ymmword = 0;
2314 temp.bitfield.zmmword = 0;
2315 temp.bitfield.tmmword = 0;
2316 if (operand_type_all_zero (&temp))
2317 goto mismatch;
2318
2319 if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2320 return 1;
2321
2322 mismatch:
2323 i.error = operand_type_mismatch;
2324 return 0;
2325 }
2326
2327 /* If given types g0 and g1 are registers they must be of the same type
2328 unless the expected operand type register overlap is null.
2329 Intel syntax sized memory operands are also checked here. */
2330
2331 static INLINE int
2332 operand_type_register_match (i386_operand_type g0,
2333 i386_operand_type t0,
2334 i386_operand_type g1,
2335 i386_operand_type t1)
2336 {
2337 if (g0.bitfield.class != Reg
2338 && g0.bitfield.class != RegSIMD
2339 && (g0.bitfield.unspecified
2340 || !operand_type_check (g0, anymem)))
2341 return 1;
2342
2343 if (g1.bitfield.class != Reg
2344 && g1.bitfield.class != RegSIMD
2345 && (g1.bitfield.unspecified
2346 || !operand_type_check (g1, anymem)))
2347 return 1;
2348
2349 if (g0.bitfield.byte == g1.bitfield.byte
2350 && g0.bitfield.word == g1.bitfield.word
2351 && g0.bitfield.dword == g1.bitfield.dword
2352 && g0.bitfield.qword == g1.bitfield.qword
2353 && g0.bitfield.xmmword == g1.bitfield.xmmword
2354 && g0.bitfield.ymmword == g1.bitfield.ymmword
2355 && g0.bitfield.zmmword == g1.bitfield.zmmword)
2356 return 1;
2357
2358 /* If expectations overlap in no more than a single size, all is fine. */
2359 g0 = operand_type_and (t0, t1);
2360 if (g0.bitfield.byte
2361 + g0.bitfield.word
2362 + g0.bitfield.dword
2363 + g0.bitfield.qword
2364 + g0.bitfield.xmmword
2365 + g0.bitfield.ymmword
2366 + g0.bitfield.zmmword <= 1)
2367 return 1;
2368
2369 i.error = register_type_mismatch;
2370
2371 return 0;
2372 }
2373
2374 static INLINE unsigned int
2375 register_number (const reg_entry *r)
2376 {
2377 unsigned int nr = r->reg_num;
2378
2379 if (r->reg_flags & RegRex)
2380 nr += 8;
2381
2382 if (r->reg_flags & (RegVRex | RegRex2))
2383 nr += 16;
2384
2385 return nr;
2386 }
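
/* Example: %r10 (reg_num 2, RegRex) encodes as register 10, while an
   EVEX-only register such as %xmm16 carries RegVRex and therefore maps
   into the 16-31 range.  */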
2387
2388 static INLINE unsigned int
2389 mode_from_disp_size (i386_operand_type t)
2390 {
2391 if (t.bitfield.disp8)
2392 return 1;
2393 else if (t.bitfield.disp16
2394 || t.bitfield.disp32)
2395 return 2;
2396 else
2397 return 0;
2398 }
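
/* The value returned above is effectively the ModR/M "mod" field to use
   with a base register: 1 selects a disp8, 2 a disp16/disp32, 0 no
   displacement.  */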
2399
2400 static INLINE int
2401 fits_in_signed_byte (addressT num)
2402 {
2403 return num + 0x80 <= 0xff;
2404 }
2405
2406 static INLINE int
2407 fits_in_unsigned_byte (addressT num)
2408 {
2409 return num <= 0xff;
2410 }
2411
2412 static INLINE int
2413 fits_in_unsigned_word (addressT num)
2414 {
2415 return num <= 0xffff;
2416 }
2417
2418 static INLINE int
2419 fits_in_signed_word (addressT num)
2420 {
2421 return num + 0x8000 <= 0xffff;
2422 }
2423
2424 static INLINE int
2425 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2426 {
2427 #ifndef BFD64
2428 return 1;
2429 #else
2430 return num + 0x80000000 <= 0xffffffff;
2431 #endif
2432 } /* fits_in_signed_long() */
2433
2434 static INLINE int
2435 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2436 {
2437 #ifndef BFD64
2438 return 1;
2439 #else
2440 return num <= 0xffffffff;
2441 #endif
2442 } /* fits_in_unsigned_long() */
2443
2444 static INLINE valueT extend_to_32bit_address (addressT num)
2445 {
2446 #ifdef BFD64
2447 if (fits_in_unsigned_long(num))
2448 return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2449
2450 if (!fits_in_signed_long (num))
2451 return num & 0xffffffff;
2452 #endif
2453
2454 return num;
2455 }
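
/* Examples: 0xffffffff is sign-extended to (valueT) -1; a value fitting
   in neither 32-bit range is truncated to its low 32 bits; anything
   already in sign-extended 32-bit form passes through unchanged.  */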
2456
2457 static INLINE int
2458 fits_in_disp8 (offsetT num)
2459 {
2460 int shift = i.memshift;
2461 unsigned int mask;
2462
2463 if (shift == -1)
2464 abort ();
2465
2466 mask = (1 << shift) - 1;
2467
2468 /* Return 0 if NUM isn't properly aligned. */
2469 if ((num & mask))
2470 return 0;
2471
2472 /* Check if NUM will fit in 8bit after shift. */
2473 return fits_in_signed_byte (num >> shift);
2474 }
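
/* Worked example (EVEX compressed disp8*N): with i.memshift == 6
   (N == 64), a displacement of 0x80 is aligned (0x80 & 63 == 0) and
   0x80 >> 6 == 2 fits in a signed byte, so it can be encoded as the
   disp8 value 2; a misaligned 0x44 would be rejected.  */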
2475
2476 static INLINE int
2477 fits_in_imm4 (offsetT num)
2478 {
2479 /* Despite the name, check for imm3 if we're dealing with EVEX. */
2480 return (num & (i.encoding != encoding_evex
2481 && i.encoding != encoding_egpr ? 0xf : 7)) == num;
2482 }
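
/* Example: an immediate of 9 is acceptable for VEX encodings
   (9 & 0xf == 9) but not under EVEX/eGPR encoding, where only
   values 0-7 fit.  */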
2483
2484 static i386_operand_type
2485 smallest_imm_type (offsetT num)
2486 {
2487 i386_operand_type t;
2488
2489 operand_type_set (&t, 0);
2490 t.bitfield.imm64 = 1;
2491
2492 if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2493 {
2494 /* This code is disabled on the 486 because all the Imm1 forms
2495 in the opcode table are slower on the i486. They're the
2496 versions with the implicitly specified single-position
2497 displacement, which has another syntax if you really want to
2498 use that form. */
2499 t.bitfield.imm1 = 1;
2500 t.bitfield.imm8 = 1;
2501 t.bitfield.imm8s = 1;
2502 t.bitfield.imm16 = 1;
2503 t.bitfield.imm32 = 1;
2504 t.bitfield.imm32s = 1;
2505 }
2506 else if (fits_in_signed_byte (num))
2507 {
2508 if (fits_in_unsigned_byte (num))
2509 t.bitfield.imm8 = 1;
2510 t.bitfield.imm8s = 1;
2511 t.bitfield.imm16 = 1;
2512 if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
2513 t.bitfield.imm32 = 1;
2514 t.bitfield.imm32s = 1;
2515 }
2516 else if (fits_in_unsigned_byte (num))
2517 {
2518 t.bitfield.imm8 = 1;
2519 t.bitfield.imm16 = 1;
2520 t.bitfield.imm32 = 1;
2521 t.bitfield.imm32s = 1;
2522 }
2523 else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2524 {
2525 t.bitfield.imm16 = 1;
2526 if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
2527 t.bitfield.imm32 = 1;
2528 t.bitfield.imm32s = 1;
2529 }
2530 else if (fits_in_signed_long (num))
2531 {
2532 if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
2533 t.bitfield.imm32 = 1;
2534 t.bitfield.imm32s = 1;
2535 }
2536 else if (fits_in_unsigned_long (num))
2537 t.bitfield.imm32 = 1;
2538
2539 return t;
2540 }
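
/* Example (64-bit code): num == -1 yields imm8s | imm16 | imm32s | imm64;
   imm8 and imm32 stay clear because the value exceeds their unsigned
   ranges, and imm1 is only ever set for the literal value 1 (and not
   when tuning for the i486).  */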
2541
2542 static offsetT
2543 offset_in_range (offsetT val, int size)
2544 {
2545 addressT mask;
2546
2547 switch (size)
2548 {
2549 case 1: mask = ((addressT) 1 << 8) - 1; break;
2550 case 2: mask = ((addressT) 1 << 16) - 1; break;
2551 #ifdef BFD64
2552 case 4: mask = ((addressT) 1 << 32) - 1; break;
2553 #endif
2554 case sizeof (val): return val;
2555 default: abort ();
2556 }
2557
2558 if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2559 as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2560 (uint64_t) val, (uint64_t) (val & mask));
2561
2562 return val & mask;
2563 }
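
/* Example: offset_in_range (0x12345, 1) warns that 0x12345 is shortened
   to 0x45 and returns 0x45, since neither the value nor its negation
   fits in one byte.  */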
2564
2565 static INLINE const char *insn_name (const insn_template *t)
2566 {
2567 return &i386_mnemonics[t->mnem_off];
2568 }
2569
2570 enum PREFIX_GROUP
2571 {
2572 PREFIX_EXIST = 0,
2573 PREFIX_LOCK,
2574 PREFIX_REP,
2575 PREFIX_DS,
2576 PREFIX_OTHER
2577 };
2578
2579 /* Returns
2580 a. PREFIX_EXIST if attempting to add a prefix where one from the
2581 same class already exists.
2582 b. PREFIX_LOCK if lock prefix is added.
2583 c. PREFIX_REP if rep/repne prefix is added.
2584 d. PREFIX_DS if ds prefix is added.
2585 e. PREFIX_OTHER if other prefix is added.
2586 */
2587
2588 static enum PREFIX_GROUP
2589 add_prefix (unsigned int prefix)
2590 {
2591 enum PREFIX_GROUP ret = PREFIX_OTHER;
2592 unsigned int q;
2593
2594 if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2595 && flag_code == CODE_64BIT)
2596 {
2597 if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2598 || (i.prefix[REX_PREFIX] & prefix & REX_R)
2599 || (i.prefix[REX_PREFIX] & prefix & REX_X)
2600 || (i.prefix[REX_PREFIX] & prefix & REX_B))
2601 ret = PREFIX_EXIST;
2602 q = REX_PREFIX;
2603 }
2604 else
2605 {
2606 switch (prefix)
2607 {
2608 default:
2609 abort ();
2610
2611 case DS_PREFIX_OPCODE:
2612 ret = PREFIX_DS;
2613 /* Fall through. */
2614 case CS_PREFIX_OPCODE:
2615 case ES_PREFIX_OPCODE:
2616 case FS_PREFIX_OPCODE:
2617 case GS_PREFIX_OPCODE:
2618 case SS_PREFIX_OPCODE:
2619 q = SEG_PREFIX;
2620 break;
2621
2622 case REPNE_PREFIX_OPCODE:
2623 case REPE_PREFIX_OPCODE:
2624 q = REP_PREFIX;
2625 ret = PREFIX_REP;
2626 break;
2627
2628 case LOCK_PREFIX_OPCODE:
2629 q = LOCK_PREFIX;
2630 ret = PREFIX_LOCK;
2631 break;
2632
2633 case FWAIT_OPCODE:
2634 q = WAIT_PREFIX;
2635 break;
2636
2637 case ADDR_PREFIX_OPCODE:
2638 q = ADDR_PREFIX;
2639 break;
2640
2641 case DATA_PREFIX_OPCODE:
2642 q = DATA_PREFIX;
2643 break;
2644 }
2645 if (i.prefix[q] != 0)
2646 ret = PREFIX_EXIST;
2647 }
2648
2649 if (ret)
2650 {
2651 if (!i.prefix[q])
2652 ++i.prefixes;
2653 i.prefix[q] |= prefix;
2654 }
2655 else
2656 as_bad (_("same type of prefix used twice"));
2657
2658 return ret;
2659 }
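
/* Usage sketch: the first add_prefix (LOCK_PREFIX_OPCODE) returns
   PREFIX_LOCK and records the byte in i.prefix[LOCK_PREFIX]; a second
   call finds the slot occupied, diagnoses "same type of prefix used
   twice" and returns PREFIX_EXIST.  */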
2660
2661 static void
2662 update_code_flag (int value, int check)
2663 {
2664 PRINTF_LIKE ((*as_error)) = check ? as_fatal : as_bad;
2665
2666 if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpu64)
2667 {
2668 as_error (_("64bit mode not supported on `%s'."),
2669 cpu_arch_name ? cpu_arch_name : default_arch);
2670 return;
2671 }
2672
2673 if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2674 {
2675 as_error (_("32bit mode not supported on `%s'."),
2676 cpu_arch_name ? cpu_arch_name : default_arch);
2677 return;
2678 }
2679
2680 flag_code = (enum flag_code) value;
2681
2682 stackop_size = '\0';
2683 }
2684
2685 static void
2686 set_code_flag (int value)
2687 {
2688 update_code_flag (value, 0);
2689 }
2690
2691 static void
2692 set_16bit_gcc_code_flag (int new_code_flag)
2693 {
2694 flag_code = (enum flag_code) new_code_flag;
2695 if (flag_code != CODE_16BIT)
2696 abort ();
2697 stackop_size = LONG_MNEM_SUFFIX;
2698 }
2699
2700 static void
2701 _set_intel_syntax (int syntax_flag)
2702 {
2703 intel_syntax = syntax_flag;
2704
2705 expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2706
2707 register_prefix = allow_naked_reg ? "" : "%";
2708 }
2709
2710 static void
2711 set_intel_syntax (int syntax_flag)
2712 {
2713 /* Find out if register prefixing is specified. */
2714 int ask_naked_reg = 0;
2715
2716 SKIP_WHITESPACE ();
2717 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2718 {
2719 char *string;
2720 int e = get_symbol_name (&string);
2721
2722 if (strcmp (string, "prefix") == 0)
2723 ask_naked_reg = 1;
2724 else if (strcmp (string, "noprefix") == 0)
2725 ask_naked_reg = -1;
2726 else
2727 as_bad (_("bad argument to syntax directive."));
2728 (void) restore_line_pointer (e);
2729 }
2730 demand_empty_rest_of_line ();
2731
2732 if (ask_naked_reg == 0)
2733 allow_naked_reg = (syntax_flag
2734 && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2735 else
2736 allow_naked_reg = (ask_naked_reg < 0);
2737
2738 _set_intel_syntax (syntax_flag);
2739 }
2740
2741 static void
2742 set_intel_mnemonic (int mnemonic_flag)
2743 {
2744 intel_mnemonic = mnemonic_flag;
2745 }
2746
2747 static void
2748 set_allow_index_reg (int flag)
2749 {
2750 allow_index_reg = flag;
2751 }
2752
2753 static void
2754 set_check (int what)
2755 {
2756 enum check_kind *kind;
2757 const char *str;
2758
2759 if (what)
2760 {
2761 kind = &operand_check;
2762 str = "operand";
2763 }
2764 else
2765 {
2766 kind = &sse_check;
2767 str = "sse";
2768 }
2769
2770 SKIP_WHITESPACE ();
2771
2772 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2773 {
2774 char *string;
2775 int e = get_symbol_name (&string);
2776
2777 if (strcmp (string, "none") == 0)
2778 *kind = check_none;
2779 else if (strcmp (string, "warning") == 0)
2780 *kind = check_warning;
2781 else if (strcmp (string, "error") == 0)
2782 *kind = check_error;
2783 else
2784 as_bad (_("bad argument to %s_check directive."), str);
2785 (void) restore_line_pointer (e);
2786 }
2787 else
2788 as_bad (_("missing argument for %s_check directive"), str);
2789
2790 demand_empty_rest_of_line ();
2791 }
2792
2793 static void
2794 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2795 i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2796 {
2797 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2798 static const char *arch;
2799
2800 /* Intel MCU is only supported on ELF. */
2801 if (!IS_ELF)
2802 return;
2803
2804 if (!arch)
2805 {
2806 /* Use cpu_arch_name if it is set in md_parse_option. Otherwise
2807 use default_arch. */
2808 arch = cpu_arch_name;
2809 if (!arch)
2810 arch = default_arch;
2811 }
2812
2813 /* If we are targeting Intel MCU, we must enable it. */
2814 if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2815 == new_flag.bitfield.cpuiamcu)
2816 return;
2817
2818 as_bad (_("`%s' is not supported on `%s'"), name, arch);
2819 #endif
2820 }
2821
2822 static void
2823 extend_cpu_sub_arch_name (const char *pfx, const char *name)
2824 {
2825 if (cpu_sub_arch_name)
2826 cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
2827 pfx, name, (const char *) NULL);
2828 else
2829 cpu_sub_arch_name = concat (pfx, name, (const char *) NULL);
2830 }
2831
2832 static void isa_enable (unsigned int idx)
2833 {
2834 i386_cpu_flags flags = cpu_flags_or (cpu_arch_flags, cpu_arch[idx].enable);
2835
2836 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2837 {
2838 extend_cpu_sub_arch_name (".", cpu_arch[idx].name);
2839 cpu_arch_flags = flags;
2840 }
2841
2842 cpu_arch_isa_flags = cpu_flags_or (cpu_arch_isa_flags, cpu_arch[idx].enable);
2843 }
2844
2845 static void isa_disable (unsigned int idx)
2846 {
2847 i386_cpu_flags flags
2848 = cpu_flags_and_not (cpu_arch_flags, cpu_arch[idx].disable);
2849
2850 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2851 {
2852 extend_cpu_sub_arch_name (".no", cpu_arch[idx].name);
2853 cpu_arch_flags = flags;
2854 }
2855
2856 cpu_arch_isa_flags
2857 = cpu_flags_and_not (cpu_arch_isa_flags, cpu_arch[idx].disable);
2858 }
2859
2860 static void
2861 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2862 {
2863 typedef struct arch_stack_entry
2864 {
2865 const struct arch_stack_entry *prev;
2866 const char *name;
2867 char *sub_name;
2868 i386_cpu_flags flags;
2869 i386_cpu_flags isa_flags;
2870 enum processor_type isa;
2871 enum flag_code flag_code;
2872 unsigned int vector_size;
2873 char stackop_size;
2874 bool no_cond_jump_promotion;
2875 } arch_stack_entry;
2876 static const arch_stack_entry *arch_stack_top;
2877 char *s;
2878 int e;
2879 const char *string;
2880 unsigned int j = 0;
2881
2882 SKIP_WHITESPACE ();
2883
2884 if (is_end_of_line[(unsigned char) *input_line_pointer])
2885 {
2886 as_bad (_("missing cpu architecture"));
2887 input_line_pointer++;
2888 return;
2889 }
2890
2891 e = get_symbol_name (&s);
2892 string = s;
2893
2894 if (strcmp (string, "push") == 0)
2895 {
2896 arch_stack_entry *top = XNEW (arch_stack_entry);
2897
2898 top->name = cpu_arch_name;
2899 if (cpu_sub_arch_name)
2900 top->sub_name = xstrdup (cpu_sub_arch_name);
2901 else
2902 top->sub_name = NULL;
2903 top->flags = cpu_arch_flags;
2904 top->isa = cpu_arch_isa;
2905 top->isa_flags = cpu_arch_isa_flags;
2906 top->flag_code = flag_code;
2907 top->vector_size = vector_size;
2908 top->stackop_size = stackop_size;
2909 top->no_cond_jump_promotion = no_cond_jump_promotion;
2910
2911 top->prev = arch_stack_top;
2912 arch_stack_top = top;
2913
2914 (void) restore_line_pointer (e);
2915 demand_empty_rest_of_line ();
2916 return;
2917 }
2918
2919 if (strcmp (string, "pop") == 0)
2920 {
2921 const arch_stack_entry *top = arch_stack_top;
2922
2923 if (!top)
2924 as_bad (_(".arch stack is empty"));
2925 else if (top->flag_code != flag_code
2926 || top->stackop_size != stackop_size)
2927 {
2928 static const unsigned int bits[] = {
2929 [CODE_16BIT] = 16,
2930 [CODE_32BIT] = 32,
2931 [CODE_64BIT] = 64,
2932 };
2933
2934 as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
2935 bits[top->flag_code],
2936 top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
2937 }
2938 else
2939 {
2940 arch_stack_top = top->prev;
2941
2942 cpu_arch_name = top->name;
2943 free (cpu_sub_arch_name);
2944 cpu_sub_arch_name = top->sub_name;
2945 cpu_arch_flags = top->flags;
2946 cpu_arch_isa = top->isa;
2947 cpu_arch_isa_flags = top->isa_flags;
2948 vector_size = top->vector_size;
2949 no_cond_jump_promotion = top->no_cond_jump_promotion;
2950
2951 XDELETE (top);
2952 }
2953
2954 (void) restore_line_pointer (e);
2955 demand_empty_rest_of_line ();
2956 return;
2957 }
2958
2959 if (strcmp (string, "default") == 0)
2960 {
2961 if (strcmp (default_arch, "iamcu") == 0)
2962 string = default_arch;
2963 else
2964 {
2965 static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
2966
2967 cpu_arch_name = NULL;
2968 free (cpu_sub_arch_name);
2969 cpu_sub_arch_name = NULL;
2970 cpu_arch_flags = cpu_unknown_flags;
2971 cpu_arch_isa = PROCESSOR_UNKNOWN;
2972 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
2973 if (!cpu_arch_tune_set)
2974 cpu_arch_tune = PROCESSOR_UNKNOWN;
2975
2976 vector_size = VSZ_DEFAULT;
2977
2978 j = ARRAY_SIZE (cpu_arch) + 1;
2979 }
2980 }
2981
2982 for (; j < ARRAY_SIZE (cpu_arch); j++)
2983 {
2984 if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
2985 && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
2986 {
2987 if (*string != '.')
2988 {
2989 check_cpu_arch_compatible (string, cpu_arch[j].enable);
2990
2991 if (flag_code == CODE_64BIT && !cpu_arch[j].enable.bitfield.cpu64)
2992 {
2993 as_bad (_("64bit mode not supported on `%s'."),
2994 cpu_arch[j].name);
2995 (void) restore_line_pointer (e);
2996 ignore_rest_of_line ();
2997 return;
2998 }
2999
3000 if (flag_code == CODE_32BIT && !cpu_arch[j].enable.bitfield.cpui386)
3001 {
3002 as_bad (_("32bit mode not supported on `%s'."),
3003 cpu_arch[j].name);
3004 (void) restore_line_pointer (e);
3005 ignore_rest_of_line ();
3006 return;
3007 }
3008
3009 cpu_arch_name = cpu_arch[j].name;
3010 free (cpu_sub_arch_name);
3011 cpu_sub_arch_name = NULL;
3012 cpu_arch_flags = cpu_arch[j].enable;
3013 cpu_arch_isa = cpu_arch[j].type;
3014 cpu_arch_isa_flags = cpu_arch[j].enable;
3015 if (!cpu_arch_tune_set)
3016 cpu_arch_tune = cpu_arch_isa;
3017
3018 vector_size = VSZ_DEFAULT;
3019
3020 pre_386_16bit_warned = false;
3021 break;
3022 }
3023
3024 if (cpu_flags_all_zero (&cpu_arch[j].enable))
3025 continue;
3026
3027 isa_enable (j);
3028
3029 (void) restore_line_pointer (e);
3030
3031 switch (cpu_arch[j].vsz)
3032 {
3033 default:
3034 break;
3035
3036 case vsz_set:
3037 #ifdef SVR4_COMMENT_CHARS
3038 if (*input_line_pointer == ':' || *input_line_pointer == '/')
3039 #else
3040 if (*input_line_pointer == '/')
3041 #endif
3042 {
3043 ++input_line_pointer;
3044 switch (get_absolute_expression ())
3045 {
3046 case 512: vector_size = VSZ512; break;
3047 case 256: vector_size = VSZ256; break;
3048 case 128: vector_size = VSZ128; break;
3049 default:
3050 as_bad (_("Unrecognized vector size specifier"));
3051 ignore_rest_of_line ();
3052 return;
3053 }
3054 break;
3055 }
3056 /* Fall through. */
3057 case vsz_reset:
3058 vector_size = VSZ_DEFAULT;
3059 break;
3060 }
3061
3062 demand_empty_rest_of_line ();
3063 return;
3064 }
3065 }
3066
3067 if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
3068 {
3069 /* Disable an ISA extension. */
3070 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
3071 if (cpu_arch[j].type == PROCESSOR_NONE
3072 && strcmp (string + 3, cpu_arch[j].name) == 0)
3073 {
3074 isa_disable (j);
3075
3076 if (cpu_arch[j].vsz == vsz_set)
3077 vector_size = VSZ_DEFAULT;
3078
3079 (void) restore_line_pointer (e);
3080 demand_empty_rest_of_line ();
3081 return;
3082 }
3083 }
3084
3085 if (j == ARRAY_SIZE (cpu_arch))
3086 as_bad (_("no such architecture: `%s'"), string);
3087
3088 *input_line_pointer = e;
3089
3090 no_cond_jump_promotion = 0;
3091 if (*input_line_pointer == ','
3092 && !is_end_of_line[(unsigned char) input_line_pointer[1]])
3093 {
3094 ++input_line_pointer;
3095 e = get_symbol_name (&s);
3096 string = s;
3097
3098 if (strcmp (string, "nojumps") == 0)
3099 no_cond_jump_promotion = 1;
3100 else if (strcmp (string, "jumps") == 0)
3101 ;
3102 else
3103 as_bad (_("no such architecture modifier: `%s'"), string);
3104
3105 (void) restore_line_pointer (e);
3106 }
3107
3108 demand_empty_rest_of_line ();
3109 }
3110
3111 enum bfd_architecture
3112 i386_arch (void)
3113 {
3114 if (cpu_arch_isa == PROCESSOR_IAMCU)
3115 {
3116 if (!IS_ELF || flag_code == CODE_64BIT)
3117 as_fatal (_("Intel MCU is 32bit ELF only"));
3118 return bfd_arch_iamcu;
3119 }
3120 else
3121 return bfd_arch_i386;
3122 }
3123
3124 unsigned long
3125 i386_mach (void)
3126 {
3127 if (startswith (default_arch, "x86_64"))
3128 {
3129 if (default_arch[6] == '\0')
3130 return bfd_mach_x86_64;
3131 else
3132 return bfd_mach_x64_32;
3133 }
3134 else if (!strcmp (default_arch, "i386")
3135 || !strcmp (default_arch, "iamcu"))
3136 {
3137 if (cpu_arch_isa == PROCESSOR_IAMCU)
3138 {
3139 if (!IS_ELF)
3140 as_fatal (_("Intel MCU is 32bit ELF only"));
3141 return bfd_mach_i386_iamcu;
3142 }
3143 else
3144 return bfd_mach_i386_i386;
3145 }
3146 else
3147 as_fatal (_("unknown architecture"));
3148 }
3149 \f
3150 #include "opcodes/i386-tbl.h"
3151
3152 static void
3153 op_lookup (const char *mnemonic)
3154 {
3155 i386_op_off_t *pos = str_hash_find (op_hash, mnemonic);
3156
3157 if (pos != NULL)
3158 {
3159 current_templates.start = &i386_optab[pos[0]];
3160 current_templates.end = &i386_optab[pos[1]];
3161 }
3162 else
3163 current_templates.end = current_templates.start = NULL;
3164 }
3165
3166 void
3167 md_begin (void)
3168 {
3169 /* Support pseudo prefixes like {disp32}. */
3170 lex_type ['{'] = LEX_BEGIN_NAME;
3171
3172 /* Initialize op_hash hash table. */
3173 op_hash = str_htab_create ();
3174
3175 {
3176 const i386_op_off_t *cur = i386_op_sets;
3177 const i386_op_off_t *end = cur + ARRAY_SIZE (i386_op_sets) - 1;
3178
3179 for (; cur < end; ++cur)
3180 if (str_hash_insert (op_hash, insn_name (&i386_optab[*cur]), cur, 0))
3181 as_fatal (_("duplicate %s"), insn_name (&i386_optab[*cur]));
3182 }
3183
3184 /* Initialize reg_hash hash table. */
3185 reg_hash = str_htab_create ();
3186 {
3187 const reg_entry *regtab;
3188 unsigned int regtab_size = i386_regtab_size;
3189
3190 for (regtab = i386_regtab; regtab_size--; regtab++)
3191 {
3192 switch (regtab->reg_type.bitfield.class)
3193 {
3194 case Reg:
3195 if (regtab->reg_type.bitfield.dword)
3196 {
3197 if (regtab->reg_type.bitfield.instance == Accum)
3198 reg_eax = regtab;
3199 }
3200 else if (regtab->reg_type.bitfield.tbyte)
3201 {
3202 /* There's no point inserting st(<N>) in the hash table, as
3203 parentheses aren't included in register_chars[] anyway. */
3204 if (regtab->reg_type.bitfield.instance != Accum)
3205 continue;
3206 reg_st0 = regtab;
3207 }
3208 break;
3209
3210 case SReg:
3211 switch (regtab->reg_num)
3212 {
3213 case 0: reg_es = regtab; break;
3214 case 2: reg_ss = regtab; break;
3215 case 3: reg_ds = regtab; break;
3216 }
3217 break;
3218
3219 case RegMask:
3220 if (!regtab->reg_num)
3221 reg_k0 = regtab;
3222 break;
3223 }
3224
3225 if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3226 as_fatal (_("duplicate %s"), regtab->reg_name);
3227 }
3228 }
3229
3230 /* Fill in lexical tables: mnemonic_chars, operand_chars. */
3231 {
3232 int c;
3233 const char *p;
3234
3235 for (c = 0; c < 256; c++)
3236 {
3237 if (ISDIGIT (c) || ISLOWER (c))
3238 {
3239 mnemonic_chars[c] = c;
3240 register_chars[c] = c;
3241 operand_chars[c] = c;
3242 }
3243 else if (ISUPPER (c))
3244 {
3245 mnemonic_chars[c] = TOLOWER (c);
3246 register_chars[c] = mnemonic_chars[c];
3247 operand_chars[c] = c;
3248 }
3249 #ifdef SVR4_COMMENT_CHARS
3250 else if (c == '\\' && strchr (i386_comment_chars, '/'))
3251 operand_chars[c] = c;
3252 #endif
3253
3254 if (c >= 128)
3255 operand_chars[c] = c;
3256 }
3257
3258 mnemonic_chars['_'] = '_';
3259 mnemonic_chars['-'] = '-';
3260 mnemonic_chars['.'] = '.';
3261
3262 for (p = extra_symbol_chars; *p != '\0'; p++)
3263 operand_chars[(unsigned char) *p] = *p;
3264 for (p = operand_special_chars; *p != '\0'; p++)
3265 operand_chars[(unsigned char) *p] = *p;
3266 }
3267
3268 if (object_64bit)
3269 {
3270 #if defined (OBJ_COFF) && defined (TE_PE)
3271 x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3272 ? 32 : 16);
3273 #else
3274 x86_dwarf2_return_column = 16;
3275 #endif
3276 x86_cie_data_alignment = -8;
3277 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3278 x86_sframe_cfa_sp_reg = REG_SP;
3279 x86_sframe_cfa_fp_reg = REG_FP;
3280 #endif
3281 }
3282 else
3283 {
3284 x86_dwarf2_return_column = 8;
3285 x86_cie_data_alignment = -4;
3286 }
3287
3288 /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3289 can be turned into BRANCH_PREFIX frag. */
3290 if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3291 abort ();
3292 }
3293
3294 void
3295 i386_print_statistics (FILE *file)
3296 {
3297 htab_print_statistics (file, "i386 opcode", op_hash);
3298 htab_print_statistics (file, "i386 register", reg_hash);
3299 }
3300
3301 void
3302 i386_md_end (void)
3303 {
3304 htab_delete (op_hash);
3305 htab_delete (reg_hash);
3306 }
3307 \f
3308 #ifdef DEBUG386
3309
3310 /* Debugging routines for md_assemble. */
3311 static void pte (insn_template *);
3312 static void pt (i386_operand_type);
3313 static void pe (expressionS *);
3314 static void ps (symbolS *);
3315
3316 static void
3317 pi (const char *line, i386_insn *x)
3318 {
3319 unsigned int j;
3320
3321 fprintf (stdout, "%s: template ", line);
3322 pte (&x->tm);
3323 fprintf (stdout, " address: base %s index %s scale %x\n",
3324 x->base_reg ? x->base_reg->reg_name : "none",
3325 x->index_reg ? x->index_reg->reg_name : "none",
3326 x->log2_scale_factor);
3327 fprintf (stdout, " modrm: mode %x reg %x reg/mem %x\n",
3328 x->rm.mode, x->rm.reg, x->rm.regmem);
3329 fprintf (stdout, " sib: base %x index %x scale %x\n",
3330 x->sib.base, x->sib.index, x->sib.scale);
3331 fprintf (stdout, " rex: 64bit %x extX %x extY %x extZ %x\n",
3332 (x->rex & REX_W) != 0,
3333 (x->rex & REX_R) != 0,
3334 (x->rex & REX_X) != 0,
3335 (x->rex & REX_B) != 0);
3336 for (j = 0; j < x->operands; j++)
3337 {
3338 fprintf (stdout, " #%d: ", j + 1);
3339 pt (x->types[j]);
3340 fprintf (stdout, "\n");
3341 if (x->types[j].bitfield.class == Reg
3342 || x->types[j].bitfield.class == RegMMX
3343 || x->types[j].bitfield.class == RegSIMD
3344 || x->types[j].bitfield.class == RegMask
3345 || x->types[j].bitfield.class == SReg
3346 || x->types[j].bitfield.class == RegCR
3347 || x->types[j].bitfield.class == RegDR
3348 || x->types[j].bitfield.class == RegTR
3349 || x->types[j].bitfield.class == RegBND)
3350 fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3351 if (operand_type_check (x->types[j], imm))
3352 pe (x->op[j].imms);
3353 if (operand_type_check (x->types[j], disp))
3354 pe (x->op[j].disps);
3355 }
3356 }
3357
3358 static void
3359 pte (insn_template *t)
3360 {
3361 static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3362 static const char *const opc_spc[] = {
3363 NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3364 "XOP08", "XOP09", "XOP0A",
3365 };
3366 unsigned int j;
3367
3368 fprintf (stdout, " %d operands ", t->operands);
3369 if (opc_pfx[t->opcode_modifier.opcodeprefix])
3370 fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3371 if (opc_spc[t->opcode_space])
3372 fprintf (stdout, "space %s ", opc_spc[t->opcode_space]);
3373 fprintf (stdout, "opcode %x ", t->base_opcode);
3374 if (t->extension_opcode != None)
3375 fprintf (stdout, "ext %x ", t->extension_opcode);
3376 if (t->opcode_modifier.d)
3377 fprintf (stdout, "D");
3378 if (t->opcode_modifier.w)
3379 fprintf (stdout, "W");
3380 fprintf (stdout, "\n");
3381 for (j = 0; j < t->operands; j++)
3382 {
3383 fprintf (stdout, " #%d type ", j + 1);
3384 pt (t->operand_types[j]);
3385 fprintf (stdout, "\n");
3386 }
3387 }
3388
3389 static void
3390 pe (expressionS *e)
3391 {
3392 fprintf (stdout, " operation %d\n", e->X_op);
3393 fprintf (stdout, " add_number %" PRId64 " (%" PRIx64 ")\n",
3394 (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3395 if (e->X_add_symbol)
3396 {
3397 fprintf (stdout, " add_symbol ");
3398 ps (e->X_add_symbol);
3399 fprintf (stdout, "\n");
3400 }
3401 if (e->X_op_symbol)
3402 {
3403 fprintf (stdout, " op_symbol ");
3404 ps (e->X_op_symbol);
3405 fprintf (stdout, "\n");
3406 }
3407 }
3408
3409 static void
3410 ps (symbolS *s)
3411 {
3412 fprintf (stdout, "%s type %s%s",
3413 S_GET_NAME (s),
3414 S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3415 segment_name (S_GET_SEGMENT (s)));
3416 }
3417
3418 static struct type_name
3419 {
3420 i386_operand_type mask;
3421 const char *name;
3422 }
3423 const type_names[] =
3424 {
3425 { { .bitfield = { .class = Reg, .byte = 1 } }, "r8" },
3426 { { .bitfield = { .class = Reg, .word = 1 } }, "r16" },
3427 { { .bitfield = { .class = Reg, .dword = 1 } }, "r32" },
3428 { { .bitfield = { .class = Reg, .qword = 1 } }, "r64" },
3429 { { .bitfield = { .instance = Accum, .byte = 1 } }, "acc8" },
3430 { { .bitfield = { .instance = Accum, .word = 1 } }, "acc16" },
3431 { { .bitfield = { .instance = Accum, .dword = 1 } }, "acc32" },
3432 { { .bitfield = { .instance = Accum, .qword = 1 } }, "acc64" },
3433 { { .bitfield = { .imm8 = 1 } }, "i8" },
3434 { { .bitfield = { .imm8s = 1 } }, "i8s" },
3435 { { .bitfield = { .imm16 = 1 } }, "i16" },
3436 { { .bitfield = { .imm32 = 1 } }, "i32" },
3437 { { .bitfield = { .imm32s = 1 } }, "i32s" },
3438 { { .bitfield = { .imm64 = 1 } }, "i64" },
3439 { { .bitfield = { .imm1 = 1 } }, "i1" },
3440 { { .bitfield = { .baseindex = 1 } }, "BaseIndex" },
3441 { { .bitfield = { .disp8 = 1 } }, "d8" },
3442 { { .bitfield = { .disp16 = 1 } }, "d16" },
3443 { { .bitfield = { .disp32 = 1 } }, "d32" },
3444 { { .bitfield = { .disp64 = 1 } }, "d64" },
3445 { { .bitfield = { .instance = RegD, .word = 1 } }, "InOutPortReg" },
3446 { { .bitfield = { .instance = RegC, .byte = 1 } }, "ShiftCount" },
3447 { { .bitfield = { .class = RegCR } }, "control reg" },
3448 { { .bitfield = { .class = RegTR } }, "test reg" },
3449 { { .bitfield = { .class = RegDR } }, "debug reg" },
3450 { { .bitfield = { .class = Reg, .tbyte = 1 } }, "FReg" },
3451 { { .bitfield = { .instance = Accum, .tbyte = 1 } }, "FAcc" },
3452 { { .bitfield = { .class = SReg } }, "SReg" },
3453 { { .bitfield = { .class = RegMMX } }, "rMMX" },
3454 { { .bitfield = { .class = RegSIMD, .xmmword = 1 } }, "rXMM" },
3455 { { .bitfield = { .class = RegSIMD, .ymmword = 1 } }, "rYMM" },
3456 { { .bitfield = { .class = RegSIMD, .zmmword = 1 } }, "rZMM" },
3457 { { .bitfield = { .class = RegSIMD, .tmmword = 1 } }, "rTMM" },
3458 { { .bitfield = { .class = RegMask } }, "Mask reg" },
3459 };
3460
3461 static void
3462 pt (i386_operand_type t)
3463 {
3464 unsigned int j;
3465 i386_operand_type a;
3466
3467 for (j = 0; j < ARRAY_SIZE (type_names); j++)
3468 {
3469 a = operand_type_and (t, type_names[j].mask);
3470 if (operand_type_equal (&a, &type_names[j].mask))
3471 fprintf (stdout, "%s, ", type_names[j].name);
3472 }
3473 fflush (stdout);
3474 }
3475
3476 #endif /* DEBUG386 */
3477 \f
3478 static bfd_reloc_code_real_type
3479 reloc (unsigned int size,
3480 int pcrel,
3481 int sign,
3482 bfd_reloc_code_real_type other)
3483 {
3484 if (other != NO_RELOC)
3485 {
3486 reloc_howto_type *rel;
3487
3488 if (size == 8)
3489 switch (other)
3490 {
3491 case BFD_RELOC_X86_64_GOT32:
3492 return BFD_RELOC_X86_64_GOT64;
3494 case BFD_RELOC_X86_64_GOTPLT64:
3495 return BFD_RELOC_X86_64_GOTPLT64;
3497 case BFD_RELOC_X86_64_PLTOFF64:
3498 return BFD_RELOC_X86_64_PLTOFF64;
3500 case BFD_RELOC_X86_64_GOTPC32:
3501 other = BFD_RELOC_X86_64_GOTPC64;
3502 break;
3503 case BFD_RELOC_X86_64_GOTPCREL:
3504 other = BFD_RELOC_X86_64_GOTPCREL64;
3505 break;
3506 case BFD_RELOC_X86_64_TPOFF32:
3507 other = BFD_RELOC_X86_64_TPOFF64;
3508 break;
3509 case BFD_RELOC_X86_64_DTPOFF32:
3510 other = BFD_RELOC_X86_64_DTPOFF64;
3511 break;
3512 default:
3513 break;
3514 }
3515
3516 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3517 if (other == BFD_RELOC_SIZE32)
3518 {
3519 if (size == 8)
3520 other = BFD_RELOC_SIZE64;
3521 if (pcrel)
3522 {
3523 as_bad (_("there are no pc-relative size relocations"));
3524 return NO_RELOC;
3525 }
3526 }
3527 #endif
3528
3529 /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless. */
3530 if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3531 sign = -1;
3532
3533 rel = bfd_reloc_type_lookup (stdoutput, other);
3534 if (!rel)
3535 as_bad (_("unknown relocation (%u)"), other);
3536 else if (size != bfd_get_reloc_size (rel))
3537 as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3538 bfd_get_reloc_size (rel),
3539 size);
3540 else if (pcrel && !rel->pc_relative)
3541 as_bad (_("non-pc-relative relocation for pc-relative field"));
3542 else if ((rel->complain_on_overflow == complain_overflow_signed
3543 && !sign)
3544 || (rel->complain_on_overflow == complain_overflow_unsigned
3545 && sign > 0))
3546 as_bad (_("relocated field and relocation type differ in signedness"));
3547 else
3548 return other;
3549 return NO_RELOC;
3550 }
3551
3552 if (pcrel)
3553 {
3554 if (!sign)
3555 as_bad (_("there are no unsigned pc-relative relocations"));
3556 switch (size)
3557 {
3558 case 1: return BFD_RELOC_8_PCREL;
3559 case 2: return BFD_RELOC_16_PCREL;
3560 case 4: return BFD_RELOC_32_PCREL;
3561 case 8: return BFD_RELOC_64_PCREL;
3562 }
3563 as_bad (_("cannot do %u byte pc-relative relocation"), size);
3564 }
3565 else
3566 {
3567 if (sign > 0)
3568 switch (size)
3569 {
3570 case 4: return BFD_RELOC_X86_64_32S;
3571 }
3572 else
3573 switch (size)
3574 {
3575 case 1: return BFD_RELOC_8;
3576 case 2: return BFD_RELOC_16;
3577 case 4: return BFD_RELOC_32;
3578 case 8: return BFD_RELOC_64;
3579 }
3580 as_bad (_("cannot do %s %u byte relocation"),
3581 sign > 0 ? "signed" : "unsigned", size);
3582 }
3583
3584 return NO_RELOC;
3585 }
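
/* Examples: reloc (4, 1, 1, NO_RELOC) yields BFD_RELOC_32_PCREL for a
   4-byte pc-relative field, reloc (4, 0, 1, NO_RELOC) yields
   BFD_RELOC_X86_64_32S, and an explicit OTHER relocation is instead
   validated (and, for size 8, possibly widened to its 64-bit
   counterpart).  */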
3586
3587 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3588 /* Here we decide which fixups can be adjusted to make them relative to
3589 the beginning of the section instead of the symbol. Basically we need
3590 to make sure that the dynamic relocations are done correctly, so in
3591 some cases we force the original symbol to be used. */
3592
3593 int
3594 tc_i386_fix_adjustable (fixS *fixP)
3595 {
3596 if (!IS_ELF)
3597 return 1;
3598
3599 /* Don't adjust pc-relative references to merge sections in 64-bit
3600 mode. */
3601 if (use_rela_relocations
3602 && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3603 && fixP->fx_pcrel)
3604 return 0;
3605
3606 /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3607 and changed later by validate_fix. */
3608 if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3609 && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3610 return 0;
3611
3612 /* Adjust_reloc_syms doesn't know about the GOT. Need to keep symbol
3613 for size relocations. */
3614 if (fixP->fx_r_type == BFD_RELOC_SIZE32
3615 || fixP->fx_r_type == BFD_RELOC_SIZE64
3616 || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3617 || fixP->fx_r_type == BFD_RELOC_386_GOT32
3618 || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3619 || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3620 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3621 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3622 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3623 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3624 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3625 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3626 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3627 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3628 || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3629 || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3630 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3631 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3632 || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3633 || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTPCRELX
3634 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3635 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3636 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3637 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3638 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3639 || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTTPOFF
3640 || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_6_GOTTPOFF
3641 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3642 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3643 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3644 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3645 || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC
3646 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3647 || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3648 || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3649 return 0;
3650 return 1;
3651 }
3652 #endif
3653
3654 static INLINE bool
3655 want_disp32 (const insn_template *t)
3656 {
3657 return flag_code != CODE_64BIT
3658 || i.prefix[ADDR_PREFIX]
3659 || (t->mnem_off == MN_lea
3660 && (!i.types[1].bitfield.qword
3661 || t->opcode_modifier.size == SIZE32));
3662 }
3663
3664 static int
3665 intel_float_operand (const char *mnemonic)
3666 {
3667 /* Note that the value returned is meaningful only for opcodes with (memory)
3668 operands, hence the code here is free to improperly handle opcodes that
3669 have no operands (for better performance and smaller code). */
3670
3671 if (mnemonic[0] != 'f')
3672 return 0; /* non-math */
3673
3674 switch (mnemonic[1])
3675 {
3676 /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
3677 the fs segment override prefix not currently handled because no
3678 call path can make opcodes without operands get here */
3679 case 'i':
3680 return 2 /* integer op */;
3681 case 'l':
3682 if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
3683 return 3; /* fldcw/fldenv */
3684 break;
3685 case 'n':
3686 if (mnemonic[2] != 'o' /* fnop */)
3687 return 3; /* non-waiting control op */
3688 break;
3689 case 'r':
3690 if (mnemonic[2] == 's')
3691 return 3; /* frstor/frstpm */
3692 break;
3693 case 's':
3694 if (mnemonic[2] == 'a')
3695 return 3; /* fsave */
3696 if (mnemonic[2] == 't')
3697 {
3698 switch (mnemonic[3])
3699 {
3700 case 'c': /* fstcw */
3701 case 'd': /* fstdw */
3702 case 'e': /* fstenv */
3703 case 's': /* fsts[gw] */
3704 return 3;
3705 }
3706 }
3707 break;
3708 case 'x':
3709 if (mnemonic[2] == 'r' || mnemonic[2] == 's')
3710 return 0; /* fxsave/fxrstor are not really math ops */
3711 break;
3712 }
3713
3714 return 1;
3715 }
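
/* Classification examples: "fild" -> 2 (integer op), "fldcw" -> 3
   (control op), "fadd" -> 1 (ordinary FP math), "fxsave" and non-FPU
   mnemonics -> 0.  */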
3716
3717 static INLINE void
3718 install_template (const insn_template *t)
3719 {
3720 unsigned int l;
3721
3722 i.tm = *t;
3723
3724 /* Dual VEX/EVEX templates need stripping one of the possible variants. */
3725 if (t->opcode_modifier.vex && t->opcode_modifier.evex)
3726 {
3727 if ((maybe_cpu (t, CpuAVX) || maybe_cpu (t, CpuAVX2)
3728 || maybe_cpu (t, CpuFMA))
3729 && (maybe_cpu (t, CpuAVX512F) || maybe_cpu (t, CpuAVX512VL)))
3730 {
3731 if (need_evex_encoding (t))
3732 {
3733 i.tm.opcode_modifier.vex = 0;
3734 i.tm.cpu.bitfield.cpuavx512f = i.tm.cpu_any.bitfield.cpuavx512f;
3735 i.tm.cpu.bitfield.cpuavx512vl = i.tm.cpu_any.bitfield.cpuavx512vl;
3736 }
3737 else
3738 {
3739 i.tm.opcode_modifier.evex = 0;
3740 if (i.tm.cpu_any.bitfield.cpuavx)
3741 i.tm.cpu.bitfield.cpuavx = 1;
3742 else if (!i.tm.cpu.bitfield.isa)
3743 i.tm.cpu.bitfield.isa = i.tm.cpu_any.bitfield.isa;
3744 else
3745 gas_assert (i.tm.cpu.bitfield.isa == i.tm.cpu_any.bitfield.isa);
3746 }
3747 }
3748
3749 if ((maybe_cpu (t, CpuCMPCCXADD) || maybe_cpu (t, CpuAMX_TILE)
3750 || maybe_cpu (t, CpuAVX512F) || maybe_cpu (t, CpuAVX512DQ)
3751 || maybe_cpu (t, CpuAVX512BW) || maybe_cpu (t, CpuBMI)
3752 || maybe_cpu (t, CpuBMI2) || maybe_cpu (t, CpuUSER_MSR))
3753 && maybe_cpu (t, CpuAPX_F))
3754 {
3755 if (need_evex_encoding (t))
3756 i.tm.opcode_modifier.vex = 0;
3757 else
3758 i.tm.opcode_modifier.evex = 0;
3759 }
3760 }
3761
3762 /* Note that for pseudo prefixes this produces a length of 1. But for them
3763 the length isn't interesting at all. */
3764 for (l = 1; l < 4; ++l)
3765 if (!(t->base_opcode >> (8 * l)))
3766 break;
3767
3768 i.opcode_length = l;
3769 }
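
/* Note on the trailing loop: a one-byte base_opcode such as 0xc3 gives
   i.opcode_length == 1, a (hypothetical) two-byte 0x0f05 would give 2,
   and the loop caps the length at 4 bytes.  */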
3770
3771 /* Build the VEX prefix. */
3772
3773 static void
3774 build_vex_prefix (const insn_template *t)
3775 {
3776 unsigned int register_specifier;
3777 unsigned int vector_length;
3778 unsigned int w;
3779
3780 /* Check register specifier. */
3781 if (i.vex.register_specifier)
3782 {
3783 register_specifier =
3784 ~register_number (i.vex.register_specifier) & 0xf;
3785 gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3786 }
3787 else
3788 register_specifier = 0xf;
3789
3790 /* Use 2-byte VEX prefix by swapping destination and source operand
3791 if there is more than one register operand. */
3792 if (i.reg_operands > 1
3793 && i.encoding != encoding_vex3
3794 && i.dir_encoding == dir_encoding_default
3795 && i.operands == i.reg_operands
3796 && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3797 && i.tm.opcode_space == SPACE_0F
3798 && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3799 && i.rex == REX_B)
3800 {
3801 unsigned int xchg;
3802
3803 swap_2_operands (0, i.operands - 1);
3804
3805 gas_assert (i.rm.mode == 3);
3806
3807 i.rex = REX_R;
3808 xchg = i.rm.regmem;
3809 i.rm.regmem = i.rm.reg;
3810 i.rm.reg = xchg;
3811
3812 if (i.tm.opcode_modifier.d)
3813 i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3814 ? Opcode_ExtD : Opcode_SIMD_IntD;
3815 else /* Use the next insn. */
3816 install_template (&t[1]);
3817 }
3818
3819 /* Use 2-byte VEX prefix by swapping commutative source operands if there
3820 are no memory operands and at least 3 register ones. */
3821 if (i.reg_operands >= 3
3822 && i.encoding != encoding_vex3
3823 && i.reg_operands == i.operands - i.imm_operands
3824 && i.tm.opcode_modifier.vex
3825 && i.tm.opcode_modifier.commutative
3826 /* .commutative aliases .staticrounding; disambiguate. */
3827 && !i.tm.opcode_modifier.sae
3828 && (i.tm.opcode_modifier.sse2avx
3829 || (optimize > 1 && !i.no_optimize))
3830 && i.rex == REX_B
3831 && i.vex.register_specifier
3832 && !(i.vex.register_specifier->reg_flags & RegRex))
3833 {
3834 unsigned int xchg = i.operands - i.reg_operands;
3835
3836 gas_assert (i.tm.opcode_space == SPACE_0F);
3837 gas_assert (!i.tm.opcode_modifier.sae);
3838 gas_assert (operand_type_equal (&i.types[i.operands - 2],
3839 &i.types[i.operands - 3]));
3840 gas_assert (i.rm.mode == 3);
3841
3842 swap_2_operands (xchg, xchg + 1);
3843
3844 i.rex = 0;
3845 xchg = i.rm.regmem | 8;
3846 i.rm.regmem = ~register_specifier & 0xf;
3847 gas_assert (!(i.rm.regmem & 8));
3848 i.vex.register_specifier += xchg - i.rm.regmem;
3849 register_specifier = ~xchg & 0xf;
3850 }
3851
3852 if (i.tm.opcode_modifier.vex == VEXScalar)
3853 vector_length = avxscalar;
3854 else if (i.tm.opcode_modifier.vex == VEX256)
3855 vector_length = 1;
3856 else if (dot_insn () && i.tm.opcode_modifier.vex == VEX128)
3857 vector_length = 0;
3858 else
3859 {
3860 unsigned int op;
3861
3862 /* Determine vector length from the last multi-length vector
3863 operand. */
3864 vector_length = 0;
3865 for (op = t->operands; op--;)
3866 if (t->operand_types[op].bitfield.xmmword
3867 && t->operand_types[op].bitfield.ymmword
3868 && i.types[op].bitfield.ymmword)
3869 {
3870 vector_length = 1;
3871 break;
3872 }
3873 }
3874
3875 /* Check the REX.W bit and VEXW. */
3876 if (i.tm.opcode_modifier.vexw == VEXWIG)
3877 w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3878 else if (i.tm.opcode_modifier.vexw && !(i.rex & REX_W))
3879 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3880 else
3881 w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3882
3883 /* Use 2-byte VEX prefix if possible. */
3884 if (w == 0
3885 && i.encoding != encoding_vex3
3886 && i.tm.opcode_space == SPACE_0F
3887 && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3888 {
3889 /* 2-byte VEX prefix. */
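/* Byte layout, per the code below: c5 | R vvvv L pp, where R is the
   inverted REX.R bit, vvvv the inverted register specifier, L the vector
   length, and pp the opcode prefix. */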
3890 unsigned int r;
3891
3892 i.vex.length = 2;
3893 i.vex.bytes[0] = 0xc5;
3894
3895 /* Check the REX.R bit. */
3896 r = (i.rex & REX_R) ? 0 : 1;
3897 i.vex.bytes[1] = (r << 7
3898 | register_specifier << 3
3899 | vector_length << 2
3900 | i.tm.opcode_modifier.opcodeprefix);
3901 }
3902 else
3903 {
3904 /* 3-byte VEX prefix. */
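/* Byte layout, per the code below: c4/8f | RXB mmmmm | W vvvv L pp, where
   RXB are the inverted REX bits and mmmmm selects the opcode space. */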
3905 i.vex.length = 3;
3906
3907 switch (i.tm.opcode_space)
3908 {
3909 case SPACE_0F:
3910 case SPACE_0F38:
3911 case SPACE_0F3A:
3912 case SPACE_VEXMAP7:
3913 i.vex.bytes[0] = 0xc4;
3914 break;
3915 case SPACE_XOP08:
3916 case SPACE_XOP09:
3917 case SPACE_XOP0A:
3918 i.vex.bytes[0] = 0x8f;
3919 break;
3920 default:
3921 abort ();
3922 }
3923
3924 /* The high 3 bits of the second VEX byte are the one's complement
3925 of the RXB bits from REX. */
3926 i.vex.bytes[1] = ((~i.rex & 7) << 5)
3927 | (!dot_insn () ? i.tm.opcode_space
3928 : i.insn_opcode_space);
3929
3930 i.vex.bytes[2] = (w << 7
3931 | register_specifier << 3
3932 | vector_length << 2
3933 | i.tm.opcode_modifier.opcodeprefix);
3934 }
3935 }
3936
3937 static INLINE bool
3938 is_any_vex_encoding (const insn_template *t)
3939 {
3940 return t->opcode_modifier.vex || t->opcode_modifier.evex;
3941 }
3942
3943 /* We can use this function only when the current encoding is evex. */
3944 static INLINE bool
3945 is_apx_evex_encoding (void)
3946 {
3947 return i.rex2 || i.tm.opcode_space == SPACE_EVEXMAP4
3948 || (i.vex.register_specifier
3949 && (i.vex.register_specifier->reg_flags & RegRex2));
3950 }
3951
3952 static INLINE bool
3953 is_apx_rex2_encoding (void)
3954 {
3955 return i.rex2 || i.rex2_encoding
3956 || i.tm.opcode_modifier.rex2;
3957 }
3958
3959 static unsigned int
3960 get_broadcast_bytes (const insn_template *t, bool diag)
3961 {
3962 unsigned int op, bytes;
3963 const i386_operand_type *types;
3964
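/* E.g., with dword elements and a {1to16} broadcast, the computation below
   yields 4 * 16 == 64 bytes. */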
3965 if (i.broadcast.type)
3966 return (1 << (t->opcode_modifier.broadcast - 1)) * i.broadcast.type;
3967
3968 gas_assert (intel_syntax);
3969
3970 for (op = 0; op < t->operands; ++op)
3971 if (t->operand_types[op].bitfield.baseindex)
3972 break;
3973
3974 gas_assert (op < t->operands);
3975
3976 if (t->opcode_modifier.evex != EVEXDYN)
3977 switch (i.broadcast.bytes)
3978 {
3979 case 1:
3980 if (t->operand_types[op].bitfield.word)
3981 return 2;
3982 /* Fall through. */
3983 case 2:
3984 if (t->operand_types[op].bitfield.dword)
3985 return 4;
3986 /* Fall through. */
3987 case 4:
3988 if (t->operand_types[op].bitfield.qword)
3989 return 8;
3990 /* Fall through. */
3991 case 8:
3992 if (t->operand_types[op].bitfield.xmmword)
3993 return 16;
3994 if (t->operand_types[op].bitfield.ymmword)
3995 return 32;
3996 if (t->operand_types[op].bitfield.zmmword)
3997 return 64;
3998 /* Fall through. */
3999 default:
4000 abort ();
4001 }
4002
4003 gas_assert (op + 1 < t->operands);
4004
4005 if (t->operand_types[op + 1].bitfield.xmmword
4006 + t->operand_types[op + 1].bitfield.ymmword
4007 + t->operand_types[op + 1].bitfield.zmmword > 1)
4008 {
4009 types = &i.types[op + 1];
4010 diag = false;
4011 }
4012 else /* Ambiguous - guess with a preference to non-AVX512VL forms. */
4013 types = &t->operand_types[op];
4014
4015 if (types->bitfield.zmmword)
4016 bytes = 64;
4017 else if (types->bitfield.ymmword)
4018 bytes = 32;
4019 else
4020 bytes = 16;
4021
4022 if (diag)
4023 as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
4024 insn_name (t), bytes * 8);
4025
4026 return bytes;
4027 }
4028
4029 /* Build the EVEX prefix. */
4030
4031 static void
4032 build_evex_prefix (void)
4033 {
4034 unsigned int register_specifier, w;
4035 rex_byte vrex_used = 0;
4036
4037 /* Check register specifier. */
4038 if (i.vex.register_specifier)
4039 {
4040 gas_assert ((i.vrex & REX_X) == 0);
4041
4042 register_specifier = i.vex.register_specifier->reg_num;
4043 if ((i.vex.register_specifier->reg_flags & RegRex))
4044 register_specifier += 8;
4045 /* The upper 16 registers are encoded in the fourth byte of the
4046 EVEX prefix. */
4047 if (!(i.vex.register_specifier->reg_flags & RegVRex))
4048 i.vex.bytes[3] = 0x8;
4049 register_specifier = ~register_specifier & 0xf;
4050 }
4051 else
4052 {
4053 register_specifier = 0xf;
4054
4055 /* Encode upper 16 vector index register in the fourth byte of
4056 the EVEX prefix. */
4057 if (!(i.vrex & REX_X))
4058 i.vex.bytes[3] = 0x8;
4059 else
4060 vrex_used |= REX_X;
4061 }
4062
4063 /* 4 byte EVEX prefix. */
4064 i.vex.length = 4;
4065 i.vex.bytes[0] = 0x62;
4066
4067 /* The high 3 bits of the second EVEX byte are the one's complement of
4068 the RXB bits from REX. */
4069 gas_assert (i.tm.opcode_space >= SPACE_0F);
4070 gas_assert (i.tm.opcode_space <= SPACE_VEXMAP7);
4071 i.vex.bytes[1] = ((~i.rex & 7) << 5)
4072 | (!dot_insn () ? i.tm.opcode_space
4073 : i.insn_opcode_space);
4074
4075 /* The fifth bit of the second EVEX byte is the one's complement of the
4076 REX_R bit in VREX. */
4077 if (!(i.vrex & REX_R))
4078 i.vex.bytes[1] |= 0x10;
4079 else
4080 vrex_used |= REX_R;
4081
4082 if ((i.reg_operands + i.imm_operands) == i.operands)
4083 {
4084 /* When all operands are registers, the REX_X bit in REX is not
4085 used. We reuse it to encode the upper 16 registers, which is
4086 indicated by the REX_B bit in VREX. The REX_X bit is encoded
4087 as its one's complement. */
4088 if ((i.vrex & REX_B))
4089 {
4090 vrex_used |= REX_B;
4091 i.vex.bytes[1] &= ~0x40;
4092 }
4093 }
4094
4095 /* EVEX instructions shouldn't need the REX prefix. */
4096 i.vrex &= ~vrex_used;
4097 gas_assert (i.vrex == 0);
4098
4099 /* Check the REX.W bit and VEXW. */
4100 if (i.tm.opcode_modifier.vexw == VEXWIG)
4101 w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
4102 else if (i.tm.opcode_modifier.vexw)
4103 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
4104 else
4105 w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
4106
4107 /* The third byte of the EVEX prefix. */
4108 i.vex.bytes[2] = ((w << 7)
4109 | (register_specifier << 3)
4110 | 4 /* Encode the U bit. */
4111 | i.tm.opcode_modifier.opcodeprefix);
4112
4113 /* The fourth byte of the EVEX prefix. */
4114 /* The zeroing-masking bit. */
4115 if (i.mask.reg && i.mask.zeroing)
4116 i.vex.bytes[3] |= 0x80;
4117
4118 /* Don't always set the broadcast bit if there is no RC. */
4119 if (i.rounding.type == rc_none)
4120 {
4121 /* Encode the vector length. */
4122 unsigned int vec_length;
4123
4124 if (i.tm.opcode_modifier.evex == EVEXDYN)
4125 {
4126 unsigned int op;
4127
4128 /* Determine vector length from the last multi-length vector
4129 operand. */
4130 for (op = i.operands; op--;)
4131 if (i.tm.operand_types[op].bitfield.xmmword
4132 + i.tm.operand_types[op].bitfield.ymmword
4133 + i.tm.operand_types[op].bitfield.zmmword > 1)
4134 {
4135 if (i.types[op].bitfield.zmmword)
4136 {
4137 i.tm.opcode_modifier.evex = EVEX512;
4138 break;
4139 }
4140 else if (i.types[op].bitfield.ymmword)
4141 {
4142 i.tm.opcode_modifier.evex = EVEX256;
4143 break;
4144 }
4145 else if (i.types[op].bitfield.xmmword)
4146 {
4147 i.tm.opcode_modifier.evex = EVEX128;
4148 break;
4149 }
4150 else if ((i.broadcast.type || i.broadcast.bytes)
4151 && op == i.broadcast.operand)
4152 {
4153 switch (get_broadcast_bytes (&i.tm, true))
4154 {
4155 case 64:
4156 i.tm.opcode_modifier.evex = EVEX512;
4157 break;
4158 case 32:
4159 i.tm.opcode_modifier.evex = EVEX256;
4160 break;
4161 case 16:
4162 i.tm.opcode_modifier.evex = EVEX128;
4163 break;
4164 default:
4165 abort ();
4166 }
4167 break;
4168 }
4169 }
4170
4171 if (op >= MAX_OPERANDS)
4172 abort ();
4173 }
4174
4175 switch (i.tm.opcode_modifier.evex)
4176 {
4177 case EVEXLIG: /* LL' is ignored */
4178 vec_length = evexlig << 5;
4179 break;
4180 case EVEX128:
4181 vec_length = 0 << 5;
4182 break;
4183 case EVEX256:
4184 vec_length = 1 << 5;
4185 break;
4186 case EVEX512:
4187 vec_length = 2 << 5;
4188 break;
4189 case EVEX_L3:
4190 if (dot_insn ())
4191 {
4192 vec_length = 3 << 5;
4193 break;
4194 }
4195 /* Fall through. */
4196 default:
4197 abort ();
4198 break;
4199 }
4200 i.vex.bytes[3] |= vec_length;
4201 /* Encode the broadcast bit. */
4202 if (i.broadcast.type || i.broadcast.bytes)
4203 i.vex.bytes[3] |= 0x10;
4204 }
4205 else if (i.rounding.type != saeonly)
4206 i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
4207 else
4208 i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
4209
4210 if (i.mask.reg)
4211 i.vex.bytes[3] |= i.mask.reg->reg_num;
4212 }
4213
4214 /* Build the 2-byte REX2 prefix:
4215 | D5h |
4216 | m | R4 X4 B4 | W R X B |
4217
4218 REX2 reuses i.vex, as both encode i.tm.opcode_space in their prefixes.
4219 */
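/* For illustration: with the 0F map (m = 1) plus R4 and REX.W set, the
   second byte would be 0x80 | (REX_R << 4) | REX_W == 0xc8, giving the
   prefix d5 c8. */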
4220 static void
4221 build_rex2_prefix (void)
4222 {
4223 i.vex.length = 2;
4224 i.vex.bytes[0] = 0xd5;
4225 /* For the W R X B bits, the variables of rex prefix will be reused. */
4226 i.vex.bytes[1] = ((i.tm.opcode_space << 7)
4227 | (i.rex2 << 4) | i.rex);
4228 }
4229
4230 /* Build the 4-byte EVEX prefix for an EVEX insn; a leading ` marks an inverted bit.
4231 | 62h |
4232 | `R`X`B`R' | B'mmm |
4233 | W | `v`v`v`v | `x' | pp |
4234 | z | L'L | b | `v | aaa |
4235 */
4236 static void
4237 build_apx_evex_prefix (void)
4238 {
4239 build_evex_prefix ();
4240 if (i.rex2 & REX_R)
4241 i.vex.bytes[1] &= ~0x10;
4242 if (i.rex2 & REX_B)
4243 i.vex.bytes[1] |= 0x08;
4244 if (i.rex2 & REX_X)
4245 i.vex.bytes[2] &= ~0x04;
4246 if (i.vex.register_specifier
4247 && i.vex.register_specifier->reg_flags & RegRex2)
4248 i.vex.bytes[3] &= ~0x08;
4249
4250 /* Encode the NDD bit of the instruction promoted from the legacy
4251 space. */
4252 if (i.vex.register_specifier && i.tm.opcode_space == SPACE_EVEXMAP4)
4253 i.vex.bytes[3] |= 0x10;
4254 }
4255
4256 static void establish_rex (void)
4257 {
4258 /* Note that legacy encodings have at most 2 non-immediate operands. */
4259 unsigned int first = i.imm_operands;
4260 unsigned int last = i.operands > first ? i.operands - first - 1 : first;
4261
4262 /* Respect a user-specified REX prefix. */
4263 i.rex |= i.prefix[REX_PREFIX] & REX_OPCODE;
4264
4265 /* For 8 bit registers we need an empty rex prefix. Also if the
4266 instruction already has a prefix, we need to convert old
4267 registers to new ones. */
4268
4269 if ((i.types[first].bitfield.class == Reg && i.types[first].bitfield.byte
4270 && ((i.op[first].regs->reg_flags & RegRex64) != 0 || i.rex != 0
4271 || i.rex2 != 0))
4272 || (i.types[last].bitfield.class == Reg && i.types[last].bitfield.byte
4273 && ((i.op[last].regs->reg_flags & RegRex64) != 0 || i.rex != 0
4274 || i.rex2 != 0)))
4275 {
4276 unsigned int x;
4277
4278 if (!is_apx_rex2_encoding () && !is_any_vex_encoding(&i.tm))
4279 i.rex |= REX_OPCODE;
4280 for (x = first; x <= last; x++)
4281 {
4282 /* Look for 8 bit operand that uses old registers. */
4283 if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
4284 && (i.op[x].regs->reg_flags & RegRex64) == 0)
4285 {
4286 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
4287 /* If it is a "hi" register, give up. */
4288 if (i.op[x].regs->reg_num > 3)
4289 as_bad (_("can't encode register '%s%s' in an "
4290 "instruction requiring REX/REX2 prefix"),
4291 register_prefix, i.op[x].regs->reg_name);
4292
4293 /* Otherwise it is equivalent to the extended register.
4294 Since the encoding doesn't change this is merely
4295 cosmetic cleanup for debug output. */
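/* E.g., %al is re-pointed at the equivalent %axl table entry, which
   encodes identically under REX/REX2. */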
4296 i.op[x].regs += 8;
4297 }
4298 }
4299 }
4300
4301 if (i.rex == 0 && i.rex2 == 0 && (i.rex_encoding || i.rex2_encoding))
4302 {
4303 /* Check if we can add a REX_OPCODE byte. Look for an 8-bit operand
4304 that uses a legacy register. If it is a "hi" register, don't add
4305 a REX or REX2 prefix. */
4306 unsigned int x;
4307
4308 for (x = first; x <= last; x++)
4309 if (i.types[x].bitfield.class == Reg
4310 && i.types[x].bitfield.byte
4311 && (i.op[x].regs->reg_flags & RegRex64) == 0
4312 && i.op[x].regs->reg_num > 3)
4313 {
4314 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
4315 i.rex_encoding = false;
4316 i.rex2_encoding = false;
4317 break;
4318 }
4319
4320 if (i.rex_encoding)
4321 i.rex = REX_OPCODE;
4322 }
4323
4324 if (is_apx_rex2_encoding ())
4325 {
4326 build_rex2_prefix ();
4327 /* The individual REX.RXBW bits got consumed. */
4328 i.rex &= REX_OPCODE;
4329 }
4330 else if (i.rex != 0)
4331 add_prefix (REX_OPCODE | i.rex);
4332 }
4333
4334 static void
4335 process_immext (void)
4336 {
4337 expressionS *exp;
4338
4339 /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4340 which is coded in the same place as an 8-bit immediate field
4341 would be. Here we fake an 8-bit immediate operand from the
4342 opcode suffix stored in tm.extension_opcode.
4343
4344 AVX instructions also use this encoding for some
4345 3-operand instructions. */
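/* For example, the 3DNow! `pfadd' is encoded as 0f 0f /r with suffix byte
   0x9e; that suffix byte is emitted via the fake imm8 operand created
   here. */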
4346
4347 gas_assert (i.imm_operands <= 1
4348 && (i.operands <= 2
4349 || (is_any_vex_encoding (&i.tm)
4350 && i.operands <= 4)));
4351
4352 exp = &im_expressions[i.imm_operands++];
4353 i.op[i.operands].imms = exp;
4354 i.types[i.operands].bitfield.imm8 = 1;
4355 i.operands++;
4356 exp->X_op = O_constant;
4357 exp->X_add_number = i.tm.extension_opcode;
4358 i.tm.extension_opcode = None;
4359 }
4360
4361
4362 static int
4363 check_hle (void)
4364 {
4365 switch (i.tm.opcode_modifier.prefixok)
4366 {
4367 default:
4368 abort ();
4369 case PrefixLock:
4370 case PrefixNone:
4371 case PrefixNoTrack:
4372 case PrefixRep:
4373 as_bad (_("invalid instruction `%s' after `%s'"),
4374 insn_name (&i.tm), i.hle_prefix);
4375 return 0;
4376 case PrefixHLELock:
4377 if (i.prefix[LOCK_PREFIX])
4378 return 1;
4379 as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4380 return 0;
4381 case PrefixHLEAny:
4382 return 1;
4383 case PrefixHLERelease:
4384 if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4385 {
4386 as_bad (_("instruction `%s' after `xacquire' not allowed"),
4387 insn_name (&i.tm));
4388 return 0;
4389 }
4390 if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4391 {
4392 as_bad (_("memory destination needed for instruction `%s'"
4393 " after `xrelease'"), insn_name (&i.tm));
4394 return 0;
4395 }
4396 return 1;
4397 }
4398 }
4399
4400 /* Encode aligned vector move as unaligned vector move. */
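/* E.g., movaps (0f 28) becomes movups (0f 10), and movdqa (66 0f 6f)
   becomes movdqu (f3 0f 6f), per the opcode/prefix rewrites below. */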
4401
4402 static void
4403 encode_with_unaligned_vector_move (void)
4404 {
4405 switch (i.tm.base_opcode)
4406 {
4407 case 0x28: /* Load instructions. */
4408 case 0x29: /* Store instructions. */
4409 /* movaps/movapd/vmovaps/vmovapd. */
4410 if (i.tm.opcode_space == SPACE_0F
4411 && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4412 i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4413 break;
4414 case 0x6f: /* Load instructions. */
4415 case 0x7f: /* Store instructions. */
4416 /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4417 if (i.tm.opcode_space == SPACE_0F
4418 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4419 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4420 break;
4421 default:
4422 break;
4423 }
4424 }
4425
4426 /* Try the shortest encoding by shortening operand size. */
4427
4428 static void
4429 optimize_encoding (void)
4430 {
4431 unsigned int j;
4432
4433 if (i.tm.mnem_off == MN_lea)
4434 {
4435 /* Optimize: -O:
4436 lea symbol, %rN -> mov $symbol, %rN
4437 lea (%rM), %rN -> mov %rM, %rN
4438 lea (,%rM,1), %rN -> mov %rM, %rN
4439
4440 and in 32-bit mode for 16-bit addressing
4441
4442 lea (%rM), %rN -> movzx %rM, %rN
4443
4444 and in 64-bit mode zap 32-bit addressing in favor of using a
4445 32-bit (or less) destination.
4446 */
4447 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4448 {
4449 if (!i.op[1].regs->reg_type.bitfield.word)
4450 i.tm.opcode_modifier.size = SIZE32;
4451 i.prefix[ADDR_PREFIX] = 0;
4452 }
4453
4454 if (!i.index_reg && !i.base_reg)
4455 {
4456 /* Handle:
4457 lea symbol, %rN -> mov $symbol, %rN
4458 */
4459 if (flag_code == CODE_64BIT)
4460 {
4461 /* Don't transform a relocation to a 16-bit one. */
4462 if (i.op[0].disps
4463 && i.op[0].disps->X_op != O_constant
4464 && i.op[1].regs->reg_type.bitfield.word)
4465 return;
4466
4467 if (!i.op[1].regs->reg_type.bitfield.qword
4468 || i.tm.opcode_modifier.size == SIZE32)
4469 {
4470 i.tm.base_opcode = 0xb8;
4471 i.tm.opcode_modifier.modrm = 0;
4472 if (!i.op[1].regs->reg_type.bitfield.word)
4473 i.types[0].bitfield.imm32 = 1;
4474 else
4475 {
4476 i.tm.opcode_modifier.size = SIZE16;
4477 i.types[0].bitfield.imm16 = 1;
4478 }
4479 }
4480 else
4481 {
4482 /* Subject to further optimization below. */
4483 i.tm.base_opcode = 0xc7;
4484 i.tm.extension_opcode = 0;
4485 i.types[0].bitfield.imm32s = 1;
4486 i.types[0].bitfield.baseindex = 0;
4487 }
4488 }
4489 /* Outside of 64-bit mode address and operand sizes have to match if
4490 a relocation is involved, as otherwise we wouldn't (currently) or
4491 even couldn't express the relocation correctly. */
4492 else if (i.op[0].disps
4493 && i.op[0].disps->X_op != O_constant
4494 && ((!i.prefix[ADDR_PREFIX])
4495 != (flag_code == CODE_32BIT
4496 ? i.op[1].regs->reg_type.bitfield.dword
4497 : i.op[1].regs->reg_type.bitfield.word)))
4498 return;
4499 /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4500 destination is going to grow encoding size. */
4501 else if (flag_code == CODE_16BIT
4502 && (optimize <= 1 || optimize_for_space)
4503 && !i.prefix[ADDR_PREFIX]
4504 && i.op[1].regs->reg_type.bitfield.dword)
4505 return;
4506 else
4507 {
4508 i.tm.base_opcode = 0xb8;
4509 i.tm.opcode_modifier.modrm = 0;
4510 if (i.op[1].regs->reg_type.bitfield.dword)
4511 i.types[0].bitfield.imm32 = 1;
4512 else
4513 i.types[0].bitfield.imm16 = 1;
4514
4515 if (i.op[0].disps
4516 && i.op[0].disps->X_op == O_constant
4517 && i.op[1].regs->reg_type.bitfield.dword
4518 /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4519 GCC 5. */
4520 && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4521 i.op[0].disps->X_add_number &= 0xffff;
4522 }
4523
4524 i.tm.operand_types[0] = i.types[0];
4525 i.imm_operands = 1;
4526 if (!i.op[0].imms)
4527 {
4528 i.op[0].imms = &im_expressions[0];
4529 i.op[0].imms->X_op = O_absent;
4530 }
4531 }
4532 else if (i.op[0].disps
4533 && (i.op[0].disps->X_op != O_constant
4534 || i.op[0].disps->X_add_number))
4535 return;
4536 else
4537 {
4538 /* Handle:
4539 lea (%rM), %rN -> mov %rM, %rN
4540 lea (,%rM,1), %rN -> mov %rM, %rN
4541 lea (%rM), %rN -> movzx %rM, %rN
4542 */
4543 const reg_entry *addr_reg;
4544
4545 if (!i.index_reg && i.base_reg->reg_num != RegIP)
4546 addr_reg = i.base_reg;
4547 else if (!i.base_reg
4548 && i.index_reg->reg_num != RegIZ
4549 && !i.log2_scale_factor)
4550 addr_reg = i.index_reg;
4551 else
4552 return;
4553
4554 if (addr_reg->reg_type.bitfield.word
4555 && i.op[1].regs->reg_type.bitfield.dword)
4556 {
4557 if (flag_code != CODE_32BIT)
4558 return;
4559 i.tm.opcode_space = SPACE_0F;
4560 i.tm.base_opcode = 0xb7;
4561 }
4562 else
4563 i.tm.base_opcode = 0x8b;
4564
4565 if (addr_reg->reg_type.bitfield.dword
4566 && i.op[1].regs->reg_type.bitfield.qword)
4567 i.tm.opcode_modifier.size = SIZE32;
4568
4569 i.op[0].regs = addr_reg;
4570 i.reg_operands = 2;
4571 }
4572
4573 i.mem_operands = 0;
4574 i.disp_operands = 0;
4575 i.prefix[ADDR_PREFIX] = 0;
4576 i.prefix[SEG_PREFIX] = 0;
4577 i.seg[0] = NULL;
4578 }
4579
4580 if (optimize_for_space
4581 && i.tm.mnem_off == MN_test
4582 && i.reg_operands == 1
4583 && i.imm_operands == 1
4584 && !i.types[1].bitfield.byte
4585 && i.op[0].imms->X_op == O_constant
4586 && fits_in_imm7 (i.op[0].imms->X_add_number))
4587 {
4588 /* Optimize: -Os:
4589 test $imm7, %r64/%r32/%r16 -> test $imm7, %r8
4590 */
4591 unsigned int base_regnum = i.op[1].regs->reg_num;
4592 if (flag_code == CODE_64BIT || base_regnum < 4)
4593 {
4594 i.types[1].bitfield.byte = 1;
4595 /* Ignore the suffix. */
4596 i.suffix = 0;
4597 /* Convert to byte registers. 8-bit registers are special,
4598 RegRex64 and non-RegRex64 each have 8 registers. */
4599 if (i.types[1].bitfield.word)
4600 /* 32 (or 40) 8-bit registers. */
4601 j = 32;
4602 else if (i.types[1].bitfield.dword)
4603 /* 32 (or 40) 8-bit registers + 32 16-bit registers. */
4604 j = 64;
4605 else
4606 /* 32 (or 40) 8-bit registers + 32 16-bit registers
4607 + 32 32-bit registers. */
4608 j = 96;
4609
4610 /* In 64-bit mode, the byte registers AH, BH, CH and DH cannot be
4611 accessed when a REX or REX2 prefix is in use. */
4612 if (!(i.op[1].regs->reg_flags & (RegRex | RegRex2)) && base_regnum < 4)
4613 j += 8;
4614 i.op[1].regs -= j;
4615 }
4616 }
4617 else if (flag_code == CODE_64BIT
4618 && i.tm.opcode_space == SPACE_BASE
4619 && ((i.types[1].bitfield.qword
4620 && i.reg_operands == 1
4621 && i.imm_operands == 1
4622 && i.op[0].imms->X_op == O_constant
4623 && ((i.tm.base_opcode == 0xb8
4624 && i.tm.extension_opcode == None
4625 && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4626 || (fits_in_imm31 (i.op[0].imms->X_add_number)
4627 && (i.tm.base_opcode == 0x24
4628 || (i.tm.base_opcode == 0x80
4629 && i.tm.extension_opcode == 0x4)
4630 || i.tm.mnem_off == MN_test
4631 || ((i.tm.base_opcode | 1) == 0xc7
4632 && i.tm.extension_opcode == 0x0)))
4633 || (fits_in_imm7 (i.op[0].imms->X_add_number)
4634 && i.tm.base_opcode == 0x83
4635 && i.tm.extension_opcode == 0x4)))
4636 || (i.types[0].bitfield.qword
4637 && ((i.reg_operands == 2
4638 && i.op[0].regs == i.op[1].regs
4639 && (i.tm.mnem_off == MN_xor
4640 || i.tm.mnem_off == MN_sub))
4641 || i.tm.mnem_off == MN_clr))))
4642 {
4643 /* Optimize: -O:
4644 andq $imm31, %r64 -> andl $imm31, %r32
4645 andq $imm7, %r64 -> andl $imm7, %r32
4646 testq $imm31, %r64 -> testl $imm31, %r32
4647 xorq %r64, %r64 -> xorl %r32, %r32
4648 subq %r64, %r64 -> subl %r32, %r32
4649 movq $imm31, %r64 -> movl $imm31, %r32
4650 movq $imm32, %r64 -> movl $imm32, %r32
4651 */
4652 i.tm.opcode_modifier.size = SIZE32;
4653 if (i.imm_operands)
4654 {
4655 i.types[0].bitfield.imm32 = 1;
4656 i.types[0].bitfield.imm32s = 0;
4657 i.types[0].bitfield.imm64 = 0;
4658 }
4659 else
4660 {
4661 i.types[0].bitfield.dword = 1;
4662 i.types[0].bitfield.qword = 0;
4663 }
4664 i.types[1].bitfield.dword = 1;
4665 i.types[1].bitfield.qword = 0;
4666 if (i.tm.mnem_off == MN_mov || i.tm.mnem_off == MN_lea)
4667 {
4668 /* Handle
4669 movq $imm31, %r64 -> movl $imm31, %r32
4670 movq $imm32, %r64 -> movl $imm32, %r32
4671 */
4672 i.tm.operand_types[0].bitfield.imm32 = 1;
4673 i.tm.operand_types[0].bitfield.imm32s = 0;
4674 i.tm.operand_types[0].bitfield.imm64 = 0;
4675 if ((i.tm.base_opcode | 1) == 0xc7)
4676 {
4677 /* Handle
4678 movq $imm31, %r64 -> movl $imm31, %r32
4679 */
4680 i.tm.base_opcode = 0xb8;
4681 i.tm.extension_opcode = None;
4682 i.tm.opcode_modifier.w = 0;
4683 i.tm.opcode_modifier.modrm = 0;
4684 }
4685 }
4686 }
4687 else if (i.reg_operands == 3
4688 && i.op[0].regs == i.op[1].regs
4689 && i.encoding != encoding_evex
4690 && (i.tm.mnem_off == MN_xor
4691 || i.tm.mnem_off == MN_sub))
4692 {
4693 /* Optimize: -O:
4694 xorb %rNb, %rNb, %rMb -> xorl %rMd, %rMd
4695 xorw %rNw, %rNw, %rMw -> xorl %rMd, %rMd
4696 xorl %rNd, %rNd, %rMd -> xorl %rMd, %rMd
4697 xorq %rN, %rN, %rM -> xorl %rMd, %rMd
4698 subb %rNb, %rNb, %rMb -> subl %rMd, %rMd
4699 subw %rNw, %rNw, %rMw -> subl %rMd, %rMd
4700 subl %rNd, %rNd, %rMd -> subl %rMd, %rMd
4701 subq %rN, %rN, %rM -> subl %rMd, %rMd
4702 */
4703 i.tm.opcode_space = SPACE_BASE;
4704 i.tm.opcode_modifier.evex = 0;
4705 i.tm.opcode_modifier.size = SIZE32;
4706 i.types[0].bitfield.byte = 0;
4707 i.types[0].bitfield.word = 0;
4708 i.types[0].bitfield.dword = 1;
4709 i.types[0].bitfield.qword = 0;
4710 i.op[0].regs = i.op[2].regs;
4711 i.types[1] = i.types[0];
4712 i.op[1].regs = i.op[2].regs;
4713 i.reg_operands = 2;
4714 }
4715 else if (optimize > 1
4716 && !optimize_for_space
4717 && i.reg_operands == 2
4718 && i.op[0].regs == i.op[1].regs
4719 && (i.tm.mnem_off == MN_and || i.tm.mnem_off == MN_or)
4720 && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4721 {
4722 /* Optimize: -O2:
4723 andb %rN, %rN -> testb %rN, %rN
4724 andw %rN, %rN -> testw %rN, %rN
4725 andq %rN, %rN -> testq %rN, %rN
4726 orb %rN, %rN -> testb %rN, %rN
4727 orw %rN, %rN -> testw %rN, %rN
4728 orq %rN, %rN -> testq %rN, %rN
4729
4730 and outside of 64-bit mode
4731
4732 andl %rN, %rN -> testl %rN, %rN
4733 orl %rN, %rN -> testl %rN, %rN
4734 */
4735 i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4736 }
4737 else if (i.tm.base_opcode == 0xba
4738 && i.tm.opcode_space == SPACE_0F
4739 && i.reg_operands == 1
4740 && i.op[0].imms->X_op == O_constant
4741 && i.op[0].imms->X_add_number >= 0)
4742 {
4743 /* Optimize: -O:
4744 btw $n, %rN -> btl $n, %rN (outside of 16-bit mode, n < 16)
4745 btq $n, %rN -> btl $n, %rN (in 64-bit mode, n < 32, N < 8)
4746 btl $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
4747
4748 With <BT> one of bts, btr, and btc also:
4749 <BT>w $n, %rN -> <BT>l $n, %rN (in 32-bit mode, n < 16)
4750 <BT>l $n, %rN -> <BT>w $n, %rN (in 16-bit mode, n < 16)
4751 */
4752 switch (flag_code)
4753 {
4754 case CODE_64BIT:
4755 if (i.tm.extension_opcode != 4)
4756 break;
4757 if (i.types[1].bitfield.qword
4758 && i.op[0].imms->X_add_number < 32
4759 && !(i.op[1].regs->reg_flags & RegRex))
4760 i.tm.opcode_modifier.size = SIZE32;
4761 /* Fall through. */
4762 case CODE_32BIT:
4763 if (i.types[1].bitfield.word
4764 && i.op[0].imms->X_add_number < 16)
4765 i.tm.opcode_modifier.size = SIZE32;
4766 break;
4767 case CODE_16BIT:
4768 if (i.op[0].imms->X_add_number < 16)
4769 i.tm.opcode_modifier.size = SIZE16;
4770 break;
4771 }
4772 }
4773 else if (i.reg_operands == 3
4774 && i.op[0].regs == i.op[1].regs
4775 && !i.types[2].bitfield.xmmword
4776 && (i.tm.opcode_modifier.vex
4777 || ((!i.mask.reg || i.mask.zeroing)
4778 && i.tm.opcode_modifier.evex
4779 && (i.encoding != encoding_evex
4780 || cpu_arch_isa_flags.bitfield.cpuavx512vl
4781 || is_cpu (&i.tm, CpuAVX512VL)
4782 || (i.tm.operand_types[2].bitfield.zmmword
4783 && i.types[2].bitfield.ymmword))))
4784 && i.tm.opcode_space == SPACE_0F
4785 && ((i.tm.base_opcode | 2) == 0x57
4786 || i.tm.base_opcode == 0xdf
4787 || i.tm.base_opcode == 0xef
4788 || (i.tm.base_opcode | 3) == 0xfb
4789 || i.tm.base_opcode == 0x42
4790 || i.tm.base_opcode == 0x47))
4791 {
4792 /* Optimize: -O1:
4793 VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4794 vpsubq and vpsubw:
4795 EVEX VOP %zmmM, %zmmM, %zmmN
4796 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4797 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4798 EVEX VOP %ymmM, %ymmM, %ymmN
4799 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4800 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4801 VEX VOP %ymmM, %ymmM, %ymmN
4802 -> VEX VOP %xmmM, %xmmM, %xmmN
4803 VOP, one of vpandn and vpxor:
4804 VEX VOP %ymmM, %ymmM, %ymmN
4805 -> VEX VOP %xmmM, %xmmM, %xmmN
4806 VOP, one of vpandnd and vpandnq:
4807 EVEX VOP %zmmM, %zmmM, %zmmN
4808 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4809 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4810 EVEX VOP %ymmM, %ymmM, %ymmN
4811 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4812 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4813 VOP, one of vpxord and vpxorq:
4814 EVEX VOP %zmmM, %zmmM, %zmmN
4815 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4816 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4817 EVEX VOP %ymmM, %ymmM, %ymmN
4818 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4819 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4820 VOP, one of kxord and kxorq:
4821 VEX VOP %kM, %kM, %kN
4822 -> VEX kxorw %kM, %kM, %kN
4823 VOP, one of kandnd and kandnq:
4824 VEX VOP %kM, %kM, %kN
4825 -> VEX kandnw %kM, %kM, %kN
4826 */
4827 if (i.tm.opcode_modifier.evex)
4828 {
4829 if (i.encoding != encoding_evex)
4830 {
4831 i.tm.opcode_modifier.vex = VEX128;
4832 i.tm.opcode_modifier.vexw = VEXW0;
4833 i.tm.opcode_modifier.evex = 0;
4834 i.encoding = encoding_vex;
4835 i.mask.reg = NULL;
4836 }
4837 else if (optimize > 1)
4838 i.tm.opcode_modifier.evex = EVEX128;
4839 else
4840 return;
4841 }
4842 else if (i.tm.operand_types[0].bitfield.class == RegMask)
4843 {
4844 i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4845 i.tm.opcode_modifier.vexw = VEXW0;
4846 }
4847 else
4848 i.tm.opcode_modifier.vex = VEX128;
4849
4850 if (i.tm.opcode_modifier.vex)
4851 for (j = 0; j < 3; j++)
4852 {
4853 i.types[j].bitfield.xmmword = 1;
4854 i.types[j].bitfield.ymmword = 0;
4855 }
4856 }
4857 else if (i.encoding != encoding_evex
4858 && i.encoding != encoding_egpr
4859 && !i.types[0].bitfield.zmmword
4860 && !i.types[1].bitfield.zmmword
4861 && !i.mask.reg
4862 && !i.broadcast.type
4863 && !i.broadcast.bytes
4864 && i.tm.opcode_modifier.evex
4865 && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4866 || (i.tm.base_opcode & ~4) == 0xdb
4867 || (i.tm.base_opcode & ~4) == 0xeb)
4868 && i.tm.extension_opcode == None)
4869 {
4870 /* Optimize: -O1:
4871 VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4872 vmovdqu32 and vmovdqu64:
4873 EVEX VOP %xmmM, %xmmN
4874 -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4875 EVEX VOP %ymmM, %ymmN
4876 -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4877 EVEX VOP %xmmM, mem
4878 -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4879 EVEX VOP %ymmM, mem
4880 -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4881 EVEX VOP mem, %xmmN
4882 -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4883 EVEX VOP mem, %ymmN
4884 -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4885 VOP, one of vpand, vpandn, vpor, vpxor:
4886 EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4887 -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4888 EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4889 -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4890 EVEX VOP{d,q} mem, %xmmM, %xmmN
4891 -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4892 EVEX VOP{d,q} mem, %ymmM, %ymmN
4893 -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4894 */
4895 for (j = 0; j < i.operands; j++)
4896 if (operand_type_check (i.types[j], disp)
4897 && i.op[j].disps->X_op == O_constant)
4898 {
4899 /* Since the VEX prefix has 2 or 3 bytes while the EVEX prefix
4900 has 4 bytes, and EVEX Disp8 has 1 byte while VEX Disp32 has 4
4901 bytes, we choose EVEX Disp8 over VEX Disp32. */
4902 int evex_disp8, vex_disp8;
4903 unsigned int memshift = i.memshift;
4904 offsetT n = i.op[j].disps->X_add_number;
4905
4906 evex_disp8 = fits_in_disp8 (n);
4907 i.memshift = 0;
4908 vex_disp8 = fits_in_disp8 (n);
4909 if (evex_disp8 != vex_disp8)
4910 {
4911 i.memshift = memshift;
4912 return;
4913 }
4914
4915 i.types[j].bitfield.disp8 = vex_disp8;
4916 break;
4917 }
4918 if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4919 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4920 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4921 i.tm.opcode_modifier.vex
4922 = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4923 i.tm.opcode_modifier.vexw = VEXW0;
4924 /* VPAND, VPOR, and VPXOR are commutative. */
4925 if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4926 i.tm.opcode_modifier.commutative = 1;
4927 i.tm.opcode_modifier.evex = 0;
4928 i.tm.opcode_modifier.masking = 0;
4929 i.tm.opcode_modifier.broadcast = 0;
4930 i.tm.opcode_modifier.disp8memshift = 0;
4931 i.memshift = 0;
4932 if (j < i.operands)
4933 i.types[j].bitfield.disp8
4934 = fits_in_disp8 (i.op[j].disps->X_add_number);
4935 }
4936 else if (optimize_for_space
4937 && i.tm.base_opcode == 0x29
4938 && i.tm.opcode_space == SPACE_0F38
4939 && i.operands == i.reg_operands
4940 && i.op[0].regs == i.op[1].regs
4941 && (!i.tm.opcode_modifier.vex
4942 || !(i.op[0].regs->reg_flags & RegRex))
4943 && !i.tm.opcode_modifier.evex)
4944 {
4945 /* Optimize: -Os:
4946 pcmpeqq %xmmN, %xmmN -> pcmpeqd %xmmN, %xmmN
4947 vpcmpeqq %xmmN, %xmmN, %xmmM -> vpcmpeqd %xmmN, %xmmN, %xmmM (N < 8)
4948 vpcmpeqq %ymmN, %ymmN, %ymmM -> vpcmpeqd %ymmN, %ymmN, %ymmM (N < 8)
4949 */
4950 i.tm.opcode_space = SPACE_0F;
4951 i.tm.base_opcode = 0x76;
4952 }
4953 else if (((i.tm.base_opcode >= 0x64
4954 && i.tm.base_opcode <= 0x66
4955 && i.tm.opcode_space == SPACE_0F)
4956 || (i.tm.base_opcode == 0x37
4957 && i.tm.opcode_space == SPACE_0F38))
4958 && i.operands == i.reg_operands
4959 && i.op[0].regs == i.op[1].regs
4960 && !i.tm.opcode_modifier.evex)
4961 {
4962 /* Optimize: -O:
4963 pcmpgt[bwd] %mmN, %mmN -> pxor %mmN, %mmN
4964 pcmpgt[bwdq] %xmmN, %xmmN -> pxor %xmmN, %xmmN
4965 vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM -> vpxor %xmmN, %xmmN, %xmmM (N < 8)
4966 vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM -> vpxor %xmm0, %xmm0, %xmmM (N > 7)
4967 vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM -> vpxor %ymmN, %ymmN, %ymmM (N < 8)
4968 vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM -> vpxor %ymm0, %ymm0, %ymmM (N > 7)
4969 */
4970 i.tm.opcode_space = SPACE_0F;
4971 i.tm.base_opcode = 0xef;
4972 if (i.tm.opcode_modifier.vex && (i.op[0].regs->reg_flags & RegRex))
4973 {
4974 if (i.operands == 2)
4975 {
4976 gas_assert (i.tm.opcode_modifier.sse2avx);
4977
4978 i.operands = 3;
4979 i.reg_operands = 3;
4980 i.tm.operands = 3;
4981
4982 i.op[2].regs = i.op[0].regs;
4983 i.types[2] = i.types[0];
4984 i.flags[2] = i.flags[0];
4985 i.tm.operand_types[2] = i.tm.operand_types[0];
4986
4987 i.tm.opcode_modifier.sse2avx = 0;
4988 }
4989 i.op[0].regs -= i.op[0].regs->reg_num + 8;
4990 i.op[1].regs = i.op[0].regs;
4991 }
4992 }
4993 else if (optimize_for_space
4994 && i.tm.base_opcode == 0x59
4995 && i.tm.opcode_space == SPACE_0F38
4996 && i.operands == i.reg_operands
4997 && i.tm.opcode_modifier.vex
4998 && !(i.op[0].regs->reg_flags & RegRex)
4999 && i.op[0].regs->reg_type.bitfield.xmmword
5000 && i.encoding != encoding_vex3)
5001 {
5002 /* Optimize: -Os:
5003 vpbroadcastq %xmmN, %xmmM -> vpunpcklqdq %xmmN, %xmmN, %xmmM (N < 8)
5004 */
5005 i.tm.opcode_space = SPACE_0F;
5006 i.tm.base_opcode = 0x6c;
5007 i.tm.opcode_modifier.vexvvvv = 1;
5008
5009 ++i.operands;
5010 ++i.reg_operands;
5011 ++i.tm.operands;
5012
5013 i.op[2].regs = i.op[0].regs;
5014 i.types[2] = i.types[0];
5015 i.flags[2] = i.flags[0];
5016 i.tm.operand_types[2] = i.tm.operand_types[0];
5017
5018 swap_2_operands (1, 2);
5019 }
5020 }
5021
5022 static void
5023 s_noopt (int dummy ATTRIBUTE_UNUSED)
5024 {
5025 if (!is_it_end_of_statement ())
5026 as_warn (_("`.noopt' arguments ignored"));
5027
5028 optimize = 0;
5029 optimize_for_space = 0;
5030
5031 ignore_rest_of_line ();
5032 }
5033
5034 /* Return non-zero for load instruction. */
5035
5036 static int
5037 load_insn_p (void)
5038 {
5039 unsigned int dest;
5040 int any_vex_p = is_any_vex_encoding (&i.tm);
5041 unsigned int base_opcode = i.tm.base_opcode | 1;
5042
5043 if (!any_vex_p)
5044 {
5045 /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
5046 bndcn, bndstx, bndldx, clflushopt, clwb, cldemote. */
5047 if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
5048 return 0;
5049
5050 /* pop. */
5051 if (i.tm.mnem_off == MN_pop)
5052 return 1;
5053 }
5054
5055 if (i.tm.opcode_space == SPACE_BASE)
5056 {
5057 /* popf, popa. */
5058 if (i.tm.base_opcode == 0x9d
5059 || i.tm.base_opcode == 0x61)
5060 return 1;
5061
5062 /* movs, cmps, lods, scas. */
5063 if ((i.tm.base_opcode | 0xb) == 0xaf)
5064 return 1;
5065
5066 /* outs, xlatb. */
5067 if (base_opcode == 0x6f
5068 || i.tm.base_opcode == 0xd7)
5069 return 1;
5070 /* NB: AMD-specific insns with implicit memory operands are
5071 intentionally not covered. */
5072 }
5073
5074 /* No memory operand. */
5075 if (!i.mem_operands)
5076 return 0;
5077
5078 if (any_vex_p)
5079 {
5080 if (i.tm.mnem_off == MN_vldmxcsr)
5081 return 1;
5082 }
5083 else if (i.tm.opcode_space == SPACE_BASE)
5084 {
5085 /* test, not, neg, mul, imul, div, idiv. */
5086 if (base_opcode == 0xf7 && i.tm.extension_opcode != 1)
5087 return 1;
5088
5089 /* inc, dec. */
5090 if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
5091 return 1;
5092
5093 /* add, or, adc, sbb, and, sub, xor, cmp. */
5094 if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
5095 return 1;
5096
5097 /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
5098 if ((base_opcode == 0xc1 || (base_opcode | 2) == 0xd3)
5099 && i.tm.extension_opcode != 6)
5100 return 1;
5101
5102 /* Check for x87 instructions. */
5103 if ((base_opcode | 6) == 0xdf)
5104 {
5105 /* Skip fst, fstp, fstenv, fstcw. */
5106 if (i.tm.base_opcode == 0xd9
5107 && (i.tm.extension_opcode == 2
5108 || i.tm.extension_opcode == 3
5109 || i.tm.extension_opcode == 6
5110 || i.tm.extension_opcode == 7))
5111 return 0;
5112
5113 /* Skip fisttp, fist, fistp, fstp. */
5114 if (i.tm.base_opcode == 0xdb
5115 && (i.tm.extension_opcode == 1
5116 || i.tm.extension_opcode == 2
5117 || i.tm.extension_opcode == 3
5118 || i.tm.extension_opcode == 7))
5119 return 0;
5120
5121 /* Skip fisttp, fst, fstp, fsave, fstsw. */
5122 if (i.tm.base_opcode == 0xdd
5123 && (i.tm.extension_opcode == 1
5124 || i.tm.extension_opcode == 2
5125 || i.tm.extension_opcode == 3
5126 || i.tm.extension_opcode == 6
5127 || i.tm.extension_opcode == 7))
5128 return 0;
5129
5130 /* Skip fisttp, fist, fistp, fbstp, fistp. */
5131 if (i.tm.base_opcode == 0xdf
5132 && (i.tm.extension_opcode == 1
5133 || i.tm.extension_opcode == 2
5134 || i.tm.extension_opcode == 3
5135 || i.tm.extension_opcode == 6
5136 || i.tm.extension_opcode == 7))
5137 return 0;
5138
5139 return 1;
5140 }
5141 }
5142 else if (i.tm.opcode_space == SPACE_0F)
5143 {
5144 /* bt, bts, btr, btc. */
5145 if (i.tm.base_opcode == 0xba
5146 && (i.tm.extension_opcode | 3) == 7)
5147 return 1;
5148
5149 /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld. */
5150 if (i.tm.base_opcode == 0xc7
5151 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
5152 && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
5153 || i.tm.extension_opcode == 6))
5154 return 1;
5155
5156 /* fxrstor, ldmxcsr, xrstor. */
5157 if (i.tm.base_opcode == 0xae
5158 && (i.tm.extension_opcode == 1
5159 || i.tm.extension_opcode == 2
5160 || i.tm.extension_opcode == 5))
5161 return 1;
5162
5163 /* lgdt, lidt, lmsw. */
5164 if (i.tm.base_opcode == 0x01
5165 && (i.tm.extension_opcode == 2
5166 || i.tm.extension_opcode == 3
5167 || i.tm.extension_opcode == 6))
5168 return 1;
5169 }
5170
5171 dest = i.operands - 1;
5172
5173 /* Check fake imm8 operand and 3 source operands. */
5174 if ((i.tm.opcode_modifier.immext
5175 || i.reg_operands + i.mem_operands == 4)
5176 && i.types[dest].bitfield.imm8)
5177 dest--;
5178
5179 /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg. */
5180 if (i.tm.opcode_space == SPACE_BASE
5181 && ((base_opcode | 0x38) == 0x39
5182 || (base_opcode | 2) == 0x87))
5183 return 1;
5184
5185 if (i.tm.mnem_off == MN_xadd)
5186 return 1;
5187
5188 /* Check for load instruction. */
5189 return (i.types[dest].bitfield.class != ClassNone
5190 || i.types[dest].bitfield.instance == Accum);
5191 }
5192
5193 /* Output an lfence (0f ae e8) after the instruction. */
5194
5195 static void
5196 insert_lfence_after (void)
5197 {
5198 if (lfence_after_load && load_insn_p ())
5199 {
5200 /* There are also two REP string instructions that require
5201 special treatment. Specifically, the compare string (CMPS)
5202 and scan string (SCAS) instructions set EFLAGS in a manner
5203 that depends on the data being compared/scanned. When used
5204 with a REP prefix, the number of iterations may therefore
5205 vary depending on this data. If the data is a program secret
5206 chosen by the adversary using an LVI method,
5207 then this data-dependent behavior may leak some aspect
5208 of the secret. */
5209 if (((i.tm.base_opcode | 0x9) == 0xaf)
5210 && i.prefix[REP_PREFIX])
5211 {
5212 as_warn (_("`%s` changes flags which would affect control flow behavior"),
5213 insn_name (&i.tm));
5214 }
5215 char *p = frag_more (3);
5216 *p++ = 0xf;
5217 *p++ = 0xae;
5218 *p = 0xe8;
5219 }
5220 }
5221
5222 /* Output an lfence (0f ae e8) before the instruction. */
5223
5224 static void
5225 insert_lfence_before (const struct last_insn *last_insn)
5226 {
5227 char *p;
5228
5229 if (i.tm.opcode_space != SPACE_BASE)
5230 return;
5231
5232 if (i.tm.base_opcode == 0xff
5233 && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
5234 {
5235 /* Insert lfence before indirect branch if needed. */
5236
5237 if (lfence_before_indirect_branch == lfence_branch_none)
5238 return;
5239
5240 if (i.operands != 1)
5241 abort ();
5242
5243 if (i.reg_operands == 1)
5244 {
5245 /* Indirect branch via register. Don't insert lfence with
5246 -mlfence-after-load=yes. */
5247 if (lfence_after_load
5248 || lfence_before_indirect_branch == lfence_branch_memory)
5249 return;
5250 }
5251 else if (i.mem_operands == 1
5252 && lfence_before_indirect_branch != lfence_branch_register)
5253 {
5254 as_warn (_("indirect `%s` with memory operand should be avoided"),
5255 insn_name (&i.tm));
5256 return;
5257 }
5258 else
5259 return;
5260
5261 if (last_insn->kind != last_insn_other)
5262 {
5263 as_warn_where (last_insn->file, last_insn->line,
5264 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
5265 last_insn->name, insn_name (&i.tm));
5266 return;
5267 }
5268
5269 p = frag_more (3);
5270 *p++ = 0xf;
5271 *p++ = 0xae;
5272 *p = 0xe8;
5273 return;
5274 }
5275
5276 /* Output or/not/shl and lfence before near ret. */
5277 if (lfence_before_ret != lfence_before_ret_none
5278 && (i.tm.base_opcode | 1) == 0xc3)
5279 {
5280 if (last_insn->kind != last_insn_other)
5281 {
5282 as_warn_where (last_insn->file, last_insn->line,
5283 _("`%s` skips -mlfence-before-ret on `%s`"),
5284 last_insn->name, insn_name (&i.tm));
5285 return;
5286 }
5287
5288 /* A near ret ignores the operand size override under CPU64. */
5289 char prefix = flag_code == CODE_64BIT
5290 ? 0x48
5291 : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
5292
5293 if (lfence_before_ret == lfence_before_ret_not)
5294 {
5295 /* not: 0xf71424, may add prefix
5296 for operand size override or 64-bit code. */
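/* E.g., in 64-bit code this emits `notq (%rsp); notq (%rsp); lfence',
   i.e. 48 f7 14 24 48 f7 14 24 0f ae e8. */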
5297 p = frag_more ((prefix ? 2 : 0) + 6 + 3);
5298 if (prefix)
5299 *p++ = prefix;
5300 *p++ = 0xf7;
5301 *p++ = 0x14;
5302 *p++ = 0x24;
5303 if (prefix)
5304 *p++ = prefix;
5305 *p++ = 0xf7;
5306 *p++ = 0x14;
5307 *p++ = 0x24;
5308 }
5309 else
5310 {
5311 p = frag_more ((prefix ? 1 : 0) + 4 + 3);
5312 if (prefix)
5313 *p++ = prefix;
5314 if (lfence_before_ret == lfence_before_ret_or)
5315 {
5316 /* or: 0x830c2400, may add prefix
5317 for operand size override or 64-bit code. */
5318 *p++ = 0x83;
5319 *p++ = 0x0c;
5320 }
5321 else
5322 {
5323 /* shl: 0xc1242400, may add prefix
5324 for operand size override or 64-bit code. */
5325 *p++ = 0xc1;
5326 *p++ = 0x24;
5327 }
5328
5329 *p++ = 0x24;
5330 *p++ = 0x0;
5331 }
5332
5333 *p++ = 0xf;
5334 *p++ = 0xae;
5335 *p = 0xe8;
5336 }
5337 }
5338
5339 /* Shared helper for md_assemble() and s_insn(). */
5340 static void init_globals (void)
5341 {
5342 unsigned int j;
5343
5344 memset (&i, '\0', sizeof (i));
5345 i.rounding.type = rc_none;
5346 for (j = 0; j < MAX_OPERANDS; j++)
5347 i.reloc[j] = NO_RELOC;
5348 memset (disp_expressions, '\0', sizeof (disp_expressions));
5349 memset (im_expressions, '\0', sizeof (im_expressions));
5350 save_stack_p = save_stack;
5351 }
5352
5353 /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
5354 parsing pass. Instead of introducing a rarely used new insn attribute, this
5355 utilizes a common pattern between affected templates. It is deemed
5356 acceptable that this will lead to unnecessary pass 2 preparations in a
5357 limited set of cases. */
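/* Concretely, the non-SSE2AVX patterns below match templates like movsx
   (0f be/bf) and movsxd (63). */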
5358 static INLINE bool may_need_pass2 (const insn_template *t)
5359 {
5360 return t->opcode_modifier.sse2avx
5361 /* Note that all SSE2AVX templates have at least one operand. */
5362 ? t->operand_types[t->operands - 1].bitfield.class == RegSIMD
5363 : (t->opcode_space == SPACE_0F
5364 && (t->base_opcode | 1) == 0xbf)
5365 || (t->opcode_space == SPACE_BASE
5366 && t->base_opcode == 0x63);
5367 }
5368
5369 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
5370
5371 /* DWARF register number for EFLAGS. Used for pushf/popf insns. */
5372 #define GINSN_DW2_REGNUM_EFLAGS 49
5373 /* DWARF register number for RSI. Used as dummy value when RegIP/RegIZ. */
5374 #define GINSN_DW2_REGNUM_RSI_DUMMY 4
5375
5376 /* Identify the callee-saved registers in System V AMD64 ABI. */
5377
5378 bool
5379 x86_scfi_callee_saved_p (unsigned int dw2reg_num)
5380 {
5381 if (dw2reg_num == 3 /* rbx. */
5382 || dw2reg_num == REG_FP /* rbp. */
5383 || dw2reg_num == REG_SP /* rsp. */
5384 || (dw2reg_num >= 12 && dw2reg_num <= 15) /* r12 - r15. */)
5385 return true;
5386
5387 return false;
5388 }
5389
5390 /* Check whether an instruction prefix which affects operation size
5391 accompanies. For insns in the legacy space, setting REX.W takes precedence
5392 over the operand-size prefix (66H) when both are used.
5393
5394 The current users of this API are in the handlers for PUSH, POP or other
5395 instructions which affect the stack pointer implicitly: the operation size
5396 (16, 32, or 64 bits) determines the amount by which the stack pointer is
5397 incremented / decremented (2, 4 or 8). */
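/* For example, `pushw %ax' in 64-bit code carries the 66h prefix and moves
   the stack pointer by 2 rather than 8. */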
5398
5399 static bool
5400 ginsn_opsize_prefix_p (void)
5401 {
5402 return (!(i.prefix[REX_PREFIX] & REX_W) && i.prefix[DATA_PREFIX]);
5403 }
5404
5405 /* Get the DWARF register number for the given register entry.
5406 For specific byte/word/dword register accesses like al, cl, ah, ch, r8d,
5407 r20w etc., we need to identify the DWARF register number for the
5408 corresponding 8-byte GPR.
5409
5410 This function is a hack - it relies on relative ordering of reg entries in
5411 the i386_regtab. FIXME - it will be good to allow a more direct way to get
5412 this information. */
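/* E.g., %ch first hops to the %cxl entry (+4), which in turn resolves to
   the DWARF register number of %rcx. */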
5413
5414 static unsigned int
5415 ginsn_dw2_regnum (const reg_entry *ireg)
5416 {
5417 const reg_entry *temp = ireg;
5418 unsigned int dwarf_reg = Dw2Inval, idx = 0;
5419
5420 /* ginsn creation is available for the AMD64 ABI only at the moment. Other
5421 flag_code values are not expected. */
5422 gas_assert (ireg && flag_code == CODE_64BIT);
5423
5424 /* Watch out for RegIP, RegIZ. These are expected to appear only with
5425 base/index addressing modes. Although creating inaccurate data
5426 dependencies, using a dummy value (let's say the volatile register rsi) will
5427 not hurt SCFI. TBD_GINSN_GEN_NOT_SCFI. */
5428 if (ireg->reg_num == RegIP || ireg->reg_num == RegIZ)
5429 return GINSN_DW2_REGNUM_RSI_DUMMY;
5430
5431 dwarf_reg = ireg->dw2_regnum[object_64bit];
5432
5433 if (dwarf_reg == Dw2Inval)
5434 {
5435 if (ireg <= &i386_regtab[3])
5436 /* For al, cl, dl, bl, bump over to axl, cxl, dxl, bxl respectively by
5437 adding 8. */
5438 temp = ireg + 8;
5439 else if (ireg <= &i386_regtab[7])
5440 /* For ah, ch, dh, bh, bump over to axl, cxl, dxl, bxl respectively by
5441 adding 4. */
5442 temp = ireg + 4;
5443 else
5444 {
5445 /* The code relies on the relative ordering of the reg entries in
5446 i386_regtab. There are 32 register entries between axl-r31b,
5447 ax-r31w etc. The assertions here ensure the code does not
5448 recurse indefinitely. */
5449 gas_assert ((temp - &i386_regtab[0]) >= 0);
5450 idx = temp - &i386_regtab[0];
5451 gas_assert (idx + 32 < i386_regtab_size - 1);
5452
5453 temp = temp + 32;
5454 }
5455
5456 dwarf_reg = ginsn_dw2_regnum (temp);
5457 }
5458
5459 /* Sanity check - failure may indicate state corruption, bad ginsn or
5460 perhaps the i386-reg table and the current function got out of sync. */
5461 gas_assert (dwarf_reg < Dw2Inval);
5462
5463 return dwarf_reg;
5464 }
5465
5466 static ginsnS *
5467 x86_ginsn_addsub_reg_mem (const symbolS *insn_end_sym)
5468 {
5469 unsigned int dw2_regnum;
5470 unsigned int src1_dw2_regnum;
5471 ginsnS *ginsn = NULL;
5472 ginsnS * (*ginsn_func) (const symbolS *, bool,
5473 enum ginsn_src_type, unsigned int, offsetT,
5474 enum ginsn_src_type, unsigned int, offsetT,
5475 enum ginsn_dst_type, unsigned int, offsetT);
5476 uint16_t opcode = i.tm.base_opcode;
5477
5478 gas_assert (i.tm.opcode_space == SPACE_BASE
5479 && (opcode == 0x1 || opcode == 0x29));
5480 ginsn_func = (opcode == 0x1) ? ginsn_new_add : ginsn_new_sub;
5481
5482 /* op %reg, symbol or even other cases where destination involves indirect
5483 access are unnecessary for SCFI correctness. TBD_GINSN_GEN_NOT_SCFI. */
5484 if (i.mem_operands)
5485 return ginsn;
5486
5487 /* Skip detection of 8/16/32-bit op size; 'add/sub reg, reg/mem' ops always
5488 make the dest reg untraceable for SCFI. */
5489
5490 /* op reg, reg/mem. */
5491 src1_dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
5492 /* Of interest only when second opnd is not memory. */
5493 if (i.reg_operands == 2)
5494 {
5495 dw2_regnum = ginsn_dw2_regnum (i.op[1].regs);
5496 ginsn = ginsn_func (insn_end_sym, true,
5497 GINSN_SRC_REG, src1_dw2_regnum, 0,
5498 GINSN_SRC_REG, dw2_regnum, 0,
5499 GINSN_DST_REG, dw2_regnum, 0);
5500 ginsn_set_where (ginsn);
5501 }
5502
5503 return ginsn;
5504 }
5505
5506 static ginsnS *
5507 x86_ginsn_addsub_mem_reg (const symbolS *insn_end_sym)
5508 {
5509 unsigned int dw2_regnum;
5510 unsigned int src1_dw2_regnum;
5511 const reg_entry *mem_reg;
5512 int32_t gdisp = 0;
5513 ginsnS *ginsn = NULL;
5514 ginsnS * (*ginsn_func) (const symbolS *, bool,
5515 enum ginsn_src_type, unsigned int, offsetT,
5516 enum ginsn_src_type, unsigned int, offsetT,
5517 enum ginsn_dst_type, unsigned int, offsetT);
5518 uint16_t opcode = i.tm.base_opcode;
5519
5520 gas_assert (i.tm.opcode_space == SPACE_BASE
5521 && (opcode == 0x3 || opcode == 0x2b));
5522 ginsn_func = (opcode == 0x3) ? ginsn_new_add : ginsn_new_sub;
5523
5524 /* op symbol, %reg. */
5525 if (i.mem_operands && !i.base_reg && !i.index_reg)
5526 return ginsn;
5527
5528 /* Skip detection of 8/16/32-bit op size; 'add/sub reg/mem, reg' ops always
5529 make the dest reg untraceable for SCFI. */
5530
5531 /* op reg/mem, %reg. */
5532 dw2_regnum = ginsn_dw2_regnum (i.op[1].regs);
5533
5534 if (i.reg_operands == 2)
5535 {
5536 src1_dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
5537 ginsn = ginsn_func (insn_end_sym, true,
5538 GINSN_SRC_REG, src1_dw2_regnum, 0,
5539 GINSN_SRC_REG, dw2_regnum, 0,
5540 GINSN_DST_REG, dw2_regnum, 0);
5541 ginsn_set_where (ginsn);
5542 }
5543 else if (i.mem_operands)
5544 {
5545 mem_reg = (i.base_reg) ? i.base_reg : i.index_reg;
5546 src1_dw2_regnum = ginsn_dw2_regnum (mem_reg);
5547 if (i.disp_operands == 1)
5548 gdisp = i.op[0].disps->X_add_number;
5549 ginsn = ginsn_func (insn_end_sym, true,
5550 GINSN_SRC_INDIRECT, src1_dw2_regnum, gdisp,
5551 GINSN_SRC_REG, dw2_regnum, 0,
5552 GINSN_DST_REG, dw2_regnum, 0);
5553 ginsn_set_where (ginsn);
5554 }
5555
5556 return ginsn;
5557 }
5558
5559 static ginsnS *
5560 x86_ginsn_alu_imm (const symbolS *insn_end_sym)
5561 {
5562 offsetT src_imm;
5563 unsigned int dw2_regnum;
5564 ginsnS *ginsn = NULL;
5565 enum ginsn_src_type src_type = GINSN_SRC_REG;
5566 enum ginsn_dst_type dst_type = GINSN_DST_REG;
5567
5568 ginsnS * (*ginsn_func) (const symbolS *, bool,
5569 enum ginsn_src_type, unsigned int, offsetT,
5570 enum ginsn_src_type, unsigned int, offsetT,
5571 enum ginsn_dst_type, unsigned int, offsetT);
5572
5573 /* FIXME - create ginsn where dest is REG_SP / REG_FP only ? */
5574 /* Map for insn.tm.extension_opcode
5575 000 ADD 100 AND
5576 001 OR 101 SUB
5577 010 ADC 110 XOR
5578 011 SBB 111 CMP */
5579
5580 /* add/sub/and imm, %reg only at this time for SCFI.
5581 Although all three ('and', 'or', 'xor') make the destination reg
5582 untraceable, 'and' op is handled but not 'or' / 'xor' because we will look
5583 into supporting the DRAP pattern at some point. Other opcodes ('adc',
5584 'sbb' and 'cmp') are not generated here either. The ginsn representation
5585 does not have support for the latter three opcodes; GINSN_TYPE_OTHER may
5586 be added for these after x86_ginsn_unhandled () invocation if the
5587 destination register is REG_SP or REG_FP. */
5588 if (i.tm.extension_opcode == 5)
5589 ginsn_func = ginsn_new_sub;
5590 else if (i.tm.extension_opcode == 4)
5591 ginsn_func = ginsn_new_and;
5592 else if (i.tm.extension_opcode == 0)
5593 ginsn_func = ginsn_new_add;
5594 else
5595 return ginsn;
5596
5597 /* TBD_GINSN_REPRESENTATION_LIMIT: There is no representation for when a
5598 symbol is used as an operand, like so:
5599 addq $simd_cmp_op+8, %rdx
5600 Skip generating any ginsn for this. */
5601 if (i.imm_operands == 1
5602 && i.op[0].imms->X_op != O_constant)
5603 return ginsn;
5604
5605 /* addq $1, symbol
5606 addq $1, -16(%rbp)
5607 These are not of interest for SCFI. Also, TBD_GINSN_GEN_NOT_SCFI. */
5608 if (i.mem_operands == 1)
5609 return ginsn;
5610
5611 /* 8/16/32-bit op size makes the destination reg untraceable for SCFI.
5612 Deal with this via the x86_ginsn_unhandled () code path. */
5613 if (i.suffix != QWORD_MNEM_SUFFIX)
5614 return ginsn;
5615
5616 gas_assert (i.imm_operands == 1);
5617 src_imm = i.op[0].imms->X_add_number;
5618 /* The second operand may be a register or indirect access. For SCFI, only
5619 the case when the second opnd is a register is interesting. Revisit this
5620 if generating ginsns for a different gen mode TBD_GINSN_GEN_NOT_SCFI. */
5621 if (i.reg_operands == 1)
5622 {
5623 dw2_regnum = ginsn_dw2_regnum (i.op[1].regs);
5624 /* For ginsn, keep the imm as second src operand. */
5625 ginsn = ginsn_func (insn_end_sym, true,
5626 src_type, dw2_regnum, 0,
5627 GINSN_SRC_IMM, 0, src_imm,
5628 dst_type, dw2_regnum, 0);
5629
5630 ginsn_set_where (ginsn);
5631 }
5632
5633 return ginsn;
5634 }
5635
5636 /* Create ginsn(s) for MOV operations.
5637
5638 The generated ginsns corresponding to mov with indirect access to memory
5639 (src or dest) suffer with loss of information: when both index and base
5640 registers are at play, only base register gets conveyed in ginsn. Note
5641 this TBD_GINSN_GEN_NOT_SCFI. */
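/* For example, `movq %rsp, %rbp' yields a MOV ginsn %rsp -> %rbp, while
   `movq %rax, 8(%rsp)' yields a MOV to the indirect destination
   [%rsp + 8]. */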
5642
5643 static ginsnS *
5644 x86_ginsn_move (const symbolS *insn_end_sym)
5645 {
5646 ginsnS *ginsn = NULL;
5647 unsigned int dst_reg;
5648 unsigned int src_reg;
5649 offsetT src_disp = 0;
5650 offsetT dst_disp = 0;
5651 const reg_entry *dst = NULL;
5652 const reg_entry *src = NULL;
5653 uint16_t opcode = i.tm.base_opcode;
5654 enum ginsn_src_type src_type = GINSN_SRC_REG;
5655 enum ginsn_dst_type dst_type = GINSN_DST_REG;
5656
5657 /* mov %reg, symbol or mov symbol, %reg.
5658 Not of interest for SCFI. Also, TBD_GINSN_GEN_NOT_SCFI. */
5659 if (i.mem_operands == 1 && !i.base_reg && !i.index_reg)
5660 return ginsn;
5661
5662 /* 8/16/32-bit op size makes the destination reg untraceable for SCFI.
5663 Handle mov reg, reg only. mov to or from a memory operand will make
5664 dest reg, when present, untraceable, irrespective of the op size. */
5665 if (i.reg_operands == 2 && i.suffix != QWORD_MNEM_SUFFIX)
5666 return ginsn;
5667
5668 gas_assert (i.tm.opcode_space == SPACE_BASE);
5669 if (opcode == 0x8b || opcode == 0x8a)
5670 {
5671 /* mov disp(%reg), %reg. */
5672 if (i.mem_operands)
5673 {
5674 src = (i.base_reg) ? i.base_reg : i.index_reg;
5675 if (i.disp_operands == 1)
5676 src_disp = i.op[0].disps->X_add_number;
5677 src_type = GINSN_SRC_INDIRECT;
5678 }
5679 else
5680 src = i.op[0].regs;
5681
5682 dst = i.op[1].regs;
5683 }
5684 else if (opcode == 0x89 || opcode == 0x88)
5685 {
5686 /* mov %reg, disp(%reg). */
5687 src = i.op[0].regs;
5688 if (i.mem_operands)
5689 {
5690 dst = (i.base_reg) ? i.base_reg : i.index_reg;
5691 if (i.disp_operands == 1)
5692 dst_disp = i.op[1].disps->X_add_number;
5693 dst_type = GINSN_DST_INDIRECT;
5694 }
5695 else
5696 dst = i.op[1].regs;
5697 }
5698
5699 src_reg = ginsn_dw2_regnum (src);
5700 dst_reg = ginsn_dw2_regnum (dst);
5701
5702 ginsn = ginsn_new_mov (insn_end_sym, true,
5703 src_type, src_reg, src_disp,
5704 dst_type, dst_reg, dst_disp);
5705 ginsn_set_where (ginsn);
5706
5707 return ginsn;
5708 }
5709
5710 /* Generate appropriate ginsn for lea.
5711
5712 Unhandled sub-cases (marked with TBD_GINSN_GEN_NOT_SCFI) also suffer with
5713 some loss of information in the final ginsn chosen eventually (type
5714 GINSN_TYPE_OTHER). But this is fine for now for GINSN_GEN_SCFI generation
5715 mode. */
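/* For example, `lea 16(%rsp), %rbp' becomes an ADD ginsn (%rsp + 16 ->
   %rbp), while `lea (%rdi), %rdx' becomes a MOV ginsn (%rdi -> %rdx). */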
5716
5717 static ginsnS *
5718 x86_ginsn_lea (const symbolS *insn_end_sym)
5719 {
5720 offsetT src_disp = 0;
5721 ginsnS *ginsn = NULL;
5722 unsigned int src1_reg;
5723 const reg_entry *src1;
5724 offsetT index_scale;
5725 unsigned int dst_reg;
5726 bool index_regiz_p;
5727
5728 if ((!i.base_reg) != (!i.index_reg || i.index_reg->reg_num == RegIZ))
5729 {
5730 /* lea disp(%base), %dst or lea disp(,%index,imm), %dst.
5731 Either index_reg or base_reg exists, but not both. Further, as per
5732 above, the case when just %index exists but is equal to RegIZ is
5733 excluded. If not excluded, a GINSN_TYPE_MOV of %rsi
5734 (GINSN_DW2_REGNUM_RSI_DUMMY) to %dst will be generated by this block.
5735 Such a mov ginsn is imprecise; so exclude this case now and generate
5736 GINSN_TYPE_OTHER later via the x86_ginsn_unhandled () code path.
5737 Excluding other cases is required due to
5738 TBD_GINSN_REPRESENTATION_LIMIT. */
5739
5740 index_scale = i.log2_scale_factor;
5741 index_regiz_p = i.index_reg && i.index_reg->reg_num == RegIZ;
5742 src1 = i.base_reg ? i.base_reg : i.index_reg;
5743 src1_reg = ginsn_dw2_regnum (src1);
5744 dst_reg = ginsn_dw2_regnum (i.op[1].regs);
5745 /* It makes sense to represent a scale factor of 1 precisely here
5746 (i.e., not using GINSN_TYPE_OTHER, but rather similar to the
5747 base-without-index case). A non-zero scale factor is still OK if
5748 the index reg is zero reg.
5749 However, skip the case when disp is a symbol rather than a constant.
5750 TBD_GINSN_REPRESENTATION_LIMIT. */
5751 if ((!index_scale || index_regiz_p)
5752 && (!i.disp_operands || i.op[0].disps->X_op == O_constant))
5753 {
5754 if (i.disp_operands)
5755 src_disp = i.op[0].disps->X_add_number;
5756
5757 if (src_disp)
5758 /* Generate an ADD ginsn. */
5759 ginsn = ginsn_new_add (insn_end_sym, true,
5760 GINSN_SRC_REG, src1_reg, 0,
5761 GINSN_SRC_IMM, 0, src_disp,
5762 GINSN_DST_REG, dst_reg, 0);
5763 else
5764 /* Generate a MOV ginsn. */
5765 ginsn = ginsn_new_mov (insn_end_sym, true,
5766 GINSN_SRC_REG, src1_reg, 0,
5767 GINSN_DST_REG, dst_reg, 0);
5768
5769 ginsn_set_where (ginsn);
5770 }
5771 }
5772 /* Skip handling other cases here,
5773 - when (i.index_reg && i.base_reg) is true,
5774 e.g., lea disp(%base,%index,imm), %dst
5775 We do not have a ginsn representation for multiply.
5776 - or, when (!i.index_reg && !i.base_reg) is true,
5777 e.g., lea symbol, %dst
5778 Not a frequent pattern. If %dst is a register of interest, the user is
5779 likely to use a MOV op anyway.
5780 Deal with these via the x86_ginsn_unhandled () code path to generate
5781 GINSN_TYPE_OTHER when necessary. TBD_GINSN_GEN_NOT_SCFI. */
5782
5783 return ginsn;
5784 }
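/* Illustrative examples (an editor's sketch, not exhaustive):
   "lea 16(%rsp), %rbp" takes the ADD path above, roughly

     ginsn_new_add (insn_end_sym, true,
                    GINSN_SRC_REG, 7, 0,
                    GINSN_SRC_IMM, 0, 16,
                    GINSN_DST_REG, 6, 0);

   while "lea (%rsp), %rbp" (zero disp) degenerates to a MOV ginsn from
   %rsp to %rbp.  */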
5785
5786 static ginsnS *
5787 x86_ginsn_jump (const symbolS *insn_end_sym, bool cond_p)
5788 {
5789 ginsnS *ginsn = NULL;
5790 const symbolS *src_symbol;
5791 ginsnS * (*ginsn_func) (const symbolS *sym, bool real_p,
5792 enum ginsn_src_type src_type, unsigned int src_reg,
5793 const symbolS *src_ginsn_sym);
5794
5795 gas_assert (i.disp_operands == 1);
5796
5797 ginsn_func = cond_p ? ginsn_new_jump_cond : ginsn_new_jump;
5798 if (i.op[0].disps->X_op == O_symbol && !i.op[0].disps->X_add_number)
5799 {
5800 src_symbol = i.op[0].disps->X_add_symbol;
5801 ginsn = ginsn_func (insn_end_sym, true,
5802 GINSN_SRC_SYMBOL, 0, src_symbol);
5803
5804 ginsn_set_where (ginsn);
5805 }
5806 else
5807 {
5808 /* A non-zero addend in jump/JCC target makes control-flow tracking
5809 difficult. Skip SCFI for now. */
5810 as_bad (_("SCFI: `%s' insn with non-zero addend to sym not supported"),
5811 cond_p ? "JCC" : "jmp");
5812 return ginsn;
5813 }
5814
5815 return ginsn;
5816 }
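/* For example, "jmp .L1" (disp is a bare symbol) maps to a ginsn carrying
   GINSN_SRC_SYMBOL for .L1, whereas "jmp .L1+8" takes the as_bad path
   above because of the non-zero addend.  */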
5817
5818 static ginsnS *
5819 x86_ginsn_enter (const symbolS *insn_end_sym)
5820 {
5821 ginsnS *ginsn = NULL;
5822 ginsnS *ginsn_next = NULL;
5823 ginsnS *ginsn_last = NULL;
5824 /* In 64-bit mode, the default stack update size is 8 bytes. */
5825 int stack_opnd_size = 8;
5826
5827 gas_assert (i.imm_operands == 2);
5828
5829 /* For non-constant operands, bail out as untraceable for SCFI. */
5830 if (i.op[0].imms->X_op != O_constant || i.op[0].imms->X_add_symbol != 0
5831 || i.op[1].imms->X_op != O_constant || i.op[1].imms->X_add_symbol != 0)
5832 {
5833 as_bad (_("SCFI: enter insn with non-constant operand not supported"));
5834 return ginsn;
5835 }
5836
5837 /* Check if this is a 16-bit op. */
5838 if (ginsn_opsize_prefix_p ())
5839 stack_opnd_size = 2;
5840
5841 /* If the nesting level is 0, the processor pushes the frame pointer from
5842 the BP/EBP/RBP register onto the stack, copies the current stack
5843 pointer from the SP/ESP/RSP register into the BP/EBP/RBP register, and
5844 loads the SP/ESP/RSP register with the current stack-pointer value
5845 minus the value in the size operand. */
5846 ginsn = ginsn_new_sub (insn_end_sym, false,
5847 GINSN_SRC_REG, REG_SP, 0,
5848 GINSN_SRC_IMM, 0, stack_opnd_size,
5849 GINSN_DST_REG, REG_SP, 0);
5850 ginsn_set_where (ginsn);
5851 ginsn_next = ginsn_new_store (insn_end_sym, false,
5852 GINSN_SRC_REG, REG_FP,
5853 GINSN_DST_INDIRECT, REG_SP, 0);
5854 ginsn_set_where (ginsn_next);
5855 gas_assert (!ginsn_link_next (ginsn, ginsn_next));
5856 ginsn_last = ginsn_new_mov (insn_end_sym, false,
5857 GINSN_SRC_REG, REG_SP, 0,
5858 GINSN_DST_REG, REG_FP, 0);
5859 ginsn_set_where (ginsn_last);
5860 gas_assert (!ginsn_link_next (ginsn_next, ginsn_last));
5861
5862 return ginsn;
5863 }
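/* A sketch of the list built above for a 64-bit enter with constant
   operands (shown symbolically, not actual code):

     sub   REG_SP, 8, REG_SP      (make room for the implicit push)
     store REG_FP, [REG_SP]       (save the frame pointer)
     mov   REG_SP, REG_FP         (establish the new frame pointer)

   Only the head of the list is returned; the remaining ginsns hang off
   it via ginsn_link_next.  */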
5864
5865 static ginsnS *
5866 x86_ginsn_leave (const symbolS *insn_end_sym)
5867 {
5868 ginsnS *ginsn = NULL;
5869 ginsnS *ginsn_next = NULL;
5870 ginsnS *ginsn_last = NULL;
5871 /* In 64-bit mode, the default stack update size is 8 bytes. */
5872 int stack_opnd_size = 8;
5873
5874 /* Check if this is a 16-bit op. */
5875 if (ginsn_opsize_prefix_p ())
5876 stack_opnd_size = 2;
5877
5878 /* The 'leave' instruction copies the contents of the RBP register
5879 into the RSP register to release all stack space allocated to the
5880 procedure. */
5881 ginsn = ginsn_new_mov (insn_end_sym, false,
5882 GINSN_SRC_REG, REG_FP, 0,
5883 GINSN_DST_REG, REG_SP, 0);
5884 ginsn_set_where (ginsn);
5885 /* Then it restores the old value of the RBP register from the stack. */
5886 ginsn_next = ginsn_new_load (insn_end_sym, false,
5887 GINSN_SRC_INDIRECT, REG_SP, 0,
5888 GINSN_DST_REG, REG_FP);
5889 ginsn_set_where (ginsn_next);
5890 gas_assert (!ginsn_link_next (ginsn, ginsn_next));
5891 ginsn_last = ginsn_new_add (insn_end_sym, false,
5892 GINSN_SRC_REG, REG_SP, 0,
5893 GINSN_SRC_IMM, 0, stack_opnd_size,
5894 GINSN_DST_REG, REG_SP, 0);
5895 ginsn_set_where (ginsn_last);
5896 gas_assert (!ginsn_link_next (ginsn_next, ginsn_last));
5897
5898 return ginsn;
5899 }
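/* A sketch of the resulting list for a 64-bit leave, mirroring the
   epilogue sequence "movq %rbp, %rsp; popq %rbp":

     mov  REG_FP, REG_SP
     load [REG_SP], REG_FP
     add  REG_SP, 8, REG_SP  */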
5900
5901 /* Check if an instruction is whitelisted.
5902
5903 Some instructions may appear with REG_SP or REG_FP as destination, because
5904 of which they are deemed 'interesting' for SCFI. Whitelist them here if
5905 they do not affect SCFI correctness. */
5906
5907 static bool
5908 x86_ginsn_safe_to_skip_p (void)
5909 {
5910 bool skip_p = false;
5911 uint16_t opcode = i.tm.base_opcode;
5912
5913 switch (opcode)
5914 {
5915 case 0x80:
5916 case 0x81:
5917 case 0x83:
5918 if (i.tm.opcode_space != SPACE_BASE)
5919 break;
5920 /* cmp imm, reg/mem. */
5921 if (i.tm.extension_opcode == 7)
5922 skip_p = true;
5923 break;
5924
5925 case 0x38:
5926 case 0x39:
5927 case 0x3a:
5928 case 0x3b:
5929 if (i.tm.opcode_space != SPACE_BASE)
5930 break;
5931 /* cmp reg, reg/mem or cmp reg/mem, reg. */
5932 skip_p = true;
5933 break;
5934
5935 case 0xf6:
5936 case 0xf7:
5937 case 0x84:
5938 case 0x85:
5939 /* test imm/reg/mem, reg/mem. */
5940 if (i.tm.opcode_space != SPACE_BASE)
5941 break;
5942 skip_p = true;
5943 break;
5944
5945 default:
5946 break;
5947 }
5948
5949 return skip_p;
5950 }
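/* For example, "cmpq $0, %rsp" and "testq %rbp, %rbp" name REG_SP or
   REG_FP as their last operand but write only the flags, which is why
   the cmp and test encodings above are safe to skip.  */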
5951
5952 #define X86_GINSN_UNHANDLED_NONE 0
5953 #define X86_GINSN_UNHANDLED_DEST_REG 1
5954 #define X86_GINSN_UNHANDLED_CFG 2
5955 #define X86_GINSN_UNHANDLED_STACKOP 3
5956 #define X86_GINSN_UNHANDLED_UNEXPECTED 4
5957
5958 /* Check the input insn for its impact on the correctness of the synthesized
5959 CFI. Returns an error code to the caller. */
5960
5961 static int
5962 x86_ginsn_unhandled (void)
5963 {
5964 int err = X86_GINSN_UNHANDLED_NONE;
5965 const reg_entry *reg_op;
5966 unsigned int dw2_regnum;
5967
5968 /* Keep an eye out for instructions affecting control flow. */
5969 if (i.tm.opcode_modifier.jump)
5970 err = X86_GINSN_UNHANDLED_CFG;
5971 /* Also, for any instructions involving an implicit update to the stack
5972 pointer. */
5973 else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_STACK_OP)
5974 err = X86_GINSN_UNHANDLED_STACKOP;
5975 /* Finally, also check if the missed instructions are affecting REG_SP or
5976 REG_FP. The destination operand is always the last operand (the internal
5977 representation follows the AT&T syntax operand order). In case of Intel
5978 syntax input, this still remains true as swap_operands () has been done
5979 by now.
5980 PS: These checks do not involve index / base reg, as indirect memory
5981 accesses via REG_SP or REG_FP do not affect SCFI correctness.
5982 (Also note these instructions are candidates for other ginsn generation
5983 modes in future. TBD_GINSN_GEN_NOT_SCFI.) */
5984 else if (i.operands && i.reg_operands
5985 && !(i.flags[i.operands - 1] & Operand_Mem))
5986 {
5987 reg_op = i.op[i.operands - 1].regs;
5988 if (reg_op)
5989 {
5990 dw2_regnum = ginsn_dw2_regnum (reg_op);
5991 if (dw2_regnum == REG_SP || dw2_regnum == REG_FP)
5992 err = X86_GINSN_UNHANDLED_DEST_REG;
5993 }
5994 else
5995 /* Something unexpected. Indicate to caller. */
5996 err = X86_GINSN_UNHANDLED_UNEXPECTED;
5997 }
5998
5999 return err;
6000 }
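/* A hypothetical example: "andl $-16, %ebp" has no dedicated ginsn
   translation (8/16/32-bit ALU ops are skipped), so the checks above
   classify it as X86_GINSN_UNHANDLED_DEST_REG because its last
   (destination) operand resolves to REG_FP.  */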
6001
6002 /* Generate one or more generic GAS instructions, a.k.a. ginsns, for the
6003 current machine instruction.
6004
6005 Returns the head of the linked list of ginsn(s) added on success; returns
6006 NULL on failure.
6007
6008 The input ginsn_gen_mode GMODE determines the minimal set of ginsns
6009 necessary for the correctness of any passes applicable for that mode.
6010 For supporting the GINSN_GEN_SCFI generation mode, following is the list of
6011 machine instructions that must be translated into the corresponding ginsns
6012 to ensure correctness of SCFI:
6013 - All instructions affecting the two registers that could potentially
6014 be used as the base register for CFA tracking. For SCFI, the base
6015 register for CFA tracking is limited to REG_SP and REG_FP only for
6016 now.
6017 - All change of flow instructions: conditional and unconditional branches,
6018 call and return from functions.
6019 - All instructions that can potentially be a register save / restore
6020 operation.
6021 - All instructions that perform stack manipulation implicitly: the CALL,
6022 RET, PUSH, POP, ENTER, and LEAVE instructions.
6023
6024 The function currently supports the GINSN_GEN_SCFI generation mode only.
6025 Supporting other generation modes will require work on this target-specific
6026 process of ginsn creation:
6027 - Some of such places are tagged with TBD_GINSN_GEN_NOT_SCFI to serve as
6028 possible starting points.
6029 - Also note that ginsn representation may need enhancements. Specifically,
6030 note some TBD_GINSN_INFO_LOSS and TBD_GINSN_REPRESENTATION_LIMIT markers.
6031 */
6032
6033 static ginsnS *
6034 x86_ginsn_new (const symbolS *insn_end_sym, enum ginsn_gen_mode gmode)
6035 {
6036 int err = 0;
6037 uint16_t opcode;
6038 unsigned int dw2_regnum;
6039 const reg_entry *mem_reg;
6040 ginsnS *ginsn = NULL;
6041 ginsnS *ginsn_next = NULL;
6042 /* In 64-bit mode, the default stack update size is 8 bytes. */
6043 int stack_opnd_size = 8;
6044
6045 /* Currently supports generation of selected ginsns, sufficient for
6046 the use-case of SCFI only. */
6047 if (gmode != GINSN_GEN_SCFI)
6048 return ginsn;
6049
6050 opcode = i.tm.base_opcode;
6051
6052 /* Until it is clear how to handle APX NDD and other new opcodes, disallow
6053 them from SCFI. */
6054 if (is_apx_rex2_encoding ()
6055 || (i.tm.opcode_modifier.evex && is_apx_evex_encoding ()))
6056 {
6057 as_bad (_("SCFI: unsupported APX op %#x may cause incorrect CFI"),
6058 opcode);
6059 return ginsn;
6060 }
6061
6062 switch (opcode)
6063 {
6064
6065 /* The add opcodes 0x0/0x2 and sub opcodes 0x28/0x2a (with opcode_space
6066 SPACE_BASE) are 8-bit ops. While they are relevant for SCFI
6067 correctness, skip handling them here and use the x86_ginsn_unhandled
6068 code path to generate GINSN_TYPE_OTHER when necessary. */
6069
6070 case 0x1: /* add reg, reg/mem. */
6071 case 0x29: /* sub reg, reg/mem. */
6072 if (i.tm.opcode_space != SPACE_BASE)
6073 break;
6074 ginsn = x86_ginsn_addsub_reg_mem (insn_end_sym);
6075 break;
6076
6077 case 0x3: /* add reg/mem, reg. */
6078 case 0x2b: /* sub reg/mem, reg. */
6079 if (i.tm.opcode_space != SPACE_BASE)
6080 break;
6081 ginsn = x86_ginsn_addsub_mem_reg (insn_end_sym);
6082 break;
6083
6084 case 0xa0: /* push fs. */
6085 case 0xa8: /* push gs. */
6086 /* push fs / push gs have opcode_space == SPACE_0F. */
6087 if (i.tm.opcode_space != SPACE_0F)
6088 break;
6089 dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6090 /* Check if operation size is 16-bit. */
6091 if (ginsn_opsize_prefix_p ())
6092 stack_opnd_size = 2;
6093 ginsn = ginsn_new_sub (insn_end_sym, false,
6094 GINSN_SRC_REG, REG_SP, 0,
6095 GINSN_SRC_IMM, 0, stack_opnd_size,
6096 GINSN_DST_REG, REG_SP, 0);
6097 ginsn_set_where (ginsn);
6098 ginsn_next = ginsn_new_store (insn_end_sym, false,
6099 GINSN_SRC_REG, dw2_regnum,
6100 GINSN_DST_INDIRECT, REG_SP, 0);
6101 ginsn_set_where (ginsn_next);
6102 gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6103 break;
6104
6105 case 0xa1: /* pop fs. */
6106 case 0xa9: /* pop gs. */
6107 /* pop fs / pop gs have opcode_space == SPACE_0F. */
6108 if (i.tm.opcode_space != SPACE_0F)
6109 break;
6110 dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6111 /* Check if operation size is 16-bit. */
6112 if (ginsn_opsize_prefix_p ())
6113 stack_opnd_size = 2;
6114 ginsn = ginsn_new_load (insn_end_sym, false,
6115 GINSN_SRC_INDIRECT, REG_SP, 0,
6116 GINSN_DST_REG, dw2_regnum);
6117 ginsn_set_where (ginsn);
6118 ginsn_next = ginsn_new_add (insn_end_sym, false,
6119 GINSN_SRC_REG, REG_SP, 0,
6120 GINSN_SRC_IMM, 0, stack_opnd_size,
6121 GINSN_DST_REG, REG_SP, 0);
6122 ginsn_set_where (ginsn_next);
6123 gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6124 break;
6125
6126 case 0x50 ... 0x57:
6127 if (i.tm.opcode_space != SPACE_BASE)
6128 break;
6129 /* push reg. */
6130 dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6131 /* Check if operation size is 16-bit. */
6132 if (ginsn_opsize_prefix_p ())
6133 stack_opnd_size = 2;
6134 ginsn = ginsn_new_sub (insn_end_sym, false,
6135 GINSN_SRC_REG, REG_SP, 0,
6136 GINSN_SRC_IMM, 0, stack_opnd_size,
6137 GINSN_DST_REG, REG_SP, 0);
6138 ginsn_set_where (ginsn);
6139 ginsn_next = ginsn_new_store (insn_end_sym, false,
6140 GINSN_SRC_REG, dw2_regnum,
6141 GINSN_DST_INDIRECT, REG_SP, 0);
6142 ginsn_set_where (ginsn_next);
6143 gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6144 break;
6145
6146 case 0x58 ... 0x5f:
6147 if (i.tm.opcode_space != SPACE_BASE)
6148 break;
6149 /* pop reg. */
6150 dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6151 ginsn = ginsn_new_load (insn_end_sym, false,
6152 GINSN_SRC_INDIRECT, REG_SP, 0,
6153 GINSN_DST_REG, dw2_regnum);
6154 ginsn_set_where (ginsn);
6155 /* Check if operation size is 16-bit. */
6156 if (ginsn_opsize_prefix_p ())
6157 stack_opnd_size = 2;
6158 ginsn_next = ginsn_new_add (insn_end_sym, false,
6159 GINSN_SRC_REG, REG_SP, 0,
6160 GINSN_SRC_IMM, 0, stack_opnd_size,
6161 GINSN_DST_REG, REG_SP, 0);
6162 ginsn_set_where (ginsn_next);
6163 gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6164 break;
6165
6166 case 0x6a: /* push imm8. */
6167 case 0x68: /* push imm16/imm32. */
6168 if (i.tm.opcode_space != SPACE_BASE)
6169 break;
6170 /* Check if operation size is 16-bit. */
6171 if (ginsn_opsize_prefix_p ())
6172 stack_opnd_size = 2;
6173 /* Skip getting the value of imm from machine instruction
6174 because this is not important for SCFI. */
6175 ginsn = ginsn_new_sub (insn_end_sym, false,
6176 GINSN_SRC_REG, REG_SP, 0,
6177 GINSN_SRC_IMM, 0, stack_opnd_size,
6178 GINSN_DST_REG, REG_SP, 0);
6179 ginsn_set_where (ginsn);
6180 ginsn_next = ginsn_new_store (insn_end_sym, false,
6181 GINSN_SRC_IMM, 0,
6182 GINSN_DST_INDIRECT, REG_SP, 0);
6183 ginsn_set_where (ginsn_next);
6184 gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6185 break;
6186
6187 /* PS: Opcodes 0x80 ... 0x8f with opcode_space SPACE_0F are present
6188 only after relaxation. They do not need to be handled for ginsn
6189 creation. */
6190 case 0x70 ... 0x7f:
6191 if (i.tm.opcode_space != SPACE_BASE)
6192 break;
6193 ginsn = x86_ginsn_jump (insn_end_sym, true);
6194 break;
6195
6196 case 0x80:
6197 case 0x81:
6198 case 0x83:
6199 if (i.tm.opcode_space != SPACE_BASE)
6200 break;
6201 ginsn = x86_ginsn_alu_imm (insn_end_sym);
6202 break;
6203
6204 case 0x8a: /* mov r/m8, r8. */
6205 case 0x8b: /* mov r/m(16/32/64), r(16/32/64). */
6206 case 0x88: /* mov r8, r/m8. */
6207 case 0x89: /* mov r(16/32/64), r/m(16/32/64). */
6208 if (i.tm.opcode_space != SPACE_BASE)
6209 break;
6210 ginsn = x86_ginsn_move (insn_end_sym);
6211 break;
6212
6213 case 0x8d:
6214 if (i.tm.opcode_space != SPACE_BASE)
6215 break;
6216 /* lea disp(%base,%index,imm), %dst. */
6217 ginsn = x86_ginsn_lea (insn_end_sym);
6218 break;
6219
6220 case 0x8f:
6221 if (i.tm.opcode_space != SPACE_BASE)
6222 break;
6223 /* pop to reg/mem. */
6224 if (i.mem_operands)
6225 {
6226 mem_reg = (i.base_reg) ? i.base_reg : i.index_reg;
6227 /* Use a dummy register if no base or index. Unlike other opcodes,
6228 ginsns must be generated here as this op affects the stack pointer. */
6229 dw2_regnum = (mem_reg
6230 ? ginsn_dw2_regnum (mem_reg)
6231 : GINSN_DW2_REGNUM_RSI_DUMMY);
6232 }
6233 else
6234 dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6235 ginsn = ginsn_new_load (insn_end_sym, false,
6236 GINSN_SRC_INDIRECT, REG_SP, 0,
6237 GINSN_DST_INDIRECT, dw2_regnum);
6238 ginsn_set_where (ginsn);
6239 /* Check if operation size is 16-bit. */
6240 if (ginsn_opsize_prefix_p ())
6241 stack_opnd_size = 2;
6242 ginsn_next = ginsn_new_add (insn_end_sym, false,
6243 GINSN_SRC_REG, REG_SP, 0,
6244 GINSN_SRC_IMM, 0, stack_opnd_size,
6245 GINSN_DST_REG, REG_SP, 0);
6246 ginsn_set_where (ginsn_next);
6247 gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6248 break;
6249
6250 case 0x9c:
6251 if (i.tm.opcode_space != SPACE_BASE)
6252 break;
6253 /* pushf / pushfq. */
6254 /* Check if operation size is 16-bit. */
6255 if (ginsn_opsize_prefix_p ())
6256 stack_opnd_size = 2;
6257 ginsn = ginsn_new_sub (insn_end_sym, false,
6258 GINSN_SRC_REG, REG_SP, 0,
6259 GINSN_SRC_IMM, 0, stack_opnd_size,
6260 GINSN_DST_REG, REG_SP, 0);
6261 ginsn_set_where (ginsn);
6262 /* FIXME - hardcode the actual DWARF reg number value. For SCFI
6263 correctness this is simply a placeholder value; it is just clearer
6264 if the value is correct. */
6265 dw2_regnum = GINSN_DW2_REGNUM_EFLAGS;
6266 ginsn_next = ginsn_new_store (insn_end_sym, false,
6267 GINSN_SRC_REG, dw2_regnum,
6268 GINSN_DST_INDIRECT, REG_SP, 0);
6269 ginsn_set_where (ginsn_next);
6270 gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6271 break;
6272
6273 case 0x9d:
6274 if (i.tm.opcode_space != SPACE_BASE)
6275 break;
6276 /* popf / popfq. */
6277 /* Check if operation size is 16-bit. */
6278 if (ginsn_opsize_prefix_p ())
6279 stack_opnd_size = 2;
6280 /* FIXME - hardcode the actual DWARF reg number value. For SCFI
6281 correctness this is simply a placeholder value; it is just clearer
6282 if the value is correct. */
6283 dw2_regnum = GINSN_DW2_REGNUM_EFLAGS;
6284 ginsn = ginsn_new_load (insn_end_sym, false,
6285 GINSN_SRC_INDIRECT, REG_SP, 0,
6286 GINSN_DST_REG, dw2_regnum);
6287 ginsn_set_where (ginsn);
6288 ginsn_next = ginsn_new_add (insn_end_sym, false,
6289 GINSN_SRC_REG, REG_SP, 0,
6290 GINSN_SRC_IMM, 0, stack_opnd_size,
6291 GINSN_DST_REG, REG_SP, 0);
6292 ginsn_set_where (ginsn_next);
6293 gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6294 break;
6295
6296 case 0xff:
6297 if (i.tm.opcode_space != SPACE_BASE)
6298 break;
6299 /* push from reg/mem. */
6300 if (i.tm.extension_opcode == 6)
6301 {
6302 /* Check if operation size is 16-bit. */
6303 if (ginsn_opsize_prefix_p ())
6304 stack_opnd_size = 2;
6305 ginsn = ginsn_new_sub (insn_end_sym, false,
6306 GINSN_SRC_REG, REG_SP, 0,
6307 GINSN_SRC_IMM, 0, stack_opnd_size,
6308 GINSN_DST_REG, REG_SP, 0);
6309 ginsn_set_where (ginsn);
6310 if (i.mem_operands)
6311 {
6312 mem_reg = (i.base_reg) ? i.base_reg : i.index_reg;
6313 /* Use a dummy register if no base or index. Unlike other opcodes,
6314 ginsns must be generated here as this op affects the stack pointer. */
6315 dw2_regnum = (mem_reg
6316 ? ginsn_dw2_regnum (mem_reg)
6317 : GINSN_DW2_REGNUM_RSI_DUMMY);
6318 }
6319 else
6320 dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6321 ginsn_next = ginsn_new_store (insn_end_sym, false,
6322 GINSN_SRC_INDIRECT, dw2_regnum,
6323 GINSN_DST_INDIRECT, REG_SP, 0);
6324 ginsn_set_where (ginsn_next);
6325 gas_assert (!ginsn_link_next (ginsn, ginsn_next));
6326 }
6327 else if (i.tm.extension_opcode == 4)
6328 {
6329 /* jmp r/m. E.g., notrack jmp *%rax. */
6330 if (i.reg_operands)
6331 {
6332 dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6333 ginsn = ginsn_new_jump (insn_end_sym, true,
6334 GINSN_SRC_REG, dw2_regnum, NULL);
6335 ginsn_set_where (ginsn);
6336 }
6337 else if (i.mem_operands && i.index_reg)
6338 {
6339 /* jmp *0x0(,%rax,8). */
6340 dw2_regnum = ginsn_dw2_regnum (i.index_reg);
6341 ginsn = ginsn_new_jump (insn_end_sym, true,
6342 GINSN_SRC_REG, dw2_regnum, NULL);
6343 ginsn_set_where (ginsn);
6344 }
6345 else if (i.mem_operands && i.base_reg)
6346 {
6347 dw2_regnum = ginsn_dw2_regnum (i.base_reg);
6348 ginsn = ginsn_new_jump (insn_end_sym, true,
6349 GINSN_SRC_REG, dw2_regnum, NULL);
6350 ginsn_set_where (ginsn);
6351 }
6352 }
6353 else if (i.tm.extension_opcode == 2)
6354 {
6355 /* 0xFF /2 (call). */
6356 if (i.reg_operands)
6357 {
6358 dw2_regnum = ginsn_dw2_regnum (i.op[0].regs);
6359 ginsn = ginsn_new_call (insn_end_sym, true,
6360 GINSN_SRC_REG, dw2_regnum, NULL);
6361 ginsn_set_where (ginsn);
6362 }
6363 else if (i.mem_operands && i.base_reg)
6364 {
6365 dw2_regnum = ginsn_dw2_regnum (i.base_reg);
6366 ginsn = ginsn_new_call (insn_end_sym, true,
6367 GINSN_SRC_REG, dw2_regnum, NULL);
6368 ginsn_set_where (ginsn);
6369 }
6370 }
6371 break;
6372
6373 case 0xc2: /* ret imm16. */
6374 case 0xc3: /* ret. */
6375 if (i.tm.opcode_space != SPACE_BASE)
6376 break;
6377 /* Near ret. */
6378 ginsn = ginsn_new_return (insn_end_sym, true);
6379 ginsn_set_where (ginsn);
6380 break;
6381
6382 case 0xc8:
6383 if (i.tm.opcode_space != SPACE_BASE)
6384 break;
6385 /* enter. */
6386 ginsn = x86_ginsn_enter (insn_end_sym);
6387 break;
6388
6389 case 0xc9:
6390 if (i.tm.opcode_space != SPACE_BASE)
6391 break;
6392 /* leave. */
6393 ginsn = x86_ginsn_leave (insn_end_sym);
6394 break;
6395
6396 case 0xe0 ... 0xe2: /* loop / loope / loopne. */
6397 case 0xe3: /* jecxz / jrcxz. */
6398 if (i.tm.opcode_space != SPACE_BASE)
6399 break;
6400 ginsn = x86_ginsn_jump (insn_end_sym, true);
6401 ginsn_set_where (ginsn);
6402 break;
6403
6404 case 0xe8:
6405 if (i.tm.opcode_space != SPACE_BASE)
6406 break;
6407 /* PS: SCFI machinery does not care about which func is being
6408 called. OK to skip that info. */
6409 ginsn = ginsn_new_call (insn_end_sym, true,
6410 GINSN_SRC_SYMBOL, 0, NULL);
6411 ginsn_set_where (ginsn);
6412 break;
6413
6414 /* PS: opcode 0xe9 appears only after relaxation. Skip here. */
6415 case 0xeb:
6416 /* If opcode_space != SPACE_BASE, this is not a jmp insn. Skip it
6417 for GINSN_GEN_SCFI. */
6418 if (i.tm.opcode_space != SPACE_BASE)
6419 break;
6420 /* Unconditional jmp. */
6421 ginsn = x86_ginsn_jump (insn_end_sym, false);
6422 ginsn_set_where (ginsn);
6423 break;
6424
6425 default:
6426 /* TBD_GINSN_GEN_NOT_SCFI: Skip all other opcodes uninteresting for
6427 GINSN_GEN_SCFI mode. */
6428 break;
6429 }
6430
6431 if (!ginsn && !x86_ginsn_safe_to_skip_p ())
6432 {
6433 /* For all unhandled insns that are not whitelisted, check that they do
6434 not impact SCFI correctness. */
6435 err = x86_ginsn_unhandled ();
6436 switch (err)
6437 {
6438 case X86_GINSN_UNHANDLED_NONE:
6439 break;
6440 case X86_GINSN_UNHANDLED_DEST_REG:
6441 /* Not all writes to REG_FP are harmful in the context of SCFI. Simply
6442 generate a GINSN_TYPE_OTHER with destination set to the
6443 appropriate register. The SCFI machinery will bail out if this
6444 ginsn affects SCFI correctness. */
6445 dw2_regnum = ginsn_dw2_regnum (i.op[i.operands - 1].regs);
6446 ginsn = ginsn_new_other (insn_end_sym, true,
6447 GINSN_SRC_IMM, 0,
6448 GINSN_SRC_IMM, 0,
6449 GINSN_DST_REG, dw2_regnum);
6450 ginsn_set_where (ginsn);
6451 break;
6452 case X86_GINSN_UNHANDLED_CFG:
6453 case X86_GINSN_UNHANDLED_STACKOP:
6454 as_bad (_("SCFI: unhandled op %#x may cause incorrect CFI"), opcode);
6455 break;
6456 case X86_GINSN_UNHANDLED_UNEXPECTED:
6457 as_bad (_("SCFI: unexpected op %#x may cause incorrect CFI"),
6458 opcode);
6459 break;
6460 default:
6461 abort ();
6462 break;
6463 }
6464 }
6465
6466 return ginsn;
6467 }
6468
6469 #endif
6470
6471 /* This is the guts of the machine-dependent assembler. LINE points to a
6472 machine dependent instruction. This function is supposed to emit
6473 the frags/bytes it assembles to. */
6474
6475 void
6476 md_assemble (char *line)
6477 {
6478 unsigned int j;
6479 char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
6480 const char *end, *pass1_mnem = NULL;
6481 enum i386_error pass1_err = 0;
6482 const insn_template *t;
6483 struct last_insn *last_insn
6484 = &seg_info(now_seg)->tc_segment_info_data.last_insn;
6485
6486 /* Initialize globals. */
6487 current_templates.end = current_templates.start = NULL;
6488 retry:
6489 init_globals ();
6490
6491 /* Suppress optimization when the last thing we saw may not have been
6492 a proper instruction (e.g. a stand-alone prefix or .byte). */
6493 if (last_insn->kind != last_insn_other)
6494 i.no_optimize = true;
6495
6496 /* First parse an instruction mnemonic & call i386_operand for the operands.
6497 We assume that the scrubber has arranged it so that line[0] is the valid
6498 start of a (possibly prefixed) mnemonic. */
6499
6500 end = parse_insn (line, mnemonic, false);
6501 if (end == NULL)
6502 {
6503 if (pass1_mnem != NULL)
6504 goto match_error;
6505 if (i.error != no_error)
6506 {
6507 gas_assert (current_templates.start != NULL);
6508 if (may_need_pass2 (current_templates.start) && !i.suffix)
6509 goto no_match;
6510 /* No point in trying a 2nd pass - it'll only find the same suffix
6511 again. */
6512 mnem_suffix = i.suffix;
6513 goto match_error;
6514 }
6515 return;
6516 }
6517 t = current_templates.start;
6518 if (may_need_pass2 (t))
6519 {
6520 /* Make a copy of the full line in case we need to retry. */
6521 copy = xstrdup (line);
6522 }
6523 line += end - line;
6524 mnem_suffix = i.suffix;
6525
6526 line = parse_operands (line, mnemonic);
6527 this_operand = -1;
6528 if (line == NULL)
6529 {
6530 free (copy);
6531 return;
6532 }
6533
6534 /* Now we've parsed the mnemonic into a set of templates, and have the
6535 operands at hand. */
6536
6537 /* All Intel opcodes have reversed operands except for "bound", "enter",
6538 "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
6539 "rmpadjust", "rmpupdate", and "rmpquery". We also don't reverse
6540 intersegment "jmp" and "call" instructions with 2 immediate operands so
6541 that the immediate segment precedes the offset consistently in Intel and
6542 AT&T modes. */
6543 if (intel_syntax
6544 && i.operands > 1
6545 && (t->mnem_off != MN_bound)
6546 && !startswith (mnemonic, "invlpg")
6547 && !startswith (mnemonic, "monitor")
6548 && !startswith (mnemonic, "mwait")
6549 && (t->mnem_off != MN_pvalidate)
6550 && !startswith (mnemonic, "rmp")
6551 && (t->mnem_off != MN_tpause)
6552 && (t->mnem_off != MN_umwait)
6553 && !(i.operands == 2
6554 && operand_type_check (i.types[0], imm)
6555 && operand_type_check (i.types[1], imm)))
6556 swap_operands ();
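/* For instance, the Intel-syntax input "mov rax, rbx" arrives
   destination-first; after swap_operands () the internal operand order
   matches the AT&T form "mov %rbx, %rax": sources first, destination
   last.  */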
6557
6558 /* The order of the immediates should be reversed for 2-immediates EXTRQ
6559 and INSERTQ instructions. Also UWRMSR wants its immediate to be in the
6560 "canonical" place (first), despite it appearing last (in AT&T syntax, or
6561 because of the swapping above) in the incoming set of operands. */
6562 if ((i.imm_operands == 2
6563 && (t->mnem_off == MN_extrq || t->mnem_off == MN_insertq))
6564 || (t->mnem_off == MN_uwrmsr && i.imm_operands
6565 && i.operands > i.imm_operands))
6566 swap_2_operands (0, 1);
6567
6568 if (i.imm_operands)
6569 {
6570 /* For USER_MSR instructions, imm32 stands for the name of a model-specific
6571 register (MSR). That's an unsigned quantity, whereas all other insns with
6572 32-bit immediate and 64-bit operand size use sign-extended
6573 immediates (imm32s). Therefore these insns are special-cased, bypassing
6574 the normal handling of immediates here. */
6575 if (is_cpu(current_templates.start, CpuUSER_MSR))
6576 {
6577 for (j = 0; j < i.operands; j++)
6578 {
6579 if (operand_type_check(i.types[j], imm))
6580 i.types[j] = smallest_imm_type (i.op[j].imms->X_add_number);
6581 }
6582 }
6583 else
6584 optimize_imm ();
6585 }
6586
6587 if (i.disp_operands && !optimize_disp (t))
6588 return;
6589
6590 /* Next, we find a template that matches the given insn,
6591 making sure the overlap of the given operands types is consistent
6592 with the template operand types. */
6593
6594 if (!(t = match_template (mnem_suffix)))
6595 {
6596 const char *err_msg;
6597
6598 if (copy && !mnem_suffix)
6599 {
6600 line = copy;
6601 copy = NULL;
6602 no_match:
6603 pass1_err = i.error;
6604 pass1_mnem = insn_name (current_templates.start);
6605 goto retry;
6606 }
6607
6608 /* If a non-/only-64bit template (group) was found in pass 1, and if
6609 _some_ template (group) was found in pass 2, squash pass 1's
6610 error. */
6611 if (pass1_err == unsupported_64bit)
6612 pass1_mnem = NULL;
6613
6614 match_error:
6615 free (copy);
6616
6617 switch (pass1_mnem ? pass1_err : i.error)
6618 {
6619 default:
6620 abort ();
6621 case operand_size_mismatch:
6622 err_msg = _("operand size mismatch");
6623 break;
6624 case operand_type_mismatch:
6625 err_msg = _("operand type mismatch");
6626 break;
6627 case register_type_mismatch:
6628 err_msg = _("register type mismatch");
6629 break;
6630 case number_of_operands_mismatch:
6631 err_msg = _("number of operands mismatch");
6632 break;
6633 case invalid_instruction_suffix:
6634 err_msg = _("invalid instruction suffix");
6635 break;
6636 case bad_imm4:
6637 err_msg = _("constant doesn't fit in 4 bits");
6638 break;
6639 case unsupported_with_intel_mnemonic:
6640 err_msg = _("unsupported with Intel mnemonic");
6641 break;
6642 case unsupported_syntax:
6643 err_msg = _("unsupported syntax");
6644 break;
6645 case unsupported_EGPR_for_addressing:
6646 err_msg = _("extended GPR cannot be used as base/index");
6647 break;
6648 case unsupported:
6649 as_bad (_("unsupported instruction `%s'"),
6650 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6651 return;
6652 case unsupported_on_arch:
6653 as_bad (_("`%s' is not supported on `%s%s'"),
6654 pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
6655 cpu_arch_name ? cpu_arch_name : default_arch,
6656 cpu_sub_arch_name ? cpu_sub_arch_name : "");
6657 return;
6658 case unsupported_64bit:
6659 if (ISLOWER (mnem_suffix))
6660 {
6661 if (flag_code == CODE_64BIT)
6662 as_bad (_("`%s%c' is not supported in 64-bit mode"),
6663 pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
6664 mnem_suffix);
6665 else
6666 as_bad (_("`%s%c' is only supported in 64-bit mode"),
6667 pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
6668 mnem_suffix);
6669 }
6670 else
6671 {
6672 if (flag_code == CODE_64BIT)
6673 as_bad (_("`%s' is not supported in 64-bit mode"),
6674 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6675 else
6676 as_bad (_("`%s' is only supported in 64-bit mode"),
6677 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6678 }
6679 return;
6680 case no_vex_encoding:
6681 err_msg = _("no VEX/XOP encoding");
6682 break;
6683 case no_evex_encoding:
6684 err_msg = _("no EVEX encoding");
6685 break;
6686 case invalid_sib_address:
6687 err_msg = _("invalid SIB address");
6688 break;
6689 case invalid_vsib_address:
6690 err_msg = _("invalid VSIB address");
6691 break;
6692 case invalid_vector_register_set:
6693 err_msg = _("mask, index, and destination registers must be distinct");
6694 break;
6695 case invalid_tmm_register_set:
6696 err_msg = _("all tmm registers must be distinct");
6697 break;
6698 case invalid_dest_and_src_register_set:
6699 err_msg = _("destination and source registers must be distinct");
6700 break;
6701 case invalid_dest_register_set:
6702 err_msg = _("two dest registers must be distinct");
6703 break;
6704 case invalid_pseudo_prefix:
6705 err_msg = _("rex2 pseudo prefix cannot be used");
6706 break;
6707 case unsupported_vector_index_register:
6708 err_msg = _("unsupported vector index register");
6709 break;
6710 case unsupported_broadcast:
6711 err_msg = _("unsupported broadcast");
6712 break;
6713 case broadcast_needed:
6714 err_msg = _("broadcast is needed for operand of such type");
6715 break;
6716 case unsupported_masking:
6717 err_msg = _("unsupported masking");
6718 break;
6719 case mask_not_on_destination:
6720 err_msg = _("mask not on destination operand");
6721 break;
6722 case no_default_mask:
6723 err_msg = _("default mask isn't allowed");
6724 break;
6725 case unsupported_rc_sae:
6726 err_msg = _("unsupported static rounding/sae");
6727 break;
6728 case unsupported_vector_size:
6729 as_bad (_("vector size above %u required for `%s'"), 128u << vector_size,
6730 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6731 return;
6732 case unsupported_rsp_register:
6733 err_msg = _("'rsp' register cannot be used");
6734 break;
6735 case internal_error:
6736 err_msg = _("internal error");
6737 break;
6738 }
6739 as_bad (_("%s for `%s'"), err_msg,
6740 pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
6741 return;
6742 }
6743
6744 free (copy);
6745
6746 if (sse_check != check_none
6747 /* The opcode space check isn't strictly needed; it's there only to
6748 bypass the logic below when easily possible. */
6749 && t->opcode_space >= SPACE_0F
6750 && t->opcode_space <= SPACE_0F3A
6751 && !is_cpu (&i.tm, CpuSSE4a)
6752 && !is_any_vex_encoding (t))
6753 {
6754 /* Some KL and all WideKL insns have only implicit %xmm operands. */
6755 bool simd = is_cpu (t, CpuKL) || is_cpu (t, CpuWideKL);
6756
6757 for (j = 0; j < t->operands; ++j)
6758 {
6759 if (t->operand_types[j].bitfield.class == RegMMX)
6760 break;
6761 if (t->operand_types[j].bitfield.class == RegSIMD)
6762 simd = true;
6763 }
6764
6765 if (j >= t->operands && simd)
6766 (sse_check == check_warning
6767 ? as_warn
6768 : as_bad) (_("SSE instruction `%s' is used"), insn_name (&i.tm));
6769 }
6770
6771 if (i.tm.opcode_modifier.fwait)
6772 if (!add_prefix (FWAIT_OPCODE))
6773 return;
6774
6775 /* Check if REP prefix is OK. */
6776 if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
6777 {
6778 as_bad (_("invalid instruction `%s' after `%s'"),
6779 insn_name (&i.tm), i.rep_prefix);
6780 return;
6781 }
6782
6783 /* Check for lock without a lockable instruction. Destination operand
6784 must be memory unless it is xchg (0x86). */
6785 if (i.prefix[LOCK_PREFIX])
6786 {
6787 if (i.tm.opcode_modifier.prefixok < PrefixLock
6788 || i.mem_operands == 0
6789 || (i.tm.base_opcode != 0x86
6790 && !(i.flags[i.operands - 1] & Operand_Mem)))
6791 {
6792 as_bad (_("expecting lockable instruction after `lock'"));
6793 return;
6794 }
6795
6796 /* Zap the redundant prefix from XCHG when optimizing. */
6797 if (i.tm.base_opcode == 0x86 && optimize && !i.no_optimize)
6798 i.prefix[LOCK_PREFIX] = 0;
6799 }
6800
6801 if (is_any_vex_encoding (&i.tm)
6802 || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
6803 || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
6804 {
6805 /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns. */
6806 if (i.prefix[DATA_PREFIX])
6807 {
6808 as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
6809 return;
6810 }
6811
6812 /* Don't allow e.g. KMOV in TLS code sequences. */
6813 for (j = i.imm_operands; j < i.operands; ++j)
6814 switch (i.reloc[j])
6815 {
6816 case BFD_RELOC_X86_64_GOTTPOFF:
6817 if (i.tm.mnem_off == MN_add
6818 && i.tm.opcode_space == SPACE_EVEXMAP4
6819 && i.mem_operands == 1
6820 && i.base_reg
6821 && i.base_reg->reg_num == RegIP
6822 && i.tm.operand_types[0].bitfield.class == Reg
6823 && i.tm.operand_types[2].bitfield.class == Reg)
6824 /* Allow APX: add %reg1, foo@gottpoff(%rip), %reg2. */
6825 break;
6826 /* Fall through. */
6827 case BFD_RELOC_386_TLS_GOTIE:
6828 case BFD_RELOC_386_TLS_LE_32:
6829 case BFD_RELOC_X86_64_TLSLD:
6830 as_bad (_("TLS relocation cannot be used with `%s'"), insn_name (&i.tm));
6831 return;
6832 default:
6833 break;
6834 }
6835 }
6836
6837 /* Check if HLE prefix is OK. */
6838 if (i.hle_prefix && !check_hle ())
6839 return;
6840
6841 /* Check BND prefix. */
6842 if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
6843 as_bad (_("expecting valid branch instruction after `bnd'"));
6844
6845 /* Check NOTRACK prefix. */
6846 if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
6847 as_bad (_("expecting indirect branch instruction after `notrack'"));
6848
6849 if (is_cpu (&i.tm, CpuMPX))
6850 {
6851 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
6852 as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
6853 else if (flag_code != CODE_16BIT
6854 ? i.prefix[ADDR_PREFIX]
6855 : i.mem_operands && !i.prefix[ADDR_PREFIX])
6856 as_bad (_("16-bit address isn't allowed in MPX instructions"));
6857 }
6858
6859 /* Insert BND prefix. */
6860 if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
6861 {
6862 if (!i.prefix[BND_PREFIX])
6863 add_prefix (BND_PREFIX_OPCODE);
6864 else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
6865 {
6866 as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
6867 i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
6868 }
6869 }
6870
6871 /* Check string instruction segment overrides. */
6872 if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
6873 {
6874 gas_assert (i.mem_operands);
6875 if (!check_string ())
6876 return;
6877 i.disp_operands = 0;
6878 }
6879
6880 /* The memory operand of (%dx) should be only used with input/output
6881 instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee). */
6882 if (i.input_output_operand
6883 && ((i.tm.base_opcode | 0x82) != 0xee
6884 || i.tm.opcode_space != SPACE_BASE))
6885 {
6886 as_bad (_("input/output port address isn't allowed with `%s'"),
6887 insn_name (&i.tm));
6888 return;
6889 }
6890
6891 if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
6892 optimize_encoding ();
6893
6894 /* Past optimization there's no need to distinguish encoding_evex,
6895 encoding_evex512, and encoding_egpr anymore. */
6896 if (i.encoding == encoding_evex512)
6897 i.encoding = encoding_evex;
6898 else if (i.encoding == encoding_egpr)
6899 i.encoding = is_any_vex_encoding (&i.tm) ? encoding_evex
6900 : encoding_default;
6901
6902 if (use_unaligned_vector_move)
6903 encode_with_unaligned_vector_move ();
6904
6905 if (!process_suffix ())
6906 return;
6907
6908 /* Check if IP-relative addressing requirements can be satisfied. */
6909 if (is_cpu (&i.tm, CpuPREFETCHI)
6910 && !(i.base_reg && i.base_reg->reg_num == RegIP))
6911 as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
6912
6913 /* Update operand types and check extended states. */
6914 for (j = 0; j < i.operands; j++)
6915 {
6916 i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
6917 switch (i.tm.operand_types[j].bitfield.class)
6918 {
6919 default:
6920 break;
6921 case RegMMX:
6922 i.xstate |= xstate_mmx;
6923 break;
6924 case RegMask:
6925 i.xstate |= xstate_mask;
6926 break;
6927 case RegSIMD:
6928 if (i.tm.operand_types[j].bitfield.tmmword)
6929 i.xstate |= xstate_tmm;
6930 else if (i.tm.operand_types[j].bitfield.zmmword
6931 && !i.tm.opcode_modifier.vex
6932 && vector_size >= VSZ512)
6933 i.xstate |= xstate_zmm;
6934 else if (i.tm.operand_types[j].bitfield.ymmword
6935 && vector_size >= VSZ256)
6936 i.xstate |= xstate_ymm;
6937 else if (i.tm.operand_types[j].bitfield.xmmword)
6938 i.xstate |= xstate_xmm;
6939 break;
6940 }
6941 }
6942
6943 /* Make still unresolved immediate matches conform to size of immediate
6944 given in i.suffix. */
6945 if (!finalize_imm ())
6946 return;
6947
6948 if (i.types[0].bitfield.imm1)
6949 i.imm_operands = 0; /* kludge for shift insns. */
6950
6951 /* For insns with operands there are more diddles to do to the opcode. */
6952 if (i.operands)
6953 {
6954 if (!process_operands ())
6955 return;
6956 }
6957 else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
6958 {
6959 /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc. */
6960 as_warn (_("translating to `%sp'"), insn_name (&i.tm));
6961 }
6962
6963 if (is_any_vex_encoding (&i.tm))
6964 {
6965 if (!cpu_arch_flags.bitfield.cpui286)
6966 {
6967 as_bad (_("instruction `%s' isn't supported outside of protected mode."),
6968 insn_name (&i.tm));
6969 return;
6970 }
6971
6972 /* Check for explicit REX prefix. */
6973 if (i.prefix[REX_PREFIX] || i.rex_encoding)
6974 {
6975 as_bad (_("REX prefix invalid with `%s'"), insn_name (&i.tm));
6976 return;
6977 }
6978
6979 /* Check for explicit REX2 prefix. */
6980 if (i.rex2_encoding)
6981 {
6982 as_bad (_("{rex2} prefix invalid with `%s'"), insn_name (&i.tm));
6983 return;
6984 }
6985
6986 if (is_apx_evex_encoding ())
6987 build_apx_evex_prefix ();
6988 else if (i.tm.opcode_modifier.vex)
6989 build_vex_prefix (t);
6990 else
6991 build_evex_prefix ();
6992
6993 /* The individual REX.RXBW bits got consumed. */
6994 i.rex &= REX_OPCODE;
6995
6996 /* The rex2 bits got consumed. */
6997 i.rex2 = 0;
6998 }
6999
7000 /* Handle conversion of 'int $3' --> special int3 insn. */
7001 if (i.tm.mnem_off == MN_int
7002 && i.op[0].imms->X_add_number == 3)
7003 {
7004 i.tm.base_opcode = INT3_OPCODE;
7005 i.imm_operands = 0;
7006 }
7007
7008 if ((i.tm.opcode_modifier.jump == JUMP
7009 || i.tm.opcode_modifier.jump == JUMP_BYTE
7010 || i.tm.opcode_modifier.jump == JUMP_DWORD)
7011 && i.op[0].disps->X_op == O_constant)
7012 {
7013 /* Convert "jmp constant" (and "call constant") to a jump (call) to
7014 the absolute address given by the constant. Since ix86 jumps and
7015 calls are pc relative, we need to generate a reloc. */
7016 i.op[0].disps->X_add_symbol = &abs_symbol;
7017 i.op[0].disps->X_op = O_symbol;
7018 }
7019
7020 establish_rex ();
7021
7022 insert_lfence_before (last_insn);
7023
7024 /* We are ready to output the insn. */
7025 output_insn (last_insn);
7026
7027 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7028 /* PS: SCFI is enabled only for System V AMD64 ABI. The ABI check has been
7029 performed in i386_target_format. */
7030 if (IS_ELF && flag_synth_cfi)
7031 {
7032 ginsnS *ginsn;
7033 ginsn = x86_ginsn_new (symbol_temp_new_now (), frch_ginsn_gen_mode ());
7034 frch_ginsn_data_append (ginsn);
7035 }
7036 #endif
7037
7038 insert_lfence_after ();
7039
7040 if (i.tm.opcode_modifier.isprefix)
7041 {
7042 last_insn->kind = last_insn_prefix;
7043 last_insn->name = insn_name (&i.tm);
7044 last_insn->file = as_where (&last_insn->line);
7045 }
7046 else
7047 last_insn->kind = last_insn_other;
7048 }
7049
7050 /* The Q suffix is generally valid only in 64-bit mode, with very few
7051 exceptions: fild, fistp, fisttp, and cmpxchg8b. Note that for fild
7052 and fisttp only one of their two templates is matched below: That's
7053 sufficient since other relevant attributes are the same between both
7054 respective templates. */
7055 static INLINE bool q_suffix_allowed (const insn_template *t)
7056 {
7057 return flag_code == CODE_64BIT
7058 || (t->opcode_space == SPACE_BASE
7059 && t->base_opcode == 0xdf
7060 && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
7061 || t->mnem_off == MN_cmpxchg8b;
7062 }
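/* For example, "fildq (%esp)" and "cmpxchg8b (%esp)" thus assemble fine
   in 32-bit mode, while any other q-suffixed mnemonic is diagnosed as
   being supported only in 64-bit mode.  */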
7063
7064 static const char *
7065 parse_insn (const char *line, char *mnemonic, bool prefix_only)
7066 {
7067 const char *l = line, *token_start = l;
7068 char *mnem_p;
7069 bool pass1 = !current_templates.start;
7070 int supported;
7071 const insn_template *t;
7072 char *dot_p = NULL;
7073
7074 while (1)
7075 {
7076 mnem_p = mnemonic;
7077 /* Pseudo-prefixes start with an opening figure brace. */
7078 if ((*mnem_p = *l) == '{')
7079 {
7080 ++mnem_p;
7081 ++l;
7082 }
7083 while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
7084 {
7085 if (*mnem_p == '.')
7086 dot_p = mnem_p;
7087 mnem_p++;
7088 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
7089 {
7090 too_long:
7091 as_bad (_("no such instruction: `%s'"), token_start);
7092 return NULL;
7093 }
7094 l++;
7095 }
7096 /* Pseudo-prefixes end with a closing figure brace. */
7097 if (*mnemonic == '{' && *l == '}')
7098 {
7099 *mnem_p++ = *l++;
7100 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
7101 goto too_long;
7102 *mnem_p = '\0';
7103
7104 /* Point l at the closing brace if there's no other separator. */
7105 if (*l != END_OF_INSN && !is_space_char (*l)
7106 && *l != PREFIX_SEPARATOR)
7107 --l;
7108 }
7109 else if (!is_space_char (*l)
7110 && *l != END_OF_INSN
7111 && (intel_syntax
7112 || (*l != PREFIX_SEPARATOR && *l != ',')))
7113 {
7114 if (prefix_only)
7115 break;
7116 as_bad (_("invalid character %s in mnemonic"),
7117 output_invalid (*l));
7118 return NULL;
7119 }
7120 if (token_start == l)
7121 {
7122 if (!intel_syntax && *l == PREFIX_SEPARATOR)
7123 as_bad (_("expecting prefix; got nothing"));
7124 else
7125 as_bad (_("expecting mnemonic; got nothing"));
7126 return NULL;
7127 }
7128
7129 /* Look up instruction (or prefix) via hash table. */
7130 op_lookup (mnemonic);
7131
7132 if (*l != END_OF_INSN
7133 && (!is_space_char (*l) || l[1] != END_OF_INSN)
7134 && current_templates.start
7135 && current_templates.start->opcode_modifier.isprefix)
7136 {
7137 supported = cpu_flags_match (current_templates.start);
7138 if (!(supported & CPU_FLAGS_64BIT_MATCH))
7139 {
7140 as_bad ((flag_code != CODE_64BIT
7141 ? _("`%s' is only supported in 64-bit mode")
7142 : _("`%s' is not supported in 64-bit mode")),
7143 insn_name (current_templates.start));
7144 return NULL;
7145 }
7146 if (supported != CPU_FLAGS_PERFECT_MATCH)
7147 {
7148 as_bad (_("`%s' is not supported on `%s%s'"),
7149 insn_name (current_templates.start),
7150 cpu_arch_name ? cpu_arch_name : default_arch,
7151 cpu_sub_arch_name ? cpu_sub_arch_name : "");
7152 return NULL;
7153 }
7154 /* If we are in 16-bit mode, do not allow addr16 or data16.
7155 Similarly, in 32-bit mode, do not allow addr32 or data32. */
7156 if ((current_templates.start->opcode_modifier.size == SIZE16
7157 || current_templates.start->opcode_modifier.size == SIZE32)
7158 && flag_code != CODE_64BIT
7159 && ((current_templates.start->opcode_modifier.size == SIZE32)
7160 ^ (flag_code == CODE_16BIT)))
7161 {
7162 as_bad (_("redundant %s prefix"),
7163 insn_name (current_templates.start));
7164 return NULL;
7165 }
7166
7167 if (current_templates.start->base_opcode == PSEUDO_PREFIX)
7168 {
7169 /* Handle pseudo prefixes. */
7170 switch (current_templates.start->extension_opcode)
7171 {
7172 case Prefix_Disp8:
7173 /* {disp8} */
7174 i.disp_encoding = disp_encoding_8bit;
7175 break;
7176 case Prefix_Disp16:
7177 /* {disp16} */
7178 i.disp_encoding = disp_encoding_16bit;
7179 break;
7180 case Prefix_Disp32:
7181 /* {disp32} */
7182 i.disp_encoding = disp_encoding_32bit;
7183 break;
7184 case Prefix_Load:
7185 /* {load} */
7186 i.dir_encoding = dir_encoding_load;
7187 break;
7188 case Prefix_Store:
7189 /* {store} */
7190 i.dir_encoding = dir_encoding_store;
7191 break;
7192 case Prefix_VEX:
7193 /* {vex} */
7194 i.encoding = encoding_vex;
7195 break;
7196 case Prefix_VEX3:
7197 /* {vex3} */
7198 i.encoding = encoding_vex3;
7199 break;
7200 case Prefix_EVEX:
7201 /* {evex} */
7202 i.encoding = encoding_evex;
7203 break;
7204 case Prefix_REX:
7205 /* {rex} */
7206 i.rex_encoding = true;
7207 break;
7208 case Prefix_REX2:
7209 /* {rex2} */
7210 i.rex2_encoding = true;
7211 break;
7212 case Prefix_NoOptimize:
7213 /* {nooptimize} */
7214 i.no_optimize = true;
7215 break;
7216 default:
7217 abort ();
7218 }
7219 }
7220 else
7221 {
7222 /* Add prefix, checking for repeated prefixes. */
7223 switch (add_prefix (current_templates.start->base_opcode))
7224 {
7225 case PREFIX_EXIST:
7226 return NULL;
7227 case PREFIX_DS:
7228 if (is_cpu (current_templates.start, CpuIBT))
7229 i.notrack_prefix = insn_name (current_templates.start);
7230 break;
7231 case PREFIX_REP:
7232 if (is_cpu (current_templates.start, CpuHLE))
7233 i.hle_prefix = insn_name (current_templates.start);
7234 else if (is_cpu (current_templates.start, CpuMPX))
7235 i.bnd_prefix = insn_name (current_templates.start);
7236 else
7237 i.rep_prefix = insn_name (current_templates.start);
7238 break;
7239 default:
7240 break;
7241 }
7242 }
7243 /* Skip past PREFIX_SEPARATOR and reset token_start. */
7244 token_start = ++l;
7245 }
7246 else
7247 break;
7248 }
7249
7250 if (prefix_only)
7251 return token_start;
7252
7253 if (!current_templates.start)
7254 {
7255 /* Deprecated functionality (new code should use pseudo-prefixes instead):
7256 Check if we should swap operands or force a 32bit displacement in
7257 the encoding. */
7258 if (mnem_p - 2 == dot_p && dot_p[1] == 's')
7259 {
7260 if (i.dir_encoding == dir_encoding_default)
7261 i.dir_encoding = dir_encoding_swap;
7262 else
7263 as_warn (_("ignoring `.s' suffix due to earlier `{%s}'"),
7264 i.dir_encoding == dir_encoding_load ? "load" : "store");
7265 }
7266 else if (mnem_p - 3 == dot_p
7267 && dot_p[1] == 'd'
7268 && dot_p[2] == '8')
7269 {
7270 if (i.disp_encoding == disp_encoding_default)
7271 i.disp_encoding = disp_encoding_8bit;
7272 else if (i.disp_encoding != disp_encoding_8bit)
7273 as_warn (_("ignoring `.d8' suffix due to earlier `{disp<N>}'"));
7274 }
7275 else if (mnem_p - 4 == dot_p
7276 && dot_p[1] == 'd'
7277 && dot_p[2] == '3'
7278 && dot_p[3] == '2')
7279 {
7280 if (i.disp_encoding == disp_encoding_default)
7281 i.disp_encoding = disp_encoding_32bit;
7282 else if (i.disp_encoding != disp_encoding_32bit)
7283 as_warn (_("ignoring `.d32' suffix due to earlier `{disp<N>}'"));
7284 }
7285 else
7286 goto check_suffix;
7287 mnem_p = dot_p;
7288 *dot_p = '\0';
7289 op_lookup (mnemonic);
7290 }
7291
7292 if (!current_templates.start || !pass1)
7293 {
7294 current_templates.start = NULL;
7295
7296 check_suffix:
7297 if (mnem_p > mnemonic)
7298 {
7299 /* See if we can get a match by trimming off a suffix. */
7300 switch (mnem_p[-1])
7301 {
7302 case WORD_MNEM_SUFFIX:
7303 if (intel_syntax && (intel_float_operand (mnemonic) & 2))
7304 i.suffix = SHORT_MNEM_SUFFIX;
7305 else
7306 /* Fall through. */
7307 case BYTE_MNEM_SUFFIX:
7308 case QWORD_MNEM_SUFFIX:
7309 i.suffix = mnem_p[-1];
7310 mnem_p[-1] = '\0';
7311 op_lookup (mnemonic);
7312 break;
7313 case SHORT_MNEM_SUFFIX:
7314 case LONG_MNEM_SUFFIX:
7315 if (!intel_syntax)
7316 {
7317 i.suffix = mnem_p[-1];
7318 mnem_p[-1] = '\0';
7319 op_lookup (mnemonic);
7320 }
7321 break;
7322
7323 /* Intel Syntax. */
7324 case 'd':
7325 if (intel_syntax)
7326 {
7327 if (intel_float_operand (mnemonic) == 1)
7328 i.suffix = SHORT_MNEM_SUFFIX;
7329 else
7330 i.suffix = LONG_MNEM_SUFFIX;
7331 mnem_p[-1] = '\0';
7332 op_lookup (mnemonic);
7333 }
7334 /* For compatibility reasons accept MOVSD and CMPSD without
7335 operands even in AT&T mode. */
7336 else if (*l == END_OF_INSN
7337 || (is_space_char (*l) && l[1] == END_OF_INSN))
7338 {
7339 mnem_p[-1] = '\0';
7340 op_lookup (mnemonic);
7341 if (current_templates.start != NULL
7342 /* MOVS or CMPS */
7343 && (current_templates.start->base_opcode | 2) == 0xa6
7344 && current_templates.start->opcode_space
7345 == SPACE_BASE
7346 && mnem_p[-2] == 's')
7347 {
7348 as_warn (_("found `%sd'; assuming `%sl' was meant"),
7349 mnemonic, mnemonic);
7350 i.suffix = LONG_MNEM_SUFFIX;
7351 }
7352 else
7353 {
7354 current_templates.start = NULL;
7355 mnem_p[-1] = 'd';
7356 }
7357 }
7358 break;
7359 }
7360 }
7361
7362 if (!current_templates.start)
7363 {
7364 if (pass1)
7365 as_bad (_("no such instruction: `%s'"), token_start);
7366 return NULL;
7367 }
7368 }
7369
7370 if (current_templates.start->opcode_modifier.jump == JUMP
7371 || current_templates.start->opcode_modifier.jump == JUMP_BYTE)
7372 {
7373 /* Check for a branch hint. We allow ",pt" and ",pn" for
7374 predict taken and predict not taken respectively.
7375 I'm not sure that branch hints actually do anything on loop
7376 and jcxz insns (JumpByte) for current Pentium4 chips. They
7377 may work in the future and it doesn't hurt to accept them
7378 now. */
7379 if (l[0] == ',' && l[1] == 'p')
7380 {
7381 if (l[2] == 't')
7382 {
7383 if (!add_prefix (DS_PREFIX_OPCODE))
7384 return NULL;
7385 l += 3;
7386 }
7387 else if (l[2] == 'n')
7388 {
7389 if (!add_prefix (CS_PREFIX_OPCODE))
7390 return NULL;
7391 l += 3;
7392 }
7393 }
7394 }
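/* For instance, "jz,pt .L1" gets a DS segment prefix (0x3e, predict
   taken) and "jz,pn .L1" gets a CS prefix (0x2e, predict not taken).  */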
7395 /* Any other comma loses. */
7396 if (*l == ',')
7397 {
7398 as_bad (_("invalid character %s in mnemonic"),
7399 output_invalid (*l));
7400 return NULL;
7401 }
7402
7403 /* Check if instruction is supported on specified architecture. */
7404 supported = 0;
7405 for (t = current_templates.start; t < current_templates.end; ++t)
7406 {
7407 supported |= cpu_flags_match (t);
7408
7409 if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
7410 supported &= ~CPU_FLAGS_64BIT_MATCH;
7411
7412 if (supported == CPU_FLAGS_PERFECT_MATCH)
7413 return l;
7414 }
7415
7416 if (pass1)
7417 {
7418 if (supported & CPU_FLAGS_64BIT_MATCH)
7419 i.error = unsupported_on_arch;
7420 else
7421 i.error = unsupported_64bit;
7422 }
7423
7424 return NULL;
7425 }
7426
7427 static char *
7428 parse_operands (char *l, const char *mnemonic)
7429 {
7430 char *token_start;
7431
7432 /* 1 if operand is pending after ','. */
7433 unsigned int expecting_operand = 0;
7434
7435 while (*l != END_OF_INSN)
7436 {
7437 /* Non-zero if operand parens not balanced. */
7438 unsigned int paren_not_balanced = 0;
7439 /* True if inside double quotes. */
7440 bool in_quotes = false;
7441
7442 /* Skip optional white space before operand. */
7443 if (is_space_char (*l))
7444 ++l;
7445 if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
7446 {
7447 as_bad (_("invalid character %s before operand %d"),
7448 output_invalid (*l),
7449 i.operands + 1);
7450 return NULL;
7451 }
7452 token_start = l; /* After white space. */
7453 while (in_quotes || paren_not_balanced || *l != ',')
7454 {
7455 if (*l == END_OF_INSN)
7456 {
7457 if (in_quotes)
7458 {
7459 as_bad (_("unbalanced double quotes in operand %d."),
7460 i.operands + 1);
7461 return NULL;
7462 }
7463 if (paren_not_balanced)
7464 {
7465 know (!intel_syntax);
7466 as_bad (_("unbalanced parenthesis in operand %d."),
7467 i.operands + 1);
7468 return NULL;
7469 }
7470 else
7471 break; /* we are done */
7472 }
7473 else if (*l == '\\' && l[1] == '"')
7474 ++l;
7475 else if (*l == '"')
7476 in_quotes = !in_quotes;
7477 else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
7478 {
7479 as_bad (_("invalid character %s in operand %d"),
7480 output_invalid (*l),
7481 i.operands + 1);
7482 return NULL;
7483 }
7484 if (!intel_syntax && !in_quotes)
7485 {
7486 if (*l == '(')
7487 ++paren_not_balanced;
7488 if (*l == ')')
7489 --paren_not_balanced;
7490 }
7491 l++;
7492 }
7493 if (l != token_start)
7494 { /* Yes, we've read in another operand. */
7495 unsigned int operand_ok;
7496 this_operand = i.operands++;
7497 if (i.operands > MAX_OPERANDS)
7498 {
7499 as_bad (_("spurious operands; (%d operands/instruction max)"),
7500 MAX_OPERANDS);
7501 return NULL;
7502 }
7503 i.types[this_operand].bitfield.unspecified = 1;
7504 /* Now parse operand adding info to 'i' as we go along. */
7505 END_STRING_AND_SAVE (l);
7506
7507 if (i.mem_operands > 1)
7508 {
7509 as_bad (_("too many memory references for `%s'"),
7510 mnemonic);
7511 return NULL;
7512 }
7513
7514 if (intel_syntax)
7515 operand_ok =
7516 i386_intel_operand (token_start,
7517 intel_float_operand (mnemonic));
7518 else
7519 operand_ok = i386_att_operand (token_start);
7520
7521 RESTORE_END_STRING (l);
7522 if (!operand_ok)
7523 return NULL;
7524 }
7525 else
7526 {
7527 if (expecting_operand)
7528 {
7529 expecting_operand_after_comma:
7530 as_bad (_("expecting operand after ','; got nothing"));
7531 return NULL;
7532 }
7533 if (*l == ',')
7534 {
7535 as_bad (_("expecting operand before ','; got nothing"));
7536 return NULL;
7537 }
7538 }
7539
7540 /* Now *l must be either ',' or END_OF_INSN. */
7541 if (*l == ',')
7542 {
7543 if (*++l == END_OF_INSN)
7544 {
7545 /* A comma with nothing after it: complain. */
7546 goto expecting_operand_after_comma;
7547 }
7548 expecting_operand = 1;
7549 }
7550 }
7551 return l;
7552 }
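/* Example of the scan above: in AT&T input such as
   "movl 4(%ebx,%esi,2), %eax" the comma inside the parenthesized memory
   operand keeps paren_not_balanced non-zero, so it doesn't terminate the
   operand; exactly two operands reach i386_att_operand.  */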
7553
7554 static void
7555 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
7556 {
7557 union i386_op temp_op;
7558 i386_operand_type temp_type;
7559 unsigned int temp_flags;
7560 enum bfd_reloc_code_real temp_reloc;
7561
7562 temp_type = i.types[xchg2];
7563 i.types[xchg2] = i.types[xchg1];
7564 i.types[xchg1] = temp_type;
7565
7566 temp_flags = i.flags[xchg2];
7567 i.flags[xchg2] = i.flags[xchg1];
7568 i.flags[xchg1] = temp_flags;
7569
7570 temp_op = i.op[xchg2];
7571 i.op[xchg2] = i.op[xchg1];
7572 i.op[xchg1] = temp_op;
7573
7574 temp_reloc = i.reloc[xchg2];
7575 i.reloc[xchg2] = i.reloc[xchg1];
7576 i.reloc[xchg1] = temp_reloc;
7577
7578 temp_flags = i.imm_bits[xchg2];
7579 i.imm_bits[xchg2] = i.imm_bits[xchg1];
7580 i.imm_bits[xchg1] = temp_flags;
7581
7582 if (i.mask.reg)
7583 {
7584 if (i.mask.operand == xchg1)
7585 i.mask.operand = xchg2;
7586 else if (i.mask.operand == xchg2)
7587 i.mask.operand = xchg1;
7588 }
7589 if (i.broadcast.type || i.broadcast.bytes)
7590 {
7591 if (i.broadcast.operand == xchg1)
7592 i.broadcast.operand = xchg2;
7593 else if (i.broadcast.operand == xchg2)
7594 i.broadcast.operand = xchg1;
7595 }
7596 }
7597
7598 static void
7599 swap_operands (void)
7600 {
7601 switch (i.operands)
7602 {
7603 case 5:
7604 case 4:
7605 swap_2_operands (1, i.operands - 2);
7606 /* Fall through. */
7607 case 3:
7608 case 2:
7609 swap_2_operands (0, i.operands - 1);
7610 break;
7611 default:
7612 abort ();
7613 }
7614
7615 if (i.mem_operands == 2)
7616 {
7617 const reg_entry *temp_seg;
7618 temp_seg = i.seg[0];
7619 i.seg[0] = i.seg[1];
7620 i.seg[1] = temp_seg;
7621 }
7622 }
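/* Example: Intel syntax lists the destination first, so "add eax, ebx"
   gets flipped here into AT&T operand order (source first) before
   template matching; for two-memory-operand insns like movs the segment
   slots are exchanged as well.  */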
7623
7624 /* Try to ensure constant immediates are represented in the smallest
7625 opcode possible. */
7626 static void
7627 optimize_imm (void)
7628 {
7629 char guess_suffix = 0;
7630 int op;
7631
7632 if (i.suffix)
7633 guess_suffix = i.suffix;
7634 else if (i.reg_operands)
7635 {
7636 /* Figure out a suffix from the last register operand specified.
7637 We can't do this properly yet, i.e. excluding special register
7638 instances, but the following works for instructions with
7639 immediates. In any case, we can't set i.suffix yet. */
7640 for (op = i.operands; --op >= 0;)
7641 if (i.types[op].bitfield.class != Reg)
7642 continue;
7643 else if (i.types[op].bitfield.byte)
7644 {
7645 guess_suffix = BYTE_MNEM_SUFFIX;
7646 break;
7647 }
7648 else if (i.types[op].bitfield.word)
7649 {
7650 guess_suffix = WORD_MNEM_SUFFIX;
7651 break;
7652 }
7653 else if (i.types[op].bitfield.dword)
7654 {
7655 guess_suffix = LONG_MNEM_SUFFIX;
7656 break;
7657 }
7658 else if (i.types[op].bitfield.qword)
7659 {
7660 guess_suffix = QWORD_MNEM_SUFFIX;
7661 break;
7662 }
7663 }
7664 else if ((flag_code == CODE_16BIT)
7665 ^ (i.prefix[DATA_PREFIX] != 0 && !(i.prefix[REX_PREFIX] & REX_W)))
7666 guess_suffix = WORD_MNEM_SUFFIX;
7667 else if (flag_code != CODE_64BIT
7668 || (!(i.prefix[REX_PREFIX] & REX_W)
7669 /* A more generic (but also more involved) way of dealing
7670 with the special case(s) would be to go look for
7671 DefaultSize attributes on any of the templates. */
7672 && current_templates.start->mnem_off != MN_push))
7673 guess_suffix = LONG_MNEM_SUFFIX;
7674
7675 for (op = i.operands; --op >= 0;)
7676 if (operand_type_check (i.types[op], imm))
7677 {
7678 switch (i.op[op].imms->X_op)
7679 {
7680 case O_constant:
7681 /* If a suffix is given, this operand may be shortened. */
7682 switch (guess_suffix)
7683 {
7684 case LONG_MNEM_SUFFIX:
7685 i.types[op].bitfield.imm32 = 1;
7686 i.types[op].bitfield.imm64 = 1;
7687 break;
7688 case WORD_MNEM_SUFFIX:
7689 i.types[op].bitfield.imm16 = 1;
7690 i.types[op].bitfield.imm32 = 1;
7691 i.types[op].bitfield.imm32s = 1;
7692 i.types[op].bitfield.imm64 = 1;
7693 break;
7694 case BYTE_MNEM_SUFFIX:
7695 i.types[op].bitfield.imm8 = 1;
7696 i.types[op].bitfield.imm8s = 1;
7697 i.types[op].bitfield.imm16 = 1;
7698 i.types[op].bitfield.imm32 = 1;
7699 i.types[op].bitfield.imm32s = 1;
7700 i.types[op].bitfield.imm64 = 1;
7701 break;
7702 }
7703
7704 /* If this operand is at most 16 bits, convert it
7705 to a signed 16 bit number before trying to see
7706 whether it will fit in an even smaller size.
7707 This allows a 16-bit operand such as $0xffe0 to
7708 be recognised as within Imm8S range. */
7709 if ((i.types[op].bitfield.imm16)
7710 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
7711 {
7712 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
7713 ^ 0x8000) - 0x8000);
7714 }
7715 #ifdef BFD64
7716 /* Store 32-bit immediate in 64-bit for 64-bit BFD. */
7717 if ((i.types[op].bitfield.imm32)
7718 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
7719 {
7720 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
7721 ^ ((offsetT) 1 << 31))
7722 - ((offsetT) 1 << 31));
7723 }
7724 #endif
7725 i.types[op]
7726 = operand_type_or (i.types[op],
7727 smallest_imm_type (i.op[op].imms->X_add_number));
7728
7729 /* We must avoid matching Imm32 templates when only a 64-bit
7730 immediate is available. */
7731 if (guess_suffix == QWORD_MNEM_SUFFIX)
7732 i.types[op].bitfield.imm32 = 0;
7733 break;
7734
7735 case O_absent:
7736 case O_register:
7737 abort ();
7738
7739 /* Symbols and expressions. */
7740 default:
7741 /* Convert symbolic operand to proper sizes for matching, but don't
7742 prevent matching a set of insns that only supports sizes other
7743 than those matching the insn suffix. */
7744 {
7745 i386_operand_type mask, allowed;
7746 const insn_template *t = current_templates.start;
7747
7748 operand_type_set (&mask, 0);
7749 switch (guess_suffix)
7750 {
7751 case QWORD_MNEM_SUFFIX:
7752 mask.bitfield.imm64 = 1;
7753 mask.bitfield.imm32s = 1;
7754 break;
7755 case LONG_MNEM_SUFFIX:
7756 mask.bitfield.imm32 = 1;
7757 break;
7758 case WORD_MNEM_SUFFIX:
7759 mask.bitfield.imm16 = 1;
7760 break;
7761 case BYTE_MNEM_SUFFIX:
7762 mask.bitfield.imm8 = 1;
7763 break;
7764 default:
7765 break;
7766 }
7767
7768 allowed = operand_type_and (t->operand_types[op], mask);
7769 while (++t < current_templates.end)
7770 {
7771 allowed = operand_type_or (allowed, t->operand_types[op]);
7772 allowed = operand_type_and (allowed, mask);
7773 }
7774
7775 if (!operand_type_all_zero (&allowed))
7776 i.types[op] = operand_type_and (i.types[op], mask);
7777 }
7778 break;
7779 }
7780 }
7781 }
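/* Example: with an 'l' suffix, $0xfffffff0 is first reduced to the
   signed 32-bit value -16 above, which then also qualifies as Imm8S;
   that is what lets "addl $0xfffffff0, %eax" use the sign-extended-imm8
   opcode 0x83 instead of the imm32 form 0x81.  */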
7782
7783 /* Try to use the smallest displacement type too. */
7784 static bool
7785 optimize_disp (const insn_template *t)
7786 {
7787 unsigned int op;
7788
7789 if (!want_disp32 (t)
7790 && (!t->opcode_modifier.jump
7791 || i.jumpabsolute || i.types[0].bitfield.baseindex))
7792 {
7793 for (op = 0; op < i.operands; ++op)
7794 {
7795 const expressionS *exp = i.op[op].disps;
7796
7797 if (!operand_type_check (i.types[op], disp))
7798 continue;
7799
7800 if (exp->X_op != O_constant)
7801 continue;
7802
7803 /* Since the displacement is sign-extended to 64 bits, don't allow
7804 disp32 if it is out of range. */
7805 if (fits_in_signed_long (exp->X_add_number))
7806 continue;
7807
7808 i.types[op].bitfield.disp32 = 0;
7809 if (i.types[op].bitfield.baseindex)
7810 {
7811 as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
7812 (uint64_t) exp->X_add_number);
7813 return false;
7814 }
7815 }
7816 }
7817
7818 /* Don't optimize the displacement for movabs, since it takes only
7819 a 64-bit displacement. */
7820 if (i.disp_encoding > disp_encoding_8bit
7821 || (flag_code == CODE_64BIT && t->mnem_off == MN_movabs))
7822 return true;
7823
7824 for (op = i.operands; op-- > 0;)
7825 if (operand_type_check (i.types[op], disp))
7826 {
7827 if (i.op[op].disps->X_op == O_constant)
7828 {
7829 offsetT op_disp = i.op[op].disps->X_add_number;
7830
7831 if (!op_disp && i.types[op].bitfield.baseindex)
7832 {
7833 i.types[op] = operand_type_and_not (i.types[op], anydisp);
7834 i.op[op].disps = NULL;
7835 i.disp_operands--;
7836 continue;
7837 }
7838
7839 if (i.types[op].bitfield.disp16
7840 && fits_in_unsigned_word (op_disp))
7841 {
7842 /* If this operand is at most 16 bits, convert
7843 to a signed 16 bit number and don't use 64bit
7844 displacement. */
7845 op_disp = ((op_disp ^ 0x8000) - 0x8000);
7846 i.types[op].bitfield.disp64 = 0;
7847 }
7848
7849 #ifdef BFD64
7850 /* Optimize 64-bit displacement to 32-bit for 64-bit BFD. */
7851 if ((flag_code != CODE_64BIT
7852 ? i.types[op].bitfield.disp32
7853 : want_disp32 (t)
7854 && (!t->opcode_modifier.jump
7855 || i.jumpabsolute || i.types[op].bitfield.baseindex))
7856 && fits_in_unsigned_long (op_disp))
7857 {
7858 /* If this operand is at most 32 bits, convert
7859 to a signed 32 bit number and don't use 64bit
7860 displacement. */
7861 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
7862 i.types[op].bitfield.disp64 = 0;
7863 i.types[op].bitfield.disp32 = 1;
7864 }
7865
7866 if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
7867 {
7868 i.types[op].bitfield.disp64 = 0;
7869 i.types[op].bitfield.disp32 = 1;
7870 }
7871 #endif
7872 if ((i.types[op].bitfield.disp32
7873 || i.types[op].bitfield.disp16)
7874 && fits_in_disp8 (op_disp))
7875 i.types[op].bitfield.disp8 = 1;
7876
7877 i.op[op].disps->X_add_number = op_disp;
7878 }
7879 else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
7880 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
7881 {
7882 fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
7883 i.op[op].disps, 0, i.reloc[op]);
7884 i.types[op] = operand_type_and_not (i.types[op], anydisp);
7885 }
7886 else
7887 /* We only support 64-bit displacements on constants. */
7888 i.types[op].bitfield.disp64 = 0;
7889 }
7890
7891 return true;
7892 }
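/* Examples: the zero displacement in "movl 0(%ebx), %eax" is dropped
   altogether (base/index addressing needs no disp field), while a
   constant like 0x7f additionally gets disp8 set so the one-byte
   displacement form can be picked during encoding.  */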
7893
7894 /* Return 1 if there is a match in broadcast bytes between operand
7895 GIVEN and instruction template T. */
7896
7897 static INLINE int
7898 match_broadcast_size (const insn_template *t, unsigned int given)
7899 {
7900 return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
7901 && i.types[given].bitfield.byte)
7902 || (t->opcode_modifier.broadcast == WORD_BROADCAST
7903 && i.types[given].bitfield.word)
7904 || (t->opcode_modifier.broadcast == DWORD_BROADCAST
7905 && i.types[given].bitfield.dword)
7906 || (t->opcode_modifier.broadcast == QWORD_BROADCAST
7907 && i.types[given].bitfield.qword));
7908 }
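/* Example: Intel-syntax "vaddps zmm2, zmm1, DWORD PTR [rax]{1to16}"
   matches a DWORD_BROADCAST template, whereas a WORD PTR operand on the
   same insn would fail this size match.  */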
7909
7910 /* Check if operands are valid for the instruction. */
7911
7912 static int
7913 check_VecOperands (const insn_template *t)
7914 {
7915 unsigned int op;
7916 i386_cpu_flags cpu;
7917
7918 /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
7919 any one operand implicitly require AVX512VL support if the actual
7920 operand size is YMMword or XMMword. Since this function runs after
7921 template matching, there's no need to check for YMMword/XMMword in
7922 the template. */
7923 cpu = cpu_flags_and (cpu_flags_from_attr (t->cpu), avx512);
7924 if (!cpu_flags_all_zero (&cpu)
7925 && !is_cpu (t, CpuAVX512VL)
7926 && !cpu_arch_flags.bitfield.cpuavx512vl
7927 && (!t->opcode_modifier.vex || need_evex_encoding (t)))
7928 {
7929 for (op = 0; op < t->operands; ++op)
7930 {
7931 if (t->operand_types[op].bitfield.zmmword
7932 && (i.types[op].bitfield.ymmword
7933 || i.types[op].bitfield.xmmword))
7934 {
7935 i.error = operand_size_mismatch;
7936 return 1;
7937 }
7938 }
7939 }
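/* Hypothetical example: an EVEX template allowing ZMMword alongside
   YMMword/XMMword for an operand, used with a YMM register while AVX512F
   but not AVX512VL is enabled, fails here instead of silently producing
   an encoding that would require AVX512VL.  */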
7940
7941 /* Somewhat similarly, templates specifying both AVX and AVX2
7942 require AVX2 support if the actual operand size is YMMword. */
7943 if (maybe_cpu (t, CpuAVX) && maybe_cpu (t, CpuAVX2)
7944 && !cpu_arch_flags.bitfield.cpuavx2)
7945 {
7946 for (op = 0; op < t->operands; ++op)
7947 {
7948 if (t->operand_types[op].bitfield.xmmword
7949 && i.types[op].bitfield.ymmword)
7950 {
7951 i.error = operand_size_mismatch;
7952 return 1;
7953 }
7954 }
7955 }
7956
7957 /* Without a VSIB byte, we can't have a vector register as index. */
7958 if (!t->opcode_modifier.sib
7959 && i.index_reg
7960 && (i.index_reg->reg_type.bitfield.xmmword
7961 || i.index_reg->reg_type.bitfield.ymmword
7962 || i.index_reg->reg_type.bitfield.zmmword))
7963 {
7964 i.error = unsupported_vector_index_register;
7965 return 1;
7966 }
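/* Example: a vector index in a plain memory operand, as in
   "movl (%rax,%xmm1,4), %eax", is rejected here; only VSIB-using
   gather/scatter insns may index with a vector register.  */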
7967
7968 /* Check if default mask is allowed. */
7969 if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
7970 && (!i.mask.reg || i.mask.reg->reg_num == 0))
7971 {
7972 i.error = no_default_mask;
7973 return 1;
7974 }
7975
7976 /* For a VSIB byte, we need a vector register as index, and all vector
7977 registers must be distinct. */
7978 if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
7979 {
7980 if (!i.index_reg
7981 || !((t->opcode_modifier.sib == VECSIB128
7982 && i.index_reg->reg_type.bitfield.xmmword)
7983 || (t->opcode_modifier.sib == VECSIB256
7984 && i.index_reg->reg_type.bitfield.ymmword)
7985 || (t->opcode_modifier.sib == VECSIB512
7986 && i.index_reg->reg_type.bitfield.zmmword)))
7987 {
7988 i.error = invalid_vsib_address;
7989 return 1;
7990 }
7991
7992 gas_assert (i.reg_operands == 2 || i.mask.reg);
7993 if (i.reg_operands == 2 && !i.mask.reg)
7994 {
7995 gas_assert (i.types[0].bitfield.class == RegSIMD);
7996 gas_assert (i.types[0].bitfield.xmmword
7997 || i.types[0].bitfield.ymmword);
7998 gas_assert (i.types[2].bitfield.class == RegSIMD);
7999 gas_assert (i.types[2].bitfield.xmmword
8000 || i.types[2].bitfield.ymmword);
8001 if (operand_check == check_none)
8002 return 0;
8003 if (register_number (i.op[0].regs)
8004 != register_number (i.index_reg)
8005 && register_number (i.op[2].regs)
8006 != register_number (i.index_reg)
8007 && register_number (i.op[0].regs)
8008 != register_number (i.op[2].regs))
8009 return 0;
8010 if (operand_check == check_error)
8011 {
8012 i.error = invalid_vector_register_set;
8013 return 1;
8014 }
8015 as_warn (_("mask, index, and destination registers should be distinct"));
8016 }
8017 else if (i.reg_operands == 1 && i.mask.reg)
8018 {
8019 if (i.types[1].bitfield.class == RegSIMD
8020 && (i.types[1].bitfield.xmmword
8021 || i.types[1].bitfield.ymmword
8022 || i.types[1].bitfield.zmmword)
8023 && (register_number (i.op[1].regs)
8024 == register_number (i.index_reg)))
8025 {
8026 if (operand_check == check_error)
8027 {
8028 i.error = invalid_vector_register_set;
8029 return 1;
8030 }
8031 if (operand_check != check_none)
8032 as_warn (_("index and destination registers should be distinct"));
8033 }
8034 }
8035 }
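/* Example: AVX2 "vgatherdps %xmm2, (%rax,%xmm1,4), %xmm0" passes the
   distinctness checks, while reusing a register, e.g.
   "vgatherdps %xmm1, (%rax,%xmm1,4), %xmm0", draws the warning above
   (or an error when operand checking is set to error).  */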
8036
8037 /* For AMX instructions with 3 TMM register operands, all operands
8038 must be distinct. */
8039 if (i.reg_operands == 3
8040 && t->operand_types[0].bitfield.tmmword
8041 && (i.op[0].regs == i.op[1].regs
8042 || i.op[0].regs == i.op[2].regs
8043 || i.op[1].regs == i.op[2].regs))
8044 {
8045 i.error = invalid_tmm_register_set;
8046 return 1;
8047 }
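/* Example: "tdpbssd %tmm1, %tmm2, %tmm3" is accepted, while repeating a
   register, as in "tdpbssd %tmm1, %tmm1, %tmm2", is rejected with
   invalid_tmm_register_set.  */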
8048
8049 /* Some special instructions require that the destination be distinct
8050 from the source registers. */
8051 if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
8052 {
8053 unsigned int dest_reg = i.operands - 1;
8054
8055 know (i.operands >= 3);
8056
8057 /* #UD if dest_reg == src1_reg or dest_reg == src2_reg. */
8058 if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
8059 || (i.reg_operands > 2
8060 && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
8061 {
8062 i.error = invalid_dest_and_src_register_set;
8063 return 1;
8064 }
8065 }
8066
8067 /* Check if broadcast is supported by the instruction and is applied
8068 to the memory operand. */
8069 if (i.broadcast.type || i.broadcast.bytes)
8070 {
8071 i386_operand_type type, overlap;
8072
8073 /* Check if specified broadcast is supported in this instruction,
8074 and its broadcast bytes match the memory operand. */
8075 op = i.broadcast.operand;
8076 if (!t->opcode_modifier.broadcast
8077 || !(i.flags[op] & Operand_Mem)
8078 || (!i.types[op].bitfield.unspecified
8079 && !match_broadcast_size (t, op)))
8080 {
8081 bad_broadcast:
8082 i.error = unsupported_broadcast;
8083 return 1;
8084 }
8085
8086 operand_type_set (&type, 0);
8087 switch (get_broadcast_bytes (t, false))
8088 {
8089 case 2:
8090 type.bitfield.word = 1;
8091 break;
8092 case 4:
8093 type.bitfield.dword = 1;
8094 break;
8095 case 8:
8096 type.bitfield.qword = 1;
8097 break;
8098 case 16:
8099 type.bitfield.xmmword = 1;
8100 break;
8101 case 32:
8102 if (vector_size < VSZ256)
8103 goto bad_broadcast;
8104 type.bitfield.ymmword = 1;
8105 break;
8106 case 64:
8107 if (vector_size < VSZ512)
8108 goto bad_broadcast;
8109 type.bitfield.zmmword = 1;
8110 break;
8111 default:
8112 goto bad_broadcast;
8113 }
8114
8115 overlap = operand_type_and (type, t->operand_types[op]);
8116 if (t->operand_types[op].bitfield.class == RegSIMD
8117 && t->operand_types[op].bitfield.byte
8118 + t->operand_types[op].bitfield.word
8119 + t->operand_types[op].bitfield.dword
8120 + t->operand_types[op].bitfield.qword > 1)
8121 {
8122 overlap.bitfield.xmmword = 0;
8123 overlap.bitfield.ymmword = 0;
8124 overlap.bitfield.zmmword = 0;
8125 }
8126 if (operand_type_all_zero (&overlap))
8127 goto bad_broadcast;
8128
8129 if (t->opcode_modifier.checkoperandsize)
8130 {
8131 unsigned int j;
8132
8133 type.bitfield.baseindex = 1;
8134 for (j = 0; j < i.operands; ++j)
8135 {
8136 if (j != op
8137 && !operand_type_register_match(i.types[j],
8138 t->operand_types[j],
8139 type,
8140 t->operand_types[op]))
8141 goto bad_broadcast;
8142 }
8143 }
8144 }
8145 /* If broadcast is supported by this instruction, we need to check
8146 that an operand of one-element size isn't specified without broadcast. */
8147 else if (t->opcode_modifier.broadcast && i.mem_operands)
8148 {
8149 /* Find memory operand. */
8150 for (op = 0; op < i.operands; op++)
8151 if (i.flags[op] & Operand_Mem)
8152 break;
8153 gas_assert (op < i.operands);
8154 /* Check size of the memory operand. */
8155 if (match_broadcast_size (t, op))
8156 {
8157 i.error = broadcast_needed;
8158 return 1;
8159 }
8160 }
8161 else
8162 op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning. */
8163
8164 /* Check if requested masking is supported. */
8165 if (i.mask.reg)
8166 {
8167 if (!t->opcode_modifier.masking)
8168 {
8169 i.error = unsupported_masking;
8170 return 1;
8171 }
8172
8173 /* Common rules for masking:
8174 - mask register destinations permit only zeroing-masking, without
8175 that actually being expressed by a {z} operand suffix or EVEX.z,
8176 - memory destinations allow only merging-masking,
8177 - scatter/gather insns (i.e. ones using vSIB) only allow merging-
8178 masking. */
8179 if (i.mask.zeroing
8180 && (t->operand_types[t->operands - 1].bitfield.class == RegMask
8181 || (i.flags[t->operands - 1] & Operand_Mem)
8182 || t->opcode_modifier.sib))
8183 {
8184 i.error = unsupported_masking;
8185 return 1;
8186 }
8187 }
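/* Example: merging-masked stores like "vmovdqu32 %zmm0, (%rax){%k1}"
   are fine; adding zeroing, as in "vmovdqu32 %zmm0, (%rax){%k1}{z}",
   trips the memory-destination rule above.  */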
8188
8189 /* Check if masking is applied to dest operand. */
8190 if (i.mask.reg && (i.mask.operand != i.operands - 1))
8191 {
8192 i.error = mask_not_on_destination;
8193 return 1;
8194 }
8195
8196 /* Check RC/SAE. */
8197 if (i.rounding.type != rc_none)
8198 {
8199 if (!t->opcode_modifier.sae
8200 || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
8201 || i.mem_operands)
8202 {
8203 i.error = unsupported_rc_sae;
8204 return 1;
8205 }
8206
8207 /* Non-EVEX.LIG forms need to have a ZMM register as at least one
8208 operand. */
8209 if (t->opcode_modifier.evex != EVEXLIG)
8210 {
8211 for (op = 0; op < t->operands; ++op)
8212 if (i.types[op].bitfield.zmmword)
8213 break;
8214 if (op >= t->operands)
8215 {
8216 i.error = operand_size_mismatch;
8217 return 1;
8218 }
8219 }
8220 }
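/* Example: "vaddps {rd-sae}, %zmm1, %zmm2, %zmm3" satisfies both checks
   (register-only operands, a ZMM register present); combining {rd-sae}
   with a memory operand, or using it on a non-LIG insn without any ZMM
   operand, is rejected.  */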
8221
8222 /* Check the special Imm4 cases; must be the first operand. */
8223 if ((is_cpu (t, CpuXOP) && t->operands == 5)
8224 || (t->opcode_space == SPACE_0F3A
8225 && (t->base_opcode | 3) == 0x0b
8226 && is_cpu (t, CpuAPX_F)))
8227 {
8228 if (i.op[0].imms->X_op != O_constant
8229 || !fits_in_imm4 (i.op[0].imms->X_add_number))
8230 {
8231 i.error = bad_imm4;
8232 return 1;
8233 }
8234
8235 /* Turn off Imm<N> so that update_imm won't complain. */
8236 if (t->operands == 5)
8237 operand_type_set (&i.types[0], 0);
8238 }
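/* Example (XOP): the 5-operand "vpermil2ps $2, %xmm4, %xmm3, %xmm2, %xmm1"
   carries its leading immediate in the low 4 bits of the /is4 byte, so
   values outside 0..15 are diagnosed as bad_imm4 here.  */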
8239
8240 /* Check vector Disp8 operand. */
8241 if (t->opcode_modifier.disp8memshift
8242 && (!t->opcode_modifier.vex
8243 || need_evex_encoding (t))
8244 && i.disp_encoding <= disp_encoding_8bit)
8245 {
8246 if (i.broadcast.type || i.broadcast.bytes)
8247 i.memshift = t->opcode_modifier.broadcast - 1;
8248 else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
8249 i.memshift = t->opcode_modifier.disp8memshift;
8250 else
8251 {
8252 const i386_operand_type *type = NULL, *fallback = NULL;
8253
8254 i.memshift = 0;
8255 for (op = 0; op < i.operands; op++)
8256 if (i.flags[op] & Operand_Mem)
8257 {
8258 if (t->opcode_modifier.evex == EVEXLIG)
8259 i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
8260 else if (t->operand_types[op].bitfield.xmmword
8261 + t->operand_types[op].bitfield.ymmword
8262 + t->operand_types[op].bitfield.zmmword <= 1)
8263 type = &t->operand_types[op];
8264 else if (!i.types[op].bitfield.unspecified)
8265 type = &i.types[op];
8266 else /* Ambiguities get resolved elsewhere. */
8267 fallback = &t->operand_types[op];
8268 }
8269 else if (i.types[op].bitfield.class == RegSIMD
8270 && t->opcode_modifier.evex != EVEXLIG)
8271 {
8272 if (i.types[op].bitfield.zmmword)
8273 i.memshift = 6;
8274 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
8275 i.memshift = 5;
8276 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
8277 i.memshift = 4;
8278 }
8279
8280 if (!type && !i.memshift)
8281 type = fallback;
8282 if (type)
8283 {
8284 if (type->bitfield.zmmword)
8285 i.memshift = 6;
8286 else if (type->bitfield.ymmword)
8287 i.memshift = 5;
8288 else if (type->bitfield.xmmword)
8289 i.memshift = 4;
8290 }
8291
8292 /* For the check in fits_in_disp8(). */
8293 if (i.memshift == 0)
8294 i.memshift = -1;
8295 }
8296
8297 for (op = 0; op < i.operands; op++)
8298 if (operand_type_check (i.types[op], disp)
8299 && i.op[op].disps->X_op == O_constant)
8300 {
8301 if (fits_in_disp8 (i.op[op].disps->X_add_number))
8302 {
8303 i.types[op].bitfield.disp8 = 1;
8304 return 0;
8305 }
8306 i.types[op].bitfield.disp8 = 0;
8307 }
8308 }
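/* Example of the compressed disp8 scaling: a full ZMM load like
   "vmovaps 0x40(%rax), %zmm0" has i.memshift == 6, so the displacement
   is encoded as the single byte 1 (0x40 >> 6); a displacement that isn't
   a multiple of 64 keeps disp8 clear and falls back to disp32.  */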
8309
8310 i.memshift = 0;
8311
8312 return 0;
8313 }
8314
8315 /* Check if encoding requirements are met by the instruction. */
8316
8317 static int
8318 VEX_check_encoding (const insn_template *t)
8319 {
8320 if (i.encoding == encoding_error)
8321 {
8322 i.error = unsupported;
8323 return 1;
8324 }
8325
8326 /* Vector size restrictions. */
8327 if ((vector_size < VSZ512
8328 && t->opcode_modifier.evex == EVEX512)
8329 || (vector_size < VSZ256
8330 && (t->opcode_modifier.evex == EVEX256
8331 || t->opcode_modifier.vex == VEX256)))
8332 {
8333 i.error = unsupported_vector_size;
8334 return 1;
8335 }
8336
8337 switch (i.encoding)
8338 {
8339 case encoding_default:
8340 break;
8341
8342 case encoding_vex:
8343 case encoding_vex3:
8344 /* This instruction must be encoded with VEX prefix. */
8345 if (!t->opcode_modifier.vex)
8346 {
8347 i.error = no_vex_encoding;
8348 return 1;
8349 }
8350 break;
8351
8352 case encoding_evex:
8353 case encoding_evex512:
8354 /* This instruction must be encoded with EVEX prefix. */
8355 if (!t->opcode_modifier.evex)
8356 {
8357 i.error = no_evex_encoding;
8358 return 1;
8359 }
8360 break;
8361
8362 case encoding_egpr:
8363 /* This instruction must be encoded with REX2 or EVEX prefix. */
8364 if (t->opcode_modifier.vex && !t->opcode_modifier.evex)
8365 {
8366 i.error = no_evex_encoding;
8367 return 1;
8368 }
8369 break;
8370
8371 default:
8372 abort ();
8373 }
8374
8375 return 0;
8376 }
8377
8378 /* Check if EGPR operands (r16-r31) are valid for the instruction. */
8379
8380 static bool
8381 check_EgprOperands (const insn_template *t)
8382 {
8383 if (!t->opcode_modifier.noegpr)
8384 return false;
8385
8386 for (unsigned int op = 0; op < i.operands; op++)
8387 {
8388 if (i.types[op].bitfield.class != Reg)
8389 continue;
8390
8391 if (i.op[op].regs->reg_flags & RegRex2)
8392 {
8393 i.error = register_type_mismatch;
8394 return true;
8395 }
8396 }
8397
8398 if ((i.index_reg && (i.index_reg->reg_flags & RegRex2))
8399 || (i.base_reg && (i.base_reg->reg_flags & RegRex2)))
8400 {
8401 i.error = unsupported_EGPR_for_addressing;
8402 return true;
8403 }
8404
8405 /* Check if pseudo prefix {rex2} is valid. */
8406 if (i.rex2_encoding && !t->opcode_modifier.sse2avx)
8407 {
8408 i.error = invalid_pseudo_prefix;
8409 return true;
8410 }
8411
8412 return false;
8413 }
8414
8415 /* Check if APX operands are valid for the instruction. */
8416 static bool
8417 check_APX_operands (const insn_template *t)
8418 {
8419 /* Push2* and Pop2* cannot use RSP, and Pop2* cannot pop the same
8420 register twice. */
8421 switch (t->mnem_off)
8422 {
8423 case MN_pop2:
8424 case MN_pop2p:
8425 if (register_number (i.op[0].regs) == register_number (i.op[1].regs))
8426 {
8427 i.error = invalid_dest_register_set;
8428 return true;
8429 }
8430 /* fall through */
8431 case MN_push2:
8432 case MN_push2p:
8433 if (register_number (i.op[0].regs) == 4
8434 || register_number (i.op[1].regs) == 4)
8435 {
8436 i.error = unsupported_rsp_register;
8437 return true;
8438 }
8439 break;
8440 }
8441 return false;
8442 }
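/* Examples: "pop2 %rax, %rbx" is fine, "pop2 %rax, %rax" hits the
   invalid_dest_register_set case, and "push2 %rsp, %rbx" the
   unsupported_rsp_register one.  */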
8443
8444 /* Check if the instruction uses the REX registers or REX prefix. */
8445 static bool
8446 check_Rex_required (void)
8447 {
8448 for (unsigned int op = 0; op < i.operands; op++)
8449 {
8450 if (i.types[op].bitfield.class != Reg)
8451 continue;
8452
8453 if (i.op[op].regs->reg_flags & (RegRex | RegRex64))
8454 return true;
8455 }
8456
8457 if ((i.index_reg && (i.index_reg->reg_flags & (RegRex | RegRex64)))
8458 || (i.base_reg && (i.base_reg->reg_flags & (RegRex | RegRex64))))
8459 return true;
8460
8461 /* The pseudo prefix {rex} also requires a REX prefix. */
8462 return i.rex_encoding;
8463 }
8464
8465 /* Optimize APX NDD insns to legacy insns. */
8466 static unsigned int
8467 can_convert_NDD_to_legacy (const insn_template *t)
8468 {
8469 unsigned int match_dest_op = ~0;
8470
8471 if (!i.tm.opcode_modifier.nf
8472 && i.reg_operands >= 2)
8473 {
8474 unsigned int dest = i.operands - 1;
8475 unsigned int src1 = i.operands - 2;
8476 unsigned int src2 = (i.operands > 3) ? i.operands - 3 : 0;
8477
8478 if (i.types[src1].bitfield.class == Reg
8479 && i.op[src1].regs == i.op[dest].regs)
8480 match_dest_op = src1;
8481 /* If the first operand is the same as the third operand, the
8482 instruction needs to allow commuting its first two
8483 operands without changing the semantics in order to be
8484 optimized. */
8485 else if (optimize > 1
8486 && t->opcode_modifier.commutative
8487 && i.types[src2].bitfield.class == Reg
8488 && i.op[src2].regs == i.op[dest].regs)
8489 match_dest_op = src2;
8490 }
8491 return match_dest_op;
8492 }
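/* Example: for "add %r16, %r8, %r8" the destination equals src1, so the
   caller may rematch against the 2-operand legacy template to get
   "add %r16, %r8"; at -O2 the commutative "add %r8, %r16, %r8" is
   optimized the same way via src2.  */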
8493
8494 /* Helper function for the progress() macro in match_template(). */
8495 static INLINE enum i386_error progress (enum i386_error new,
8496 enum i386_error last,
8497 unsigned int line, unsigned int *line_p)
8498 {
8499 if (line <= *line_p)
8500 return last;
8501 *line_p = line;
8502 return new;
8503 }
8504
8505 static const insn_template *
8506 match_template (char mnem_suffix)
8507 {
8508 /* Points to template once we've found it. */
8509 const insn_template *t;
8510 i386_operand_type overlap0, overlap1, overlap2, overlap3;
8511 i386_operand_type overlap4;
8512 unsigned int found_reverse_match;
8513 i386_operand_type operand_types [MAX_OPERANDS];
8514 int addr_prefix_disp;
8515 unsigned int j, size_match, check_register, errline = __LINE__;
8516 enum i386_error specific_error = number_of_operands_mismatch;
8517 #define progress(err) progress (err, specific_error, __LINE__, &errline)
8518
8519 #if MAX_OPERANDS != 5
8520 # error "MAX_OPERANDS must be 5."
8521 #endif
8522
8523 found_reverse_match = 0;
8524 addr_prefix_disp = -1;
8525
8526 for (t = current_templates.start; t < current_templates.end; t++)
8527 {
8528 addr_prefix_disp = -1;
8529 found_reverse_match = 0;
8530
8531 /* Must have right number of operands. */
8532 if (i.operands != t->operands)
8533 continue;
8534
8535 /* Skip SSE2AVX templates when inapplicable. */
8536 if (t->opcode_modifier.sse2avx
8537 && (!sse2avx || i.prefix[DATA_PREFIX]))
8538 {
8539 /* Another non-SSE2AVX template has to follow. */
8540 gas_assert (t + 1 < current_templates.end);
8541 continue;
8542 }
8543
8544 /* Check processor support. */
8545 specific_error = progress (unsupported);
8546 if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
8547 continue;
8548
8549 /* Check AT&T mnemonic. */
8550 specific_error = progress (unsupported_with_intel_mnemonic);
8551 if (!intel_syntax && intel_mnemonic
8552 && t->opcode_modifier.dialect == ATT_MNEMONIC)
8553 continue;
8554
8555 /* Check AT&T/Intel syntax. */
8556 specific_error = progress (unsupported_syntax);
8557 if (intel_syntax
8558 ? t->opcode_modifier.dialect >= ATT_SYNTAX
8559 : t->opcode_modifier.dialect == INTEL_SYNTAX)
8560 continue;
8561
8562 /* Check Intel64/AMD64 ISA. */
8563 switch (isa64)
8564 {
8565 default:
8566 /* Default: Don't accept Intel64. */
8567 if (t->opcode_modifier.isa64 == INTEL64)
8568 continue;
8569 break;
8570 case amd64:
8571 /* -mamd64: Don't accept Intel64 and Intel64 only. */
8572 if (t->opcode_modifier.isa64 >= INTEL64)
8573 continue;
8574 break;
8575 case intel64:
8576 /* -mintel64: Don't accept AMD64. */
8577 if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
8578 continue;
8579 break;
8580 }
8581
8582 /* Check the suffix. */
8583 specific_error = progress (invalid_instruction_suffix);
8584 if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
8585 || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
8586 || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
8587 || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
8588 || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
8589 continue;
8590
8591 specific_error = progress (operand_size_mismatch);
8592 size_match = operand_size_match (t);
8593 if (!size_match)
8594 continue;
8595
8596 /* This is intentionally not
8597
8598 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
8599
8600 as the case of a missing * on the operand is accepted (perhaps with
8601 a warning, issued further down). */
8602 specific_error = progress (operand_type_mismatch);
8603 if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
8604 continue;
8605
8606 /* In Intel syntax, normally we can check for memory operand size when
8607 there is no mnemonic suffix. But jmp and call have 2 different
8608 encodings with Dword memory operand size. Skip the "near" one
8609 (permitting a register operand) when "far" was requested. */
8610 if (i.far_branch
8611 && t->opcode_modifier.jump == JUMP_ABSOLUTE
8612 && t->operand_types[0].bitfield.class == Reg)
8613 continue;
8614
8615 for (j = 0; j < MAX_OPERANDS; j++)
8616 operand_types[j] = t->operand_types[j];
8617
8618 /* In general, don't allow 32-bit operands on pre-386. */
8619 specific_error = progress (mnem_suffix ? invalid_instruction_suffix
8620 : operand_size_mismatch);
8621 j = i.imm_operands + (t->operands > i.imm_operands + 1);
8622 if (i.suffix == LONG_MNEM_SUFFIX
8623 && !cpu_arch_flags.bitfield.cpui386
8624 && (intel_syntax
8625 ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
8626 && !intel_float_operand (insn_name (t)))
8627 : intel_float_operand (insn_name (t)) != 2)
8628 && (t->operands == i.imm_operands
8629 || (operand_types[i.imm_operands].bitfield.class != RegMMX
8630 && operand_types[i.imm_operands].bitfield.class != RegSIMD
8631 && operand_types[i.imm_operands].bitfield.class != RegMask)
8632 || (operand_types[j].bitfield.class != RegMMX
8633 && operand_types[j].bitfield.class != RegSIMD
8634 && operand_types[j].bitfield.class != RegMask))
8635 && !t->opcode_modifier.sib)
8636 continue;
8637
8638 /* Do not verify operands when there are none. */
8639 if (!t->operands)
8640 {
8641 if (VEX_check_encoding (t))
8642 {
8643 specific_error = progress (i.error);
8644 continue;
8645 }
8646
8647 /* Check if pseudo prefix {rex2} is valid. */
8648 if (t->opcode_modifier.noegpr && i.rex2_encoding)
8649 {
8650 specific_error = progress (invalid_pseudo_prefix);
8651 continue;
8652 }
8653
8654 /* We've found a match; break out of loop. */
8655 break;
8656 }
8657
8658 if (!t->opcode_modifier.jump
8659 || t->opcode_modifier.jump == JUMP_ABSOLUTE)
8660 {
8661 /* There should be only one Disp operand. */
8662 for (j = 0; j < MAX_OPERANDS; j++)
8663 if (operand_type_check (operand_types[j], disp))
8664 break;
8665 if (j < MAX_OPERANDS)
8666 {
8667 bool override = (i.prefix[ADDR_PREFIX] != 0);
8668
8669 addr_prefix_disp = j;
8670
8671 /* An address size prefix turns a Disp64 operand into Disp32, and
8672 a Disp32/Disp16 one into Disp16/Disp32 respectively. */
8673 switch (flag_code)
8674 {
8675 case CODE_16BIT:
8676 override = !override;
8677 /* Fall through. */
8678 case CODE_32BIT:
8679 if (operand_types[j].bitfield.disp32
8680 && operand_types[j].bitfield.disp16)
8681 {
8682 operand_types[j].bitfield.disp16 = override;
8683 operand_types[j].bitfield.disp32 = !override;
8684 }
8685 gas_assert (!operand_types[j].bitfield.disp64);
8686 break;
8687
8688 case CODE_64BIT:
8689 if (operand_types[j].bitfield.disp64)
8690 {
8691 gas_assert (!operand_types[j].bitfield.disp32);
8692 operand_types[j].bitfield.disp32 = override;
8693 operand_types[j].bitfield.disp64 = !override;
8694 }
8695 operand_types[j].bitfield.disp16 = 0;
8696 break;
8697 }
8698 }
8699 }
8700
8701 /* We check register size if needed. */
8702 if (t->opcode_modifier.checkoperandsize)
8703 {
8704 check_register = (1 << t->operands) - 1;
8705 if (i.broadcast.type || i.broadcast.bytes)
8706 check_register &= ~(1 << i.broadcast.operand);
8707 }
8708 else
8709 check_register = 0;
8710
8711 overlap0 = operand_type_and (i.types[0], operand_types[0]);
8712 switch (t->operands)
8713 {
8714 case 1:
8715 if (!operand_type_match (overlap0, i.types[0]))
8716 continue;
8717
8718 /* Allow the ModR/M encoding to be requested by using the {load} or
8719 {store} pseudo prefix on an applicable insn. */
8720 if (!t->opcode_modifier.modrm
8721 && i.reg_operands == 1
8722 && ((i.dir_encoding == dir_encoding_load
8723 && t->mnem_off != MN_pop)
8724 || (i.dir_encoding == dir_encoding_store
8725 && t->mnem_off != MN_push))
8726 /* Avoid BSWAP. */
8727 && t->mnem_off != MN_bswap)
8728 continue;
8729 break;
8730
8731 case 2:
8732 /* xchg %eax, %eax is a special case. It is an alias for nop
8733 only in 32bit mode and we can use opcode 0x90. In 64bit
8734 mode, we can't use 0x90 for xchg %eax, %eax since it should
8735 zero-extend %eax to %rax. */
8736 if (t->base_opcode == 0x90
8737 && t->opcode_space == SPACE_BASE)
8738 {
8739 if (flag_code == CODE_64BIT
8740 && i.types[0].bitfield.instance == Accum
8741 && i.types[0].bitfield.dword
8742 && i.types[1].bitfield.instance == Accum)
8743 continue;
8744
8745 /* Allow the ModR/M encoding to be requested by using the
8746 {load} or {store} pseudo prefix. */
8747 if (i.dir_encoding == dir_encoding_load
8748 || i.dir_encoding == dir_encoding_store)
8749 continue;
8750 }
8751
8752 if (t->base_opcode == MOV_AX_DISP32
8753 && t->opcode_space == SPACE_BASE
8754 && t->mnem_off != MN_movabs)
8755 {
8756 /* Force 0x8b encoding for "mov foo@GOT, %eax". */
8757 if (i.reloc[0] == BFD_RELOC_386_GOT32)
8758 continue;
8759
8760 /* xrelease mov %eax, <disp> is another special case. It must not
8761 match the accumulator-only encoding of mov. */
8762 if (i.hle_prefix)
8763 continue;
8764
8765 /* Allow the ModR/M encoding to be requested by using a suitable
8766 {load} or {store} pseudo prefix. */
8767 if (i.dir_encoding == (i.types[0].bitfield.instance == Accum
8768 ? dir_encoding_store
8769 : dir_encoding_load)
8770 && !i.types[0].bitfield.disp64
8771 && !i.types[1].bitfield.disp64)
8772 continue;
8773 }
8774
8775 /* Allow the ModR/M encoding to be requested by using the {load} or
8776 {store} pseudo prefix on an applicable insn. */
8777 if (!t->opcode_modifier.modrm
8778 && i.reg_operands == 1
8779 && i.imm_operands == 1
8780 && (i.dir_encoding == dir_encoding_load
8781 || i.dir_encoding == dir_encoding_store)
8782 && t->opcode_space == SPACE_BASE)
8783 {
8784 if (t->base_opcode == 0xb0 /* mov $imm, %reg */
8785 && i.dir_encoding == dir_encoding_store)
8786 continue;
8787
8788 if ((t->base_opcode | 0x38) == 0x3c /* <alu> $imm, %acc */
8789 && (t->base_opcode != 0x3c /* cmp $imm, %acc */
8790 || i.dir_encoding == dir_encoding_load))
8791 continue;
8792
8793 if (t->base_opcode == 0xa8 /* test $imm, %acc */
8794 && i.dir_encoding == dir_encoding_load)
8795 continue;
8796 }
8797 /* Fall through. */
8798
8799 case 3:
8800 if (!(size_match & MATCH_STRAIGHT))
8801 goto check_reverse;
8802 /* Reverse direction of operands if swapping is possible in the first
8803 place (operands need to be symmetric) and
8804 - the load form is requested, and the template is a store form,
8805 - the store form is requested, and the template is a load form,
8806 - the non-default (swapped) form is requested. */
8807 overlap1 = operand_type_and (operand_types[0], operand_types[1]);
8808
8809 j = i.operands - 1 - (t->opcode_space == SPACE_EVEXMAP4
8810 && t->opcode_modifier.vexvvvv);
8811
8812 if (t->opcode_modifier.d && i.reg_operands == i.operands
8813 && !operand_type_all_zero (&overlap1))
8814 switch (i.dir_encoding)
8815 {
8816 case dir_encoding_load:
8817 if (operand_type_check (operand_types[j], anymem)
8818 || t->opcode_modifier.regmem)
8819 goto check_reverse;
8820 break;
8821
8822 case dir_encoding_store:
8823 if (!operand_type_check (operand_types[j], anymem)
8824 && !t->opcode_modifier.regmem)
8825 goto check_reverse;
8826 break;
8827
8828 case dir_encoding_swap:
8829 goto check_reverse;
8830
8831 case dir_encoding_default:
8832 break;
8833 }
8834
8835 /* If we want store form, we skip the current load. */
8836 if ((i.dir_encoding == dir_encoding_store
8837 || i.dir_encoding == dir_encoding_swap)
8838 && i.mem_operands == 0
8839 && t->opcode_modifier.load)
8840 continue;
8841 /* Fall through. */
8842 case 4:
8843 case 5:
8844 overlap1 = operand_type_and (i.types[1], operand_types[1]);
8845 if (!operand_type_match (overlap0, i.types[0])
8846 || !operand_type_match (overlap1, i.types[1])
8847 || ((check_register & 3) == 3
8848 && !operand_type_register_match (i.types[0],
8849 operand_types[0],
8850 i.types[1],
8851 operand_types[1])))
8852 {
8853 specific_error = progress (i.error);
8854
8855 /* Check if other direction is valid ... */
8856 if (!t->opcode_modifier.d)
8857 continue;
8858
8859 check_reverse:
8860 if (!(size_match & MATCH_REVERSE))
8861 continue;
8862 /* Try reversing direction of operands. */
8863 j = is_cpu (t, CpuFMA4)
8864 || is_cpu (t, CpuXOP)
8865 || is_cpu (t, CpuAPX_F) ? 1 : i.operands - 1;
8866 overlap0 = operand_type_and (i.types[0], operand_types[j]);
8867 overlap1 = operand_type_and (i.types[j], operand_types[0]);
8868 overlap2 = operand_type_and (i.types[1], operand_types[1]);
8869 gas_assert (t->operands != 3 || !check_register
8870 || is_cpu (t, CpuAPX_F));
8871 if (!operand_type_match (overlap0, i.types[0])
8872 || !operand_type_match (overlap1, i.types[j])
8873 || (t->operands == 3
8874 && !operand_type_match (overlap2, i.types[1]))
8875 || (check_register
8876 && !operand_type_register_match (i.types[0],
8877 operand_types[j],
8878 i.types[j],
8879 operand_types[0])))
8880 {
8881 /* Does not match either direction. */
8882 specific_error = progress (i.error);
8883 continue;
8884 }
8885 /* found_reverse_match holds which variant of D
8886 we've found. */
8887 if (!t->opcode_modifier.d)
8888 found_reverse_match = 0;
8889 else if (operand_types[0].bitfield.tbyte)
8890 {
8891 if (t->opcode_modifier.operandconstraint != UGH)
8892 found_reverse_match = Opcode_FloatD;
8893 else
8894 found_reverse_match = ~0;
8895 /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped. */
8896 if ((t->extension_opcode & 4)
8897 && (intel_syntax || intel_mnemonic))
8898 found_reverse_match |= Opcode_FloatR;
8899 }
8900 else if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP))
8901 {
8902 found_reverse_match = Opcode_VexW;
8903 goto check_operands_345;
8904 }
8905 else if (is_cpu (t, CpuAPX_F) && i.operands == 3)
8906 {
8907 found_reverse_match = Opcode_D;
8908 goto check_operands_345;
8909 }
8910 else if (t->opcode_space != SPACE_BASE
8911 && (t->opcode_space != SPACE_0F
8912 /* MOV to/from CR/DR/TR, as an exception, follow
8913 the base opcode space encoding model. */
8914 || (t->base_opcode | 7) != 0x27))
8915 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
8916 ? Opcode_ExtD : Opcode_SIMD_IntD;
8917 else if (!t->opcode_modifier.commutative)
8918 found_reverse_match = Opcode_D;
8919 else
8920 found_reverse_match = ~0;
8921 }
8922 else
8923 {
8924 /* Found a forward 2 operand match here. */
8925 check_operands_345:
8926 switch (t->operands)
8927 {
8928 case 5:
8929 overlap4 = operand_type_and (i.types[4], operand_types[4]);
8930 if (!operand_type_match (overlap4, i.types[4])
8931 || !operand_type_register_match (i.types[3],
8932 operand_types[3],
8933 i.types[4],
8934 operand_types[4]))
8935 {
8936 specific_error = progress (i.error);
8937 continue;
8938 }
8939 /* Fall through. */
8940 case 4:
8941 overlap3 = operand_type_and (i.types[3], operand_types[3]);
8942 if (!operand_type_match (overlap3, i.types[3])
8943 || ((check_register & 0xa) == 0xa
8944 && !operand_type_register_match (i.types[1],
8945 operand_types[1],
8946 i.types[3],
8947 operand_types[3]))
8948 || ((check_register & 0xc) == 0xc
8949 && !operand_type_register_match (i.types[2],
8950 operand_types[2],
8951 i.types[3],
8952 operand_types[3])))
8953 {
8954 specific_error = progress (i.error);
8955 continue;
8956 }
8957 /* Fall through. */
8958 case 3:
8959 overlap2 = operand_type_and (i.types[2], operand_types[2]);
8960 if (!operand_type_match (overlap2, i.types[2])
8961 || ((check_register & 5) == 5
8962 && !operand_type_register_match (i.types[0],
8963 operand_types[0],
8964 i.types[2],
8965 operand_types[2]))
8966 || ((check_register & 6) == 6
8967 && !operand_type_register_match (i.types[1],
8968 operand_types[1],
8969 i.types[2],
8970 operand_types[2])))
8971 {
8972 specific_error = progress (i.error);
8973 continue;
8974 }
8975 break;
8976 }
8977 }
8978 /* Found either forward/reverse 2, 3 or 4 operand match here:
8979 slip through to break. */
8980 }
8981
8982 /* Check if VEX/EVEX encoding requirements can be satisfied. */
8983 if (VEX_check_encoding (t))
8984 {
8985 specific_error = progress (i.error);
8986 continue;
8987 }
8988
8989 /* Check if EGPR operands (r16-r31) are valid. */
8990 if (check_EgprOperands (t))
8991 {
8992 specific_error = progress (i.error);
8993 continue;
8994 }
8995
8996 /* Check if vector operands are valid. */
8997 if (check_VecOperands (t))
8998 {
8999 specific_error = progress (i.error);
9000 continue;
9001 }
9002
9003 /* Check if APX operands are valid. */
9004 if (check_APX_operands (t))
9005 {
9006 specific_error = progress (i.error);
9007 continue;
9008 }
9009
9010 /* Check whether to use the shorter VEX encoding for certain insns where
9011 the EVEX encoding comes first in the table. This requires the respective
9012 AVX-* feature to be explicitly enabled.
9013
9014 Most of the respective insns have just a single EVEX and a single VEX
9015 template. The one that's presently different is generated using the
9016 Vxy / Exy constructs: There are 3 suffix-less EVEX forms, the latter
9017 two of which may fall back to their two corresponding VEX forms. */
9018 j = t->mnem_off != MN_vcvtneps2bf16 ? 1 : 2;
9019 if ((t == current_templates.start || j > 1)
9020 && t->opcode_modifier.disp8memshift
9021 && !t->opcode_modifier.vex
9022 && !need_evex_encoding (t)
9023 && t + j < current_templates.end
9024 && t[j].opcode_modifier.vex)
9025 {
9026 i386_cpu_flags cpu;
9027 unsigned int memshift = i.memshift;
9028
9029 i.memshift = 0;
9030 cpu = cpu_flags_and (cpu_flags_from_attr (t[j].cpu),
9031 cpu_arch_isa_flags);
9032 if (!cpu_flags_all_zero (&cpu)
9033 && (!i.types[0].bitfield.disp8
9034 || !operand_type_check (i.types[0], disp)
9035 || i.op[0].disps->X_op != O_constant
9036 || fits_in_disp8 (i.op[0].disps->X_add_number)))
9037 {
9038 specific_error = progress (internal_error);
9039 t += j - 1;
9040 continue;
9041 }
9042 i.memshift = memshift;
9043 }
9044
9045 /* If we can optimize an NDD insn to a legacy insn, like
9046 add %r16, %r8, %r8 -> add %r16, %r8,
9047 add %r8, %r16, %r8 -> add %r16, %r8, then rematch the template.
9048 Note that the semantics are unchanged. */
9049 if (optimize
9050 && !i.no_optimize
9051 && i.encoding != encoding_evex
9052 && ((t + 1 < current_templates.end
9053 && !t[1].opcode_modifier.evex
9054 && t[1].opcode_space <= SPACE_0F38
9055 && t->opcode_modifier.vexvvvv == VexVVVV_DST)
9056 || t->mnem_off == MN_movbe)
9057 && (i.types[i.operands - 1].bitfield.dword
9058 || i.types[i.operands - 1].bitfield.qword))
9059 {
9060 unsigned int match_dest_op = can_convert_NDD_to_legacy (t);
9061
9062 if (match_dest_op != (unsigned int) ~0)
9063 {
9064 size_match = true;
9065 /* Ensure that the next template has the same input
9066 operands as the originally matched template, judged by
9067 the first operand (AT&T order). This guards against new
9068 NDD insns being added to the table in the wrong position. */
9069 overlap0 = operand_type_and (i.types[0],
9070 t[1].operand_types[0]);
9071 if (t->opcode_modifier.d)
9072 overlap1 = operand_type_and (i.types[0],
9073 t[1].operand_types[1]);
9074 if (!operand_type_match (overlap0, i.types[0])
9075 && (!t->opcode_modifier.d
9076 || !operand_type_match (overlap1, i.types[0])))
9077 size_match = false;
9078
9079 if (size_match
9080 && (t[1].opcode_space <= SPACE_0F
9081 /* Some non-legacy-map0/1 insns can be shorter when
9082 legacy-encoded and when no REX prefix is required. */
9083 || (!check_EgprOperands (t + 1)
9084 && !check_Rex_required ()
9085 && !i.op[i.operands - 1].regs->reg_type.bitfield.qword)))
9086 {
9087 if (i.operands > 2 && match_dest_op == i.operands - 3)
9088 swap_2_operands (match_dest_op, i.operands - 2);
9089
9090 --i.operands;
9091 --i.reg_operands;
9092
9093 if (t->mnem_off == MN_movbe)
9094 {
9095 gas_assert (t[1].mnem_off == MN_bswap);
9096 ++current_templates.end;
9097 }
9098
9099 specific_error = progress (internal_error);
9100 continue;
9101 }
9102
9103 }
9104 }
9105
9106 /* We've found a match; break out of loop. */
9107 break;
9108 }
9109
9110 #undef progress
9111
9112 if (t == current_templates.end)
9113 {
9114 /* We found no match. */
9115 i.error = specific_error;
9116 return NULL;
9117 }
9118
9119 if (!quiet_warnings)
9120 {
9121 if (!intel_syntax
9122 && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
9123 as_warn (_("indirect %s without `*'"), insn_name (t));
9124
9125 if (t->opcode_modifier.isprefix
9126 && t->opcode_modifier.mnemonicsize == IGNORESIZE)
9127 {
9128 /* Warn them that a data or address size prefix doesn't
9129 affect assembly of the next line of code. */
9130 as_warn (_("stand-alone `%s' prefix"), insn_name (t));
9131 }
9132 }
9133
9134 /* Copy the template we found. */
9135 install_template (t);
9136
9137 if (addr_prefix_disp != -1)
9138 i.tm.operand_types[addr_prefix_disp]
9139 = operand_types[addr_prefix_disp];
9140
9141 /* APX insns acting on byte operands are WIG, yet that can't be expressed
9142 in the templates (they also cover word/dword/qword operands). */
9143 if (t->opcode_space == SPACE_EVEXMAP4 && !t->opcode_modifier.vexw
9144 && i.types[i.operands - 1].bitfield.byte)
9145 {
9146 gas_assert (t->opcode_modifier.w);
9147 i.tm.opcode_modifier.vexw = VEXWIG;
9148 }
9149
9150 switch (found_reverse_match)
9151 {
9152 case 0:
9153 break;
9154
9155 case Opcode_FloatR:
9156 case Opcode_FloatR | Opcode_FloatD:
9157 i.tm.extension_opcode ^= Opcode_FloatR >> 3;
9158 found_reverse_match &= Opcode_FloatD;
9159
9160 /* Fall through. */
9161 default:
9162 /* If we found a reverse match we must alter the opcode direction
9163 bit and clear/flip the regmem modifier one. found_reverse_match
9164 holds bits to change (different for int & float insns). */
9165
9166 i.tm.base_opcode ^= found_reverse_match;
9167
9168 if (i.tm.opcode_space == SPACE_EVEXMAP4)
9169 goto swap_first_2;
9170
9171 /* Certain SIMD insns have their load forms specified in the opcode
9172 table, and hence we need to _set_ RegMem instead of clearing it.
9173 We need to avoid setting the bit though on insns like KMOVW. */
9174 i.tm.opcode_modifier.regmem
9175 = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
9176 && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
9177 && !i.tm.opcode_modifier.regmem;
9178
9179 /* Fall through. */
9180 case ~0:
9181 i.tm.operand_types[0] = operand_types[i.operands - 1];
9182 i.tm.operand_types[i.operands - 1] = operand_types[0];
9183 break;
9184
9185 case Opcode_VexW:
9186 /* Only the first two register operands need reversing, alongside
9187 flipping VEX.W. */
9188 i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
9189
9190 swap_first_2:
9191 j = i.tm.operand_types[0].bitfield.imm8;
9192 i.tm.operand_types[j] = operand_types[j + 1];
9193 i.tm.operand_types[j + 1] = operand_types[j];
9194 break;
9195 }
9196
9197 return t;
9198 }
9199
9200 static int
9201 check_string (void)
9202 {
9203 unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
9204 unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
9205
9206 if (i.seg[op] != NULL && i.seg[op] != reg_es)
9207 {
9208 as_bad (_("`%s' operand %u must use `%ses' segment"),
9209 insn_name (&i.tm),
9210 intel_syntax ? i.tm.operands - es_op : es_op + 1,
9211 register_prefix);
9212 return 0;
9213 }
9214
9215 /* There's only ever one segment override allowed per instruction.
9216 This instruction possibly has a legal segment override on the
9217 second operand, so copy the segment to where non-string
9218 instructions store it, allowing common code. */
9219 i.seg[op] = i.seg[1];
9220
9221 return 1;
9222 }
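/* Example: "stos %al, %es:(%rdi)" (or plain "stosb") is fine, while an
   explicit override such as "stos %al, %fs:(%rdi)" triggers the %es
   diagnostic above.  */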
9223
9224 static int
9225 process_suffix (void)
9226 {
9227 bool is_movx = false;
9228
9229 /* If matched instruction specifies an explicit instruction mnemonic
9230 suffix, use it. */
9231 if (i.tm.opcode_modifier.size == SIZE16)
9232 i.suffix = WORD_MNEM_SUFFIX;
9233 else if (i.tm.opcode_modifier.size == SIZE32)
9234 i.suffix = LONG_MNEM_SUFFIX;
9235 else if (i.tm.opcode_modifier.size == SIZE64)
9236 i.suffix = QWORD_MNEM_SUFFIX;
9237 else if (i.reg_operands
9238 && (i.operands > 1 || i.types[0].bitfield.class == Reg)
9239 && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
9240 {
9241 unsigned int numop = i.operands;
9242
9243 /* MOVSX/MOVZX */
9244 is_movx = (i.tm.opcode_space == SPACE_0F
9245 && (i.tm.base_opcode | 8) == 0xbe)
9246 || (i.tm.opcode_space == SPACE_BASE
9247 && i.tm.base_opcode == 0x63
9248 && is_cpu (&i.tm, Cpu64));
9249
9250 /* movsx/movzx want only their source operand considered here, for the
9251 ambiguity checking below. The suffix will be replaced afterwards
9252 to represent the destination (register). */
9253 if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
9254 --i.operands;
9255
9256 /* crc32 needs REX.W set regardless of suffix / source operand size. */
9257 if (i.tm.mnem_off == MN_crc32 && i.tm.operand_types[1].bitfield.qword)
9258 i.rex |= REX_W;
9259
9260 /* If there's no instruction mnemonic suffix we try to invent one
9261 based on GPR operands. */
9262 if (!i.suffix)
9263 {
9264 /* We take i.suffix from the last register operand specified.
9265 The destination register type is more significant than the
9266 source register type. crc32 in SSE4.2 prefers the source
9267 register type. */
9268 unsigned int op = i.tm.mnem_off == MN_crc32 ? 1 : i.operands;
9269
9270 while (op--)
9271 if (i.tm.operand_types[op].bitfield.instance == InstanceNone
9272 || i.tm.operand_types[op].bitfield.instance == Accum)
9273 {
9274 if (i.types[op].bitfield.class != Reg)
9275 continue;
9276 if (i.types[op].bitfield.byte)
9277 i.suffix = BYTE_MNEM_SUFFIX;
9278 else if (i.types[op].bitfield.word)
9279 i.suffix = WORD_MNEM_SUFFIX;
9280 else if (i.types[op].bitfield.dword)
9281 i.suffix = LONG_MNEM_SUFFIX;
9282 else if (i.types[op].bitfield.qword)
9283 i.suffix = QWORD_MNEM_SUFFIX;
9284 else
9285 continue;
9286 break;
9287 }
9288
9289 /* As an exception, movsx/movzx silently default to a byte source
9290 in AT&T mode. */
9291 if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
9292 i.suffix = BYTE_MNEM_SUFFIX;
9293 }
9294 else if (i.suffix == BYTE_MNEM_SUFFIX)
9295 {
9296 if (!check_byte_reg ())
9297 return 0;
9298 }
9299 else if (i.suffix == LONG_MNEM_SUFFIX)
9300 {
9301 if (!check_long_reg ())
9302 return 0;
9303 }
9304 else if (i.suffix == QWORD_MNEM_SUFFIX)
9305 {
9306 if (!check_qword_reg ())
9307 return 0;
9308 }
9309 else if (i.suffix == WORD_MNEM_SUFFIX)
9310 {
9311 if (!check_word_reg ())
9312 return 0;
9313 }
9314 else if (intel_syntax
9315 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
9316 /* Do nothing if the instruction is going to ignore the prefix. */
9317 ;
9318 else
9319 abort ();
9320
9321 /* Undo the movsx/movzx change done above. */
9322 i.operands = numop;
9323 }
9324 else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
9325 && !i.suffix)
9326 {
9327 i.suffix = stackop_size;
9328 if (stackop_size == LONG_MNEM_SUFFIX)
9329 {
9330 /* stackop_size is set to LONG_MNEM_SUFFIX for the
9331 .code16gcc directive to support 16-bit mode with
9332 32-bit address. For IRET without a suffix, generate
9333 16-bit IRET (opcode 0xcf) to return from an interrupt
9334 handler. */
9335 if (i.tm.base_opcode == 0xcf)
9336 {
9337 i.suffix = WORD_MNEM_SUFFIX;
9338 as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
9339 }
9340 /* Warn about changed behavior for segment register push/pop. */
9341 else if ((i.tm.base_opcode | 1) == 0x07)
9342 as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
9343 insn_name (&i.tm));
9344 }
9345 }
9346 else if (!i.suffix
9347 && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
9348 || i.tm.opcode_modifier.jump == JUMP_BYTE
9349 || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
9350 || (i.tm.opcode_space == SPACE_0F
9351 && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
9352 && i.tm.extension_opcode <= 3)))
9353 {
9354 switch (flag_code)
9355 {
9356 case CODE_64BIT:
9357 if (!i.tm.opcode_modifier.no_qsuf)
9358 {
9359 if (i.tm.opcode_modifier.jump == JUMP_BYTE
9360 || i.tm.opcode_modifier.no_lsuf)
9361 i.suffix = QWORD_MNEM_SUFFIX;
9362 break;
9363 }
9364 /* Fall through. */
9365 case CODE_32BIT:
9366 if (!i.tm.opcode_modifier.no_lsuf)
9367 i.suffix = LONG_MNEM_SUFFIX;
9368 break;
9369 case CODE_16BIT:
9370 if (!i.tm.opcode_modifier.no_wsuf)
9371 i.suffix = WORD_MNEM_SUFFIX;
9372 break;
9373 }
9374 }
9375
9376 if (!i.suffix
9377 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
9378 /* Also cover lret/retf/iret in 64-bit mode. */
9379 || (flag_code == CODE_64BIT
9380 && !i.tm.opcode_modifier.no_lsuf
9381 && !i.tm.opcode_modifier.no_qsuf))
9382 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
9383 /* Explicit sizing prefixes are assumed to disambiguate insns. */
9384 && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
9385 /* Accept FLDENV et al without suffix. */
9386 && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
9387 {
9388 unsigned int suffixes, evex = 0;
9389
9390 suffixes = !i.tm.opcode_modifier.no_bsuf;
9391 if (!i.tm.opcode_modifier.no_wsuf)
9392 suffixes |= 1 << 1;
9393 if (!i.tm.opcode_modifier.no_lsuf)
9394 suffixes |= 1 << 2;
9395 if (!i.tm.opcode_modifier.no_ssuf)
9396 suffixes |= 1 << 4;
9397 if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
9398 suffixes |= 1 << 5;
9399
9400 /* For [XYZ]MMWORD operands inspect operand sizes. While generally
9401 also suitable for AT&T syntax mode, it was requested that this be
9402 restricted to just Intel syntax. */
9403 if (intel_syntax && is_any_vex_encoding (&i.tm)
9404 && !i.broadcast.type && !i.broadcast.bytes)
9405 {
9406 unsigned int op;
9407
9408 for (op = 0; op < i.tm.operands; ++op)
9409 {
9410 if (vector_size < VSZ512)
9411 {
9412 i.tm.operand_types[op].bitfield.zmmword = 0;
9413 if (vector_size < VSZ256)
9414 {
9415 i.tm.operand_types[op].bitfield.ymmword = 0;
9416 if (i.tm.operand_types[op].bitfield.xmmword
9417 && i.tm.opcode_modifier.evex == EVEXDYN)
9418 i.tm.opcode_modifier.evex = EVEX128;
9419 }
9420 else if (i.tm.operand_types[op].bitfield.ymmword
9421 && !i.tm.operand_types[op].bitfield.xmmword
9422 && i.tm.opcode_modifier.evex == EVEXDYN)
9423 i.tm.opcode_modifier.evex = EVEX256;
9424 }
9425 else if (i.tm.opcode_modifier.evex
9426 && !cpu_arch_flags.bitfield.cpuavx512vl)
9427 {
9428 if (i.tm.operand_types[op].bitfield.ymmword)
9429 i.tm.operand_types[op].bitfield.xmmword = 0;
9430 if (i.tm.operand_types[op].bitfield.zmmword)
9431 i.tm.operand_types[op].bitfield.ymmword = 0;
9432 if (i.tm.opcode_modifier.evex == EVEXDYN)
9433 i.tm.opcode_modifier.evex = EVEX512;
9434 }
9435
9436 if (i.tm.operand_types[op].bitfield.xmmword
9437 + i.tm.operand_types[op].bitfield.ymmword
9438 + i.tm.operand_types[op].bitfield.zmmword < 2)
9439 continue;
9440
9441 /* Any properly sized operand disambiguates the insn. */
9442 if (i.types[op].bitfield.xmmword
9443 || i.types[op].bitfield.ymmword
9444 || i.types[op].bitfield.zmmword)
9445 {
9446 suffixes &= ~(7 << 6);
9447 evex = 0;
9448 break;
9449 }
9450
9451 if ((i.flags[op] & Operand_Mem)
9452 && i.tm.operand_types[op].bitfield.unspecified)
9453 {
9454 if (i.tm.operand_types[op].bitfield.xmmword)
9455 suffixes |= 1 << 6;
9456 if (i.tm.operand_types[op].bitfield.ymmword)
9457 suffixes |= 1 << 7;
9458 if (i.tm.operand_types[op].bitfield.zmmword)
9459 suffixes |= 1 << 8;
9460 if (i.tm.opcode_modifier.evex)
9461 evex = EVEX512;
9462 }
9463 }
9464 }
9465
9466 /* Are multiple suffixes / operand sizes allowed? */
9467 if (suffixes & (suffixes - 1))
9468 {
9469 if (intel_syntax
9470 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
9471 || operand_check == check_error))
9472 {
9473 as_bad (_("ambiguous operand size for `%s'"), insn_name (&i.tm));
9474 return 0;
9475 }
9476 if (operand_check == check_error)
9477 {
9478 as_bad (_("no instruction mnemonic suffix given and "
9479 "no register operands; can't size `%s'"), insn_name (&i.tm));
9480 return 0;
9481 }
9482 if (operand_check == check_warning)
9483 as_warn (_("%s; using default for `%s'"),
9484 intel_syntax
9485 ? _("ambiguous operand size")
9486 : _("no instruction mnemonic suffix given and "
9487 "no register operands"),
9488 insn_name (&i.tm));
9489
9490 if (i.tm.opcode_modifier.floatmf)
9491 i.suffix = SHORT_MNEM_SUFFIX;
9492 else if (is_movx)
9493 /* handled below */;
9494 else if (evex)
9495 i.tm.opcode_modifier.evex = evex;
9496 else if (flag_code == CODE_16BIT)
9497 i.suffix = WORD_MNEM_SUFFIX;
9498 else if (!i.tm.opcode_modifier.no_lsuf)
9499 i.suffix = LONG_MNEM_SUFFIX;
9500 else
9501 i.suffix = QWORD_MNEM_SUFFIX;
9502 }
9503 }
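  /* For example, a suffix-less AT&T "inc (%eax)" in 32-bit code ends up
     in the path above: nothing sizes the memory operand, so gas warns
     and defaults to "incl". */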
9504
9505 if (is_movx)
9506 {
9507 /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
9508 In AT&T syntax, if there is no suffix (warned about above), the default
9509 will be byte extension. */
9510 if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
9511 i.tm.base_opcode |= 1;
9512
9513 /* For further processing, the suffix should represent the destination
9514 (register). This is already the case when one was used with
9515 mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
9516 no suffix to begin with. */
9517 if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
9518 {
9519 if (i.types[1].bitfield.word)
9520 i.suffix = WORD_MNEM_SUFFIX;
9521 else if (i.types[1].bitfield.qword)
9522 i.suffix = QWORD_MNEM_SUFFIX;
9523 else
9524 i.suffix = LONG_MNEM_SUFFIX;
9525
9526 i.tm.opcode_modifier.w = 0;
9527 }
9528 }
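  /* E.g. AT&T "movzbl %al, %ecx" already carries the destination size in
     its 'l' suffix, while for "movzx %al, %ecx" the code above derives
     the same 'l' suffix from the %ecx destination. */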
9529
9530 if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
9531 i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
9532 != (i.tm.operand_types[1].bitfield.class == Reg);
9533
9534 /* Change the opcode based on the operand size given by i.suffix. */
9535 switch (i.suffix)
9536 {
9537 /* Size floating point instruction. */
9538 case LONG_MNEM_SUFFIX:
9539 if (i.tm.opcode_modifier.floatmf)
9540 {
9541 i.tm.base_opcode ^= 4;
9542 break;
9543 }
9544 /* fall through */
9545 case WORD_MNEM_SUFFIX:
9546 case QWORD_MNEM_SUFFIX:
9547 /* It's not a byte, select word/dword operation. */
9548 if (i.tm.opcode_modifier.w)
9549 {
9550 if (i.short_form)
9551 i.tm.base_opcode |= 8;
9552 else
9553 i.tm.base_opcode |= 1;
9554 }
9555
9556 /* Set mode64 (REX.W) for a 64-bit operand. */
9557 if (i.suffix == QWORD_MNEM_SUFFIX)
9558 {
9559 if (flag_code == CODE_64BIT
9560 && !i.tm.opcode_modifier.norex64
9561 && !i.tm.opcode_modifier.vexw
9562 /* Special case for xchg %rax,%rax. It is a NOP and doesn't
9563 need rex64. */
9564 && ! (i.operands == 2
9565 && i.tm.base_opcode == 0x90
9566 && i.tm.opcode_space == SPACE_BASE
9567 && i.types[0].bitfield.instance == Accum
9568 && i.types[1].bitfield.instance == Accum))
9569 i.rex |= REX_W;
9570
9571 break;
9572 }
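	  /* E.g. "addq %rbx, %rax" picks up a REX.W prefix (0x48) above,
	     whereas "xchg %rax, %rax" is deliberately left as the plain
	     one-byte NOP 0x90. */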
9573
9574 /* fall through */
9575 case SHORT_MNEM_SUFFIX:
9576 /* Now select between word & dword operations via the operand
9577 size prefix, except for instructions that will ignore this
9578 prefix anyway. */
9579 if (i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
9580 && !i.tm.opcode_modifier.floatmf
9581 && (!is_any_vex_encoding (&i.tm)
9582 || i.tm.opcode_space == SPACE_EVEXMAP4)
9583 && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
9584 || (flag_code == CODE_64BIT
9585 && i.tm.opcode_modifier.jump == JUMP_BYTE)))
9586 {
9587 unsigned int prefix = DATA_PREFIX_OPCODE;
9588
9589 if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
9590 prefix = ADDR_PREFIX_OPCODE;
9591
9592 /* The DATA PREFIX of EVEX promoted from legacy APX instructions
9593 needs to be adjusted. */
9594 if (i.tm.opcode_space == SPACE_EVEXMAP4)
9595 {
9596 gas_assert (!i.tm.opcode_modifier.opcodeprefix);
9597 i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
9598 }
9599 else if (!add_prefix (prefix))
9600 return 0;
9601 }
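      /* E.g. "addw %ax, %bx" in 32-bit code receives the 0x66 data size
	 prefix here, while "jcxz" instead gets the 0x67 address size
	 prefix, selecting %cx as its counter. */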
9602
9603 break;
9604
9605 case 0:
9606 /* Select word/dword/qword operation with explicit data sizing prefix
9607 when there are no suitable register operands. */
9608 if (i.tm.opcode_modifier.w
9609 && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
9610 && (!i.reg_operands
9611 || (i.reg_operands == 1
9612 /* ShiftCount */
9613 && (i.tm.operand_types[0].bitfield.instance == RegC
9614 /* InOutPortReg */
9615 || i.tm.operand_types[0].bitfield.instance == RegD
9616 || i.tm.operand_types[1].bitfield.instance == RegD
9617 || i.tm.mnem_off == MN_crc32))))
9618 i.tm.base_opcode |= 1;
9619 break;
9620 }
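  /* E.g. "data16 neg (%eax)" in 32-bit code should take the case-0 path
     above: the explicit prefix suppressed the ambiguity warning earlier
     and now selects the word form, opcode 0xf7 rather than 0xf6. */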
9621
9622 if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
9623 {
9624 gas_assert (!i.suffix);
9625 gas_assert (i.reg_operands);
9626
9627 if (i.tm.operand_types[0].bitfield.instance == Accum
9628 || i.operands == 1)
9629 {
9630 /* The address size override prefix changes the size of the
9631 first operand. */
9632 if (flag_code == CODE_64BIT
9633 && i.op[0].regs->reg_type.bitfield.word)
9634 {
9635 as_bad (_("16-bit addressing unavailable for `%s'"),
9636 insn_name (&i.tm));
9637 return 0;
9638 }
9639
9640 if ((flag_code == CODE_32BIT
9641 ? i.op[0].regs->reg_type.bitfield.word
9642 : i.op[0].regs->reg_type.bitfield.dword)
9643 && !add_prefix (ADDR_PREFIX_OPCODE))
9644 return 0;
9645 }
9646 else
9647 {
9648 /* Check invalid register operand when the address size override
9649 prefix changes the size of register operands. */
9650 unsigned int op;
9651 enum { need_word, need_dword, need_qword } need;
9652
9653 /* Check the register operand for the address size prefix if
9654 the memory operand has no real registers, like symbol, DISP
9655 or bogus (x32-only) symbol(%rip) when symbol(%eip) is meant. */
9656 if (i.mem_operands == 1
9657 && i.reg_operands == 1
9658 && i.operands == 2
9659 && i.types[1].bitfield.class == Reg
9660 && (flag_code == CODE_32BIT
9661 ? i.op[1].regs->reg_type.bitfield.word
9662 : i.op[1].regs->reg_type.bitfield.dword)
9663 && ((i.base_reg == NULL && i.index_reg == NULL)
9664 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9665 || (x86_elf_abi == X86_64_X32_ABI
9666 && i.base_reg
9667 && i.base_reg->reg_num == RegIP
9668 && i.base_reg->reg_type.bitfield.qword))
9669 #else
9670 || 0)
9671 #endif
9672 && !add_prefix (ADDR_PREFIX_OPCODE))
9673 return 0;
9674
9675 if (flag_code == CODE_32BIT)
9676 need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
9677 else if (i.prefix[ADDR_PREFIX])
9678 need = need_dword;
9679 else
9680 need = flag_code == CODE_64BIT ? need_qword : need_word;
9681
9682 for (op = 0; op < i.operands; op++)
9683 {
9684 if (i.types[op].bitfield.class != Reg)
9685 continue;
9686
9687 switch (need)
9688 {
9689 case need_word:
9690 if (i.op[op].regs->reg_type.bitfield.word)
9691 continue;
9692 break;
9693 case need_dword:
9694 if (i.op[op].regs->reg_type.bitfield.dword)
9695 continue;
9696 break;
9697 case need_qword:
9698 if (i.op[op].regs->reg_type.bitfield.qword)
9699 continue;
9700 break;
9701 }
9702
9703 as_bad (_("invalid register operand size for `%s'"),
9704 insn_name (&i.tm));
9705 return 0;
9706 }
9707 }
9708 }
9709
9710 return 1;
9711 }
9712
9713 static int
9714 check_byte_reg (void)
9715 {
9716 int op;
9717
9718 for (op = i.operands; --op >= 0;)
9719 {
9720 /* Skip non-register operands. */
9721 if (i.types[op].bitfield.class != Reg)
9722 continue;
9723
9724 /* If this is an eight bit register, it's OK. If it's the 16 or
9725 32 bit version of an eight bit register, we will just use the
9726 low portion, and that's OK too. */
9727 if (i.types[op].bitfield.byte)
9728 continue;
9729
9730 /* I/O port address operands are OK too. */
9731 if (i.tm.operand_types[op].bitfield.instance == RegD
9732 && i.tm.operand_types[op].bitfield.word)
9733 continue;
9734
9735 /* crc32 only wants its source operand checked here. */
9736 if (i.tm.mnem_off == MN_crc32 && op != 0)
9737 continue;
9738
9739 /* Any other register is bad. */
9740 as_bad (_("`%s%s' not allowed with `%s%c'"),
9741 register_prefix, i.op[op].regs->reg_name,
9742 insn_name (&i.tm), i.suffix);
9743 return 0;
9744 }
9745 return 1;
9746 }
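/* E.g. "inb %dx, %al" passes check_byte_reg () via the RegD special case,
   while "movb %ax, %al" is rejected with "`%ax' not allowed with
   `movb'". */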
9747
9748 static int
9749 check_long_reg (void)
9750 {
9751 int op;
9752
9753 for (op = i.operands; --op >= 0;)
9754 /* Skip non-register operands. */
9755 if (i.types[op].bitfield.class != Reg)
9756 continue;
9757 /* Reject eight bit registers, except where the template requires
9758 them. (e.g. movzb) */
9759 else if (i.types[op].bitfield.byte
9760 && (i.tm.operand_types[op].bitfield.class == Reg
9761 || i.tm.operand_types[op].bitfield.instance == Accum)
9762 && (i.tm.operand_types[op].bitfield.word
9763 || i.tm.operand_types[op].bitfield.dword))
9764 {
9765 as_bad (_("`%s%s' not allowed with `%s%c'"),
9766 register_prefix,
9767 i.op[op].regs->reg_name,
9768 insn_name (&i.tm),
9769 i.suffix);
9770 return 0;
9771 }
9772 /* Error if the e prefix on a general reg is missing, or if the r
9773 prefix on a general reg is present. */
9774 else if ((i.types[op].bitfield.word
9775 || i.types[op].bitfield.qword)
9776 && (i.tm.operand_types[op].bitfield.class == Reg
9777 || i.tm.operand_types[op].bitfield.instance == Accum)
9778 && i.tm.operand_types[op].bitfield.dword)
9779 {
9780 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
9781 register_prefix, i.op[op].regs->reg_name,
9782 i.suffix);
9783 return 0;
9784 }
9785 return 1;
9786 }
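/* E.g. "movl %ax, %ecx" trips the second check in check_long_reg (): %ax
   is a word register where the 'l' suffix demands a dword one, yielding
   "incorrect register `%ax' used with `l' suffix". */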
9787
9788 static int
9789 check_qword_reg (void)
9790 {
9791 int op;
9792
9793 for (op = i.operands; --op >= 0; )
9794 /* Skip non-register operands. */
9795 if (i.types[op].bitfield.class != Reg)
9796 continue;
9797 /* Reject eight bit registers, except where the template requires
9798 them. (e.g. movzb) */
9799 else if (i.types[op].bitfield.byte
9800 && (i.tm.operand_types[op].bitfield.class == Reg
9801 || i.tm.operand_types[op].bitfield.instance == Accum)
9802 && (i.tm.operand_types[op].bitfield.word
9803 || i.tm.operand_types[op].bitfield.dword
9804 || i.tm.operand_types[op].bitfield.qword))
9805 {
9806 as_bad (_("`%s%s' not allowed with `%s%c'"),
9807 register_prefix,
9808 i.op[op].regs->reg_name,
9809 insn_name (&i.tm),
9810 i.suffix);
9811 return 0;
9812 }
9813 /* Error if the r prefix on a general reg is missing. */
9814 else if ((i.types[op].bitfield.word
9815 || i.types[op].bitfield.dword)
9816 && (i.tm.operand_types[op].bitfield.class == Reg
9817 || i.tm.operand_types[op].bitfield.instance == Accum)
9818 && i.tm.operand_types[op].bitfield.qword)
9819 {
9820 /* Prohibit these changes in 64-bit mode, since the
9821 lowering is more complicated. */
9822 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
9823 register_prefix, i.op[op].regs->reg_name, i.suffix);
9824 return 0;
9825 }
9826 return 1;
9827 }
9828
9829 static int
9830 check_word_reg (void)
9831 {
9832 int op;
9833 for (op = i.operands; --op >= 0;)
9834 /* Skip non-register operands. */
9835 if (i.types[op].bitfield.class != Reg)
9836 continue;
9837 /* Reject eight bit registers, except where the template requires
9838 them. (e.g. movzb) */
9839 else if (i.types[op].bitfield.byte
9840 && (i.tm.operand_types[op].bitfield.class == Reg
9841 || i.tm.operand_types[op].bitfield.instance == Accum)
9842 && (i.tm.operand_types[op].bitfield.word
9843 || i.tm.operand_types[op].bitfield.dword))
9844 {
9845 as_bad (_("`%s%s' not allowed with `%s%c'"),
9846 register_prefix,
9847 i.op[op].regs->reg_name,
9848 insn_name (&i.tm),
9849 i.suffix);
9850 return 0;
9851 }
9852 /* Error if the e or r prefix on a general reg is present. */
9853 else if ((i.types[op].bitfield.dword
9854 || i.types[op].bitfield.qword)
9855 && (i.tm.operand_types[op].bitfield.class == Reg
9856 || i.tm.operand_types[op].bitfield.instance == Accum)
9857 && i.tm.operand_types[op].bitfield.word)
9858 {
9859 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
9860 register_prefix, i.op[op].regs->reg_name,
9861 i.suffix);
9862 return 0;
9863 }
9864 return 1;
9865 }
9866
9867 static int
9868 update_imm (unsigned int j)
9869 {
9870 i386_operand_type overlap = i.types[j];
9871
9872 if (i.tm.operand_types[j].bitfield.imm8
9873 && i.tm.operand_types[j].bitfield.imm8s
9874 && overlap.bitfield.imm8 && overlap.bitfield.imm8s)
9875 {
9876 /* This combination is used on 8-bit immediates where it is desirable
9877 to permit e.g. $~0. We're past operand type matching, so simply
9878 put things back in the shape they were before introducing the
9879 distinction between Imm8, Imm8S, and Imm8|Imm8S. */
9880 overlap.bitfield.imm8s = 0;
9881 }
9882
9883 if (overlap.bitfield.imm8
9884 + overlap.bitfield.imm8s
9885 + overlap.bitfield.imm16
9886 + overlap.bitfield.imm32
9887 + overlap.bitfield.imm32s
9888 + overlap.bitfield.imm64 > 1)
9889 {
9890 static const i386_operand_type imm16 = { .bitfield = { .imm16 = 1 } };
9891 static const i386_operand_type imm32 = { .bitfield = { .imm32 = 1 } };
9892 static const i386_operand_type imm32s = { .bitfield = { .imm32s = 1 } };
9893 static const i386_operand_type imm16_32 = { .bitfield =
9894 { .imm16 = 1, .imm32 = 1 }
9895 };
9896 static const i386_operand_type imm16_32s = { .bitfield =
9897 { .imm16 = 1, .imm32s = 1 }
9898 };
9899 static const i386_operand_type imm16_32_32s = { .bitfield =
9900 { .imm16 = 1, .imm32 = 1, .imm32s = 1 }
9901 };
9902
9903 if (i.suffix)
9904 {
9905 i386_operand_type temp;
9906
9907 operand_type_set (&temp, 0);
9908 if (i.suffix == BYTE_MNEM_SUFFIX)
9909 {
9910 temp.bitfield.imm8 = overlap.bitfield.imm8;
9911 temp.bitfield.imm8s = overlap.bitfield.imm8s;
9912 }
9913 else if (i.suffix == WORD_MNEM_SUFFIX)
9914 temp.bitfield.imm16 = overlap.bitfield.imm16;
9915 else if (i.suffix == QWORD_MNEM_SUFFIX)
9916 {
9917 temp.bitfield.imm64 = overlap.bitfield.imm64;
9918 temp.bitfield.imm32s = overlap.bitfield.imm32s;
9919 }
9920 else
9921 temp.bitfield.imm32 = overlap.bitfield.imm32;
9922 overlap = temp;
9923 }
9924 else if (operand_type_equal (&overlap, &imm16_32_32s)
9925 || operand_type_equal (&overlap, &imm16_32)
9926 || operand_type_equal (&overlap, &imm16_32s))
9927 {
9928 if ((flag_code == CODE_16BIT)
9929 ^ (i.prefix[DATA_PREFIX] != 0 && !(i.prefix[REX_PREFIX] & REX_W)))
9930 overlap = imm16;
9931 else
9932 overlap = imm32s;
9933 }
9934 else if (i.prefix[REX_PREFIX] & REX_W)
9935 overlap = operand_type_and (overlap, imm32s);
9936 else if (i.prefix[DATA_PREFIX])
9937 overlap = operand_type_and (overlap,
9938 flag_code != CODE_16BIT ? imm16 : imm32);
9939 if (overlap.bitfield.imm8
9940 + overlap.bitfield.imm8s
9941 + overlap.bitfield.imm16
9942 + overlap.bitfield.imm32
9943 + overlap.bitfield.imm32s
9944 + overlap.bitfield.imm64 != 1)
9945 {
9946 as_bad (_("no instruction mnemonic suffix given; "
9947 "can't determine immediate size"));
9948 return 0;
9949 }
9950 }
9951 i.types[j] = overlap;
9952
9953 return 1;
9954 }
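/* E.g. a suffix-less "push $0x1234" should resolve above to the imm16
   form in 16-bit code and to imm32s in 64-bit code, via the imm16_32s
   disambiguation. */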
9955
9956 static int
9957 finalize_imm (void)
9958 {
9959 unsigned int j, n;
9960
9961 /* Update the first 2 immediate operands. */
9962 n = i.operands > 2 ? 2 : i.operands;
9963 if (n)
9964 {
9965 for (j = 0; j < n; j++)
9966 if (update_imm (j) == 0)
9967 return 0;
9968
9969 /* The 3rd operand can't be an immediate operand. */
9970 gas_assert (operand_type_check (i.types[2], imm) == 0);
9971 }
9972
9973 return 1;
9974 }
9975
9976 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
9977 bool do_sse2avx)
9978 {
9979 if (r->reg_flags & RegRex)
9980 {
9981 if (i.rex & rex_bit)
9982 as_bad (_("same type of prefix used twice"));
9983 i.rex |= rex_bit;
9984 }
9985 else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
9986 {
9987 gas_assert (i.vex.register_specifier == r);
9988 i.vex.register_specifier += 8;
9989 }
9990
9991 if (r->reg_flags & RegVRex)
9992 i.vrex |= rex_bit;
9993
9994 if (r->reg_flags & RegRex2)
9995 i.rex2 |= rex_bit;
9996 }
9997
9998 static INLINE void
9999 set_rex_rex2 (const reg_entry *r, unsigned int rex_bit)
10000 {
10001 if ((r->reg_flags & RegRex) != 0)
10002 i.rex |= rex_bit;
10003 if ((r->reg_flags & RegRex2) != 0)
10004 i.rex2 |= rex_bit;
10005 }
10006
10007 static int
10008 process_operands (void)
10009 {
10010 /* Default segment register this instruction will use for memory
10011 accesses. NULL means unknown. This is only for optimizing out
10012 unnecessary segment overrides. */
10013 const reg_entry *default_seg = NULL;
10014
10015 for (unsigned int j = 0; j < i.operands; j++)
10016 if (i.types[j].bitfield.instance != InstanceNone)
10017 i.reg_operands--;
10018 else if (i.tm.opcode_space == SPACE_EVEXMAP4
10019 && i.types[j].bitfield.class == RegSIMD
10020 && (i.op[j].regs->reg_flags & RegVRex)
10021 && !dot_insn ())
10022 /* Just raise an error, but continue processing. */
10023 as_bad (_("`%s%s' cannot be used with `%s'"),
10024 register_prefix, i.op[j].regs->reg_name, insn_name (&i.tm));
10025
10026 if (i.tm.opcode_modifier.sse2avx)
10027 {
10028 /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
10029 need converting. */
10030 i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
10031 i.prefix[REX_PREFIX] = 0;
10032 i.rex_encoding = 0;
10033 i.rex2_encoding = 0;
10034 }
10035 /* ImmExt should be processed after SSE2AVX. */
10036 else if (i.tm.opcode_modifier.immext)
10037 process_immext ();
10038
10039 /* TILEZERO is unusual in that it has a single operand encoded in ModR/M.reg,
10040 not ModR/M.rm. To avoid special casing this in build_modrm_byte(), fake a
10041 new destination operand here, while converting the source one to register
10042 number 0. */
10043 if (i.tm.mnem_off == MN_tilezero)
10044 {
10045 i.op[1].regs = i.op[0].regs;
10046 i.op[0].regs -= i.op[0].regs->reg_num;
10047 i.types[1] = i.types[0];
10048 i.tm.operand_types[1] = i.tm.operand_types[0];
10049 i.flags[1] = i.flags[0];
10050 i.operands++;
10051 i.reg_operands++;
10052 i.tm.operands++;
10053 }
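  /* E.g. "tilezero %tmm3" thus ends up with %tmm3 in ModR/M.reg and the
     fabricated register 0 in ModR/M.rm, i.e. a ModR/M byte of 0xd8. */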
10054
10055 if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
10056 {
10057 static const i386_operand_type regxmm = {
10058 .bitfield = { .class = RegSIMD, .xmmword = 1 }
10059 };
10060 unsigned int dupl = i.operands;
10061 unsigned int dest = dupl - 1;
10062 unsigned int j;
10063
10064 /* The destination must be an xmm register. */
10065 gas_assert (i.reg_operands
10066 && MAX_OPERANDS > dupl
10067 && operand_type_equal (&i.types[dest], &regxmm));
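      /* E.g. under -msse2avx, "addss %xmm1, %xmm0" should become the VEX
	 form "vaddss %xmm1, %xmm0, %xmm0" here, with the destination
	 duplicated into the extra source slot. */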
10068
10069 if (i.tm.operand_types[0].bitfield.instance == Accum
10070 && i.tm.operand_types[0].bitfield.xmmword)
10071 {
10072 /* Keep xmm0 for instructions with VEX prefix and 3
10073 sources. */
10074 i.tm.operand_types[0].bitfield.instance = InstanceNone;
10075 i.tm.operand_types[0].bitfield.class = RegSIMD;
10076 i.reg_operands++;
10077 goto duplicate;
10078 }
10079
10080 if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
10081 {
10082 gas_assert ((MAX_OPERANDS - 1) > dupl);
10083
10084 /* Add the implicit xmm0 for instructions with VEX prefix
10085 and 3 sources. */
10086 for (j = i.operands; j > 0; j--)
10087 {
10088 i.op[j] = i.op[j - 1];
10089 i.types[j] = i.types[j - 1];
10090 i.tm.operand_types[j] = i.tm.operand_types[j - 1];
10091 i.flags[j] = i.flags[j - 1];
10092 }
10093 i.op[0].regs
10094 = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
10095 i.types[0] = regxmm;
10096 i.tm.operand_types[0] = regxmm;
10097
10098 i.operands += 2;
10099 i.reg_operands += 2;
10100 i.tm.operands += 2;
10101
10102 dupl++;
10103 dest++;
10104 i.op[dupl] = i.op[dest];
10105 i.types[dupl] = i.types[dest];
10106 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
10107 i.flags[dupl] = i.flags[dest];
10108 }
10109 else
10110 {
10111 duplicate:
10112 i.operands++;
10113 i.reg_operands++;
10114 i.tm.operands++;
10115
10116 i.op[dupl] = i.op[dest];
10117 i.types[dupl] = i.types[dest];
10118 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
10119 i.flags[dupl] = i.flags[dest];
10120 }
10121
10122 if (i.tm.opcode_modifier.immext)
10123 process_immext ();
10124 }
10125 else if (i.tm.operand_types[0].bitfield.instance == Accum
10126 && i.tm.opcode_modifier.modrm)
10127 {
10128 unsigned int j;
10129
10130 for (j = 1; j < i.operands; j++)
10131 {
10132 i.op[j - 1] = i.op[j];
10133 i.types[j - 1] = i.types[j];
10134
10135 /* We need to adjust fields in i.tm since they are used by
10136 build_modrm_byte. */
10137 i.tm.operand_types [j - 1] = i.tm.operand_types [j];
10138
10139 i.flags[j - 1] = i.flags[j];
10140 }
10141
10142 /* No adjustment to i.reg_operands: This was already done at the top
10143 of the function. */
10144 i.operands--;
10145 i.tm.operands--;
10146 }
10147 else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_QUAD_GROUP)
10148 {
10149 unsigned int regnum, first_reg_in_group, last_reg_in_group;
10150
10151 /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
10152 gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
10153 regnum = register_number (i.op[1].regs);
10154 first_reg_in_group = regnum & ~3;
10155 last_reg_in_group = first_reg_in_group + 3;
10156 if (regnum != first_reg_in_group)
10157 as_warn (_("source register `%s%s' implicitly denotes"
10158 " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
10159 register_prefix, i.op[1].regs->reg_name,
10160 register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
10161 register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
10162 insn_name (&i.tm));
10163 }
10164 else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
10165 {
10166 /* The imul $imm, %reg instruction is converted into
10167 imul $imm, %reg, %reg, and the clr %reg instruction
10168 is converted into xor %reg, %reg. */
10169
10170 unsigned int first_reg_op;
10171
10172 if (operand_type_check (i.types[0], reg))
10173 first_reg_op = 0;
10174 else
10175 first_reg_op = 1;
10176 /* Pretend we saw the extra register operand. */
10177 gas_assert (i.reg_operands == 1
10178 && i.op[first_reg_op + 1].regs == 0);
10179 i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
10180 i.types[first_reg_op + 1] = i.types[first_reg_op];
10181 i.operands++;
10182 i.reg_operands++;
10183 }
10184
10185 if (i.tm.opcode_modifier.modrm)
10186 {
10187 /* The opcode is completed (modulo i.tm.extension_opcode which
10188 must be put into the modrm byte). Now, we make the modrm and
10189 index base bytes based on all the info we've collected. */
10190
10191 default_seg = build_modrm_byte ();
10192
10193 if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
10194 {
10195 /* Warn about some common errors, but press on regardless. */
10196 if (i.operands == 2)
10197 {
10198 /* Reversed arguments on faddp or fmulp. */
10199 as_warn (_("translating to `%s %s%s,%s%s'"), insn_name (&i.tm),
10200 register_prefix, i.op[!intel_syntax].regs->reg_name,
10201 register_prefix, i.op[intel_syntax].regs->reg_name);
10202 }
10203 else if (i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
10204 {
10205 /* Extraneous `l' suffix on fp insn. */
10206 as_warn (_("translating to `%s %s%s'"), insn_name (&i.tm),
10207 register_prefix, i.op[0].regs->reg_name);
10208 }
10209 }
10210 }
10211 else if (i.types[0].bitfield.class == SReg && !dot_insn ())
10212 {
10213 if (flag_code != CODE_64BIT
10214 ? i.tm.base_opcode == POP_SEG_SHORT
10215 && i.op[0].regs->reg_num == 1
10216 : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
10217 && i.op[0].regs->reg_num < 4)
10218 {
10219 as_bad (_("you can't `%s %s%s'"),
10220 insn_name (&i.tm), register_prefix, i.op[0].regs->reg_name);
10221 return 0;
10222 }
10223 if (i.op[0].regs->reg_num > 3
10224 && i.tm.opcode_space == SPACE_BASE )
10225 {
10226 i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
10227 i.tm.opcode_space = SPACE_0F;
10228 }
10229 i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
10230 }
10231 else if (i.tm.opcode_space == SPACE_BASE
10232 && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
10233 {
10234 default_seg = reg_ds;
10235 }
10236 else if (i.tm.opcode_modifier.isstring)
10237 {
10238 /* For the string instructions that allow a segment override
10239 on one of their operands, the default segment is ds. */
10240 default_seg = reg_ds;
10241 }
10242 else if (i.short_form)
10243 {
10244 /* The register operand is in the 1st or 2nd non-immediate operand. */
10245 const reg_entry *r = i.op[i.imm_operands].regs;
10246
10247 if (!dot_insn ()
10248 && r->reg_type.bitfield.instance == Accum
10249 && i.op[i.imm_operands + 1].regs)
10250 r = i.op[i.imm_operands + 1].regs;
10251 /* Register goes in low 3 bits of opcode. */
10252 i.tm.base_opcode |= r->reg_num;
10253 set_rex_vrex (r, REX_B, false);
10254
10255 if (dot_insn () && i.reg_operands == 2)
10256 {
10257 gas_assert (is_any_vex_encoding (&i.tm)
10258 || i.encoding != encoding_default);
10259 i.vex.register_specifier = i.op[i.operands - 1].regs;
10260 }
10261 }
10262 else if (i.reg_operands == 1
10263 && !i.flags[i.operands - 1]
10264 && i.tm.operand_types[i.operands - 1].bitfield.instance
10265 == InstanceNone)
10266 {
10267 gas_assert (is_any_vex_encoding (&i.tm)
10268 || i.encoding != encoding_default);
10269 i.vex.register_specifier = i.op[i.operands - 1].regs;
10270 }
10271
10272 if ((i.seg[0] || i.prefix[SEG_PREFIX])
10273 && i.tm.mnem_off == MN_lea)
10274 {
10275 if (!quiet_warnings)
10276 as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
10277 if (optimize && !i.no_optimize)
10278 {
10279 i.seg[0] = NULL;
10280 i.prefix[SEG_PREFIX] = 0;
10281 }
10282 }
10283
10284 /* If a segment was explicitly specified, and the specified segment
10285 is neither the default nor the one already recorded from a prefix,
10286 use an opcode prefix to select it. If we never figured out what
10287 the default segment is, then default_seg will be zero at this
10288 point, and the specified segment prefix will always be used. */
10289 if (i.seg[0]
10290 && i.seg[0] != default_seg
10291 && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
10292 {
10293 if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
10294 return 0;
10295 }
10296 return 1;
10297 }
10298
10299 static const reg_entry *
10300 build_modrm_byte (void)
10301 {
10302 const reg_entry *default_seg = NULL;
10303 unsigned int source = i.imm_operands - i.tm.opcode_modifier.immext
10304 /* Compensate for kludge in md_assemble(). */
10305 + i.tm.operand_types[0].bitfield.imm1;
10306 unsigned int dest = i.operands - 1 - i.tm.opcode_modifier.immext;
10307 unsigned int v, op, reg_slot = ~0;
10308
10309 /* Accumulator (in particular %st), shift count (%cl), and the like need
10310 to be skipped just like immediate operands do. */
10311 if (i.tm.operand_types[source].bitfield.instance)
10312 ++source;
10313 while (i.tm.operand_types[dest].bitfield.instance)
10314 --dest;
10315
10316 for (op = source; op < i.operands; ++op)
10317 if (i.tm.operand_types[op].bitfield.baseindex)
10318 break;
10319
10320 if (i.reg_operands + i.mem_operands + (i.tm.extension_opcode != None) == 4)
10321 {
10322 expressionS *exp;
10323
10324 /* There are 3 kinds of instructions:
10325 1. 5 operands: 4 register operands or 3 register operands
10326 plus 1 memory operand plus one Imm4 operand, VexXDS, and
10327 VexW0 or VexW1. The destination must be either XMM, YMM or
10328 ZMM register.
10329 2. 4 operands: 4 register operands or 3 register operands
10330 plus 1 memory operand, with VexXDS.
10331 3. Other equivalent combinations when coming from s_insn(). */
10332 gas_assert (i.tm.opcode_modifier.vexvvvv
10333 && i.tm.opcode_modifier.vexw);
10334 gas_assert (dot_insn ()
10335 || i.tm.operand_types[dest].bitfield.class == RegSIMD);
10336
10337 /* Of the first two non-immediate operands the one with the template
10338 not allowing for a memory one is encoded in the immediate operand. */
10339 if (source == op)
10340 reg_slot = source + 1;
10341 else
10342 reg_slot = source++;
10343
10344 if (!dot_insn ())
10345 {
10346 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
10347 gas_assert (!(i.op[reg_slot].regs->reg_flags & RegVRex));
10348 }
10349 else
10350 gas_assert (i.tm.operand_types[reg_slot].bitfield.class != ClassNone);
10351
10352 if (i.imm_operands == 0)
10353 {
10354 /* When there is no immediate operand, generate an 8bit
10355 immediate operand to encode the first operand. */
10356 exp = &im_expressions[i.imm_operands++];
10357 i.op[i.operands].imms = exp;
10358 i.types[i.operands].bitfield.imm8 = 1;
10359 i.operands++;
10360
10361 exp->X_op = O_constant;
10362 }
10363 else
10364 {
10365 gas_assert (i.imm_operands == 1);
10366 gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
10367 gas_assert (!i.tm.opcode_modifier.immext);
10368
10369 /* Turn on Imm8 again so that output_imm will generate it. */
10370 i.types[0].bitfield.imm8 = 1;
10371
10372 exp = i.op[0].imms;
10373 }
10374 exp->X_add_number |= register_number (i.op[reg_slot].regs)
10375 << (3 + !(i.tm.opcode_modifier.evex
10376 || i.encoding == encoding_evex));
10377 }
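  /* Per the shift expression just above: the register-only operand lands
     in the high bits of the trailing imm8. Under VEX the shift is 4, so
     e.g. register number 4 contributes 0x40; EVEX shifts by only 3 to
     accommodate 5-bit register numbers. */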
10378
10379 if (i.tm.opcode_modifier.vexvvvv == VexVVVV_DST)
10380 {
10381 v = dest;
10382 dest-- ;
10383 }
10384 else
10385 {
10386 for (v = source + 1; v < dest; ++v)
10387 if (v != reg_slot)
10388 break;
10389 if (v >= dest)
10390 v = ~0;
10391 }
10392 if (i.tm.extension_opcode != None)
10393 {
10394 if (dest != source)
10395 v = dest;
10396 dest = ~0;
10397 }
10398 gas_assert (source < dest);
10399 if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
10400 && source != op)
10401 {
10402 unsigned int tmp = source;
10403
10404 source = v;
10405 v = tmp;
10406 }
10407
10408 if (v < MAX_OPERANDS)
10409 {
10410 gas_assert (i.tm.opcode_modifier.vexvvvv);
10411 i.vex.register_specifier = i.op[v].regs;
10412 }
10413
10414 if (op < i.operands)
10415 {
10416 if (i.mem_operands)
10417 {
10418 unsigned int fake_zero_displacement = 0;
10419
10420 gas_assert (i.flags[op] & Operand_Mem);
10421
10422 if (i.tm.opcode_modifier.sib)
10423 {
10424 /* The index register of VSIB shouldn't be RegIZ. */
10425 if (i.tm.opcode_modifier.sib != SIBMEM
10426 && i.index_reg->reg_num == RegIZ)
10427 abort ();
10428
10429 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
10430 if (!i.base_reg)
10431 {
10432 i.sib.base = NO_BASE_REGISTER;
10433 i.sib.scale = i.log2_scale_factor;
10434 i.types[op] = operand_type_and_not (i.types[op], anydisp);
10435 i.types[op].bitfield.disp32 = 1;
10436 }
10437
10438 /* Since the mandatory SIB always has an index register, the
10439 code logic remains unchanged. The non-mandatory SIB
10440 without an index register is allowed and will be handled
10441 later. */
10442 if (i.index_reg)
10443 {
10444 if (i.index_reg->reg_num == RegIZ)
10445 i.sib.index = NO_INDEX_REGISTER;
10446 else
10447 i.sib.index = i.index_reg->reg_num;
10448 set_rex_vrex (i.index_reg, REX_X, false);
10449 }
10450 }
10451
10452 default_seg = reg_ds;
10453
10454 if (i.base_reg == 0)
10455 {
10456 i.rm.mode = 0;
10457 if (!i.disp_operands)
10458 fake_zero_displacement = 1;
10459 if (i.index_reg == 0)
10460 {
10461 /* Both check for VSIB and mandatory non-vector SIB. */
10462 gas_assert (!i.tm.opcode_modifier.sib
10463 || i.tm.opcode_modifier.sib == SIBMEM);
10464 /* Operand is just <disp> */
10465 i.types[op] = operand_type_and_not (i.types[op], anydisp);
10466 if (flag_code == CODE_64BIT)
10467 {
10468 /* In 64-bit mode, 32-bit absolute addressing is
10469 replaced by RIP-relative addressing; absolute
10470 addressing is instead encoded via one of the
10471 redundant SIB forms. */
10472 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
10473 i.sib.base = NO_BASE_REGISTER;
10474 i.sib.index = NO_INDEX_REGISTER;
10475 i.types[op].bitfield.disp32 = 1;
10476 }
10477 else if ((flag_code == CODE_16BIT)
10478 ^ (i.prefix[ADDR_PREFIX] != 0))
10479 {
10480 i.rm.regmem = NO_BASE_REGISTER_16;
10481 i.types[op].bitfield.disp16 = 1;
10482 }
10483 else
10484 {
10485 i.rm.regmem = NO_BASE_REGISTER;
10486 i.types[op].bitfield.disp32 = 1;
10487 }
10488 }
10489 else if (!i.tm.opcode_modifier.sib)
10490 {
10491 /* !i.base_reg && i.index_reg */
10492 if (i.index_reg->reg_num == RegIZ)
10493 i.sib.index = NO_INDEX_REGISTER;
10494 else
10495 i.sib.index = i.index_reg->reg_num;
10496 i.sib.base = NO_BASE_REGISTER;
10497 i.sib.scale = i.log2_scale_factor;
10498 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
10499 i.types[op] = operand_type_and_not (i.types[op], anydisp);
10500 i.types[op].bitfield.disp32 = 1;
10501 set_rex_rex2 (i.index_reg, REX_X);
10502 }
10503 }
10504 /* RIP addressing for 64bit mode. */
10505 else if (i.base_reg->reg_num == RegIP)
10506 {
10507 gas_assert (!i.tm.opcode_modifier.sib);
10508 i.rm.regmem = NO_BASE_REGISTER;
10509 i.types[op].bitfield.disp8 = 0;
10510 i.types[op].bitfield.disp16 = 0;
10511 i.types[op].bitfield.disp32 = 1;
10512 i.types[op].bitfield.disp64 = 0;
10513 i.flags[op] |= Operand_PCrel;
10514 if (! i.disp_operands)
10515 fake_zero_displacement = 1;
10516 }
10517 else if (i.base_reg->reg_type.bitfield.word)
10518 {
10519 gas_assert (!i.tm.opcode_modifier.sib);
10520 switch (i.base_reg->reg_num)
10521 {
10522 case 3: /* (%bx) */
10523 if (i.index_reg == 0)
10524 i.rm.regmem = 7;
10525 else /* (%bx,%si) -> 0, or (%bx,%di) -> 1 */
10526 i.rm.regmem = i.index_reg->reg_num - 6;
10527 break;
10528 case 5: /* (%bp) */
10529 default_seg = reg_ss;
10530 if (i.index_reg == 0)
10531 {
10532 i.rm.regmem = 6;
10533 if (operand_type_check (i.types[op], disp) == 0)
10534 {
10535 /* fake (%bp) into 0(%bp) */
10536 if (i.disp_encoding == disp_encoding_16bit)
10537 i.types[op].bitfield.disp16 = 1;
10538 else
10539 i.types[op].bitfield.disp8 = 1;
10540 fake_zero_displacement = 1;
10541 }
10542 }
10543 else /* (%bp,%si) -> 2, or (%bp,%di) -> 3 */
10544 i.rm.regmem = i.index_reg->reg_num - 6 + 2;
10545 break;
10546 default: /* (%si) -> 4 or (%di) -> 5 */
10547 i.rm.regmem = i.base_reg->reg_num - 6 + 4;
10548 }
10549 if (!fake_zero_displacement
10550 && !i.disp_operands
10551 && i.disp_encoding)
10552 {
10553 fake_zero_displacement = 1;
10554 if (i.disp_encoding == disp_encoding_8bit)
10555 i.types[op].bitfield.disp8 = 1;
10556 else
10557 i.types[op].bitfield.disp16 = 1;
10558 }
10559 i.rm.mode = mode_from_disp_size (i.types[op]);
10560 }
10561 else /* i.base_reg and 32/64 bit mode */
10562 {
10563 if (operand_type_check (i.types[op], disp))
10564 {
10565 i.types[op].bitfield.disp16 = 0;
10566 i.types[op].bitfield.disp64 = 0;
10567 i.types[op].bitfield.disp32 = 1;
10568 }
10569
10570 if (!i.tm.opcode_modifier.sib)
10571 i.rm.regmem = i.base_reg->reg_num;
10572 set_rex_rex2 (i.base_reg, REX_B);
10573 i.sib.base = i.base_reg->reg_num;
10574 /* x86-64 ignores the REX prefix bit here to avoid decoder
10575 complications. */
10576 if (!(i.base_reg->reg_flags & RegRex)
10577 && (i.base_reg->reg_num == EBP_REG_NUM
10578 || i.base_reg->reg_num == ESP_REG_NUM))
10579 default_seg = reg_ss;
10580 if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
10581 {
10582 fake_zero_displacement = 1;
10583 if (i.disp_encoding == disp_encoding_32bit)
10584 i.types[op].bitfield.disp32 = 1;
10585 else
10586 i.types[op].bitfield.disp8 = 1;
10587 }
10588 i.sib.scale = i.log2_scale_factor;
10589 if (i.index_reg == 0)
10590 {
10591 /* Only check for VSIB. */
10592 gas_assert (i.tm.opcode_modifier.sib != VECSIB128
10593 && i.tm.opcode_modifier.sib != VECSIB256
10594 && i.tm.opcode_modifier.sib != VECSIB512);
10595
10596 /* <disp>(%esp) becomes a two-byte modrm with no index
10597 register. We've already stored the code for esp
10598 in i.rm.regmem, i.e. ESCAPE_TO_TWO_BYTE_ADDRESSING.
10599 Any base register besides %esp will not use the
10600 extra modrm byte. */
10601 i.sib.index = NO_INDEX_REGISTER;
10602 }
10603 else if (!i.tm.opcode_modifier.sib)
10604 {
10605 if (i.index_reg->reg_num == RegIZ)
10606 i.sib.index = NO_INDEX_REGISTER;
10607 else
10608 i.sib.index = i.index_reg->reg_num;
10609 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
10610 set_rex_rex2 (i.index_reg, REX_X);
10611 }
10612
10613 if (i.disp_operands
10614 && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
10615 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
10616 i.rm.mode = 0;
10617 else
10618 {
10619 if (!fake_zero_displacement
10620 && !i.disp_operands
10621 && i.disp_encoding)
10622 {
10623 fake_zero_displacement = 1;
10624 if (i.disp_encoding == disp_encoding_8bit)
10625 i.types[op].bitfield.disp8 = 1;
10626 else
10627 i.types[op].bitfield.disp32 = 1;
10628 }
10629 i.rm.mode = mode_from_disp_size (i.types[op]);
10630 }
10631 }
10632
10633 if (fake_zero_displacement)
10634 {
10635 /* Fakes a zero displacement assuming that i.types[op]
10636 holds the correct displacement size. */
10637 expressionS *exp;
10638
10639 gas_assert (i.op[op].disps == 0);
10640 exp = &disp_expressions[i.disp_operands++];
10641 i.op[op].disps = exp;
10642 exp->X_op = O_constant;
10643 exp->X_add_number = 0;
10644 exp->X_add_symbol = (symbolS *) 0;
10645 exp->X_op_symbol = (symbolS *) 0;
10646 }
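	  /* E.g. "mov (%ebp), %eax" takes the path above: (%ebp) has no
	     mod=0 encoding, so it is faked as 0(%ebp) and emitted as
	     8b 45 00. */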
10647 }
10648 else
10649 {
10650 i.rm.mode = 3;
10651 i.rm.regmem = i.op[op].regs->reg_num;
10652 set_rex_vrex (i.op[op].regs, REX_B, false);
10653 }
10654
10655 if (op == dest)
10656 dest = ~0;
10657 if (op == source)
10658 source = ~0;
10659 }
10660 else
10661 {
10662 i.rm.mode = 3;
10663 if (!i.tm.opcode_modifier.regmem)
10664 {
10665 gas_assert (source < MAX_OPERANDS);
10666 i.rm.regmem = i.op[source].regs->reg_num;
10667 set_rex_vrex (i.op[source].regs, REX_B,
10668 dest >= MAX_OPERANDS && i.tm.opcode_modifier.sse2avx);
10669 source = ~0;
10670 }
10671 else
10672 {
10673 gas_assert (dest < MAX_OPERANDS);
10674 i.rm.regmem = i.op[dest].regs->reg_num;
10675 set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
10676 dest = ~0;
10677 }
10678 }
10679
10680 /* Fill in i.rm.reg field with extension opcode (if any) or the
10681 appropriate register. */
10682 if (i.tm.extension_opcode != None)
10683 i.rm.reg = i.tm.extension_opcode;
10684 else if (!i.tm.opcode_modifier.regmem && dest < MAX_OPERANDS)
10685 {
10686 i.rm.reg = i.op[dest].regs->reg_num;
10687 set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
10688 }
10689 else
10690 {
10691 gas_assert (source < MAX_OPERANDS);
10692 i.rm.reg = i.op[source].regs->reg_num;
10693 set_rex_vrex (i.op[source].regs, REX_R, false);
10694 }
10695
10696 if (flag_code != CODE_64BIT && (i.rex & REX_R))
10697 {
10698 gas_assert (i.types[!i.tm.opcode_modifier.regmem].bitfield.class == RegCR);
10699 i.rex &= ~REX_R;
10700 add_prefix (LOCK_PREFIX_OPCODE);
10701 }
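  /* E.g. 32-bit "movl %eax, %cr8": %cr8 would need REX_R, which doesn't
     exist outside 64-bit mode, so it is encoded with a LOCK prefix
     instead, per AMD's alternative CR8 encoding. */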
10702
10703 return default_seg;
10704 }
10705
10706 static INLINE void
10707 frag_opcode_byte (unsigned char byte)
10708 {
10709 if (now_seg != absolute_section)
10710 FRAG_APPEND_1_CHAR (byte);
10711 else
10712 ++abs_section_offset;
10713 }
10714
10715 static unsigned int
10716 flip_code16 (unsigned int code16)
10717 {
10718 gas_assert (i.tm.operands == 1);
10719
10720 return !(i.prefix[REX_PREFIX] & REX_W)
10721 && (code16 ? i.tm.operand_types[0].bitfield.disp32
10722 : i.tm.operand_types[0].bitfield.disp16)
10723 ? CODE16 : 0;
10724 }
10725
10726 static void
10727 output_branch (void)
10728 {
10729 char *p;
10730 int size;
10731 int code16;
10732 int prefix;
10733 relax_substateT subtype;
10734 symbolS *sym;
10735 offsetT off;
10736
10737 if (now_seg == absolute_section)
10738 {
10739 as_bad (_("relaxable branches not supported in absolute section"));
10740 return;
10741 }
10742
10743 code16 = flag_code == CODE_16BIT ? CODE16 : 0;
10744 size = i.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
10745
10746 prefix = 0;
10747 if (i.prefix[DATA_PREFIX] != 0)
10748 {
10749 prefix = 1;
10750 i.prefixes -= 1;
10751 code16 ^= flip_code16(code16);
10752 }
10753 /* Pentium4 branch hints. */
10754 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
10755 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
10756 {
10757 prefix++;
10758 i.prefixes--;
10759 }
10760 if (i.prefix[REX_PREFIX] != 0)
10761 {
10762 prefix++;
10763 i.prefixes--;
10764 }
10765
10766 /* BND prefixed jump. */
10767 if (i.prefix[BND_PREFIX] != 0)
10768 {
10769 prefix++;
10770 i.prefixes--;
10771 }
10772
10773 if (i.prefixes != 0)
10774 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
10775
10776 /* It's always a symbol; end the frag and set up for relaxation.
10777 Make sure there is enough room in this frag for the largest
10778 instruction we may generate in md_convert_frag. This is 2
10779 bytes for the opcode and room for the prefix and largest
10780 displacement. */
10781 frag_grow (prefix + 2 + 4);
10782 /* Prefix and 1 opcode byte go in fr_fix. */
10783 p = frag_more (prefix + 1);
10784 if (i.prefix[DATA_PREFIX] != 0)
10785 *p++ = DATA_PREFIX_OPCODE;
10786 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
10787 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
10788 *p++ = i.prefix[SEG_PREFIX];
10789 if (i.prefix[BND_PREFIX] != 0)
10790 *p++ = BND_PREFIX_OPCODE;
10791 if (i.prefix[REX_PREFIX] != 0)
10792 *p++ = i.prefix[REX_PREFIX];
10793 *p = i.tm.base_opcode;
10794
10795 if ((unsigned char) *p == JUMP_PC_RELATIVE)
10796 subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
10797 else if (cpu_arch_flags.bitfield.cpui386)
10798 subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
10799 else
10800 subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
10801 subtype |= code16;
10802
10803 sym = i.op[0].disps->X_add_symbol;
10804 off = i.op[0].disps->X_add_number;
10805
10806 if (i.op[0].disps->X_op != O_constant
10807 && i.op[0].disps->X_op != O_symbol)
10808 {
10809 /* Handle complex expressions. */
10810 sym = make_expr_symbol (i.op[0].disps);
10811 off = 0;
10812 }
10813
10814 /* 1 possible extra opcode + 4 byte displacement go in var part.
10815 Pass reloc in fr_var. */
10816 frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
10817 }
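/* E.g. "jne target" in 32-bit code starts out above as the short 2-byte
   75 rel8 form; if relaxation finds the displacement doesn't fit in 8
   bits, md_convert_frag rewrites it as the 6-byte 0f 85 rel32 form. */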
10818
10819 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10820 /* Return TRUE iff PLT32 relocation should be used for branching to
10821 symbol S. */
10822
10823 static bool
10824 need_plt32_p (symbolS *s)
10825 {
10826 /* PLT32 relocation is ELF only. */
10827 if (!IS_ELF)
10828 return false;
10829
10830 #ifdef TE_SOLARIS
10831 /* Don't emit PLT32 relocation on Solaris: neither native linker nor
10832 krtld support it. */
10833 return false;
10834 #endif
10835
10836 /* Since there is no need to prepare for a PLT branch on x86-64, we
10837 can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
10838 be used as a marker for 32-bit PC-relative branches. */
10839 if (!object_64bit)
10840 return false;
10841
10842 if (s == NULL)
10843 return false;
10844
10845 /* Weak or undefined symbols need a PLT32 relocation. */
10846 if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
10847 return true;
10848
10849 /* A non-global symbol doesn't need a PLT32 relocation. */
10850 if (! S_IS_EXTERNAL (s))
10851 return false;
10852
10853 /* Other global symbols need a PLT32 relocation. NB: Symbols with
10854 non-default visibility are treated as normal global symbols
10855 so that PLT32 relocation can be used as a marker for 32-bit
10856 PC-relative branches. It is useful for linker relaxation. */
10857 return true;
10858 }
10859 #endif
10860
10861 static void
10862 output_jump (void)
10863 {
10864 char *p;
10865 int size;
10866 fixS *fixP;
10867 bfd_reloc_code_real_type jump_reloc = i.reloc[0];
10868
10869 if (i.tm.opcode_modifier.jump == JUMP_BYTE)
10870 {
10871 /* This is a loop or jecxz type instruction. */
10872 size = 1;
10873 if (i.prefix[ADDR_PREFIX] != 0)
10874 {
10875 frag_opcode_byte (ADDR_PREFIX_OPCODE);
10876 i.prefixes -= 1;
10877 }
10878 /* Pentium4 branch hints. */
10879 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
10880 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
10881 {
10882 frag_opcode_byte (i.prefix[SEG_PREFIX]);
10883 i.prefixes--;
10884 }
10885 }
10886 else
10887 {
10888 int code16;
10889
10890 code16 = 0;
10891 if (flag_code == CODE_16BIT)
10892 code16 = CODE16;
10893
10894 if (i.prefix[DATA_PREFIX] != 0)
10895 {
10896 frag_opcode_byte (DATA_PREFIX_OPCODE);
10897 i.prefixes -= 1;
10898 code16 ^= flip_code16(code16);
10899 }
10900
10901 size = 4;
10902 if (code16)
10903 size = 2;
10904 }
10905
10906 /* BND prefixed jump. */
10907 if (i.prefix[BND_PREFIX] != 0)
10908 {
10909 frag_opcode_byte (i.prefix[BND_PREFIX]);
10910 i.prefixes -= 1;
10911 }
10912
10913 if (i.prefix[REX_PREFIX] != 0)
10914 {
10915 frag_opcode_byte (i.prefix[REX_PREFIX]);
10916 i.prefixes -= 1;
10917 }
10918
10919 if (i.prefixes != 0)
10920 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
10921
10922 if (now_seg == absolute_section)
10923 {
10924 abs_section_offset += i.opcode_length + size;
10925 return;
10926 }
10927
10928 p = frag_more (i.opcode_length + size);
10929 switch (i.opcode_length)
10930 {
10931 case 2:
10932 *p++ = i.tm.base_opcode >> 8;
10933 /* Fall through. */
10934 case 1:
10935 *p++ = i.tm.base_opcode;
10936 break;
10937 default:
10938 abort ();
10939 }
10940
10941 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10942 if (flag_code == CODE_64BIT && size == 4
10943 && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
10944 && need_plt32_p (i.op[0].disps->X_add_symbol))
10945 jump_reloc = BFD_RELOC_X86_64_PLT32;
10946 #endif
10947
10948 jump_reloc = reloc (size, 1, 1, jump_reloc);
10949
10950 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10951 i.op[0].disps, 1, jump_reloc);
10952
10953 /* All jumps handled here are signed, but don't unconditionally use a
10954 signed limit check for 32 and 16 bit jumps as we want to allow wrap
10955 around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
10956 respectively. */
10957 switch (size)
10958 {
10959 case 1:
10960 fixP->fx_signed = 1;
10961 break;
10962
10963 case 2:
10964 if (i.tm.mnem_off == MN_xbegin)
10965 fixP->fx_signed = 1;
10966 break;
10967
10968 case 4:
10969 if (flag_code == CODE_64BIT)
10970 fixP->fx_signed = 1;
10971 break;
10972 }
10973 }
10974
10975 static void
10976 output_interseg_jump (void)
10977 {
10978 char *p;
10979 int size;
10980 int prefix;
10981 int code16;
10982
10983 code16 = 0;
10984 if (flag_code == CODE_16BIT)
10985 code16 = CODE16;
10986
10987 prefix = 0;
10988 if (i.prefix[DATA_PREFIX] != 0)
10989 {
10990 prefix = 1;
10991 i.prefixes -= 1;
10992 code16 ^= CODE16;
10993 }
10994
10995 gas_assert (!i.prefix[REX_PREFIX]);
10996
10997 size = 4;
10998 if (code16)
10999 size = 2;
11000
11001 if (i.prefixes != 0)
11002 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
11003
11004 if (now_seg == absolute_section)
11005 {
11006 abs_section_offset += prefix + 1 + 2 + size;
11007 return;
11008 }
11009
11010 /* 1 byte opcode; "size" bytes of offset; 2 bytes of segment. */
11011 p = frag_more (prefix + 1 + 2 + size);
11012
11013 if (i.prefix[DATA_PREFIX] != 0)
11014 *p++ = DATA_PREFIX_OPCODE;
11015
11016 if (i.prefix[REX_PREFIX] != 0)
11017 *p++ = i.prefix[REX_PREFIX];
11018
11019 *p++ = i.tm.base_opcode;
11020 if (i.op[1].imms->X_op == O_constant)
11021 {
11022 offsetT n = i.op[1].imms->X_add_number;
11023
11024 if (size == 2
11025 && !fits_in_unsigned_word (n)
11026 && !fits_in_signed_word (n))
11027 {
11028 as_bad (_("16-bit jump out of range"));
11029 return;
11030 }
11031 md_number_to_chars (p, n, size);
11032 }
11033 else
11034 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
11035 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
11036
11037 p += size;
11038 if (i.op[0].imms->X_op == O_constant)
11039 md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
11040 else
11041 fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
11042 i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
11043 }
11044
11045 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11046 void
11047 x86_cleanup (void)
11048 {
11049 char *p;
11050 asection *seg = now_seg;
11051 subsegT subseg = now_subseg;
11052 asection *sec;
11053 unsigned int alignment, align_size_1;
11054 unsigned int isa_1_descsz, feature_2_descsz, descsz;
11055 unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
11056 unsigned int padding;
11057
11058 if (!IS_ELF || !x86_used_note)
11059 return;
11060
11061 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
11062
11063 /* The .note.gnu.property section layout:
11064
11065 Field Length Contents
11066 ---- ---- ----
11067 n_namsz 4 4
11068 n_descsz 4 The note descriptor size
11069 n_type 4 NT_GNU_PROPERTY_TYPE_0
11070 n_name 4 "GNU"
11071 n_desc n_descsz The program property array
11072 .... .... ....
11073 */
11074
11075 /* Create the .note.gnu.property section. */
11076 sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
11077 bfd_set_section_flags (sec,
11078 (SEC_ALLOC
11079 | SEC_LOAD
11080 | SEC_DATA
11081 | SEC_HAS_CONTENTS
11082 | SEC_READONLY));
11083
11084 if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
11085 {
11086 align_size_1 = 7;
11087 alignment = 3;
11088 }
11089 else
11090 {
11091 align_size_1 = 3;
11092 alignment = 2;
11093 }
11094
11095 bfd_set_section_alignment (sec, alignment);
11096 elf_section_type (sec) = SHT_NOTE;
11097
11098 /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
11099 + 4-byte data */
11100 isa_1_descsz_raw = 4 + 4 + 4;
11101 /* Align GNU_PROPERTY_X86_ISA_1_USED. */
11102 isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
11103
11104 feature_2_descsz_raw = isa_1_descsz;
11105 /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
11106 + 4-byte data */
11107 feature_2_descsz_raw += 4 + 4 + 4;
11108 /* Align GNU_PROPERTY_X86_FEATURE_2_USED. */
11109 feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
11110 & ~align_size_1);
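  /* A worked example for ELFCLASS64 (align_size_1 == 7): the 12 raw bytes
     of GNU_PROPERTY_X86_ISA_1_USED pad to 16; FEATURE_2 brings the raw
     total to 28, which pads to a final descsz of 32, i.e. 48 bytes of
     section contents including the 16-byte note header. */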
11111
11112 descsz = feature_2_descsz;
11113 /* Section size: n_namsz + n_descsz + n_type + n_name + n_desc. */
11114 p = frag_more (4 + 4 + 4 + 4 + descsz);
11115
11116 /* Write n_namsz. */
11117 md_number_to_chars (p, (valueT) 4, 4);
11118
11119 /* Write n_descsz. */
11120 md_number_to_chars (p + 4, (valueT) descsz, 4);
11121
11122 /* Write n_type. */
11123 md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
11124
11125 /* Write n_name. */
11126 memcpy (p + 4 * 3, "GNU", 4);
11127
11128 /* Write 4-byte type. */
11129 md_number_to_chars (p + 4 * 4,
11130 (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
11131
11132 /* Write 4-byte data size. */
11133 md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
11134
11135 /* Write 4-byte data. */
11136 md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
11137
11138 /* Zero out paddings. */
11139 padding = isa_1_descsz - isa_1_descsz_raw;
11140 if (padding)
11141 memset (p + 4 * 7, 0, padding);
11142
11143 /* Write 4-byte type. */
11144 md_number_to_chars (p + isa_1_descsz + 4 * 4,
11145 (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
11146
11147 /* Write 4-byte data size. */
11148 md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
11149
11150 /* Write 4-byte data. */
11151 md_number_to_chars (p + isa_1_descsz + 4 * 6,
11152 (valueT) x86_feature_2_used, 4);
11153
11154 /* Zero out paddings. */
11155 padding = feature_2_descsz - feature_2_descsz_raw;
11156 if (padding)
11157 memset (p + isa_1_descsz + 4 * 7, 0, padding);
11158
11159 /* We probably can't restore the current segment, for there likely
11160 isn't one yet... */
11161 if (seg && subseg)
11162 subseg_set (seg, subseg);
11163 }
11164
11165 bool
11166 x86_support_sframe_p (void)
11167 {
11168 /* At this time, SFrame stack trace is supported for AMD64 ABI only. */
11169 return (x86_elf_abi == X86_64_ABI);
11170 }
11171
11172 bool
11173 x86_sframe_ra_tracking_p (void)
11174 {
11175 /* In AMD64, return address is always stored on the stack at a fixed offset
11176 from the CFA (provided via x86_sframe_cfa_ra_offset ()).
11177 Do not track explicitly via an SFrame Frame Row Entry. */
11178 return false;
11179 }
11180
11181 offsetT
11182 x86_sframe_cfa_ra_offset (void)
11183 {
11184 gas_assert (x86_elf_abi == X86_64_ABI);
11185 return (offsetT) -8;
11186 }
11187
11188 unsigned char
11189 x86_sframe_get_abi_arch (void)
11190 {
11191 unsigned char sframe_abi_arch = 0;
11192
11193 if (x86_support_sframe_p ())
11194 {
11195 gas_assert (!target_big_endian);
11196 sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
11197 }
11198
11199 return sframe_abi_arch;
11200 }
11201
11202 #endif
11203
11204 static unsigned int
11205 encoding_length (const fragS *start_frag, offsetT start_off,
11206 const char *frag_now_ptr)
11207 {
11208 unsigned int len = 0;
11209
11210 if (start_frag != frag_now)
11211 {
11212 const fragS *fr = start_frag;
11213
11214 do {
11215 len += fr->fr_fix;
11216 fr = fr->fr_next;
11217 } while (fr && fr != frag_now);
11218 }
11219
11220 return len - start_off + (frag_now_ptr - frag_now->fr_literal);
11221 }
11222
11223 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
11224 be macro-fused with conditional jumps.
11225 NB: If TEST/AND/CMP/ADD/SUB/INC/DEC uses a RIP-relative address,
11226 or has one of the following forms:
11227
11228 cmp m, imm
11229 add m, imm
11230 sub m, imm
11231 test m, imm
11232 and m, imm
11233 inc m
11234 dec m
11235
11236 it is not fusible. */
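/* E.g. "cmp %eax, %ebx" followed by "jne" may macro-fuse, while
   "cmpl $1, (%ebx)" (memory operand plus immediate) may not. */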
11237
11238 static int
11239 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
11240 {
11241 /* No RIP address. */
11242 if (i.base_reg && i.base_reg->reg_num == RegIP)
11243 return 0;
11244
11245 /* No opcodes outside of base encoding space. */
11246 if (i.tm.opcode_space != SPACE_BASE)
11247 return 0;
11248
11249 /* add, sub without add/sub m, imm. */
11250 if (i.tm.base_opcode <= 5
11251 || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
11252 || ((i.tm.base_opcode | 3) == 0x83
11253 && (i.tm.extension_opcode == 0x5
11254 || i.tm.extension_opcode == 0x0)))
11255 {
11256 *mf_cmp_p = mf_cmp_alu_cmp;
11257 return !(i.mem_operands && i.imm_operands);
11258 }
11259
11260 /* and without and m, imm. */
11261 if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
11262 || ((i.tm.base_opcode | 3) == 0x83
11263 && i.tm.extension_opcode == 0x4))
11264 {
11265 *mf_cmp_p = mf_cmp_test_and;
11266 return !(i.mem_operands && i.imm_operands);
11267 }
11268
11269 /* test without test m imm. */
11270 if ((i.tm.base_opcode | 1) == 0x85
11271 || (i.tm.base_opcode | 1) == 0xa9
11272 || ((i.tm.base_opcode | 1) == 0xf7
11273 && i.tm.extension_opcode == 0))
11274 {
11275 *mf_cmp_p = mf_cmp_test_and;
11276 return !(i.mem_operands && i.imm_operands);
11277 }
11278
11279 /* cmp without cmp m, imm. */
11280 if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
11281 || ((i.tm.base_opcode | 3) == 0x83
11282 && (i.tm.extension_opcode == 0x7)))
11283 {
11284 *mf_cmp_p = mf_cmp_alu_cmp;
11285 return !(i.mem_operands && i.imm_operands);
11286 }
11287
11288 /* inc, dec without inc/dec m. */
11289 if ((is_cpu (&i.tm, CpuNo64)
11290 && (i.tm.base_opcode | 0xf) == 0x4f)
11291 || ((i.tm.base_opcode | 1) == 0xff
11292 && i.tm.extension_opcode <= 0x1))
11293 {
11294 *mf_cmp_p = mf_cmp_incdec;
11295 return !i.mem_operands;
11296 }
11297
11298 return 0;
11299 }
11300
11301 /* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
11302
11303 static int
11304 add_fused_jcc_padding_frag_p (enum mf_cmp_kind *mf_cmp_p,
11305 const struct last_insn *last_insn)
11306 {
11307 /* NB: This doesn't work with COND_JUMP86 without i386. */
11308 if (!align_branch_power
11309 || now_seg == absolute_section
11310 || !cpu_arch_flags.bitfield.cpui386
11311 || !(align_branch & align_branch_fused_bit))
11312 return 0;
11313
11314 if (maybe_fused_with_jcc_p (mf_cmp_p))
11315 {
11316 if (last_insn->kind == last_insn_other)
11317 return 1;
11318 if (flag_debug)
11319 as_warn_where (last_insn->file, last_insn->line,
11320 _("`%s` skips -malign-branch-boundary on `%s`"),
11321 last_insn->name, insn_name (&i.tm));
11322 }
11323
11324 return 0;
11325 }
11326
11327 /* Return 1 if a BRANCH_PREFIX frag should be generated. */
11328
11329 static int
11330 add_branch_prefix_frag_p (const struct last_insn *last_insn)
11331 {
11332 /* NB: Doesn't work with COND_JUMP86 without i386. Don't add a prefix
11333 to PadLock instructions since they include prefixes in the opcode. */
11334 if (!align_branch_power
11335 || !align_branch_prefix_size
11336 || now_seg == absolute_section
11337 || is_cpu (&i.tm, CpuPadLock)
11338 || !cpu_arch_flags.bitfield.cpui386)
11339 return 0;
11340
11341 /* Don't add a prefix if this insn is itself a prefix, or if it has
11342 no operands (where a segment prefix could have special meaning). */
11343 if (!i.operands || i.tm.opcode_modifier.isprefix)
11344 return 0;
11345
11346 if (last_insn->kind == last_insn_other)
11347 return 1;
11348
11349 if (flag_debug)
11350 as_warn_where (last_insn->file, last_insn->line,
11351 _("`%s` skips -malign-branch-boundary on `%s`"),
11352 last_insn->name, insn_name (&i.tm));
11353
11354 return 0;
11355 }
11356
11357 /* Return 1 if a BRANCH_PADDING frag should be generated. */
11358
11359 static int
11360 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
11361 enum mf_jcc_kind *mf_jcc_p,
11362 const struct last_insn *last_insn)
11363 {
11364 int add_padding;
11365
11366 /* NB: Doesn't work with COND_JUMP86 without i386. */
11367 if (!align_branch_power
11368 || now_seg == absolute_section
11369 || !cpu_arch_flags.bitfield.cpui386
11370 || i.tm.opcode_space != SPACE_BASE)
11371 return 0;
11372
11373 add_padding = 0;
11374
11375 /* Check for jcc and direct jmp. */
11376 if (i.tm.opcode_modifier.jump == JUMP)
11377 {
11378 if (i.tm.base_opcode == JUMP_PC_RELATIVE)
11379 {
11380 *branch_p = align_branch_jmp;
11381 add_padding = align_branch & align_branch_jmp_bit;
11382 }
11383 else
11384 {
11385 /* Because J<cc> and JN<cc> share the same group in the macro-fusible
11386 table, ignore the lowest bit. */
11387 *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
11388 *branch_p = align_branch_jcc;
11389 if ((align_branch & align_branch_jcc_bit))
11390 add_padding = 1;
11391 }
11392 }
11393 else if ((i.tm.base_opcode | 1) == 0xc3)
11394 {
11395 /* Near ret. */
11396 *branch_p = align_branch_ret;
11397 if ((align_branch & align_branch_ret_bit))
11398 add_padding = 1;
11399 }
11400 else
11401 {
11402 /* Check for indirect jmp, direct and indirect calls. */
11403 if (i.tm.base_opcode == 0xe8)
11404 {
11405 /* Direct call. */
11406 *branch_p = align_branch_call;
11407 if ((align_branch & align_branch_call_bit))
11408 add_padding = 1;
11409 }
11410 else if (i.tm.base_opcode == 0xff
11411 && (i.tm.extension_opcode == 2
11412 || i.tm.extension_opcode == 4))
11413 {
11414 /* Indirect call and jmp. */
11415 *branch_p = align_branch_indirect;
11416 if ((align_branch & align_branch_indirect_bit))
11417 add_padding = 1;
11418 }
11419
11420 if (add_padding
11421 && i.disp_operands
11422 && tls_get_addr
11423 && (i.op[0].disps->X_op == O_symbol
11424 || (i.op[0].disps->X_op == O_subtract
11425 && i.op[0].disps->X_op_symbol == GOT_symbol)))
11426 {
11427 symbolS *s = i.op[0].disps->X_add_symbol;
11428 /* No padding for calls to a global or undefined tls_get_addr. */
11429 if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
11430 && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
11431 return 0;
11432 }
11433 }
11434
11435 if (add_padding
11436 && last_insn->kind != last_insn_other)
11437 {
11438 if (flag_debug)
11439 as_warn_where (last_insn->file, last_insn->line,
11440 _("`%s` skips -malign-branch-boundary on `%s`"),
11441 last_insn->name, insn_name (&i.tm));
11442 return 0;
11443 }
11444
11445 return add_padding;
11446 }
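/* For instance (illustrative): with -malign-branch-boundary=32 and
   -malign-branch=jmp+call, a "jmp .L2" or "call foo" is preceded by a
   BRANCH_PADDING frag here, so relaxation can later pad the branch away
   from a 32-byte boundary.  */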
11447
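/* Assemble the current instruction held in i into frag_now: emit the
   prefixes, opcode, ModR/M/SIB, displacement and immediate bytes, and
   set up any branch-alignment padding frags.  */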
11448 static void
11449 output_insn (const struct last_insn *last_insn)
11450 {
11451 fragS *insn_start_frag;
11452 offsetT insn_start_off;
11453 fragS *fragP = NULL;
11454 enum align_branch_kind branch = align_branch_none;
11455 /* The initializer is arbitrary, merely to avoid an uninitialized
11456 warning. It's actually either assigned in add_branch_padding_frag_p
11457 or never used. */
11458 enum mf_jcc_kind mf_jcc = mf_jcc_jo;
11459
11460 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11461 if (IS_ELF && x86_used_note && now_seg != absolute_section)
11462 {
11463 if ((i.xstate & xstate_tmm) == xstate_tmm
11464 || is_cpu (&i.tm, CpuAMX_TILE))
11465 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
11466
11467 if (is_cpu (&i.tm, Cpu8087)
11468 || is_cpu (&i.tm, Cpu287)
11469 || is_cpu (&i.tm, Cpu387)
11470 || is_cpu (&i.tm, Cpu687)
11471 || is_cpu (&i.tm, CpuFISTTP))
11472 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
11473
11474 if ((i.xstate & xstate_mmx)
11475 || i.tm.mnem_off == MN_emms
11476 || i.tm.mnem_off == MN_femms)
11477 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
11478
11479 if (i.index_reg)
11480 {
11481 if (i.index_reg->reg_type.bitfield.zmmword)
11482 i.xstate |= xstate_zmm;
11483 else if (i.index_reg->reg_type.bitfield.ymmword)
11484 i.xstate |= xstate_ymm;
11485 else if (i.index_reg->reg_type.bitfield.xmmword)
11486 i.xstate |= xstate_xmm;
11487 }
11488
11489 /* vzeroall / vzeroupper */
11490 if (i.tm.base_opcode == 0x77 && is_cpu (&i.tm, CpuAVX))
11491 i.xstate |= xstate_ymm;
11492
11493 if ((i.xstate & xstate_xmm)
11494 /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
11495 || (i.tm.base_opcode == 0xae
11496 && (is_cpu (&i.tm, CpuSSE)
11497 || is_cpu (&i.tm, CpuAVX)))
11498 || is_cpu (&i.tm, CpuWideKL)
11499 || is_cpu (&i.tm, CpuKL))
11500 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
11501
11502 if ((i.xstate & xstate_ymm) == xstate_ymm)
11503 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
11504 if ((i.xstate & xstate_zmm) == xstate_zmm)
11505 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
11506 if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
11507 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
11508 if (is_cpu (&i.tm, CpuFXSR))
11509 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
11510 if (is_cpu (&i.tm, CpuXsave))
11511 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
11512 if (is_cpu (&i.tm, CpuXsaveopt))
11513 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
11514 if (is_cpu (&i.tm, CpuXSAVEC))
11515 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
11516
11517 if (x86_feature_2_used
11518 || is_cpu (&i.tm, CpuCMOV)
11519 || is_cpu (&i.tm, CpuSYSCALL)
11520 || i.tm.mnem_off == MN_cmpxchg8b)
11521 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
11522 if (is_cpu (&i.tm, CpuSSE3)
11523 || is_cpu (&i.tm, CpuSSSE3)
11524 || is_cpu (&i.tm, CpuSSE4_1)
11525 || is_cpu (&i.tm, CpuSSE4_2)
11526 || is_cpu (&i.tm, CpuCX16)
11527 || is_cpu (&i.tm, CpuPOPCNT)
11528 /* LAHF-SAHF insns in 64-bit mode. */
11529 || (flag_code == CODE_64BIT
11530 && (i.tm.base_opcode | 1) == 0x9f
11531 && i.tm.opcode_space == SPACE_BASE))
11532 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
11533 if (is_cpu (&i.tm, CpuAVX)
11534 || is_cpu (&i.tm, CpuAVX2)
11535 /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
11536 XOP, FMA4, LWP, TBM, and AMX. */
11537 || (i.tm.opcode_modifier.vex
11538 && !is_cpu (&i.tm, CpuAVX512F)
11539 && !is_cpu (&i.tm, CpuAVX512BW)
11540 && !is_cpu (&i.tm, CpuAVX512DQ)
11541 && !is_cpu (&i.tm, CpuXOP)
11542 && !is_cpu (&i.tm, CpuFMA4)
11543 && !is_cpu (&i.tm, CpuLWP)
11544 && !is_cpu (&i.tm, CpuTBM)
11545 && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
11546 || is_cpu (&i.tm, CpuF16C)
11547 || is_cpu (&i.tm, CpuFMA)
11548 || is_cpu (&i.tm, CpuLZCNT)
11549 || is_cpu (&i.tm, CpuMovbe)
11550 || is_cpu (&i.tm, CpuXSAVES)
11551 || (x86_feature_2_used
11552 & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
11553 | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
11554 | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
11555 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
11556 if (is_cpu (&i.tm, CpuAVX512F)
11557 || is_cpu (&i.tm, CpuAVX512BW)
11558 || is_cpu (&i.tm, CpuAVX512DQ)
11559 || is_cpu (&i.tm, CpuAVX512VL)
11560 /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
11561 AVX512-4FMAPS, and AVX512-4VNNIW. */
11562 || (i.tm.opcode_modifier.evex
11563 && !is_cpu (&i.tm, CpuAVX512ER)
11564 && !is_cpu (&i.tm, CpuAVX512PF)
11565 && !is_cpu (&i.tm, CpuAVX512_4FMAPS)
11566 && !is_cpu (&i.tm, CpuAVX512_4VNNIW)))
11567 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
11568 }
11569 #endif
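/* E.g. (illustrative): assembling "vpaddd %ymm0, %ymm1, %ymm2" (AVX2)
   sets GNU_PROPERTY_X86_ISA_1_V3 above; the accumulated bits end up in
   the .note.gnu.property section.  */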
11570
11571 /* Tie dwarf2 debug info to the address at the start of the insn.
11572 We can't do this after the insn has been output, as the current
11573 frag may have been closed off, e.g. by frag_var. */
11574 dwarf2_emit_insn (0);
11575
11576 insn_start_frag = frag_now;
11577 insn_start_off = frag_now_fix ();
11578
11579 if (add_branch_padding_frag_p (&branch, &mf_jcc, last_insn))
11580 {
11581 char *p;
11582 /* Branch can be 8 bytes. Leave some room for prefixes. */
11583 unsigned int max_branch_padding_size = 14;
11584
11585 /* Align section to boundary. */
11586 record_alignment (now_seg, align_branch_power);
11587
11588 /* Make room for padding. */
11589 frag_grow (max_branch_padding_size);
11590
11591 /* Start of the padding. */
11592 p = frag_more (0);
11593
11594 fragP = frag_now;
11595
11596 frag_var (rs_machine_dependent, max_branch_padding_size, 0,
11597 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
11598 NULL, 0, p);
11599
11600 fragP->tc_frag_data.mf_type = mf_jcc;
11601 fragP->tc_frag_data.branch_type = branch;
11602 fragP->tc_frag_data.max_bytes = max_branch_padding_size;
11603 }
11604
11605 if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
11606 && !pre_386_16bit_warned)
11607 {
11608 as_warn (_("use .code16 to ensure correct addressing mode"));
11609 pre_386_16bit_warned = true;
11610 }
11611
11612 /* Output jumps. */
11613 if (i.tm.opcode_modifier.jump == JUMP)
11614 output_branch ();
11615 else if (i.tm.opcode_modifier.jump == JUMP_BYTE
11616 || i.tm.opcode_modifier.jump == JUMP_DWORD)
11617 output_jump ();
11618 else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
11619 output_interseg_jump ();
11620 else
11621 {
11622 /* Output normal instructions here. */
11623 char *p;
11624 unsigned char *q;
11625 unsigned int j;
11626 enum mf_cmp_kind mf_cmp;
11627
11628 if (avoid_fence
11629 && (i.tm.base_opcode == 0xaee8
11630 || i.tm.base_opcode == 0xaef0
11631 || i.tm.base_opcode == 0xaef8))
11632 {
11633 /* Encode lfence, mfence, and sfence as
11634 f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
11635 if (flag_code == CODE_16BIT)
11636 as_bad (_("Cannot convert `%s' in 16-bit mode"), insn_name (&i.tm));
11637 else if (omit_lock_prefix)
11638 as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
11639 insn_name (&i.tm));
11640 else if (now_seg != absolute_section)
11641 {
11642 offsetT val = 0x240483f0ULL;
11643
11644 p = frag_more (5);
11645 md_number_to_chars (p, val, 5);
11646 }
11647 else
11648 abs_section_offset += 5;
11649 return;
11650 }
11651
11652 /* Some processors fail on the LOCK prefix. This option makes the
11653 assembler ignore the LOCK prefix and serves as a workaround. */
11654 if (omit_lock_prefix)
11655 {
11656 if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
11657 && i.tm.opcode_modifier.isprefix)
11658 return;
11659 i.prefix[LOCK_PREFIX] = 0;
11660 }
11661
11662 if (branch)
11663 /* Skip if this is a branch. */
11664 ;
11665 else if (add_fused_jcc_padding_frag_p (&mf_cmp, last_insn))
11666 {
11667 /* Make room for padding. */
11668 frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
11669 p = frag_more (0);
11670
11671 fragP = frag_now;
11672
11673 frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
11674 ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
11675 NULL, 0, p);
11676
11677 fragP->tc_frag_data.mf_type = mf_cmp;
11678 fragP->tc_frag_data.branch_type = align_branch_fused;
11679 fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
11680 }
11681 else if (add_branch_prefix_frag_p (last_insn))
11682 {
11683 unsigned int max_prefix_size = align_branch_prefix_size;
11684
11685 /* Make room for padding. */
11686 frag_grow (max_prefix_size);
11687 p = frag_more (0);
11688
11689 fragP = frag_now;
11690
11691 frag_var (rs_machine_dependent, max_prefix_size, 0,
11692 ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
11693 NULL, 0, p);
11694
11695 fragP->tc_frag_data.max_bytes = max_prefix_size;
11696 }
11697
11698 /* Since the VEX/EVEX prefix contains the implicit prefix, we
11699 don't need the explicit prefix. */
11700 if (!is_any_vex_encoding (&i.tm))
11701 {
11702 switch (i.tm.opcode_modifier.opcodeprefix)
11703 {
11704 case PREFIX_0X66:
11705 add_prefix (0x66);
11706 break;
11707 case PREFIX_0XF2:
11708 add_prefix (0xf2);
11709 break;
11710 case PREFIX_0XF3:
11711 if (!is_cpu (&i.tm, CpuPadLock)
11712 || (i.prefix[REP_PREFIX] != 0xf3))
11713 add_prefix (0xf3);
11714 break;
11715 case PREFIX_NONE:
11716 switch (i.opcode_length)
11717 {
11718 case 2:
11719 break;
11720 case 1:
11721 /* Check for pseudo prefixes. */
11722 if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
11723 break;
11724 as_bad_where (insn_start_frag->fr_file,
11725 insn_start_frag->fr_line,
11726 _("pseudo prefix without instruction"));
11727 return;
11728 default:
11729 abort ();
11730 }
11731 break;
11732 default:
11733 abort ();
11734 }
11735
11736 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
11737 /* For x32, add a dummy REX_OPCODE prefix for mov/add with
11738 R_X86_64_GOTTPOFF relocation so that linker can safely
11739 perform IE->LE optimization. A dummy REX_OPCODE prefix
11740 is also needed for lea with R_X86_64_GOTPC32_TLSDESC
11741 relocation for GDesc -> IE/LE optimization. */
11742 if (x86_elf_abi == X86_64_X32_ABI
11743 && !is_apx_rex2_encoding ()
11744 && i.operands == 2
11745 && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
11746 || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
11747 && i.prefix[REX_PREFIX] == 0)
11748 add_prefix (REX_OPCODE);
11749 #endif
11750
11751 /* The prefix bytes. */
11752 for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
11753 if (*q)
11754 frag_opcode_byte (*q);
11755
11756 if (is_apx_rex2_encoding ())
11757 {
11758 frag_opcode_byte (i.vex.bytes[0]);
11759 frag_opcode_byte (i.vex.bytes[1]);
11760 }
11761 }
11762 else
11763 {
11764 for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
11765 if (*q)
11766 switch (j)
11767 {
11768 case SEG_PREFIX:
11769 case ADDR_PREFIX:
11770 frag_opcode_byte (*q);
11771 break;
11772 default:
11773 /* There should be no other prefixes for instructions
11774 with VEX prefix. */
11775 abort ();
11776 }
11777
11778 /* For EVEX instructions i.vrex should become 0 after
11779 build_evex_prefix. For VEX instructions upper 16 registers
11780 aren't available, so VREX should be 0. */
11781 if (i.vrex)
11782 abort ();
11783 /* Now the VEX prefix. */
11784 if (now_seg != absolute_section)
11785 {
11786 p = frag_more (i.vex.length);
11787 for (j = 0; j < i.vex.length; j++)
11788 p[j] = i.vex.bytes[j];
11789 }
11790 else
11791 abs_section_offset += i.vex.length;
11792 }
11793
11794 /* Now the opcode; be careful about word order here! */
11795 j = i.opcode_length;
11796 if (!i.vex.length)
11797 switch (i.tm.opcode_space)
11798 {
11799 case SPACE_BASE:
11800 break;
11801 case SPACE_0F:
11802 ++j;
11803 break;
11804 case SPACE_0F38:
11805 case SPACE_0F3A:
11806 j += 2;
11807 break;
11808 default:
11809 abort ();
11810 }
11811
11812 if (now_seg == absolute_section)
11813 abs_section_offset += j;
11814 else if (j == 1)
11815 {
11816 FRAG_APPEND_1_CHAR (i.tm.base_opcode);
11817 }
11818 else
11819 {
11820 p = frag_more (j);
11821 if (!i.vex.length
11822 && i.tm.opcode_space != SPACE_BASE)
11823 {
11824 *p++ = 0x0f;
11825 if (i.tm.opcode_space != SPACE_0F)
11826 *p++ = i.tm.opcode_space == SPACE_0F38
11827 ? 0x38 : 0x3a;
11828 }
11829
11830 switch (i.opcode_length)
11831 {
11832 case 2:
11833 /* Put out high byte first: can't use md_number_to_chars! */
11834 *p++ = (i.tm.base_opcode >> 8) & 0xff;
11835 /* Fall through. */
11836 case 1:
11837 *p = i.tm.base_opcode & 0xff;
11838 break;
11839 default:
11840 abort ();
11841 break;
11842 }
11843
11844 }
11845
11846 /* Now the modrm byte and sib byte (if present). */
11847 if (i.tm.opcode_modifier.modrm)
11848 {
11849 frag_opcode_byte ((i.rm.regmem << 0)
11850 | (i.rm.reg << 3)
11851 | (i.rm.mode << 6));
11852 /* If i.rm.regmem == ESP (4)
11853 && i.rm.mode != (Register mode)
11854 && not 16 bit
11855 ==> need the SIB byte. */
11856 if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
11857 && i.rm.mode != 3
11858 && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
11859 frag_opcode_byte ((i.sib.base << 0)
11860 | (i.sib.index << 3)
11861 | (i.sib.scale << 6));
11862 }
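/* E.g. (illustrative): "(%esp)" cannot be expressed in ModR/M alone;
   regmem holds the escape value 4 and the SIB byte emitted above
   supplies base == ESP.  */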
11863
11864 if (i.disp_operands)
11865 output_disp (insn_start_frag, insn_start_off);
11866
11867 if (i.imm_operands)
11868 output_imm (insn_start_frag, insn_start_off);
11869
11870 /*
11871 * frag_now_fix () returning plain abs_section_offset when we're in the
11872 * absolute section, and abs_section_offset not getting updated as data
11873 * gets added to the frag breaks the logic below.
11874 */
11875 if (now_seg != absolute_section)
11876 {
11877 j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
11878 if (j > 15)
11879 {
11880 if (dot_insn ())
11881 as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
11882 j);
11883 else
11884 as_bad (_("instruction length of %u bytes exceeds the limit of 15"),
11885 j);
11886 }
11887 else if (fragP)
11888 {
11889 /* NB: Don't add prefix with GOTPC relocation since
11890 output_disp() above depends on the fixed encoding
11891 length. Can't add prefix with TLS relocation since
11892 it breaks TLS linker optimization. */
11893 unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
11894 /* Prefix count on the current instruction. */
11895 unsigned int count = i.vex.length;
11896 unsigned int k;
11897 for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
11898 /* REX byte is encoded in VEX/EVEX prefix. */
11899 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
11900 count++;
11901
11902 /* Count prefixes for extended opcode maps. */
11903 if (!i.vex.length)
11904 switch (i.tm.opcode_space)
11905 {
11906 case SPACE_BASE:
11907 break;
11908 case SPACE_0F:
11909 count++;
11910 break;
11911 case SPACE_0F38:
11912 case SPACE_0F3A:
11913 count += 2;
11914 break;
11915 default:
11916 abort ();
11917 }
11918
11919 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
11920 == BRANCH_PREFIX)
11921 {
11922 /* Set the maximum prefix size in BRANCH_PREFIX
11923 frag. */
11924 if (fragP->tc_frag_data.max_bytes > max)
11925 fragP->tc_frag_data.max_bytes = max;
11926 if (fragP->tc_frag_data.max_bytes > count)
11927 fragP->tc_frag_data.max_bytes -= count;
11928 else
11929 fragP->tc_frag_data.max_bytes = 0;
11930 }
11931 else
11932 {
11933 /* Remember the maximum prefix size in FUSED_JCC_PADDING
11934 frag. */
11935 unsigned int max_prefix_size;
11936 if (align_branch_prefix_size > max)
11937 max_prefix_size = max;
11938 else
11939 max_prefix_size = align_branch_prefix_size;
11940 if (max_prefix_size > count)
11941 fragP->tc_frag_data.max_prefix_length
11942 = max_prefix_size - count;
11943 }
11944
11945 /* Use existing segment prefix if possible. Use CS
11946 segment prefix in 64-bit mode. In 32-bit mode, use SS
11947 segment prefix with ESP/EBP base register and use DS
11948 segment prefix without ESP/EBP base register. */
11949 if (i.prefix[SEG_PREFIX])
11950 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
11951 else if (flag_code == CODE_64BIT)
11952 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
11953 else if (i.base_reg
11954 && (i.base_reg->reg_num == 4
11955 || i.base_reg->reg_num == 5))
11956 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
11957 else
11958 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
11959 }
11960 }
11961 }
11962
11963 /* NB: Doesn't work with COND_JUMP86 without i386. */
11964 if (align_branch_power
11965 && now_seg != absolute_section
11966 && cpu_arch_flags.bitfield.cpui386)
11967 {
11968 /* Terminate each frag so that we can add prefix and check for
11969 fused jcc. */
11970 frag_wane (frag_now);
11971 frag_new (0);
11972 }
11973
11974 #ifdef DEBUG386
11975 if (flag_debug)
11976 {
11977 pi ("" /*line*/, &i);
11978 }
11979 #endif /* DEBUG386 */
11980 }
11981
11982 /* Return the size of the displacement operand N. */
11983
11984 static int
11985 disp_size (unsigned int n)
11986 {
11987 int size = 4;
11988
11989 if (i.types[n].bitfield.disp64)
11990 size = 8;
11991 else if (i.types[n].bitfield.disp8)
11992 size = 1;
11993 else if (i.types[n].bitfield.disp16)
11994 size = 2;
11995 return size;
11996 }
11997
11998 /* Return the size of the immediate operand N. */
11999
12000 static int
12001 imm_size (unsigned int n)
12002 {
12003 int size = 4;
12004 if (i.types[n].bitfield.imm64)
12005 size = 8;
12006 else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
12007 size = 1;
12008 else if (i.types[n].bitfield.imm16)
12009 size = 2;
12010 return size;
12011 }
12012
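/* Emit the displacement bytes, and any fixups/relocations they need,
   for all displacement operands of the current insn.  */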
12013 static void
12014 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
12015 {
12016 char *p;
12017 unsigned int n;
12018
12019 for (n = 0; n < i.operands; n++)
12020 {
12021 if (operand_type_check (i.types[n], disp))
12022 {
12023 int size = disp_size (n);
12024
12025 if (now_seg == absolute_section)
12026 abs_section_offset += size;
12027 else if (i.op[n].disps->X_op == O_constant)
12028 {
12029 offsetT val = i.op[n].disps->X_add_number;
12030
12031 val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
12032 size);
12033 p = frag_more (size);
12034 md_number_to_chars (p, val, size);
12035 }
12036 else
12037 {
12038 enum bfd_reloc_code_real reloc_type;
12039 bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
12040 bool sign = (flag_code == CODE_64BIT && size == 4
12041 && (!want_disp32 (&i.tm)
12042 || (i.tm.opcode_modifier.jump && !i.jumpabsolute
12043 && !i.types[n].bitfield.baseindex)))
12044 || pcrel;
12045 fixS *fixP;
12046
12047 /* We can't have an 8-bit displacement here. */
12048 gas_assert (!i.types[n].bitfield.disp8);
12049
12050 /* The PC relative address is computed relative
12051 to the instruction boundary, so if immediate
12052 fields follow, we need to adjust the value. */
12053 if (pcrel && i.imm_operands)
12054 {
12055 unsigned int n1;
12056 int sz = 0;
12057
12058 for (n1 = 0; n1 < i.operands; n1++)
12059 if (operand_type_check (i.types[n1], imm))
12060 {
12061 /* Only one immediate is allowed for PC
12062 relative address, except with .insn. */
12063 gas_assert (sz == 0 || dot_insn ());
12064 sz += imm_size (n1);
12065 }
12066 /* We should find at least one immediate. */
12067 gas_assert (sz != 0);
12068 i.op[n].disps->X_add_number -= sz;
12069 }
12070
12071 p = frag_more (size);
12072 reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
12073 if (GOT_symbol
12074 && GOT_symbol == i.op[n].disps->X_add_symbol
12075 && (((reloc_type == BFD_RELOC_32
12076 || reloc_type == BFD_RELOC_X86_64_32S
12077 || (reloc_type == BFD_RELOC_64
12078 && object_64bit))
12079 && (i.op[n].disps->X_op == O_symbol
12080 || (i.op[n].disps->X_op == O_add
12081 && ((symbol_get_value_expression
12082 (i.op[n].disps->X_op_symbol)->X_op)
12083 == O_subtract))))
12084 || reloc_type == BFD_RELOC_32_PCREL))
12085 {
12086 if (!object_64bit)
12087 {
12088 reloc_type = BFD_RELOC_386_GOTPC;
12089 i.has_gotpc_tls_reloc = true;
12090 i.op[n].disps->X_add_number +=
12091 encoding_length (insn_start_frag, insn_start_off, p);
12092 }
12093 else if (reloc_type == BFD_RELOC_64)
12094 reloc_type = BFD_RELOC_X86_64_GOTPC64;
12095 else
12096 /* Don't do the adjustment for x86-64, as there
12097 the pcrel addressing is relative to the _next_
12098 insn, and that is taken care of in other code. */
12099 reloc_type = BFD_RELOC_X86_64_GOTPC32;
12100 }
12101 else if (align_branch_power)
12102 {
12103 switch (reloc_type)
12104 {
12105 case BFD_RELOC_386_TLS_GD:
12106 case BFD_RELOC_386_TLS_LDM:
12107 case BFD_RELOC_386_TLS_IE:
12108 case BFD_RELOC_386_TLS_IE_32:
12109 case BFD_RELOC_386_TLS_GOTIE:
12110 case BFD_RELOC_386_TLS_GOTDESC:
12111 case BFD_RELOC_386_TLS_DESC_CALL:
12112 case BFD_RELOC_X86_64_TLSGD:
12113 case BFD_RELOC_X86_64_TLSLD:
12114 case BFD_RELOC_X86_64_GOTTPOFF:
12115 case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
12116 case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
12117 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
12118 case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
12119 case BFD_RELOC_X86_64_TLSDESC_CALL:
12120 i.has_gotpc_tls_reloc = true;
12121 default:
12122 break;
12123 }
12124 }
12125 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
12126 size, i.op[n].disps, pcrel,
12127 reloc_type);
12128
12129 if (flag_code == CODE_64BIT && size == 4 && pcrel
12130 && !i.prefix[ADDR_PREFIX])
12131 fixP->fx_signed = 1;
12132
12133 if (reloc_type == BFD_RELOC_X86_64_GOTTPOFF
12134 && i.tm.opcode_space == SPACE_EVEXMAP4)
12135 {
12136 /* Only "add %reg1, foo@gottpoff(%rip), %reg2" is
12137 allowed in md_assemble. Set fx_tcbit2 for EVEX
12138 prefix. */
12139 fixP->fx_tcbit2 = 1;
12140 continue;
12141 }
12142
12143 if (i.base_reg && i.base_reg->reg_num == RegIP)
12144 {
12145 if (reloc_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
12146 {
12147 /* Set fx_tcbit for REX2 prefix. */
12148 if (is_apx_rex2_encoding ())
12149 fixP->fx_tcbit = 1;
12150 continue;
12151 }
12152 }
12153 /* In 64-bit, i386_validate_fix updates only (%rip)
12154 relocations. */
12155 else if (object_64bit)
12156 continue;
12157
12158 /* Check for "call/jmp *mem", "mov mem, %reg",
12159 "test %reg, mem" and "binop mem, %reg" where binop
12160 is one of adc, add, and, cmp, or, sbb, sub, xor
12161 instructions without data prefix. Always generate
12162 R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
12163 if (i.prefix[DATA_PREFIX] == 0
12164 && (i.rm.mode == 2
12165 || (i.rm.mode == 0 && i.rm.regmem == 5))
12166 && i.tm.opcode_space == SPACE_BASE
12167 && ((i.operands == 1
12168 && i.tm.base_opcode == 0xff
12169 && (i.rm.reg == 2 || i.rm.reg == 4))
12170 || (i.operands == 2
12171 && (i.tm.base_opcode == 0x8b
12172 || i.tm.base_opcode == 0x85
12173 || (i.tm.base_opcode & ~0x38) == 0x03))))
12174 {
12175 if (object_64bit)
12176 {
12177 if (reloc_type == BFD_RELOC_X86_64_GOTTPOFF)
12178 {
12179 /* Set fx_tcbit for REX2 prefix. */
12180 if (is_apx_rex2_encoding ())
12181 fixP->fx_tcbit = 1;
12182 }
12183 else if (generate_relax_relocations)
12184 {
12185 /* Set fx_tcbit3 for REX2 prefix. */
12186 if (is_apx_rex2_encoding ())
12187 fixP->fx_tcbit3 = 1;
12188 else if (i.rex)
12189 fixP->fx_tcbit2 = 1;
12190 else
12191 fixP->fx_tcbit = 1;
12192 }
12193 }
12194 else if (generate_relax_relocations
12195 || (i.rm.mode == 0 && i.rm.regmem == 5))
12196 fixP->fx_tcbit2 = 1;
12197 }
12198 }
12199 }
12200 }
12201 }
12202
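/* Emit the immediate bytes, and any fixups/relocations they need, for
   all immediate operands of the current insn.  */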
12203 static void
12204 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
12205 {
12206 char *p;
12207 unsigned int n;
12208
12209 for (n = 0; n < i.operands; n++)
12210 {
12211 if (operand_type_check (i.types[n], imm))
12212 {
12213 int size = imm_size (n);
12214
12215 if (now_seg == absolute_section)
12216 abs_section_offset += size;
12217 else if (i.op[n].imms->X_op == O_constant)
12218 {
12219 offsetT val;
12220
12221 val = offset_in_range (i.op[n].imms->X_add_number,
12222 size);
12223 p = frag_more (size);
12224 md_number_to_chars (p, val, size);
12225 }
12226 else
12227 {
12228 /* Not absolute_section.
12229 Need a 32-bit fixup (8-bit non-absolute
12230 immediates aren't supported). Try to support
12231 other sizes ... */
12232 enum bfd_reloc_code_real reloc_type;
12233 int sign;
12234
12235 if (i.types[n].bitfield.imm32s
12236 && (i.suffix == QWORD_MNEM_SUFFIX
12237 || (!i.suffix && i.tm.opcode_modifier.no_lsuf)
12238 || (i.prefix[REX_PREFIX] & REX_W)
12239 || dot_insn ()))
12240 sign = 1;
12241 else
12242 sign = 0;
12243
12244 p = frag_more (size);
12245 reloc_type = reloc (size, 0, sign, i.reloc[n]);
12246
12247 /* This is tough to explain. We end up with this one if we
12248 * have operands that look like
12249 * "_GLOBAL_OFFSET_TABLE_+[.-.L284]". The goal here is to
12250 * obtain the absolute address of the GOT, and it is strongly
12251 * preferable from a performance point of view to avoid using
12252 * a runtime relocation for this. The actual sequence of
12253 * instructions often looks something like:
12254 *
12255 * call .L66
12256 * .L66:
12257 * popl %ebx
12258 * addl $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
12259 *
12260 * The call and pop essentially return the absolute address
12261 * of the label .L66 and store it in %ebx. The linker itself
12262 * will ultimately change the first operand of the addl so
12263 * that %ebx points to the GOT, but to keep things simple, the
12264 * .o file must have this operand set so that it generates not
12265 * the absolute address of .L66, but the absolute address of
12266 * itself. This allows the linker to simply treat a GOTPC
12267 * relocation as asking for a pcrel offset to the GOT to be
12268 * added in, and the addend of the relocation is stored in the
12269 * operand field for the instruction itself.
12270 *
12271 * Our job here is to fix the operand so that it would add
12272 * the correct offset so that %ebx would point to itself. The
12273 * thing that is tricky is that .-.L66 will point to the
12274 * beginning of the instruction, so we need to further modify
12275 * the operand so that it will point to itself. There are
12276 * other cases where you have something like:
12277 *
12278 * .long $_GLOBAL_OFFSET_TABLE_+[.-.L66]
12279 *
12280 * and here no correction would be required. Internally in
12281 * the assembler we treat operands of this form as not being
12282 * pcrel since the '.' is explicitly mentioned, and I wonder
12283 * whether it would simplify matters to do it this way. Who
12284 * knows. In earlier versions of the PIC patches, the
12285 * pcrel_adjust field was used to store the correction, but
12286 * since the expression is not pcrel, I felt it would be
12287 * confusing to do it this way. */
12288
12289 if ((reloc_type == BFD_RELOC_32
12290 || reloc_type == BFD_RELOC_X86_64_32S
12291 || reloc_type == BFD_RELOC_64)
12292 && GOT_symbol
12293 && GOT_symbol == i.op[n].imms->X_add_symbol
12294 && (i.op[n].imms->X_op == O_symbol
12295 || (i.op[n].imms->X_op == O_add
12296 && ((symbol_get_value_expression
12297 (i.op[n].imms->X_op_symbol)->X_op)
12298 == O_subtract))))
12299 {
12300 if (!object_64bit)
12301 reloc_type = BFD_RELOC_386_GOTPC;
12302 else if (size == 4)
12303 reloc_type = BFD_RELOC_X86_64_GOTPC32;
12304 else if (size == 8)
12305 reloc_type = BFD_RELOC_X86_64_GOTPC64;
12306 i.has_gotpc_tls_reloc = true;
12307 i.op[n].imms->X_add_number +=
12308 encoding_length (insn_start_frag, insn_start_off, p);
12309 }
12310 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
12311 i.op[n].imms, 0, reloc_type);
12312 }
12313 }
12314 }
12315 }
12316 \f
12317 /* x86_cons_fix_new is called via the expression parsing code when a
12318 reloc is needed. We use this hook to get the correct .got reloc. */
12319 static int cons_sign = -1;
12320
12321 void
12322 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
12323 expressionS *exp, bfd_reloc_code_real_type r)
12324 {
12325 r = reloc (len, 0, cons_sign, r);
12326
12327 #ifdef TE_PE
12328 if (exp->X_op == O_secrel)
12329 {
12330 exp->X_op = O_symbol;
12331 r = BFD_RELOC_32_SECREL;
12332 }
12333 else if (exp->X_op == O_secidx)
12334 r = BFD_RELOC_16_SECIDX;
12335 #endif
12336
12337 fix_new_exp (frag, off, len, exp, 0, r);
12338 }
12339
12340 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
12341 purpose of the `.dc.a' internal pseudo-op. */
12342
12343 int
12344 x86_address_bytes (void)
12345 {
12346 if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
12347 return 4;
12348 return stdoutput->arch_info->bits_per_address / 8;
12349 }
12350
12351 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
12352 || defined (LEX_AT)) && !defined (TE_PE)
12353 # define lex_got(reloc, adjust, types) NULL
12354 #else
12355 /* Parse operands of the form
12356 <symbol>@GOTOFF+<nnn>
12357 and similar .plt or .got references.
12358
12359 If we find one, set up the correct relocation in REL and copy the
12360 input string, minus the `@GOTOFF', into a malloc'd buffer for
12361 parsing by the calling routine. Return this buffer, and if ADJUST
12362 is non-null set it to the length of the string we removed from the
12363 input line. Otherwise return NULL. */
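/* E.g. (illustrative): given "foo@GOTOFF+4" in 32-bit mode, *REL becomes
   BFD_RELOC_386_GOTOFF, the returned buffer holds "foo +4", and *ADJUST
   is set to 6 (the length of "GOTOFF").  */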
12364 static char *
12365 lex_got (enum bfd_reloc_code_real *rel,
12366 int *adjust,
12367 i386_operand_type *types)
12368 {
12369 /* Some of the relocations depend on the size of the field to be
12370 relocated. But in our callers i386_immediate and i386_displacement
12371 we don't yet know the operand size (this will be set by insn
12372 matching). Hence we record the word32 relocation here,
12373 and adjust the reloc according to the real size in reloc(). */
12374 static const struct
12375 {
12376 const char *str;
12377 int len;
12378 const enum bfd_reloc_code_real rel[2];
12379 const i386_operand_type types64;
12380 bool need_GOT_symbol;
12381 }
12382 gotrel[] =
12383 {
12384
12385 #define OPERAND_TYPE_IMM32_32S_DISP32 { .bitfield = \
12386 { .imm32 = 1, .imm32s = 1, .disp32 = 1 } }
12387 #define OPERAND_TYPE_IMM32_32S_64_DISP32 { .bitfield = \
12388 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1 } }
12389 #define OPERAND_TYPE_IMM32_32S_64_DISP32_64 { .bitfield = \
12390 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1, .disp64 = 1 } }
12391 #define OPERAND_TYPE_IMM64_DISP64 { .bitfield = \
12392 { .imm64 = 1, .disp64 = 1 } }
12393
12394 #ifndef TE_PE
12395 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12396 { STRING_COMMA_LEN ("SIZE"), { BFD_RELOC_SIZE32,
12397 BFD_RELOC_SIZE32 },
12398 { .bitfield = { .imm32 = 1, .imm64 = 1 } }, false },
12399 #endif
12400 { STRING_COMMA_LEN ("PLTOFF"), { _dummy_first_bfd_reloc_code_real,
12401 BFD_RELOC_X86_64_PLTOFF64 },
12402 { .bitfield = { .imm64 = 1 } }, true },
12403 { STRING_COMMA_LEN ("PLT"), { BFD_RELOC_386_PLT32,
12404 BFD_RELOC_X86_64_PLT32 },
12405 OPERAND_TYPE_IMM32_32S_DISP32, false },
12406 { STRING_COMMA_LEN ("GOTPLT"), { _dummy_first_bfd_reloc_code_real,
12407 BFD_RELOC_X86_64_GOTPLT64 },
12408 OPERAND_TYPE_IMM64_DISP64, true },
12409 { STRING_COMMA_LEN ("GOTOFF"), { BFD_RELOC_386_GOTOFF,
12410 BFD_RELOC_X86_64_GOTOFF64 },
12411 OPERAND_TYPE_IMM64_DISP64, true },
12412 { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
12413 BFD_RELOC_X86_64_GOTPCREL },
12414 OPERAND_TYPE_IMM32_32S_DISP32, true },
12415 { STRING_COMMA_LEN ("TLSGD"), { BFD_RELOC_386_TLS_GD,
12416 BFD_RELOC_X86_64_TLSGD },
12417 OPERAND_TYPE_IMM32_32S_DISP32, true },
12418 { STRING_COMMA_LEN ("TLSLDM"), { BFD_RELOC_386_TLS_LDM,
12419 _dummy_first_bfd_reloc_code_real },
12420 OPERAND_TYPE_NONE, true },
12421 { STRING_COMMA_LEN ("TLSLD"), { _dummy_first_bfd_reloc_code_real,
12422 BFD_RELOC_X86_64_TLSLD },
12423 OPERAND_TYPE_IMM32_32S_DISP32, true },
12424 { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
12425 BFD_RELOC_X86_64_GOTTPOFF },
12426 OPERAND_TYPE_IMM32_32S_DISP32, true },
12427 { STRING_COMMA_LEN ("TPOFF"), { BFD_RELOC_386_TLS_LE_32,
12428 BFD_RELOC_X86_64_TPOFF32 },
12429 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
12430 { STRING_COMMA_LEN ("NTPOFF"), { BFD_RELOC_386_TLS_LE,
12431 _dummy_first_bfd_reloc_code_real },
12432 OPERAND_TYPE_NONE, true },
12433 { STRING_COMMA_LEN ("DTPOFF"), { BFD_RELOC_386_TLS_LDO_32,
12434 BFD_RELOC_X86_64_DTPOFF32 },
12435 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
12436 { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
12437 _dummy_first_bfd_reloc_code_real },
12438 OPERAND_TYPE_NONE, true },
12439 { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
12440 _dummy_first_bfd_reloc_code_real },
12441 OPERAND_TYPE_NONE, true },
12442 { STRING_COMMA_LEN ("GOT"), { BFD_RELOC_386_GOT32,
12443 BFD_RELOC_X86_64_GOT32 },
12444 OPERAND_TYPE_IMM32_32S_64_DISP32, true },
12445 { STRING_COMMA_LEN ("TLSDESC"), { BFD_RELOC_386_TLS_GOTDESC,
12446 BFD_RELOC_X86_64_GOTPC32_TLSDESC },
12447 OPERAND_TYPE_IMM32_32S_DISP32, true },
12448 { STRING_COMMA_LEN ("TLSCALL"), { BFD_RELOC_386_TLS_DESC_CALL,
12449 BFD_RELOC_X86_64_TLSDESC_CALL },
12450 OPERAND_TYPE_IMM32_32S_DISP32, true },
12451 #else /* TE_PE */
12452 { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
12453 BFD_RELOC_32_SECREL },
12454 OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
12455 #endif
12456
12457 #undef OPERAND_TYPE_IMM32_32S_DISP32
12458 #undef OPERAND_TYPE_IMM32_32S_64_DISP32
12459 #undef OPERAND_TYPE_IMM32_32S_64_DISP32_64
12460 #undef OPERAND_TYPE_IMM64_DISP64
12461
12462 };
12463 char *cp;
12464 unsigned int j;
12465
12466 #if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
12467 if (!IS_ELF)
12468 return NULL;
12469 #endif
12470
12471 for (cp = input_line_pointer; *cp != '@'; cp++)
12472 if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
12473 return NULL;
12474
12475 for (j = 0; j < ARRAY_SIZE (gotrel); j++)
12476 {
12477 int len = gotrel[j].len;
12478 if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
12479 {
12480 if (gotrel[j].rel[object_64bit] != 0)
12481 {
12482 int first, second;
12483 char *tmpbuf, *past_reloc;
12484
12485 *rel = gotrel[j].rel[object_64bit];
12486
12487 if (types)
12488 {
12489 if (flag_code != CODE_64BIT)
12490 {
12491 types->bitfield.imm32 = 1;
12492 types->bitfield.disp32 = 1;
12493 }
12494 else
12495 *types = gotrel[j].types64;
12496 }
12497
12498 if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
12499 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
12500
12501 /* The length of the first part of our input line. */
12502 first = cp - input_line_pointer;
12503
12504 /* The second part goes from after the reloc token until
12505 (and including) an end_of_line char or comma. */
12506 past_reloc = cp + 1 + len;
12507 cp = past_reloc;
12508 while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
12509 ++cp;
12510 second = cp + 1 - past_reloc;
12511
12512 /* Allocate and copy string. The trailing NUL shouldn't
12513 be necessary, but be safe. */
12514 tmpbuf = XNEWVEC (char, first + second + 2);
12515 memcpy (tmpbuf, input_line_pointer, first);
12516 if (second != 0 && *past_reloc != ' ')
12517 /* Replace the relocation token with ' ', so that
12518 errors like foo@GOTOFF1 will be detected. */
12519 tmpbuf[first++] = ' ';
12520 else
12521 /* Increment length by 1 if the relocation token is
12522 removed. */
12523 len++;
12524 if (adjust)
12525 *adjust = len;
12526 memcpy (tmpbuf + first, past_reloc, second);
12527 tmpbuf[first + second] = '\0';
12528 return tmpbuf;
12529 }
12530
12531 as_bad (_("@%s reloc is not supported with %d-bit output format"),
12532 gotrel[j].str, 1 << (5 + object_64bit));
12533 return NULL;
12534 }
12535 }
12536
12537 /* Might be a symbol version string. Don't as_bad here. */
12538 return NULL;
12539 }
12540 #endif
12541
12542 bfd_reloc_code_real_type
12543 x86_cons (expressionS *exp, int size)
12544 {
12545 bfd_reloc_code_real_type got_reloc = NO_RELOC;
12546
12547 intel_syntax = -intel_syntax;
12548 exp->X_md = 0;
12549 expr_mode = expr_operator_none;
12550
12551 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
12552 && !defined (LEX_AT)) \
12553 || defined (TE_PE)
12554 if (size == 4 || (object_64bit && size == 8))
12555 {
12556 /* Handle @GOTOFF and the like in an expression. */
12557 char *save;
12558 char *gotfree_input_line;
12559 int adjust = 0;
12560
12561 save = input_line_pointer;
12562 gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
12563 if (gotfree_input_line)
12564 input_line_pointer = gotfree_input_line;
12565
12566 expression (exp);
12567
12568 if (gotfree_input_line)
12569 {
12570 /* expression () has merrily parsed up to the end of line,
12571 or a comma - in the wrong buffer. Transfer how far
12572 input_line_pointer has moved to the right buffer. */
12573 input_line_pointer = (save
12574 + (input_line_pointer - gotfree_input_line)
12575 + adjust);
12576 free (gotfree_input_line);
12577 if (exp->X_op == O_constant
12578 || exp->X_op == O_absent
12579 || exp->X_op == O_illegal
12580 || exp->X_op == O_register
12581 || exp->X_op == O_big)
12582 {
12583 char c = *input_line_pointer;
12584 *input_line_pointer = 0;
12585 as_bad (_("missing or invalid expression `%s'"), save);
12586 *input_line_pointer = c;
12587 }
12588 else if ((got_reloc == BFD_RELOC_386_PLT32
12589 || got_reloc == BFD_RELOC_X86_64_PLT32)
12590 && exp->X_op != O_symbol)
12591 {
12592 char c = *input_line_pointer;
12593 *input_line_pointer = 0;
12594 as_bad (_("invalid PLT expression `%s'"), save);
12595 *input_line_pointer = c;
12596 }
12597 }
12598 }
12599 else
12600 #endif
12601 expression (exp);
12602
12603 intel_syntax = -intel_syntax;
12604
12605 if (intel_syntax)
12606 i386_intel_simplify (exp);
12607
12608 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
12609 if (size <= 4 && expr_mode == expr_operator_present
12610 && exp->X_op == O_constant && !object_64bit)
12611 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
12612
12613 return got_reloc;
12614 }
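/* E.g. (illustrative): ".long foo@GOTOFF" in 32-bit ELF output is parsed
   via lex_got() above and ultimately yields an R_386_GOTOFF relocation
   against foo.  */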
12615
12616 static void
12617 signed_cons (int size)
12618 {
12619 if (object_64bit)
12620 cons_sign = 1;
12621 cons (size);
12622 cons_sign = -1;
12623 }
12624
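/* Handle the .insn directive: emit an instruction from a raw encoding
   specification, e.g. (illustrative) ".insn 0x90" for a NOP, or a
   VEX/XOP/EVEX form such as
     .insn VEX.L0.66.0F.W0 0x58, %xmm2, %xmm1, %xmm0
   for an encoding equivalent to vaddpd.  */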
12625 static void
12626 s_insn (int dummy ATTRIBUTE_UNUSED)
12627 {
12628 char mnemonic[MAX_MNEM_SIZE], *line = input_line_pointer, *ptr;
12629 char *saved_ilp = find_end_of_line (line, false), saved_char;
12630 const char *end;
12631 unsigned int j;
12632 valueT val;
12633 bool vex = false, xop = false, evex = false;
12634 struct last_insn *last_insn;
12635
12636 init_globals ();
12637
12638 saved_char = *saved_ilp;
12639 *saved_ilp = 0;
12640
12641 end = parse_insn (line, mnemonic, true);
12642 if (end == NULL)
12643 {
12644 bad:
12645 *saved_ilp = saved_char;
12646 ignore_rest_of_line ();
12647 i.tm.mnem_off = 0;
12648 return;
12649 }
12650 line += end - line;
12651
12652 current_templates.start = &i.tm;
12653 current_templates.end = &i.tm + 1;
12654 i.tm.mnem_off = MN__insn;
12655 i.tm.extension_opcode = None;
12656
12657 if (startswith (line, "VEX")
12658 && (line[3] == '.' || is_space_char (line[3])))
12659 {
12660 vex = true;
12661 line += 3;
12662 }
12663 else if (startswith (line, "XOP") && ISDIGIT (line[3]))
12664 {
12665 char *e;
12666 unsigned long n = strtoul (line + 3, &e, 16);
12667
12668 if (e == line + 5 && n >= 0x08 && n <= 0x1f
12669 && (*e == '.' || is_space_char (*e)))
12670 {
12671 xop = true;
12672 /* Arrange for build_vex_prefix() to emit 0x8f. */
12673 i.tm.opcode_space = SPACE_XOP08;
12674 i.insn_opcode_space = n;
12675 line = e;
12676 }
12677 }
12678 else if (startswith (line, "EVEX")
12679 && (line[4] == '.' || is_space_char (line[4])))
12680 {
12681 evex = true;
12682 line += 4;
12683 }
12684
12685 if (vex || xop
12686 ? i.encoding == encoding_evex
12687 : evex
12688 ? i.encoding == encoding_vex
12689 || i.encoding == encoding_vex3
12690 : i.encoding != encoding_default)
12691 {
12692 as_bad (_("pseudo-prefix conflicts with encoding specifier"));
12693 goto bad;
12694 }
12695
12696 if (line > end && i.encoding == encoding_default)
12697 i.encoding = evex ? encoding_evex : encoding_vex;
12698
12699 if (i.encoding != encoding_default)
12700 {
12701 /* Only address size and segment override prefixes are permitted with
12702 VEX/XOP/EVEX encodings. */
12703 const unsigned char *p = i.prefix;
12704
12705 for (j = 0; j < ARRAY_SIZE (i.prefix); ++j, ++p)
12706 {
12707 if (!*p)
12708 continue;
12709
12710 switch (j)
12711 {
12712 case SEG_PREFIX:
12713 case ADDR_PREFIX:
12714 break;
12715 default:
12716 as_bad (_("illegal prefix used with VEX/XOP/EVEX"));
12717 goto bad;
12718 }
12719 }
12720 }
12721
12722 if (line > end && *line == '.')
12723 {
12724 /* Length specifier (VEX.L, XOP.L, EVEX.L'L). */
12725 switch (line[1])
12726 {
12727 case 'L':
12728 switch (line[2])
12729 {
12730 case '0':
12731 if (evex)
12732 i.tm.opcode_modifier.evex = EVEX128;
12733 else
12734 i.tm.opcode_modifier.vex = VEX128;
12735 break;
12736
12737 case '1':
12738 if (evex)
12739 i.tm.opcode_modifier.evex = EVEX256;
12740 else
12741 i.tm.opcode_modifier.vex = VEX256;
12742 break;
12743
12744 case '2':
12745 if (evex)
12746 i.tm.opcode_modifier.evex = EVEX512;
12747 break;
12748
12749 case '3':
12750 if (evex)
12751 i.tm.opcode_modifier.evex = EVEX_L3;
12752 break;
12753
12754 case 'I':
12755 if (line[3] == 'G')
12756 {
12757 if (evex)
12758 i.tm.opcode_modifier.evex = EVEXLIG;
12759 else
12760 i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
12761 ++line;
12762 }
12763 break;
12764 }
12765
12766 if (i.tm.opcode_modifier.vex || i.tm.opcode_modifier.evex)
12767 line += 3;
12768 break;
12769
12770 case '1':
12771 if (line[2] == '2' && line[3] == '8')
12772 {
12773 if (evex)
12774 i.tm.opcode_modifier.evex = EVEX128;
12775 else
12776 i.tm.opcode_modifier.vex = VEX128;
12777 line += 4;
12778 }
12779 break;
12780
12781 case '2':
12782 if (line[2] == '5' && line[3] == '6')
12783 {
12784 if (evex)
12785 i.tm.opcode_modifier.evex = EVEX256;
12786 else
12787 i.tm.opcode_modifier.vex = VEX256;
12788 line += 4;
12789 }
12790 break;
12791
12792 case '5':
12793 if (evex && line[2] == '1' && line[3] == '2')
12794 {
12795 i.tm.opcode_modifier.evex = EVEX512;
12796 line += 4;
12797 }
12798 break;
12799 }
12800 }
12801
12802 if (line > end && *line == '.')
12803 {
12804 /* Embedded prefix (VEX.pp, XOP.pp, EVEX.pp). */
12805 switch (line[1])
12806 {
12807 case 'N':
12808 if (line[2] == 'P')
12809 line += 3;
12810 break;
12811
12812 case '6':
12813 if (line[2] == '6')
12814 {
12815 i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
12816 line += 3;
12817 }
12818 break;
12819
12820 case 'F': case 'f':
12821 if (line[2] == '3')
12822 {
12823 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
12824 line += 3;
12825 }
12826 else if (line[2] == '2')
12827 {
12828 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
12829 line += 3;
12830 }
12831 break;
12832 }
12833 }
12834
12835 if (line > end && !xop && *line == '.')
12836 {
12837 /* Encoding space (VEX.mmmmm, EVEX.mmmm). */
12838 switch (line[1])
12839 {
12840 case '0':
12841 if (TOUPPER (line[2]) != 'F')
12842 break;
12843 if (line[3] == '.' || is_space_char (line[3]))
12844 {
12845 i.insn_opcode_space = SPACE_0F;
12846 line += 3;
12847 }
12848 else if (line[3] == '3'
12849 && (line[4] == '8' || TOUPPER (line[4]) == 'A')
12850 && (line[5] == '.' || is_space_char (line[5])))
12851 {
12852 i.insn_opcode_space = line[4] == '8' ? SPACE_0F38 : SPACE_0F3A;
12853 line += 5;
12854 }
12855 break;
12856
12857 case 'M':
12858 if (ISDIGIT (line[2]) && line[2] != '0')
12859 {
12860 char *e;
12861 unsigned long n = strtoul (line + 2, &e, 10);
12862
12863 if (n <= (evex ? 15 : 31)
12864 && (*e == '.' || is_space_char (*e)))
12865 {
12866 i.insn_opcode_space = n;
12867 line = e;
12868 }
12869 }
12870 break;
12871 }
12872 }
12873
12874 if (line > end && *line == '.' && line[1] == 'W')
12875 {
12876 /* VEX.W, XOP.W, EVEX.W */
12877 switch (line[2])
12878 {
12879 case '0':
12880 i.tm.opcode_modifier.vexw = VEXW0;
12881 break;
12882
12883 case '1':
12884 i.tm.opcode_modifier.vexw = VEXW1;
12885 break;
12886
12887 case 'I':
12888 if (line[3] == 'G')
12889 {
12890 i.tm.opcode_modifier.vexw = VEXWIG;
12891 ++line;
12892 }
12893 break;
12894 }
12895
12896 if (i.tm.opcode_modifier.vexw)
12897 line += 3;
12898 }
12899
12900 if (line > end && *line && !is_space_char (*line))
12901 {
12902 /* Improve diagnostic a little. */
12903 if (*line == '.' && line[1] && !is_space_char (line[1]))
12904 ++line;
12905 goto done;
12906 }
12907
12908 /* Before processing the opcode expression, find trailing "+r" or
12909 "/<digit>" specifiers. */
12910 for (ptr = line; ; ++ptr)
12911 {
12912 unsigned long n;
12913 char *e;
12914
12915 ptr = strpbrk (ptr, "+/,");
12916 if (ptr == NULL || *ptr == ',')
12917 break;
12918
12919 if (*ptr == '+' && ptr[1] == 'r'
12920 && (ptr[2] == ',' || (is_space_char (ptr[2]) && ptr[3] == ',')))
12921 {
12922 *ptr = ' ';
12923 ptr[1] = ' ';
12924 i.short_form = true;
12925 break;
12926 }
12927
12928 if (*ptr == '/' && ISDIGIT (ptr[1])
12929 && (n = strtoul (ptr + 1, &e, 8)) < 8
12930 && e == ptr + 2
12931 && (ptr[2] == ',' || (is_space_char (ptr[2]) && ptr[3] == ',')))
12932 {
12933 *ptr = ' ';
12934 ptr[1] = ' ';
12935 i.tm.extension_opcode = n;
12936 i.tm.opcode_modifier.modrm = 1;
12937 break;
12938 }
12939 }
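/* E.g. (illustrative): ".insn 0x58+r, %eax" sets i.short_form here; the
   register number is later merged into the low three opcode bits,
   yielding the byte 0x58, i.e. "pop %eax".  */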
12940
12941 input_line_pointer = line;
12942 val = get_absolute_expression ();
12943 line = input_line_pointer;
12944
12945 if (i.short_form && (val & 7))
12946 as_warn ("`+r' assumes low three opcode bits to be clear");
12947
12948 for (j = 1; j < sizeof(val); ++j)
12949 if (!(val >> (j * 8)))
12950 break;
12951
12952 /* Trim off a prefix if present. */
12953 if (j > 1 && !vex && !xop && !evex)
12954 {
12955 uint8_t byte = val >> ((j - 1) * 8);
12956
12957 switch (byte)
12958 {
12959 case DATA_PREFIX_OPCODE:
12960 case REPE_PREFIX_OPCODE:
12961 case REPNE_PREFIX_OPCODE:
12962 if (!add_prefix (byte))
12963 goto bad;
12964 val &= ((uint64_t)1 << (--j * 8)) - 1;
12965 break;
12966 }
12967 }
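/* E.g. (illustrative): for ".insn 0x660f58, ..." the leading 0x66 is
   peeled off into i.prefix[] above, leaving 0x0f58 as the opcode
   residual.  */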
12968
12969 /* Parse operands, if any, before evaluating encoding space. */
12970 if (*line == ',')
12971 {
12972 i.memshift = -1;
12973
12974 ptr = parse_operands (line + 1, &i386_mnemonics[MN__insn]);
12975 this_operand = -1;
12976 if (!ptr)
12977 goto bad;
12978 line = ptr;
12979
12980 if (!i.operands)
12981 {
12982 as_bad (_("expecting operand after ','; got nothing"));
12983 goto done;
12984 }
12985
12986 if (i.mem_operands > 1)
12987 {
12988 as_bad (_("too many memory references for `%s'"),
12989 &i386_mnemonics[MN__insn]);
12990 goto done;
12991 }
12992
12993 /* No need to distinguish encoding_evex and encoding_evex512. */
12994 if (i.encoding == encoding_evex512)
12995 i.encoding = encoding_evex;
12996 }
12997
12998 /* Trim off encoding space. */
12999 if (j > 1 && !i.insn_opcode_space && (val >> ((j - 1) * 8)) == 0x0f)
13000 {
13001 uint8_t byte = val >> ((--j - 1) * 8);
13002
13003 i.insn_opcode_space = SPACE_0F;
13004 switch (byte & -(j > 1 && !i.rex2_encoding
13005 && (i.encoding != encoding_egpr || evex)))
13006 {
13007 case 0x38:
13008 i.insn_opcode_space = SPACE_0F38;
13009 --j;
13010 break;
13011 case 0x3a:
13012 i.insn_opcode_space = SPACE_0F3A;
13013 --j;
13014 break;
13015 }
13016 i.tm.opcode_space = i.insn_opcode_space;
13017 val &= ((uint64_t)1 << (j * 8)) - 1;
13018 }
13019 if (!i.tm.opcode_space && (vex || evex))
13020 /* Arrange for build_vex_prefix() to properly emit 0xC4/0xC5.
13021 Also avoid hitting abort() there or in build_evex_prefix(). */
13022 i.tm.opcode_space = i.insn_opcode_space == SPACE_0F ? SPACE_0F
13023 : SPACE_0F38;
13024
13025 if (j > 2)
13026 {
13027 as_bad (_("opcode residual (%#"PRIx64") too wide"), (uint64_t) val);
13028 goto done;
13029 }
13030 i.opcode_length = j;
13031
13032 /* Handle operands, if any. */
13033 if (i.operands)
13034 {
13035 i386_operand_type combined;
13036 expressionS *disp_exp = NULL;
13037 bool changed;
13038
13039 if (i.encoding == encoding_egpr)
13040 {
13041 if (vex || xop)
13042 {
13043 as_bad (_("eGPR use conflicts with encoding specifier"));
13044 goto done;
13045 }
13046 if (evex)
13047 i.encoding = encoding_evex;
13048 else
13049 i.encoding = encoding_default;
13050 }
13051
13052 /* Are we to emit ModR/M encoding? */
13053 if (!i.short_form
13054 && (i.mem_operands
13055 || i.reg_operands > (i.encoding != encoding_default)
13056 || i.tm.extension_opcode != None))
13057 i.tm.opcode_modifier.modrm = 1;
13058
13059 if (!i.tm.opcode_modifier.modrm
13060 && (i.reg_operands
13061 > i.short_form + 0U + (i.encoding != encoding_default)
13062 || i.mem_operands))
13063 {
13064 as_bad (_("too many register/memory operands"));
13065 goto done;
13066 }
13067
13068 /* Enforce certain constraints on operands. */
13069 switch (i.reg_operands + i.mem_operands
13070 + (i.tm.extension_opcode != None))
13071 {
13072 case 0:
13073 if (i.short_form)
13074 {
13075 as_bad (_("too few register/memory operands"));
13076 goto done;
13077 }
13078 /* Fall through. */
13079 case 1:
13080 if (i.tm.opcode_modifier.modrm)
13081 {
13082 as_bad (_("too few register/memory operands"));
13083 goto done;
13084 }
13085 break;
13086
13087 case 2:
13088 break;
13089
13090 case 4:
13091 if (i.imm_operands
13092 && (i.op[0].imms->X_op != O_constant
13093 || !fits_in_imm4 (i.op[0].imms->X_add_number)))
13094 {
13095 as_bad (_("constant doesn't fit in %d bits"), evex ? 3 : 4);
13096 goto done;
13097 }
13098 /* Fall through. */
13099 case 3:
13100 if (i.encoding != encoding_default)
13101 {
13102 i.tm.opcode_modifier.vexvvvv = 1;
13103 break;
13104 }
13105 /* Fall through. */
13106 default:
13107 as_bad (_("too many register/memory operands"));
13108 goto done;
13109 }
13110
13111 /* Bring operands into canonical order (imm, mem, reg). */
13112 do
13113 {
13114 changed = false;
13115
13116 for (j = 1; j < i.operands; ++j)
13117 {
13118 if ((!operand_type_check (i.types[j - 1], imm)
13119 && operand_type_check (i.types[j], imm))
13120 || (i.types[j - 1].bitfield.class != ClassNone
13121 && i.types[j].bitfield.class == ClassNone))
13122 {
13123 swap_2_operands (j - 1, j);
13124 changed = true;
13125 }
13126 }
13127 }
13128 while (changed);
13129
13130 /* For Intel syntax swap the order of register operands. */
13131 if (intel_syntax)
13132 switch (i.reg_operands)
13133 {
13134 case 0:
13135 case 1:
13136 break;
13137
13138 case 4:
13139 swap_2_operands (i.imm_operands + i.mem_operands + 1, i.operands - 2);
13140 /* Fall through. */
13141 case 3:
13142 case 2:
13143 swap_2_operands (i.imm_operands + i.mem_operands, i.operands - 1);
13144 break;
13145
13146 default:
13147 abort ();
13148 }
13149
13150 /* Enforce constraints when using VSIB. */
13151 if (i.index_reg
13152 && (i.index_reg->reg_type.bitfield.xmmword
13153 || i.index_reg->reg_type.bitfield.ymmword
13154 || i.index_reg->reg_type.bitfield.zmmword))
13155 {
13156 if (i.encoding == encoding_default)
13157 {
13158 as_bad (_("VSIB unavailable with legacy encoding"));
13159 goto done;
13160 }
13161
13162 if (i.encoding == encoding_evex
13163 && i.reg_operands > 1)
13164 {
13165 /* We could allow two register operands, encoding the 2nd one in
13166 an 8-bit immediate like for 4-register-operand insns, but that
13167 would require ugly fiddling with process_operands() and/or
13168 build_modrm_byte(). */
13169 as_bad (_("too many register operands with VSIB"));
13170 goto done;
13171 }
13172
13173 i.tm.opcode_modifier.sib = 1;
13174 }
13175
13176 /* Establish operand size encoding. */
13177 operand_type_set (&combined, 0);
13178
13179 for (j = i.imm_operands; j < i.operands; ++j)
13180 {
13181 /* Look for 8-bit operands that use old registers. */
13182 if (i.encoding != encoding_default
13183 && flag_code == CODE_64BIT
13184 && i.types[j].bitfield.class == Reg
13185 && i.types[j].bitfield.byte
13186 && !(i.op[j].regs->reg_flags & RegRex64)
13187 && i.op[j].regs->reg_num > 3)
13188 as_bad (_("can't encode register '%s%s' with VEX/XOP/EVEX"),
13189 register_prefix, i.op[j].regs->reg_name);
13190
13191 i.types[j].bitfield.instance = InstanceNone;
13192
13193 if (operand_type_check (i.types[j], disp))
13194 {
13195 i.types[j].bitfield.baseindex = 1;
13196 disp_exp = i.op[j].disps;
13197 }
13198
13199 if (evex && i.types[j].bitfield.baseindex)
13200 {
13201 unsigned int n = i.memshift;
13202
13203 if (i.types[j].bitfield.byte)
13204 n = 0;
13205 else if (i.types[j].bitfield.word)
13206 n = 1;
13207 else if (i.types[j].bitfield.dword)
13208 n = 2;
13209 else if (i.types[j].bitfield.qword)
13210 n = 3;
13211 else if (i.types[j].bitfield.xmmword)
13212 n = 4;
13213 else if (i.types[j].bitfield.ymmword)
13214 n = 5;
13215 else if (i.types[j].bitfield.zmmword)
13216 n = 6;
13217
13218 if (i.memshift < 32 && n != i.memshift)
13219 as_warn ("conflicting memory operand size specifiers");
13220 i.memshift = n;
13221 }
13222
13223 if ((i.broadcast.type || i.broadcast.bytes)
13224 && j == i.broadcast.operand)
13225 continue;
13226
13227 combined = operand_type_or (combined, i.types[j]);
13228 combined.bitfield.class = ClassNone;
13229 }
13230
13231 switch ((i.broadcast.type ? i.broadcast.type : 1)
13232 << (i.memshift < 32 ? i.memshift : 0))
13233 {
13234 case 64: combined.bitfield.zmmword = 1; break;
13235 case 32: combined.bitfield.ymmword = 1; break;
13236 case 16: combined.bitfield.xmmword = 1; break;
13237 case 8: combined.bitfield.qword = 1; break;
13238 case 4: combined.bitfield.dword = 1; break;
13239 }
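/* E.g. (illustrative): a plain 16-byte memory operand (i.memshift == 4)
   selects xmmword above, while an 8-byte element broadcast 1-to-8
   (i.broadcast.type == 8, i.memshift == 3) selects zmmword.  */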
13240
13241 if (i.encoding == encoding_default)
13242 {
13243 if (flag_code == CODE_64BIT && combined.bitfield.qword)
13244 i.rex |= REX_W;
13245 else if ((flag_code == CODE_16BIT ? combined.bitfield.dword
13246 : combined.bitfield.word)
13247 && !add_prefix (DATA_PREFIX_OPCODE))
13248 goto done;
13249 }
13250 else if (!i.tm.opcode_modifier.vexw)
13251 {
13252 if (flag_code == CODE_64BIT)
13253 {
13254 if (combined.bitfield.qword)
13255 i.tm.opcode_modifier.vexw = VEXW1;
13256 else if (combined.bitfield.dword)
13257 i.tm.opcode_modifier.vexw = VEXW0;
13258 }
13259
13260 if (!i.tm.opcode_modifier.vexw)
13261 i.tm.opcode_modifier.vexw = VEXWIG;
13262 }
13263
13264 if (vex || xop)
13265 {
13266 if (!i.tm.opcode_modifier.vex)
13267 {
13268 if (combined.bitfield.ymmword)
13269 i.tm.opcode_modifier.vex = VEX256;
13270 else if (combined.bitfield.xmmword)
13271 i.tm.opcode_modifier.vex = VEX128;
13272 }
13273 }
13274 else if (evex)
13275 {
13276 if (!i.tm.opcode_modifier.evex)
13277 {
13278 /* Do _not_ consider AVX512VL here. */
13279 if (i.rounding.type != rc_none || combined.bitfield.zmmword)
13280 i.tm.opcode_modifier.evex = EVEX512;
13281 else if (combined.bitfield.ymmword)
13282 i.tm.opcode_modifier.evex = EVEX256;
13283 else if (combined.bitfield.xmmword)
13284 i.tm.opcode_modifier.evex = EVEX128;
13285 }
13286
13287 if (i.memshift >= 32)
13288 {
13289 unsigned int n = 0;
13290
13291 switch (i.tm.opcode_modifier.evex)
13292 {
13293 case EVEX512: n = 64; break;
13294 case EVEX256: n = 32; break;
13295 case EVEX128: n = 16; break;
13296 }
13297
13298 if (i.broadcast.type)
13299 n /= i.broadcast.type;
13300
13301 if (n > 0)
13302 for (i.memshift = 0; !(n & 1); n >>= 1)
13303 ++i.memshift;
13304 else if (disp_exp != NULL && disp_exp->X_op == O_constant
13305 && disp_exp->X_add_number != 0
13306 && i.disp_encoding != disp_encoding_32bit)
13307 {
13308 if (!quiet_warnings)
13309 as_warn (_("cannot determine memory operand size"));
13310 i.disp_encoding = disp_encoding_32bit;
13311 }
13312 }
13313 }
13314
13315 if (i.memshift >= 32)
13316 i.memshift = 0;
13317 else if (!evex)
13318 i.encoding = encoding_error;
13319
13320 if (i.disp_operands && !optimize_disp (&i.tm))
13321 goto done;
13322
13323 /* Establish size for immediate operands. */
13324 for (j = 0; j < i.imm_operands; ++j)
13325 {
13326 expressionS *expP = i.op[j].imms;
13327
13328 gas_assert (operand_type_check (i.types[j], imm));
13329 operand_type_set (&i.types[j], 0);
13330
13331 if (i.imm_bits[j] > 32)
13332 i.types[j].bitfield.imm64 = 1;
13333 else if (i.imm_bits[j] > 16)
13334 {
13335 if (flag_code == CODE_64BIT && (i.flags[j] & Operand_Signed))
13336 i.types[j].bitfield.imm32s = 1;
13337 else
13338 i.types[j].bitfield.imm32 = 1;
13339 }
13340 else if (i.imm_bits[j] > 8)
13341 i.types[j].bitfield.imm16 = 1;
13342 else if (i.imm_bits[j] > 0)
13343 {
13344 if (i.flags[j] & Operand_Signed)
13345 i.types[j].bitfield.imm8s = 1;
13346 else
13347 i.types[j].bitfield.imm8 = 1;
13348 }
13349 else if (expP->X_op == O_constant)
13350 {
13351 i.types[j] = smallest_imm_type (expP->X_add_number);
13352 i.types[j].bitfield.imm1 = 0;
13353 /* Oddly enough imm_size() checks imm64 first, so the bit needs
13354 zapping since smallest_imm_type() sets it unconditionally. */
13355 if (flag_code != CODE_64BIT)
13356 {
13357 i.types[j].bitfield.imm64 = 0;
13358 i.types[j].bitfield.imm32s = 0;
13359 i.types[j].bitfield.imm32 = 1;
13360 }
13361 else if (i.types[j].bitfield.imm32 || i.types[j].bitfield.imm32s)
13362 i.types[j].bitfield.imm64 = 0;
13363 }
13364 else
13365 /* Non-constant expressions are sized heuristically. */
13366 switch (flag_code)
13367 {
13368 case CODE_64BIT: i.types[j].bitfield.imm32s = 1; break;
13369 case CODE_32BIT: i.types[j].bitfield.imm32 = 1; break;
13370 case CODE_16BIT: i.types[j].bitfield.imm16 = 1; break;
13371 }
13372 }
13373
13374 for (j = 0; j < i.operands; ++j)
13375 i.tm.operand_types[j] = i.types[j];
13376
13377 process_operands ();
13378 }
13379
13380 /* Don't set opcode until after processing operands, to avoid any
13381 potential special casing there. */
13382 i.tm.base_opcode |= val;
13383
13384 if (i.encoding == encoding_error
13385 || (i.encoding != encoding_evex
13386 ? i.broadcast.type || i.broadcast.bytes
13387 || i.rounding.type != rc_none
13388 || i.mask.reg
13389 : (i.mem_operands && i.rounding.type != rc_none)
13390 || ((i.broadcast.type || i.broadcast.bytes)
13391 && !(i.flags[i.broadcast.operand] & Operand_Mem))))
13392 {
13393 as_bad (_("conflicting .insn operands"));
13394 goto done;
13395 }
13396
13397 if (vex || xop)
13398 {
13399 if (!i.tm.opcode_modifier.vex)
13400 i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
13401
13402 build_vex_prefix (NULL);
13403 i.rex &= REX_OPCODE;
13404 }
13405 else if (evex)
13406 {
13407 if (!i.tm.opcode_modifier.evex)
13408 i.tm.opcode_modifier.evex = EVEXLIG;
13409
13410 build_evex_prefix ();
13411 i.rex &= REX_OPCODE;
13412 }
13413 else
13414 establish_rex ();
13415
13416 last_insn = &seg_info (now_seg)->tc_segment_info_data.last_insn;
13417 output_insn (last_insn);
13418 last_insn->kind = last_insn_directive;
13419 last_insn->name = ".insn directive";
13420 last_insn->file = as_where (&last_insn->line);
13421
13422 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
13423 /* PS: SCFI is enabled only for System V AMD64 ABI. The ABI check has been
13424 performed in i386_target_format. */
13425 if (IS_ELF && flag_synth_cfi)
13426 as_bad (_("SCFI: hand-crafting instructions not supported"));
13427 #endif
13428
13429 done:
13430 *saved_ilp = saved_char;
13431 input_line_pointer = line;
13432
13433 demand_empty_rest_of_line ();
13434
13435 /* Make sure dot_insn() won't yield "true" anymore. */
13436 i.tm.mnem_off = 0;
13437 }
13438
13439 #ifdef TE_PE
13440 static void
13441 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
13442 {
13443 expressionS exp;
13444
13445 do
13446 {
13447 expression (&exp);
13448 if (exp.X_op == O_symbol)
13449 exp.X_op = O_secrel;
13450
13451 emit_expr (&exp, 4);
13452 }
13453 while (*input_line_pointer++ == ',');
13454
13455 input_line_pointer--;
13456 demand_empty_rest_of_line ();
13457 }
13458
13459 static void
13460 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
13461 {
13462 expressionS exp;
13463
13464 do
13465 {
13466 expression (&exp);
13467 if (exp.X_op == O_symbol)
13468 exp.X_op = O_secidx;
13469
13470 emit_expr (&exp, 2);
13471 }
13472 while (*input_line_pointer++ == ',');
13473
13474 input_line_pointer--;
13475 demand_empty_rest_of_line ();
13476 }
13477 #endif
13478
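/* Illustrative sketch only: the two TE_PE handlers above implement the
   .secrel32 and .secidx directives ("foo" is a placeholder symbol):

       .secrel32 foo    # 4-byte section-relative offset of foo
       .secidx foo      # 2-byte index of the section defining foo

   Both accept comma-separated lists, one expression per emitted value.  */
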
13479 /* Handle Rounding Control / SAE specifiers. */
13480
13481 static char *
13482 RC_SAE_specifier (const char *pstr)
13483 {
13484 unsigned int j;
13485
13486 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
13487 {
13488 if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
13489 {
13490 if (i.rounding.type != rc_none)
13491 {
13492 as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
13493 return NULL;
13494 }
13495
13496 switch (i.encoding)
13497 {
13498 case encoding_default:
13499 case encoding_egpr:
13500 i.encoding = encoding_evex512;
13501 break;
13502 case encoding_evex:
13503 case encoding_evex512:
13504 break;
13505 default:
13506 return NULL;
13507 }
13508
13509 i.rounding.type = RC_NamesTable[j].type;
13510
13511 return (char *)(pstr + RC_NamesTable[j].len);
13512 }
13513 }
13514
13515 return NULL;
13516 }
13517
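/* Illustrative sketch only: in Intel syntax a rounding-control / SAE
   specifier appears as a trailing braced operand, e.g. (registers are
   placeholders):

       vaddps zmm1, zmm2, zmm3, {rd-sae}

   RC_SAE_specifier() matches "rd-sae" in RC_NamesTable, records
   i.rounding.type, and upgrades i.encoding to encoding_evex512 when no
   EVEX encoding was requested yet, per the switch below.  */
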
13518 /* Handle Vector operations. */
13519
13520 static char *
13521 check_VecOperations (char *op_string)
13522 {
13523 const reg_entry *mask;
13524 const char *saved;
13525 char *end_op;
13526
13527 while (*op_string)
13528 {
13529 saved = op_string;
13530 if (*op_string == '{')
13531 {
13532 op_string++;
13533
13534 /* Check broadcasts. */
13535 if (startswith (op_string, "1to"))
13536 {
13537 unsigned int bcst_type;
13538
13539 if (i.broadcast.type)
13540 goto duplicated_vec_op;
13541
13542 op_string += 3;
13543 if (*op_string == '8')
13544 bcst_type = 8;
13545 else if (*op_string == '4')
13546 bcst_type = 4;
13547 else if (*op_string == '2')
13548 bcst_type = 2;
13549 else if (*op_string == '1'
13550 && *(op_string+1) == '6')
13551 {
13552 bcst_type = 16;
13553 op_string++;
13554 }
13555 else if (*op_string == '3'
13556 && *(op_string+1) == '2')
13557 {
13558 bcst_type = 32;
13559 op_string++;
13560 }
13561 else
13562 {
13563 as_bad (_("Unsupported broadcast: `%s'"), saved);
13564 return NULL;
13565 }
13566 op_string++;
13567
13568 switch (i.encoding)
13569 {
13570 case encoding_default:
13571 case encoding_egpr:
13572 i.encoding = encoding_evex;
13573 break;
13574 case encoding_evex:
13575 case encoding_evex512:
13576 break;
13577 default:
13578 goto unknown_vec_op;
13579 }
13580
13581 i.broadcast.type = bcst_type;
13582 i.broadcast.operand = this_operand;
13583
13584 /* For .insn a data size specifier may be appended. */
13585 if (dot_insn () && *op_string == ':')
13586 goto dot_insn_modifier;
13587 }
13588 /* Check .insn special cases. */
13589 else if (dot_insn () && *op_string == ':')
13590 {
13591 dot_insn_modifier:
13592 switch (op_string[1])
13593 {
13594 unsigned long n;
13595
13596 case 'd':
13597 if (i.memshift < 32)
13598 goto duplicated_vec_op;
13599
13600 n = strtoul (op_string + 2, &end_op, 0);
13601 if (n)
13602 for (i.memshift = 0; !(n & 1); n >>= 1)
13603 ++i.memshift;
13604 if (i.memshift < 32 && n == 1)
13605 op_string = end_op;
13606 break;
13607
13608 case 's': case 'u':
13609 /* This isn't really a "vector" operation, but a sign/size
13610 specifier for immediate operands of .insn. Note that AT&T
13611 syntax handles these the same way in i386_immediate(). */
13612 if (!intel_syntax)
13613 break;
13614
13615 if (i.imm_bits[this_operand])
13616 goto duplicated_vec_op;
13617
13618 n = strtoul (op_string + 2, &end_op, 0);
13619 if (n && n <= (flag_code == CODE_64BIT ? 64 : 32))
13620 {
13621 i.imm_bits[this_operand] = n;
13622 if (op_string[1] == 's')
13623 i.flags[this_operand] |= Operand_Signed;
13624 op_string = end_op;
13625 }
13626 break;
13627 }
13628 }
13629 /* Check masking operation. */
13630 else if ((mask = parse_register (op_string, &end_op)) != NULL)
13631 {
13632 if (mask == &bad_reg)
13633 return NULL;
13634
13635 /* k0 can't be used for write mask. */
13636 if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
13637 {
13638 as_bad (_("`%s%s' can't be used for write mask"),
13639 register_prefix, mask->reg_name);
13640 return NULL;
13641 }
13642
13643 if (!i.mask.reg)
13644 {
13645 i.mask.reg = mask;
13646 i.mask.operand = this_operand;
13647 }
13648 else if (i.mask.reg->reg_num)
13649 goto duplicated_vec_op;
13650 else
13651 {
13652 i.mask.reg = mask;
13653
13654 /* Only "{z}" is allowed here. No need to check
13655 zeroing mask explicitly. */
13656 if (i.mask.operand != (unsigned int) this_operand)
13657 {
13658 as_bad (_("invalid write mask `%s'"), saved);
13659 return NULL;
13660 }
13661 }
13662
13663 op_string = end_op;
13664 }
13665 /* Check zeroing-flag for masking operation. */
13666 else if (*op_string == 'z')
13667 {
13668 if (!i.mask.reg)
13669 {
13670 i.mask.reg = reg_k0;
13671 i.mask.zeroing = 1;
13672 i.mask.operand = this_operand;
13673 }
13674 else
13675 {
13676 if (i.mask.zeroing)
13677 {
13678 duplicated_vec_op:
13679 as_bad (_("duplicated `%s'"), saved);
13680 return NULL;
13681 }
13682
13683 i.mask.zeroing = 1;
13684
13685 /* Only "{%k}" is allowed here. No need to check mask
13686 register explicitly. */
13687 if (i.mask.operand != (unsigned int) this_operand)
13688 {
13689 as_bad (_("invalid zeroing-masking `%s'"),
13690 saved);
13691 return NULL;
13692 }
13693 }
13694
13695 op_string++;
13696 }
13697 else if (intel_syntax
13698 && (op_string = RC_SAE_specifier (op_string)) != NULL)
13699 i.rounding.modifier = true;
13700 else
13701 goto unknown_vec_op;
13702
13703 if (*op_string != '}')
13704 {
13705 as_bad (_("missing `}' in `%s'"), saved);
13706 return NULL;
13707 }
13708 op_string++;
13709
13710 /* Strip whitespace since the addition of pseudo prefixes
13711 changed how the scrubber treats '{'. */
13712 if (is_space_char (*op_string))
13713 ++op_string;
13714
13715 continue;
13716 }
13717 unknown_vec_op:
13718 /* We don't know this one. */
13719 as_bad (_("unknown vector operation: `%s'"), saved);
13720 return NULL;
13721 }
13722
13723 if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
13724 {
13725 as_bad (_("zeroing-masking only allowed with write mask"));
13726 return NULL;
13727 }
13728
13729 return op_string;
13730 }
13731
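/* Illustrative sketch only: check_VecOperations() consumes the braced
   trailers of an operand, e.g. (registers are placeholders):

       vaddps (%rax){1to16}, %zmm1, %zmm2{%k1}{z}

   "{1to16}" sets i.broadcast.type = 16 on the memory operand, while
   "{%k1}{z}" records write mask k1 plus zeroing-masking on the
   destination.  A bare "{z}" without a prior "{%kN}" trips the final
   "zeroing-masking only allowed with write mask" check.  */
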
13732 static int
13733 i386_immediate (char *imm_start)
13734 {
13735 char *save_input_line_pointer;
13736 char *gotfree_input_line;
13737 segT exp_seg = 0;
13738 expressionS *exp;
13739 i386_operand_type types;
13740
13741 operand_type_set (&types, ~0);
13742
13743 if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
13744 {
13745 as_bad (_("at most %d immediate operands are allowed"),
13746 MAX_IMMEDIATE_OPERANDS);
13747 return 0;
13748 }
13749
13750 exp = &im_expressions[i.imm_operands++];
13751 i.op[this_operand].imms = exp;
13752
13753 if (is_space_char (*imm_start))
13754 ++imm_start;
13755
13756 save_input_line_pointer = input_line_pointer;
13757 input_line_pointer = imm_start;
13758
13759 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
13760 if (gotfree_input_line)
13761 input_line_pointer = gotfree_input_line;
13762
13763 expr_mode = expr_operator_none;
13764 exp_seg = expression (exp);
13765
13766 /* For .insn immediates there may be a size specifier. */
13767 if (dot_insn () && *input_line_pointer == '{' && input_line_pointer[1] == ':'
13768 && (input_line_pointer[2] == 's' || input_line_pointer[2] == 'u'))
13769 {
13770 char *e;
13771 unsigned long n = strtoul (input_line_pointer + 3, &e, 0);
13772
13773 if (*e == '}' && n && n <= (flag_code == CODE_64BIT ? 64 : 32))
13774 {
13775 i.imm_bits[this_operand] = n;
13776 if (input_line_pointer[2] == 's')
13777 i.flags[this_operand] |= Operand_Signed;
13778 input_line_pointer = e + 1;
13779 }
13780 }
13781
13782 SKIP_WHITESPACE ();
13783 if (*input_line_pointer)
13784 as_bad (_("junk `%s' after expression"), input_line_pointer);
13785
13786 input_line_pointer = save_input_line_pointer;
13787 if (gotfree_input_line)
13788 {
13789 free (gotfree_input_line);
13790
13791 if (exp->X_op == O_constant)
13792 exp->X_op = O_illegal;
13793 }
13794
13795 if (exp_seg == reg_section)
13796 {
13797 as_bad (_("illegal immediate register operand %s"), imm_start);
13798 return 0;
13799 }
13800
13801 return i386_finalize_immediate (exp_seg, exp, types, imm_start);
13802 }
13803
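/* Illustrative sketch only: for .insn, AT&T-syntax immediates may carry
   the {:s<N>}/{:u<N>} size specifier parsed above, e.g.

       $0x12{:u8}     # i.imm_bits == 8, unsigned
       $-4{:s16}      # i.imm_bits == 16, Operand_Signed set

   Sizes are capped at 32 bits outside 64-bit mode (64 within).  */
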
13804 static int
13805 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
13806 i386_operand_type types, const char *imm_start)
13807 {
13808 if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
13809 {
13810 if (imm_start)
13811 as_bad (_("missing or invalid immediate expression `%s'"),
13812 imm_start);
13813 return 0;
13814 }
13815 else if (exp->X_op == O_constant)
13816 {
13817 /* Size it properly later. */
13818 i.types[this_operand].bitfield.imm64 = 1;
13819
13820 /* If not 64bit, sign/zero extend val, to account for wraparound
13821 when !BFD64. */
13822 if (expr_mode == expr_operator_present
13823 && flag_code != CODE_64BIT && !object_64bit)
13824 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
13825 }
13826 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
13827 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
13828 && exp_seg != absolute_section
13829 && exp_seg != text_section
13830 && exp_seg != data_section
13831 && exp_seg != bss_section
13832 && exp_seg != undefined_section
13833 && !bfd_is_com_section (exp_seg))
13834 {
13835 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
13836 return 0;
13837 }
13838 #endif
13839 else
13840 {
13841 /* This is an address. The size of the address will be
13842 determined later, depending on destination register,
13843 suffix, or the default for the section. */
13844 i.types[this_operand].bitfield.imm8 = 1;
13845 i.types[this_operand].bitfield.imm16 = 1;
13846 i.types[this_operand].bitfield.imm32 = 1;
13847 i.types[this_operand].bitfield.imm32s = 1;
13848 i.types[this_operand].bitfield.imm64 = 1;
13849 i.types[this_operand] = operand_type_and (i.types[this_operand],
13850 types);
13851 }
13852
13853 return 1;
13854 }
13855
13856 static char *
13857 i386_scale (char *scale)
13858 {
13859 offsetT val;
13860 char *save = input_line_pointer;
13861
13862 input_line_pointer = scale;
13863 val = get_absolute_expression ();
13864
13865 switch (val)
13866 {
13867 case 1:
13868 i.log2_scale_factor = 0;
13869 break;
13870 case 2:
13871 i.log2_scale_factor = 1;
13872 break;
13873 case 4:
13874 i.log2_scale_factor = 2;
13875 break;
13876 case 8:
13877 i.log2_scale_factor = 3;
13878 break;
13879 default:
13880 {
13881 char sep = *input_line_pointer;
13882
13883 *input_line_pointer = '\0';
13884 as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
13885 scale);
13886 *input_line_pointer = sep;
13887 input_line_pointer = save;
13888 return NULL;
13889 }
13890 }
13891 if (i.log2_scale_factor != 0 && i.index_reg == 0)
13892 {
13893 as_warn (_("scale factor of %d without an index register"),
13894 1 << i.log2_scale_factor);
13895 i.log2_scale_factor = 0;
13896 }
13897 scale = input_line_pointer;
13898 input_line_pointer = save;
13899 return scale;
13900 }
13901
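/* Illustrative sketch only: i386_scale() turns the AT&T scale field into
   a shift count, e.g. in

       movl (%ebx,%esi,4), %eax

   the "4" yields i.log2_scale_factor == 2.  A scale other than 1 with no
   index register is warned about and reset to 0, as above.  */
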
13902 static int
13903 i386_displacement (char *disp_start, char *disp_end)
13904 {
13905 expressionS *exp;
13906 segT exp_seg = 0;
13907 char *save_input_line_pointer;
13908 char *gotfree_input_line;
13909 int override;
13910 i386_operand_type bigdisp, types = anydisp;
13911 int ret;
13912
13913 if (i.disp_operands == MAX_MEMORY_OPERANDS)
13914 {
13915 as_bad (_("at most %d displacement operands are allowed"),
13916 MAX_MEMORY_OPERANDS);
13917 return 0;
13918 }
13919
13920 operand_type_set (&bigdisp, 0);
13921 if (i.jumpabsolute
13922 || i.types[this_operand].bitfield.baseindex
13923 || (current_templates.start->opcode_modifier.jump != JUMP
13924 && current_templates.start->opcode_modifier.jump != JUMP_DWORD))
13925 {
13926 i386_addressing_mode ();
13927 override = (i.prefix[ADDR_PREFIX] != 0);
13928 if (flag_code == CODE_64BIT)
13929 {
13930 bigdisp.bitfield.disp32 = 1;
13931 if (!override)
13932 bigdisp.bitfield.disp64 = 1;
13933 }
13934 else if ((flag_code == CODE_16BIT) ^ override)
13935 bigdisp.bitfield.disp16 = 1;
13936 else
13937 bigdisp.bitfield.disp32 = 1;
13938 }
13939 else
13940 {
13941 /* For PC-relative branches, the width of the displacement may be
13942 dependent upon data size, but is never dependent upon address size.
13943 Also make sure to not unintentionally match against a non-PC-relative
13944 branch template. */
13945 const insn_template *t = current_templates.start;
13946 bool has_intel64 = false;
13947
13948 while (++t < current_templates.end)
13949 {
13950 if (t->opcode_modifier.jump
13951 != current_templates.start->opcode_modifier.jump)
13952 break;
13953 if ((t->opcode_modifier.isa64 >= INTEL64))
13954 has_intel64 = true;
13955 }
13956 current_templates.end = t;
13957
13958 override = (i.prefix[DATA_PREFIX] != 0);
13959 if (flag_code == CODE_64BIT)
13960 {
13961 if ((override || i.suffix == WORD_MNEM_SUFFIX)
13962 && (!intel64 || !has_intel64))
13963 bigdisp.bitfield.disp16 = 1;
13964 else
13965 bigdisp.bitfield.disp32 = 1;
13966 }
13967 else
13968 {
13969 if (!override)
13970 override = (i.suffix == (flag_code != CODE_16BIT
13971 ? WORD_MNEM_SUFFIX
13972 : LONG_MNEM_SUFFIX));
13973 bigdisp.bitfield.disp32 = 1;
13974 if ((flag_code == CODE_16BIT) ^ override)
13975 {
13976 bigdisp.bitfield.disp32 = 0;
13977 bigdisp.bitfield.disp16 = 1;
13978 }
13979 }
13980 }
13981 i.types[this_operand] = operand_type_or (i.types[this_operand],
13982 bigdisp);
13983
13984 exp = &disp_expressions[i.disp_operands];
13985 i.op[this_operand].disps = exp;
13986 i.disp_operands++;
13987 save_input_line_pointer = input_line_pointer;
13988 input_line_pointer = disp_start;
13989 END_STRING_AND_SAVE (disp_end);
13990
13991 #ifndef GCC_ASM_O_HACK
13992 #define GCC_ASM_O_HACK 0
13993 #endif
13994 #if GCC_ASM_O_HACK
13995 END_STRING_AND_SAVE (disp_end + 1);
13996 if (i.types[this_operand].bitfield.baseindex
13997 && disp_end[-1] == '+')
13998 {
13999 /* This hack is to avoid a warning when using the "o"
14000 constraint within gcc asm statements.
14001 For instance:
14002
14003 #define _set_tssldt_desc(n,addr,limit,type) \
14004 __asm__ __volatile__ ( \
14005 "movw %w2,%0\n\t" \
14006 "movw %w1,2+%0\n\t" \
14007 "rorl $16,%1\n\t" \
14008 "movb %b1,4+%0\n\t" \
14009 "movb %4,5+%0\n\t" \
14010 "movb $0,6+%0\n\t" \
14011 "movb %h1,7+%0\n\t" \
14012 "rorl $16,%1" \
14013 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
14014
14015 This works great except that the output assembler ends
14016 up looking a bit weird if it turns out that there is
14017 no offset. You end up producing code that looks like:
14018
14019 #APP
14020 movw $235,(%eax)
14021 movw %dx,2+(%eax)
14022 rorl $16,%edx
14023 movb %dl,4+(%eax)
14024 movb $137,5+(%eax)
14025 movb $0,6+(%eax)
14026 movb %dh,7+(%eax)
14027 rorl $16,%edx
14028 #NO_APP
14029
14030 So here we provide the missing zero. */
14031
14032 *disp_end = '0';
14033 }
14034 #endif
14035 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
14036 if (gotfree_input_line)
14037 input_line_pointer = gotfree_input_line;
14038
14039 expr_mode = expr_operator_none;
14040 exp_seg = expression (exp);
14041
14042 SKIP_WHITESPACE ();
14043 if (*input_line_pointer)
14044 as_bad (_("junk `%s' after expression"), input_line_pointer);
14045 #if GCC_ASM_O_HACK
14046 RESTORE_END_STRING (disp_end + 1);
14047 #endif
14048 input_line_pointer = save_input_line_pointer;
14049 if (gotfree_input_line)
14050 {
14051 free (gotfree_input_line);
14052
14053 if (exp->X_op == O_constant || exp->X_op == O_register)
14054 exp->X_op = O_illegal;
14055 }
14056
14057 ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
14058
14059 RESTORE_END_STRING (disp_end);
14060
14061 return ret;
14062 }
14063
14064 static int
14065 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
14066 i386_operand_type types, const char *disp_start)
14067 {
14068 int ret = 1;
14069
14070 /* We do this to make sure that the section symbol is in
14071 the symbol table. We will ultimately change the relocation
14072 to be relative to the beginning of the section. */
14073 if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
14074 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
14075 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
14076 {
14077 if (exp->X_op != O_symbol)
14078 goto inv_disp;
14079
14080 if (S_IS_LOCAL (exp->X_add_symbol)
14081 && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
14082 && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
14083 section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
14084 exp->X_op = O_subtract;
14085 exp->X_op_symbol = GOT_symbol;
14086 if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
14087 i.reloc[this_operand] = BFD_RELOC_32_PCREL;
14088 else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
14089 i.reloc[this_operand] = BFD_RELOC_64;
14090 else
14091 i.reloc[this_operand] = BFD_RELOC_32;
14092 }
14093
14094 else if (exp->X_op == O_absent
14095 || exp->X_op == O_illegal
14096 || exp->X_op == O_big)
14097 {
14098 inv_disp:
14099 as_bad (_("missing or invalid displacement expression `%s'"),
14100 disp_start);
14101 ret = 0;
14102 }
14103
14104 else if (exp->X_op == O_constant)
14105 {
14106 /* Sizing gets taken care of by optimize_disp().
14107
14108 If not 64bit, sign/zero extend val, to account for wraparound
14109 when !BFD64. */
14110 if (expr_mode == expr_operator_present
14111 && flag_code != CODE_64BIT && !object_64bit)
14112 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
14113 }
14114
14115 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
14116 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
14117 && exp_seg != absolute_section
14118 && exp_seg != text_section
14119 && exp_seg != data_section
14120 && exp_seg != bss_section
14121 && exp_seg != undefined_section
14122 && !bfd_is_com_section (exp_seg))
14123 {
14124 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
14125 ret = 0;
14126 }
14127 #endif
14128
14129 else if (current_templates.start->opcode_modifier.jump == JUMP_BYTE)
14130 i.types[this_operand].bitfield.disp8 = 1;
14131
14132 /* Check if this is a displacement only operand. */
14133 if (!i.types[this_operand].bitfield.baseindex)
14134 i.types[this_operand] =
14135 operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
14136 operand_type_and (i.types[this_operand], types));
14137
14138 return ret;
14139 }
14140
14141 /* Return the active addressing mode, taking address override and
14142 registers forming the address into consideration. Update the
14143 address override prefix if necessary. */
14144
14145 static enum flag_code
14146 i386_addressing_mode (void)
14147 {
14148 enum flag_code addr_mode;
14149
14150 if (i.prefix[ADDR_PREFIX])
14151 addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
14152 else if (flag_code == CODE_16BIT
14153 && is_cpu (current_templates.start, CpuMPX)
14154 /* Avoid replacing the "16-bit addressing not allowed" diagnostic
14155 from md_assemble() by "is not a valid base/index expression"
14156 when there is a base and/or index. */
14157 && !i.types[this_operand].bitfield.baseindex)
14158 {
14159 /* MPX insn memory operands with neither base nor index must be forced
14160 to use 32-bit addressing in 16-bit mode. */
14161 addr_mode = CODE_32BIT;
14162 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
14163 ++i.prefixes;
14164 gas_assert (!i.types[this_operand].bitfield.disp16);
14165 gas_assert (!i.types[this_operand].bitfield.disp32);
14166 }
14167 else
14168 {
14169 addr_mode = flag_code;
14170
14171 #if INFER_ADDR_PREFIX
14172 if (i.mem_operands == 0)
14173 {
14174 /* Infer address prefix from the first memory operand. */
14175 const reg_entry *addr_reg = i.base_reg;
14176
14177 if (addr_reg == NULL)
14178 addr_reg = i.index_reg;
14179
14180 if (addr_reg)
14181 {
14182 if (addr_reg->reg_type.bitfield.dword)
14183 addr_mode = CODE_32BIT;
14184 else if (flag_code != CODE_64BIT
14185 && addr_reg->reg_type.bitfield.word)
14186 addr_mode = CODE_16BIT;
14187
14188 if (addr_mode != flag_code)
14189 {
14190 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
14191 i.prefixes += 1;
14192 /* Change the size of any displacement too. At most one
14193 of Disp16 or Disp32 is set.
14194 FIXME. There doesn't seem to be any real need for
14195 separate Disp16 and Disp32 flags. The same goes for
14196 Imm16 and Imm32. Removing them would probably clean
14197 up the code quite a lot. */
14198 if (flag_code != CODE_64BIT
14199 && (i.types[this_operand].bitfield.disp16
14200 || i.types[this_operand].bitfield.disp32))
14201 {
14202 static const i386_operand_type disp16_32 = {
14203 .bitfield = { .disp16 = 1, .disp32 = 1 }
14204 };
14205
14206 i.types[this_operand]
14207 = operand_type_xor (i.types[this_operand], disp16_32);
14208 }
14209 }
14210 }
14211 }
14212 #endif
14213 }
14214
14215 return addr_mode;
14216 }
14217
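/* Illustrative sketch only: with INFER_ADDR_PREFIX, the registers of the
   first memory operand choose the address size.  In 32-bit code,

       movw (%bx,%si), %ax

   makes i386_addressing_mode() return CODE_16BIT, emit the 0x67
   address-size prefix, and flip Disp32 to Disp16 via the XOR above.  */
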
14218 /* Make sure the memory operand we've been dealt is valid.
14219 Return 1 on success, 0 on a failure. */
14220
14221 static int
14222 i386_index_check (const char *operand_string)
14223 {
14224 const char *kind = "base/index";
14225 enum flag_code addr_mode = i386_addressing_mode ();
14226 const insn_template *t = current_templates.end - 1;
14227
14228 if (t->opcode_modifier.isstring)
14229 {
14230 /* Memory operands of string insns are special in that they only allow
14231 a single register (rDI, rSI, or rBX) as their memory address. */
14232 const reg_entry *expected_reg;
14233 static const char di_si[][2][4] =
14234 {
14235 { "esi", "edi" },
14236 { "si", "di" },
14237 { "rsi", "rdi" }
14238 };
14239 static const char bx[][4] = { "ebx", "bx", "rbx" };
14240
14241 kind = "string address";
14242
14243 if (t->opcode_modifier.prefixok == PrefixRep)
14244 {
14245 int es_op = t->opcode_modifier.isstring - IS_STRING_ES_OP0;
14246 int op = 0;
14247
14248 if (!t->operand_types[0].bitfield.baseindex
14249 || ((!i.mem_operands != !intel_syntax)
14250 && t->operand_types[1].bitfield.baseindex))
14251 op = 1;
14252 expected_reg
14253 = (const reg_entry *) str_hash_find (reg_hash,
14254 di_si[addr_mode][op == es_op]);
14255 }
14256 else
14257 expected_reg
14258 = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
14259
14260 if (i.base_reg != expected_reg
14261 || i.index_reg
14262 || operand_type_check (i.types[this_operand], disp))
14263 {
14264 /* The second memory operand must have the same size as
14265 the first one. */
14266 if (i.mem_operands
14267 && i.base_reg
14268 && !((addr_mode == CODE_64BIT
14269 && i.base_reg->reg_type.bitfield.qword)
14270 || (addr_mode == CODE_32BIT
14271 ? i.base_reg->reg_type.bitfield.dword
14272 : i.base_reg->reg_type.bitfield.word)))
14273 goto bad_address;
14274
14275 as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
14276 operand_string,
14277 intel_syntax ? '[' : '(',
14278 register_prefix,
14279 expected_reg->reg_name,
14280 intel_syntax ? ']' : ')');
14281 return 1;
14282 }
14283 else
14284 return 1;
14285
14286 bad_address:
14287 as_bad (_("`%s' is not a valid %s expression"),
14288 operand_string, kind);
14289 return 0;
14290 }
14291 else
14292 {
14293 t = current_templates.start;
14294
14295 if (addr_mode != CODE_16BIT)
14296 {
14297 /* 32-bit/64-bit checks. */
14298 if (i.disp_encoding == disp_encoding_16bit)
14299 {
14300 bad_disp:
14301 as_bad (_("invalid `%s' prefix"),
14302 addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
14303 return 0;
14304 }
14305
14306 if ((i.base_reg
14307 && ((addr_mode == CODE_64BIT
14308 ? !i.base_reg->reg_type.bitfield.qword
14309 : !i.base_reg->reg_type.bitfield.dword)
14310 || (i.index_reg && i.base_reg->reg_num == RegIP)
14311 || i.base_reg->reg_num == RegIZ))
14312 || (i.index_reg
14313 && !i.index_reg->reg_type.bitfield.xmmword
14314 && !i.index_reg->reg_type.bitfield.ymmword
14315 && !i.index_reg->reg_type.bitfield.zmmword
14316 && ((addr_mode == CODE_64BIT
14317 ? !i.index_reg->reg_type.bitfield.qword
14318 : !i.index_reg->reg_type.bitfield.dword)
14319 || !i.index_reg->reg_type.bitfield.baseindex)))
14320 goto bad_address;
14321
14322 /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
14323 if (t->mnem_off == MN_bndmk
14324 || t->mnem_off == MN_bndldx
14325 || t->mnem_off == MN_bndstx
14326 || t->opcode_modifier.sib == SIBMEM)
14327 {
14328 /* They cannot use RIP-relative addressing. */
14329 if (i.base_reg && i.base_reg->reg_num == RegIP)
14330 {
14331 as_bad (_("`%s' cannot be used here"), operand_string);
14332 return 0;
14333 }
14334
14335 /* bndldx and bndstx ignore their scale factor. */
14336 if ((t->mnem_off == MN_bndldx || t->mnem_off == MN_bndstx)
14337 && i.log2_scale_factor)
14338 as_warn (_("register scaling is being ignored here"));
14339 }
14340 }
14341 else
14342 {
14343 /* 16-bit checks. */
14344 if (i.disp_encoding == disp_encoding_32bit)
14345 goto bad_disp;
14346
14347 if ((i.base_reg
14348 && (!i.base_reg->reg_type.bitfield.word
14349 || !i.base_reg->reg_type.bitfield.baseindex))
14350 || (i.index_reg
14351 && (!i.index_reg->reg_type.bitfield.word
14352 || !i.index_reg->reg_type.bitfield.baseindex
14353 || !(i.base_reg
14354 && i.base_reg->reg_num < 6
14355 && i.index_reg->reg_num >= 6
14356 && i.log2_scale_factor == 0))))
14357 goto bad_address;
14358 }
14359 }
14360 return 1;
14361 }
14362
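/* Illustrative sketch only: the 16-bit checks above encode the classic
   base/index pairing rules, e.g.

       movw (%bx,%si), %ax    # accepted: base in {bx,bp}, index in {si,di}
       movw (%si,%bx), %ax    # rejected: %bx cannot serve as the index

   while string insns are pinned to their architectural registers, so
   "lods (%edi)" draws a diagnostic where "lods (%esi)" does not.  */
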
14363 /* Handle vector immediates. */
14364
14365 static int
14366 RC_SAE_immediate (const char *imm_start)
14367 {
14368 const char *pstr = imm_start;
14369
14370 if (*pstr != '{')
14371 return 0;
14372
14373 pstr = RC_SAE_specifier (pstr + 1);
14374 if (pstr == NULL)
14375 return 0;
14376
14377 if (*pstr++ != '}')
14378 {
14379 as_bad (_("Missing '}': '%s'"), imm_start);
14380 return 0;
14381 }
14382 /* RC/SAE immediate string should contain nothing more. */
14383 if (*pstr != 0)
14384 {
14385 as_bad (_("Junk after '}': '%s'"), imm_start);
14386 return 0;
14387 }
14388
14389 /* Internally this doesn't count as an operand. */
14390 --i.operands;
14391
14392 return 1;
14393 }
14394
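/* Illustrative sketch only: in AT&T syntax the RC/SAE specifier is a
   standalone leading operand, e.g. (registers are placeholders):

       vaddps {rn-sae}, %zmm1, %zmm2, %zmm3

   RC_SAE_immediate() validates the braces and then decrements i.operands,
   since rounding control travels in the EVEX prefix rather than being
   encoded as a real operand.  */
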
14395 static INLINE bool starts_memory_operand (char c)
14396 {
14397 return ISDIGIT (c)
14398 || is_name_beginner (c)
14399 || strchr ("([\"+-!~", c);
14400 }
14401
14402 /* Parse OPERAND_STRING into the i386_insn structure I. Returns zero
14403 on error. */
14404
14405 static int
14406 i386_att_operand (char *operand_string)
14407 {
14408 const reg_entry *r;
14409 char *end_op;
14410 char *op_string = operand_string;
14411
14412 if (is_space_char (*op_string))
14413 ++op_string;
14414
14415 /* We check for an absolute prefix (differentiating,
14416 for example, 'jmp pc_relative_label' from 'jmp *absolute_label'). */
14417 if (*op_string == ABSOLUTE_PREFIX
14418 && current_templates.start->opcode_modifier.jump)
14419 {
14420 ++op_string;
14421 if (is_space_char (*op_string))
14422 ++op_string;
14423 i.jumpabsolute = true;
14424 }
14425
14426 /* Check if operand is a register. */
14427 if ((r = parse_register (op_string, &end_op)) != NULL)
14428 {
14429 i386_operand_type temp;
14430
14431 if (r == &bad_reg)
14432 return 0;
14433
14434 /* Check for a segment override by searching for ':' after a
14435 segment register. */
14436 op_string = end_op;
14437 if (is_space_char (*op_string))
14438 ++op_string;
14439 if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
14440 {
14441 i.seg[i.mem_operands] = r;
14442
14443 /* Skip the ':' and whitespace. */
14444 ++op_string;
14445 if (is_space_char (*op_string))
14446 ++op_string;
14447
14448 /* Handle case of %es:*foo. */
14449 if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX
14450 && current_templates.start->opcode_modifier.jump)
14451 {
14452 ++op_string;
14453 if (is_space_char (*op_string))
14454 ++op_string;
14455 i.jumpabsolute = true;
14456 }
14457
14458 if (!starts_memory_operand (*op_string))
14459 {
14460 as_bad (_("bad memory operand `%s'"), op_string);
14461 return 0;
14462 }
14463 goto do_memory_reference;
14464 }
14465
14466 /* Handle vector operations. */
14467 if (*op_string == '{')
14468 {
14469 op_string = check_VecOperations (op_string);
14470 if (op_string == NULL)
14471 return 0;
14472 }
14473
14474 if (*op_string)
14475 {
14476 as_bad (_("junk `%s' after register"), op_string);
14477 return 0;
14478 }
14479
14480 /* Reject pseudo registers for .insn. */
14481 if (dot_insn () && r->reg_type.bitfield.class == ClassNone)
14482 {
14483 as_bad (_("`%s%s' cannot be used here"),
14484 register_prefix, r->reg_name);
14485 return 0;
14486 }
14487
14488 temp = r->reg_type;
14489 temp.bitfield.baseindex = 0;
14490 i.types[this_operand] = operand_type_or (i.types[this_operand],
14491 temp);
14492 i.types[this_operand].bitfield.unspecified = 0;
14493 i.op[this_operand].regs = r;
14494 i.reg_operands++;
14495
14496 /* A GPR may follow an RC or SAE immediate only if a (vector) register
14497 operand was also present earlier on. */
14498 if (i.rounding.type != rc_none && temp.bitfield.class == Reg
14499 && i.reg_operands == 1)
14500 {
14501 unsigned int j;
14502
14503 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
14504 if (i.rounding.type == RC_NamesTable[j].type)
14505 break;
14506 as_bad (_("`%s': misplaced `{%s}'"),
14507 insn_name (current_templates.start), RC_NamesTable[j].name);
14508 return 0;
14509 }
14510 }
14511 else if (*op_string == REGISTER_PREFIX)
14512 {
14513 as_bad (_("bad register name `%s'"), op_string);
14514 return 0;
14515 }
14516 else if (*op_string == IMMEDIATE_PREFIX)
14517 {
14518 ++op_string;
14519 if (i.jumpabsolute)
14520 {
14521 as_bad (_("immediate operand illegal with absolute jump"));
14522 return 0;
14523 }
14524 if (!i386_immediate (op_string))
14525 return 0;
14526 if (i.rounding.type != rc_none)
14527 {
14528 as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
14529 insn_name (current_templates.start));
14530 return 0;
14531 }
14532 }
14533 else if (RC_SAE_immediate (operand_string))
14534 {
14535 /* If it is a RC or SAE immediate, do the necessary placement check:
14536 Only another immediate or a GPR may precede it. */
14537 if (i.mem_operands || i.reg_operands + i.imm_operands > 1
14538 || (i.reg_operands == 1
14539 && i.op[0].regs->reg_type.bitfield.class != Reg))
14540 {
14541 as_bad (_("`%s': misplaced `%s'"),
14542 insn_name (current_templates.start), operand_string);
14543 return 0;
14544 }
14545 }
14546 else if (starts_memory_operand (*op_string))
14547 {
14548 /* This is a memory reference of some sort. */
14549 char *base_string;
14550
14551 /* Start and end of displacement string expression (if found). */
14552 char *displacement_string_start;
14553 char *displacement_string_end;
14554
14555 do_memory_reference:
14556 /* Check for base index form. We detect the base index form by
14557 looking for an ')' at the end of the operand, searching
14558 for the '(' matching it, and finding a REGISTER_PREFIX or ','
14559 after the '('. */
14560 base_string = op_string + strlen (op_string);
14561
14562 /* Handle vector operations. */
14563 --base_string;
14564 if (is_space_char (*base_string))
14565 --base_string;
14566
14567 if (*base_string == '}')
14568 {
14569 char *vop_start = NULL;
14570
14571 while (base_string-- > op_string)
14572 {
14573 if (*base_string == '"')
14574 break;
14575 if (*base_string != '{')
14576 continue;
14577
14578 vop_start = base_string;
14579
14580 --base_string;
14581 if (is_space_char (*base_string))
14582 --base_string;
14583
14584 if (*base_string != '}')
14585 break;
14586
14587 vop_start = NULL;
14588 }
14589
14590 if (!vop_start)
14591 {
14592 as_bad (_("unbalanced curly braces"));
14593 return 0;
14594 }
14595
14596 if (check_VecOperations (vop_start) == NULL)
14597 return 0;
14598 }
14599
14600 /* If we only have a displacement, set up for it to be parsed later. */
14601 displacement_string_start = op_string;
14602 displacement_string_end = base_string + 1;
14603
14604 if (*base_string == ')')
14605 {
14606 char *temp_string;
14607 unsigned int parens_not_balanced = 0;
14608 bool in_quotes = false;
14609
14610 /* We've already checked that the numbers of left & right ()'s
14611 match, and that there's a matching set of double quotes. */
14612 end_op = base_string;
14613 for (temp_string = op_string; temp_string < end_op; temp_string++)
14614 {
14615 if (*temp_string == '\\' && temp_string[1] == '"')
14616 ++temp_string;
14617 else if (*temp_string == '"')
14618 in_quotes = !in_quotes;
14619 else if (!in_quotes)
14620 {
14621 if (*temp_string == '(' && !parens_not_balanced++)
14622 base_string = temp_string;
14623 if (*temp_string == ')')
14624 --parens_not_balanced;
14625 }
14626 }
14627
14628 temp_string = base_string;
14629
14630 /* Skip past '(' and whitespace. */
14631 gas_assert (*base_string == '(');
14632 ++base_string;
14633 if (is_space_char (*base_string))
14634 ++base_string;
14635
14636 if (*base_string == ','
14637 || ((i.base_reg = parse_register (base_string, &end_op))
14638 != NULL))
14639 {
14640 displacement_string_end = temp_string;
14641
14642 i.types[this_operand].bitfield.baseindex = 1;
14643
14644 if (i.base_reg)
14645 {
14646 if (i.base_reg == &bad_reg)
14647 return 0;
14648 base_string = end_op;
14649 if (is_space_char (*base_string))
14650 ++base_string;
14651 }
14652
14653 /* There may be an index reg or scale factor here. */
14654 if (*base_string == ',')
14655 {
14656 ++base_string;
14657 if (is_space_char (*base_string))
14658 ++base_string;
14659
14660 if ((i.index_reg = parse_register (base_string, &end_op))
14661 != NULL)
14662 {
14663 if (i.index_reg == &bad_reg)
14664 return 0;
14665 base_string = end_op;
14666 if (is_space_char (*base_string))
14667 ++base_string;
14668 if (*base_string == ',')
14669 {
14670 ++base_string;
14671 if (is_space_char (*base_string))
14672 ++base_string;
14673 }
14674 else if (*base_string != ')')
14675 {
14676 as_bad (_("expecting `,' or `)' "
14677 "after index register in `%s'"),
14678 operand_string);
14679 return 0;
14680 }
14681 }
14682 else if (*base_string == REGISTER_PREFIX)
14683 {
14684 end_op = strchr (base_string, ',');
14685 if (end_op)
14686 *end_op = '\0';
14687 as_bad (_("bad register name `%s'"), base_string);
14688 return 0;
14689 }
14690
14691 /* Check for scale factor. */
14692 if (*base_string != ')')
14693 {
14694 char *end_scale = i386_scale (base_string);
14695
14696 if (!end_scale)
14697 return 0;
14698
14699 base_string = end_scale;
14700 if (is_space_char (*base_string))
14701 ++base_string;
14702 if (*base_string != ')')
14703 {
14704 as_bad (_("expecting `)' "
14705 "after scale factor in `%s'"),
14706 operand_string);
14707 return 0;
14708 }
14709 }
14710 else if (!i.index_reg)
14711 {
14712 as_bad (_("expecting index register or scale factor "
14713 "after `,'; got '%c'"),
14714 *base_string);
14715 return 0;
14716 }
14717 }
14718 else if (*base_string != ')')
14719 {
14720 as_bad (_("expecting `,' or `)' "
14721 "after base register in `%s'"),
14722 operand_string);
14723 return 0;
14724 }
14725 }
14726 else if (*base_string == REGISTER_PREFIX)
14727 {
14728 end_op = strchr (base_string, ',');
14729 if (end_op)
14730 *end_op = '\0';
14731 as_bad (_("bad register name `%s'"), base_string);
14732 return 0;
14733 }
14734 }
14735
14736 /* If there's an expression beginning the operand, parse it,
14737 assuming displacement_string_start and
14738 displacement_string_end are meaningful. */
14739 if (displacement_string_start != displacement_string_end)
14740 {
14741 if (!i386_displacement (displacement_string_start,
14742 displacement_string_end))
14743 return 0;
14744 }
14745
14746 /* Special case for (%dx) while doing input/output op. */
14747 if (i.base_reg
14748 && i.base_reg->reg_type.bitfield.instance == RegD
14749 && i.base_reg->reg_type.bitfield.word
14750 && i.index_reg == 0
14751 && i.log2_scale_factor == 0
14752 && i.seg[i.mem_operands] == 0
14753 && !operand_type_check (i.types[this_operand], disp))
14754 {
14755 i.types[this_operand] = i.base_reg->reg_type;
14756 i.op[this_operand].regs = i.base_reg;
14757 i.base_reg = NULL;
14758 i.input_output_operand = true;
14759 return 1;
14760 }
14761
14762 if (i386_index_check (operand_string) == 0)
14763 return 0;
14764 i.flags[this_operand] |= Operand_Mem;
14765 i.mem_operands++;
14766 }
14767 else
14768 {
14769 /* It's not a memory operand; argh! */
14770 as_bad (_("invalid char %s beginning operand %d `%s'"),
14771 output_invalid (*op_string),
14772 this_operand + 1,
14773 op_string);
14774 return 0;
14775 }
14776 return 1; /* Normal return. */
14777 }
14778 \f
14779 /* Calculate the maximum variable size (i.e., excluding fr_fix)
14780 that an rs_machine_dependent frag may reach. */
14781
14782 unsigned int
14783 i386_frag_max_var (fragS *frag)
14784 {
14785 /* The only relaxable frags are for jumps.
14786 Unconditional jumps can grow by 4 bytes and others by 5 bytes. */
14787 gas_assert (frag->fr_type == rs_machine_dependent);
14788 return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
14789 }
14790
14791 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14792 static int
14793 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
14794 {
14795 /* STT_GNU_IFUNC symbol must go through PLT. */
14796 if ((symbol_get_bfdsym (fr_symbol)->flags
14797 & BSF_GNU_INDIRECT_FUNCTION) != 0)
14798 return 0;
14799
14800 if (!S_IS_EXTERNAL (fr_symbol))
14801 /* Symbol may be weak or local. */
14802 return !S_IS_WEAK (fr_symbol);
14803
14804 /* Global symbols with non-default visibility can't be preempted. */
14805 if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
14806 return 1;
14807
14808 if (fr_var != NO_RELOC)
14809 switch ((enum bfd_reloc_code_real) fr_var)
14810 {
14811 case BFD_RELOC_386_PLT32:
14812 case BFD_RELOC_X86_64_PLT32:
14813 /* Symbol with PLT relocation may be preempted. */
14814 return 0;
14815 default:
14816 abort ();
14817 }
14818
14819 /* Global symbols with default visibility in a shared library may be
14820 preempted by another definition. */
14821 return !shared;
14822 }
14823 #endif
14824
14825 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
14826 Note: this also applies to Skylake and Cascade Lake.
14827 ---------------------------------------------------------------------
14828 | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
14829 | ------ | ----------- | ------- | -------- |
14830 | Jo | N | N | Y |
14831 | Jno | N | N | Y |
14832 | Jc/Jb | Y | N | Y |
14833 | Jae/Jnb | Y | N | Y |
14834 | Je/Jz | Y | Y | Y |
14835 | Jne/Jnz | Y | Y | Y |
14836 | Jna/Jbe | Y | N | Y |
14837 | Ja/Jnbe | Y | N | Y |
14838 | Js | N | N | Y |
14839 | Jns | N | N | Y |
14840 | Jp/Jpe | N | N | Y |
14841 | Jnp/Jpo | N | N | Y |
14842 | Jl/Jnge | Y | Y | Y |
14843 | Jge/Jnl | Y | Y | Y |
14844 | Jle/Jng | Y | Y | Y |
14845 | Jg/Jnle | Y | Y | Y |
14846 --------------------------------------------------------------------- */
14847 static int
14848 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
14849 {
14850 if (mf_cmp == mf_cmp_alu_cmp)
14851 return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
14852 || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
14853 if (mf_cmp == mf_cmp_incdec)
14854 return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
14855 || mf_jcc == mf_jcc_jle);
14856 if (mf_cmp == mf_cmp_test_and)
14857 return 1;
14858 return 0;
14859 }
14860
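/* Illustrative sketch only, reading the table above: "cmp %eax, %ebx"
   followed by "jc label" is fusible (Jc row, ADD/SUB/CMP column is Y),
   whereas "inc %eax" followed by "js label" is not (Js row, INC/DEC
   column is N), so only the former is considered for FUSED_JCC_PADDING.  */
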
14861 /* Return the next non-empty frag. */
14862
14863 static fragS *
14864 i386_next_non_empty_frag (fragS *fragP)
14865 {
14866 /* There may be a frag with a ".fill 0" when there is no room in
14867 the current frag for frag_grow in output_insn. */
14868 for (fragP = fragP->fr_next;
14869 (fragP != NULL
14870 && fragP->fr_type == rs_fill
14871 && fragP->fr_fix == 0);
14872 fragP = fragP->fr_next)
14873 ;
14874 return fragP;
14875 }
14876
14877 /* Return the next jcc frag after BRANCH_PADDING. */
14878
14879 static fragS *
14880 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
14881 {
14882 fragS *branch_fragP;
14883 if (!pad_fragP)
14884 return NULL;
14885
14886 if (pad_fragP->fr_type == rs_machine_dependent
14887 && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
14888 == BRANCH_PADDING))
14889 {
14890 branch_fragP = i386_next_non_empty_frag (pad_fragP);
14891 if (branch_fragP->fr_type != rs_machine_dependent)
14892 return NULL;
14893 if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
14894 && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
14895 pad_fragP->tc_frag_data.mf_type))
14896 return branch_fragP;
14897 }
14898
14899 return NULL;
14900 }
14901
14902 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */
14903
14904 static void
14905 i386_classify_machine_dependent_frag (fragS *fragP)
14906 {
14907 fragS *cmp_fragP;
14908 fragS *pad_fragP;
14909 fragS *branch_fragP;
14910 fragS *next_fragP;
14911 unsigned int max_prefix_length;
14912
14913 if (fragP->tc_frag_data.classified)
14914 return;
14915
14916 /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert
14917 FUSED_JCC_PADDING and merge BRANCH_PADDING. */
14918 for (next_fragP = fragP;
14919 next_fragP != NULL;
14920 next_fragP = next_fragP->fr_next)
14921 {
14922 next_fragP->tc_frag_data.classified = 1;
14923 if (next_fragP->fr_type == rs_machine_dependent)
14924 switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
14925 {
14926 case BRANCH_PADDING:
14927 /* The BRANCH_PADDING frag must be followed by a branch
14928 frag. */
14929 branch_fragP = i386_next_non_empty_frag (next_fragP);
14930 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
14931 break;
14932 case FUSED_JCC_PADDING:
14933 /* Check if this is a fused jcc:
14934 FUSED_JCC_PADDING
14935 CMP like instruction
14936 BRANCH_PADDING
14937 COND_JUMP
14938 */
14939 cmp_fragP = i386_next_non_empty_frag (next_fragP);
14940 pad_fragP = i386_next_non_empty_frag (cmp_fragP);
14941 branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
14942 if (branch_fragP)
14943 {
14944 /* The BRANCH_PADDING frag is merged with the
14945 FUSED_JCC_PADDING frag. */
14946 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
14947 /* CMP like instruction size. */
14948 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
14949 frag_wane (pad_fragP);
14950 /* Skip to branch_fragP. */
14951 next_fragP = branch_fragP;
14952 }
14953 else if (next_fragP->tc_frag_data.max_prefix_length)
14954 {
14955 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
14956 a fused jcc. */
14957 next_fragP->fr_subtype
14958 = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
14959 next_fragP->tc_frag_data.max_bytes
14960 = next_fragP->tc_frag_data.max_prefix_length;
14961 /* This will be updated in the BRANCH_PREFIX scan. */
14962 next_fragP->tc_frag_data.max_prefix_length = 0;
14963 }
14964 else
14965 frag_wane (next_fragP);
14966 break;
14967 }
14968 }
14969
14970 /* Stop if there is no BRANCH_PREFIX. */
14971 if (!align_branch_prefix_size)
14972 return;
14973
14974 /* Scan for BRANCH_PREFIX. */
14975 for (; fragP != NULL; fragP = fragP->fr_next)
14976 {
14977 if (fragP->fr_type != rs_machine_dependent
14978 || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
14979 != BRANCH_PREFIX))
14980 continue;
14981
14982 /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
14983 COND_JUMP_PREFIX. */
14984 max_prefix_length = 0;
14985 for (next_fragP = fragP;
14986 next_fragP != NULL;
14987 next_fragP = next_fragP->fr_next)
14988 {
14989 if (next_fragP->fr_type == rs_fill)
14990 /* Skip rs_fill frags. */
14991 continue;
14992 else if (next_fragP->fr_type != rs_machine_dependent)
14993 /* Stop for all other frags. */
14994 break;
14995
14996 /* rs_machine_dependent frags. */
14997 if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
14998 == BRANCH_PREFIX)
14999 {
15000 /* Count BRANCH_PREFIX frags. */
15001 if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
15002 {
15003 max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
15004 frag_wane (next_fragP);
15005 }
15006 else
15007 max_prefix_length
15008 += next_fragP->tc_frag_data.max_bytes;
15009 }
15010 else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15011 == BRANCH_PADDING)
15012 || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15013 == FUSED_JCC_PADDING))
15014 {
15015 /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */
15016 fragP->tc_frag_data.u.padding_fragP = next_fragP;
15017 break;
15018 }
15019 else
15020 /* Stop for other rs_machine_dependent frags. */
15021 break;
15022 }
15023
15024 fragP->tc_frag_data.max_prefix_length = max_prefix_length;
15025
15026 /* Skip to the next frag. */
15027 fragP = next_fragP;
15028 }
15029 }
15030
15031 /* Compute padding size for
15032
15033 FUSED_JCC_PADDING
15034 CMP like instruction
15035 BRANCH_PADDING
15036 COND_JUMP/UNCOND_JUMP
15037
15038 or
15039
15040 BRANCH_PADDING
15041 COND_JUMP/UNCOND_JUMP
15042 */
15043
15044 static int
15045 i386_branch_padding_size (fragS *fragP, offsetT address)
15046 {
15047 unsigned int offset, size, padding_size;
15048 fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
15049
15050 /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */
15051 if (!address)
15052 address = fragP->fr_address;
15053 address += fragP->fr_fix;
15054
15055 /* CMP like instruction size. */
15056 size = fragP->tc_frag_data.cmp_size;
15057
15058 /* The base size of the branch frag. */
15059 size += branch_fragP->fr_fix;
15060
15061 /* Add opcode and displacement bytes for the rs_machine_dependent
15062 branch frag. */
15063 if (branch_fragP->fr_type == rs_machine_dependent)
15064 size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
15065
15066 /* Check if branch is within boundary and doesn't end at the last
15067 byte. */
15068 offset = address & ((1U << align_branch_power) - 1);
15069 if ((offset + size) >= (1U << align_branch_power))
15070 /* Padding needed to avoid crossing boundary. */
15071 padding_size = (1U << align_branch_power) - offset;
15072 else
15073 /* No padding needed. */
15074 padding_size = 0;
15075
15076 /* The return value may be saved in tc_frag_data.length which is
15077 an unsigned byte. */
15078 if (!fits_in_unsigned_byte (padding_size))
15079 abort ();
15080
15081 return padding_size;
15082 }
15083
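/* Illustrative sketch only: with align_branch_power == 5 (a 32-byte
   boundary), a fused cmp+jcc totalling 9 bytes that starts at offset 26
   in its window gives offset + size == 35 >= 32, so padding_size is
   32 - 26 == 6 bytes, pushing the pair wholly past the boundary.  */
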
15084 /* i386_generic_table_relax_frag()
15085
15086 Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
15087 grow/shrink padding to align branch frags. Hand others to
15088 relax_frag(). */
15089
15090 long
15091 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
15092 {
15093 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
15094 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
15095 {
15096 long padding_size = i386_branch_padding_size (fragP, 0);
15097 long grow = padding_size - fragP->tc_frag_data.length;
15098
15099 /* When the BRANCH_PREFIX frag is used, the computed address
15100 must match the actual address and there should be no padding. */
15101 if (fragP->tc_frag_data.padding_address
15102 && (fragP->tc_frag_data.padding_address != fragP->fr_address
15103 || padding_size))
15104 abort ();
15105
15106 /* Update the padding size. */
15107 if (grow)
15108 fragP->tc_frag_data.length = padding_size;
15109
15110 return grow;
15111 }
15112 else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
15113 {
15114 fragS *padding_fragP, *next_fragP;
15115 long padding_size, left_size, last_size;
15116
15117 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
15118 if (!padding_fragP)
15119 /* Use the padding set by the leading BRANCH_PREFIX frag. */
15120 return (fragP->tc_frag_data.length
15121 - fragP->tc_frag_data.last_length);
15122
15123 /* Compute the relative address of the padding frag the very first
15124 time through, when the BRANCH_PREFIX frag sizes are zero. */
15125 if (!fragP->tc_frag_data.padding_address)
15126 fragP->tc_frag_data.padding_address
15127 = padding_fragP->fr_address - (fragP->fr_address - stretch);
15128
15129 /* First update the last length from the previous iteration. */
15130 left_size = fragP->tc_frag_data.prefix_length;
15131 for (next_fragP = fragP;
15132 next_fragP != padding_fragP;
15133 next_fragP = next_fragP->fr_next)
15134 if (next_fragP->fr_type == rs_machine_dependent
15135 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15136 == BRANCH_PREFIX))
15137 {
15138 if (left_size)
15139 {
15140 int max = next_fragP->tc_frag_data.max_bytes;
15141 if (max)
15142 {
15143 int size;
15144 if (max > left_size)
15145 size = left_size;
15146 else
15147 size = max;
15148 left_size -= size;
15149 next_fragP->tc_frag_data.last_length = size;
15150 }
15151 }
15152 else
15153 next_fragP->tc_frag_data.last_length = 0;
15154 }
15155
15156 /* Check the padding size for the padding frag. */
15157 padding_size = i386_branch_padding_size
15158 (padding_fragP, (fragP->fr_address
15159 + fragP->tc_frag_data.padding_address));
15160
15161 last_size = fragP->tc_frag_data.prefix_length;
15162 /* Check if there is a change from the last iteration. */
15163 if (padding_size == last_size)
15164 {
15165 /* Update the expected address of the padding frag. */
15166 padding_fragP->tc_frag_data.padding_address
15167 = (fragP->fr_address + padding_size
15168 + fragP->tc_frag_data.padding_address);
15169 return 0;
15170 }
15171
15172 if (padding_size > fragP->tc_frag_data.max_prefix_length)
15173 {
15174 /* No padding if there is insufficient room. Clear the
15175 expected address of the padding frag. */
15176 padding_fragP->tc_frag_data.padding_address = 0;
15177 padding_size = 0;
15178 }
15179 else
15180 /* Store the expected address of the padding frag. */
15181 padding_fragP->tc_frag_data.padding_address
15182 = (fragP->fr_address + padding_size
15183 + fragP->tc_frag_data.padding_address);
15184
15185 fragP->tc_frag_data.prefix_length = padding_size;
15186
15187 /* Update the length for the current iteration. */
15188 left_size = padding_size;
15189 for (next_fragP = fragP;
15190 next_fragP != padding_fragP;
15191 next_fragP = next_fragP->fr_next)
15192 if (next_fragP->fr_type == rs_machine_dependent
15193 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
15194 == BRANCH_PREFIX))
15195 {
15196 if (left_size)
15197 {
15198 int max = next_fragP->tc_frag_data.max_bytes;
15199 if (max)
15200 {
15201 int size;
15202 if (max > left_size)
15203 size = left_size;
15204 else
15205 size = max;
15206 left_size -= size;
15207 next_fragP->tc_frag_data.length = size;
15208 }
15209 }
15210 else
15211 next_fragP->tc_frag_data.length = 0;
15212 }
15213
15214 return (fragP->tc_frag_data.length
15215 - fragP->tc_frag_data.last_length);
15216 }
15217 return relax_frag (segment, fragP, stretch);
15218 }
15219
15220 /* md_estimate_size_before_relax()
15221
15222 Called just before relax() for rs_machine_dependent frags. The x86
15223 assembler uses these frags to handle variable size jump
15224 instructions.
15225
15226 Any symbol that is now undefined will not become defined.
15227 Return the correct fr_subtype in the frag.
15228 Return the initial "guess for variable size of frag" to caller.
15229 The guess is actually the growth beyond the fixed part. Whatever
15230 we do to grow the fixed or variable part contributes to our
15231 returned value. */
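/* An illustrative sketch of that accounting, for the un-relaxable
symbol path below: promoting a one-byte conditional jump to the
two-byte 0x0f,0x8N opcode with a four-byte displacement bumps
fr_fix by 1 + 4, so 5 is returned as the growth. */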
15232
15233 int
15234 md_estimate_size_before_relax (fragS *fragP, segT segment)
15235 {
15236 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
15237 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
15238 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
15239 {
15240 i386_classify_machine_dependent_frag (fragP);
15241 return fragP->tc_frag_data.length;
15242 }
15243
15244 /* We've already got fragP->fr_subtype right; all we have to do is
15245 check for un-relaxable symbols. On an ELF system, we can't relax
15246 an externally visible symbol, because it may be overridden by a
15247 shared library. */
15248 if (S_GET_SEGMENT (fragP->fr_symbol) != segment
15249 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15250 || (IS_ELF
15251 && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
15252 fragP->fr_var))
15253 #endif
15254 #if defined (OBJ_COFF) && defined (TE_PE)
15255 || (OUTPUT_FLAVOR == bfd_target_coff_flavour
15256 && S_IS_WEAK (fragP->fr_symbol))
15257 #endif
15258 )
15259 {
15260 /* Symbol is undefined in this segment, or we need to keep a
15261 reloc so that weak symbols can be overridden. */
15262 int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
15263 enum bfd_reloc_code_real reloc_type;
15264 unsigned char *opcode;
15265 int old_fr_fix;
15266 fixS *fixP = NULL;
15267
15268 if (fragP->fr_var != NO_RELOC)
15269 reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
15270 else if (size == 2)
15271 reloc_type = BFD_RELOC_16_PCREL;
15272 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15273 else if (fragP->tc_frag_data.code == CODE_64BIT
15274 && fragP->fr_offset == 0
15275 && need_plt32_p (fragP->fr_symbol))
15276 reloc_type = BFD_RELOC_X86_64_PLT32;
15277 #endif
15278 else
15279 reloc_type = BFD_RELOC_32_PCREL;
15280
15281 old_fr_fix = fragP->fr_fix;
15282 opcode = (unsigned char *) fragP->fr_opcode;
15283
15284 switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
15285 {
15286 case UNCOND_JUMP:
15287 /* Make jmp (0xeb) a (d)word displacement jump. */
15288 opcode[0] = 0xe9;
15289 fragP->fr_fix += size;
15290 fixP = fix_new (fragP, old_fr_fix, size,
15291 fragP->fr_symbol,
15292 fragP->fr_offset, 1,
15293 reloc_type);
15294 break;
15295
15296 case COND_JUMP86:
15297 if (size == 2
15298 && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
15299 {
15300 /* Negate the condition, and branch past an
15301 unconditional jump. */
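/* An illustrative sketch: a 16-bit "je" with a byte displacement
is 0x74 <disp8>. Flipping the low opcode bit yields "jne" (0x75),
whose displacement of 3 skips the 0xe9 opcode plus its two
displacement bytes:
74 xx -> 75 03 e9 <disp16> */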
15302 opcode[0] ^= 1;
15303 opcode[1] = 3;
15304 /* Insert an unconditional jump. */
15305 opcode[2] = 0xe9;
15306 /* We added two extra opcode bytes, and have a two byte
15307 offset. */
15308 fragP->fr_fix += 2 + 2;
15309 fix_new (fragP, old_fr_fix + 2, 2,
15310 fragP->fr_symbol,
15311 fragP->fr_offset, 1,
15312 reloc_type);
15313 break;
15314 }
15315 /* Fall through. */
15316
15317 case COND_JUMP:
15318 if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
15319 {
15320 fragP->fr_fix += 1;
15321 fixP = fix_new (fragP, old_fr_fix, 1,
15322 fragP->fr_symbol,
15323 fragP->fr_offset, 1,
15324 BFD_RELOC_8_PCREL);
15325 fixP->fx_signed = 1;
15326 break;
15327 }
15328
15329 /* This changes the byte-displacement jump 0x7N
15330 to the (d)word-displacement jump 0x0f,0x8N. */
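/* E.g. "je" 0x74 <disp8> becomes 0x0f 0x84 <disp32>, since
0x74 + 0x10 == 0x84. */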
15331 opcode[1] = opcode[0] + 0x10;
15332 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
15333 /* We've added an opcode byte. */
15334 fragP->fr_fix += 1 + size;
15335 fixP = fix_new (fragP, old_fr_fix + 1, size,
15336 fragP->fr_symbol,
15337 fragP->fr_offset, 1,
15338 reloc_type);
15339 break;
15340
15341 default:
15342 BAD_CASE (fragP->fr_subtype);
15343 break;
15344 }
15345
15346 /* All jumps handled here are signed, but don't unconditionally use a
15347 signed limit check for 32 and 16 bit jumps as we want to allow wrap
15348 around at 4G (outside of 64-bit mode) and 64k. */
15349 if (size == 4 && flag_code == CODE_64BIT)
15350 fixP->fx_signed = 1;
15351
15352 frag_wane (fragP);
15353 return fragP->fr_fix - old_fr_fix;
15354 }
15355
15356 /* Guess size depending on current relax state. Initially the relax
15357 state will correspond to a short jump and we return 1, because
15358 the variable part of the frag (the branch offset) is one byte
15359 long. However, we can relax a section more than once and in that
15360 case we must either set fr_subtype back to the unrelaxed state,
15361 or return the value for the appropriate branch. */
15362 return md_relax_table[fragP->fr_subtype].rlx_length;
15363 }
15364
15365 /* Called after relax() is finished.
15366
15367 In: Address of frag.
15368 fr_type == rs_machine_dependent.
15369 fr_subtype is what the address relaxed to.
15370
15371 Out: Any fixSs and constants are set up.
15372 Caller will turn frag into a ".space 0". */
15373
15374 void
15375 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
15376 fragS *fragP)
15377 {
15378 unsigned char *opcode;
15379 unsigned char *where_to_put_displacement = NULL;
15380 offsetT target_address;
15381 offsetT opcode_address;
15382 unsigned int extension = 0;
15383 offsetT displacement_from_opcode_start;
15384
15385 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
15386 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
15387 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
15388 {
15389 /* Generate nop padding. */
15390 unsigned int size = fragP->tc_frag_data.length;
15391 if (size)
15392 {
15393 if (size > fragP->tc_frag_data.max_bytes)
15394 abort ();
15395
15396 if (flag_debug)
15397 {
15398 const char *msg;
15399 const char *branch = "branch";
15400 const char *prefix = "";
15401 fragS *padding_fragP;
15402 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
15403 == BRANCH_PREFIX)
15404 {
15405 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
15406 switch (fragP->tc_frag_data.default_prefix)
15407 {
15408 default:
15409 abort ();
15410 break;
15411 case CS_PREFIX_OPCODE:
15412 prefix = " cs";
15413 break;
15414 case DS_PREFIX_OPCODE:
15415 prefix = " ds";
15416 break;
15417 case ES_PREFIX_OPCODE:
15418 prefix = " es";
15419 break;
15420 case FS_PREFIX_OPCODE:
15421 prefix = " fs";
15422 break;
15423 case GS_PREFIX_OPCODE:
15424 prefix = " gs";
15425 break;
15426 case SS_PREFIX_OPCODE:
15427 prefix = " ss";
15428 break;
15429 }
15430 if (padding_fragP)
15431 msg = _("%s:%u: add %d%s at 0x%llx to align "
15432 "%s within %d-byte boundary\n");
15433 else
15434 msg = _("%s:%u: add additional %d%s at 0x%llx to "
15435 "align %s within %d-byte boundary\n");
15436 }
15437 else
15438 {
15439 padding_fragP = fragP;
15440 msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
15441 "%s within %d-byte boundary\n");
15442 }
15443
15444 if (padding_fragP)
15445 switch (padding_fragP->tc_frag_data.branch_type)
15446 {
15447 case align_branch_jcc:
15448 branch = "jcc";
15449 break;
15450 case align_branch_fused:
15451 branch = "fused jcc";
15452 break;
15453 case align_branch_jmp:
15454 branch = "jmp";
15455 break;
15456 case align_branch_call:
15457 branch = "call";
15458 break;
15459 case align_branch_indirect:
15460 branch = "indirect branch";
15461 break;
15462 case align_branch_ret:
15463 branch = "ret";
15464 break;
15465 default:
15466 break;
15467 }
15468
15469 fprintf (stdout, msg,
15470 fragP->fr_file, fragP->fr_line, size, prefix,
15471 (long long) fragP->fr_address, branch,
15472 1 << align_branch_power);
15473 }
15474 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
15475 memset (fragP->fr_opcode,
15476 fragP->tc_frag_data.default_prefix, size);
15477 else
15478 i386_generate_nops (fragP, (char *) fragP->fr_opcode,
15479 size, 0);
15480 fragP->fr_fix += size;
15481 }
15482 return;
15483 }
15484
15485 opcode = (unsigned char *) fragP->fr_opcode;
15486
15487 /* Address we want to reach in file space. */
15488 target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
15489
15490 /* Address opcode resides at in file space. */
15491 opcode_address = fragP->fr_address + fragP->fr_fix;
15492
15493 /* Displacement from opcode start to fill into instruction. */
15494 displacement_from_opcode_start = target_address - opcode_address;
15495
15496 if ((fragP->fr_subtype & BIG) == 0)
15497 {
15498 /* Don't have to change opcode. */
15499 extension = 1; /* 1 opcode + 1 displacement */
15500 where_to_put_displacement = &opcode[1];
15501 }
15502 else
15503 {
15504 if (no_cond_jump_promotion
15505 && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
15506 as_warn_where (fragP->fr_file, fragP->fr_line,
15507 _("long jump required"));
15508
15509 switch (fragP->fr_subtype)
15510 {
15511 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
15512 extension = 4; /* 1 opcode + 4 displacement */
15513 opcode[0] = 0xe9;
15514 where_to_put_displacement = &opcode[1];
15515 break;
15516
15517 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
15518 extension = 2; /* 1 opcode + 2 displacement */
15519 opcode[0] = 0xe9;
15520 where_to_put_displacement = &opcode[1];
15521 break;
15522
15523 case ENCODE_RELAX_STATE (COND_JUMP, BIG):
15524 case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
15525 extension = 5; /* 2 opcode + 4 displacement */
15526 opcode[1] = opcode[0] + 0x10;
15527 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
15528 where_to_put_displacement = &opcode[2];
15529 break;
15530
15531 case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
15532 extension = 3; /* 2 opcode + 2 displacement */
15533 opcode[1] = opcode[0] + 0x10;
15534 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
15535 where_to_put_displacement = &opcode[2];
15536 break;
15537
15538 case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
15539 extension = 4;
15540 opcode[0] ^= 1;
15541 opcode[1] = 3;
15542 opcode[2] = 0xe9;
15543 where_to_put_displacement = &opcode[3];
15544 break;
15545
15546 default:
15547 BAD_CASE (fragP->fr_subtype);
15548 break;
15549 }
15550 }
15551
15552 /* If size is less than four we are sure that the operand fits,
15553 but if it's 4, then it could be that the displacement is larger
15554 than +/- 2GB. */
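/* The test below is the usual biased unsigned range check: adding
2**31 maps the signed range [-2**31, 2**31 - 1] onto [0, 2**32 - 1],
so anything still above ((addressT) 2 << 31) - 1 after the bias was
outside +/- 2GB to begin with. */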
15555 if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
15556 && object_64bit
15557 && ((addressT) (displacement_from_opcode_start - extension
15558 + ((addressT) 1 << 31))
15559 > (((addressT) 2 << 31) - 1)))
15560 {
15561 as_bad_where (fragP->fr_file, fragP->fr_line,
15562 _("jump target out of range"));
15563 /* Make us emit 0. */
15564 displacement_from_opcode_start = extension;
15565 }
15566 /* Now put displacement after opcode. */
15567 md_number_to_chars ((char *) where_to_put_displacement,
15568 (valueT) (displacement_from_opcode_start - extension),
15569 DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
15570 fragP->fr_fix += extension;
15571 }
15572 \f
15573 /* Apply a fixup (fixP) to segment data, once it has been determined
15574 by our caller that we have all the info we need to fix it up.
15575
15576 Parameter valP is the pointer to the value of the bits.
15577
15578 On the 386, immediates, displacements, and data pointers are all in
15579 the same (little-endian) format, so we don't need to care about which
15580 we are handling. */
15581
15582 void
15583 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
15584 {
15585 char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
15586 valueT value = *valP;
15587
15588 #if !defined (TE_Mach)
15589 if (fixP->fx_pcrel)
15590 {
15591 switch (fixP->fx_r_type)
15592 {
15593 default:
15594 break;
15595
15596 case BFD_RELOC_64:
15597 fixP->fx_r_type = BFD_RELOC_64_PCREL;
15598 break;
15599 case BFD_RELOC_32:
15600 case BFD_RELOC_X86_64_32S:
15601 fixP->fx_r_type = BFD_RELOC_32_PCREL;
15602 break;
15603 case BFD_RELOC_16:
15604 fixP->fx_r_type = BFD_RELOC_16_PCREL;
15605 break;
15606 case BFD_RELOC_8:
15607 fixP->fx_r_type = BFD_RELOC_8_PCREL;
15608 break;
15609 }
15610 }
15611
15612 if (fixP->fx_addsy != NULL
15613 && (fixP->fx_r_type == BFD_RELOC_32_PCREL
15614 || fixP->fx_r_type == BFD_RELOC_64_PCREL
15615 || fixP->fx_r_type == BFD_RELOC_16_PCREL
15616 || fixP->fx_r_type == BFD_RELOC_8_PCREL)
15617 && !use_rela_relocations)
15618 {
15619 /* This is a hack. There should be a better way to handle this.
15620 This covers for the fact that bfd_install_relocation will
15621 subtract the current location (for partial_inplace, PC relative
15622 relocations); see more below. */
15623 #ifndef OBJ_AOUT
15624 if (IS_ELF
15625 #ifdef TE_PE
15626 || OUTPUT_FLAVOR == bfd_target_coff_flavour
15627 #endif
15628 )
15629 value += fixP->fx_where + fixP->fx_frag->fr_address;
15630 #endif
15631 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15632 if (IS_ELF)
15633 {
15634 segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
15635
15636 if ((sym_seg == seg
15637 || (symbol_section_p (fixP->fx_addsy)
15638 && sym_seg != absolute_section))
15639 && !generic_force_reloc (fixP))
15640 {
15641 /* Yes, we add the values in twice. This is because
15642 bfd_install_relocation subtracts them out again. I think
15643 bfd_install_relocation is broken, but I don't dare change
15644 it. FIXME. */
15645 value += fixP->fx_where + fixP->fx_frag->fr_address;
15646 }
15647 }
15648 #endif
15649 #if defined (OBJ_COFF) && defined (TE_PE)
15650 /* For some reason, the PE format does not store a
15651 section address offset for a PC relative symbol. */
15652 if (S_GET_SEGMENT (fixP->fx_addsy) != seg
15653 || S_IS_WEAK (fixP->fx_addsy))
15654 value += md_pcrel_from (fixP);
15655 #endif
15656 }
15657 #if defined (OBJ_COFF) && defined (TE_PE)
15658 if (fixP->fx_addsy != NULL
15659 && S_IS_WEAK (fixP->fx_addsy)
15660 /* PR 16858: Do not modify weak function references. */
15661 && ! fixP->fx_pcrel)
15662 {
15663 #if !defined (TE_PEP)
15664 /* For x86 PE weak function symbols are neither PC-relative
15665 nor do they set S_IS_FUNCTION. So the only reliable way
15666 to detect them is to check the flags of their containing
15667 section. */
15668 if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
15669 && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
15670 ;
15671 else
15672 #endif
15673 value -= S_GET_VALUE (fixP->fx_addsy);
15674 }
15675 #endif
15676
15677 /* Fix a few things - the dynamic linker expects certain values here,
15678 and we must not disappoint it. */
15679 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15680 if (IS_ELF && fixP->fx_addsy)
15681 switch (fixP->fx_r_type)
15682 {
15683 case BFD_RELOC_386_PLT32:
15684 case BFD_RELOC_X86_64_PLT32:
15685 /* Make the jump instruction point to the address of the operand.
15686 At runtime we merely add the offset to the actual PLT entry.
15687 NB: Subtract the offset size only for jump instructions. */
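/* E.g. for "call foo@PLT" the 4-byte displacement field is the
last part of the instruction, while the CPU forms the target
relative to the following instruction; the -4 addend compensates
for those bytes when the linker computes L + A - P. */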
15688 if (fixP->fx_pcrel)
15689 value = -4;
15690 break;
15691
15692 case BFD_RELOC_386_TLS_GD:
15693 case BFD_RELOC_386_TLS_LDM:
15694 case BFD_RELOC_386_TLS_IE_32:
15695 case BFD_RELOC_386_TLS_IE:
15696 case BFD_RELOC_386_TLS_GOTIE:
15697 case BFD_RELOC_386_TLS_GOTDESC:
15698 case BFD_RELOC_X86_64_TLSGD:
15699 case BFD_RELOC_X86_64_TLSLD:
15700 case BFD_RELOC_X86_64_GOTTPOFF:
15701 case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
15702 case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
15703 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
15704 case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
15705 value = 0; /* Fully resolved at runtime. No addend. */
15706 /* Fall through. */
15707 case BFD_RELOC_386_TLS_LE:
15708 case BFD_RELOC_386_TLS_LDO_32:
15709 case BFD_RELOC_386_TLS_LE_32:
15710 case BFD_RELOC_X86_64_DTPOFF32:
15711 case BFD_RELOC_X86_64_DTPOFF64:
15712 case BFD_RELOC_X86_64_TPOFF32:
15713 case BFD_RELOC_X86_64_TPOFF64:
15714 S_SET_THREAD_LOCAL (fixP->fx_addsy);
15715 break;
15716
15717 case BFD_RELOC_386_TLS_DESC_CALL:
15718 case BFD_RELOC_X86_64_TLSDESC_CALL:
15719 value = 0; /* Fully resolved at runtime. No addend. */
15720 S_SET_THREAD_LOCAL (fixP->fx_addsy);
15721 fixP->fx_done = 0;
15722 return;
15723
15724 case BFD_RELOC_VTABLE_INHERIT:
15725 case BFD_RELOC_VTABLE_ENTRY:
15726 fixP->fx_done = 0;
15727 return;
15728
15729 default:
15730 break;
15731 }
15732 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) */
15733
15734 /* If not 64bit, massage value to account for wraparound when !BFD64. */
15735 if (!object_64bit)
15736 value = extend_to_32bit_address (value);
15737
15738 *valP = value;
15739 #endif /* !defined (TE_Mach) */
15740
15741 /* Are we finished with this relocation now? */
15742 if (fixP->fx_addsy == NULL)
15743 {
15744 fixP->fx_done = 1;
15745 switch (fixP->fx_r_type)
15746 {
15747 case BFD_RELOC_X86_64_32S:
15748 fixP->fx_signed = 1;
15749 break;
15750
15751 default:
15752 break;
15753 }
15754 }
15755 #if defined (OBJ_COFF) && defined (TE_PE)
15756 else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
15757 {
15758 fixP->fx_done = 0;
15759 /* Remember value for tc_gen_reloc. */
15760 fixP->fx_addnumber = value;
15761 /* Clear out the frag for now. */
15762 value = 0;
15763 }
15764 #endif
15765 else if (use_rela_relocations)
15766 {
15767 if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
15768 fixP->fx_no_overflow = 1;
15769 /* Remember value for tc_gen_reloc. */
15770 fixP->fx_addnumber = value;
15771 value = 0;
15772 }
15773
15774 md_number_to_chars (p, value, fixP->fx_size);
15775 }
15776 \f
15777 const char *
15778 md_atof (int type, char *litP, int *sizeP)
15779 {
15780 /* This outputs the LITTLENUMs in REVERSE order;
15781 in accord with the littleendian 386. */
15782 return ieee_md_atof (type, litP, sizeP, false);
15783 }
15784 \f
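/* Room for two hex digits of the widest unsigned char, the "(0x" and
")" decoration (or the quotes in the printable case), and the
terminating NUL. */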
15785 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
15786
15787 static char *
15788 output_invalid (int c)
15789 {
15790 if (ISPRINT (c))
15791 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
15792 "'%c'", c);
15793 else
15794 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
15795 "(0x%x)", (unsigned char) c);
15796 return output_invalid_buf;
15797 }
15798
15799 /* Verify that @r can be used in the current context. */
15800
15801 static bool check_register (const reg_entry *r)
15802 {
15803 if (allow_pseudo_reg)
15804 return true;
15805
15806 if (operand_type_all_zero (&r->reg_type))
15807 return false;
15808
15809 if ((r->reg_type.bitfield.dword
15810 || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
15811 || r->reg_type.bitfield.class == RegCR
15812 || r->reg_type.bitfield.class == RegDR)
15813 && !cpu_arch_flags.bitfield.cpui386)
15814 return false;
15815
15816 if (r->reg_type.bitfield.class == RegTR
15817 && (flag_code == CODE_64BIT
15818 || !cpu_arch_flags.bitfield.cpui386
15819 || cpu_arch_isa_flags.bitfield.cpui586
15820 || cpu_arch_isa_flags.bitfield.cpui686))
15821 return false;
15822
15823 if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
15824 return false;
15825
15826 if (!cpu_arch_flags.bitfield.cpuavx512f)
15827 {
15828 if (r->reg_type.bitfield.zmmword
15829 || r->reg_type.bitfield.class == RegMask)
15830 return false;
15831
15832 if (!cpu_arch_flags.bitfield.cpuavx)
15833 {
15834 if (r->reg_type.bitfield.ymmword)
15835 return false;
15836
15837 if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
15838 return false;
15839 }
15840 }
15841
15842 if (r->reg_type.bitfield.zmmword)
15843 {
15844 if (vector_size < VSZ512)
15845 return false;
15846
15847 switch (i.encoding)
15848 {
15849 case encoding_default:
15850 case encoding_egpr:
15851 i.encoding = encoding_evex512;
15852 break;
15853 case encoding_evex:
15854 case encoding_evex512:
15855 break;
15856 default:
15857 i.encoding = encoding_error;
15858 break;
15859 }
15860 }
15861
15862 if (vector_size < VSZ256 && r->reg_type.bitfield.ymmword)
15863 return false;
15864
15865 if (r->reg_type.bitfield.tmmword
15866 && (!cpu_arch_flags.bitfield.cpuamx_tile
15867 || flag_code != CODE_64BIT))
15868 return false;
15869
15870 if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
15871 return false;
15872
15873 /* Don't allow the fake index register unless allow_index_reg is non-zero. */
15874 if (!allow_index_reg && r->reg_num == RegIZ)
15875 return false;
15876
15877 /* Upper 16 vector registers are only available with VREX in 64bit
15878 mode, and require EVEX encoding. */
15879 if (r->reg_flags & RegVRex)
15880 {
15881 if (!cpu_arch_flags.bitfield.cpuavx512f
15882 || flag_code != CODE_64BIT)
15883 return false;
15884
15885 switch (i.encoding)
15886 {
15887 case encoding_default:
15888 case encoding_egpr:
15889 case encoding_evex512:
15890 i.encoding = encoding_evex;
15891 break;
15892 case encoding_evex:
15893 break;
15894 default:
15895 i.encoding = encoding_error;
15896 break;
15897 }
15898 }
15899
15900 if (r->reg_flags & RegRex2)
15901 {
15902 if (!cpu_arch_flags.bitfield.cpuapx_f
15903 || flag_code != CODE_64BIT)
15904 return false;
15905
15906 switch (i.encoding)
15907 {
15908 case encoding_default:
15909 i.encoding = encoding_egpr;
15910 break;
15911 case encoding_egpr:
15912 case encoding_evex:
15913 case encoding_evex512:
15914 break;
15915 default:
15916 i.encoding = encoding_error;
15917 break;
15918 }
15919 }
15920
15921 if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
15922 && (!cpu_arch_flags.bitfield.cpu64
15923 || r->reg_type.bitfield.class != RegCR
15924 || dot_insn ())
15925 && flag_code != CODE_64BIT)
15926 return false;
15927
15928 if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
15929 && !intel_syntax)
15930 return false;
15931
15932 return true;
15933 }
15934
15935 /* REG_STRING starts *before* REGISTER_PREFIX. */
15936
15937 static const reg_entry *
15938 parse_real_register (const char *reg_string, char **end_op)
15939 {
15940 const char *s = reg_string;
15941 char *p;
15942 char reg_name_given[MAX_REG_NAME_SIZE + 1];
15943 const reg_entry *r;
15944
15945 /* Skip possible REGISTER_PREFIX and possible whitespace. */
15946 if (*s == REGISTER_PREFIX)
15947 ++s;
15948
15949 if (is_space_char (*s))
15950 ++s;
15951
15952 p = reg_name_given;
15953 while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
15954 {
15955 if (p >= reg_name_given + MAX_REG_NAME_SIZE)
15956 return (const reg_entry *) NULL;
15957 s++;
15958 }
15959
15960 if (is_part_of_name (*s))
15961 return (const reg_entry *) NULL;
15962
15963 *end_op = (char *) s;
15964
15965 r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
15966
15967 /* Handle floating point regs, allowing spaces in the (i) part. */
15968 if (r == reg_st0)
15969 {
15970 if (!cpu_arch_flags.bitfield.cpu8087
15971 && !cpu_arch_flags.bitfield.cpu287
15972 && !cpu_arch_flags.bitfield.cpu387
15973 && !allow_pseudo_reg)
15974 return (const reg_entry *) NULL;
15975
15976 if (is_space_char (*s))
15977 ++s;
15978 if (*s == '(')
15979 {
15980 ++s;
15981 if (is_space_char (*s))
15982 ++s;
15983 if (*s >= '0' && *s <= '7')
15984 {
15985 int fpr = *s - '0';
15986 ++s;
15987 if (is_space_char (*s))
15988 ++s;
15989 if (*s == ')')
15990 {
15991 *end_op = (char *) s + 1;
15992 know (r[fpr].reg_num == fpr);
15993 return r + fpr;
15994 }
15995 }
15996 /* We have "%st(" then garbage. */
15997 return (const reg_entry *) NULL;
15998 }
15999 }
16000
16001 return r && check_register (r) ? r : NULL;
16002 }
16003
16004 /* REG_STRING starts *before* REGISTER_PREFIX. */
16005
16006 static const reg_entry *
16007 parse_register (const char *reg_string, char **end_op)
16008 {
16009 const reg_entry *r;
16010
16011 if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
16012 r = parse_real_register (reg_string, end_op);
16013 else
16014 r = NULL;
16015 if (!r)
16016 {
16017 char *save = input_line_pointer;
16018 char *buf = xstrdup (reg_string), *name;
16019 symbolS *symbolP;
16020
16021 input_line_pointer = buf;
16022 get_symbol_name (&name);
16023 symbolP = symbol_find (name);
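/* Follow a chain of symbols equated (without any offset) to other
symbols, so that a name aliased to a register symbol is still
recognized as that register below. */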
16024 while (symbolP && symbol_equated_p (symbolP))
16025 {
16026 const expressionS *e = symbol_get_value_expression(symbolP);
16027
16028 if (e->X_add_number)
16029 break;
16030 symbolP = e->X_add_symbol;
16031 }
16032 if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
16033 {
16034 const expressionS *e = symbol_get_value_expression (symbolP);
16035
16036 if (e->X_op == O_register)
16037 {
16038 know (e->X_add_number >= 0
16039 && (valueT) e->X_add_number < i386_regtab_size);
16040 r = i386_regtab + e->X_add_number;
16041 *end_op = (char *) reg_string + (input_line_pointer - buf);
16042 }
16043 if (r && !check_register (r))
16044 {
16045 as_bad (_("register '%s%s' cannot be used here"),
16046 register_prefix, r->reg_name);
16047 r = &bad_reg;
16048 }
16049 }
16050 input_line_pointer = save;
16051 free (buf);
16052 }
16053 return r;
16054 }
16055
16056 int
16057 i386_parse_name (char *name, expressionS *e, char *nextcharP)
16058 {
16059 const reg_entry *r = NULL;
16060 char *end = input_line_pointer;
16061
16062 /* We only know the terminating character here. It being double quote could
16063 be the closing one of a quoted symbol name, or an opening one from a
16064 following string (or another quoted symbol name). Since the latter can't
16065 be valid syntax for anything, bailing in either case is good enough. */
16066 if (*nextcharP == '"')
16067 return 0;
16068
16069 *end = *nextcharP;
16070 if (*name == REGISTER_PREFIX || allow_naked_reg)
16071 r = parse_real_register (name, &input_line_pointer);
16072 if (r && end <= input_line_pointer)
16073 {
16074 *nextcharP = *input_line_pointer;
16075 *input_line_pointer = 0;
16076 e->X_op = O_register;
16077 e->X_add_number = r - i386_regtab;
16078 return 1;
16079 }
16080 input_line_pointer = end;
16081 *end = 0;
16082 return intel_syntax ? i386_intel_parse_name (name, e) : 0;
16083 }
16084
16085 void
16086 md_operand (expressionS *e)
16087 {
16088 char *end;
16089 const reg_entry *r;
16090
16091 switch (*input_line_pointer)
16092 {
16093 case REGISTER_PREFIX:
16094 r = parse_real_register (input_line_pointer, &end);
16095 if (r)
16096 {
16097 e->X_op = O_register;
16098 e->X_add_number = r - i386_regtab;
16099 input_line_pointer = end;
16100 }
16101 break;
16102
16103 case '[':
16104 gas_assert (intel_syntax);
16105 end = input_line_pointer++;
16106 expression (e);
16107 if (*input_line_pointer == ']')
16108 {
16109 ++input_line_pointer;
16110 e->X_op_symbol = make_expr_symbol (e);
16111 e->X_add_symbol = NULL;
16112 e->X_add_number = 0;
16113 e->X_op = O_index;
16114 }
16115 else
16116 {
16117 e->X_op = O_absent;
16118 input_line_pointer = end;
16119 }
16120 break;
16121 }
16122 }
16123
16124 #ifdef BFD64
16125 /* To maintain consistency with !BFD64 builds of gas, record whether any
16126 (binary) operator was involved in an expression. As expressions are
16127 evaluated in only 32 bits when !BFD64, we use this to decide whether to
16128 truncate results. */
16129 bool i386_record_operator (operatorT op,
16130 const expressionS *left,
16131 const expressionS *right)
16132 {
16133 if (op == O_absent)
16134 return false;
16135
16136 if (!left)
16137 {
16138 /* Since the expression parser applies unary operators to bignum
16139 operands just fine, we don't need to be concerned about such
16140 operands not fitting in 32 bits. */
16141 if (right->X_op == O_constant && right->X_unsigned
16142 && !fits_in_unsigned_long (right->X_add_number))
16143 return false;
16144 }
16145 /* This isn't entirely right: The pattern can also result when constant
16146 expressions are folded (e.g. 0xffffffff + 1). */
16147 else if ((left->X_op == O_constant && left->X_unsigned
16148 && !fits_in_unsigned_long (left->X_add_number))
16149 || (right->X_op == O_constant && right->X_unsigned
16150 && !fits_in_unsigned_long (right->X_add_number)))
16151 expr_mode = expr_large_value;
16152
16153 if (expr_mode != expr_large_value)
16154 expr_mode = expr_operator_present;
16155
16156 return false;
16157 }
16158 #endif
16159 \f
16160 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16161 const char *md_shortopts = "kVQ:sqnO::";
16162 #else
16163 const char *md_shortopts = "qnO::";
16164 #endif
16165
16166 #define OPTION_32 (OPTION_MD_BASE + 0)
16167 #define OPTION_64 (OPTION_MD_BASE + 1)
16168 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
16169 #define OPTION_MARCH (OPTION_MD_BASE + 3)
16170 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
16171 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
16172 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
16173 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
16174 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
16175 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
16176 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
16177 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
16178 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
16179 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
16180 #define OPTION_X32 (OPTION_MD_BASE + 14)
16181 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
16182 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
16183 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
16184 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
16185 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
16186 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
16187 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
16188 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
16189 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
16190 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
16191 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
16192 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
16193 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
16194 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
16195 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
16196 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
16197 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
16198 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
16199 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
16200 #define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
16201
16202 struct option md_longopts[] =
16203 {
16204 {"32", no_argument, NULL, OPTION_32},
16205 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
16206 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
16207 {"64", no_argument, NULL, OPTION_64},
16208 #endif
16209 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16210 {"x32", no_argument, NULL, OPTION_X32},
16211 {"mshared", no_argument, NULL, OPTION_MSHARED},
16212 {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
16213 #endif
16214 {"divide", no_argument, NULL, OPTION_DIVIDE},
16215 {"march", required_argument, NULL, OPTION_MARCH},
16216 {"mtune", required_argument, NULL, OPTION_MTUNE},
16217 {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
16218 {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
16219 {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
16220 {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
16221 {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
16222 {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
16223 {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
16224 {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
16225 {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
16226 {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
16227 {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
16228 {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
16229 {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
16230 # if defined (TE_PE) || defined (TE_PEP)
16231 {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
16232 #endif
16233 {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
16234 {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
16235 {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
16236 {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
16237 {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
16238 {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
16239 {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
16240 {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
16241 {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
16242 {"mlfence-before-indirect-branch", required_argument, NULL,
16243 OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
16244 {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
16245 {"mamd64", no_argument, NULL, OPTION_MAMD64},
16246 {"mintel64", no_argument, NULL, OPTION_MINTEL64},
16247 {NULL, no_argument, NULL, 0}
16248 };
16249 size_t md_longopts_size = sizeof (md_longopts);
16250
16251 int
16252 md_parse_option (int c, const char *arg)
16253 {
16254 unsigned int j;
16255 char *arch, *next, *saved, *type;
16256
16257 switch (c)
16258 {
16259 case 'n':
16260 optimize_align_code = 0;
16261 break;
16262
16263 case 'q':
16264 quiet_warnings = 1;
16265 break;
16266
16267 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16268 /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
16269 should be emitted or not. FIXME: Not implemented. */
16270 case 'Q':
16271 if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
16272 return 0;
16273 break;
16274
16275 /* -V: SVR4 argument to print version ID. */
16276 case 'V':
16277 print_version_id ();
16278 break;
16279
16280 /* -k: Ignore for FreeBSD compatibility. */
16281 case 'k':
16282 break;
16283
16284 case 's':
16285 /* -s: On i386 Solaris, this tells the native assembler to use
16286 .stab instead of .stab.excl. We always use .stab anyhow. */
16287 break;
16288
16289 case OPTION_MSHARED:
16290 shared = 1;
16291 break;
16292
16293 case OPTION_X86_USED_NOTE:
16294 if (strcasecmp (arg, "yes") == 0)
16295 x86_used_note = 1;
16296 else if (strcasecmp (arg, "no") == 0)
16297 x86_used_note = 0;
16298 else
16299 as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
16300 break;
16301
16302
16303 #endif
16304 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
16305 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
16306 case OPTION_64:
16307 {
16308 const char **list, **l;
16309
16310 list = bfd_target_list ();
16311 for (l = list; *l != NULL; l++)
16312 if (startswith (*l, "elf64-x86-64")
16313 || strcmp (*l, "coff-x86-64") == 0
16314 || strcmp (*l, "pe-x86-64") == 0
16315 || strcmp (*l, "pei-x86-64") == 0
16316 || strcmp (*l, "mach-o-x86-64") == 0)
16317 {
16318 default_arch = "x86_64";
16319 break;
16320 }
16321 if (*l == NULL)
16322 as_fatal (_("no compiled in support for x86_64"));
16323 free (list);
16324 }
16325 break;
16326 #endif
16327
16328 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16329 case OPTION_X32:
16330 if (IS_ELF)
16331 {
16332 const char **list, **l;
16333
16334 list = bfd_target_list ();
16335 for (l = list; *l != NULL; l++)
16336 if (startswith (*l, "elf32-x86-64"))
16337 {
16338 default_arch = "x86_64:32";
16339 break;
16340 }
16341 if (*l == NULL)
16342 as_fatal (_("no compiled in support for 32bit x86_64"));
16343 free (list);
16344 }
16345 else
16346 as_fatal (_("32bit x86_64 is only supported for ELF"));
16347 break;
16348 #endif
16349
16350 case OPTION_32:
16351 {
16352 const char **list, **l;
16353
16354 list = bfd_target_list ();
16355 for (l = list; *l != NULL; l++)
16356 if (strstr (*l, "-i386")
16357 || strstr (*l, "-go32"))
16358 {
16359 default_arch = "i386";
16360 break;
16361 }
16362 if (*l == NULL)
16363 as_fatal (_("no compiled in support for ix86"));
16364 free (list);
16365 }
16366 break;
16367
16368 case OPTION_DIVIDE:
16369 #ifdef SVR4_COMMENT_CHARS
16370 {
16371 char *n, *t;
16372 const char *s;
16373
16374 n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
16375 t = n;
16376 for (s = i386_comment_chars; *s != '\0'; s++)
16377 if (*s != '/')
16378 *t++ = *s;
16379 *t = '\0';
16380 i386_comment_chars = n;
16381 }
16382 #endif
16383 break;
16384
16385 case OPTION_MARCH:
16386 saved = xstrdup (arg);
16387 arch = saved;
16388 /* Allow -march=+nosse. */
16389 if (*arch == '+')
16390 arch++;
16391 do
16392 {
16393 char *vsz;
16394
16395 if (*arch == '.')
16396 as_fatal (_("invalid -march= option: `%s'"), arg);
16397 next = strchr (arch, '+');
16398 if (next)
16399 *next++ = '\0';
16400 vsz = strchr (arch, '/');
16401 if (vsz)
16402 *vsz++ = '\0';
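/* An extension may carry a vector size specifier, e.g.
"-march=+avx10.1/256"; split it off here and let the vsz_set
handling below interpret it. */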
16403 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16404 {
16405 if (vsz && cpu_arch[j].vsz != vsz_set)
16406 continue;
16407
16408 if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
16409 && strcmp (arch, cpu_arch[j].name) == 0)
16410 {
16411 /* Processor. */
16412 if (! cpu_arch[j].enable.bitfield.cpui386)
16413 continue;
16414
16415 cpu_arch_name = cpu_arch[j].name;
16416 free (cpu_sub_arch_name);
16417 cpu_sub_arch_name = NULL;
16418 cpu_arch_flags = cpu_arch[j].enable;
16419 cpu_arch_isa = cpu_arch[j].type;
16420 cpu_arch_isa_flags = cpu_arch[j].enable;
16421 if (!cpu_arch_tune_set)
16422 cpu_arch_tune = cpu_arch_isa;
16423 vector_size = VSZ_DEFAULT;
16424 break;
16425 }
16426 else if (cpu_arch[j].type == PROCESSOR_NONE
16427 && strcmp (arch, cpu_arch[j].name) == 0
16428 && !cpu_flags_all_zero (&cpu_arch[j].enable))
16429 {
16430 /* ISA extension. */
16431 isa_enable (j);
16432
16433 switch (cpu_arch[j].vsz)
16434 {
16435 default:
16436 break;
16437
16438 case vsz_set:
16439 if (vsz)
16440 {
16441 char *end;
16442 unsigned long val = strtoul (vsz, &end, 0);
16443
16444 if (*end)
16445 val = 0;
16446 switch (val)
16447 {
16448 case 512: vector_size = VSZ512; break;
16449 case 256: vector_size = VSZ256; break;
16450 case 128: vector_size = VSZ128; break;
16451 default:
16452 as_warn (_("Unrecognized vector size specifier ignored"));
16453 break;
16454 }
16455 break;
16456 }
16457 /* Fall through. */
16458 case vsz_reset:
16459 vector_size = VSZ_DEFAULT;
16460 break;
16461 }
16462
16463 break;
16464 }
16465 }
16466
16467 if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
16468 {
16469 /* Disable an ISA extension. */
16470 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16471 if (cpu_arch[j].type == PROCESSOR_NONE
16472 && strcmp (arch + 2, cpu_arch[j].name) == 0)
16473 {
16474 isa_disable (j);
16475 if (cpu_arch[j].vsz == vsz_set)
16476 vector_size = VSZ_DEFAULT;
16477 break;
16478 }
16479 }
16480
16481 if (j >= ARRAY_SIZE (cpu_arch))
16482 as_fatal (_("invalid -march= option: `%s'"), arg);
16483
16484 arch = next;
16485 }
16486 while (next != NULL);
16487 free (saved);
16488 break;
16489
16490 case OPTION_MTUNE:
16491 if (*arg == '.')
16492 as_fatal (_("invalid -mtune= option: `%s'"), arg);
16493 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16494 {
16495 if (cpu_arch[j].type != PROCESSOR_NONE
16496 && strcmp (arg, cpu_arch[j].name) == 0)
16497 {
16498 cpu_arch_tune_set = 1;
16499 cpu_arch_tune = cpu_arch [j].type;
16500 break;
16501 }
16502 }
16503 if (j >= ARRAY_SIZE (cpu_arch))
16504 as_fatal (_("invalid -mtune= option: `%s'"), arg);
16505 break;
16506
16507 case OPTION_MMNEMONIC:
16508 if (strcasecmp (arg, "att") == 0)
16509 intel_mnemonic = 0;
16510 else if (strcasecmp (arg, "intel") == 0)
16511 intel_mnemonic = 1;
16512 else
16513 as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
16514 break;
16515
16516 case OPTION_MSYNTAX:
16517 if (strcasecmp (arg, "att") == 0)
16518 _set_intel_syntax (0);
16519 else if (strcasecmp (arg, "intel") == 0)
16520 _set_intel_syntax (1);
16521 else
16522 as_fatal (_("invalid -msyntax= option: `%s'"), arg);
16523 break;
16524
16525 case OPTION_MINDEX_REG:
16526 allow_index_reg = 1;
16527 break;
16528
16529 case OPTION_MNAKED_REG:
16530 allow_naked_reg = 1;
16531 register_prefix = "";
16532 break;
16533
16534 case OPTION_MSSE2AVX:
16535 sse2avx = 1;
16536 break;
16537
16538 case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
16539 use_unaligned_vector_move = 1;
16540 break;
16541
16542 case OPTION_MSSE_CHECK:
16543 if (strcasecmp (arg, "error") == 0)
16544 sse_check = check_error;
16545 else if (strcasecmp (arg, "warning") == 0)
16546 sse_check = check_warning;
16547 else if (strcasecmp (arg, "none") == 0)
16548 sse_check = check_none;
16549 else
16550 as_fatal (_("invalid -msse-check= option: `%s'"), arg);
16551 break;
16552
16553 case OPTION_MOPERAND_CHECK:
16554 if (strcasecmp (arg, "error") == 0)
16555 operand_check = check_error;
16556 else if (strcasecmp (arg, "warning") == 0)
16557 operand_check = check_warning;
16558 else if (strcasecmp (arg, "none") == 0)
16559 operand_check = check_none;
16560 else
16561 as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
16562 break;
16563
16564 case OPTION_MAVXSCALAR:
16565 if (strcasecmp (arg, "128") == 0)
16566 avxscalar = vex128;
16567 else if (strcasecmp (arg, "256") == 0)
16568 avxscalar = vex256;
16569 else
16570 as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
16571 break;
16572
16573 case OPTION_MVEXWIG:
16574 if (strcmp (arg, "0") == 0)
16575 vexwig = vexw0;
16576 else if (strcmp (arg, "1") == 0)
16577 vexwig = vexw1;
16578 else
16579 as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
16580 break;
16581
16582 case OPTION_MADD_BND_PREFIX:
16583 add_bnd_prefix = 1;
16584 break;
16585
16586 case OPTION_MEVEXLIG:
16587 if (strcmp (arg, "128") == 0)
16588 evexlig = evexl128;
16589 else if (strcmp (arg, "256") == 0)
16590 evexlig = evexl256;
16591 else if (strcmp (arg, "512") == 0)
16592 evexlig = evexl512;
16593 else
16594 as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
16595 break;
16596
16597 case OPTION_MEVEXRCIG:
16598 if (strcmp (arg, "rne") == 0)
16599 evexrcig = rne;
16600 else if (strcmp (arg, "rd") == 0)
16601 evexrcig = rd;
16602 else if (strcmp (arg, "ru") == 0)
16603 evexrcig = ru;
16604 else if (strcmp (arg, "rz") == 0)
16605 evexrcig = rz;
16606 else
16607 as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
16608 break;
16609
16610 case OPTION_MEVEXWIG:
16611 if (strcmp (arg, "0") == 0)
16612 evexwig = evexw0;
16613 else if (strcmp (arg, "1") == 0)
16614 evexwig = evexw1;
16615 else
16616 as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
16617 break;
16618
16619 # if defined (TE_PE) || defined (TE_PEP)
16620 case OPTION_MBIG_OBJ:
16621 use_big_obj = 1;
16622 break;
16623 #endif
16624
16625 case OPTION_MOMIT_LOCK_PREFIX:
16626 if (strcasecmp (arg, "yes") == 0)
16627 omit_lock_prefix = 1;
16628 else if (strcasecmp (arg, "no") == 0)
16629 omit_lock_prefix = 0;
16630 else
16631 as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
16632 break;
16633
16634 case OPTION_MFENCE_AS_LOCK_ADD:
16635 if (strcasecmp (arg, "yes") == 0)
16636 avoid_fence = 1;
16637 else if (strcasecmp (arg, "no") == 0)
16638 avoid_fence = 0;
16639 else
16640 as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
16641 break;
16642
16643 case OPTION_MLFENCE_AFTER_LOAD:
16644 if (strcasecmp (arg, "yes") == 0)
16645 lfence_after_load = 1;
16646 else if (strcasecmp (arg, "no") == 0)
16647 lfence_after_load = 0;
16648 else
16649 as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
16650 break;
16651
16652 case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
16653 if (strcasecmp (arg, "all") == 0)
16654 {
16655 lfence_before_indirect_branch = lfence_branch_all;
16656 if (lfence_before_ret == lfence_before_ret_none)
16657 lfence_before_ret = lfence_before_ret_shl;
16658 }
16659 else if (strcasecmp (arg, "memory") == 0)
16660 lfence_before_indirect_branch = lfence_branch_memory;
16661 else if (strcasecmp (arg, "register") == 0)
16662 lfence_before_indirect_branch = lfence_branch_register;
16663 else if (strcasecmp (arg, "none") == 0)
16664 lfence_before_indirect_branch = lfence_branch_none;
16665 else
16666 as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
16667 arg);
16668 break;
16669
16670 case OPTION_MLFENCE_BEFORE_RET:
16671 if (strcasecmp (arg, "or") == 0)
16672 lfence_before_ret = lfence_before_ret_or;
16673 else if (strcasecmp (arg, "not") == 0)
16674 lfence_before_ret = lfence_before_ret_not;
16675 else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
16676 lfence_before_ret = lfence_before_ret_shl;
16677 else if (strcasecmp (arg, "none") == 0)
16678 lfence_before_ret = lfence_before_ret_none;
16679 else
16680 as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
16681 arg);
16682 break;
16683
16684 case OPTION_MRELAX_RELOCATIONS:
16685 if (strcasecmp (arg, "yes") == 0)
16686 generate_relax_relocations = 1;
16687 else if (strcasecmp (arg, "no") == 0)
16688 generate_relax_relocations = 0;
16689 else
16690 as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
16691 break;
16692
16693 case OPTION_MALIGN_BRANCH_BOUNDARY:
16694 {
16695 char *end;
16696 long int align = strtoul (arg, &end, 0);
16697 if (*end == '\0')
16698 {
16699 if (align == 0)
16700 {
16701 align_branch_power = 0;
16702 break;
16703 }
16704 else if (align >= 16)
16705 {
16706 int align_power;
16707 for (align_power = 0;
16708 (align & 1) == 0;
16709 align >>= 1, align_power++)
16710 continue;
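/* E.g. a boundary of 32 shifts down to 1 in five steps, leaving
align_power == 5. */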
16711 /* Limit alignment power to 31. */
16712 if (align == 1 && align_power < 32)
16713 {
16714 align_branch_power = align_power;
16715 break;
16716 }
16717 }
16718 }
16719 as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
16720 }
16721 break;
16722
16723 case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
16724 {
16725 char *end;
16726 int align = strtoul (arg, &end, 0);
16727 /* Some processors only support 5 prefixes. */
16728 if (*end == '\0' && align >= 0 && align < 6)
16729 {
16730 align_branch_prefix_size = align;
16731 break;
16732 }
16733 as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
16734 arg);
16735 }
16736 break;
16737
16738 case OPTION_MALIGN_BRANCH:
16739 align_branch = 0;
16740 saved = xstrdup (arg);
16741 type = saved;
16742 do
16743 {
16744 next = strchr (type, '+');
16745 if (next)
16746 *next++ = '\0';
16747 if (strcasecmp (type, "jcc") == 0)
16748 align_branch |= align_branch_jcc_bit;
16749 else if (strcasecmp (type, "fused") == 0)
16750 align_branch |= align_branch_fused_bit;
16751 else if (strcasecmp (type, "jmp") == 0)
16752 align_branch |= align_branch_jmp_bit;
16753 else if (strcasecmp (type, "call") == 0)
16754 align_branch |= align_branch_call_bit;
16755 else if (strcasecmp (type, "ret") == 0)
16756 align_branch |= align_branch_ret_bit;
16757 else if (strcasecmp (type, "indirect") == 0)
16758 align_branch |= align_branch_indirect_bit;
16759 else
16760 as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
16761 type = next;
16762 }
16763 while (next != NULL);
16764 free (saved);
16765 break;
16766
16767 case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
16768 align_branch_power = 5;
16769 align_branch_prefix_size = 5;
16770 align_branch = (align_branch_jcc_bit
16771 | align_branch_fused_bit
16772 | align_branch_jmp_bit);
16773 break;
16774
16775 case OPTION_MAMD64:
16776 isa64 = amd64;
16777 break;
16778
16779 case OPTION_MINTEL64:
16780 isa64 = intel64;
16781 break;
16782
16783 case 'O':
16784 if (arg == NULL)
16785 {
16786 optimize = 1;
16787 /* Turn off -Os. */
16788 optimize_for_space = 0;
16789 }
16790 else if (*arg == 's')
16791 {
16792 optimize_for_space = 1;
16793 /* Turn on all encoding optimizations. */
16794 optimize = INT_MAX;
16795 }
16796 else
16797 {
16798 optimize = atoi (arg);
16799 /* Turn off -Os. */
16800 optimize_for_space = 0;
16801 }
16802 break;
16803
16804 default:
16805 return 0;
16806 }
16807 return 1;
16808 }
16809
16810 #define MESSAGE_TEMPLATE \
16811 " "
16812
16813 static char *
16814 output_message (FILE *stream, char *p, char *message, char *start,
16815 int *left_p, const char *name, int len)
16816 {
16817 int size = sizeof (MESSAGE_TEMPLATE);
16818 int left = *left_p;
16819
16820 /* Reserve 2 spaces for ", " or ",\0" */
16821 left -= len + 2;
16822
16823 /* Check if there is any room. */
16824 if (left >= 0)
16825 {
16826 if (p != start)
16827 {
16828 *p++ = ',';
16829 *p++ = ' ';
16830 }
16831 p = mempcpy (p, name, len);
16832 }
16833 else
16834 {
16835 /* Output the current message now and start a new one. */
16836 *p++ = ',';
16837 *p = '\0';
16838 fprintf (stream, "%s\n", message);
16839 p = start;
16840 left = size - (start - message) - len - 2;
16841
16842 gas_assert (left >= 0);
16843
16844 p = mempcpy (p, name, len);
16845 }
16846
16847 *left_p = left;
16848 return p;
16849 }
16850
16851 static void
16852 show_arch (FILE *stream, int ext, int check)
16853 {
16854 static char message[] = MESSAGE_TEMPLATE;
16855 char *start = message + 27;
16856 char *p;
16857 int size = sizeof (MESSAGE_TEMPLATE);
16858 int left;
16859 const char *name;
16860 int len;
16861 unsigned int j;
16862
16863 p = start;
16864 left = size - (start - message);
16865
16866 if (!ext && check)
16867 {
16868 p = output_message (stream, p, message, start, &left,
16869 STRING_COMMA_LEN ("default"));
16870 p = output_message (stream, p, message, start, &left,
16871 STRING_COMMA_LEN ("push"));
16872 p = output_message (stream, p, message, start, &left,
16873 STRING_COMMA_LEN ("pop"));
16874 }
16875
16876 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16877 {
16878 /* Should it be skipped? */
16879 if (cpu_arch [j].skip)
16880 continue;
16881
16882 name = cpu_arch [j].name;
16883 len = cpu_arch [j].len;
16884 if (cpu_arch[j].type == PROCESSOR_NONE)
16885 {
16886 /* It is an extension. Skip if we aren't asked to show it. */
16887 if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
16888 continue;
16889 }
16890 else if (ext)
16891 {
16892 /* It is a processor. Skip if we only show extensions. */
16893 continue;
16894 }
16895 else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
16896 {
16897 /* It is an impossible processor - skip. */
16898 continue;
16899 }
16900
16901 p = output_message (stream, p, message, start, &left, name, len);
16902 }
16903
16904 /* Display disabled extensions. */
16905 if (ext)
16906 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
16907 {
16908 char *str;
16909
16910 if (cpu_arch[j].type != PROCESSOR_NONE
16911 || !cpu_flags_all_zero (&cpu_arch[j].enable))
16912 continue;
16913 str = xasprintf ("no%s", cpu_arch[j].name);
16914 p = output_message (stream, p, message, start, &left, str,
16915 strlen (str));
16916 free (str);
16917 }
16918
16919 *p = '\0';
16920 fprintf (stream, "%s\n", message);
16921 }
16922
16923 void
16924 md_show_usage (FILE *stream)
16925 {
16926 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16927 fprintf (stream, _("\
16928 -Qy, -Qn ignored\n\
16929 -V print assembler version number\n\
16930 -k ignored\n"));
16931 #endif
16932 fprintf (stream, _("\
16933 -n do not optimize code alignment\n\
16934 -O{012s} attempt some code optimizations\n\
16935 -q quieten some warnings\n"));
16936 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16937 fprintf (stream, _("\
16938 -s ignored\n"));
16939 #endif
16940 #ifdef BFD64
16941 # if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
16942 fprintf (stream, _("\
16943 --32/--64/--x32 generate 32bit/64bit/x32 object\n"));
16944 # elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
16945 fprintf (stream, _("\
16946 --32/--64 generate 32bit/64bit object\n"));
16947 # endif
16948 #endif
16949 #ifdef SVR4_COMMENT_CHARS
16950 fprintf (stream, _("\
16951 --divide do not treat `/' as a comment character\n"));
16952 #else
16953 fprintf (stream, _("\
16954 --divide ignored\n"));
16955 #endif
16956 fprintf (stream, _("\
16957 -march=CPU[,+EXTENSION...]\n\
16958 generate code for CPU and EXTENSION, CPU is one of:\n"));
16959 show_arch (stream, 0, 1);
16960 fprintf (stream, _("\
16961 EXTENSION is combination of (possibly \"no\"-prefixed):\n"));
16962 show_arch (stream, 1, 0);
16963 fprintf (stream, _("\
16964 -mtune=CPU optimize for CPU, CPU is one of:\n"));
16965 show_arch (stream, 0, 0);
16966 fprintf (stream, _("\
16967 -msse2avx encode SSE instructions with VEX prefix\n"));
16968 fprintf (stream, _("\
16969 -muse-unaligned-vector-move\n\
16970 encode aligned vector move as unaligned vector move\n"));
16971 fprintf (stream, _("\
16972 -msse-check=[none|error|warning] (default: none)\n\
16973 check SSE instructions\n"));
16974 fprintf (stream, _("\
16975 -moperand-check=[none|error|warning] (default: warning)\n\
16976 check operand combinations for validity\n"));
16977 fprintf (stream, _("\
16978 -mavxscalar=[128|256] (default: 128)\n\
16979 encode scalar AVX instructions with specific vector\n\
16980 length\n"));
16981 fprintf (stream, _("\
16982 -mvexwig=[0|1] (default: 0)\n\
16983 encode VEX instructions with specific VEX.W value\n\
16984 for VEX.W bit ignored instructions\n"));
16985 fprintf (stream, _("\
16986 -mevexlig=[128|256|512] (default: 128)\n\
16987 encode scalar EVEX instructions with specific vector\n\
16988 length\n"));
16989 fprintf (stream, _("\
16990 -mevexwig=[0|1] (default: 0)\n\
16991 encode EVEX instructions with specific EVEX.W value\n\
16992 for EVEX.W bit ignored instructions\n"));
16993 fprintf (stream, _("\
16994 -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
16995 encode EVEX instructions with specific EVEX.RC value\n\
16996 for SAE-only ignored instructions\n"));
16997 fprintf (stream, _("\
16998 -mmnemonic=[att|intel] "));
16999 if (SYSV386_COMPAT)
17000 fprintf (stream, _("(default: att)\n"));
17001 else
17002 fprintf (stream, _("(default: intel)\n"));
17003 fprintf (stream, _("\
17004 use AT&T/Intel mnemonic (AT&T syntax only)\n"));
17005 fprintf (stream, _("\
17006 -msyntax=[att|intel] (default: att)\n\
17007 use AT&T/Intel syntax\n"));
17008 fprintf (stream, _("\
17009 -mindex-reg support pseudo index registers\n"));
17010 fprintf (stream, _("\
17011 -mnaked-reg don't require `%%' prefix for registers\n"));
17012 fprintf (stream, _("\
17013 -madd-bnd-prefix add BND prefix for all valid branches\n"));
17014 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17015 fprintf (stream, _("\
17016 -mshared disable branch optimization for shared code\n"));
17017 fprintf (stream, _("\
17018 -mx86-used-note=[no|yes] "));
17019 if (DEFAULT_X86_USED_NOTE)
17020 fprintf (stream, _("(default: yes)\n"));
17021 else
17022 fprintf (stream, _("(default: no)\n"));
17023 fprintf (stream, _("\
17024 generate x86 used ISA and feature properties\n"));
17025 #endif
17026 #if defined (TE_PE) || defined (TE_PEP)
17027 fprintf (stream, _("\
17028 -mbig-obj generate big object files\n"));
17029 #endif
17030 fprintf (stream, _("\
17031 -momit-lock-prefix=[no|yes] (default: no)\n\
17032 strip all lock prefixes\n"));
17033 fprintf (stream, _("\
17034 -mfence-as-lock-add=[no|yes] (default: no)\n\
17035 encode lfence, mfence and sfence as\n\
17036 lock addl $0x0, (%%{re}sp)\n"));
17037 fprintf (stream, _("\
17038 -mrelax-relocations=[no|yes] "));
17039 if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
17040 fprintf (stream, _("(default: yes)\n"));
17041 else
17042 fprintf (stream, _("(default: no)\n"));
17043 fprintf (stream, _("\
17044 generate relax relocations\n"));
17045 fprintf (stream, _("\
17046 -malign-branch-boundary=NUM (default: 0)\n\
17047 align branches within NUM byte boundary\n"));
17048 fprintf (stream, _("\
17049 -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
17050 TYPE is combination of jcc, fused, jmp, call, ret,\n\
17051 indirect\n\
17052 specify types of branches to align\n"));
17053 fprintf (stream, _("\
17054 -malign-branch-prefix-size=NUM (default: 5)\n\
17055 align branches with NUM prefixes per instruction\n"));
17056 fprintf (stream, _("\
17057 -mbranches-within-32B-boundaries\n\
17058 align branches within 32 byte boundary\n"));
17059 fprintf (stream, _("\
17060 -mlfence-after-load=[no|yes] (default: no)\n\
17061 generate lfence after load\n"));
17062 fprintf (stream, _("\
17063 -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
17064 generate lfence before indirect near branch\n"));
17065 fprintf (stream, _("\
17066 -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
17067 generate lfence before ret\n"));
17068 fprintf (stream, _("\
17069 -mamd64 accept only AMD64 ISA [default]\n"));
17070 fprintf (stream, _("\
17071 -mintel64 accept only Intel64 ISA\n"));
17072 }
17073
17074 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
17075 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
17076 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
17077
17078 /* Pick the target format to use. */
17079
17080 const char *
17081 i386_target_format (void)
17082 {
17083 if (startswith (default_arch, "x86_64"))
17084 {
17085 update_code_flag (CODE_64BIT, 1);
17086 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17087 if (default_arch[6] == '\0')
17088 x86_elf_abi = X86_64_ABI;
17089 else
17090 x86_elf_abi = X86_64_X32_ABI;
17091 #endif
17092 }
17093 else if (!strcmp (default_arch, "i386"))
17094 update_code_flag (CODE_32BIT, 1);
17095 else if (!strcmp (default_arch, "iamcu"))
17096 {
17097 update_code_flag (CODE_32BIT, 1);
17098 if (cpu_arch_isa == PROCESSOR_UNKNOWN)
17099 {
17100 static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
17101 cpu_arch_name = "iamcu";
17102 free (cpu_sub_arch_name);
17103 cpu_sub_arch_name = NULL;
17104 cpu_arch_flags = iamcu_flags;
17105 cpu_arch_isa = PROCESSOR_IAMCU;
17106 cpu_arch_isa_flags = iamcu_flags;
17107 if (!cpu_arch_tune_set)
17108 cpu_arch_tune = PROCESSOR_IAMCU;
17109 }
17110 else if (cpu_arch_isa != PROCESSOR_IAMCU)
17111 as_fatal (_("Intel MCU doesn't support `%s' architecture"),
17112 cpu_arch_name);
17113 }
17114 else
17115 as_fatal (_("unknown architecture"));
17116
17117 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17118 if (IS_ELF && flag_synth_cfi && x86_elf_abi != X86_64_ABI)
17119 as_fatal (_("SCFI is not supported for this ABI"));
17120 #endif
17121
17122 if (cpu_flags_all_zero (&cpu_arch_isa_flags))
17123 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
17124
17125 switch (OUTPUT_FLAVOR)
17126 {
17127 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
17128 case bfd_target_aout_flavour:
17129 return AOUT_TARGET_FORMAT;
17130 #endif
17131 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
17132 # if defined (TE_PE) || defined (TE_PEP)
17133 case bfd_target_coff_flavour:
17134 if (flag_code == CODE_64BIT)
17135 {
17136 object_64bit = 1;
17137 return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
17138 }
17139 return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
17140 # elif defined (TE_GO32)
17141 case bfd_target_coff_flavour:
17142 return "coff-go32";
17143 # else
17144 case bfd_target_coff_flavour:
17145 return "coff-i386";
17146 # endif
17147 #endif
17148 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
17149 case bfd_target_elf_flavour:
17150 {
17151 const char *format;
17152
17153 switch (x86_elf_abi)
17154 {
17155 default:
17156 format = ELF_TARGET_FORMAT;
17157 #ifndef TE_SOLARIS
17158 tls_get_addr = "___tls_get_addr";
17159 #endif
17160 break;
17161 case X86_64_ABI:
17162 use_rela_relocations = 1;
17163 object_64bit = 1;
17164 #ifndef TE_SOLARIS
17165 tls_get_addr = "__tls_get_addr";
17166 #endif
17167 format = ELF_TARGET_FORMAT64;
17168 break;
17169 case X86_64_X32_ABI:
17170 use_rela_relocations = 1;
17171 object_64bit = 1;
17172 #ifndef TE_SOLARIS
17173 tls_get_addr = "__tls_get_addr";
17174 #endif
17175 disallow_64bit_reloc = 1;
17176 format = ELF_TARGET_FORMAT32;
17177 break;
17178 }
17179 if (cpu_arch_isa == PROCESSOR_IAMCU)
17180 {
17181 if (x86_elf_abi != I386_ABI)
17182 as_fatal (_("Intel MCU is 32-bit only"));
17183 return ELF_TARGET_IAMCU_FORMAT;
17184 }
17185 else
17186 return format;
17187 }
17188 #endif
17189 #if defined (OBJ_MACH_O)
17190 case bfd_target_mach_o_flavour:
17191 if (flag_code == CODE_64BIT)
17192 {
17193 use_rela_relocations = 1;
17194 object_64bit = 1;
17195 return "mach-o-x86-64";
17196 }
17197 else
17198 return "mach-o-i386";
17199 #endif
17200 default:
17201 abort ();
17202 return NULL;
17203 }
17204 }
17205
17206 #endif /* OBJ_MAYBE_ more than one */
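
/* Illustrative mapping, assuming common configurations: with
   default_arch "x86_64" and an ELF flavour, ELF_TARGET_FORMAT64 is
   usually "elf64-x86-64", ELF_TARGET_FORMAT32 (x32) "elf32-x86-64",
   and ELF_TARGET_FORMAT "elf32-i386".  The exact strings come from
   the target configury, so treat these as examples, not guarantees.  */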
17207 \f
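/* Lazily create the _GLOBAL_OFFSET_TABLE_ symbol on first reference.
   The three explicit character comparisons below are a cheap
   pre-filter so the full strcmp only runs for names that could
   plausibly match.  */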
17208 symbolS *
17209 md_undefined_symbol (char *name)
17210 {
17211 if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
17212 && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
17213 && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
17214 && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
17215 {
17216 if (!GOT_symbol)
17217 {
17218 if (symbol_find (name))
17219 as_bad (_("GOT already in symbol table"));
17220 GOT_symbol = symbol_new (name, undefined_section,
17221 &zero_address_frag, 0);
17222 }
17223 return GOT_symbol;
17224 }
17225 return 0;
17226 }
17227
17228 #if defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT)
17229 /* Round up a section size to the appropriate boundary. */
17230
17231 valueT
17232 md_section_align (segT segment, valueT size)
17233 {
17234 if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
17235 {
17236 /* For a.out, force the section size to be aligned. If we don't do
17237 this, BFD will align it for us, but it will not write out the
17238 final bytes of the section. This may be a bug in BFD, but it is
17239 easier to fix it here since that is how the other a.out targets
17240 work. */
17241 int align;
17242
17243 align = bfd_section_alignment (segment);
17244 size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
17245 }
17246
17247 return size;
17248 }
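
/* Worked example of the rounding above: with an alignment power of 2
   (a 4-byte boundary), a size of 5 becomes (5 + 3) & ~3 = 8, while a
   size of 8 is already aligned and stays 8.  */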
17249 #endif
17250
17251 /* On the i386, PC-relative offsets are relative to the start of the
17252 next instruction. That is, the address of the offset, plus its
17253 size, since the offset is always the last part of the insn. */
17254
17255 long
17256 md_pcrel_from (fixS *fixP)
17257 {
17258 return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
17259 }
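
/* Worked example: for a 5-byte "call rel32" whose opcode byte is at
   address 0x100, the 4-byte offset starts at 0x101, so the value
   returned above is 0x101 + 4 = 0x105 and the encoded displacement
   is target - 0x105.  */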
17260
17261 #ifdef OBJ_AOUT
17262
17263 static void
17264 s_bss (int ignore ATTRIBUTE_UNUSED)
17265 {
17266 int temp;
17267
17268 temp = get_absolute_expression ();
17269 subseg_set (bss_section, (subsegT) temp);
17270 demand_empty_rest_of_line ();
17271 }
17272
17273 #endif
17274
17275 /* Remember constant directive. */
17276
17277 void
17278 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
17279 {
17280 struct last_insn *last_insn
17281 = &seg_info (now_seg)->tc_segment_info_data.last_insn;
17282
17283 if (bfd_section_flags (now_seg) & SEC_CODE)
17284 {
17285 last_insn->kind = last_insn_directive;
17286 last_insn->name = "constant directive";
17287 last_insn->file = as_where (&last_insn->line);
17288 }
17289 }
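
/* A hedged note: the record made above lets the branch-padding and
   lfence-insertion logic elsewhere in this file see that the last
   thing emitted in this code section was a data directive rather than
   an instruction, so it can avoid (or warn about) padding after it.  */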
17290
17291 int
17292 i386_validate_fix (fixS *fixp)
17293 {
17294 if (fixp->fx_addsy && S_GET_SEGMENT (fixp->fx_addsy) == reg_section)
17295 {
17296 reloc_howto_type *howto;
17297
17298 howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
17299 as_bad_where (fixp->fx_file, fixp->fx_line,
17300 _("invalid %s relocation against register"),
17301 howto ? howto->name : "<unknown>");
17302 return 0;
17303 }
17304
17305 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17306 if (fixp->fx_r_type == BFD_RELOC_SIZE32
17307 || fixp->fx_r_type == BFD_RELOC_SIZE64)
17308 return IS_ELF && fixp->fx_addsy
17309 && (!S_IS_DEFINED (fixp->fx_addsy)
17310 || S_IS_EXTERNAL (fixp->fx_addsy));
17311
17312 /* BFD_RELOC_X86_64_GOTTPOFF:
17313 1. fx_tcbit -> BFD_RELOC_X86_64_CODE_4_GOTTPOFF
17314 2. fx_tcbit2 -> BFD_RELOC_X86_64_CODE_6_GOTTPOFF
17315 BFD_RELOC_X86_64_GOTPC32_TLSDESC:
17316 1. fx_tcbit -> BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC
17317 BFD_RELOC_32_PCREL:
17318 1. fx_tcbit -> BFD_RELOC_X86_64_GOTPCRELX
17319 2. fx_tcbit2 -> BFD_RELOC_X86_64_REX_GOTPCRELX
17320 3. fx_tcbit3 -> BFD_RELOC_X86_64_CODE_4_GOTPCRELX
17321 4. else -> BFD_RELOC_X86_64_GOTPCREL
17322 */
17323 if (fixp->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF)
17324 {
17325 if (fixp->fx_tcbit)
17326 fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTTPOFF;
17327 else if (fixp->fx_tcbit2)
17328 fixp->fx_r_type = BFD_RELOC_X86_64_CODE_6_GOTTPOFF;
17329 }
17330 else if (fixp->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
17331 && fixp->fx_tcbit)
17332 fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC;
17333 #endif
17334
17335 if (fixp->fx_subsy)
17336 {
17337 if (fixp->fx_subsy == GOT_symbol)
17338 {
17339 if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
17340 {
17341 if (!object_64bit)
17342 abort ();
17343 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17344 if (fixp->fx_tcbit)
17345 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCRELX;
17346 else if (fixp->fx_tcbit2)
17347 fixp->fx_r_type = BFD_RELOC_X86_64_REX_GOTPCRELX;
17348 else if (fixp->fx_tcbit3)
17349 fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTPCRELX;
17350 else
17351 #endif
17352 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
17353 }
17354 else
17355 {
17356 if (!object_64bit)
17357 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
17358 else
17359 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
17360 }
17361 fixp->fx_subsy = 0;
17362 }
17363 }
17364 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17365 else
17366 {
17367 /* NB: Commit 292676c1 resolved PLT32 reloc against local symbol
17368 to section. Since PLT32 relocations must be against symbols,
17369 turn such PLT32 relocations into PC32 relocations. */
17370 if (fixp->fx_addsy
17371 && (fixp->fx_r_type == BFD_RELOC_386_PLT32
17372 || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
17373 && symbol_section_p (fixp->fx_addsy))
17374 fixp->fx_r_type = BFD_RELOC_32_PCREL;
17375 if (!object_64bit)
17376 {
17377 if (fixp->fx_r_type == BFD_RELOC_386_GOT32
17378 && fixp->fx_tcbit2)
17379 fixp->fx_r_type = BFD_RELOC_386_GOT32X;
17380 }
17381 }
17382 #endif
17383
17384 return 1;
17385 }
17386
17387 arelent *
17388 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
17389 {
17390 arelent *rel;
17391 bfd_reloc_code_real_type code;
17392
17393 switch (fixp->fx_r_type)
17394 {
17395 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17396 symbolS *sym;
17397
17398 case BFD_RELOC_SIZE32:
17399 case BFD_RELOC_SIZE64:
17400 if (fixp->fx_addsy
17401 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
17402 && (!fixp->fx_subsy
17403 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
17404 sym = fixp->fx_addsy;
17405 else if (fixp->fx_subsy
17406 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
17407 && (!fixp->fx_addsy
17408 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
17409 sym = fixp->fx_subsy;
17410 else
17411 sym = NULL;
17412 if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
17413 {
17414 /* Resolve size relocation against local symbol to size of
17415 the symbol plus addend. */
17416 valueT value = S_GET_SIZE (sym);
17417
17418 if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
17419 value = bfd_section_size (S_GET_SEGMENT (sym));
17420 if (sym == fixp->fx_subsy)
17421 {
17422 value = -value;
17423 if (fixp->fx_addsy)
17424 value += S_GET_VALUE (fixp->fx_addsy);
17425 }
17426 else if (fixp->fx_subsy)
17427 value -= S_GET_VALUE (fixp->fx_subsy);
17428 value += fixp->fx_offset;
17429 if (fixp->fx_r_type == BFD_RELOC_SIZE32
17430 && object_64bit
17431 && !fits_in_unsigned_long (value))
17432 as_bad_where (fixp->fx_file, fixp->fx_line,
17433 _("symbol size computation overflow"));
17434 fixp->fx_addsy = NULL;
17435 fixp->fx_subsy = NULL;
17436 md_apply_fix (fixp, (valueT *) &value, NULL);
17437 return NULL;
17438 }
17439 if (!fixp->fx_addsy || fixp->fx_subsy)
17440 {
17441 as_bad_where (fixp->fx_file, fixp->fx_line,
17442 "unsupported expression involving @size");
17443 return NULL;
17444 }
17445 #endif
17446 /* Fall through. */
17447
17448 case BFD_RELOC_X86_64_PLT32:
17449 case BFD_RELOC_X86_64_GOT32:
17450 case BFD_RELOC_X86_64_GOTPCREL:
17451 case BFD_RELOC_X86_64_GOTPCRELX:
17452 case BFD_RELOC_X86_64_REX_GOTPCRELX:
17453 case BFD_RELOC_X86_64_CODE_4_GOTPCRELX:
17454 case BFD_RELOC_386_PLT32:
17455 case BFD_RELOC_386_GOT32:
17456 case BFD_RELOC_386_GOT32X:
17457 case BFD_RELOC_386_GOTOFF:
17458 case BFD_RELOC_386_GOTPC:
17459 case BFD_RELOC_386_TLS_GD:
17460 case BFD_RELOC_386_TLS_LDM:
17461 case BFD_RELOC_386_TLS_LDO_32:
17462 case BFD_RELOC_386_TLS_IE_32:
17463 case BFD_RELOC_386_TLS_IE:
17464 case BFD_RELOC_386_TLS_GOTIE:
17465 case BFD_RELOC_386_TLS_LE_32:
17466 case BFD_RELOC_386_TLS_LE:
17467 case BFD_RELOC_386_TLS_GOTDESC:
17468 case BFD_RELOC_386_TLS_DESC_CALL:
17469 case BFD_RELOC_X86_64_TLSGD:
17470 case BFD_RELOC_X86_64_TLSLD:
17471 case BFD_RELOC_X86_64_DTPOFF32:
17472 case BFD_RELOC_X86_64_DTPOFF64:
17473 case BFD_RELOC_X86_64_GOTTPOFF:
17474 case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
17475 case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
17476 case BFD_RELOC_X86_64_TPOFF32:
17477 case BFD_RELOC_X86_64_TPOFF64:
17478 case BFD_RELOC_X86_64_GOTOFF64:
17479 case BFD_RELOC_X86_64_GOTPC32:
17480 case BFD_RELOC_X86_64_GOT64:
17481 case BFD_RELOC_X86_64_GOTPCREL64:
17482 case BFD_RELOC_X86_64_GOTPC64:
17483 case BFD_RELOC_X86_64_GOTPLT64:
17484 case BFD_RELOC_X86_64_PLTOFF64:
17485 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
17486 case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
17487 case BFD_RELOC_X86_64_TLSDESC_CALL:
17488 case BFD_RELOC_RVA:
17489 case BFD_RELOC_VTABLE_ENTRY:
17490 case BFD_RELOC_VTABLE_INHERIT:
17491 #ifdef TE_PE
17492 case BFD_RELOC_32_SECREL:
17493 case BFD_RELOC_16_SECIDX:
17494 #endif
17495 code = fixp->fx_r_type;
17496 break;
17497 case BFD_RELOC_X86_64_32S:
17498 if (!fixp->fx_pcrel)
17499 {
17500 /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32. */
17501 code = fixp->fx_r_type;
17502 break;
17503 }
17504 /* Fall through. */
17505 default:
17506 if (fixp->fx_pcrel)
17507 {
17508 switch (fixp->fx_size)
17509 {
17510 default:
17511 as_bad_where (fixp->fx_file, fixp->fx_line,
17512 _("can not do %d byte pc-relative relocation"),
17513 fixp->fx_size);
17514 code = BFD_RELOC_32_PCREL;
17515 break;
17516 case 1: code = BFD_RELOC_8_PCREL; break;
17517 case 2: code = BFD_RELOC_16_PCREL; break;
17518 case 4: code = BFD_RELOC_32_PCREL; break;
17519 #ifdef BFD64
17520 case 8: code = BFD_RELOC_64_PCREL; break;
17521 #endif
17522 }
17523 }
17524 else
17525 {
17526 switch (fixp->fx_size)
17527 {
17528 default:
17529 as_bad_where (fixp->fx_file, fixp->fx_line,
17530 _("can not do %d byte relocation"),
17531 fixp->fx_size);
17532 code = BFD_RELOC_32;
17533 break;
17534 case 1: code = BFD_RELOC_8; break;
17535 case 2: code = BFD_RELOC_16; break;
17536 case 4: code = BFD_RELOC_32; break;
17537 #ifdef BFD64
17538 case 8: code = BFD_RELOC_64; break;
17539 #endif
17540 }
17541 }
17542 break;
17543 }
17544
17545 if ((code == BFD_RELOC_32
17546 || code == BFD_RELOC_32_PCREL
17547 || code == BFD_RELOC_X86_64_32S)
17548 && GOT_symbol
17549 && fixp->fx_addsy == GOT_symbol)
17550 {
17551 if (!object_64bit)
17552 code = BFD_RELOC_386_GOTPC;
17553 else
17554 code = BFD_RELOC_X86_64_GOTPC32;
17555 }
17556 if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
17557 && GOT_symbol
17558 && fixp->fx_addsy == GOT_symbol)
17559 {
17560 code = BFD_RELOC_X86_64_GOTPC64;
17561 }
17562
17563 rel = XNEW (arelent);
17564 rel->sym_ptr_ptr = XNEW (asymbol *);
17565 *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
17566
17567 rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
17568
17569 if (!use_rela_relocations)
17570 {
17571 /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
17572 vtable entry to be used in the relocation's section offset. */
17573 if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
17574 rel->address = fixp->fx_offset;
17575 #if defined (OBJ_COFF) && defined (TE_PE)
17576 else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
17577 rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
17578 else
17579 #endif
17580 rel->addend = 0;
17581 }
17582 /* Use rela relocations in 64-bit mode. */
17583 else
17584 {
17585 if (disallow_64bit_reloc)
17586 switch (code)
17587 {
17588 case BFD_RELOC_X86_64_DTPOFF64:
17589 case BFD_RELOC_X86_64_TPOFF64:
17590 case BFD_RELOC_64_PCREL:
17591 case BFD_RELOC_X86_64_GOTOFF64:
17592 case BFD_RELOC_X86_64_GOT64:
17593 case BFD_RELOC_X86_64_GOTPCREL64:
17594 case BFD_RELOC_X86_64_GOTPC64:
17595 case BFD_RELOC_X86_64_GOTPLT64:
17596 case BFD_RELOC_X86_64_PLTOFF64:
17597 as_bad_where (fixp->fx_file, fixp->fx_line,
17598 _("cannot represent relocation type %s in x32 mode"),
17599 bfd_get_reloc_code_name (code));
17600 break;
17601 default:
17602 break;
17603 }
17604
17605 if (!fixp->fx_pcrel)
17606 rel->addend = fixp->fx_offset;
17607 else
17608 switch (code)
17609 {
17610 case BFD_RELOC_X86_64_PLT32:
17611 case BFD_RELOC_X86_64_GOT32:
17612 case BFD_RELOC_X86_64_GOTPCREL:
17613 case BFD_RELOC_X86_64_GOTPCRELX:
17614 case BFD_RELOC_X86_64_REX_GOTPCRELX:
17615 case BFD_RELOC_X86_64_CODE_4_GOTPCRELX:
17616 case BFD_RELOC_X86_64_TLSGD:
17617 case BFD_RELOC_X86_64_TLSLD:
17618 case BFD_RELOC_X86_64_GOTTPOFF:
17619 case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
17620 case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
17621 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
17622 case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
17623 case BFD_RELOC_X86_64_TLSDESC_CALL:
17624 rel->addend = fixp->fx_offset - fixp->fx_size;
17625 break;
17626 default:
17627 rel->addend = (section->vma
17628 - fixp->fx_size
17629 + fixp->fx_addnumber
17630 + md_pcrel_from (fixp));
17631 break;
17632 }
17633 }
17634
17635 rel->howto = bfd_reloc_type_lookup (stdoutput, code);
17636 if (rel->howto == NULL)
17637 {
17638 as_bad_where (fixp->fx_file, fixp->fx_line,
17639 _("cannot represent relocation type %s"),
17640 bfd_get_reloc_code_name (code));
17641 /* Set howto to a garbage value so that we can keep going. */
17642 rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
17643 gas_assert (rel->howto != NULL);
17644 }
17645
17646 return rel;
17647 }
17648
17649 #include "tc-i386-intel.c"
17650
17651 void
17652 tc_x86_parse_to_dw2regnum (expressionS *exp)
17653 {
17654 int saved_naked_reg;
17655 char saved_register_dot;
17656
17657 saved_naked_reg = allow_naked_reg;
17658 allow_naked_reg = 1;
17659 saved_register_dot = register_chars['.'];
17660 register_chars['.'] = '.';
17661 allow_pseudo_reg = 1;
17662 expression_and_evaluate (exp);
17663 allow_pseudo_reg = 0;
17664 register_chars['.'] = saved_register_dot;
17665 allow_naked_reg = saved_naked_reg;
17666
17667 if (exp->X_op == O_register && exp->X_add_number >= 0)
17668 {
17669 exp->X_op = O_illegal;
17670 if ((addressT) exp->X_add_number < i386_regtab_size)
17671 {
17672 exp->X_add_number = i386_regtab[exp->X_add_number]
17673 .dw2_regnum[object_64bit];
17674 if (exp->X_add_number != Dw2Inval)
17675 exp->X_op = O_constant;
17676 }
17677 }
17678 }
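
/* A hedged example: this parser is what lets CFI directives such as
   ".cfi_offset %rbp, -16" accept a register name directly; the name
   is mapped to its DWARF register number (6 for %rbp in the 64-bit
   ABI).  */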
17679
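/* Initial CFI state on function entry: the CFA is SP plus the address
   size (CALL pushed the return address), and the return address lives
   at CFA plus the negative data alignment, i.e. CFA - 8 on x86-64 and
   CFA - 4 on ia32.  */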
17680 void
17681 tc_x86_frame_initial_instructions (void)
17682 {
17683 cfi_add_CFA_def_cfa (object_64bit ? REG_SP : 4, -x86_cie_data_alignment);
17684 cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
17685 }
17686
17687 int
17688 x86_dwarf2_addr_size (void)
17689 {
17690 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
17691 if (x86_elf_abi == X86_64_X32_ABI)
17692 return 4;
17693 #endif
17694 return bfd_arch_bits_per_address (stdoutput) / 8;
17695 }
17696
17697 #ifdef TE_PE
17698 void
17699 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
17700 {
17701 expressionS exp;
17702
17703 exp.X_op = O_secrel;
17704 exp.X_add_symbol = symbol;
17705 exp.X_add_number = 0;
17706 emit_expr (&exp, size);
17707 }
17708 #endif
17709
17710 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
17711 int
17712 i386_elf_section_type (const char *str, size_t len)
17713 {
17714 if (flag_code == CODE_64BIT
17715 && len == sizeof ("unwind") - 1
17716 && startswith (str, "unwind"))
17717 return SHT_X86_64_UNWIND;
17718
17719 return -1;
17720 }
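
/* For example, on a 64-bit target a directive such as

     .section .foo,"a",@unwind

   is mapped by the lookup above to SHT_X86_64_UNWIND.  */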
17721
17722 void
17723 i386_elf_section_change_hook (void)
17724 {
17725 struct i386_segment_info *info = &seg_info (now_seg)->tc_segment_info_data;
17726 struct i386_segment_info *curr, *prev;
17727
17728 if (info->subseg == now_subseg)
17729 return;
17730
17731 /* Find (or create) the list entry to save state into. */
17732 for (prev = info; (curr = prev->next) != NULL; prev = curr)
17733 if (curr->subseg == info->subseg)
17734 break;
17735 if (!curr)
17736 {
17737 curr = notes_alloc (sizeof (*curr));
17738 curr->subseg = info->subseg;
17739 curr->next = NULL;
17740 prev->next = curr;
17741 }
17742 curr->last_insn = info->last_insn;
17743
17744 /* Find the list entry to load state from. */
17745 for (curr = info->next; curr; curr = curr->next)
17746 if (curr->subseg == now_subseg)
17747 break;
17748 if (curr)
17749 info->last_insn = curr->last_insn;
17750 else
17751 memset (&info->last_insn, 0, sizeof (info->last_insn));
17752 info->subseg = now_subseg;
17753 }
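
/* A hedged example: after ".pushsection .text, 1" and a later
   ".popsection", the hook above saves and restores the per-subsection
   last-instruction record, so branch-alignment state from one
   subsection does not leak into another.  */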
17754
17755 #ifdef TE_SOLARIS
17756 void
17757 i386_solaris_fix_up_eh_frame (segT sec)
17758 {
17759 if (flag_code == CODE_64BIT)
17760 elf_section_type (sec) = SHT_X86_64_UNWIND;
17761 }
17762 #endif
17763
17764 /* For ELF on x86-64, add support for SHF_X86_64_LARGE. */
17765
17766 bfd_vma
17767 x86_64_section_letter (int letter, const char **ptr_msg)
17768 {
17769 if (flag_code == CODE_64BIT)
17770 {
17771 if (letter == 'l')
17772 return SHF_X86_64_LARGE;
17773
17774 *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
17775 }
17776 else
17777 *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
17778 return -1;
17779 }
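
/* For example, in 64-bit mode

     .section .ldata,"awl",@progbits

   uses the 'l' letter accepted above to set SHF_X86_64_LARGE; in
   32-bit mode the same letter is rejected with the diagnostic.  */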
17780
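/* Implements the .largecomm directive, e.g.

     .largecomm big_buffer,0x100000,32

   which in 64-bit mode allocates the symbol via the large-common /
   .lbss machinery below instead of ordinary common storage.  */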
17781 static void
17782 handle_large_common (int small ATTRIBUTE_UNUSED)
17783 {
17784 if (flag_code != CODE_64BIT)
17785 {
17786 s_comm_internal (0, elf_common_parse);
17787 as_warn (_(".largecomm supported only in 64-bit mode, producing .comm"));
17788 }
17789 else
17790 {
17791 static segT lbss_section;
17792 asection *saved_com_section_ptr = elf_com_section_ptr;
17793 asection *saved_bss_section = bss_section;
17794
17795 if (lbss_section == NULL)
17796 {
17797 flagword applicable;
17798 segT seg = now_seg;
17799 subsegT subseg = now_subseg;
17800
17801 /* The .lbss section is for local .largecomm symbols. */
17802 lbss_section = subseg_new (".lbss", 0);
17803 applicable = bfd_applicable_section_flags (stdoutput);
17804 bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
17805 seg_info (lbss_section)->bss = 1;
17806
17807 subseg_set (seg, subseg);
17808 }
17809
17810 elf_com_section_ptr = &_bfd_elf_large_com_section;
17811 bss_section = lbss_section;
17812
17813 s_comm_internal (0, elf_common_parse);
17814
17815 elf_com_section_ptr = saved_com_section_ptr;
17816 bss_section = saved_bss_section;
17817 }
17818 }
17819 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */