gas/config/tc-i386.c

   1 /* tc-i386.c -- Assemble code for the Intel 80386
   2    Copyright (C) 1989-2021 Free Software Foundation, Inc.
   3
   4    This file is part of GAS, the GNU Assembler.
   5
   6    GAS is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GAS is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GAS; see the file COPYING.  If not, write to the Free
  18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
  19    02110-1301, USA.  */
  20
  21 /* Intel 80386 machine specific gas.
  22    Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
  23    x86_64 support by Jan Hubicka (jh@suse.cz)
  24    VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
  25    Bugs & suggestions are completely welcome.  This is free software.
  26    Please help us make it better.  */
  27
  28 #include "as.h"
  29 #include "safe-ctype.h"
  30 #include "subsegs.h"
  31 #include "dwarf2dbg.h"
  32 #include "dw2gencfi.h"
  33 #include "elf/x86-64.h"
  34 #include "opcodes/i386-init.h"
  35 #include <limits.h>
  36
  37 #ifndef INFER_ADDR_PREFIX       /* default below applies only if not predefined */
  38 #define INFER_ADDR_PREFIX 1
  39 #endif
  40
  41 #ifndef DEFAULT_ARCH            /* default below applies only if not predefined */
  42 #define DEFAULT_ARCH "i386"     /* initializer of default_arch */
  43 #endif
  44
  45 #ifndef INLINE
  46 #if __GNUC__ >= 2
  47 #define INLINE __inline__       /* GCC 2 or later */
  48 #else
  49 #define INLINE                  /* other compilers: expands to nothing */
  50 #endif
  51 #endif
  52
  53 /* Prefixes will be emitted in the order defined below.
  54    WAIT_PREFIX must be the first prefix since FWAIT is really an
  55    instruction, and so must come before any prefixes.
  56    The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
  57    REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.
        HLE_PREFIX and BND_PREFIX are aliases for the REP_PREFIX slot, so the
        three share one position.  MAX_PREFIXES is the size of the prefix[]
        array in struct _i386_insn.  */
  58 #define WAIT_PREFIX     0
  59 #define SEG_PREFIX      1
  60 #define ADDR_PREFIX     2
  61 #define DATA_PREFIX     3
  62 #define REP_PREFIX      4
  63 #define HLE_PREFIX      REP_PREFIX
  64 #define BND_PREFIX      REP_PREFIX
  65 #define LOCK_PREFIX     5
  66 #define REX_PREFIX      6       /* must come last.  */
  67 #define MAX_PREFIXES    7       /* max prefixes per opcode */
  68
  69 /* We define the operand syntax characters here (the base,index,scale
        syntax is not covered by these macros).  */
  70 #define REGISTER_PREFIX '%'
  71 #define IMMEDIATE_PREFIX '$'
  72 #define ABSOLUTE_PREFIX '*'
  73
  74 /* These are the instruction mnemonic suffixes in AT&T syntax or
  75    memory operand size in Intel syntax.  */
  76 #define WORD_MNEM_SUFFIX  'w'
  77 #define BYTE_MNEM_SUFFIX  'b'
  78 #define SHORT_MNEM_SUFFIX 's'
  79 #define LONG_MNEM_SUFFIX  'l'
  80 #define QWORD_MNEM_SUFFIX  'q'
  81 /* Intel Syntax.  Use a control character rather than a letter since it
  82    never appears in instructions.  */
  83 #define LONG_DOUBLE_MNEM_SUFFIX '\1'
  84
  85 #define END_OF_INSN '\0'        /* the NUL character */
  86
  87 /* This matches the C -> StaticRounding alias in the opcode table.
        From here on the preprocessor rewrites the identifier `commutative'
        to `staticrounding'.  */
  88 #define commutative staticrounding
  89
  90 /*
  91   'templates' is for grouping together 'template' structures for opcodes
  92   of the same name.  This is only used for storing the insns in the grand
  93   ole hash table of insns.
  94   The templates themselves start at START and range up to (but not including)
  95   END, i.e. [START, END) is a half-open range.
  96   */
  97 typedef struct
  98 {
  99   const insn_template *start;   /* first template of the group */
 100   const insn_template *end;     /* one past the last template of the group */
 101 }
 102 templates;
 103
 104 /* 386 operand encoding bytes:  see 386 book for details of this.
        The three parts are kept as separate unsigned ints rather than as
        one packed byte.  This is the type of the rm member of
        struct _i386_insn.  */
 105 typedef struct
 106 {
 107   unsigned int regmem;  /* codes register or memory operand */
 108   unsigned int reg;     /* codes register operand (or extended opcode) */
 109   unsigned int mode;    /* how to interpret regmem & reg */
 110 }
 111 modrm_byte;
 112
 113 /* x86-64 extension prefix, held in a plain int.  This is the type of the
        rex and vrex members of struct _i386_insn.  */
 114 typedef int rex_byte;
 115
 116 /* 386 opcode byte to code indirect addressing.  As with modrm_byte, the
        parts are kept as separate unsigned fields rather than one packed
        byte.  This is the type of the sib member of struct _i386_insn.
        NOTE(review): the fields presumably hold the base, index and scale
        parts of that byte, as their names say -- nothing visible here fixes
        their widths or encodings.  */
 117 typedef struct
 118 {
 119   unsigned base;
 120   unsigned index;
 121   unsigned scale;
 122 }
 123 sib_byte;
 124
 125 /* x86 arch names, types and features.  This is the element type of the
        cpu_arch[] table, whose rows are written positionally in the member
        order below, so the order of the members matters.  */
 126 typedef struct
 127 {
 128   const char *name;             /* arch name */
 129   unsigned int len;             /* arch string length */
 130   enum processor_type type;     /* arch type */
 131   i386_cpu_flags flags;         /* cpu feature flags */
 132   unsigned int skip;            /* show_arch should skip this. */
 133 }
 134 arch_entry;
 135
 136 /* Used to turn off indicated flags.  Same leading members as arch_entry
        (name, length, flags) but without the processor type and the skip
        marker.  */
 137 typedef struct
 138 {
 139   const char *name;             /* arch name */
 140   unsigned int len;             /* arch string length */
 141   i386_cpu_flags flags;         /* cpu feature flags */
 142 }
 143 noarch_entry;
 144
 145 static void update_code_flag (int, int);   /* prototypes of file-local functions follow */
 146 static void set_code_flag (int);
 147 static void set_16bit_gcc_code_flag (int);
 148 static void set_intel_syntax (int);
 149 static void set_intel_mnemonic (int);
 150 static void set_allow_index_reg (int);
 151 static void set_check (int);
 152 static void set_cpu_arch (int);
 153 #ifdef TE_PE                    /* declared only when TE_PE is defined */
 154 static void pe_directive_secrel (int);
 155 #endif
 156 static void signed_cons (int);
 157 static char *output_invalid (int c);
 158 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
 159                                     const char *);
 160 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
 161                                        const char *);
 162 static int i386_att_operand (char *);
 163 static int i386_intel_operand (char *, int);
 164 static int i386_intel_simplify (expressionS *);
 165 static int i386_intel_parse_name (const char *, expressionS *);
 166 static const reg_entry *parse_register (char *, char **);
 167 static char *parse_insn (char *, char *);
 168 static char *parse_operands (char *, const char *);
 169 static void swap_operands (void);
 170 static void swap_2_operands (unsigned int, unsigned int);
 171 static enum flag_code i386_addressing_mode (void);
 172 static void optimize_imm (void);
 173 static void optimize_disp (void);
 174 static const insn_template *match_template (char);
 175 static int check_string (void);
 176 static int process_suffix (void);
 177 static int check_byte_reg (void);
 178 static int check_long_reg (void);
 179 static int check_qword_reg (void);
 180 static int check_word_reg (void);
 181 static int finalize_imm (void);
 182 static int process_operands (void);
 183 static const reg_entry *build_modrm_byte (void);
 184 static void output_insn (void);
 185 static void output_imm (fragS *, offsetT);
 186 static void output_disp (fragS *, offsetT);
 187 #ifndef I386COFF                /* declared only when I386COFF is not defined */
 188 static void s_bss (int);
 189 #endif
 190 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
     /* Everything up to the matching #endif exists only in ELF-capable
        builds.  */
 191 static void handle_large_common (int small ATTRIBUTE_UNUSED);
 192
 193 /* GNU_PROPERTY_X86_ISA_1_USED.  */
 194 static unsigned int x86_isa_1_used;
 195 /* GNU_PROPERTY_X86_FEATURE_2_USED.  */
 196 static unsigned int x86_feature_2_used;
 197 /* Generate x86 used ISA and feature properties.  Starts out as the
        build-time default DEFAULT_X86_USED_NOTE.  */
 198 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
 199 #endif
 200
 201 static const char *default_arch = DEFAULT_ARCH;    /* starts as the build-time default */
 202
 203 /* parse_register() returns this when a register alias cannot be used.  */
 204 static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
 205                                    { Dw2Inval, Dw2Inval } };
 206
     /* Pointers to individual register entries.  They have no initializer
        here, so they start out null.
        NOTE(review): presumably pointed at the entries for the registers
        their names mention during start-up -- confirm where they are set.  */
 207 static const reg_entry *reg_eax;
 208 static const reg_entry *reg_ds;
 209 static const reg_entry *reg_es;
 210 static const reg_entry *reg_ss;
 211 static const reg_entry *reg_st0;
 212 static const reg_entry *reg_k0;
 213
 214 /* VEX prefix.  This is the type of the vex member of struct _i386_insn.  */
 215 typedef struct
 216 {
 217   /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  The buffer
          is sized for the largest of these.  */
 218   unsigned char bytes[4];
     /* NOTE(review): presumably the number of entries of bytes[] in use
        (2, 3 or 4) -- confirm against the code that fills it in.  */
 219   unsigned int length;
 220   /* Destination or source register specifier.  */
 221   const reg_entry *register_specifier;
 222 } vex_prefix;
 223
 224 /* 'md_assemble ()' gathers together information and puts it into a
 225    i386_insn.  */
 226
     /* One operand of the instruction: a displacement expression, an
        immediate expression, or a register.  Only one member is meaningful
        for a given operand; struct _i386_insn keeps the operand's type in
        its types[] array alongside the op[] array of these unions.  */
 227 union i386_op
 228   {
 229     expressionS *disps;         /* displacement expression */
 230     expressionS *imms;          /* immediate expression */
 231     const reg_entry *regs;      /* register */
 232   };
 233
 234 enum i386_error        /* type of the error member of struct _i386_insn */
 235   {
 236     operand_size_mismatch,      /* first enumerator, so its value is 0 */
 237     operand_type_mismatch,
 238     register_type_mismatch,
 239     number_of_operands_mismatch,
 240     invalid_instruction_suffix,
 241     bad_imm4,
 242     unsupported_with_intel_mnemonic,
 243     unsupported_syntax,
 244     unsupported,
 245     invalid_sib_address,
 246     invalid_vsib_address,
 247     invalid_vector_register_set,
 248     invalid_tmm_register_set,
 249     unsupported_vector_index_register,
 250     unsupported_broadcast,
 251     broadcast_needed,
 252     unsupported_masking,
 253     mask_not_on_destination,
 254     no_default_mask,
 255     unsupported_rc_sae,
 256     rc_sae_operand_not_last_imm,
 257     invalid_register_operand,
 258   };
 259
 260 struct _i386_insn
 261   {
 262     /* TM holds the template for the insn we're currently assembling.  */
 263     insn_template tm;
 264
 265     /* SUFFIX holds the instruction size suffix for byte, word, dword
 266        or qword, if given.  */
 267     char suffix;
 268
 269     /* OPCODE_LENGTH holds the number of base opcode bytes.  */
 270     unsigned char opcode_length;
 271
 272     /* OPERANDS gives the number of given operands.  */
 273     unsigned int operands;
 274
 275     /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
 276        of given register, displacement, memory operands and immediate
 277        operands.  */
 278     unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
 279
 280     /* TYPES [i] is the type (see above #defines) which tells us how to
 281        use OP[i] for the corresponding operand.  */
 282     i386_operand_type types[MAX_OPERANDS];
 283
 284     /* Displacement expression, immediate expression, or register for each
 285        operand.  */
 286     union i386_op op[MAX_OPERANDS];
 287
 288     /* Flags for operands.  The two macros below name the values used
            in FLAGS[i].  */
 289     unsigned int flags[MAX_OPERANDS];
 290 #define Operand_PCrel 1
 291 #define Operand_Mem   2
 292
 293     /* Relocation type for operand */
 294     enum bfd_reloc_code_real reloc[MAX_OPERANDS];
 295
 296     /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
 297        the base index byte below.  */
 298     const reg_entry *base_reg;
 299     const reg_entry *index_reg;
 300     unsigned int log2_scale_factor;
 301
 302     /* SEG gives the seg_entries of this insn.  They are zero unless
 303        explicit segment overrides are given.  */
 304     const reg_entry *seg[2];
 305
 306     /* Copied first memory operand string, for re-checking.  */
 307     char *memop1_string;
 308
 309     /* PREFIX holds all the given prefix opcodes (usually null).
 310        PREFIXES is the number of prefix opcodes.  */
 311     unsigned int prefixes;
 312     unsigned char prefix[MAX_PREFIXES];
 313
 314     /* Register is in low 3 bits of opcode.  */
 315     bool short_form;
 316
 317     /* The operand to a branch insn indicates an absolute branch.  */
 318     bool jumpabsolute;
 319
 320     /* Extended states.  These are bit masks: the YMM value also contains
            the XMM bit, and the ZMM value also contains the YMM and XMM
            bits.  */
 321     enum
 322       {
 323         /* Use MMX state.  */
 324         xstate_mmx = 1 << 0,
 325         /* Use XMM state.  */
 326         xstate_xmm = 1 << 1,
 327         /* Use YMM state.  */
 328         xstate_ymm = 1 << 2 | xstate_xmm,
 329         /* Use ZMM state.  */
 330         xstate_zmm = 1 << 3 | xstate_ymm,
 331         /* Use TMM state.  */
 332         xstate_tmm = 1 << 4,
 333         /* Use MASK state.  */
 334         xstate_mask = 1 << 5
 335       } xstate;
 336
 337     /* Has GOTPC or TLS relocation.  */
 338     bool has_gotpc_tls_reloc;
 339
 340     /* RM and SIB are the modrm byte and the sib byte where the
 341        addressing modes of this insn are encoded.  */
 342     modrm_byte rm;
 343     rex_byte rex;
 344     rex_byte vrex;
 345     sib_byte sib;
 346     vex_prefix vex;
 347
 348     /* Masking attributes.
 349
 350        The struct describes masking, applied to OPERAND in the instruction.
 351        REG is a pointer to the corresponding mask register.  ZEROING tells
 352        whether merging or zeroing mask is used.  */
 353     struct Mask_Operation
 354     {
 355       const reg_entry *reg;
 356       unsigned int zeroing;
 357       /* The operand where this operation is associated.  */
 358       unsigned int operand;
 359     } mask;
 360
 361     /* Rounding control and SAE attributes.  rc_none is -1, so the
            enumerators from rne onwards count up from 0.  */
 362     struct RC_Operation
 363     {
 364       enum rc_type
 365         {
 366           rc_none = -1,
 367           rne,
 368           rd,
 369           ru,
 370           rz,
 371           saeonly
 372         } type;
 373
 374       unsigned int operand;
 375     } rounding;
 376
 377     /* Broadcasting attributes.
 378
 379        The struct describes broadcasting, applied to OPERAND.  TYPE
 380        expresses the broadcast factor.  */
 381     struct Broadcast_Operation
 382     {
 383       /* Type of broadcast: {1to2}, {1to4}, {1to8}, or {1to16}.  */
 384       unsigned int type;
 385
 386       /* Index of broadcasted operand.  */
 387       unsigned int operand;
 388
 389       /* Number of bytes to broadcast.  */
 390       unsigned int bytes;
 391     } broadcast;
 392
 393     /* Compressed disp8*N attribute.  */
 394     unsigned int memshift;
 395
 396     /* Prefer load or store in encoding.  */
 397     enum
 398       {
 399         dir_encoding_default = 0,
 400         dir_encoding_load,
 401         dir_encoding_store,
 402         dir_encoding_swap
 403       } dir_encoding;
 404
 405     /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
 406     enum
 407       {
 408         disp_encoding_default = 0,
 409         disp_encoding_8bit,
 410         disp_encoding_16bit,
 411         disp_encoding_32bit
 412       } disp_encoding;
 413
 414     /* Prefer the REX byte in encoding.  */
 415     bool rex_encoding;
 416
 417     /* Disable instruction size optimization.  */
 418     bool no_optimize;
 419
 420     /* How to encode vector instructions.  */
 421     enum
 422       {
 423         vex_encoding_default = 0,
 424         vex_encoding_vex,
 425         vex_encoding_vex3,
 426         vex_encoding_evex,
 427         vex_encoding_error
 428       } vec_encoding;
 429
 430     /* REP prefix.  */
 431     const char *rep_prefix;
 432
 433     /* HLE prefix.  */
 434     const char *hle_prefix;
 435
 436     /* Have BND prefix.  */
 437     const char *bnd_prefix;
 438
 439     /* Have NOTRACK prefix.  */
 440     const char *notrack_prefix;
 441
 442     /* Error message.  Holds one of the enum i386_error reason codes.  */
 443     enum i386_error error;
 444   };
 445
 446 typedef struct _i386_insn i386_insn;
 447
 448 /* Link RC type with corresponding string, that'll be looked for in
 449    asm.  */
 450 struct RC_name
 451 {
 452   enum rc_type type;            /* rounding/SAE kind */
 453   const char *name;             /* its spelling in the source */
 454   unsigned int len;             /* presumably the length of NAME, as supplied by STRING_COMMA_LEN */
 455 };
 456
     /* One row per rc_type value other than rc_none.  */
 457 static const struct RC_name RC_NamesTable[] =
 458 {
 459   {  rne, STRING_COMMA_LEN ("rn-sae") },
 460   {  rd,  STRING_COMMA_LEN ("rd-sae") },
 461   {  ru,  STRING_COMMA_LEN ("ru-sae") },
 462   {  rz,  STRING_COMMA_LEN ("rz-sae") },
 463   {  saeonly,  STRING_COMMA_LEN ("sae") },
 464 };
 465
 466 /* List of chars besides those in app.c:symbol_chars that can start an
 467    operand.  Used to prevent the scrubber eating vital white-space.
        The string is built by literal concatenation: '@' is appended when
        LEX_AT is defined and '?' when LEX_QM is defined.  */
 468 const char extra_symbol_chars[] = "*%-([{}"
 469 #ifdef LEX_AT
 470         "@"
 471 #endif
 472 #ifdef LEX_QM
 473         "?"
 474 #endif
 475         ;
 476
 477 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF))     \
 478      && !defined (TE_GNU)                               \
 479      && !defined (TE_LINUX)                             \
 480      && !defined (TE_FreeBSD)                           \
 481      && !defined (TE_DragonFly)                         \
 482      && !defined (TE_NetBSD))
 483 /* This string holds the chars that always start a comment.  If the
 484    pre-processor is disabled, these aren't very useful.  The option
 485    --divide will remove '/' from this list.
        In this configuration '/' starts a comment, so a backslash is used
        as PREFIX_SEPARATOR instead.  */
 486 const char *i386_comment_chars = "#/";
 487 #define SVR4_COMMENT_CHARS 1
 488 #define PREFIX_SEPARATOR '\\'
 489
 490 #else
     /* All other configurations: only '#' starts a comment, and '/' serves
        as PREFIX_SEPARATOR.  SVR4_COMMENT_CHARS is not defined here.  */
 491 const char *i386_comment_chars = "#";
 492 #define PREFIX_SEPARATOR '/'
 493 #endif
 494
 495 /* This array holds the chars that only start a comment at the beginning of
 496    a line.  If the line seems to have the form '# 123 filename',
 497    .line and .file directives will appear in the pre-processed output.
 498    Note that input_file.c hand checks for '#' at the beginning of the
 499    first line of the input file.  This is because the compiler outputs
 500    #NO_APP at the beginning of its output.
 501    Also note that comments started like this one will always work if
 502    '/' isn't otherwise defined.  */
 503 const char line_comment_chars[] = "#/";
 504
     /* Going by its name, the chars that separate statements on one line.  */
 505 const char line_separator_chars[] = ";";
 506
 507 /* Chars that can be used to separate mant from exp in floating point
 508    nums.  */
 509 const char EXP_CHARS[] = "eE";
 510
 511 /* Chars that mean this number is a floating point constant
 512    As in 0f12.456
 513    or    0d1.2345e12.  */
 514 const char FLT_CHARS[] = "fFdDxX";
 515
 516 /* Tables for lexical analysis.  */
 517 static char mnemonic_chars[256];
 518 static char register_chars[256];
 519 static char operand_chars[256];
 520 static char identifier_chars[256];
 521
 522 /* Lexical macros.  */
 523 #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
 524 #define is_operand_char(x) (operand_chars[(unsigned char) x])
 525 #define is_register_char(x) (register_chars[(unsigned char) x])
 526 #define is_space_char(x) ((x) == ' ')
 527 #define is_identifier_char(x) (identifier_chars[(unsigned char) x])
 528
 529 /* All non-digit non-letter characters that may occur in an operand.  */
 530 static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
 531
 532 /* md_assemble() always leaves the strings it's passed unaltered.  To
 533    effect this we maintain a stack of saved characters that we've smashed
 534    with '\0's (indicating end of strings for various sub-fields of the
 535    assembler instruction).
        END_STRING_AND_SAVE (S) pushes the character at S and overwrites it
        with '\0'; RESTORE_END_STRING (S) pops the most recently pushed
        character back into *S, so the two must be used in matching
        last-in first-out pairs.  Neither macro checks the 32-entry capacity
        of save_stack, and END_STRING_AND_SAVE evaluates S twice.  */
 536 static char save_stack[32];
 537 static char *save_stack_p;
 538 #define END_STRING_AND_SAVE(s) \
 539         do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
 540 #define RESTORE_END_STRING(s) \
 541         do { *(s) = *--save_stack_p; } while (0)
 542
 543 /* The instruction we're assembling.  A single file-scope object.  */
 544 static i386_insn i;
 545
 546 /* Possible templates for current insn.  */
 547 static const templates *current_templates;
 548
 549 /* Per instruction expressionS buffers: max displacements & immediates.
        Sized by MAX_MEMORY_OPERANDS and MAX_IMMEDIATE_OPERANDS.  */
 550 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
 551 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
 552
 553 /* Current operand we are working on.  Starts at -1.  */
 554 static int this_operand = -1;
 555
 556 /* We support three different code modes (32-bit, 16-bit and 64-bit).
 557    The FLAG_CODE variable is used to distinguish these.  */
 558
 559 enum flag_code {
 560         CODE_32BIT,
 561         CODE_16BIT,
 562         CODE_64BIT };
 563
     /* flag_code has no initializer, so it starts as CODE_32BIT (value 0).  */
 564 static enum flag_code flag_code;
 565 static unsigned int object_64bit;
 566 static unsigned int disallow_64bit_reloc;
 567 static int use_rela_relocations = 0;
 568 /* __tls_get_addr/___tls_get_addr symbol for TLS.  */
 569 static const char *tls_get_addr;
 570
 571 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
 572      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
 573      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
 574
 575 /* The ELF ABI to use.  The type and the variable exist only in the
        configurations selected by the #if above; the variable starts as
        I386_ABI.  */
 576 enum x86_elf_abi
 577 {
 578   I386_ABI,
 579   X86_64_ABI,
 580   X86_64_X32_ABI
 581 };
 582
 583 static enum x86_elf_abi x86_elf_abi = I386_ABI;
 584 #endif
 585
 586 #if defined (TE_PE) || defined (TE_PEP)
 587 /* Use big object file format.  Off (0) by default.  */
 588 static int use_big_obj = 0;
 589 #endif
 590
 591 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 592 /* 1 if generating code for a shared library.  Off (0) by default.  */
 593 static int shared = 0;
 594 #endif
 595
 596 /* 1 for intel syntax,
 597    0 if att syntax.  */
 598 static int intel_syntax = 0;
 599
     /* Which 64-bit ISA flavour is in effect.  The variable has no
        initializer, so it starts at 0, which is neither enumerator (amd64
        is 1 and intel64 is 2).  */
 600 static enum x86_64_isa
 601 {
 602   amd64 = 1,    /* AMD64 ISA.  */
 603   intel64       /* Intel64 ISA.  */
 604 } isa64;
 605
 606 /* 1 for intel mnemonic,
 607    0 if att mnemonic.  The default is the negation of SYSV386_COMPAT.  */
 608 static int intel_mnemonic = !SYSV386_COMPAT;
 609
 610 /* 1 if pseudo registers are permitted.  */
 611 static int allow_pseudo_reg = 0;
 612
 613 /* 1 if register prefix % not required.  */
 614 static int allow_naked_reg = 0;
 615
 616 /* 1 if the assembler should add BND prefix for all control-transferring
 617    instructions supporting it, even if this prefix wasn't specified
 618    explicitly.  */
 619 static int add_bnd_prefix = 0;
 620
 621 /* 1 if pseudo index register, eiz/riz, is allowed.  */
 622 static int allow_index_reg = 0;
 623
 624 /* 1 if the assembler should ignore LOCK prefix, even if it was
 625    specified explicitly.  */
 626 static int omit_lock_prefix = 0;
 627
 628 /* 1 if the assembler should encode lfence, mfence, and sfence as
 629    "lock addl $0, (%{re}sp)".  */
 630 static int avoid_fence = 0;
 631
 632 /* 1 if lfence should be inserted after every load.  */
 633 static int lfence_after_load = 0;
 634
 635 /* Non-zero if lfence should be inserted before indirect branch.  The
        variable has no initializer, so it starts as lfence_branch_none.  */
 636 static enum lfence_before_indirect_branch_kind
 637   {
 638     lfence_branch_none = 0,
 639     lfence_branch_register,
 640     lfence_branch_memory,
 641     lfence_branch_all
 642   }
 643 lfence_before_indirect_branch;
 644
 645 /* Non-zero if lfence should be inserted before ret.  The variable has no
        initializer, so it starts as lfence_before_ret_none.  */
 646 static enum lfence_before_ret_kind
 647   {
 648     lfence_before_ret_none = 0,
 649     lfence_before_ret_not,
 650     lfence_before_ret_or,
 651     lfence_before_ret_shl
 652   }
 653 lfence_before_ret;
 654
 655 /* Record of the previous instruction: KIND says whether it was a
        directive, a prefix, or anything else (last_insn_other, the
        zero-initialized default).
        NOTE(review): SEG, FILE, NAME and LINE presumably identify where
        that previous item appeared -- confirm against the code that fills
        them in.  */
 656 static struct
 657   {
 658     segT seg;
 659     const char *file;
 660     const char *name;
 661     unsigned int line;
 662     enum last_insn_kind
 663       {
 664         last_insn_other = 0,
 665         last_insn_directive,
 666         last_insn_prefix
 667       } kind;
 668   } last_insn;
 669
 670 /* 1 if the assembler should generate relax relocations.  Starts as the
        build-time default DEFAULT_GENERATE_X86_RELAX_RELOCATIONS.  */
 671
 672 static int generate_relax_relocations
 673   = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
 674
     /* Severity levels shared by the two variables declared below.  Note
        that the initializer applies to operand_check only: sse_check has
        none and therefore starts as check_none (0).  */
 675 static enum check_kind
 676   {
 677     check_none = 0,
 678     check_warning,
 679     check_error
 680   }
 681 sse_check, operand_check = check_warning;
 682
 683 /* Non-zero if branches should be aligned within power of 2 boundary.  */
 684 static int align_branch_power = 0;
 685
 686 /* Types of branches to align.  The values double as bit positions for
        enum align_branch_bit below.  */
 687 enum align_branch_kind
 688   {
 689     align_branch_none = 0,
 690     align_branch_jcc = 1,
 691     align_branch_fused = 2,
 692     align_branch_jmp = 3,
 693     align_branch_call = 4,
 694     align_branch_indirect = 5,
 695     align_branch_ret = 6
 696   };
 697
 698 /* Type bits of branches to align.  Each is 1 shifted left by the matching
        align_branch_kind value; there is no bit for align_branch_none.  */
 699 enum align_branch_bit
 700   {
 701     align_branch_jcc_bit = 1 << align_branch_jcc,
 702     align_branch_fused_bit = 1 << align_branch_fused,
 703     align_branch_jmp_bit = 1 << align_branch_jmp,
 704     align_branch_call_bit = 1 << align_branch_call,
 705     align_branch_indirect_bit = 1 << align_branch_indirect,
 706     align_branch_ret_bit = 1 << align_branch_ret
 707   };
 708
     /* Mask of align_branch_bit values.  The default selects jcc, fused and
        jmp.  */
 709 static unsigned int align_branch = (align_branch_jcc_bit
 710                                     | align_branch_fused_bit
 711                                     | align_branch_jmp_bit);
 712
 713 /* Types of condition jump used by macro-fusion.  The base opcodes noted
        below advance by two from one enumerator to the next.  */
 714 enum mf_jcc_kind
 715   {
 716     mf_jcc_jo = 0,  /* base opcode 0x70  */
 717     mf_jcc_jc,      /* base opcode 0x72  */
 718     mf_jcc_je,      /* base opcode 0x74  */
 719     mf_jcc_jna,     /* base opcode 0x76  */
 720     mf_jcc_js,      /* base opcode 0x78  */
 721     mf_jcc_jp,      /* base opcode 0x7a  */
 722     mf_jcc_jl,      /* base opcode 0x7c  */
 723     mf_jcc_jle,     /* base opcode 0x7e  */
 724   };
 725
 726 /* Types of compare flag-modifying instructions used by macro-fusion.
        NOTE(review): the comment on mf_cmp_test_and says "test/cmp" while
        the enumerator name suggests test/and, and cmp is also listed under
        mf_cmp_alu_cmp -- confirm which is intended.  */
 727 enum mf_cmp_kind
 728   {
 729     mf_cmp_test_and,  /* test/cmp */
 730     mf_cmp_alu_cmp,  /* add/sub/cmp */
 731     mf_cmp_incdec  /* inc/dec */
 732   };
 733
 734 /* The maximum padding size for fused jcc.  CMP like instruction can
 735    be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
 736    prefixes.  */
 737 #define MAX_FUSED_JCC_PADDING_SIZE 20
 738
 739 /* The maximum number of prefixes added for an instruction.  */
 740 static unsigned int align_branch_prefix_size = 5;
 741
 742 /* Optimization level (0, the default, means none of the below):
 743    1. Clear the REX_W bit with register operand if possible.
 744    2. Above plus use 128bit vector instruction to clear the full vector
 745       register.
 746  */
 747 static int optimize = 0;
 748
 749 /* Optimization for space (0, the default, means none of the below):
 750    1. Clear the REX_W bit with register operand if possible.
 751    2. Above plus use 128bit vector instruction to clear the full vector
 752       register.
 753    3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
 754       "testb $imm7,%r8".
 755  */
 756 static int optimize_for_space = 0;
 757
 758 /* Register prefix used for error message.  */
 759 static const char *register_prefix = "%";
 760
 761 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
 762    leave, push, and pop instructions so that gcc has the same stack
 763    frame as in 32 bit mode.  '\0' by default.  */
 764 static char stackop_size = '\0';
 765
 766 /* Non-zero to optimize code alignment.  On by default; unlike its
        neighbours this variable is not static.  */
 767 int optimize_align_code = 1;
 768
 769 /* Non-zero to quieten some warnings.  */
 770 static int quiet_warnings = 0;
 771
 772 /* CPU name.  Both pointers start out NULL.  */
 773 static const char *cpu_arch_name = NULL;
 774 static char *cpu_sub_arch_name = NULL;
 775
 776 /* CPU feature flags.  */
 777 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
 778
 779 /* If we have selected a cpu we are generating instructions for.  */
 780 static int cpu_arch_tune_set = 0;
 781
 782 /* Cpu we are generating instructions for.  Not static, so it has
        external linkage.  */
 783 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
 784
 785 /* CPU feature flags of cpu we are generating instructions for.  */
 786 static i386_cpu_flags cpu_arch_tune_flags;
 787
 788 /* CPU instruction set architecture used.  Not static, so it has
        external linkage.  */
 789 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
 790
 791 /* CPU feature flags of instruction set architecture used.  Not static,
        so it has external linkage.  */
 792 i386_cpu_flags cpu_arch_isa_flags;
 793
 794 /* If set, conditional jumps are not automatically promoted to handle
 795    larger than a byte offset.  */
 796 static unsigned int no_cond_jump_promotion = 0;
 797
 798 /* Encode SSE instructions with VEX prefix.  */
 799 static unsigned int sse2avx;
 800
 801 /* Encode scalar AVX instructions with specific vector length.  None of
        the four selectors below has an initializer, so each starts at its
        first (zero) enumerator.  */
 802 static enum
 803   {
 804     vex128 = 0,
 805     vex256
 806   } avxscalar;
 807
 808 /* Encode VEX WIG instructions with specific vex.w.  */
 809 static enum
 810   {
 811     vexw0 = 0,
 812     vexw1
 813   } vexwig;
 814
 815 /* Encode scalar EVEX LIG instructions with specific vector length.  */
 816 static enum
 817   {
 818     evexl128 = 0,
 819     evexl256,
 820     evexl512
 821   } evexlig;
 822
 823 /* Encode EVEX WIG instructions with specific evex.w.  */
 824 static enum
 825   {
 826     evexw0 = 0,
 827     evexw1
 828   } evexwig;
 829
 830 /* Value to encode in EVEX RC bits, for SAE-only instructions.  The
        default, rne, is the rc_type enumerator with value 0.  */
 831 static enum rc_type evexrcig = rne;
 832
 833 /* Pre-defined "_GLOBAL_OFFSET_TABLE_".  No initializer, so the pointer
        starts out null.  */
 834 static symbolS *GOT_symbol;
 835
 836 /* The dwarf2 return column, adjusted for 32 or 64 bit.  Not static, so it
        has external linkage.  */
 837 unsigned int x86_dwarf2_return_column;
 838
 839 /* The dwarf2 data alignment, adjusted for 32 or 64 bit.  Not static, so
        it has external linkage.  */
 840 int x86_cie_data_alignment;
 841
 842 /* Interface to relax_segment.
 843    There are 3 major relax states for 386 jump insns because the
 844    different types of jumps add different sizes to frags when we're
 845    figuring out what sort of jump to choose to reach a given label.
 846
 847    BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
 848    branches which are handled by md_estimate_size_before_relax() and
 849    i386_generic_table_relax_frag().  */
 850
 851 /* Types.  */
 852 #define UNCOND_JUMP 0
 853 #define COND_JUMP 1
 854 #define COND_JUMP86 2
 855 #define BRANCH_PADDING 3
 856 #define BRANCH_PREFIX 4
 857 #define FUSED_JCC_PADDING 5
 858
 859 /* Sizes.  CODE16 is bit 0 and BIG is bit 1, so the four combinations
        SMALL, SMALL16, BIG and BIG16 are 0, 1, 2 and 3.  */
 860 #define CODE16  1
 861 #define SMALL   0
 862 #define SMALL16 (SMALL | CODE16)
 863 #define BIG     2
 864 #define BIG16   (BIG | CODE16)
 865
     /* This guard repeats the INLINE fallback near the top of the file.
        INLINE has always been defined by the time this point is reached,
        so the block below never takes effect.  */
 866 #ifndef INLINE
 867 #ifdef __GNUC__
 868 #define INLINE __inline__
 869 #else
 870 #define INLINE
 871 #endif
 872 #endif
 873
     /* A relax state packs a type (above bit 1) with a size (low two bits).
        ENCODE_RELAX_STATE builds one, TYPE_FROM_RELAX_STATE extracts the
        type, and DISP_SIZE_FROM_RELAX_STATE maps the size bits to a
        displacement size in bytes: 4 for BIG, 2 for BIG16, 1 otherwise.  */
 874 #define ENCODE_RELAX_STATE(type, size) \
 875   ((relax_substateT) (((type) << 2) | (size)))
 876 #define TYPE_FROM_RELAX_STATE(s) \
 877   ((s) >> 2)
 878 #define DISP_SIZE_FROM_RELAX_STATE(s) \
 879     ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
 880
 881 /* This table is used by relax_frag to promote short jumps to long
 882    ones where necessary.  SMALL (short) jumps may be promoted to BIG
 883    (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
 884    don't allow a short jump in a 32 bit code segment to be promoted to
 885    a 16 bit offset jump because it's slower (requires data size
 886    prefix), and doesn't work, unless the destination is in the bottom
 887    64k of the code segment (The top 16 bits of eip are zeroed).
        The rows are laid out so that a row's position equals its
        ENCODE_RELAX_STATE (type, size) value: four rows per jump type, in
        the order SMALL, SMALL16, BIG, BIG16.  Only the three jump types
        have rows here.  */
 888
 889 const relax_typeS md_relax_table[] =
 890 {
 891   /* The fields are:
 892      1) most positive reach of this state,
 893      2) most negative reach of this state,
 894      3) how many bytes this mode will have in the variable part of the frag
 895      4) which index into the table to try if we can't fit into this one.  */
 896
 897   /* UNCOND_JUMP states.  */
 898   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
 899   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
 900   /* dword jmp adds 4 bytes to frag:
 901      0 extra opcode bytes, 4 displacement bytes.  */
 902   {0, 0, 4, 0},
 903   /* word jmp adds 2 bytes to frag:
 904      0 extra opcode bytes, 2 displacement bytes.  */
 905   {0, 0, 2, 0},
 906
 907   /* COND_JUMP states.  */
 908   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
 909   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
 910   /* dword conditionals add 5 bytes to frag:
 911      1 extra opcode byte, 4 displacement bytes.  */
 912   {0, 0, 5, 0},
 913   /* word conditionals add 3 bytes to frag:
 914      1 extra opcode byte, 2 displacement bytes.  */
 915   {0, 0, 3, 0},
 916
 917   /* COND_JUMP86 states.  */
 918   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
 919   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
 920   /* dword conditionals add 5 bytes to frag:
 921      1 extra opcode byte, 4 displacement bytes.  */
 922   {0, 0, 5, 0},
 923   /* word conditionals add 4 bytes to frag:
 924      1 displacement byte and a 3 byte long branch insn.  */
 925   {0, 0, 4, 0}
 926 };
 927
 928 static const arch_entry cpu_arch[] =
 929 {
 930   /* Do not replace the first two entries - i386_target_format()
 931      relies on them being there in this order.  */
 932   { STRING_COMMA_LEN ("generic32"), PROCESSOR_GENERIC32,
 933     CPU_GENERIC32_FLAGS, 0 },
 934   { STRING_COMMA_LEN ("generic64"), PROCESSOR_GENERIC64,
 935     CPU_GENERIC64_FLAGS, 0 },
 936   { STRING_COMMA_LEN ("i8086"), PROCESSOR_UNKNOWN,
 937     CPU_NONE_FLAGS, 0 },
 938   { STRING_COMMA_LEN ("i186"), PROCESSOR_UNKNOWN,
 939     CPU_I186_FLAGS, 0 },
 940   { STRING_COMMA_LEN ("i286"), PROCESSOR_UNKNOWN,
 941     CPU_I286_FLAGS, 0 },
 942   { STRING_COMMA_LEN ("i386"), PROCESSOR_I386,
 943     CPU_I386_FLAGS, 0 },
 944   { STRING_COMMA_LEN ("i486"), PROCESSOR_I486,
 945     CPU_I486_FLAGS, 0 },
 946   { STRING_COMMA_LEN ("i586"), PROCESSOR_PENTIUM,
 947     CPU_I586_FLAGS, 0 },
 948   { STRING_COMMA_LEN ("i686"), PROCESSOR_PENTIUMPRO,
 949     CPU_I686_FLAGS, 0 },
 950   { STRING_COMMA_LEN ("pentium"), PROCESSOR_PENTIUM,
 951     CPU_I586_FLAGS, 0 },
 952   { STRING_COMMA_LEN ("pentiumpro"), PROCESSOR_PENTIUMPRO,
 953     CPU_PENTIUMPRO_FLAGS, 0 },
 954   { STRING_COMMA_LEN ("pentiumii"), PROCESSOR_PENTIUMPRO,
 955     CPU_P2_FLAGS, 0 },
 956   { STRING_COMMA_LEN ("pentiumiii"),PROCESSOR_PENTIUMPRO,
 957     CPU_P3_FLAGS, 0 },
 958   { STRING_COMMA_LEN ("pentium4"), PROCESSOR_PENTIUM4,
 959     CPU_P4_FLAGS, 0 },
 960   { STRING_COMMA_LEN ("prescott"), PROCESSOR_NOCONA,
 961     CPU_CORE_FLAGS, 0 },
 962   { STRING_COMMA_LEN ("nocona"), PROCESSOR_NOCONA,
 963     CPU_NOCONA_FLAGS, 0 },
 964   { STRING_COMMA_LEN ("yonah"), PROCESSOR_CORE,
 965     CPU_CORE_FLAGS, 1 },
 966   { STRING_COMMA_LEN ("core"), PROCESSOR_CORE,
 967     CPU_CORE_FLAGS, 0 },
 968   { STRING_COMMA_LEN ("merom"), PROCESSOR_CORE2,
 969     CPU_CORE2_FLAGS, 1 },
 970   { STRING_COMMA_LEN ("core2"), PROCESSOR_CORE2,
 971     CPU_CORE2_FLAGS, 0 },
 972   { STRING_COMMA_LEN ("corei7"), PROCESSOR_COREI7,
 973     CPU_COREI7_FLAGS, 0 },
 974   { STRING_COMMA_LEN ("l1om"), PROCESSOR_L1OM,
 975     CPU_L1OM_FLAGS, 0 },
 976   { STRING_COMMA_LEN ("k1om"), PROCESSOR_K1OM,
 977     CPU_K1OM_FLAGS, 0 },
 978   { STRING_COMMA_LEN ("iamcu"), PROCESSOR_IAMCU,
 979     CPU_IAMCU_FLAGS, 0 },
 980   { STRING_COMMA_LEN ("k6"), PROCESSOR_K6,
 981     CPU_K6_FLAGS, 0 },
 982   { STRING_COMMA_LEN ("k6_2"), PROCESSOR_K6,
 983     CPU_K6_2_FLAGS, 0 },
 984   { STRING_COMMA_LEN ("athlon"), PROCESSOR_ATHLON,
 985     CPU_ATHLON_FLAGS, 0 },
 986   { STRING_COMMA_LEN ("sledgehammer"), PROCESSOR_K8,
 987     CPU_K8_FLAGS, 1 },
 988   { STRING_COMMA_LEN ("opteron"), PROCESSOR_K8,
 989     CPU_K8_FLAGS, 0 },
 990   { STRING_COMMA_LEN ("k8"), PROCESSOR_K8,
 991     CPU_K8_FLAGS, 0 },
 992   { STRING_COMMA_LEN ("amdfam10"), PROCESSOR_AMDFAM10,
 993     CPU_AMDFAM10_FLAGS, 0 },
 994   { STRING_COMMA_LEN ("bdver1"), PROCESSOR_BD,
 995     CPU_BDVER1_FLAGS, 0 },
 996   { STRING_COMMA_LEN ("bdver2"), PROCESSOR_BD,
 997     CPU_BDVER2_FLAGS, 0 },
 998   { STRING_COMMA_LEN ("bdver3"), PROCESSOR_BD,
 999     CPU_BDVER3_FLAGS, 0 },
1000   { STRING_COMMA_LEN ("bdver4"), PROCESSOR_BD,
1001     CPU_BDVER4_FLAGS, 0 },
1002   { STRING_COMMA_LEN ("znver1"), PROCESSOR_ZNVER,
1003     CPU_ZNVER1_FLAGS, 0 },
1004   { STRING_COMMA_LEN ("znver2"), PROCESSOR_ZNVER,
1005     CPU_ZNVER2_FLAGS, 0 },
1006   { STRING_COMMA_LEN ("znver3"), PROCESSOR_ZNVER,
1007     CPU_ZNVER3_FLAGS, 0 },
1008   { STRING_COMMA_LEN ("btver1"), PROCESSOR_BT,
1009     CPU_BTVER1_FLAGS, 0 },
1010   { STRING_COMMA_LEN ("btver2"), PROCESSOR_BT,
1011     CPU_BTVER2_FLAGS, 0 },
1012   { STRING_COMMA_LEN (".8087"), PROCESSOR_UNKNOWN,
1013     CPU_8087_FLAGS, 0 },
1014   { STRING_COMMA_LEN (".287"), PROCESSOR_UNKNOWN,
1015     CPU_287_FLAGS, 0 },
1016   { STRING_COMMA_LEN (".387"), PROCESSOR_UNKNOWN,
1017     CPU_387_FLAGS, 0 },
1018   { STRING_COMMA_LEN (".687"), PROCESSOR_UNKNOWN,
1019     CPU_687_FLAGS, 0 },
1020   { STRING_COMMA_LEN (".cmov"), PROCESSOR_UNKNOWN,
1021     CPU_CMOV_FLAGS, 0 },
1022   { STRING_COMMA_LEN (".fxsr"), PROCESSOR_UNKNOWN,
1023     CPU_FXSR_FLAGS, 0 },
1024   { STRING_COMMA_LEN (".mmx"), PROCESSOR_UNKNOWN,
1025     CPU_MMX_FLAGS, 0 },
1026   { STRING_COMMA_LEN (".sse"), PROCESSOR_UNKNOWN,
1027     CPU_SSE_FLAGS, 0 },
1028   { STRING_COMMA_LEN (".sse2"), PROCESSOR_UNKNOWN,
1029     CPU_SSE2_FLAGS, 0 },
1030   { STRING_COMMA_LEN (".sse3"), PROCESSOR_UNKNOWN,
1031     CPU_SSE3_FLAGS, 0 },
1032   { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1033     CPU_SSE4A_FLAGS, 0 },
1034   { STRING_COMMA_LEN (".ssse3"), PROCESSOR_UNKNOWN,
1035     CPU_SSSE3_FLAGS, 0 },
1036   { STRING_COMMA_LEN (".sse4.1"), PROCESSOR_UNKNOWN,
1037     CPU_SSE4_1_FLAGS, 0 },
1038   { STRING_COMMA_LEN (".sse4.2"), PROCESSOR_UNKNOWN,
1039     CPU_SSE4_2_FLAGS, 0 },
1040   { STRING_COMMA_LEN (".sse4"), PROCESSOR_UNKNOWN,
1041     CPU_SSE4_2_FLAGS, 0 },
1042   { STRING_COMMA_LEN (".avx"), PROCESSOR_UNKNOWN,
1043     CPU_AVX_FLAGS, 0 },
1044   { STRING_COMMA_LEN (".avx2"), PROCESSOR_UNKNOWN,
1045     CPU_AVX2_FLAGS, 0 },
1046   { STRING_COMMA_LEN (".avx512f"), PROCESSOR_UNKNOWN,
1047     CPU_AVX512F_FLAGS, 0 },
1048   { STRING_COMMA_LEN (".avx512cd"), PROCESSOR_UNKNOWN,
1049     CPU_AVX512CD_FLAGS, 0 },
1050   { STRING_COMMA_LEN (".avx512er"), PROCESSOR_UNKNOWN,
1051     CPU_AVX512ER_FLAGS, 0 },
1052   { STRING_COMMA_LEN (".avx512pf"), PROCESSOR_UNKNOWN,
1053     CPU_AVX512PF_FLAGS, 0 },
1054   { STRING_COMMA_LEN (".avx512dq"), PROCESSOR_UNKNOWN,
1055     CPU_AVX512DQ_FLAGS, 0 },
1056   { STRING_COMMA_LEN (".avx512bw"), PROCESSOR_UNKNOWN,
1057     CPU_AVX512BW_FLAGS, 0 },
1058   { STRING_COMMA_LEN (".avx512vl"), PROCESSOR_UNKNOWN,
1059     CPU_AVX512VL_FLAGS, 0 },
1060   { STRING_COMMA_LEN (".vmx"), PROCESSOR_UNKNOWN,
1061     CPU_VMX_FLAGS, 0 },
1062   { STRING_COMMA_LEN (".vmfunc"), PROCESSOR_UNKNOWN,
1063     CPU_VMFUNC_FLAGS, 0 },
1064   { STRING_COMMA_LEN (".smx"), PROCESSOR_UNKNOWN,
1065     CPU_SMX_FLAGS, 0 },
1066   { STRING_COMMA_LEN (".xsave"), PROCESSOR_UNKNOWN,
1067     CPU_XSAVE_FLAGS, 0 },
1068   { STRING_COMMA_LEN (".xsaveopt"), PROCESSOR_UNKNOWN,
1069     CPU_XSAVEOPT_FLAGS, 0 },
1070   { STRING_COMMA_LEN (".xsavec"), PROCESSOR_UNKNOWN,
1071     CPU_XSAVEC_FLAGS, 0 },
1072   { STRING_COMMA_LEN (".xsaves"), PROCESSOR_UNKNOWN,
1073     CPU_XSAVES_FLAGS, 0 },
1074   { STRING_COMMA_LEN (".aes"), PROCESSOR_UNKNOWN,
1075     CPU_AES_FLAGS, 0 },
1076   { STRING_COMMA_LEN (".pclmul"), PROCESSOR_UNKNOWN,
1077     CPU_PCLMUL_FLAGS, 0 },
1078   { STRING_COMMA_LEN (".clmul"), PROCESSOR_UNKNOWN,
1079     CPU_PCLMUL_FLAGS, 1 },
1080   { STRING_COMMA_LEN (".fsgsbase"), PROCESSOR_UNKNOWN,
1081     CPU_FSGSBASE_FLAGS, 0 },
1082   { STRING_COMMA_LEN (".rdrnd"), PROCESSOR_UNKNOWN,
1083     CPU_RDRND_FLAGS, 0 },
1084   { STRING_COMMA_LEN (".f16c"), PROCESSOR_UNKNOWN,
1085     CPU_F16C_FLAGS, 0 },
1086   { STRING_COMMA_LEN (".bmi2"), PROCESSOR_UNKNOWN,
1087     CPU_BMI2_FLAGS, 0 },
1088   { STRING_COMMA_LEN (".fma"), PROCESSOR_UNKNOWN,
1089     CPU_FMA_FLAGS, 0 },
1090   { STRING_COMMA_LEN (".fma4"), PROCESSOR_UNKNOWN,
1091     CPU_FMA4_FLAGS, 0 },
1092   { STRING_COMMA_LEN (".xop"), PROCESSOR_UNKNOWN,
1093     CPU_XOP_FLAGS, 0 },
1094   { STRING_COMMA_LEN (".lwp"), PROCESSOR_UNKNOWN,
1095     CPU_LWP_FLAGS, 0 },
1096   { STRING_COMMA_LEN (".movbe"), PROCESSOR_UNKNOWN,
1097     CPU_MOVBE_FLAGS, 0 },
1098   { STRING_COMMA_LEN (".cx16"), PROCESSOR_UNKNOWN,
1099     CPU_CX16_FLAGS, 0 },
1100   { STRING_COMMA_LEN (".ept"), PROCESSOR_UNKNOWN,
1101     CPU_EPT_FLAGS, 0 },
1102   { STRING_COMMA_LEN (".lzcnt"), PROCESSOR_UNKNOWN,
1103     CPU_LZCNT_FLAGS, 0 },
1104   { STRING_COMMA_LEN (".popcnt"), PROCESSOR_UNKNOWN,
1105     CPU_POPCNT_FLAGS, 0 },
1106   { STRING_COMMA_LEN (".hle"), PROCESSOR_UNKNOWN,
1107     CPU_HLE_FLAGS, 0 },
1108   { STRING_COMMA_LEN (".rtm"), PROCESSOR_UNKNOWN,
1109     CPU_RTM_FLAGS, 0 },
1110   { STRING_COMMA_LEN (".invpcid"), PROCESSOR_UNKNOWN,
1111     CPU_INVPCID_FLAGS, 0 },
1112   { STRING_COMMA_LEN (".clflush"), PROCESSOR_UNKNOWN,
1113     CPU_CLFLUSH_FLAGS, 0 },
1114   { STRING_COMMA_LEN (".nop"), PROCESSOR_UNKNOWN,
1115     CPU_NOP_FLAGS, 0 },
1116   { STRING_COMMA_LEN (".syscall"), PROCESSOR_UNKNOWN,
1117     CPU_SYSCALL_FLAGS, 0 },
1118   { STRING_COMMA_LEN (".rdtscp"), PROCESSOR_UNKNOWN,
1119     CPU_RDTSCP_FLAGS, 0 },
1120   { STRING_COMMA_LEN (".3dnow"), PROCESSOR_UNKNOWN,
1121     CPU_3DNOW_FLAGS, 0 },
1122   { STRING_COMMA_LEN (".3dnowa"), PROCESSOR_UNKNOWN,
1123     CPU_3DNOWA_FLAGS, 0 },
1124   { STRING_COMMA_LEN (".padlock"), PROCESSOR_UNKNOWN,
1125     CPU_PADLOCK_FLAGS, 0 },
1126   { STRING_COMMA_LEN (".pacifica"), PROCESSOR_UNKNOWN,
1127     CPU_SVME_FLAGS, 1 },
1128   { STRING_COMMA_LEN (".svme"), PROCESSOR_UNKNOWN,
1129     CPU_SVME_FLAGS, 0 },
1130   { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1131     CPU_SSE4A_FLAGS, 0 },
1132   { STRING_COMMA_LEN (".abm"), PROCESSOR_UNKNOWN,
1133     CPU_ABM_FLAGS, 0 },
1134   { STRING_COMMA_LEN (".bmi"), PROCESSOR_UNKNOWN,
1135     CPU_BMI_FLAGS, 0 },
1136   { STRING_COMMA_LEN (".tbm"), PROCESSOR_UNKNOWN,
1137     CPU_TBM_FLAGS, 0 },
1138   { STRING_COMMA_LEN (".adx"), PROCESSOR_UNKNOWN,
1139     CPU_ADX_FLAGS, 0 },
1140   { STRING_COMMA_LEN (".rdseed"), PROCESSOR_UNKNOWN,
1141     CPU_RDSEED_FLAGS, 0 },
1142   { STRING_COMMA_LEN (".prfchw"), PROCESSOR_UNKNOWN,
1143     CPU_PRFCHW_FLAGS, 0 },
1144   { STRING_COMMA_LEN (".smap"), PROCESSOR_UNKNOWN,
1145     CPU_SMAP_FLAGS, 0 },
1146   { STRING_COMMA_LEN (".mpx"), PROCESSOR_UNKNOWN,
1147     CPU_MPX_FLAGS, 0 },
1148   { STRING_COMMA_LEN (".sha"), PROCESSOR_UNKNOWN,
1149     CPU_SHA_FLAGS, 0 },
1150   { STRING_COMMA_LEN (".clflushopt"), PROCESSOR_UNKNOWN,
1151     CPU_CLFLUSHOPT_FLAGS, 0 },
1152   { STRING_COMMA_LEN (".prefetchwt1"), PROCESSOR_UNKNOWN,
1153     CPU_PREFETCHWT1_FLAGS, 0 },
1154   { STRING_COMMA_LEN (".se1"), PROCESSOR_UNKNOWN,
1155     CPU_SE1_FLAGS, 0 },
1156   { STRING_COMMA_LEN (".clwb"), PROCESSOR_UNKNOWN,
1157     CPU_CLWB_FLAGS, 0 },
1158   { STRING_COMMA_LEN (".avx512ifma"), PROCESSOR_UNKNOWN,
1159     CPU_AVX512IFMA_FLAGS, 0 },
1160   { STRING_COMMA_LEN (".avx512vbmi"), PROCESSOR_UNKNOWN,
1161     CPU_AVX512VBMI_FLAGS, 0 },
1162   { STRING_COMMA_LEN (".avx512_4fmaps"), PROCESSOR_UNKNOWN,
1163     CPU_AVX512_4FMAPS_FLAGS, 0 },
1164   { STRING_COMMA_LEN (".avx512_4vnniw"), PROCESSOR_UNKNOWN,
1165     CPU_AVX512_4VNNIW_FLAGS, 0 },
1166   { STRING_COMMA_LEN (".avx512_vpopcntdq"), PROCESSOR_UNKNOWN,
1167     CPU_AVX512_VPOPCNTDQ_FLAGS, 0 },
1168   { STRING_COMMA_LEN (".avx512_vbmi2"), PROCESSOR_UNKNOWN,
1169     CPU_AVX512_VBMI2_FLAGS, 0 },
1170   { STRING_COMMA_LEN (".avx512_vnni"), PROCESSOR_UNKNOWN,
1171     CPU_AVX512_VNNI_FLAGS, 0 },
1172   { STRING_COMMA_LEN (".avx512_bitalg"), PROCESSOR_UNKNOWN,
1173     CPU_AVX512_BITALG_FLAGS, 0 },
1174   { STRING_COMMA_LEN (".avx_vnni"), PROCESSOR_UNKNOWN,
1175     CPU_AVX_VNNI_FLAGS, 0 },
1176   { STRING_COMMA_LEN (".clzero"), PROCESSOR_UNKNOWN,
1177     CPU_CLZERO_FLAGS, 0 },
1178   { STRING_COMMA_LEN (".mwaitx"), PROCESSOR_UNKNOWN,
1179     CPU_MWAITX_FLAGS, 0 },
1180   { STRING_COMMA_LEN (".ospke"), PROCESSOR_UNKNOWN,
1181     CPU_OSPKE_FLAGS, 0 },
1182   { STRING_COMMA_LEN (".rdpid"), PROCESSOR_UNKNOWN,
1183     CPU_RDPID_FLAGS, 0 },
1184   { STRING_COMMA_LEN (".ptwrite"), PROCESSOR_UNKNOWN,
1185     CPU_PTWRITE_FLAGS, 0 },
1186   { STRING_COMMA_LEN (".ibt"), PROCESSOR_UNKNOWN,
1187     CPU_IBT_FLAGS, 0 },
1188   { STRING_COMMA_LEN (".shstk"), PROCESSOR_UNKNOWN,
1189     CPU_SHSTK_FLAGS, 0 },
1190   { STRING_COMMA_LEN (".gfni"), PROCESSOR_UNKNOWN,
1191     CPU_GFNI_FLAGS, 0 },
1192   { STRING_COMMA_LEN (".vaes"), PROCESSOR_UNKNOWN,
1193     CPU_VAES_FLAGS, 0 },
1194   { STRING_COMMA_LEN (".vpclmulqdq"), PROCESSOR_UNKNOWN,
1195     CPU_VPCLMULQDQ_FLAGS, 0 },
1196   { STRING_COMMA_LEN (".wbnoinvd"), PROCESSOR_UNKNOWN,
1197     CPU_WBNOINVD_FLAGS, 0 },
1198   { STRING_COMMA_LEN (".pconfig"), PROCESSOR_UNKNOWN,
1199     CPU_PCONFIG_FLAGS, 0 },
1200   { STRING_COMMA_LEN (".waitpkg"), PROCESSOR_UNKNOWN,
1201     CPU_WAITPKG_FLAGS, 0 },
1202   { STRING_COMMA_LEN (".cldemote"), PROCESSOR_UNKNOWN,
1203     CPU_CLDEMOTE_FLAGS, 0 },
1204   { STRING_COMMA_LEN (".amx_int8"), PROCESSOR_UNKNOWN,
1205     CPU_AMX_INT8_FLAGS, 0 },
1206   { STRING_COMMA_LEN (".amx_bf16"), PROCESSOR_UNKNOWN,
1207     CPU_AMX_BF16_FLAGS, 0 },
1208   { STRING_COMMA_LEN (".amx_tile"), PROCESSOR_UNKNOWN,
1209     CPU_AMX_TILE_FLAGS, 0 },
1210   { STRING_COMMA_LEN (".movdiri"), PROCESSOR_UNKNOWN,
1211     CPU_MOVDIRI_FLAGS, 0 },
1212   { STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
1213     CPU_MOVDIR64B_FLAGS, 0 },
1214   { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
1215     CPU_AVX512_BF16_FLAGS, 0 },
1216   { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
1217     CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
1218   { STRING_COMMA_LEN (".tdx"), PROCESSOR_UNKNOWN,
1219     CPU_TDX_FLAGS, 0 },
1220   { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
1221     CPU_ENQCMD_FLAGS, 0 },
1222   { STRING_COMMA_LEN (".serialize"), PROCESSOR_UNKNOWN,
1223     CPU_SERIALIZE_FLAGS, 0 },
1224   { STRING_COMMA_LEN (".rdpru"), PROCESSOR_UNKNOWN,
1225     CPU_RDPRU_FLAGS, 0 },
1226   { STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN,
1227     CPU_MCOMMIT_FLAGS, 0 },
1228   { STRING_COMMA_LEN (".sev_es"), PROCESSOR_UNKNOWN,
1229     CPU_SEV_ES_FLAGS, 0 },
1230   { STRING_COMMA_LEN (".tsxldtrk"), PROCESSOR_UNKNOWN,
1231     CPU_TSXLDTRK_FLAGS, 0 },
1232   { STRING_COMMA_LEN (".kl"), PROCESSOR_UNKNOWN,
1233     CPU_KL_FLAGS, 0 },
1234   { STRING_COMMA_LEN (".widekl"), PROCESSOR_UNKNOWN,
1235     CPU_WIDEKL_FLAGS, 0 },
1236   { STRING_COMMA_LEN (".uintr"), PROCESSOR_UNKNOWN,
1237     CPU_UINTR_FLAGS, 0 },
1238   { STRING_COMMA_LEN (".hreset"), PROCESSOR_UNKNOWN,
1239     CPU_HRESET_FLAGS, 0 },
1240 };
1241
1242 static const noarch_entry cpu_noarch[] =
1243 {
1244   { STRING_COMMA_LEN ("no87"),  CPU_ANY_X87_FLAGS },
1245   { STRING_COMMA_LEN ("no287"),  CPU_ANY_287_FLAGS },
1246   { STRING_COMMA_LEN ("no387"),  CPU_ANY_387_FLAGS },
1247   { STRING_COMMA_LEN ("no687"),  CPU_ANY_687_FLAGS },
1248   { STRING_COMMA_LEN ("nocmov"),  CPU_ANY_CMOV_FLAGS },
1249   { STRING_COMMA_LEN ("nofxsr"),  CPU_ANY_FXSR_FLAGS },
1250   { STRING_COMMA_LEN ("nommx"),  CPU_ANY_MMX_FLAGS },
1251   { STRING_COMMA_LEN ("nosse"),  CPU_ANY_SSE_FLAGS },
1252   { STRING_COMMA_LEN ("nosse2"),  CPU_ANY_SSE2_FLAGS },
1253   { STRING_COMMA_LEN ("nosse3"),  CPU_ANY_SSE3_FLAGS },
1254   { STRING_COMMA_LEN ("nosse4a"),  CPU_ANY_SSE4A_FLAGS },
1255   { STRING_COMMA_LEN ("nossse3"),  CPU_ANY_SSSE3_FLAGS },
1256   { STRING_COMMA_LEN ("nosse4.1"),  CPU_ANY_SSE4_1_FLAGS },
1257   { STRING_COMMA_LEN ("nosse4.2"),  CPU_ANY_SSE4_2_FLAGS },
1258   { STRING_COMMA_LEN ("nosse4"),  CPU_ANY_SSE4_1_FLAGS },
1259   { STRING_COMMA_LEN ("noavx"),  CPU_ANY_AVX_FLAGS },
1260   { STRING_COMMA_LEN ("noavx2"),  CPU_ANY_AVX2_FLAGS },
1261   { STRING_COMMA_LEN ("noavx512f"), CPU_ANY_AVX512F_FLAGS },
1262   { STRING_COMMA_LEN ("noavx512cd"), CPU_ANY_AVX512CD_FLAGS },
1263   { STRING_COMMA_LEN ("noavx512er"), CPU_ANY_AVX512ER_FLAGS },
1264   { STRING_COMMA_LEN ("noavx512pf"), CPU_ANY_AVX512PF_FLAGS },
1265   { STRING_COMMA_LEN ("noavx512dq"), CPU_ANY_AVX512DQ_FLAGS },
1266   { STRING_COMMA_LEN ("noavx512bw"), CPU_ANY_AVX512BW_FLAGS },
1267   { STRING_COMMA_LEN ("noavx512vl"), CPU_ANY_AVX512VL_FLAGS },
1268   { STRING_COMMA_LEN ("noavx512ifma"), CPU_ANY_AVX512IFMA_FLAGS },
1269   { STRING_COMMA_LEN ("noavx512vbmi"), CPU_ANY_AVX512VBMI_FLAGS },
1270   { STRING_COMMA_LEN ("noavx512_4fmaps"), CPU_ANY_AVX512_4FMAPS_FLAGS },
1271   { STRING_COMMA_LEN ("noavx512_4vnniw"), CPU_ANY_AVX512_4VNNIW_FLAGS },
1272   { STRING_COMMA_LEN ("noavx512_vpopcntdq"), CPU_ANY_AVX512_VPOPCNTDQ_FLAGS },
1273   { STRING_COMMA_LEN ("noavx512_vbmi2"), CPU_ANY_AVX512_VBMI2_FLAGS },
1274   { STRING_COMMA_LEN ("noavx512_vnni"), CPU_ANY_AVX512_VNNI_FLAGS },
1275   { STRING_COMMA_LEN ("noavx512_bitalg"), CPU_ANY_AVX512_BITALG_FLAGS },
1276   { STRING_COMMA_LEN ("noavx_vnni"), CPU_ANY_AVX_VNNI_FLAGS },
1277   { STRING_COMMA_LEN ("noibt"), CPU_ANY_IBT_FLAGS },
1278   { STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
1279   { STRING_COMMA_LEN ("noamx_int8"), CPU_ANY_AMX_INT8_FLAGS },
1280   { STRING_COMMA_LEN ("noamx_bf16"), CPU_ANY_AMX_BF16_FLAGS },
1281   { STRING_COMMA_LEN ("noamx_tile"), CPU_ANY_AMX_TILE_FLAGS },
1282   { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
1283   { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
1284   { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
1285   { STRING_COMMA_LEN ("noavx512_vp2intersect"),
1286     CPU_ANY_AVX512_VP2INTERSECT_FLAGS },
1287   { STRING_COMMA_LEN ("notdx"), CPU_ANY_TDX_FLAGS },
1288   { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
1289   { STRING_COMMA_LEN ("noserialize"), CPU_ANY_SERIALIZE_FLAGS },
1290   { STRING_COMMA_LEN ("notsxldtrk"), CPU_ANY_TSXLDTRK_FLAGS },
1291   { STRING_COMMA_LEN ("nokl"), CPU_ANY_KL_FLAGS },
1292   { STRING_COMMA_LEN ("nowidekl"), CPU_ANY_WIDEKL_FLAGS },
1293   { STRING_COMMA_LEN ("nouintr"), CPU_ANY_UINTR_FLAGS },
1294   { STRING_COMMA_LEN ("nohreset"), CPU_ANY_HRESET_FLAGS },
1295 };
1296
1297 #ifdef I386COFF
1298 /* Like s_lcomm_internal in gas/read.c but the alignment string
1299    is allowed to be optional.  */
1300
1301 static symbolS *
1302 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1303 {
1304   addressT align = 0;
1305
1306   SKIP_WHITESPACE ();
1307
1308   if (needs_align
1309       && *input_line_pointer == ',')
1310     {
1311       align = parse_align (needs_align - 1);
1312
1313       if (align == (addressT) -1)
1314         return NULL;
1315     }
1316   else
1317     {
1318       if (size >= 8)
1319         align = 3;
1320       else if (size >= 4)
1321         align = 2;
1322       else if (size >= 2)
1323         align = 1;
1324       else
1325         align = 0;
1326     }
1327
1328   bss_alloc (symbolP, size, align);
1329   return symbolP;
1330 }
1331
1332 static void
1333 pe_lcomm (int needs_align)
1334 {
1335   s_comm_internal (needs_align * 2, pe_lcomm_internal);
1336 }
1337 #endif
1338
1339 const pseudo_typeS md_pseudo_table[] =
1340 {
1341 #if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
1342   {"align", s_align_bytes, 0},
1343 #else
1344   {"align", s_align_ptwo, 0},
1345 #endif
1346   {"arch", set_cpu_arch, 0},
1347 #ifndef I386COFF
1348   {"bss", s_bss, 0},
1349 #else
1350   {"lcomm", pe_lcomm, 1},
1351 #endif
1352   {"ffloat", float_cons, 'f'},
1353   {"dfloat", float_cons, 'd'},
1354   {"tfloat", float_cons, 'x'},
1355   {"value", cons, 2},
1356   {"slong", signed_cons, 4},
1357   {"noopt", s_ignore, 0},
1358   {"optim", s_ignore, 0},
1359   {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
1360   {"code16", set_code_flag, CODE_16BIT},
1361   {"code32", set_code_flag, CODE_32BIT},
1362 #ifdef BFD64
1363   {"code64", set_code_flag, CODE_64BIT},
1364 #endif
1365   {"intel_syntax", set_intel_syntax, 1},
1366   {"att_syntax", set_intel_syntax, 0},
1367   {"intel_mnemonic", set_intel_mnemonic, 1},
1368   {"att_mnemonic", set_intel_mnemonic, 0},
1369   {"allow_index_reg", set_allow_index_reg, 1},
1370   {"disallow_index_reg", set_allow_index_reg, 0},
1371   {"sse_check", set_check, 0},
1372   {"operand_check", set_check, 1},
1373 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
1374   {"largecomm", handle_large_common, 0},
1375 #else
1376   {"file", dwarf2_directive_file, 0},
1377   {"loc", dwarf2_directive_loc, 0},
1378   {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
1379 #endif
1380 #ifdef TE_PE
1381   {"secrel32", pe_directive_secrel, 0},
1382 #endif
1383   {0, 0, 0}
1384 };
1385
1386 /* For interface with expression ().  */
1387 extern char *input_line_pointer;
1388
1389 /* Hash table for instruction mnemonic lookup.  */
1390 static htab_t op_hash;
1391
1392 /* Hash table for register lookup.  */
1393 static htab_t reg_hash;
1394 \f
/* Various efficient no-op patterns for aligning code labels.

   The instruction shown above each array only illustrates what the
   bytes encode.  Do not try to assemble those instructions as
   written: 0L and 0w are not legal.  */

/* nop */
static const unsigned char f32_1[] = { 0x90 };
/* xchg %ax,%ax */
static const unsigned char f32_2[] = { 0x66, 0x90 };
/* leal 0(%esi),%esi */
static const unsigned char f32_3[] = { 0x8d, 0x76, 0x00 };
/* leal 0(%esi,1),%esi */
static const unsigned char f32_4[] = { 0x8d, 0x74, 0x26, 0x00 };
/* leal 0L(%esi),%esi */
static const unsigned char f32_6[] =
  { 0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00 };
/* leal 0L(%esi,1),%esi */
static const unsigned char f32_7[] =
  { 0x8d, 0xb4, 0x26, 0x00, 0x00, 0x00, 0x00 };

/* lea 0(%si),%si */
static const unsigned char f16_3[] = { 0x8d, 0x74, 0x00 };
/* lea 0W(%si),%si */
static const unsigned char f16_4[] = { 0x8d, 0xb4, 0x00, 0x00 };

/* Opcode bytes of the jumps used to skip over padding.  */
/* jmp disp8 */
static const unsigned char jump_disp8[] = { 0xeb };
/* jmp disp32 */
static const unsigned char jump32_disp32[] = { 0xe9 };
/* jmp disp32 */
static const unsigned char jump16_disp32[] = { 0x66, 0xe9 };

/* 32-bit NOPs patterns.  Entry N - 1 is the N-byte pattern; there is
   no 5-byte pattern, hence the NULL slot.  */
static const unsigned char *const f32_patt[] =
{
  f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
};

/* 16-bit NOPs patterns.  */
static const unsigned char *const f16_patt[] =
{
  f32_1, f32_2, f16_3, f16_4
};

/* nopl (%[re]ax) */
static const unsigned char alt_3[] = { 0x0f, 0x1f, 0x00 };
/* nopl 0(%[re]ax) */
static const unsigned char alt_4[] = { 0x0f, 0x1f, 0x40, 0x00 };
/* nopl 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_5[] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] =
  { 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00 };
/* nopl 0L(%[re]ax) */
static const unsigned char alt_7[] =
  { 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 };
/* nopl 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_8[] =
  { 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
  { 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_10[] =
  { 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
/* data16 nopw %cs:0L(%eax,%eax,1) */
static const unsigned char alt_11[] =
  { 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };

/* 32-bit and 64-bit NOPs patterns.  */
static const unsigned char *const alt_patt[] =
{
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt_9, alt_10, alt_11
};
1460
/* Generate COUNT bytes of NOPs at WHERE using the patterns in PATT,
   where entry N - 1 of PATT is the N-byte NOP (or NULL if there is
   none).  No single NOP is longer than MAX_SINGLE_NOP_SIZE bytes;
   a value below 1 is reported through as_fatal.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
                  int count, int max_single_nop_size)
{
  const unsigned char *insn;
  int tail;
  int pos;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
                max_single_nop_size);
      return;
    }

  /* Step down to the next smaller size if the requested one isn't
     available.  */
  if (patt[max_single_nop_size - 1] == NULL)
    max_single_nop_size--;
  insn = patt[max_single_nop_size - 1];

  /* Place the longer NOPs first; TAIL bytes are left over for a
     single shorter one.  */
  tail = count % max_single_nop_size;
  count -= tail;

  pos = 0;
  while (pos < count)
    {
      memcpy (where + pos, insn, max_single_nop_size);
      pos += max_single_nop_size;
    }

  if (!tail)
    return;

  insn = patt[tail - 1];
  if (insn != NULL)
    {
      memcpy (where + pos, insn, tail);
      return;
    }

  /* There is no NOP of exactly TAIL bytes: emit the next smaller one
     followed by a one-byte NOP.  */
  tail--;
  memcpy (where + pos, patt[tail - 1], tail);
  where[pos + tail] = *patt[0];
}
1512
1513 static INLINE int
1514 fits_in_imm7 (offsetT num)
1515 {
1516   return (num & 0x7f) == num;
1517 }
1518
1519 static INLINE int
1520 fits_in_imm31 (offsetT num)
1521 {
1522   return (num & 0x7fffffff) == num;
1523 }
1524
1525 /* Genenerate COUNT bytes of NOPs to WHERE with the maximum size of a
1526    single NOP instruction LIMIT.  */
1527
1528 void
1529 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1530 {
1531   const unsigned char *const *patt = NULL;
1532   int max_single_nop_size;
1533   /* Maximum number of NOPs before switching to jump over NOPs.  */
1534   int max_number_of_nops;
1535
1536   switch (fragP->fr_type)
1537     {
1538     case rs_fill_nop:
1539     case rs_align_code:
1540       break;
1541     case rs_machine_dependent:
1542       /* Allow NOP padding for jumps and calls.  */
1543       if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1544           || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1545         break;
1546       /* Fall through.  */
1547     default:
1548       return;
1549     }
1550
1551   /* We need to decide which NOP sequence to use for 32bit and
1552      64bit. When -mtune= is used:
1553
1554      1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
1555      PROCESSOR_GENERIC32, f32_patt will be used.
1556      2. For the rest, alt_patt will be used.
1557
1558      When -mtune= isn't used, alt_patt will be used if
1559      cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
1560      be used.
1561
1562      When -march= or .arch is used, we can't use anything beyond
1563      cpu_arch_isa_flags.   */
1564
1565   if (flag_code == CODE_16BIT)
1566     {
1567       patt = f16_patt;
1568       max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1569       /* Limit number of NOPs to 2 in 16-bit mode.  */
1570       max_number_of_nops = 2;
1571     }
1572   else
1573     {
1574       if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1575         {
1576           /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
1577           switch (cpu_arch_tune)
1578             {
1579             case PROCESSOR_UNKNOWN:
1580               /* We use cpu_arch_isa_flags to check if we SHOULD
1581                  optimize with nops.  */
1582               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1583                 patt = alt_patt;
1584               else
1585                 patt = f32_patt;
1586               break;
1587             case PROCESSOR_PENTIUM4:
1588             case PROCESSOR_NOCONA:
1589             case PROCESSOR_CORE:
1590             case PROCESSOR_CORE2:
1591             case PROCESSOR_COREI7:
1592             case PROCESSOR_L1OM:
1593             case PROCESSOR_K1OM:
1594             case PROCESSOR_GENERIC64:
1595             case PROCESSOR_K6:
1596             case PROCESSOR_ATHLON:
1597             case PROCESSOR_K8:
1598             case PROCESSOR_AMDFAM10:
1599             case PROCESSOR_BD:
1600             case PROCESSOR_ZNVER:
1601             case PROCESSOR_BT:
1602               patt = alt_patt;
1603               break;
1604             case PROCESSOR_I386:
1605             case PROCESSOR_I486:
1606             case PROCESSOR_PENTIUM:
1607             case PROCESSOR_PENTIUMPRO:
1608             case PROCESSOR_IAMCU:
1609             case PROCESSOR_GENERIC32:
1610               patt = f32_patt;
1611               break;
1612             }
1613         }
1614       else
1615         {
1616           switch (fragP->tc_frag_data.tune)
1617             {
1618             case PROCESSOR_UNKNOWN:
1619               /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1620                  PROCESSOR_UNKNOWN.  */
1621               abort ();
1622               break;
1623
1624             case PROCESSOR_I386:
1625             case PROCESSOR_I486:
1626             case PROCESSOR_PENTIUM:
1627             case PROCESSOR_IAMCU:
1628             case PROCESSOR_K6:
1629             case PROCESSOR_ATHLON:
1630             case PROCESSOR_K8:
1631             case PROCESSOR_AMDFAM10:
1632             case PROCESSOR_BD:
1633             case PROCESSOR_ZNVER:
1634             case PROCESSOR_BT:
1635             case PROCESSOR_GENERIC32:
1636               /* We use cpu_arch_isa_flags to check if we CAN optimize
1637                  with nops.  */
1638               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1639                 patt = alt_patt;
1640               else
1641                 patt = f32_patt;
1642               break;
1643             case PROCESSOR_PENTIUMPRO:
1644             case PROCESSOR_PENTIUM4:
1645             case PROCESSOR_NOCONA:
1646             case PROCESSOR_CORE:
1647             case PROCESSOR_CORE2:
1648             case PROCESSOR_COREI7:
1649             case PROCESSOR_L1OM:
1650             case PROCESSOR_K1OM:
1651               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1652                 patt = alt_patt;
1653               else
1654                 patt = f32_patt;
1655               break;
1656             case PROCESSOR_GENERIC64:
1657               patt = alt_patt;
1658               break;
1659             }
1660         }
1661
1662       if (patt == f32_patt)
1663         {
1664           max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
1665           /* Limit number of NOPs to 2 for older processors.  */
1666           max_number_of_nops = 2;
1667         }
1668       else
1669         {
1670           max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
1671           /* Limit number of NOPs to 7 for newer processors.  */
1672           max_number_of_nops = 7;
1673         }
1674     }
1675
1676   if (limit == 0)
1677     limit = max_single_nop_size;
1678
1679   if (fragP->fr_type == rs_fill_nop)
1680     {
1681       /* Output NOPs for .nop directive.  */
1682       if (limit > max_single_nop_size)
1683         {
1684           as_bad_where (fragP->fr_file, fragP->fr_line,
1685                         _("invalid single nop size: %d "
1686                           "(expect within [0, %d])"),
1687                         limit, max_single_nop_size);
1688           return;
1689         }
1690     }
1691   else if (fragP->fr_type != rs_machine_dependent)
1692     fragP->fr_var = count;
1693
1694   if ((count / max_single_nop_size) > max_number_of_nops)
1695     {
1696       /* Generate jump over NOPs.  */
1697       offsetT disp = count - 2;
1698       if (fits_in_imm7 (disp))
1699         {
1700           /* Use "jmp disp8" if possible.  */
1701           count = disp;
1702           where[0] = jump_disp8[0];
1703           where[1] = count;
1704           where += 2;
1705         }
1706       else
1707         {
1708           unsigned int size_of_jump;
1709
1710           if (flag_code == CODE_16BIT)
1711             {
1712               where[0] = jump16_disp32[0];
1713               where[1] = jump16_disp32[1];
1714               size_of_jump = 2;
1715             }
1716           else
1717             {
1718               where[0] = jump32_disp32[0];
1719               size_of_jump = 1;
1720             }
1721
1722           count -= size_of_jump + 4;
1723           if (!fits_in_imm31 (count))
1724             {
1725               as_bad_where (fragP->fr_file, fragP->fr_line,
1726                             _("jump over nop padding out of range"));
1727               return;
1728             }
1729
1730           md_number_to_chars (where + size_of_jump, count, 4);
1731           where += size_of_jump + 4;
1732         }
1733     }
1734
1735   /* Generate multiple NOPs.  */
1736   i386_output_nops (where, patt, count, limit);
1737 }
1738
1739 static INLINE int
1740 operand_type_all_zero (const union i386_operand_type *x)
1741 {
1742   switch (ARRAY_SIZE(x->array))
1743     {
1744     case 3:
1745       if (x->array[2])
1746         return 0;
1747       /* Fall through.  */
1748     case 2:
1749       if (x->array[1])
1750         return 0;
1751       /* Fall through.  */
1752     case 1:
1753       return !x->array[0];
1754     default:
1755       abort ();
1756     }
1757 }
1758
1759 static INLINE void
1760 operand_type_set (union i386_operand_type *x, unsigned int v)
1761 {
1762   switch (ARRAY_SIZE(x->array))
1763     {
1764     case 3:
1765       x->array[2] = v;
1766       /* Fall through.  */
1767     case 2:
1768       x->array[1] = v;
1769       /* Fall through.  */
1770     case 1:
1771       x->array[0] = v;
1772       /* Fall through.  */
1773       break;
1774     default:
1775       abort ();
1776     }
1777
1778   x->bitfield.class = ClassNone;
1779   x->bitfield.instance = InstanceNone;
1780 }
1781
1782 static INLINE int
1783 operand_type_equal (const union i386_operand_type *x,
1784                     const union i386_operand_type *y)
1785 {
1786   switch (ARRAY_SIZE(x->array))
1787     {
1788     case 3:
1789       if (x->array[2] != y->array[2])
1790         return 0;
1791       /* Fall through.  */
1792     case 2:
1793       if (x->array[1] != y->array[1])
1794         return 0;
1795       /* Fall through.  */
1796     case 1:
1797       return x->array[0] == y->array[0];
1798       break;
1799     default:
1800       abort ();
1801     }
1802 }
1803
1804 static INLINE int
1805 cpu_flags_all_zero (const union i386_cpu_flags *x)
1806 {
1807   switch (ARRAY_SIZE(x->array))
1808     {
1809     case 4:
1810       if (x->array[3])
1811         return 0;
1812       /* Fall through.  */
1813     case 3:
1814       if (x->array[2])
1815         return 0;
1816       /* Fall through.  */
1817     case 2:
1818       if (x->array[1])
1819         return 0;
1820       /* Fall through.  */
1821     case 1:
1822       return !x->array[0];
1823     default:
1824       abort ();
1825     }
1826 }
1827
1828 static INLINE int
1829 cpu_flags_equal (const union i386_cpu_flags *x,
1830                  const union i386_cpu_flags *y)
1831 {
1832   switch (ARRAY_SIZE(x->array))
1833     {
1834     case 4:
1835       if (x->array[3] != y->array[3])
1836         return 0;
1837       /* Fall through.  */
1838     case 3:
1839       if (x->array[2] != y->array[2])
1840         return 0;
1841       /* Fall through.  */
1842     case 2:
1843       if (x->array[1] != y->array[1])
1844         return 0;
1845       /* Fall through.  */
1846     case 1:
1847       return x->array[0] == y->array[0];
1848       break;
1849     default:
1850       abort ();
1851     }
1852 }
1853
1854 static INLINE int
1855 cpu_flags_check_cpu64 (i386_cpu_flags f)
1856 {
1857   return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1858            || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1859 }
1860
1861 static INLINE i386_cpu_flags
1862 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1863 {
1864   switch (ARRAY_SIZE (x.array))
1865     {
1866     case 4:
1867       x.array [3] &= y.array [3];
1868       /* Fall through.  */
1869     case 3:
1870       x.array [2] &= y.array [2];
1871       /* Fall through.  */
1872     case 2:
1873       x.array [1] &= y.array [1];
1874       /* Fall through.  */
1875     case 1:
1876       x.array [0] &= y.array [0];
1877       break;
1878     default:
1879       abort ();
1880     }
1881   return x;
1882 }
1883
1884 static INLINE i386_cpu_flags
1885 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1886 {
1887   switch (ARRAY_SIZE (x.array))
1888     {
1889     case 4:
1890       x.array [3] |= y.array [3];
1891       /* Fall through.  */
1892     case 3:
1893       x.array [2] |= y.array [2];
1894       /* Fall through.  */
1895     case 2:
1896       x.array [1] |= y.array [1];
1897       /* Fall through.  */
1898     case 1:
1899       x.array [0] |= y.array [0];
1900       break;
1901     default:
1902       abort ();
1903     }
1904   return x;
1905 }
1906
1907 static INLINE i386_cpu_flags
1908 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1909 {
1910   switch (ARRAY_SIZE (x.array))
1911     {
1912     case 4:
1913       x.array [3] &= ~y.array [3];
1914       /* Fall through.  */
1915     case 3:
1916       x.array [2] &= ~y.array [2];
1917       /* Fall through.  */
1918     case 2:
1919       x.array [1] &= ~y.array [1];
1920       /* Fall through.  */
1921     case 1:
1922       x.array [0] &= ~y.array [0];
1923       break;
1924     default:
1925       abort ();
1926     }
1927   return x;
1928 }
1929
/* CPU flag set initialised from CPU_ANY_AVX512F_FLAGS.  */
static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;

/* Bits of the value returned by cpu_flags_match.  ARCH_MATCH is set when
   the CPU features a template needs are available; 64BIT_MATCH is set
   when the template's Cpu64/CpuNo64 restriction agrees with flag_code.  */
#define CPU_FLAGS_ARCH_MATCH            0x1
#define CPU_FLAGS_64BIT_MATCH           0x2

/* Both match bits together.  */
#define CPU_FLAGS_PERFECT_MATCH \
  (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1937
1938 /* Return CPU flags match bits. */
1939
1940 static int
1941 cpu_flags_match (const insn_template *t)
1942 {
1943   i386_cpu_flags x = t->cpu_flags;
1944   int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1945
1946   x.bitfield.cpu64 = 0;
1947   x.bitfield.cpuno64 = 0;
1948
1949   if (cpu_flags_all_zero (&x))
1950     {
1951       /* This instruction is available on all archs.  */
1952       match |= CPU_FLAGS_ARCH_MATCH;
1953     }
1954   else
1955     {
1956       /* This instruction is available only on some archs.  */
1957       i386_cpu_flags cpu = cpu_arch_flags;
1958
1959       /* AVX512VL is no standalone feature - match it and then strip it.  */
1960       if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1961         return match;
1962       x.bitfield.cpuavx512vl = 0;
1963
1964       cpu = cpu_flags_and (x, cpu);
1965       if (!cpu_flags_all_zero (&cpu))
1966         {
1967           if (x.bitfield.cpuavx)
1968             {
1969               /* We need to check a few extra flags with AVX.  */
1970               if (cpu.bitfield.cpuavx
1971                   && (!t->opcode_modifier.sse2avx
1972                       || (sse2avx && !i.prefix[DATA_PREFIX]))
1973                   && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1974                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1975                   && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1976                 match |= CPU_FLAGS_ARCH_MATCH;
1977             }
1978           else if (x.bitfield.cpuavx512f)
1979             {
1980               /* We need to check a few extra flags with AVX512F.  */
1981               if (cpu.bitfield.cpuavx512f
1982                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1983                   && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1984                   && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1985                 match |= CPU_FLAGS_ARCH_MATCH;
1986             }
1987           else
1988             match |= CPU_FLAGS_ARCH_MATCH;
1989         }
1990     }
1991   return match;
1992 }
1993
1994 static INLINE i386_operand_type
1995 operand_type_and (i386_operand_type x, i386_operand_type y)
1996 {
1997   if (x.bitfield.class != y.bitfield.class)
1998     x.bitfield.class = ClassNone;
1999   if (x.bitfield.instance != y.bitfield.instance)
2000     x.bitfield.instance = InstanceNone;
2001
2002   switch (ARRAY_SIZE (x.array))
2003     {
2004     case 3:
2005       x.array [2] &= y.array [2];
2006       /* Fall through.  */
2007     case 2:
2008       x.array [1] &= y.array [1];
2009       /* Fall through.  */
2010     case 1:
2011       x.array [0] &= y.array [0];
2012       break;
2013     default:
2014       abort ();
2015     }
2016   return x;
2017 }
2018
2019 static INLINE i386_operand_type
2020 operand_type_and_not (i386_operand_type x, i386_operand_type y)
2021 {
2022   gas_assert (y.bitfield.class == ClassNone);
2023   gas_assert (y.bitfield.instance == InstanceNone);
2024
2025   switch (ARRAY_SIZE (x.array))
2026     {
2027     case 3:
2028       x.array [2] &= ~y.array [2];
2029       /* Fall through.  */
2030     case 2:
2031       x.array [1] &= ~y.array [1];
2032       /* Fall through.  */
2033     case 1:
2034       x.array [0] &= ~y.array [0];
2035       break;
2036     default:
2037       abort ();
2038     }
2039   return x;
2040 }
2041
2042 static INLINE i386_operand_type
2043 operand_type_or (i386_operand_type x, i386_operand_type y)
2044 {
2045   gas_assert (x.bitfield.class == ClassNone ||
2046               y.bitfield.class == ClassNone ||
2047               x.bitfield.class == y.bitfield.class);
2048   gas_assert (x.bitfield.instance == InstanceNone ||
2049               y.bitfield.instance == InstanceNone ||
2050               x.bitfield.instance == y.bitfield.instance);
2051
2052   switch (ARRAY_SIZE (x.array))
2053     {
2054     case 3:
2055       x.array [2] |= y.array [2];
2056       /* Fall through.  */
2057     case 2:
2058       x.array [1] |= y.array [1];
2059       /* Fall through.  */
2060     case 1:
2061       x.array [0] |= y.array [0];
2062       break;
2063     default:
2064       abort ();
2065     }
2066   return x;
2067 }
2068
2069 static INLINE i386_operand_type
2070 operand_type_xor (i386_operand_type x, i386_operand_type y)
2071 {
2072   gas_assert (y.bitfield.class == ClassNone);
2073   gas_assert (y.bitfield.instance == InstanceNone);
2074
2075   switch (ARRAY_SIZE (x.array))
2076     {
2077     case 3:
2078       x.array [2] ^= y.array [2];
2079       /* Fall through.  */
2080     case 2:
2081       x.array [1] ^= y.array [1];
2082       /* Fall through.  */
2083     case 1:
2084       x.array [0] ^= y.array [0];
2085       break;
2086     default:
2087       abort ();
2088     }
2089   return x;
2090 }
2091
/* Constant operand type values, each initialised from the OPERAND_TYPE_*
   macro of the corresponding name.  */

/* Displacement operand types.  */
static const i386_operand_type disp16 = OPERAND_TYPE_DISP16;
static const i386_operand_type disp32 = OPERAND_TYPE_DISP32;
static const i386_operand_type disp32s = OPERAND_TYPE_DISP32S;
static const i386_operand_type disp16_32 = OPERAND_TYPE_DISP16_32;
static const i386_operand_type anydisp = OPERAND_TYPE_ANYDISP;
/* Any-immediate and register operand types.  */
static const i386_operand_type anyimm = OPERAND_TYPE_ANYIMM;
static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM;
static const i386_operand_type regmask = OPERAND_TYPE_REGMASK;
/* Immediate operand types.  */
static const i386_operand_type imm8 = OPERAND_TYPE_IMM8;
static const i386_operand_type imm8s = OPERAND_TYPE_IMM8S;
static const i386_operand_type imm16 = OPERAND_TYPE_IMM16;
static const i386_operand_type imm32 = OPERAND_TYPE_IMM32;
static const i386_operand_type imm32s = OPERAND_TYPE_IMM32S;
static const i386_operand_type imm64 = OPERAND_TYPE_IMM64;
static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32;
static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S;
static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S;
2109
/* Operand categories that operand_type_check can test for.  */
enum operand_type
{
  reg,          /* Operand class is Reg.  */
  imm,          /* Any of the imm8/imm8s/imm16/imm32/imm32s/imm64 bits.  */
  disp,         /* Any of the disp8/disp16/disp32/disp32s/disp64 bits.  */
  anymem        /* Any displacement bit, or the baseindex bit.  */
};
2117
2118 static INLINE int
2119 operand_type_check (i386_operand_type t, enum operand_type c)
2120 {
2121   switch (c)
2122     {
2123     case reg:
2124       return t.bitfield.class == Reg;
2125
2126     case imm:
2127       return (t.bitfield.imm8
2128               || t.bitfield.imm8s
2129               || t.bitfield.imm16
2130               || t.bitfield.imm32
2131               || t.bitfield.imm32s
2132               || t.bitfield.imm64);
2133
2134     case disp:
2135       return (t.bitfield.disp8
2136               || t.bitfield.disp16
2137               || t.bitfield.disp32
2138               || t.bitfield.disp32s
2139               || t.bitfield.disp64);
2140
2141     case anymem:
2142       return (t.bitfield.disp8
2143               || t.bitfield.disp16
2144               || t.bitfield.disp32
2145               || t.bitfield.disp32s
2146               || t.bitfield.disp64
2147               || t.bitfield.baseindex);
2148
2149     default:
2150       abort ();
2151     }
2152
2153   return 0;
2154 }
2155
2156 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2157    between operand GIVEN and opeand WANTED for instruction template T.  */
2158
2159 static INLINE int
2160 match_operand_size (const insn_template *t, unsigned int wanted,
2161                     unsigned int given)
2162 {
2163   return !((i.types[given].bitfield.byte
2164             && !t->operand_types[wanted].bitfield.byte)
2165            || (i.types[given].bitfield.word
2166                && !t->operand_types[wanted].bitfield.word)
2167            || (i.types[given].bitfield.dword
2168                && !t->operand_types[wanted].bitfield.dword)
2169            || (i.types[given].bitfield.qword
2170                && !t->operand_types[wanted].bitfield.qword)
2171            || (i.types[given].bitfield.tbyte
2172                && !t->operand_types[wanted].bitfield.tbyte));
2173 }
2174
2175 /* Return 1 if there is no conflict in SIMD register between operand
2176    GIVEN and opeand WANTED for instruction template T.  */
2177
2178 static INLINE int
2179 match_simd_size (const insn_template *t, unsigned int wanted,
2180                  unsigned int given)
2181 {
2182   return !((i.types[given].bitfield.xmmword
2183             && !t->operand_types[wanted].bitfield.xmmword)
2184            || (i.types[given].bitfield.ymmword
2185                && !t->operand_types[wanted].bitfield.ymmword)
2186            || (i.types[given].bitfield.zmmword
2187                && !t->operand_types[wanted].bitfield.zmmword)
2188            || (i.types[given].bitfield.tmmword
2189                && !t->operand_types[wanted].bitfield.tmmword));
2190 }
2191
2192 /* Return 1 if there is no conflict in any size between operand GIVEN
2193    and opeand WANTED for instruction template T.  */
2194
2195 static INLINE int
2196 match_mem_size (const insn_template *t, unsigned int wanted,
2197                 unsigned int given)
2198 {
2199   return (match_operand_size (t, wanted, given)
2200           && !((i.types[given].bitfield.unspecified
2201                 && !i.broadcast.type
2202                 && !t->operand_types[wanted].bitfield.unspecified)
2203                || (i.types[given].bitfield.fword
2204                    && !t->operand_types[wanted].bitfield.fword)
2205                /* For scalar opcode templates to allow register and memory
2206                   operands at the same time, some special casing is needed
2207                   here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2208                   down-conversion vpmov*.  */
2209                || ((t->operand_types[wanted].bitfield.class == RegSIMD
2210                     && t->operand_types[wanted].bitfield.byte
2211                        + t->operand_types[wanted].bitfield.word
2212                        + t->operand_types[wanted].bitfield.dword
2213                        + t->operand_types[wanted].bitfield.qword
2214                        > !!t->opcode_modifier.broadcast)
2215                    ? (i.types[given].bitfield.xmmword
2216                       || i.types[given].bitfield.ymmword
2217                       || i.types[given].bitfield.zmmword)
2218                    : !match_simd_size(t, wanted, given))));
2219 }
2220
2221 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2222    operands for instruction template T, and it has MATCH_REVERSE set if there
2223    is no size conflict on any operands for the template with operands reversed
2224    (and the template allows for reversing in the first place).  */
2225
2226 #define MATCH_STRAIGHT 1
2227 #define MATCH_REVERSE  2
2228
2229 static INLINE unsigned int
2230 operand_size_match (const insn_template *t)
2231 {
2232   unsigned int j, match = MATCH_STRAIGHT;
2233
2234   /* Don't check non-absolute jump instructions.  */
2235   if (t->opcode_modifier.jump
2236       && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2237     return match;
2238
2239   /* Check memory and accumulator operand size.  */
2240   for (j = 0; j < i.operands; j++)
2241     {
2242       if (i.types[j].bitfield.class != Reg
2243           && i.types[j].bitfield.class != RegSIMD
2244           && t->opcode_modifier.anysize)
2245         continue;
2246
2247       if (t->operand_types[j].bitfield.class == Reg
2248           && !match_operand_size (t, j, j))
2249         {
2250           match = 0;
2251           break;
2252         }
2253
2254       if (t->operand_types[j].bitfield.class == RegSIMD
2255           && !match_simd_size (t, j, j))
2256         {
2257           match = 0;
2258           break;
2259         }
2260
2261       if (t->operand_types[j].bitfield.instance == Accum
2262           && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2263         {
2264           match = 0;
2265           break;
2266         }
2267
2268       if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2269         {
2270           match = 0;
2271           break;
2272         }
2273     }
2274
2275   if (!t->opcode_modifier.d)
2276     {
2277     mismatch:
2278       if (!match)
2279         i.error = operand_size_mismatch;
2280       return match;
2281     }
2282
2283   /* Check reverse.  */
2284   gas_assert (i.operands >= 2 && i.operands <= 3);
2285
2286   for (j = 0; j < i.operands; j++)
2287     {
2288       unsigned int given = i.operands - j - 1;
2289
2290       if (t->operand_types[j].bitfield.class == Reg
2291           && !match_operand_size (t, j, given))
2292         goto mismatch;
2293
2294       if (t->operand_types[j].bitfield.class == RegSIMD
2295           && !match_simd_size (t, j, given))
2296         goto mismatch;
2297
2298       if (t->operand_types[j].bitfield.instance == Accum
2299           && (!match_operand_size (t, j, given)
2300               || !match_simd_size (t, j, given)))
2301         goto mismatch;
2302
2303       if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2304         goto mismatch;
2305     }
2306
2307   return match | MATCH_REVERSE;
2308 }
2309
2310 static INLINE int
2311 operand_type_match (i386_operand_type overlap,
2312                     i386_operand_type given)
2313 {
2314   i386_operand_type temp = overlap;
2315
2316   temp.bitfield.unspecified = 0;
2317   temp.bitfield.byte = 0;
2318   temp.bitfield.word = 0;
2319   temp.bitfield.dword = 0;
2320   temp.bitfield.fword = 0;
2321   temp.bitfield.qword = 0;
2322   temp.bitfield.tbyte = 0;
2323   temp.bitfield.xmmword = 0;
2324   temp.bitfield.ymmword = 0;
2325   temp.bitfield.zmmword = 0;
2326   temp.bitfield.tmmword = 0;
2327   if (operand_type_all_zero (&temp))
2328     goto mismatch;
2329
2330   if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2331     return 1;
2332
2333  mismatch:
2334   i.error = operand_type_mismatch;
2335   return 0;
2336 }
2337
2338 /* If given types g0 and g1 are registers they must be of the same type
2339    unless the expected operand type register overlap is null.
2340    Some Intel syntax memory operand size checking also happens here.  */
2341
2342 static INLINE int
2343 operand_type_register_match (i386_operand_type g0,
2344                              i386_operand_type t0,
2345                              i386_operand_type g1,
2346                              i386_operand_type t1)
2347 {
2348   if (g0.bitfield.class != Reg
2349       && g0.bitfield.class != RegSIMD
2350       && (!operand_type_check (g0, anymem)
2351           || g0.bitfield.unspecified
2352           || (t0.bitfield.class != Reg
2353               && t0.bitfield.class != RegSIMD)))
2354     return 1;
2355
2356   if (g1.bitfield.class != Reg
2357       && g1.bitfield.class != RegSIMD
2358       && (!operand_type_check (g1, anymem)
2359           || g1.bitfield.unspecified
2360           || (t1.bitfield.class != Reg
2361               && t1.bitfield.class != RegSIMD)))
2362     return 1;
2363
2364   if (g0.bitfield.byte == g1.bitfield.byte
2365       && g0.bitfield.word == g1.bitfield.word
2366       && g0.bitfield.dword == g1.bitfield.dword
2367       && g0.bitfield.qword == g1.bitfield.qword
2368       && g0.bitfield.xmmword == g1.bitfield.xmmword
2369       && g0.bitfield.ymmword == g1.bitfield.ymmword
2370       && g0.bitfield.zmmword == g1.bitfield.zmmword)
2371     return 1;
2372
2373   if (!(t0.bitfield.byte & t1.bitfield.byte)
2374       && !(t0.bitfield.word & t1.bitfield.word)
2375       && !(t0.bitfield.dword & t1.bitfield.dword)
2376       && !(t0.bitfield.qword & t1.bitfield.qword)
2377       && !(t0.bitfield.xmmword & t1.bitfield.xmmword)
2378       && !(t0.bitfield.ymmword & t1.bitfield.ymmword)
2379       && !(t0.bitfield.zmmword & t1.bitfield.zmmword))
2380     return 1;
2381
2382   i.error = register_type_mismatch;
2383
2384   return 0;
2385 }
2386
2387 static INLINE unsigned int
2388 register_number (const reg_entry *r)
2389 {
2390   unsigned int nr = r->reg_num;
2391
2392   if (r->reg_flags & RegRex)
2393     nr += 8;
2394
2395   if (r->reg_flags & RegVRex)
2396     nr += 16;
2397
2398   return nr;
2399 }
2400
2401 static INLINE unsigned int
2402 mode_from_disp_size (i386_operand_type t)
2403 {
2404   if (t.bitfield.disp8)
2405     return 1;
2406   else if (t.bitfield.disp16
2407            || t.bitfield.disp32
2408            || t.bitfield.disp32s)
2409     return 2;
2410   else
2411     return 0;
2412 }
2413
2414 static INLINE int
2415 fits_in_signed_byte (addressT num)
2416 {
2417   return num + 0x80 <= 0xff;
2418 }
2419
2420 static INLINE int
2421 fits_in_unsigned_byte (addressT num)
2422 {
2423   return num <= 0xff;
2424 }
2425
2426 static INLINE int
2427 fits_in_unsigned_word (addressT num)
2428 {
2429   return num <= 0xffff;
2430 }
2431
2432 static INLINE int
2433 fits_in_signed_word (addressT num)
2434 {
2435   return num + 0x8000 <= 0xffff;
2436 }
2437
2438 static INLINE int
2439 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2440 {
2441 #ifndef BFD64
2442   return 1;
2443 #else
2444   return num + 0x80000000 <= 0xffffffff;
2445 #endif
2446 }                               /* fits_in_signed_long() */
2447
2448 static INLINE int
2449 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2450 {
2451 #ifndef BFD64
2452   return 1;
2453 #else
2454   return num <= 0xffffffff;
2455 #endif
2456 }                               /* fits_in_unsigned_long() */
2457
2458 static INLINE valueT extend_to_32bit_address (addressT num)
2459 {
2460 #ifdef BFD64
2461   if (fits_in_unsigned_long(num))
2462     return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2463
2464   if (!fits_in_signed_long (num))
2465     return num & 0xffffffff;
2466 #endif
2467
2468   return num;
2469 }
2470
2471 static INLINE int
2472 fits_in_disp8 (offsetT num)
2473 {
2474   int shift = i.memshift;
2475   unsigned int mask;
2476
2477   if (shift == -1)
2478     abort ();
2479
2480   mask = (1 << shift) - 1;
2481
2482   /* Return 0 if NUM isn't properly aligned.  */
2483   if ((num & mask))
2484     return 0;
2485
2486   /* Check if NUM will fit in 8bit after shift.  */
2487   return fits_in_signed_byte (num >> shift);
2488 }
2489
2490 static INLINE int
2491 fits_in_imm4 (offsetT num)
2492 {
2493   return (num & 0xf) == num;
2494 }
2495
2496 static i386_operand_type
2497 smallest_imm_type (offsetT num)
2498 {
2499   i386_operand_type t;
2500
2501   operand_type_set (&t, 0);
2502   t.bitfield.imm64 = 1;
2503
2504   if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2505     {
2506       /* This code is disabled on the 486 because all the Imm1 forms
2507          in the opcode table are slower on the i486.  They're the
2508          versions with the implicitly specified single-position
2509          displacement, which has another syntax if you really want to
2510          use that form.  */
2511       t.bitfield.imm1 = 1;
2512       t.bitfield.imm8 = 1;
2513       t.bitfield.imm8s = 1;
2514       t.bitfield.imm16 = 1;
2515       t.bitfield.imm32 = 1;
2516       t.bitfield.imm32s = 1;
2517     }
2518   else if (fits_in_signed_byte (num))
2519     {
2520       t.bitfield.imm8 = 1;
2521       t.bitfield.imm8s = 1;
2522       t.bitfield.imm16 = 1;
2523       t.bitfield.imm32 = 1;
2524       t.bitfield.imm32s = 1;
2525     }
2526   else if (fits_in_unsigned_byte (num))
2527     {
2528       t.bitfield.imm8 = 1;
2529       t.bitfield.imm16 = 1;
2530       t.bitfield.imm32 = 1;
2531       t.bitfield.imm32s = 1;
2532     }
2533   else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2534     {
2535       t.bitfield.imm16 = 1;
2536       t.bitfield.imm32 = 1;
2537       t.bitfield.imm32s = 1;
2538     }
2539   else if (fits_in_signed_long (num))
2540     {
2541       t.bitfield.imm32 = 1;
2542       t.bitfield.imm32s = 1;
2543     }
2544   else if (fits_in_unsigned_long (num))
2545     t.bitfield.imm32 = 1;
2546
2547   return t;
2548 }
2549
2550 static offsetT
2551 offset_in_range (offsetT val, int size)
2552 {
2553   addressT mask;
2554
2555   switch (size)
2556     {
2557     case 1: mask = ((addressT) 1 <<  8) - 1; break;
2558     case 2: mask = ((addressT) 1 << 16) - 1; break;
2559     case 4: mask = ((addressT) 2 << 31) - 1; break;
2560 #ifdef BFD64
2561     case 8: mask = ((addressT) 2 << 63) - 1; break;
2562 #endif
2563     default: abort ();
2564     }
2565
2566   if ((val & ~mask) != 0 && (val & ~mask) != ~mask)
2567     {
2568       char buf1[40], buf2[40];
2569
2570       bfd_sprintf_vma (stdoutput, buf1, val);
2571       bfd_sprintf_vma (stdoutput, buf2, val & mask);
2572       as_warn (_("%s shortened to %s"), buf1, buf2);
2573     }
2574   return val & mask;
2575 }
2576
/* Result codes of add_prefix.  PREFIX_EXIST is zero, so add_prefix's
   result tests false exactly when the prefix could not be added.  */
enum PREFIX_GROUP
{
  PREFIX_EXIST = 0,
  PREFIX_LOCK,
  PREFIX_REP,
  PREFIX_DS,
  PREFIX_OTHER
};
2585
2586 /* Returns
2587    a. PREFIX_EXIST if attempting to add a prefix where one from the
2588    same class already exists.
2589    b. PREFIX_LOCK if lock prefix is added.
2590    c. PREFIX_REP if rep/repne prefix is added.
2591    d. PREFIX_DS if ds prefix is added.
2592    e. PREFIX_OTHER if other prefix is added.
2593  */
2594
2595 static enum PREFIX_GROUP
2596 add_prefix (unsigned int prefix)
2597 {
2598   enum PREFIX_GROUP ret = PREFIX_OTHER;
2599   unsigned int q;
2600
2601   if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2602       && flag_code == CODE_64BIT)
2603     {
2604       if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2605           || (i.prefix[REX_PREFIX] & prefix & REX_R)
2606           || (i.prefix[REX_PREFIX] & prefix & REX_X)
2607           || (i.prefix[REX_PREFIX] & prefix & REX_B))
2608         ret = PREFIX_EXIST;
2609       q = REX_PREFIX;
2610     }
2611   else
2612     {
2613       switch (prefix)
2614         {
2615         default:
2616           abort ();
2617
2618         case DS_PREFIX_OPCODE:
2619           ret = PREFIX_DS;
2620           /* Fall through.  */
2621         case CS_PREFIX_OPCODE:
2622         case ES_PREFIX_OPCODE:
2623         case FS_PREFIX_OPCODE:
2624         case GS_PREFIX_OPCODE:
2625         case SS_PREFIX_OPCODE:
2626           q = SEG_PREFIX;
2627           break;
2628
2629         case REPNE_PREFIX_OPCODE:
2630         case REPE_PREFIX_OPCODE:
2631           q = REP_PREFIX;
2632           ret = PREFIX_REP;
2633           break;
2634
2635         case LOCK_PREFIX_OPCODE:
2636           q = LOCK_PREFIX;
2637           ret = PREFIX_LOCK;
2638           break;
2639
2640         case FWAIT_OPCODE:
2641           q = WAIT_PREFIX;
2642           break;
2643
2644         case ADDR_PREFIX_OPCODE:
2645           q = ADDR_PREFIX;
2646           break;
2647
2648         case DATA_PREFIX_OPCODE:
2649           q = DATA_PREFIX;
2650           break;
2651         }
2652       if (i.prefix[q] != 0)
2653         ret = PREFIX_EXIST;
2654     }
2655
2656   if (ret)
2657     {
2658       if (!i.prefix[q])
2659         ++i.prefixes;
2660       i.prefix[q] |= prefix;
2661     }
2662   else
2663     as_bad (_("same type of prefix used twice"));
2664
2665   return ret;
2666 }
2667
2668 static void
2669 update_code_flag (int value, int check)
2670 {
2671   PRINTF_LIKE ((*as_error));
2672
2673   flag_code = (enum flag_code) value;
2674   if (flag_code == CODE_64BIT)
2675     {
2676       cpu_arch_flags.bitfield.cpu64 = 1;
2677       cpu_arch_flags.bitfield.cpuno64 = 0;
2678     }
2679   else
2680     {
2681       cpu_arch_flags.bitfield.cpu64 = 0;
2682       cpu_arch_flags.bitfield.cpuno64 = 1;
2683     }
2684   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
2685     {
2686       if (check)
2687         as_error = as_fatal;
2688       else
2689         as_error = as_bad;
2690       (*as_error) (_("64bit mode not supported on `%s'."),
2691                    cpu_arch_name ? cpu_arch_name : default_arch);
2692     }
2693   if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2694     {
2695       if (check)
2696         as_error = as_fatal;
2697       else
2698         as_error = as_bad;
2699       (*as_error) (_("32bit mode not supported on `%s'."),
2700                    cpu_arch_name ? cpu_arch_name : default_arch);
2701     }
2702   stackop_size = '\0';
2703 }
2704
/* Switch the code mode to VALUE via update_code_flag, with its CHECK
   argument clear so that an unsupported mode is reported with as_bad
   rather than as_fatal.  */

static void
set_code_flag (int value)
{
  const int fatal_on_error = 0;

  update_code_flag (value, fatal_on_error);
}
2710
2711 static void
2712 set_16bit_gcc_code_flag (int new_code_flag)
2713 {
2714   flag_code = (enum flag_code) new_code_flag;
2715   if (flag_code != CODE_16BIT)
2716     abort ();
2717   cpu_arch_flags.bitfield.cpu64 = 0;
2718   cpu_arch_flags.bitfield.cpuno64 = 1;
2719   stackop_size = LONG_MNEM_SUFFIX;
2720 }
2721
2722 static void
2723 set_intel_syntax (int syntax_flag)
2724 {
2725   /* Find out if register prefixing is specified.  */
2726   int ask_naked_reg = 0;
2727
2728   SKIP_WHITESPACE ();
2729   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2730     {
2731       char *string;
2732       int e = get_symbol_name (&string);
2733
2734       if (strcmp (string, "prefix") == 0)
2735         ask_naked_reg = 1;
2736       else if (strcmp (string, "noprefix") == 0)
2737         ask_naked_reg = -1;
2738       else
2739         as_bad (_("bad argument to syntax directive."));
2740       (void) restore_line_pointer (e);
2741     }
2742   demand_empty_rest_of_line ();
2743
2744   intel_syntax = syntax_flag;
2745
2746   if (ask_naked_reg == 0)
2747     allow_naked_reg = (intel_syntax
2748                        && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2749   else
2750     allow_naked_reg = (ask_naked_reg < 0);
2751
2752   expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2753
2754   identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2755   identifier_chars['$'] = intel_syntax ? '$' : 0;
2756   register_prefix = allow_naked_reg ? "" : "%";
2757 }
2758
/* Store MNEMONIC_FLAG in intel_mnemonic.  */
static void
set_intel_mnemonic (int mnemonic_flag)
{
  intel_mnemonic = mnemonic_flag;
}
2764
/* Store FLAG in allow_index_reg.  */
static void
set_allow_index_reg (int flag)
{
  allow_index_reg = flag;
}
2770
2771 static void
2772 set_check (int what)
2773 {
2774   enum check_kind *kind;
2775   const char *str;
2776
2777   if (what)
2778     {
2779       kind = &operand_check;
2780       str = "operand";
2781     }
2782   else
2783     {
2784       kind = &sse_check;
2785       str = "sse";
2786     }
2787
2788   SKIP_WHITESPACE ();
2789
2790   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2791     {
2792       char *string;
2793       int e = get_symbol_name (&string);
2794
2795       if (strcmp (string, "none") == 0)
2796         *kind = check_none;
2797       else if (strcmp (string, "warning") == 0)
2798         *kind = check_warning;
2799       else if (strcmp (string, "error") == 0)
2800         *kind = check_error;
2801       else
2802         as_bad (_("bad argument to %s_check directive."), str);
2803       (void) restore_line_pointer (e);
2804     }
2805   else
2806     as_bad (_("missing argument for %s_check directive"), str);
2807
2808   demand_empty_rest_of_line ();
2809 }
2810
2811 static void
2812 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2813                            i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2814 {
2815 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2816   static const char *arch;
2817
2818   /* Intel LIOM is only supported on ELF.  */
2819   if (!IS_ELF)
2820     return;
2821
2822   if (!arch)
2823     {
2824       /* Use cpu_arch_name if it is set in md_parse_option.  Otherwise
2825          use default_arch.  */
2826       arch = cpu_arch_name;
2827       if (!arch)
2828         arch = default_arch;
2829     }
2830
2831   /* If we are targeting Intel MCU, we must enable it.  */
2832   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_IAMCU
2833       || new_flag.bitfield.cpuiamcu)
2834     return;
2835
2836   /* If we are targeting Intel L1OM, we must enable it.  */
2837   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_L1OM
2838       || new_flag.bitfield.cpul1om)
2839     return;
2840
2841   /* If we are targeting Intel K1OM, we must enable it.  */
2842   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_K1OM
2843       || new_flag.bitfield.cpuk1om)
2844     return;
2845
2846   as_bad (_("`%s' is not supported on `%s'"), name, arch);
2847 #endif
2848 }
2849
2850 static void
2851 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2852 {
2853   SKIP_WHITESPACE ();
2854
2855   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2856     {
2857       char *string;
2858       int e = get_symbol_name (&string);
2859       unsigned int j;
2860       i386_cpu_flags flags;
2861
2862       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2863         {
2864           if (strcmp (string, cpu_arch[j].name) == 0)
2865             {
2866               check_cpu_arch_compatible (string, cpu_arch[j].flags);
2867
2868               if (*string != '.')
2869                 {
2870                   cpu_arch_name = cpu_arch[j].name;
2871                   cpu_sub_arch_name = NULL;
2872                   cpu_arch_flags = cpu_arch[j].flags;
2873                   if (flag_code == CODE_64BIT)
2874                     {
2875                       cpu_arch_flags.bitfield.cpu64 = 1;
2876                       cpu_arch_flags.bitfield.cpuno64 = 0;
2877                     }
2878                   else
2879                     {
2880                       cpu_arch_flags.bitfield.cpu64 = 0;
2881                       cpu_arch_flags.bitfield.cpuno64 = 1;
2882                     }
2883                   cpu_arch_isa = cpu_arch[j].type;
2884                   cpu_arch_isa_flags = cpu_arch[j].flags;
2885                   if (!cpu_arch_tune_set)
2886                     {
2887                       cpu_arch_tune = cpu_arch_isa;
2888                       cpu_arch_tune_flags = cpu_arch_isa_flags;
2889                     }
2890                   break;
2891                 }
2892
2893               flags = cpu_flags_or (cpu_arch_flags,
2894                                     cpu_arch[j].flags);
2895
2896               if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2897                 {
2898                   if (cpu_sub_arch_name)
2899                     {
2900                       char *name = cpu_sub_arch_name;
2901                       cpu_sub_arch_name = concat (name,
2902                                                   cpu_arch[j].name,
2903                                                   (const char *) NULL);
2904                       free (name);
2905                     }
2906                   else
2907                     cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
2908                   cpu_arch_flags = flags;
2909                   cpu_arch_isa_flags = flags;
2910                 }
2911               else
2912                 cpu_arch_isa_flags
2913                   = cpu_flags_or (cpu_arch_isa_flags,
2914                                   cpu_arch[j].flags);
2915               (void) restore_line_pointer (e);
2916               demand_empty_rest_of_line ();
2917               return;
2918             }
2919         }
2920
2921       if (*string == '.' && j >= ARRAY_SIZE (cpu_arch))
2922         {
2923           /* Disable an ISA extension.  */
2924           for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
2925             if (strcmp (string + 1, cpu_noarch [j].name) == 0)
2926               {
2927                 flags = cpu_flags_and_not (cpu_arch_flags,
2928                                            cpu_noarch[j].flags);
2929                 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2930                   {
2931                     if (cpu_sub_arch_name)
2932                       {
2933                         char *name = cpu_sub_arch_name;
2934                         cpu_sub_arch_name = concat (name, string,
2935                                                     (const char *) NULL);
2936                         free (name);
2937                       }
2938                     else
2939                       cpu_sub_arch_name = xstrdup (string);
2940                     cpu_arch_flags = flags;
2941                     cpu_arch_isa_flags = flags;
2942                   }
2943                 (void) restore_line_pointer (e);
2944                 demand_empty_rest_of_line ();
2945                 return;
2946               }
2947
2948           j = ARRAY_SIZE (cpu_arch);
2949         }
2950
2951       if (j >= ARRAY_SIZE (cpu_arch))
2952         as_bad (_("no such architecture: `%s'"), string);
2953
2954       *input_line_pointer = e;
2955     }
2956   else
2957     as_bad (_("missing cpu architecture"));
2958
2959   no_cond_jump_promotion = 0;
2960   if (*input_line_pointer == ','
2961       && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2962     {
2963       char *string;
2964       char e;
2965
2966       ++input_line_pointer;
2967       e = get_symbol_name (&string);
2968
2969       if (strcmp (string, "nojumps") == 0)
2970         no_cond_jump_promotion = 1;
2971       else if (strcmp (string, "jumps") == 0)
2972         ;
2973       else
2974         as_bad (_("no such architecture modifier: `%s'"), string);
2975
2976       (void) restore_line_pointer (e);
2977     }
2978
2979   demand_empty_rest_of_line ();
2980 }
2981
2982 enum bfd_architecture
2983 i386_arch (void)
2984 {
2985   if (cpu_arch_isa == PROCESSOR_L1OM)
2986     {
2987       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2988           || flag_code != CODE_64BIT)
2989         as_fatal (_("Intel L1OM is 64bit ELF only"));
2990       return bfd_arch_l1om;
2991     }
2992   else if (cpu_arch_isa == PROCESSOR_K1OM)
2993     {
2994       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2995           || flag_code != CODE_64BIT)
2996         as_fatal (_("Intel K1OM is 64bit ELF only"));
2997       return bfd_arch_k1om;
2998     }
2999   else if (cpu_arch_isa == PROCESSOR_IAMCU)
3000     {
3001       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
3002           || flag_code == CODE_64BIT)
3003         as_fatal (_("Intel MCU is 32bit ELF only"));
3004       return bfd_arch_iamcu;
3005     }
3006   else
3007     return bfd_arch_i386;
3008 }
3009
3010 unsigned long
3011 i386_mach (void)
3012 {
3013   if (startswith (default_arch, "x86_64"))
3014     {
3015       if (cpu_arch_isa == PROCESSOR_L1OM)
3016         {
3017           if (OUTPUT_FLAVOR != bfd_target_elf_flavour
3018               || default_arch[6] != '\0')
3019             as_fatal (_("Intel L1OM is 64bit ELF only"));
3020           return bfd_mach_l1om;
3021         }
3022       else if (cpu_arch_isa == PROCESSOR_K1OM)
3023         {
3024           if (OUTPUT_FLAVOR != bfd_target_elf_flavour
3025               || default_arch[6] != '\0')
3026             as_fatal (_("Intel K1OM is 64bit ELF only"));
3027           return bfd_mach_k1om;
3028         }
3029       else if (default_arch[6] == '\0')
3030         return bfd_mach_x86_64;
3031       else
3032         return bfd_mach_x64_32;
3033     }
3034   else if (!strcmp (default_arch, "i386")
3035            || !strcmp (default_arch, "iamcu"))
3036     {
3037       if (cpu_arch_isa == PROCESSOR_IAMCU)
3038         {
3039           if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
3040             as_fatal (_("Intel MCU is 32bit ELF only"));
3041           return bfd_mach_i386_iamcu;
3042         }
3043       else
3044         return bfd_mach_i386_i386;
3045     }
3046   else
3047     as_fatal (_("unknown architecture"));
3048 }
3049 \f
3050 void
3051 md_begin (void)
3052 {
3053   /* Support pseudo prefixes like {disp32}.  */
3054   lex_type ['{'] = LEX_BEGIN_NAME;
3055
3056   /* Initialize op_hash hash table.  */
3057   op_hash = str_htab_create ();
3058
3059   {
3060     const insn_template *optab;
3061     templates *core_optab;
3062
3063     /* Setup for loop.  */
3064     optab = i386_optab;
3065     core_optab = XNEW (templates);
3066     core_optab->start = optab;
3067
3068     while (1)
3069       {
3070         ++optab;
3071         if (optab->name == NULL
3072             || strcmp (optab->name, (optab - 1)->name) != 0)
3073           {
3074             /* different name --> ship out current template list;
3075                add to hash table; & begin anew.  */
3076             core_optab->end = optab;
3077             if (str_hash_insert (op_hash, (optab - 1)->name, core_optab, 0))
3078               as_fatal (_("duplicate %s"), (optab - 1)->name);
3079
3080             if (optab->name == NULL)
3081               break;
3082             core_optab = XNEW (templates);
3083             core_optab->start = optab;
3084           }
3085       }
3086   }
3087
3088   /* Initialize reg_hash hash table.  */
3089   reg_hash = str_htab_create ();
3090   {
3091     const reg_entry *regtab;
3092     unsigned int regtab_size = i386_regtab_size;
3093
3094     for (regtab = i386_regtab; regtab_size--; regtab++)
3095       {
3096         switch (regtab->reg_type.bitfield.class)
3097           {
3098           case Reg:
3099             if (regtab->reg_type.bitfield.dword)
3100               {
3101                 if (regtab->reg_type.bitfield.instance == Accum)
3102                   reg_eax = regtab;
3103               }
3104             else if (regtab->reg_type.bitfield.tbyte)
3105               {
3106                 /* There's no point inserting st(<N>) in the hash table, as
3107                    parentheses aren't included in register_chars[] anyway.  */
3108                 if (regtab->reg_type.bitfield.instance != Accum)
3109                   continue;
3110                 reg_st0 = regtab;
3111               }
3112             break;
3113
3114           case SReg:
3115             switch (regtab->reg_num)
3116               {
3117               case 0: reg_es = regtab; break;
3118               case 2: reg_ss = regtab; break;
3119               case 3: reg_ds = regtab; break;
3120               }
3121             break;
3122
3123           case RegMask:
3124             if (!regtab->reg_num)
3125               reg_k0 = regtab;
3126             break;
3127           }
3128
3129         if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3130           as_fatal (_("duplicate %s"), regtab->reg_name);
3131       }
3132   }
3133
3134   /* Fill in lexical tables:  mnemonic_chars, operand_chars.  */
3135   {
3136     int c;
3137     char *p;
3138
3139     for (c = 0; c < 256; c++)
3140       {
3141         if (ISDIGIT (c) || ISLOWER (c))
3142           {
3143             mnemonic_chars[c] = c;
3144             register_chars[c] = c;
3145             operand_chars[c] = c;
3146           }
3147         else if (ISUPPER (c))
3148           {
3149             mnemonic_chars[c] = TOLOWER (c);
3150             register_chars[c] = mnemonic_chars[c];
3151             operand_chars[c] = c;
3152           }
3153         else if (c == '{' || c == '}')
3154           {
3155             mnemonic_chars[c] = c;
3156             operand_chars[c] = c;
3157           }
3158 #ifdef SVR4_COMMENT_CHARS
3159         else if (c == '\\' && strchr (i386_comment_chars, '/'))
3160           operand_chars[c] = c;
3161 #endif
3162
3163         if (ISALPHA (c) || ISDIGIT (c))
3164           identifier_chars[c] = c;
3165         else if (c >= 128)
3166           {
3167             identifier_chars[c] = c;
3168             operand_chars[c] = c;
3169           }
3170       }
3171
3172 #ifdef LEX_AT
3173     identifier_chars['@'] = '@';
3174 #endif
3175 #ifdef LEX_QM
3176     identifier_chars['?'] = '?';
3177     operand_chars['?'] = '?';
3178 #endif
3179     mnemonic_chars['_'] = '_';
3180     mnemonic_chars['-'] = '-';
3181     mnemonic_chars['.'] = '.';
3182     identifier_chars['_'] = '_';
3183     identifier_chars['.'] = '.';
3184
3185     for (p = operand_special_chars; *p != '\0'; p++)
3186       operand_chars[(unsigned char) *p] = *p;
3187   }
3188
3189   if (flag_code == CODE_64BIT)
3190     {
3191 #if defined (OBJ_COFF) && defined (TE_PE)
3192       x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3193                                   ? 32 : 16);
3194 #else
3195       x86_dwarf2_return_column = 16;
3196 #endif
3197       x86_cie_data_alignment = -8;
3198     }
3199   else
3200     {
3201       x86_dwarf2_return_column = 8;
3202       x86_cie_data_alignment = -4;
3203     }
3204
3205   /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3206      can be turned into BRANCH_PREFIX frag.  */
3207   if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3208     abort ();
3209 }
3210
3211 void
3212 i386_print_statistics (FILE *file)
3213 {
3214   htab_print_statistics (file, "i386 opcode", op_hash);
3215   htab_print_statistics (file, "i386 register", reg_hash);
3216 }
3217 \f
3218 #ifdef DEBUG386
3219
3220 /* Debugging routines for md_assemble.  */
3221 static void pte (insn_template *);
3222 static void pt (i386_operand_type);
3223 static void pe (expressionS *);
3224 static void ps (symbolS *);
3225
3226 static void
3227 pi (const char *line, i386_insn *x)
3228 {
3229   unsigned int j;
3230
3231   fprintf (stdout, "%s: template ", line);
3232   pte (&x->tm);
3233   fprintf (stdout, "  address: base %s  index %s  scale %x\n",
3234            x->base_reg ? x->base_reg->reg_name : "none",
3235            x->index_reg ? x->index_reg->reg_name : "none",
3236            x->log2_scale_factor);
3237   fprintf (stdout, "  modrm:  mode %x  reg %x  reg/mem %x\n",
3238            x->rm.mode, x->rm.reg, x->rm.regmem);
3239   fprintf (stdout, "  sib:  base %x  index %x  scale %x\n",
3240            x->sib.base, x->sib.index, x->sib.scale);
3241   fprintf (stdout, "  rex: 64bit %x  extX %x  extY %x  extZ %x\n",
3242            (x->rex & REX_W) != 0,
3243            (x->rex & REX_R) != 0,
3244            (x->rex & REX_X) != 0,
3245            (x->rex & REX_B) != 0);
3246   for (j = 0; j < x->operands; j++)
3247     {
3248       fprintf (stdout, "    #%d:  ", j + 1);
3249       pt (x->types[j]);
3250       fprintf (stdout, "\n");
3251       if (x->types[j].bitfield.class == Reg
3252           || x->types[j].bitfield.class == RegMMX
3253           || x->types[j].bitfield.class == RegSIMD
3254           || x->types[j].bitfield.class == RegMask
3255           || x->types[j].bitfield.class == SReg
3256           || x->types[j].bitfield.class == RegCR
3257           || x->types[j].bitfield.class == RegDR
3258           || x->types[j].bitfield.class == RegTR
3259           || x->types[j].bitfield.class == RegBND)
3260         fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3261       if (operand_type_check (x->types[j], imm))
3262         pe (x->op[j].imms);
3263       if (operand_type_check (x->types[j], disp))
3264         pe (x->op[j].disps);
3265     }
3266 }
3267
3268 static void
3269 pte (insn_template *t)
3270 {
3271   static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3272   static const char *const opc_spc[] = {
3273     NULL, "0f", "0f38", "0f3a", NULL, NULL, NULL, NULL,
3274     "XOP08", "XOP09", "XOP0A",
3275   };
3276   unsigned int j;
3277
3278   fprintf (stdout, " %d operands ", t->operands);
3279   if (opc_pfx[t->opcode_modifier.opcodeprefix])
3280     fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3281   if (opc_spc[t->opcode_modifier.opcodespace])
3282     fprintf (stdout, "space %s ", opc_spc[t->opcode_modifier.opcodespace]);
3283   fprintf (stdout, "opcode %x ", t->base_opcode);
3284   if (t->extension_opcode != None)
3285     fprintf (stdout, "ext %x ", t->extension_opcode);
3286   if (t->opcode_modifier.d)
3287     fprintf (stdout, "D");
3288   if (t->opcode_modifier.w)
3289     fprintf (stdout, "W");
3290   fprintf (stdout, "\n");
3291   for (j = 0; j < t->operands; j++)
3292     {
3293       fprintf (stdout, "    #%d type ", j + 1);
3294       pt (t->operand_types[j]);
3295       fprintf (stdout, "\n");
3296     }
3297 }
3298
3299 static void
3300 pe (expressionS *e)
3301 {
3302   fprintf (stdout, "    operation     %d\n", e->X_op);
3303   fprintf (stdout, "    add_number    %" BFD_VMA_FMT "d (%" BFD_VMA_FMT "x)\n",
3304            e->X_add_number, e->X_add_number);
3305   if (e->X_add_symbol)
3306     {
3307       fprintf (stdout, "    add_symbol    ");
3308       ps (e->X_add_symbol);
3309       fprintf (stdout, "\n");
3310     }
3311   if (e->X_op_symbol)
3312     {
3313       fprintf (stdout, "    op_symbol    ");
3314       ps (e->X_op_symbol);
3315       fprintf (stdout, "\n");
3316     }
3317 }
3318
3319 static void
3320 ps (symbolS *s)
3321 {
3322   fprintf (stdout, "%s type %s%s",
3323            S_GET_NAME (s),
3324            S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3325            segment_name (S_GET_SEGMENT (s)));
3326 }
3327
3328 static struct type_name
3329   {
3330     i386_operand_type mask;
3331     const char *name;
3332   }
3333 const type_names[] =
3334 {
3335   { OPERAND_TYPE_REG8, "r8" },
3336   { OPERAND_TYPE_REG16, "r16" },
3337   { OPERAND_TYPE_REG32, "r32" },
3338   { OPERAND_TYPE_REG64, "r64" },
3339   { OPERAND_TYPE_ACC8, "acc8" },
3340   { OPERAND_TYPE_ACC16, "acc16" },
3341   { OPERAND_TYPE_ACC32, "acc32" },
3342   { OPERAND_TYPE_ACC64, "acc64" },
3343   { OPERAND_TYPE_IMM8, "i8" },
3344   { OPERAND_TYPE_IMM8, "i8s" },
3345   { OPERAND_TYPE_IMM16, "i16" },
3346   { OPERAND_TYPE_IMM32, "i32" },
3347   { OPERAND_TYPE_IMM32S, "i32s" },
3348   { OPERAND_TYPE_IMM64, "i64" },
3349   { OPERAND_TYPE_IMM1, "i1" },
3350   { OPERAND_TYPE_BASEINDEX, "BaseIndex" },
3351   { OPERAND_TYPE_DISP8, "d8" },
3352   { OPERAND_TYPE_DISP16, "d16" },
3353   { OPERAND_TYPE_DISP32, "d32" },
3354   { OPERAND_TYPE_DISP32S, "d32s" },
3355   { OPERAND_TYPE_DISP64, "d64" },
3356   { OPERAND_TYPE_INOUTPORTREG, "InOutPortReg" },
3357   { OPERAND_TYPE_SHIFTCOUNT, "ShiftCount" },
3358   { OPERAND_TYPE_CONTROL, "control reg" },
3359   { OPERAND_TYPE_TEST, "test reg" },
3360   { OPERAND_TYPE_DEBUG, "debug reg" },
3361   { OPERAND_TYPE_FLOATREG, "FReg" },
3362   { OPERAND_TYPE_FLOATACC, "FAcc" },
3363   { OPERAND_TYPE_SREG, "SReg" },
3364   { OPERAND_TYPE_REGMMX, "rMMX" },
3365   { OPERAND_TYPE_REGXMM, "rXMM" },
3366   { OPERAND_TYPE_REGYMM, "rYMM" },
3367   { OPERAND_TYPE_REGZMM, "rZMM" },
3368   { OPERAND_TYPE_REGTMM, "rTMM" },
3369   { OPERAND_TYPE_REGMASK, "Mask reg" },
3370 };
3371
3372 static void
3373 pt (i386_operand_type t)
3374 {
3375   unsigned int j;
3376   i386_operand_type a;
3377
3378   for (j = 0; j < ARRAY_SIZE (type_names); j++)
3379     {
3380       a = operand_type_and (t, type_names[j].mask);
3381       if (operand_type_equal (&a, &type_names[j].mask))
3382         fprintf (stdout, "%s, ",  type_names[j].name);
3383     }
3384   fflush (stdout);
3385 }
3386
3387 #endif /* DEBUG386 */
3388 \f
3389 static bfd_reloc_code_real_type
3390 reloc (unsigned int size,
3391        int pcrel,
3392        int sign,
3393        bfd_reloc_code_real_type other)
3394 {
3395   if (other != NO_RELOC)
3396     {
3397       reloc_howto_type *rel;
3398
3399       if (size == 8)
3400         switch (other)
3401           {
3402           case BFD_RELOC_X86_64_GOT32:
3403             return BFD_RELOC_X86_64_GOT64;
3404             break;
3405           case BFD_RELOC_X86_64_GOTPLT64:
3406             return BFD_RELOC_X86_64_GOTPLT64;
3407             break;
3408           case BFD_RELOC_X86_64_PLTOFF64:
3409             return BFD_RELOC_X86_64_PLTOFF64;
3410             break;
3411           case BFD_RELOC_X86_64_GOTPC32:
3412             other = BFD_RELOC_X86_64_GOTPC64;
3413             break;
3414           case BFD_RELOC_X86_64_GOTPCREL:
3415             other = BFD_RELOC_X86_64_GOTPCREL64;
3416             break;
3417           case BFD_RELOC_X86_64_TPOFF32:
3418             other = BFD_RELOC_X86_64_TPOFF64;
3419             break;
3420           case BFD_RELOC_X86_64_DTPOFF32:
3421             other = BFD_RELOC_X86_64_DTPOFF64;
3422             break;
3423           default:
3424             break;
3425           }
3426
3427 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3428       if (other == BFD_RELOC_SIZE32)
3429         {
3430           if (size == 8)
3431             other = BFD_RELOC_SIZE64;
3432           if (pcrel)
3433             {
3434               as_bad (_("there are no pc-relative size relocations"));
3435               return NO_RELOC;
3436             }
3437         }
3438 #endif
3439
3440       /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless.  */
3441       if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3442         sign = -1;
3443
3444       rel = bfd_reloc_type_lookup (stdoutput, other);
3445       if (!rel)
3446         as_bad (_("unknown relocation (%u)"), other);
3447       else if (size != bfd_get_reloc_size (rel))
3448         as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3449                 bfd_get_reloc_size (rel),
3450                 size);
3451       else if (pcrel && !rel->pc_relative)
3452         as_bad (_("non-pc-relative relocation for pc-relative field"));
3453       else if ((rel->complain_on_overflow == complain_overflow_signed
3454                 && !sign)
3455                || (rel->complain_on_overflow == complain_overflow_unsigned
3456                    && sign > 0))
3457         as_bad (_("relocated field and relocation type differ in signedness"));
3458       else
3459         return other;
3460       return NO_RELOC;
3461     }
3462
3463   if (pcrel)
3464     {
3465       if (!sign)
3466         as_bad (_("there are no unsigned pc-relative relocations"));
3467       switch (size)
3468         {
3469         case 1: return BFD_RELOC_8_PCREL;
3470         case 2: return BFD_RELOC_16_PCREL;
3471         case 4: return BFD_RELOC_32_PCREL;
3472         case 8: return BFD_RELOC_64_PCREL;
3473         }
3474       as_bad (_("cannot do %u byte pc-relative relocation"), size);
3475     }
3476   else
3477     {
3478       if (sign > 0)
3479         switch (size)
3480           {
3481           case 4: return BFD_RELOC_X86_64_32S;
3482           }
3483       else
3484         switch (size)
3485           {
3486           case 1: return BFD_RELOC_8;
3487           case 2: return BFD_RELOC_16;
3488           case 4: return BFD_RELOC_32;
3489           case 8: return BFD_RELOC_64;
3490           }
3491       as_bad (_("cannot do %s %u byte relocation"),
3492               sign > 0 ? "signed" : "unsigned", size);
3493     }
3494
3495   return NO_RELOC;
3496 }
3497
3498 /* Here we decide which fixups can be adjusted to make them relative to
3499    the beginning of the section instead of the symbol.  Basically we need
3500    to make sure that the dynamic relocations are done correctly, so in
3501    some cases we force the original symbol to be used.  */
3502
3503 int
3504 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3505 {
3506 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3507   if (!IS_ELF)
3508     return 1;
3509
3510   /* Don't adjust pc-relative references to merge sections in 64-bit
3511      mode.  */
3512   if (use_rela_relocations
3513       && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3514       && fixP->fx_pcrel)
3515     return 0;
3516
3517   /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3518      and changed later by validate_fix.  */
3519   if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3520       && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3521     return 0;
3522
3523   /* Adjust_reloc_syms doesn't know about the GOT.  Need to keep symbol
3524      for size relocations.  */
3525   if (fixP->fx_r_type == BFD_RELOC_SIZE32
3526       || fixP->fx_r_type == BFD_RELOC_SIZE64
3527       || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3528       || fixP->fx_r_type == BFD_RELOC_386_GOT32
3529       || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3530       || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3531       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3532       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3533       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3534       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3535       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3536       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3537       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3538       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3539       || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3540       || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3541       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3542       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3543       || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3544       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3545       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3546       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3547       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3548       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3549       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3550       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3551       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3552       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3553       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3554       || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3555       || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3556     return 0;
3557 #endif
3558   return 1;
3559 }
3560
3561 static INLINE bool
3562 want_disp32 (const insn_template *t)
3563 {
3564   return flag_code != CODE_64BIT
3565          || i.prefix[ADDR_PREFIX]
3566          || (t->base_opcode == 0x8d
3567              && t->opcode_modifier.opcodespace == SPACE_BASE
3568              && (!i.types[1].bitfield.qword
3569                 || t->opcode_modifier.size == SIZE32));
3570 }
3571
/* Classify MNEMONIC for the handling of floating-point memory operands:

     0  not a math op: does not start with 'f', or is fxr.../fxs...
        (fxsave/fxrstor);
     1  ordinary floating-point op;
     2  integer op ("fi...");
     3  control/state op: fldcw/fldenv, "fn..." other than fnop,
        frstor/frstpm, fsave, fstcw/fstdw/fstenv/fsts[gw].

   The value is meaningful only for opcodes with (memory) operands, so
   operand-less opcodes (fclex, fdecstp, fdisi, femms, feni, fincstp,
   finit, fsetpm, the fs segment override prefix, ...) are deliberately
   not handled: no call path brings them here.  */

static int
intel_float_operand (const char *mnemonic)
{
  char kind;

  if (mnemonic[0] != 'f')
    return 0; /* non-math */

  kind = mnemonic[1];

  if (kind == 'i')
    return 2; /* integer op */

  if (kind == 'l')
    {
      /* fldcw/fldenv */
      if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
        return 3;
      return 1;
    }

  if (kind == 'n')
    /* Everything but fnop is a non-waiting control op.  */
    return mnemonic[2] != 'o' ? 3 : 1;

  if (kind == 'r')
    /* frstor/frstpm */
    return mnemonic[2] == 's' ? 3 : 1;

  if (kind == 's')
    {
      if (mnemonic[2] == 'a')
        return 3; /* fsave */

      /* fstcw, fstdw, fstenv, fsts[gw] */
      if (mnemonic[2] == 't'
          && (mnemonic[3] == 'c' || mnemonic[3] == 'd'
              || mnemonic[3] == 'e' || mnemonic[3] == 's'))
        return 3;

      return 1;
    }

  if (kind == 'x')
    /* fxsave/fxrstor are not really math ops */
    return (mnemonic[2] == 'r' || mnemonic[2] == 's') ? 0 : 1;

  return 1;
}
3624
3625 static INLINE void
3626 install_template (const insn_template *t)
3627 {
3628   unsigned int l;
3629
3630   i.tm = *t;
3631
3632   /* Note that for pseudo prefixes this produces a length of 1. But for them
3633      the length isn't interesting at all.  */
3634   for (l = 1; l < 4; ++l)
3635     if (!(t->base_opcode >> (8 * l)))
3636       break;
3637
3638   i.opcode_length = l;
3639 }
3640
3641 /* Build the VEX prefix.  */
3642
3643 static void
3644 build_vex_prefix (const insn_template *t)
3645 {
3646   unsigned int register_specifier;
3647   unsigned int vector_length;
3648   unsigned int w;
3649
3650   /* Check register specifier.  */
3651   if (i.vex.register_specifier)
3652     {
3653       register_specifier =
3654         ~register_number (i.vex.register_specifier) & 0xf;
3655       gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3656     }
3657   else
3658     register_specifier = 0xf;
3659
3660   /* Use 2-byte VEX prefix by swapping destination and source operand
3661      if there are more than 1 register operand.  */
3662   if (i.reg_operands > 1
3663       && i.vec_encoding != vex_encoding_vex3
3664       && i.dir_encoding == dir_encoding_default
3665       && i.operands == i.reg_operands
3666       && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3667       && i.tm.opcode_modifier.opcodespace == SPACE_0F
3668       && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3669       && i.rex == REX_B)
3670     {
3671       unsigned int xchg = i.operands - 1;
3672       union i386_op temp_op;
3673       i386_operand_type temp_type;
3674
3675       temp_type = i.types[xchg];
3676       i.types[xchg] = i.types[0];
3677       i.types[0] = temp_type;
3678       temp_op = i.op[xchg];
3679       i.op[xchg] = i.op[0];
3680       i.op[0] = temp_op;
3681
3682       gas_assert (i.rm.mode == 3);
3683
3684       i.rex = REX_R;
3685       xchg = i.rm.regmem;
3686       i.rm.regmem = i.rm.reg;
3687       i.rm.reg = xchg;
3688
3689       if (i.tm.opcode_modifier.d)
3690         i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3691                             ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
3692       else /* Use the next insn.  */
3693         install_template (&t[1]);
3694     }
3695
3696   /* Use 2-byte VEX prefix by swapping commutative source operands if there
3697      are no memory operands and at least 3 register ones.  */
3698   if (i.reg_operands >= 3
3699       && i.vec_encoding != vex_encoding_vex3
3700       && i.reg_operands == i.operands - i.imm_operands
3701       && i.tm.opcode_modifier.vex
3702       && i.tm.opcode_modifier.commutative
3703       && (i.tm.opcode_modifier.sse2avx || optimize > 1)
3704       && i.rex == REX_B
3705       && i.vex.register_specifier
3706       && !(i.vex.register_specifier->reg_flags & RegRex))
3707     {
3708       unsigned int xchg = i.operands - i.reg_operands;
3709       union i386_op temp_op;
3710       i386_operand_type temp_type;
3711
3712       gas_assert (i.tm.opcode_modifier.opcodespace == SPACE_0F);
3713       gas_assert (!i.tm.opcode_modifier.sae);
3714       gas_assert (operand_type_equal (&i.types[i.operands - 2],
3715                                       &i.types[i.operands - 3]));
3716       gas_assert (i.rm.mode == 3);
3717
3718       temp_type = i.types[xchg];
3719       i.types[xchg] = i.types[xchg + 1];
3720       i.types[xchg + 1] = temp_type;
3721       temp_op = i.op[xchg];
3722       i.op[xchg] = i.op[xchg + 1];
3723       i.op[xchg + 1] = temp_op;
3724
3725       i.rex = 0;
3726       xchg = i.rm.regmem | 8;
3727       i.rm.regmem = ~register_specifier & 0xf;
3728       gas_assert (!(i.rm.regmem & 8));
3729       i.vex.register_specifier += xchg - i.rm.regmem;
3730       register_specifier = ~xchg & 0xf;
3731     }
3732
3733   if (i.tm.opcode_modifier.vex == VEXScalar)
3734     vector_length = avxscalar;
3735   else if (i.tm.opcode_modifier.vex == VEX256)
3736     vector_length = 1;
3737   else
3738     {
3739       unsigned int op;
3740
3741       /* Determine vector length from the last multi-length vector
3742          operand.  */
3743       vector_length = 0;
3744       for (op = t->operands; op--;)
3745         if (t->operand_types[op].bitfield.xmmword
3746             && t->operand_types[op].bitfield.ymmword
3747             && i.types[op].bitfield.ymmword)
3748           {
3749             vector_length = 1;
3750             break;
3751           }
3752     }
3753
3754   /* Check the REX.W bit and VEXW.  */
3755   if (i.tm.opcode_modifier.vexw == VEXWIG)
3756     w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3757   else if (i.tm.opcode_modifier.vexw)
3758     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3759   else
3760     w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3761
3762   /* Use 2-byte VEX prefix if possible.  */
3763   if (w == 0
3764       && i.vec_encoding != vex_encoding_vex3
3765       && i.tm.opcode_modifier.opcodespace == SPACE_0F
3766       && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3767     {
3768       /* 2-byte VEX prefix.  */
3769       unsigned int r;
3770
3771       i.vex.length = 2;
3772       i.vex.bytes[0] = 0xc5;
3773
3774       /* Check the REX.R bit.  */
3775       r = (i.rex & REX_R) ? 0 : 1;
3776       i.vex.bytes[1] = (r << 7
3777                         | register_specifier << 3
3778                         | vector_length << 2
3779                         | i.tm.opcode_modifier.opcodeprefix);
3780     }
3781   else
3782     {
3783       /* 3-byte VEX prefix.  */
3784       i.vex.length = 3;
3785
3786       switch (i.tm.opcode_modifier.opcodespace)
3787         {
3788         case SPACE_0F:
3789         case SPACE_0F38:
3790         case SPACE_0F3A:
3791           i.vex.bytes[0] = 0xc4;
3792           break;
3793         case SPACE_XOP08:
3794         case SPACE_XOP09:
3795         case SPACE_XOP0A:
3796           i.vex.bytes[0] = 0x8f;
3797           break;
3798         default:
3799           abort ();
3800         }
3801
3802       /* The high 3 bits of the second VEX byte are 1's complement
3803          of RXB bits from REX.  */
3804       i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3805
3806       i.vex.bytes[2] = (w << 7
3807                         | register_specifier << 3
3808                         | vector_length << 2
3809                         | i.tm.opcode_modifier.opcodeprefix);
3810     }
3811 }
3812
3813 static INLINE bool
3814 is_evex_encoding (const insn_template *t)
3815 {
3816   return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3817          || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3818          || t->opcode_modifier.sae;
3819 }
3820
3821 static INLINE bool
3822 is_any_vex_encoding (const insn_template *t)
3823 {
3824   return t->opcode_modifier.vex || is_evex_encoding (t);
3825 }
3826
3827 /* Build the EVEX prefix.  Sets i.vex.length to 4 and fills in
        i.vex.bytes[0..3] from i.rex, i.vrex, i.vex.register_specifier,
        i.mask, i.broadcast, i.rounding and the opcode modifiers of i.tm.
        As side effects the VREX bits that were encoded are cleared from
        i.vrex, and i.tm.opcode_modifier.evex is replaced by a concrete
        vector length when it was unset or EVEXDYN.  i.vex.bytes[3] is only
        conditionally assigned and otherwise OR-ed into, so it is presumably
        zero on entry -- TODO confirm against the caller.  */
3828
3829 static void
3830 build_evex_prefix (void)
3831 {
3832   unsigned int register_specifier, w;
       /* VREX bits that have been encoded into the prefix so far.  */
3833   rex_byte vrex_used = 0;
3834
3835   /* Check register specifier.  */
3836   if (i.vex.register_specifier)
3837     {
3838       gas_assert ((i.vrex & REX_X) == 0);
3839
3840       register_specifier = i.vex.register_specifier->reg_num;
3841       if ((i.vex.register_specifier->reg_flags & RegRex))
3842         register_specifier += 8;
3843       /* The upper 16 registers are encoded in the fourth byte of the
3844          EVEX prefix.  */
3845       if (!(i.vex.register_specifier->reg_flags & RegVRex))
3846         i.vex.bytes[3] = 0x8;
           /* Only the inverted low 4 bits of the register number are kept.  */
3847       register_specifier = ~register_specifier & 0xf;
3848     }
3849   else
3850     {
3851       register_specifier = 0xf;
3852
3853       /* Encode upper 16 vector index register in the fourth byte of
3854          the EVEX prefix.  */
3855       if (!(i.vrex & REX_X))
3856         i.vex.bytes[3] = 0x8;
3857       else
3858         vrex_used |= REX_X;
3859     }
3860
3861   /* 4 byte EVEX prefix.  */
3862   i.vex.length = 4;
3863   i.vex.bytes[0] = 0x62;
3864
3865   /* The high 3 bits of the second EVEX byte are 1's complement of RXB
3866      bits from REX.  */
3867   gas_assert (i.tm.opcode_modifier.opcodespace >= SPACE_0F);
3868   gas_assert (i.tm.opcode_modifier.opcodespace <= SPACE_0F3A);
3869   i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3870
3871   /* The fifth bit of the second EVEX byte is 1's complement of the
3872      REX_R bit in VREX.  */
3873   if (!(i.vrex & REX_R))
3874     i.vex.bytes[1] |= 0x10;
3875   else
3876     vrex_used |= REX_R;
3877
3878   if ((i.reg_operands + i.imm_operands) == i.operands)
3879     {
3880       /* When all operands are registers, the REX_X bit in REX is not
3881          used.  We reuse it to encode the upper 16 registers, which is
3882          indicated by the REX_B bit in VREX.  The REX_X bit is encoded
3883          as 1's complement.  */
3884       if ((i.vrex & REX_B))
3885         {
3886           vrex_used |= REX_B;
3887           i.vex.bytes[1] &= ~0x40;
3888         }
3889     }
3890
3891   /* EVEX instructions shouldn't need the REX prefix.  */
       /* Every VREX bit must have been consumed above; anything left over
          trips the assertion.  */
3892   i.vrex &= ~vrex_used;
3893   gas_assert (i.vrex == 0);
3894
3895   /* Check the REX.W bit and VEXW.  */
3896   if (i.tm.opcode_modifier.vexw == VEXWIG)
3897     w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3898   else if (i.tm.opcode_modifier.vexw)
3899     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3900   else
3901     w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3902
3903   /* The third byte of the EVEX prefix.  */
3904   i.vex.bytes[2] = ((w << 7)
3905                     | (register_specifier << 3)
3906                     | 4 /* Encode the U bit.  */
3907                     | i.tm.opcode_modifier.opcodeprefix);
3908
3909   /* The fourth byte of the EVEX prefix.  */
3910   /* The zeroing-masking bit.  */
3911   if (i.mask.reg && i.mask.zeroing)
3912     i.vex.bytes[3] |= 0x80;
3913
3914   /* Don't always set the broadcast bit if there is no RC.  */
3915   if (i.rounding.type == rc_none)
3916     {
3917       /* Encode the vector length.  */
3918       unsigned int vec_length;
3919
3920       if (!i.tm.opcode_modifier.evex
3921           || i.tm.opcode_modifier.evex == EVEXDYN)
3922         {
3923           unsigned int op;
3924
3925           /* Determine vector length from the last multi-length vector
3926              operand.  */
3927           for (op = i.operands; op--;)
3928             if (i.tm.operand_types[op].bitfield.xmmword
3929                 + i.tm.operand_types[op].bitfield.ymmword
3930                 + i.tm.operand_types[op].bitfield.zmmword > 1)
3931               {
3932                 if (i.types[op].bitfield.zmmword)
3933                   {
3934                     i.tm.opcode_modifier.evex = EVEX512;
3935                     break;
3936                   }
3937                 else if (i.types[op].bitfield.ymmword)
3938                   {
3939                     i.tm.opcode_modifier.evex = EVEX256;
3940                     break;
3941                   }
3942                 else if (i.types[op].bitfield.xmmword)
3943                   {
3944                     i.tm.opcode_modifier.evex = EVEX128;
3945                     break;
3946                   }
3947                 else if (i.broadcast.type && op == i.broadcast.operand)
3948                   {
                         /* Broadcast operand: the length comes from the
                            total broadcast size in bytes.  */
3949                     switch (i.broadcast.bytes)
3950                       {
3951                         case 64:
3952                           i.tm.opcode_modifier.evex = EVEX512;
3953                           break;
3954                         case 32:
3955                           i.tm.opcode_modifier.evex = EVEX256;
3956                           break;
3957                         case 16:
3958                           i.tm.opcode_modifier.evex = EVEX128;
3959                           break;
3960                         default:
3961                           abort ();
3962                       }
3963                     break;
3964                   }
3965               }
3966
               /* OP is unsigned: if the loop ran to completion without a
                  break, its final decrement wrapped OP around, so this
                  catches the case where no operand determined the length.  */
3967           if (op >= MAX_OPERANDS)
3968             abort ();
3969         }
3970
           /* Place the length selector at bits 5 and 6 of the fourth byte.  */
3971       switch (i.tm.opcode_modifier.evex)
3972         {
3973         case EVEXLIG: /* LL' is ignored */
3974           vec_length = evexlig << 5;
3975           break;
3976         case EVEX128:
3977           vec_length = 0 << 5;
3978           break;
3979         case EVEX256:
3980           vec_length = 1 << 5;
3981           break;
3982         case EVEX512:
3983           vec_length = 2 << 5;
3984           break;
3985         default:
3986           abort ();
3987           break;
3988         }
3989       i.vex.bytes[3] |= vec_length;
3990       /* Encode the broadcast bit.  */
3991       if (i.broadcast.type)
3992         i.vex.bytes[3] |= 0x10;
3993     }
       /* With rounding control or SAE the 0x10 bit is always set and the
          bits used for the vector length above carry the rounding type
          (or evexrcig for SAE-only) instead.  */
3994   else if (i.rounding.type != saeonly)
3995     i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
3996   else
3997     i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3998
       /* The mask register number goes into the low bits of the fourth
          byte.  */
3999   if (i.mask.reg)
4000     i.vex.bytes[3] |= i.mask.reg->reg_num;
4001 }
4002
4003 static void
4004 process_immext (void)
4005 {
4006   expressionS *exp;
4007
4008   /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4009      which is coded in the same place as an 8-bit immediate field
4010      would be.  Here we fake an 8-bit immediate operand from the
4011      opcode suffix stored in tm.extension_opcode.
4012
4013      AVX instructions also use this encoding, for some of
4014      3 argument instructions.  */
4015
4016   gas_assert (i.imm_operands <= 1
4017               && (i.operands <= 2
4018                   || (is_any_vex_encoding (&i.tm)
4019                       && i.operands <= 4)));
4020
4021   exp = &im_expressions[i.imm_operands++];
4022   i.op[i.operands].imms = exp;
4023   i.types[i.operands] = imm8;
4024   i.operands++;
4025   exp->X_op = O_constant;
4026   exp->X_add_number = i.tm.extension_opcode;
4027   i.tm.extension_opcode = None;
4028 }
4029
4030
4031 static int
4032 check_hle (void)
4033 {
4034   switch (i.tm.opcode_modifier.prefixok)
4035     {
4036     default:
4037       abort ();
4038     case PrefixLock:
4039     case PrefixNone:
4040     case PrefixNoTrack:
4041     case PrefixRep:
4042       as_bad (_("invalid instruction `%s' after `%s'"),
4043               i.tm.name, i.hle_prefix);
4044       return 0;
4045     case PrefixHLELock:
4046       if (i.prefix[LOCK_PREFIX])
4047         return 1;
4048       as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4049       return 0;
4050     case PrefixHLEAny:
4051       return 1;
4052     case PrefixHLERelease:
4053       if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4054         {
4055           as_bad (_("instruction `%s' after `xacquire' not allowed"),
4056                   i.tm.name);
4057           return 0;
4058         }
4059       if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4060         {
4061           as_bad (_("memory destination needed for instruction `%s'"
4062                     " after `xrelease'"), i.tm.name);
4063           return 0;
4064         }
4065       return 1;
4066     }
4067 }
4068
4069 /* Try the shortest encoding by shortening operand size.  Works on the
        global insn `i', rewriting its template copy (i.tm), operand types,
        operands and prefixes in place.  An LEA that performs no real address
        computation is first turned into MOV/MOVZX; after that at most one
        of the following rewrites is applied: TEST with a small immediate,
        64-bit insns that can use a 32-bit operand size, AND/OR of a
        register with itself, and EVEX/VEX vector insns that have a shorter
        form.  Note that some paths return early after `i' has already been
        partly modified.  */
4070
4071 static void
4072 optimize_encoding (void)
4073 {
4074   unsigned int j;
4075
4076   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4077       && i.tm.base_opcode == 0x8d)
4078     {
4079       /* Optimize: -O:
4080            lea symbol, %rN    -> mov $symbol, %rN
4081            lea (%rM), %rN     -> mov %rM, %rN
4082            lea (,%rM,1), %rN  -> mov %rM, %rN
4083
4084            and in 32-bit mode for 16-bit addressing
4085
4086            lea (%rM), %rN     -> movzx %rM, %rN
4087
4088            and in 64-bit mode zap 32-bit addressing in favor of using a
4089            32-bit (or less) destination.
4090        */
4091       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4092         {
4093           if (!i.op[1].regs->reg_type.bitfield.word)
4094             i.tm.opcode_modifier.size = SIZE32;
4095           i.prefix[ADDR_PREFIX] = 0;
4096         }
4097
4098       if (!i.index_reg && !i.base_reg)
4099         {
4100           /* Handle:
4101                lea symbol, %rN    -> mov $symbol, %rN
4102            */
4103           if (flag_code == CODE_64BIT)
4104             {
4105               /* Don't transform a relocation to a 16-bit one.  */
4106               if (i.op[0].disps
4107                   && i.op[0].disps->X_op != O_constant
4108                   && i.op[1].regs->reg_type.bitfield.word)
4109                 return;
4110
4111               if (!i.op[1].regs->reg_type.bitfield.qword
4112                   || i.tm.opcode_modifier.size == SIZE32)
4113                 {
4114                   i.tm.base_opcode = 0xb8;
4115                   i.tm.opcode_modifier.modrm = 0;
4116                   if (!i.op[1].regs->reg_type.bitfield.word)
4117                     i.types[0].bitfield.imm32 = 1;
4118                   else
4119                     {
4120                       i.tm.opcode_modifier.size = SIZE16;
4121                       i.types[0].bitfield.imm16 = 1;
4122                     }
4123                 }
4124               else
4125                 {
4126                   /* Subject to further optimization below.  */
4127                   i.tm.base_opcode = 0xc7;
4128                   i.tm.extension_opcode = 0;
4129                   i.types[0].bitfield.imm32s = 1;
4130                   i.types[0].bitfield.baseindex = 0;
4131                 }
4132             }
4133           /* Outside of 64-bit mode address and operand sizes have to match if
4134              a relocation is involved, as otherwise we wouldn't (currently) or
4135              even couldn't express the relocation correctly.  */
4136           else if (i.op[0].disps
4137                    && i.op[0].disps->X_op != O_constant
4138                    && ((!i.prefix[ADDR_PREFIX])
4139                        != (flag_code == CODE_32BIT
4140                            ? i.op[1].regs->reg_type.bitfield.dword
4141                            : i.op[1].regs->reg_type.bitfield.word)))
4142             return;
4143           else
4144             {
4145               i.tm.base_opcode = 0xb8;
4146               i.tm.opcode_modifier.modrm = 0;
4147               if (i.op[1].regs->reg_type.bitfield.dword)
4148                 i.types[0].bitfield.imm32 = 1;
4149               else
4150                 i.types[0].bitfield.imm16 = 1;
4151
                   /* A constant address computed with 16-bit addressing
                      into a 32-bit destination keeps only its low 16
                      bits.  */
4152               if (i.op[0].disps
4153                   && i.op[0].disps->X_op == O_constant
4154                   && i.op[1].regs->reg_type.bitfield.dword
4155                   /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4156                      GCC 5. */
4157                   && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4158                 i.op[0].disps->X_add_number &= 0xffff;
4159             }
4160
               /* Operand 0 is an immediate from here on.  */
4161           i.tm.operand_types[0] = i.types[0];
4162           i.imm_operands = 1;
4163           if (!i.op[0].imms)
4164             {
4165               i.op[0].imms = &im_expressions[0];
4166               i.op[0].imms->X_op = O_absent;
4167             }
4168         }
           /* With a base or index register, only a zero (or absent)
              displacement can be handled.  */
4169       else if (i.op[0].disps
4170                   && (i.op[0].disps->X_op != O_constant
4171                       || i.op[0].disps->X_add_number))
4172         return;
4173       else
4174         {
4175           /* Handle:
4176                lea (%rM), %rN     -> mov %rM, %rN
4177                lea (,%rM,1), %rN  -> mov %rM, %rN
4178                lea (%rM), %rN     -> movzx %rM, %rN
4179            */
4180           const reg_entry *addr_reg;
4181
4182           if (!i.index_reg && i.base_reg->reg_num != RegIP)
4183             addr_reg = i.base_reg;
4184           else if (!i.base_reg
4185                    && i.index_reg->reg_num != RegIZ
4186                    && !i.log2_scale_factor)
4187             addr_reg = i.index_reg;
4188           else
4189             return;
4190
4191           if (addr_reg->reg_type.bitfield.word
4192               && i.op[1].regs->reg_type.bitfield.dword)
4193             {
4194               if (flag_code != CODE_32BIT)
4195                 return;
4196               i.tm.opcode_modifier.opcodespace = SPACE_0F;
4197               i.tm.base_opcode = 0xb7;
4198             }
4199           else
4200             i.tm.base_opcode = 0x8b;
4201
4202           if (addr_reg->reg_type.bitfield.dword
4203               && i.op[1].regs->reg_type.bitfield.qword)
4204             i.tm.opcode_modifier.size = SIZE32;
4205
4206           i.op[0].regs = addr_reg;
4207           i.reg_operands = 2;
4208         }
4209
           /* The insn no longer has a memory operand.  */
4210       i.mem_operands = 0;
4211       i.disp_operands = 0;
4212       i.prefix[ADDR_PREFIX] = 0;
4213       i.prefix[SEG_PREFIX] = 0;
4214       i.seg[0] = NULL;
4215     }
4216
4217   if (optimize_for_space
4218       && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4219       && i.reg_operands == 1
4220       && i.imm_operands == 1
4221       && !i.types[1].bitfield.byte
4222       && i.op[0].imms->X_op == O_constant
4223       && fits_in_imm7 (i.op[0].imms->X_add_number)
4224       && (i.tm.base_opcode == 0xa8
4225           || (i.tm.base_opcode == 0xf6
4226               && i.tm.extension_opcode == 0x0)))
4227     {
4228       /* Optimize: -Os:
4229            test $imm7, %r64/%r32/%r16  -> test $imm7, %r8
4230        */
4231       unsigned int base_regnum = i.op[1].regs->reg_num;
4232       if (flag_code == CODE_64BIT || base_regnum < 4)
4233         {
4234           i.types[1].bitfield.byte = 1;
4235           /* Ignore the suffix.  */
4236           i.suffix = 0;
4237           /* Convert to byte registers.  */
               /* NOTE(review): J is the distance, in register table
                  entries, from the current register back to its byte
                  counterpart; the values 16/32/48 (+8) depend on the
                  ordering of that table, which is not visible here --
                  confirm against its definition.  */
4238           if (i.types[1].bitfield.word)
4239             j = 16;
4240           else if (i.types[1].bitfield.dword)
4241             j = 32;
4242           else
4243             j = 48;
4244           if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4245             j += 8;
4246           i.op[1].regs -= j;
4247         }
4248     }
4249   else if (flag_code == CODE_64BIT
4250            && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4251            && ((i.types[1].bitfield.qword
4252                 && i.reg_operands == 1
4253                 && i.imm_operands == 1
4254                 && i.op[0].imms->X_op == O_constant
4255                 && ((i.tm.base_opcode == 0xb8
4256                      && i.tm.extension_opcode == None
4257                      && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4258                     || (fits_in_imm31 (i.op[0].imms->X_add_number)
4259                         && ((i.tm.base_opcode == 0x24
4260                              || i.tm.base_opcode == 0xa8)
4261                             || (i.tm.base_opcode == 0x80
4262                                 && i.tm.extension_opcode == 0x4)
4263                             || ((i.tm.base_opcode == 0xf6
4264                                  || (i.tm.base_opcode | 1) == 0xc7)
4265                                 && i.tm.extension_opcode == 0x0)))
4266                     || (fits_in_imm7 (i.op[0].imms->X_add_number)
4267                         && i.tm.base_opcode == 0x83
4268                         && i.tm.extension_opcode == 0x4)))
4269                || (i.types[0].bitfield.qword
4270                    && ((i.reg_operands == 2
4271                         && i.op[0].regs == i.op[1].regs
4272                         && (i.tm.base_opcode == 0x30
4273                             || i.tm.base_opcode == 0x28))
4274                        || (i.reg_operands == 1
4275                            && i.operands == 1
4276                            && i.tm.base_opcode == 0x30)))))
4277     {
4278       /* Optimize: -O:
4279            andq $imm31, %r64   -> andl $imm31, %r32
4280            andq $imm7, %r64    -> andl $imm7, %r32
4281            testq $imm31, %r64  -> testl $imm31, %r32
4282            xorq %r64, %r64     -> xorl %r32, %r32
4283            subq %r64, %r64     -> subl %r32, %r32
4284            movq $imm31, %r64   -> movl $imm31, %r32
4285            movq $imm32, %r64   -> movl $imm32, %r32
4286         */
4287       i.tm.opcode_modifier.norex64 = 1;
4288       if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
4289         {
4290           /* Handle
4291                movq $imm31, %r64   -> movl $imm31, %r32
4292                movq $imm32, %r64   -> movl $imm32, %r32
4293            */
4294           i.tm.operand_types[0].bitfield.imm32 = 1;
4295           i.tm.operand_types[0].bitfield.imm32s = 0;
4296           i.tm.operand_types[0].bitfield.imm64 = 0;
4297           i.types[0].bitfield.imm32 = 1;
4298           i.types[0].bitfield.imm32s = 0;
4299           i.types[0].bitfield.imm64 = 0;
4300           i.types[1].bitfield.dword = 1;
4301           i.types[1].bitfield.qword = 0;
4302           if ((i.tm.base_opcode | 1) == 0xc7)
4303             {
4304               /* Handle
4305                    movq $imm31, %r64   -> movl $imm31, %r32
4306                */
4307               i.tm.base_opcode = 0xb8;
4308               i.tm.extension_opcode = None;
4309               i.tm.opcode_modifier.w = 0;
4310               i.tm.opcode_modifier.modrm = 0;
4311             }
4312         }
4313     }
4314   else if (optimize > 1
4315            && !optimize_for_space
4316            && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4317            && i.reg_operands == 2
4318            && i.op[0].regs == i.op[1].regs
4319            && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
4320                || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
4321            && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4322     {
4323       /* Optimize: -O2:
4324            andb %rN, %rN  -> testb %rN, %rN
4325            andw %rN, %rN  -> testw %rN, %rN
4326            andq %rN, %rN  -> testq %rN, %rN
4327            orb %rN, %rN   -> testb %rN, %rN
4328            orw %rN, %rN   -> testw %rN, %rN
4329            orq %rN, %rN   -> testq %rN, %rN
4330
4331            and outside of 64-bit mode
4332
4333            andl %rN, %rN  -> testl %rN, %rN
4334            orl %rN, %rN   -> testl %rN, %rN
4335        */
           /* Bit 0 of the original opcode is carried over.  */
4336       i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4337     }
4338   else if (i.reg_operands == 3
4339            && i.op[0].regs == i.op[1].regs
4340            && !i.types[2].bitfield.xmmword
4341            && (i.tm.opcode_modifier.vex
4342                || ((!i.mask.reg || i.mask.zeroing)
4343                    && i.rounding.type == rc_none
4344                    && is_evex_encoding (&i.tm)
4345                    && (i.vec_encoding != vex_encoding_evex
4346                        || cpu_arch_isa_flags.bitfield.cpuavx512vl
4347                        || i.tm.cpu_flags.bitfield.cpuavx512vl
4348                        || (i.tm.operand_types[2].bitfield.zmmword
4349                            && i.types[2].bitfield.ymmword))))
4350            && ((i.tm.base_opcode == 0x55
4351                 || i.tm.base_opcode == 0x57
4352                 || i.tm.base_opcode == 0xdf
4353                 || i.tm.base_opcode == 0xef
4354                 || i.tm.base_opcode == 0xf8
4355                 || i.tm.base_opcode == 0xf9
4356                 || i.tm.base_opcode == 0xfa
4357                 || i.tm.base_opcode == 0xfb
4358                 || i.tm.base_opcode == 0x42
4359                 || i.tm.base_opcode == 0x47)
4360                && i.tm.extension_opcode == None))
4361     {
4362       /* Optimize: -O1:
4363            VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4364            vpsubq and vpsubw:
4365              EVEX VOP %zmmM, %zmmM, %zmmN
4366                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4367                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4368              EVEX VOP %ymmM, %ymmM, %ymmN
4369                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4370                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4371              VEX VOP %ymmM, %ymmM, %ymmN
4372                -> VEX VOP %xmmM, %xmmM, %xmmN
4373            VOP, one of vpandn and vpxor:
4374              VEX VOP %ymmM, %ymmM, %ymmN
4375                -> VEX VOP %xmmM, %xmmM, %xmmN
4376            VOP, one of vpandnd and vpandnq:
4377              EVEX VOP %zmmM, %zmmM, %zmmN
4378                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4379                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4380              EVEX VOP %ymmM, %ymmM, %ymmN
4381                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4382                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4383            VOP, one of vpxord and vpxorq:
4384              EVEX VOP %zmmM, %zmmM, %zmmN
4385                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4386                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4387              EVEX VOP %ymmM, %ymmM, %ymmN
4388                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4389                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4390            VOP, one of kxord and kxorq:
4391              VEX VOP %kM, %kM, %kN
4392                -> VEX kxorw %kM, %kM, %kN
4393            VOP, one of kandnd and kandnq:
4394              VEX VOP %kM, %kM, %kN
4395                -> VEX kandnw %kM, %kM, %kN
4396        */
4397       if (is_evex_encoding (&i.tm))
4398         {
4399           if (i.vec_encoding != vex_encoding_evex)
4400             {
4401               i.tm.opcode_modifier.vex = VEX128;
4402               i.tm.opcode_modifier.vexw = VEXW0;
4403               i.tm.opcode_modifier.evex = 0;
4404             }
4405           else if (optimize > 1)
4406             i.tm.opcode_modifier.evex = EVEX128;
4407           else
4408             return;
4409         }
4410       else if (i.tm.operand_types[0].bitfield.class == RegMask)
4411         {
4412           i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4413           i.tm.opcode_modifier.vexw = VEXW0;
4414         }
4415       else
4416         i.tm.opcode_modifier.vex = VEX128;
4417
           /* When the insn ends up VEX encoded, mark all three operands as
              XMM rather than YMM.  */
4418       if (i.tm.opcode_modifier.vex)
4419         for (j = 0; j < 3; j++)
4420           {
4421             i.types[j].bitfield.xmmword = 1;
4422             i.types[j].bitfield.ymmword = 0;
4423           }
4424     }
4425   else if (i.vec_encoding != vex_encoding_evex
4426            && !i.types[0].bitfield.zmmword
4427            && !i.types[1].bitfield.zmmword
4428            && !i.mask.reg
4429            && !i.broadcast.type
4430            && is_evex_encoding (&i.tm)
4431            && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4432                || (i.tm.base_opcode & ~4) == 0xdb
4433                || (i.tm.base_opcode & ~4) == 0xeb)
4434            && i.tm.extension_opcode == None)
4435     {
4436       /* Optimize: -O1:
4437            VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4438            vmovdqu32 and vmovdqu64:
4439              EVEX VOP %xmmM, %xmmN
4440                -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4441              EVEX VOP %ymmM, %ymmN
4442                -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4443              EVEX VOP %xmmM, mem
4444                -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4445              EVEX VOP %ymmM, mem
4446                -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4447              EVEX VOP mem, %xmmN
4448                -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4449              EVEX VOP mem, %ymmN
4450                -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4451            VOP, one of vpand, vpandn, vpor, vpxor:
4452              EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4453                -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4454              EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4455                -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4456              EVEX VOP{d,q} mem, %xmmM, %xmmN
4457                -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4458              EVEX VOP{d,q} mem, %ymmM, %ymmN
4459                -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4460        */
           /* Look for the first operand with a constant displacement.  */
4461       for (j = 0; j < i.operands; j++)
4462         if (operand_type_check (i.types[j], disp)
4463             && i.op[j].disps->X_op == O_constant)
4464           {
4465             /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4466                has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4467                bytes, we choose EVEX Disp8 over VEX Disp32.  */
4468             int evex_disp8, vex_disp8;
4469             unsigned int memshift = i.memshift;
4470             offsetT n = i.op[j].disps->X_add_number;
4471
                 /* Check the displacement once with the current i.memshift
                    and once with i.memshift cleared; if the two answers
                    differ, restore i.memshift and leave the insn alone.  */
4472             evex_disp8 = fits_in_disp8 (n);
4473             i.memshift = 0;
4474             vex_disp8 = fits_in_disp8 (n);
4475             if (evex_disp8 != vex_disp8)
4476               {
4477                 i.memshift = memshift;
4478                 return;
4479               }
4480
4481             i.types[j].bitfield.disp8 = vex_disp8;
4482             break;
4483           }
4484       if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4485           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4486         i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4487       i.tm.opcode_modifier.vex
4488         = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4489       i.tm.opcode_modifier.vexw = VEXW0;
4490       /* VPAND, VPOR, and VPXOR are commutative.  */
4491       if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4492         i.tm.opcode_modifier.commutative = 1;
           /* Drop every EVEX-only attribute from the template copy.  */
4493       i.tm.opcode_modifier.evex = 0;
4494       i.tm.opcode_modifier.masking = 0;
4495       i.tm.opcode_modifier.broadcast = 0;
4496       i.tm.opcode_modifier.disp8memshift = 0;
4497       i.memshift = 0;
           /* J < i.operands only when the loop above stopped at an operand
              with a constant displacement.  */
4498       if (j < i.operands)
4499         i.types[j].bitfield.disp8
4500           = fits_in_disp8 (i.op[j].disps->X_add_number);
4501     }
4502 }
4503
4504 /* Return non-zero for load instruction.  */
4505
4506 static int
4507 load_insn_p (void)
4508 {
4509   unsigned int dest;
4510   int any_vex_p = is_any_vex_encoding (&i.tm);
4511   unsigned int base_opcode = i.tm.base_opcode | 1;
4512
4513   if (!any_vex_p)
4514     {
4515       /* Anysize insns: lea, invlpg, clflush, prefetchnta, prefetcht0,
4516          prefetcht1, prefetcht2, prefetchtw, bndmk, bndcl, bndcu, bndcn,
4517          bndstx, bndldx, prefetchwt1, clflushopt, clwb, cldemote.  */
4518       if (i.tm.opcode_modifier.anysize)
4519         return 0;
4520
4521       /* pop.   */
4522       if (strcmp (i.tm.name, "pop") == 0)
4523         return 1;
4524     }
4525
4526   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4527     {
4528       /* popf, popa.   */
4529       if (i.tm.base_opcode == 0x9d
4530           || i.tm.base_opcode == 0x61)
4531         return 1;
4532
4533       /* movs, cmps, lods, scas.  */
4534       if ((i.tm.base_opcode | 0xb) == 0xaf)
4535         return 1;
4536
4537       /* outs, xlatb.  */
4538       if (base_opcode == 0x6f
4539           || i.tm.base_opcode == 0xd7)
4540         return 1;
4541       /* NB: For AMD-specific insns with implicit memory operands,
4542          they're intentionally not covered.  */
4543     }
4544
4545   /* No memory operand.  */
4546   if (!i.mem_operands)
4547     return 0;
4548
4549   if (any_vex_p)
4550     {
4551       /* vldmxcsr.  */
4552       if (i.tm.base_opcode == 0xae
4553           && i.tm.opcode_modifier.vex
4554           && i.tm.opcode_modifier.opcodespace == SPACE_0F
4555           && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4556           && i.tm.extension_opcode == 2)
4557         return 1;
4558     }
4559   else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4560     {
4561       /* test, not, neg, mul, imul, div, idiv.  */
4562       if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
4563           && i.tm.extension_opcode != 1)
4564         return 1;
4565
4566       /* inc, dec.  */
4567       if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4568         return 1;
4569
4570       /* add, or, adc, sbb, and, sub, xor, cmp.  */
4571       if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4572         return 1;
4573
4574       /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4575       if ((base_opcode == 0xc1
4576            || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
4577           && i.tm.extension_opcode != 6)
4578         return 1;
4579
4580       /* Check for x87 instructions.  */
4581       if (base_opcode >= 0xd8 && base_opcode <= 0xdf)
4582         {
4583           /* Skip fst, fstp, fstenv, fstcw.  */
4584           if (i.tm.base_opcode == 0xd9
4585               && (i.tm.extension_opcode == 2
4586                   || i.tm.extension_opcode == 3
4587                   || i.tm.extension_opcode == 6
4588                   || i.tm.extension_opcode == 7))
4589             return 0;
4590
4591           /* Skip fisttp, fist, fistp, fstp.  */
4592           if (i.tm.base_opcode == 0xdb
4593               && (i.tm.extension_opcode == 1
4594                   || i.tm.extension_opcode == 2
4595                   || i.tm.extension_opcode == 3
4596                   || i.tm.extension_opcode == 7))
4597             return 0;
4598
4599           /* Skip fisttp, fst, fstp, fsave, fstsw.  */
4600           if (i.tm.base_opcode == 0xdd
4601               && (i.tm.extension_opcode == 1
4602                   || i.tm.extension_opcode == 2
4603                   || i.tm.extension_opcode == 3
4604                   || i.tm.extension_opcode == 6
4605                   || i.tm.extension_opcode == 7))
4606             return 0;
4607
4608           /* Skip fisttp, fist, fistp, fbstp, fistp.  */
4609           if (i.tm.base_opcode == 0xdf
4610               && (i.tm.extension_opcode == 1
4611                   || i.tm.extension_opcode == 2
4612                   || i.tm.extension_opcode == 3
4613                   || i.tm.extension_opcode == 6
4614                   || i.tm.extension_opcode == 7))
4615             return 0;
4616
4617           return 1;
4618         }
4619     }
4620   else if (i.tm.opcode_modifier.opcodespace == SPACE_0F)
4621     {
4622       /* bt, bts, btr, btc.  */
4623       if (i.tm.base_opcode == 0xba
4624           && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
4625         return 1;
4626
4627       /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld.  */
4628       if (i.tm.base_opcode == 0xc7
4629           && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4630           && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
4631               || i.tm.extension_opcode == 6))
4632         return 1;
4633
4634       /* fxrstor, ldmxcsr, xrstor.  */
4635       if (i.tm.base_opcode == 0xae
4636           && (i.tm.extension_opcode == 1
4637               || i.tm.extension_opcode == 2
4638               || i.tm.extension_opcode == 5))
4639         return 1;
4640
4641       /* lgdt, lidt, lmsw.  */
4642       if (i.tm.base_opcode == 0x01
4643           && (i.tm.extension_opcode == 2
4644               || i.tm.extension_opcode == 3
4645               || i.tm.extension_opcode == 6))
4646         return 1;
4647     }
4648
4649   dest = i.operands - 1;
4650
4651   /* Check fake imm8 operand and 3 source operands.  */
4652   if ((i.tm.opcode_modifier.immext
4653        || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
4654       && i.types[dest].bitfield.imm8)
4655     dest--;
4656
4657   /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg.  */
4658   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4659       && (base_opcode == 0x1
4660           || base_opcode == 0x9
4661           || base_opcode == 0x11
4662           || base_opcode == 0x19
4663           || base_opcode == 0x21
4664           || base_opcode == 0x29
4665           || base_opcode == 0x31
4666           || base_opcode == 0x39
4667           || (base_opcode | 2) == 0x87))
4668     return 1;
4669
4670   /* xadd.  */
4671   if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4672       && base_opcode == 0xc1)
4673     return 1;
4674
4675   /* Check for load instruction.  */
4676   return (i.types[dest].bitfield.class != ClassNone
4677           || i.types[dest].bitfield.instance == Accum);
4678 }
4679
4680 /* Output lfence, 0xfaee8, after instruction.  */
4681
4682 static void
4683 insert_lfence_after (void)
4684 {
4685   if (lfence_after_load && load_insn_p ())
4686     {
4687       /* There are also two REP string instructions that require
4688          special treatment. Specifically, the compare string (CMPS)
4689          and scan string (SCAS) instructions set EFLAGS in a manner
4690          that depends on the data being compared/scanned. When used
4691          with a REP prefix, the number of iterations may therefore
4692          vary depending on this data. If the data is a program secret
4693          chosen by the adversary using an LVI method,
4694          then this data-dependent behavior may leak some aspect
4695          of the secret.  */
4696       if (((i.tm.base_opcode | 0x1) == 0xa7
4697            || (i.tm.base_opcode | 0x1) == 0xaf)
4698           && i.prefix[REP_PREFIX])
4699         {
4700             as_warn (_("`%s` changes flags which would affect control flow behavior"),
4701                      i.tm.name);
4702         }
4703       char *p = frag_more (3);
4704       *p++ = 0xf;
4705       *p++ = 0xae;
4706       *p = 0xe8;
4707     }
4708 }
4709
4710 /* Output lfence, 0xfaee8, before instruction.  */
4711
4712 static void
4713 insert_lfence_before (void)
4714 {
4715   char *p;
4716
4717   if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
4718     return;
4719
4720   if (i.tm.base_opcode == 0xff
4721       && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4722     {
4723       /* Insert lfence before indirect branch if needed.  */
4724
4725       if (lfence_before_indirect_branch == lfence_branch_none)
4726         return;
4727
4728       if (i.operands != 1)
4729         abort ();
4730
4731       if (i.reg_operands == 1)
4732         {
4733           /* Indirect branch via register.  Don't insert lfence with
4734              -mlfence-after-load=yes.  */
4735           if (lfence_after_load
4736               || lfence_before_indirect_branch == lfence_branch_memory)
4737             return;
4738         }
4739       else if (i.mem_operands == 1
4740                && lfence_before_indirect_branch != lfence_branch_register)
4741         {
4742           as_warn (_("indirect `%s` with memory operand should be avoided"),
4743                    i.tm.name);
4744           return;
4745         }
4746       else
4747         return;
4748
4749       if (last_insn.kind != last_insn_other
4750           && last_insn.seg == now_seg)
4751         {
4752           as_warn_where (last_insn.file, last_insn.line,
4753                          _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4754                          last_insn.name, i.tm.name);
4755           return;
4756         }
4757
4758       p = frag_more (3);
4759       *p++ = 0xf;
4760       *p++ = 0xae;
4761       *p = 0xe8;
4762       return;
4763     }
4764
4765   /* Output or/not/shl and lfence before near ret.  */
4766   if (lfence_before_ret != lfence_before_ret_none
4767       && (i.tm.base_opcode == 0xc2
4768           || i.tm.base_opcode == 0xc3))
4769     {
4770       if (last_insn.kind != last_insn_other
4771           && last_insn.seg == now_seg)
4772         {
4773           as_warn_where (last_insn.file, last_insn.line,
4774                          _("`%s` skips -mlfence-before-ret on `%s`"),
4775                          last_insn.name, i.tm.name);
4776           return;
4777         }
4778
4779       /* Near ret ingore operand size override under CPU64.  */
4780       char prefix = flag_code == CODE_64BIT
4781                     ? 0x48
4782                     : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
4783
4784       if (lfence_before_ret == lfence_before_ret_not)
4785         {
4786           /* not: 0xf71424, may add prefix
4787              for operand size override or 64-bit code.  */
4788           p = frag_more ((prefix ? 2 : 0) + 6 + 3);
4789           if (prefix)
4790             *p++ = prefix;
4791           *p++ = 0xf7;
4792           *p++ = 0x14;
4793           *p++ = 0x24;
4794           if (prefix)
4795             *p++ = prefix;
4796           *p++ = 0xf7;
4797           *p++ = 0x14;
4798           *p++ = 0x24;
4799         }
4800       else
4801         {
4802           p = frag_more ((prefix ? 1 : 0) + 4 + 3);
4803           if (prefix)
4804             *p++ = prefix;
4805           if (lfence_before_ret == lfence_before_ret_or)
4806             {
4807               /* or: 0x830c2400, may add prefix
4808                  for operand size override or 64-bit code.  */
4809               *p++ = 0x83;
4810               *p++ = 0x0c;
4811             }
4812           else
4813             {
4814               /* shl: 0xc1242400, may add prefix
4815                  for operand size override or 64-bit code.  */
4816               *p++ = 0xc1;
4817               *p++ = 0x24;
4818             }
4819
4820           *p++ = 0x24;
4821           *p++ = 0x0;
4822         }
4823
4824       *p++ = 0xf;
4825       *p++ = 0xae;
4826       *p = 0xe8;
4827     }
4828 }
4829
4830 /* This is the guts of the machine-dependent assembler.  LINE points to a
4831    machine dependent instruction.  This function is supposed to emit
4832    the frags/bytes it assembles to.  */
4833
4834 void
4835 md_assemble (char *line)
4836 {
4837   unsigned int j;
4838   char mnemonic[MAX_MNEM_SIZE], mnem_suffix;
4839   const insn_template *t;
4840
4841   /* Initialize globals.  */
4842   memset (&i, '\0', sizeof (i));
4843   i.rounding.type = rc_none;
4844   for (j = 0; j < MAX_OPERANDS; j++)
4845     i.reloc[j] = NO_RELOC;
4846   memset (disp_expressions, '\0', sizeof (disp_expressions));
4847   memset (im_expressions, '\0', sizeof (im_expressions));
4848   save_stack_p = save_stack;
4849
4850   /* First parse an instruction mnemonic & call i386_operand for the operands.
4851      We assume that the scrubber has arranged it so that line[0] is the valid
4852      start of a (possibly prefixed) mnemonic.  */
4853
4854   line = parse_insn (line, mnemonic);
4855   if (line == NULL)
4856     return;
4857   mnem_suffix = i.suffix;
4858
4859   line = parse_operands (line, mnemonic);
4860   this_operand = -1;
4861   xfree (i.memop1_string);
4862   i.memop1_string = NULL;
4863   if (line == NULL)
4864     return;
4865
4866   /* Now we've parsed the mnemonic into a set of templates, and have the
4867      operands at hand.  */
4868
4869   /* All Intel opcodes have reversed operands except for "bound", "enter",
4870      "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
4871      "rmpadjust", and "rmpupdate".  We also don't reverse intersegment "jmp"
4872      and "call" instructions with 2 immediate operands so that the immediate
4873      segment precedes the offset consistently in Intel and AT&T modes.  */
4874   if (intel_syntax
4875       && i.operands > 1
4876       && (strcmp (mnemonic, "bound") != 0)
4877       && (strncmp (mnemonic, "invlpg", 6) != 0)
4878       && !startswith (mnemonic, "monitor")
4879       && !startswith (mnemonic, "mwait")
4880       && (strcmp (mnemonic, "pvalidate") != 0)
4881       && !startswith (mnemonic, "rmp")
4882       && (strcmp (mnemonic, "tpause") != 0)
4883       && (strcmp (mnemonic, "umwait") != 0)
4884       && !(operand_type_check (i.types[0], imm)
4885            && operand_type_check (i.types[1], imm)))
4886     swap_operands ();
4887
4888   /* The order of the immediates should be reversed
4889      for 2 immediates extrq and insertq instructions */
4890   if (i.imm_operands == 2
4891       && (strcmp (mnemonic, "extrq") == 0
4892           || strcmp (mnemonic, "insertq") == 0))
4893       swap_2_operands (0, 1);
4894
4895   if (i.imm_operands)
4896     optimize_imm ();
4897
4898   if (i.disp_operands && !want_disp32 (current_templates->start))
4899     {
4900       for (j = 0; j < i.operands; ++j)
4901         {
4902           const expressionS *exp = i.op[j].disps;
4903
4904           if (!operand_type_check (i.types[j], disp))
4905             continue;
4906
4907           if (exp->X_op != O_constant)
4908             continue;
4909
4910           /* Since displacement is signed extended to 64bit, don't allow
4911              disp32 and turn off disp32s if they are out of range.  */
4912           i.types[j].bitfield.disp32 = 0;
4913           if (fits_in_signed_long (exp->X_add_number))
4914             continue;
4915
4916           i.types[j].bitfield.disp32s = 0;
4917           if (i.types[j].bitfield.baseindex)
4918             {
4919               as_bad (_("0x%" BFD_VMA_FMT "x out of range of signed 32bit displacement"),
4920                       exp->X_add_number);
4921               return;
4922             }
4923         }
4924     }
4925
4926   /* Don't optimize displacement for movabs since it only takes 64bit
4927      displacement.  */
4928   if (i.disp_operands
4929       && i.disp_encoding != disp_encoding_32bit
4930       && (flag_code != CODE_64BIT
4931           || strcmp (mnemonic, "movabs") != 0))
4932     optimize_disp ();
4933
4934   /* Next, we find a template that matches the given insn,
4935      making sure the overlap of the given operands types is consistent
4936      with the template operand types.  */
4937
4938   if (!(t = match_template (mnem_suffix)))
4939     return;
4940
4941   if (sse_check != check_none
4942       && !i.tm.opcode_modifier.noavx
4943       && !i.tm.cpu_flags.bitfield.cpuavx
4944       && !i.tm.cpu_flags.bitfield.cpuavx512f
4945       && (i.tm.cpu_flags.bitfield.cpusse
4946           || i.tm.cpu_flags.bitfield.cpusse2
4947           || i.tm.cpu_flags.bitfield.cpusse3
4948           || i.tm.cpu_flags.bitfield.cpussse3
4949           || i.tm.cpu_flags.bitfield.cpusse4_1
4950           || i.tm.cpu_flags.bitfield.cpusse4_2
4951           || i.tm.cpu_flags.bitfield.cpupclmul
4952           || i.tm.cpu_flags.bitfield.cpuaes
4953           || i.tm.cpu_flags.bitfield.cpusha
4954           || i.tm.cpu_flags.bitfield.cpugfni))
4955     {
4956       (sse_check == check_warning
4957        ? as_warn
4958        : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
4959     }
4960
4961   if (i.tm.opcode_modifier.fwait)
4962     if (!add_prefix (FWAIT_OPCODE))
4963       return;
4964
4965   /* Check if REP prefix is OK.  */
4966   if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
4967     {
4968       as_bad (_("invalid instruction `%s' after `%s'"),
4969                 i.tm.name, i.rep_prefix);
4970       return;
4971     }
4972
4973   /* Check for lock without a lockable instruction.  Destination operand
4974      must be memory unless it is xchg (0x86).  */
4975   if (i.prefix[LOCK_PREFIX]
4976       && (i.tm.opcode_modifier.prefixok < PrefixLock
4977           || i.mem_operands == 0
4978           || (i.tm.base_opcode != 0x86
4979               && !(i.flags[i.operands - 1] & Operand_Mem))))
4980     {
4981       as_bad (_("expecting lockable instruction after `lock'"));
4982       return;
4983     }
4984
4985   /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns.  */
4986   if (i.prefix[DATA_PREFIX]
4987       && (is_any_vex_encoding (&i.tm)
4988           || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
4989           || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX))
4990     {
4991       as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
4992       return;
4993     }
4994
4995   /* Check if HLE prefix is OK.  */
4996   if (i.hle_prefix && !check_hle ())
4997     return;
4998
4999   /* Check BND prefix.  */
5000   if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
5001     as_bad (_("expecting valid branch instruction after `bnd'"));
5002
5003   /* Check NOTRACK prefix.  */
5004   if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
5005     as_bad (_("expecting indirect branch instruction after `notrack'"));
5006
5007   if (i.tm.cpu_flags.bitfield.cpumpx)
5008     {
5009       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
5010         as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
5011       else if (flag_code != CODE_16BIT
5012                ? i.prefix[ADDR_PREFIX]
5013                : i.mem_operands && !i.prefix[ADDR_PREFIX])
5014         as_bad (_("16-bit address isn't allowed in MPX instructions"));
5015     }
5016
5017   /* Insert BND prefix.  */
5018   if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
5019     {
5020       if (!i.prefix[BND_PREFIX])
5021         add_prefix (BND_PREFIX_OPCODE);
5022       else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
5023         {
5024           as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
5025           i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
5026         }
5027     }
5028
5029   /* Check string instruction segment overrides.  */
5030   if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
5031     {
5032       gas_assert (i.mem_operands);
5033       if (!check_string ())
5034         return;
5035       i.disp_operands = 0;
5036     }
5037
5038   if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
5039     optimize_encoding ();
5040
5041   if (!process_suffix ())
5042     return;
5043
5044   /* Update operand types and check extended states.  */
5045   for (j = 0; j < i.operands; j++)
5046     {
5047       i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
5048       switch (i.tm.operand_types[j].bitfield.class)
5049         {
5050         default:
5051           break;
5052         case RegMMX:
5053           i.xstate |= xstate_mmx;
5054           break;
5055         case RegMask:
5056           i.xstate |= xstate_mask;
5057           break;
5058         case RegSIMD:
5059           if (i.tm.operand_types[j].bitfield.tmmword)
5060             i.xstate |= xstate_tmm;
5061           else if (i.tm.operand_types[j].bitfield.zmmword)
5062             i.xstate |= xstate_zmm;
5063           else if (i.tm.operand_types[j].bitfield.ymmword)
5064             i.xstate |= xstate_ymm;
5065           else if (i.tm.operand_types[j].bitfield.xmmword)
5066             i.xstate |= xstate_xmm;
5067           break;
5068         }
5069     }
5070
5071   /* Make still unresolved immediate matches conform to size of immediate
5072      given in i.suffix.  */
5073   if (!finalize_imm ())
5074     return;
5075
5076   if (i.types[0].bitfield.imm1)
5077     i.imm_operands = 0; /* kludge for shift insns.  */
5078
5079   /* We only need to check those implicit registers for instructions
5080      with 3 operands or less.  */
5081   if (i.operands <= 3)
5082     for (j = 0; j < i.operands; j++)
5083       if (i.types[j].bitfield.instance != InstanceNone
5084           && !i.types[j].bitfield.xmmword)
5085         i.reg_operands--;
5086
5087   /* For insns with operands there are more diddles to do to the opcode.  */
5088   if (i.operands)
5089     {
5090       if (!process_operands ())
5091         return;
5092     }
5093   else if (!quiet_warnings && i.tm.opcode_modifier.ugh)
5094     {
5095       /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc.  */
5096       as_warn (_("translating to `%sp'"), i.tm.name);
5097     }
5098
5099   if (is_any_vex_encoding (&i.tm))
5100     {
5101       if (!cpu_arch_flags.bitfield.cpui286)
5102         {
5103           as_bad (_("instruction `%s' isn't supported outside of protected mode."),
5104                   i.tm.name);
5105           return;
5106         }
5107
5108       /* Check for explicit REX prefix.  */
5109       if (i.prefix[REX_PREFIX] || i.rex_encoding)
5110         {
5111           as_bad (_("REX prefix invalid with `%s'"), i.tm.name);
5112           return;
5113         }
5114
5115       if (i.tm.opcode_modifier.vex)
5116         build_vex_prefix (t);
5117       else
5118         build_evex_prefix ();
5119
5120       /* The individual REX.RXBW bits got consumed.  */
5121       i.rex &= REX_OPCODE;
5122     }
5123
5124   /* Handle conversion of 'int $3' --> special int3 insn.  XOP or FMA4
5125      instructions may define INT_OPCODE as well, so avoid this corner
5126      case for those instructions that use MODRM.  */
5127   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
5128       && i.tm.base_opcode == INT_OPCODE
5129       && !i.tm.opcode_modifier.modrm
5130       && i.op[0].imms->X_add_number == 3)
5131     {
5132       i.tm.base_opcode = INT3_OPCODE;
5133       i.imm_operands = 0;
5134     }
5135
5136   if ((i.tm.opcode_modifier.jump == JUMP
5137        || i.tm.opcode_modifier.jump == JUMP_BYTE
5138        || i.tm.opcode_modifier.jump == JUMP_DWORD)
5139       && i.op[0].disps->X_op == O_constant)
5140     {
5141       /* Convert "jmp constant" (and "call constant") to a jump (call) to
5142          the absolute address given by the constant.  Since ix86 jumps and
5143          calls are pc relative, we need to generate a reloc.  */
5144       i.op[0].disps->X_add_symbol = &abs_symbol;
5145       i.op[0].disps->X_op = O_symbol;
5146     }
5147
5148   /* For 8 bit registers we need an empty rex prefix.  Also if the
5149      instruction already has a prefix, we need to convert old
5150      registers to new ones.  */
5151
5152   if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
5153        && (i.op[0].regs->reg_flags & RegRex64) != 0)
5154       || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
5155           && (i.op[1].regs->reg_flags & RegRex64) != 0)
5156       || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
5157            || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
5158           && i.rex != 0))
5159     {
5160       int x;
5161
5162       i.rex |= REX_OPCODE;
5163       for (x = 0; x < 2; x++)
5164         {
5165           /* Look for 8 bit operand that uses old registers.  */
5166           if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
5167               && (i.op[x].regs->reg_flags & RegRex64) == 0)
5168             {
5169               gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5170               /* In case it is "hi" register, give up.  */
5171               if (i.op[x].regs->reg_num > 3)
5172                 as_bad (_("can't encode register '%s%s' in an "
5173                           "instruction requiring REX prefix."),
5174                         register_prefix, i.op[x].regs->reg_name);
5175
5176               /* Otherwise it is equivalent to the extended register.
5177                  Since the encoding doesn't change this is merely
5178                  cosmetic cleanup for debug output.  */
5179
5180               i.op[x].regs = i.op[x].regs + 8;
5181             }
5182         }
5183     }
5184
5185   if (i.rex == 0 && i.rex_encoding)
5186     {
5187       /* Check if we can add a REX_OPCODE byte.  Look for 8 bit operand
5188          that uses legacy register.  If it is "hi" register, don't add
5189          the REX_OPCODE byte.  */
5190       int x;
5191       for (x = 0; x < 2; x++)
5192         if (i.types[x].bitfield.class == Reg
5193             && i.types[x].bitfield.byte
5194             && (i.op[x].regs->reg_flags & RegRex64) == 0
5195             && i.op[x].regs->reg_num > 3)
5196           {
5197             gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5198             i.rex_encoding = false;
5199             break;
5200           }
5201
5202       if (i.rex_encoding)
5203         i.rex = REX_OPCODE;
5204     }
5205
5206   if (i.rex != 0)
5207     add_prefix (REX_OPCODE | i.rex);
5208
5209   insert_lfence_before ();
5210
5211   /* We are ready to output the insn.  */
5212   output_insn ();
5213
5214   insert_lfence_after ();
5215
5216   last_insn.seg = now_seg;
5217
5218   if (i.tm.opcode_modifier.isprefix)
5219     {
5220       last_insn.kind = last_insn_prefix;
5221       last_insn.name = i.tm.name;
5222       last_insn.file = as_where (&last_insn.line);
5223     }
5224   else
5225     last_insn.kind = last_insn_other;
5226 }
5227
5228 static char *
5229 parse_insn (char *line, char *mnemonic)
5230 {
5231   char *l = line;
5232   char *token_start = l;
5233   char *mnem_p;
5234   int supported;
5235   const insn_template *t;
5236   char *dot_p = NULL;
5237
5238   while (1)
5239     {
5240       mnem_p = mnemonic;
5241       while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5242         {
5243           if (*mnem_p == '.')
5244             dot_p = mnem_p;
5245           mnem_p++;
5246           if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5247             {
5248               as_bad (_("no such instruction: `%s'"), token_start);
5249               return NULL;
5250             }
5251           l++;
5252         }
5253       if (!is_space_char (*l)
5254           && *l != END_OF_INSN
5255           && (intel_syntax
5256               || (*l != PREFIX_SEPARATOR
5257                   && *l != ',')))
5258         {
5259           as_bad (_("invalid character %s in mnemonic"),
5260                   output_invalid (*l));
5261           return NULL;
5262         }
5263       if (token_start == l)
5264         {
5265           if (!intel_syntax && *l == PREFIX_SEPARATOR)
5266             as_bad (_("expecting prefix; got nothing"));
5267           else
5268             as_bad (_("expecting mnemonic; got nothing"));
5269           return NULL;
5270         }
5271
5272       /* Look up instruction (or prefix) via hash table.  */
5273       current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5274
5275       if (*l != END_OF_INSN
5276           && (!is_space_char (*l) || l[1] != END_OF_INSN)
5277           && current_templates
5278           && current_templates->start->opcode_modifier.isprefix)
5279         {
5280           if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
5281             {
5282               as_bad ((flag_code != CODE_64BIT
5283                        ? _("`%s' is only supported in 64-bit mode")
5284                        : _("`%s' is not supported in 64-bit mode")),
5285                       current_templates->start->name);
5286               return NULL;
5287             }
5288           /* If we are in 16-bit mode, do not allow addr16 or data16.
5289              Similarly, in 32-bit mode, do not allow addr32 or data32.  */
5290           if ((current_templates->start->opcode_modifier.size == SIZE16
5291                || current_templates->start->opcode_modifier.size == SIZE32)
5292               && flag_code != CODE_64BIT
5293               && ((current_templates->start->opcode_modifier.size == SIZE32)
5294                   ^ (flag_code == CODE_16BIT)))
5295             {
5296               as_bad (_("redundant %s prefix"),
5297                       current_templates->start->name);
5298               return NULL;
5299             }
5300
5301           if (current_templates->start->base_opcode == PSEUDO_PREFIX)
5302             {
5303               /* Handle pseudo prefixes.  */
5304               switch (current_templates->start->extension_opcode)
5305                 {
5306                 case Prefix_Disp8:
5307                   /* {disp8} */
5308                   i.disp_encoding = disp_encoding_8bit;
5309                   break;
5310                 case Prefix_Disp16:
5311                   /* {disp16} */
5312                   i.disp_encoding = disp_encoding_16bit;
5313                   break;
5314                 case Prefix_Disp32:
5315                   /* {disp32} */
5316                   i.disp_encoding = disp_encoding_32bit;
5317                   break;
5318                 case Prefix_Load:
5319                   /* {load} */
5320                   i.dir_encoding = dir_encoding_load;
5321                   break;
5322                 case Prefix_Store:
5323                   /* {store} */
5324                   i.dir_encoding = dir_encoding_store;
5325                   break;
5326                 case Prefix_VEX:
5327                   /* {vex} */
5328                   i.vec_encoding = vex_encoding_vex;
5329                   break;
5330                 case Prefix_VEX3:
5331                   /* {vex3} */
5332                   i.vec_encoding = vex_encoding_vex3;
5333                   break;
5334                 case Prefix_EVEX:
5335                   /* {evex} */
5336                   i.vec_encoding = vex_encoding_evex;
5337                   break;
5338                 case Prefix_REX:
5339                   /* {rex} */
5340                   i.rex_encoding = true;
5341                   break;
5342                 case Prefix_NoOptimize:
5343                   /* {nooptimize} */
5344                   i.no_optimize = true;
5345                   break;
5346                 default:
5347                   abort ();
5348                 }
5349             }
5350           else
5351             {
5352               /* Add prefix, checking for repeated prefixes.  */
5353               switch (add_prefix (current_templates->start->base_opcode))
5354                 {
5355                 case PREFIX_EXIST:
5356                   return NULL;
5357                 case PREFIX_DS:
5358                   if (current_templates->start->cpu_flags.bitfield.cpuibt)
5359                     i.notrack_prefix = current_templates->start->name;
5360                   break;
5361                 case PREFIX_REP:
5362                   if (current_templates->start->cpu_flags.bitfield.cpuhle)
5363                     i.hle_prefix = current_templates->start->name;
5364                   else if (current_templates->start->cpu_flags.bitfield.cpumpx)
5365                     i.bnd_prefix = current_templates->start->name;
5366                   else
5367                     i.rep_prefix = current_templates->start->name;
5368                   break;
5369                 default:
5370                   break;
5371                 }
5372             }
5373           /* Skip past PREFIX_SEPARATOR and reset token_start.  */
5374           token_start = ++l;
5375         }
5376       else
5377         break;
5378     }
5379
5380   if (!current_templates)
5381     {
5382       /* Deprecated functionality (new code should use pseudo-prefixes instead):
5383          Check if we should swap operand or force 32bit displacement in
5384          encoding.  */
5385       if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5386         i.dir_encoding = dir_encoding_swap;
5387       else if (mnem_p - 3 == dot_p
5388                && dot_p[1] == 'd'
5389                && dot_p[2] == '8')
5390         i.disp_encoding = disp_encoding_8bit;
5391       else if (mnem_p - 4 == dot_p
5392                && dot_p[1] == 'd'
5393                && dot_p[2] == '3'
5394                && dot_p[3] == '2')
5395         i.disp_encoding = disp_encoding_32bit;
5396       else
5397         goto check_suffix;
5398       mnem_p = dot_p;
5399       *dot_p = '\0';
5400       current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5401     }
5402
5403   if (!current_templates)
5404     {
5405     check_suffix:
5406       if (mnem_p > mnemonic)
5407         {
5408           /* See if we can get a match by trimming off a suffix.  */
5409           switch (mnem_p[-1])
5410             {
5411             case WORD_MNEM_SUFFIX:
5412               if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5413                 i.suffix = SHORT_MNEM_SUFFIX;
5414               else
5415                 /* Fall through.  */
5416               case BYTE_MNEM_SUFFIX:
5417               case QWORD_MNEM_SUFFIX:
5418                 i.suffix = mnem_p[-1];
5419               mnem_p[-1] = '\0';
5420               current_templates
5421                 = (const templates *) str_hash_find (op_hash, mnemonic);
5422               break;
5423             case SHORT_MNEM_SUFFIX:
5424             case LONG_MNEM_SUFFIX:
5425               if (!intel_syntax)
5426                 {
5427                   i.suffix = mnem_p[-1];
5428                   mnem_p[-1] = '\0';
5429                   current_templates
5430                     = (const templates *) str_hash_find (op_hash, mnemonic);
5431                 }
5432               break;
5433
5434               /* Intel Syntax.  */
5435             case 'd':
5436               if (intel_syntax)
5437                 {
5438                   if (intel_float_operand (mnemonic) == 1)
5439                     i.suffix = SHORT_MNEM_SUFFIX;
5440                   else
5441                     i.suffix = LONG_MNEM_SUFFIX;
5442                   mnem_p[-1] = '\0';
5443                   current_templates
5444                     = (const templates *) str_hash_find (op_hash, mnemonic);
5445                 }
5446               break;
5447             }
5448         }
5449
5450       if (!current_templates)
5451         {
5452           as_bad (_("no such instruction: `%s'"), token_start);
5453           return NULL;
5454         }
5455     }
5456
5457   if (current_templates->start->opcode_modifier.jump == JUMP
5458       || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5459     {
5460       /* Check for a branch hint.  We allow ",pt" and ",pn" for
5461          predict taken and predict not taken respectively.
5462          I'm not sure that branch hints actually do anything on loop
5463          and jcxz insns (JumpByte) for current Pentium4 chips.  They
5464          may work in the future and it doesn't hurt to accept them
5465          now.  */
5466       if (l[0] == ',' && l[1] == 'p')
5467         {
5468           if (l[2] == 't')
5469             {
5470               if (!add_prefix (DS_PREFIX_OPCODE))
5471                 return NULL;
5472               l += 3;
5473             }
5474           else if (l[2] == 'n')
5475             {
5476               if (!add_prefix (CS_PREFIX_OPCODE))
5477                 return NULL;
5478               l += 3;
5479             }
5480         }
5481     }
5482   /* Any other comma loses.  */
5483   if (*l == ',')
5484     {
5485       as_bad (_("invalid character %s in mnemonic"),
5486               output_invalid (*l));
5487       return NULL;
5488     }
5489
5490   /* Check if instruction is supported on specified architecture.  */
5491   supported = 0;
5492   for (t = current_templates->start; t < current_templates->end; ++t)
5493     {
5494       supported |= cpu_flags_match (t);
5495       if (supported == CPU_FLAGS_PERFECT_MATCH)
5496         {
5497           if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT))
5498             as_warn (_("use .code16 to ensure correct addressing mode"));
5499
5500           return l;
5501         }
5502     }
5503
5504   if (!(supported & CPU_FLAGS_64BIT_MATCH))
5505     as_bad (flag_code == CODE_64BIT
5506             ? _("`%s' is not supported in 64-bit mode")
5507             : _("`%s' is only supported in 64-bit mode"),
5508             current_templates->start->name);
5509   else
5510     as_bad (_("`%s' is not supported on `%s%s'"),
5511             current_templates->start->name,
5512             cpu_arch_name ? cpu_arch_name : default_arch,
5513             cpu_sub_arch_name ? cpu_sub_arch_name : "");
5514
5515   return NULL;
5516 }
5517
5518 static char *
5519 parse_operands (char *l, const char *mnemonic)
5520 {
5521   char *token_start;
5522
5523   /* 1 if operand is pending after ','.  */
5524   unsigned int expecting_operand = 0;
5525
5526   while (*l != END_OF_INSN)
5527     {
5528       /* Non-zero if operand parens not balanced.  */
5529       unsigned int paren_not_balanced = 0;
5530       /* True if inside double quotes.  */
5531       bool in_quotes = false;
5532
5533       /* Skip optional white space before operand.  */
5534       if (is_space_char (*l))
5535         ++l;
5536       if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5537         {
5538           as_bad (_("invalid character %s before operand %d"),
5539                   output_invalid (*l),
5540                   i.operands + 1);
5541           return NULL;
5542         }
5543       token_start = l;  /* After white space.  */
5544       while (in_quotes || paren_not_balanced || *l != ',')
5545         {
5546           if (*l == END_OF_INSN)
5547             {
5548               if (in_quotes)
5549                 {
5550                   as_bad (_("unbalanced double quotes in operand %d."),
5551                           i.operands + 1);
5552                   return NULL;
5553                 }
5554               if (paren_not_balanced)
5555                 {
5556                   know (!intel_syntax);
5557                   as_bad (_("unbalanced parenthesis in operand %d."),
5558                           i.operands + 1);
5559                   return NULL;
5560                 }
5561               else
5562                 break;  /* we are done */
5563             }
5564           else if (*l == '\\' && l[1] == '"')
5565             ++l;
5566           else if (*l == '"')
5567             in_quotes = !in_quotes;
5568           else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
5569             {
5570               as_bad (_("invalid character %s in operand %d"),
5571                       output_invalid (*l),
5572                       i.operands + 1);
5573               return NULL;
5574             }
5575           if (!intel_syntax && !in_quotes)
5576             {
5577               if (*l == '(')
5578                 ++paren_not_balanced;
5579               if (*l == ')')
5580                 --paren_not_balanced;
5581             }
5582           l++;
5583         }
5584       if (l != token_start)
5585         {                       /* Yes, we've read in another operand.  */
5586           unsigned int operand_ok;
5587           this_operand = i.operands++;
5588           if (i.operands > MAX_OPERANDS)
5589             {
5590               as_bad (_("spurious operands; (%d operands/instruction max)"),
5591                       MAX_OPERANDS);
5592               return NULL;
5593             }
5594           i.types[this_operand].bitfield.unspecified = 1;
5595           /* Now parse operand adding info to 'i' as we go along.  */
5596           END_STRING_AND_SAVE (l);
5597
5598           if (i.mem_operands > 1)
5599             {
5600               as_bad (_("too many memory references for `%s'"),
5601                       mnemonic);
5602               return 0;
5603             }
5604
5605           if (intel_syntax)
5606             operand_ok =
5607               i386_intel_operand (token_start,
5608                                   intel_float_operand (mnemonic));
5609           else
5610             operand_ok = i386_att_operand (token_start);
5611
5612           RESTORE_END_STRING (l);
5613           if (!operand_ok)
5614             return NULL;
5615         }
5616       else
5617         {
5618           if (expecting_operand)
5619             {
5620             expecting_operand_after_comma:
5621               as_bad (_("expecting operand after ','; got nothing"));
5622               return NULL;
5623             }
5624           if (*l == ',')
5625             {
5626               as_bad (_("expecting operand before ','; got nothing"));
5627               return NULL;
5628             }
5629         }
5630
5631       /* Now *l must be either ',' or END_OF_INSN.  */
5632       if (*l == ',')
5633         {
5634           if (*++l == END_OF_INSN)
5635             {
5636               /* Just skip it, if it's \n complain.  */
5637               goto expecting_operand_after_comma;
5638             }
5639           expecting_operand = 1;
5640         }
5641     }
5642   return l;
5643 }
5644
5645 static void
5646 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
5647 {
5648   union i386_op temp_op;
5649   i386_operand_type temp_type;
5650   unsigned int temp_flags;
5651   enum bfd_reloc_code_real temp_reloc;
5652
5653   temp_type = i.types[xchg2];
5654   i.types[xchg2] = i.types[xchg1];
5655   i.types[xchg1] = temp_type;
5656
5657   temp_flags = i.flags[xchg2];
5658   i.flags[xchg2] = i.flags[xchg1];
5659   i.flags[xchg1] = temp_flags;
5660
5661   temp_op = i.op[xchg2];
5662   i.op[xchg2] = i.op[xchg1];
5663   i.op[xchg1] = temp_op;
5664
5665   temp_reloc = i.reloc[xchg2];
5666   i.reloc[xchg2] = i.reloc[xchg1];
5667   i.reloc[xchg1] = temp_reloc;
5668
5669   if (i.mask.reg)
5670     {
5671       if (i.mask.operand == xchg1)
5672         i.mask.operand = xchg2;
5673       else if (i.mask.operand == xchg2)
5674         i.mask.operand = xchg1;
5675     }
5676   if (i.broadcast.type)
5677     {
5678       if (i.broadcast.operand == xchg1)
5679         i.broadcast.operand = xchg2;
5680       else if (i.broadcast.operand == xchg2)
5681         i.broadcast.operand = xchg1;
5682     }
5683   if (i.rounding.type != rc_none)
5684     {
5685       if (i.rounding.operand == xchg1)
5686         i.rounding.operand = xchg2;
5687       else if (i.rounding.operand == xchg2)
5688         i.rounding.operand = xchg1;
5689     }
5690 }
5691
5692 static void
5693 swap_operands (void)
5694 {
5695   switch (i.operands)
5696     {
5697     case 5:
5698     case 4:
5699       swap_2_operands (1, i.operands - 2);
5700       /* Fall through.  */
5701     case 3:
5702     case 2:
5703       swap_2_operands (0, i.operands - 1);
5704       break;
5705     default:
5706       abort ();
5707     }
5708
5709   if (i.mem_operands == 2)
5710     {
5711       const reg_entry *temp_seg;
5712       temp_seg = i.seg[0];
5713       i.seg[0] = i.seg[1];
5714       i.seg[1] = temp_seg;
5715     }
5716 }
5717
5718 /* Try to ensure constant immediates are represented in the smallest
5719    opcode possible.  */
5720 static void
5721 optimize_imm (void)
5722 {
5723   char guess_suffix = 0;
5724   int op;
5725
5726   if (i.suffix)
5727     guess_suffix = i.suffix;
5728   else if (i.reg_operands)
5729     {
5730       /* Figure out a suffix from the last register operand specified.
5731          We can't do this properly yet, i.e. excluding special register
5732          instances, but the following works for instructions with
5733          immediates.  In any case, we can't set i.suffix yet.  */
5734       for (op = i.operands; --op >= 0;)
5735         if (i.types[op].bitfield.class != Reg)
5736           continue;
5737         else if (i.types[op].bitfield.byte)
5738           {
5739             guess_suffix = BYTE_MNEM_SUFFIX;
5740             break;
5741           }
5742         else if (i.types[op].bitfield.word)
5743           {
5744             guess_suffix = WORD_MNEM_SUFFIX;
5745             break;
5746           }
5747         else if (i.types[op].bitfield.dword)
5748           {
5749             guess_suffix = LONG_MNEM_SUFFIX;
5750             break;
5751           }
5752         else if (i.types[op].bitfield.qword)
5753           {
5754             guess_suffix = QWORD_MNEM_SUFFIX;
5755             break;
5756           }
5757     }
5758   else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
5759     guess_suffix = WORD_MNEM_SUFFIX;
5760
5761   for (op = i.operands; --op >= 0;)
5762     if (operand_type_check (i.types[op], imm))
5763       {
5764         switch (i.op[op].imms->X_op)
5765           {
5766           case O_constant:
5767             /* If a suffix is given, this operand may be shortened.  */
5768             switch (guess_suffix)
5769               {
5770               case LONG_MNEM_SUFFIX:
5771                 i.types[op].bitfield.imm32 = 1;
5772                 i.types[op].bitfield.imm64 = 1;
5773                 break;
5774               case WORD_MNEM_SUFFIX:
5775                 i.types[op].bitfield.imm16 = 1;
5776                 i.types[op].bitfield.imm32 = 1;
5777                 i.types[op].bitfield.imm32s = 1;
5778                 i.types[op].bitfield.imm64 = 1;
5779                 break;
5780               case BYTE_MNEM_SUFFIX:
5781                 i.types[op].bitfield.imm8 = 1;
5782                 i.types[op].bitfield.imm8s = 1;
5783                 i.types[op].bitfield.imm16 = 1;
5784                 i.types[op].bitfield.imm32 = 1;
5785                 i.types[op].bitfield.imm32s = 1;
5786                 i.types[op].bitfield.imm64 = 1;
5787                 break;
5788               }
5789
5790             /* If this operand is at most 16 bits, convert it
5791                to a signed 16 bit number before trying to see
5792                whether it will fit in an even smaller size.
5793                This allows a 16-bit operand such as $0xffe0 to
5794                be recognised as within Imm8S range.  */
5795             if ((i.types[op].bitfield.imm16)
5796                 && (i.op[op].imms->X_add_number & ~(offsetT) 0xffff) == 0)
5797               {
5798                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5799                                                 ^ 0x8000) - 0x8000);
5800               }
5801 #ifdef BFD64
5802             /* Store 32-bit immediate in 64-bit for 64-bit BFD.  */
5803             if ((i.types[op].bitfield.imm32)
5804                 && ((i.op[op].imms->X_add_number & ~(((offsetT) 2 << 31) - 1))
5805                     == 0))
5806               {
5807                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5808                                                 ^ ((offsetT) 1 << 31))
5809                                                - ((offsetT) 1 << 31));
5810               }
5811 #endif
5812             i.types[op]
5813               = operand_type_or (i.types[op],
5814                                  smallest_imm_type (i.op[op].imms->X_add_number));
5815
5816             /* We must avoid matching of Imm32 templates when 64bit
5817                only immediate is available.  */
5818             if (guess_suffix == QWORD_MNEM_SUFFIX)
5819               i.types[op].bitfield.imm32 = 0;
5820             break;
5821
5822           case O_absent:
5823           case O_register:
5824             abort ();
5825
5826             /* Symbols and expressions.  */
5827           default:
5828             /* Convert symbolic operand to proper sizes for matching, but don't
5829                prevent matching a set of insns that only supports sizes other
5830                than those matching the insn suffix.  */
5831             {
5832               i386_operand_type mask, allowed;
5833               const insn_template *t = current_templates->start;
5834
5835               operand_type_set (&mask, 0);
5836               allowed = t->operand_types[op];
5837
5838               while (++t < current_templates->end)
5839                 {
5840                   allowed = operand_type_and (allowed, anyimm);
5841                   allowed = operand_type_or (allowed, t->operand_types[op]);
5842                 }
5843               switch (guess_suffix)
5844                 {
5845                 case QWORD_MNEM_SUFFIX:
5846                   mask.bitfield.imm64 = 1;
5847                   mask.bitfield.imm32s = 1;
5848                   break;
5849                 case LONG_MNEM_SUFFIX:
5850                   mask.bitfield.imm32 = 1;
5851                   break;
5852                 case WORD_MNEM_SUFFIX:
5853                   mask.bitfield.imm16 = 1;
5854                   break;
5855                 case BYTE_MNEM_SUFFIX:
5856                   mask.bitfield.imm8 = 1;
5857                   break;
5858                 default:
5859                   break;
5860                 }
5861               allowed = operand_type_and (mask, allowed);
5862               if (!operand_type_all_zero (&allowed))
5863                 i.types[op] = operand_type_and (i.types[op], mask);
5864             }
5865             break;
5866           }
5867       }
5868 }
5869
5870 /* Try to use the smallest displacement type too.  */
5871 static void
5872 optimize_disp (void)
5873 {
5874   int op;
5875
5876   for (op = i.operands; --op >= 0;)
5877     if (operand_type_check (i.types[op], disp))
5878       {
5879         if (i.op[op].disps->X_op == O_constant)
5880           {
5881             offsetT op_disp = i.op[op].disps->X_add_number;
5882
5883             if (i.types[op].bitfield.disp16
5884                 && (op_disp & ~(offsetT) 0xffff) == 0)
5885               {
5886                 /* If this operand is at most 16 bits, convert
5887                    to a signed 16 bit number and don't use 64bit
5888                    displacement.  */
5889                 op_disp = (((op_disp & 0xffff) ^ 0x8000) - 0x8000);
5890                 i.types[op].bitfield.disp64 = 0;
5891               }
5892             if (!op_disp && i.types[op].bitfield.baseindex)
5893               {
5894                 i.types[op].bitfield.disp8 = 0;
5895                 i.types[op].bitfield.disp16 = 0;
5896                 i.types[op].bitfield.disp32 = 0;
5897                 i.types[op].bitfield.disp32s = 0;
5898                 i.types[op].bitfield.disp64 = 0;
5899                 i.op[op].disps = 0;
5900                 i.disp_operands--;
5901               }
5902 #ifdef BFD64
5903             else if (flag_code == CODE_64BIT)
5904               {
5905                 if (want_disp32 (current_templates->start)
5906                     && fits_in_unsigned_long (op_disp))
5907                   i.types[op].bitfield.disp32 = 1;
5908
5909                 /* Optimize 64-bit displacement to 32-bit for 64-bit BFD.  */
5910                 if (i.types[op].bitfield.disp32
5911                     && (op_disp & ~(((offsetT) 2 << 31) - 1)) == 0)
5912                   {
5913                     /* If this operand is at most 32 bits, convert
5914                        to a signed 32 bit number and don't use 64bit
5915                        displacement.  */
5916                     op_disp &= (((offsetT) 2 << 31) - 1);
5917                     op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
5918                     i.types[op].bitfield.disp64 = 0;
5919                   }
5920
5921                 if (fits_in_signed_long (op_disp))
5922                   {
5923                     i.types[op].bitfield.disp64 = 0;
5924                     i.types[op].bitfield.disp32s = 1;
5925                   }
5926               }
5927 #endif
5928             if ((i.types[op].bitfield.disp32
5929                  || i.types[op].bitfield.disp32s
5930                  || i.types[op].bitfield.disp16)
5931                 && fits_in_disp8 (op_disp))
5932               i.types[op].bitfield.disp8 = 1;
5933           }
5934         else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
5935                  || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
5936           {
5937             fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
5938                          i.op[op].disps, 0, i.reloc[op]);
5939             i.types[op].bitfield.disp8 = 0;
5940             i.types[op].bitfield.disp16 = 0;
5941             i.types[op].bitfield.disp32 = 0;
5942             i.types[op].bitfield.disp32s = 0;
5943             i.types[op].bitfield.disp64 = 0;
5944           }
5945         else
5946           /* We only support 64bit displacement on constants.  */
5947           i.types[op].bitfield.disp64 = 0;
5948       }
5949 }
5950
5951 /* Return 1 if there is a match in broadcast bytes between operand
5952    GIVEN and instruction template T.   */
5953
5954 static INLINE int
5955 match_broadcast_size (const insn_template *t, unsigned int given)
5956 {
5957   return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
5958            && i.types[given].bitfield.byte)
5959           || (t->opcode_modifier.broadcast == WORD_BROADCAST
5960               && i.types[given].bitfield.word)
5961           || (t->opcode_modifier.broadcast == DWORD_BROADCAST
5962               && i.types[given].bitfield.dword)
5963           || (t->opcode_modifier.broadcast == QWORD_BROADCAST
5964               && i.types[given].bitfield.qword));
5965 }
5966
5967 /* Check if operands are valid for the instruction.  */
5968
5969 static int
5970 check_VecOperands (const insn_template *t)
5971 {
5972   unsigned int op;
5973   i386_cpu_flags cpu;
5974
  /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
     any one operand are implicitly requiring AVX512VL support if the actual
     operand size is YMMword or XMMword.  Since this function runs after
     template matching, there's no need to check for YMMword/XMMword in
     the template.  */
5980   cpu = cpu_flags_and (t->cpu_flags, avx512);
5981   if (!cpu_flags_all_zero (&cpu)
5982       && !t->cpu_flags.bitfield.cpuavx512vl
5983       && !cpu_arch_flags.bitfield.cpuavx512vl)
5984     {
5985       for (op = 0; op < t->operands; ++op)
5986         {
5987           if (t->operand_types[op].bitfield.zmmword
5988               && (i.types[op].bitfield.ymmword
5989                   || i.types[op].bitfield.xmmword))
5990             {
5991               i.error = unsupported;
5992               return 1;
5993             }
5994         }
5995     }
5996
5997   /* Without VSIB byte, we can't have a vector register for index.  */
5998   if (!t->opcode_modifier.sib
5999       && i.index_reg
6000       && (i.index_reg->reg_type.bitfield.xmmword
6001           || i.index_reg->reg_type.bitfield.ymmword
6002           || i.index_reg->reg_type.bitfield.zmmword))
6003     {
6004       i.error = unsupported_vector_index_register;
6005       return 1;
6006     }
6007
6008   /* Check if default mask is allowed.  */
6009   if (t->opcode_modifier.nodefmask
6010       && (!i.mask.reg || i.mask.reg->reg_num == 0))
6011     {
6012       i.error = no_default_mask;
6013       return 1;
6014     }
6015
6016   /* For VSIB byte, we need a vector register for index, and all vector
6017      registers must be distinct.  */
6018   if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
6019     {
6020       if (!i.index_reg
6021           || !((t->opcode_modifier.sib == VECSIB128
6022                 && i.index_reg->reg_type.bitfield.xmmword)
6023                || (t->opcode_modifier.sib == VECSIB256
6024                    && i.index_reg->reg_type.bitfield.ymmword)
6025                || (t->opcode_modifier.sib == VECSIB512
6026                    && i.index_reg->reg_type.bitfield.zmmword)))
6027       {
6028         i.error = invalid_vsib_address;
6029         return 1;
6030       }
6031
6032       gas_assert (i.reg_operands == 2 || i.mask.reg);
6033       if (i.reg_operands == 2 && !i.mask.reg)
6034         {
6035           gas_assert (i.types[0].bitfield.class == RegSIMD);
6036           gas_assert (i.types[0].bitfield.xmmword
6037                       || i.types[0].bitfield.ymmword);
6038           gas_assert (i.types[2].bitfield.class == RegSIMD);
6039           gas_assert (i.types[2].bitfield.xmmword
6040                       || i.types[2].bitfield.ymmword);
6041           if (operand_check == check_none)
6042             return 0;
6043           if (register_number (i.op[0].regs)
6044               != register_number (i.index_reg)
6045               && register_number (i.op[2].regs)
6046                  != register_number (i.index_reg)
6047               && register_number (i.op[0].regs)
6048                  != register_number (i.op[2].regs))
6049             return 0;
6050           if (operand_check == check_error)
6051             {
6052               i.error = invalid_vector_register_set;
6053               return 1;
6054             }
6055           as_warn (_("mask, index, and destination registers should be distinct"));
6056         }
6057       else if (i.reg_operands == 1 && i.mask.reg)
6058         {
6059           if (i.types[1].bitfield.class == RegSIMD
6060               && (i.types[1].bitfield.xmmword
6061                   || i.types[1].bitfield.ymmword
6062                   || i.types[1].bitfield.zmmword)
6063               && (register_number (i.op[1].regs)
6064                   == register_number (i.index_reg)))
6065             {
6066               if (operand_check == check_error)
6067                 {
6068                   i.error = invalid_vector_register_set;
6069                   return 1;
6070                 }
6071               if (operand_check != check_none)
6072                 as_warn (_("index and destination registers should be distinct"));
6073             }
6074         }
6075     }
6076
6077   /* For AMX instructions with three tmmword operands, all tmmword operand must be
6078      distinct */
6079   if (t->operand_types[0].bitfield.tmmword
6080       && i.reg_operands == 3)
6081     {
6082       if (register_number (i.op[0].regs)
6083           == register_number (i.op[1].regs)
6084           || register_number (i.op[0].regs)
6085              == register_number (i.op[2].regs)
6086           || register_number (i.op[1].regs)
6087              == register_number (i.op[2].regs))
6088         {
6089           i.error = invalid_tmm_register_set;
6090           return 1;
6091         }
6092     }
6093
6094   /* Check if broadcast is supported by the instruction and is applied
6095      to the memory operand.  */
6096   if (i.broadcast.type)
6097     {
6098       i386_operand_type type, overlap;
6099
6100       /* Check if specified broadcast is supported in this instruction,
6101          and its broadcast bytes match the memory operand.  */
6102       op = i.broadcast.operand;
6103       if (!t->opcode_modifier.broadcast
6104           || !(i.flags[op] & Operand_Mem)
6105           || (!i.types[op].bitfield.unspecified
6106               && !match_broadcast_size (t, op)))
6107         {
6108         bad_broadcast:
6109           i.error = unsupported_broadcast;
6110           return 1;
6111         }
6112
6113       i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
6114                            * i.broadcast.type);
6115       operand_type_set (&type, 0);
6116       switch (i.broadcast.bytes)
6117         {
6118         case 2:
6119           type.bitfield.word = 1;
6120           break;
6121         case 4:
6122           type.bitfield.dword = 1;
6123           break;
6124         case 8:
6125           type.bitfield.qword = 1;
6126           break;
6127         case 16:
6128           type.bitfield.xmmword = 1;
6129           break;
6130         case 32:
6131           type.bitfield.ymmword = 1;
6132           break;
6133         case 64:
6134           type.bitfield.zmmword = 1;
6135           break;
6136         default:
6137           goto bad_broadcast;
6138         }
6139
6140       overlap = operand_type_and (type, t->operand_types[op]);
6141       if (t->operand_types[op].bitfield.class == RegSIMD
6142           && t->operand_types[op].bitfield.byte
6143              + t->operand_types[op].bitfield.word
6144              + t->operand_types[op].bitfield.dword
6145              + t->operand_types[op].bitfield.qword > 1)
6146         {
6147           overlap.bitfield.xmmword = 0;
6148           overlap.bitfield.ymmword = 0;
6149           overlap.bitfield.zmmword = 0;
6150         }
6151       if (operand_type_all_zero (&overlap))
6152           goto bad_broadcast;
6153
6154       if (t->opcode_modifier.checkregsize)
6155         {
6156           unsigned int j;
6157
6158           type.bitfield.baseindex = 1;
6159           for (j = 0; j < i.operands; ++j)
6160             {
6161               if (j != op
6162                   && !operand_type_register_match(i.types[j],
6163                                                   t->operand_types[j],
6164                                                   type,
6165                                                   t->operand_types[op]))
6166                 goto bad_broadcast;
6167             }
6168         }
6169     }
6170   /* If broadcast is supported in this instruction, we need to check if
6171      operand of one-element size isn't specified without broadcast.  */
6172   else if (t->opcode_modifier.broadcast && i.mem_operands)
6173     {
6174       /* Find memory operand.  */
6175       for (op = 0; op < i.operands; op++)
6176         if (i.flags[op] & Operand_Mem)
6177           break;
6178       gas_assert (op < i.operands);
6179       /* Check size of the memory operand.  */
6180       if (match_broadcast_size (t, op))
6181         {
6182           i.error = broadcast_needed;
6183           return 1;
6184         }
6185     }
6186   else
6187     op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning.  */
6188
6189   /* Check if requested masking is supported.  */
6190   if (i.mask.reg)
6191     {
6192       switch (t->opcode_modifier.masking)
6193         {
6194         case BOTH_MASKING:
6195           break;
6196         case MERGING_MASKING:
6197           if (i.mask.zeroing)
6198             {
6199         case 0:
6200               i.error = unsupported_masking;
6201               return 1;
6202             }
6203           break;
6204         case DYNAMIC_MASKING:
6205           /* Memory destinations allow only merging masking.  */
6206           if (i.mask.zeroing && i.mem_operands)
6207             {
6208               /* Find memory operand.  */
6209               for (op = 0; op < i.operands; op++)
6210                 if (i.flags[op] & Operand_Mem)
6211                   break;
6212               gas_assert (op < i.operands);
6213               if (op == i.operands - 1)
6214                 {
6215                   i.error = unsupported_masking;
6216                   return 1;
6217                 }
6218             }
6219           break;
6220         default:
6221           abort ();
6222         }
6223     }
6224
6225   /* Check if masking is applied to dest operand.  */
6226   if (i.mask.reg && (i.mask.operand != i.operands - 1))
6227     {
6228       i.error = mask_not_on_destination;
6229       return 1;
6230     }
6231
6232   /* Check RC/SAE.  */
6233   if (i.rounding.type != rc_none)
6234     {
6235       if (!t->opcode_modifier.sae
6236           || (i.rounding.type != saeonly && !t->opcode_modifier.staticrounding))
6237         {
6238           i.error = unsupported_rc_sae;
6239           return 1;
6240         }
6241       /* If the instruction has several immediate operands and one of
6242          them is rounding, the rounding operand should be the last
6243          immediate operand.  */
6244       if (i.imm_operands > 1
6245           && i.rounding.operand != i.imm_operands - 1)
6246         {
6247           i.error = rc_sae_operand_not_last_imm;
6248           return 1;
6249         }
6250     }
6251
6252   /* Check the special Imm4 cases; must be the first operand.  */
6253   if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
6254     {
6255       if (i.op[0].imms->X_op != O_constant
6256           || !fits_in_imm4 (i.op[0].imms->X_add_number))
6257         {
6258           i.error = bad_imm4;
6259           return 1;
6260         }
6261
6262       /* Turn off Imm<N> so that update_imm won't complain.  */
6263       operand_type_set (&i.types[0], 0);
6264     }
6265
6266   /* Check vector Disp8 operand.  */
6267   if (t->opcode_modifier.disp8memshift
6268       && i.disp_encoding != disp_encoding_32bit)
6269     {
6270       if (i.broadcast.type)
6271         i.memshift = t->opcode_modifier.broadcast - 1;
6272       else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6273         i.memshift = t->opcode_modifier.disp8memshift;
6274       else
6275         {
6276           const i386_operand_type *type = NULL;
6277
6278           i.memshift = 0;
6279           for (op = 0; op < i.operands; op++)
6280             if (i.flags[op] & Operand_Mem)
6281               {
6282                 if (t->opcode_modifier.evex == EVEXLIG)
6283                   i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6284                 else if (t->operand_types[op].bitfield.xmmword
6285                          + t->operand_types[op].bitfield.ymmword
6286                          + t->operand_types[op].bitfield.zmmword <= 1)
6287                   type = &t->operand_types[op];
6288                 else if (!i.types[op].bitfield.unspecified)
6289                   type = &i.types[op];
6290               }
6291             else if (i.types[op].bitfield.class == RegSIMD
6292                      && t->opcode_modifier.evex != EVEXLIG)
6293               {
6294                 if (i.types[op].bitfield.zmmword)
6295                   i.memshift = 6;
6296                 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6297                   i.memshift = 5;
6298                 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6299                   i.memshift = 4;
6300               }
6301
6302           if (type)
6303             {
6304               if (type->bitfield.zmmword)
6305                 i.memshift = 6;
6306               else if (type->bitfield.ymmword)
6307                 i.memshift = 5;
6308               else if (type->bitfield.xmmword)
6309                 i.memshift = 4;
6310             }
6311
6312           /* For the check in fits_in_disp8().  */
6313           if (i.memshift == 0)
6314             i.memshift = -1;
6315         }
6316
6317       for (op = 0; op < i.operands; op++)
6318         if (operand_type_check (i.types[op], disp)
6319             && i.op[op].disps->X_op == O_constant)
6320           {
6321             if (fits_in_disp8 (i.op[op].disps->X_add_number))
6322               {
6323                 i.types[op].bitfield.disp8 = 1;
6324                 return 0;
6325               }
6326             i.types[op].bitfield.disp8 = 0;
6327           }
6328     }
6329
6330   i.memshift = 0;
6331
6332   return 0;
6333 }
6334
6335 /* Check if encoding requirements are met by the instruction.  */
6336
6337 static int
6338 VEX_check_encoding (const insn_template *t)
6339 {
6340   if (i.vec_encoding == vex_encoding_error)
6341     {
6342       i.error = unsupported;
6343       return 1;
6344     }
6345
6346   if (i.vec_encoding == vex_encoding_evex)
6347     {
6348       /* This instruction must be encoded with EVEX prefix.  */
6349       if (!is_evex_encoding (t))
6350         {
6351           i.error = unsupported;
6352           return 1;
6353         }
6354       return 0;
6355     }
6356
6357   if (!t->opcode_modifier.vex)
6358     {
6359       /* This instruction template doesn't have VEX prefix.  */
6360       if (i.vec_encoding != vex_encoding_default)
6361         {
6362           i.error = unsupported;
6363           return 1;
6364         }
6365       return 0;
6366     }
6367
6368   return 0;
6369 }
6370
6371 static const insn_template *
6372 match_template (char mnem_suffix)
6373 {
6374   /* Points to template once we've found it.  */
6375   const insn_template *t;
6376   i386_operand_type overlap0, overlap1, overlap2, overlap3;
6377   i386_operand_type overlap4;
6378   unsigned int found_reverse_match;
6379   i386_opcode_modifier suffix_check;
6380   i386_operand_type operand_types [MAX_OPERANDS];
6381   int addr_prefix_disp;
6382   unsigned int j, size_match, check_register;
6383   enum i386_error specific_error = 0;
6384
6385 #if MAX_OPERANDS != 5
6386 # error "MAX_OPERANDS must be 5."
6387 #endif
6388
6389   found_reverse_match = 0;
6390   addr_prefix_disp = -1;
6391
6392   /* Prepare for mnemonic suffix check.  */
6393   memset (&suffix_check, 0, sizeof (suffix_check));
6394   switch (mnem_suffix)
6395     {
6396     case BYTE_MNEM_SUFFIX:
6397       suffix_check.no_bsuf = 1;
6398       break;
6399     case WORD_MNEM_SUFFIX:
6400       suffix_check.no_wsuf = 1;
6401       break;
6402     case SHORT_MNEM_SUFFIX:
6403       suffix_check.no_ssuf = 1;
6404       break;
6405     case LONG_MNEM_SUFFIX:
6406       suffix_check.no_lsuf = 1;
6407       break;
6408     case QWORD_MNEM_SUFFIX:
6409       suffix_check.no_qsuf = 1;
6410       break;
6411     default:
6412       /* NB: In Intel syntax, normally we can check for memory operand
6413          size when there is no mnemonic suffix.  But jmp and call have
6414          2 different encodings with Dword memory operand size, one with
6415          No_ldSuf and the other without.  i.suffix is set to
6416          LONG_DOUBLE_MNEM_SUFFIX to skip the one with No_ldSuf.  */
6417       if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX)
6418         suffix_check.no_ldsuf = 1;
6419     }
6420
6421   /* Must have right number of operands.  */
6422   i.error = number_of_operands_mismatch;
6423
6424   for (t = current_templates->start; t < current_templates->end; t++)
6425     {
6426       addr_prefix_disp = -1;
6427       found_reverse_match = 0;
6428
6429       if (i.operands != t->operands)
6430         continue;
6431
6432       /* Check processor support.  */
6433       i.error = unsupported;
6434       if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6435         continue;
6436
6437       /* Check Pseudo Prefix.  */
6438       i.error = unsupported;
6439       if (t->opcode_modifier.pseudovexprefix
6440           && !(i.vec_encoding == vex_encoding_vex
6441               || i.vec_encoding == vex_encoding_vex3))
6442         continue;
6443
6444       /* Check AT&T mnemonic.   */
6445       i.error = unsupported_with_intel_mnemonic;
6446       if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6447         continue;
6448
6449       /* Check AT&T/Intel syntax.  */
6450       i.error = unsupported_syntax;
6451       if ((intel_syntax && t->opcode_modifier.attsyntax)
6452           || (!intel_syntax && t->opcode_modifier.intelsyntax))
6453         continue;
6454
6455       /* Check Intel64/AMD64 ISA.   */
6456       switch (isa64)
6457         {
6458         default:
6459           /* Default: Don't accept Intel64.  */
6460           if (t->opcode_modifier.isa64 == INTEL64)
6461             continue;
6462           break;
6463         case amd64:
6464           /* -mamd64: Don't accept Intel64 and Intel64 only.  */
6465           if (t->opcode_modifier.isa64 >= INTEL64)
6466             continue;
6467           break;
6468         case intel64:
6469           /* -mintel64: Don't accept AMD64.  */
6470           if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6471             continue;
6472           break;
6473         }
6474
6475       /* Check the suffix.  */
6476       i.error = invalid_instruction_suffix;
6477       if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
6478           || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf)
6479           || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf)
6480           || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf)
6481           || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf)
6482           || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf))
6483         continue;
6484
6485       size_match = operand_size_match (t);
6486       if (!size_match)
6487         continue;
6488
6489       /* This is intentionally not
6490
6491          if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6492
6493          as the case of a missing * on the operand is accepted (perhaps with
6494          a warning, issued further down).  */
6495       if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6496         {
6497           i.error = operand_type_mismatch;
6498           continue;
6499         }
6500
6501       for (j = 0; j < MAX_OPERANDS; j++)
6502         operand_types[j] = t->operand_types[j];
6503
6504       /* In general, don't allow
6505          - 64-bit operands outside of 64-bit mode,
6506          - 32-bit operands on pre-386.  */
6507       j = i.imm_operands + (t->operands > i.imm_operands + 1);
6508       if (((i.suffix == QWORD_MNEM_SUFFIX
6509             && flag_code != CODE_64BIT
6510             && !(t->opcode_modifier.opcodespace == SPACE_0F
6511                  && t->base_opcode == 0xc7
6512                  && t->opcode_modifier.opcodeprefix == PREFIX_NONE
6513                  && t->extension_opcode == 1) /* cmpxchg8b */)
6514            || (i.suffix == LONG_MNEM_SUFFIX
6515                && !cpu_arch_flags.bitfield.cpui386))
6516           && (intel_syntax
6517               ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6518                  && !intel_float_operand (t->name))
6519               : intel_float_operand (t->name) != 2)
6520           && (t->operands == i.imm_operands
6521               || (operand_types[i.imm_operands].bitfield.class != RegMMX
6522                && operand_types[i.imm_operands].bitfield.class != RegSIMD
6523                && operand_types[i.imm_operands].bitfield.class != RegMask)
6524               || (operand_types[j].bitfield.class != RegMMX
6525                   && operand_types[j].bitfield.class != RegSIMD
6526                   && operand_types[j].bitfield.class != RegMask))
6527           && !t->opcode_modifier.sib)
6528         continue;
6529
6530       /* Do not verify operands when there are none.  */
6531       if (!t->operands)
6532         {
6533           if (VEX_check_encoding (t))
6534             {
6535               specific_error = i.error;
6536               continue;
6537             }
6538
6539           /* We've found a match; break out of loop.  */
6540           break;
6541         }
6542
6543       if (!t->opcode_modifier.jump
6544           || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6545         {
6546           /* There should be only one Disp operand.  */
6547           for (j = 0; j < MAX_OPERANDS; j++)
6548             if (operand_type_check (operand_types[j], disp))
6549               break;
6550           if (j < MAX_OPERANDS)
6551             {
6552               bool override = (i.prefix[ADDR_PREFIX] != 0);
6553
6554               addr_prefix_disp = j;
6555
6556               /* Address size prefix will turn Disp64/Disp32S/Disp32/Disp16
6557                  operand into Disp32/Disp32/Disp16/Disp32 operand.  */
6558               switch (flag_code)
6559                 {
6560                 case CODE_16BIT:
6561                   override = !override;
6562                   /* Fall through.  */
6563                 case CODE_32BIT:
6564                   if (operand_types[j].bitfield.disp32
6565                       && operand_types[j].bitfield.disp16)
6566                     {
6567                       operand_types[j].bitfield.disp16 = override;
6568                       operand_types[j].bitfield.disp32 = !override;
6569                     }
6570                   operand_types[j].bitfield.disp32s = 0;
6571                   operand_types[j].bitfield.disp64 = 0;
6572                   break;
6573
6574                 case CODE_64BIT:
6575                   if (operand_types[j].bitfield.disp32s
6576                       || operand_types[j].bitfield.disp64)
6577                     {
6578                       operand_types[j].bitfield.disp64 &= !override;
6579                       operand_types[j].bitfield.disp32s &= !override;
6580                       operand_types[j].bitfield.disp32 = override;
6581                     }
6582                   operand_types[j].bitfield.disp16 = 0;
6583                   break;
6584                 }
6585             }
6586         }
6587
6588       /* Force 0x8b encoding for "mov foo@GOT, %eax".  */
6589       if (i.reloc[0] == BFD_RELOC_386_GOT32
6590           && t->base_opcode == 0xa0
6591           && t->opcode_modifier.opcodespace == SPACE_BASE)
6592         continue;
6593
6594       /* We check register size if needed.  */
6595       if (t->opcode_modifier.checkregsize)
6596         {
6597           check_register = (1 << t->operands) - 1;
6598           if (i.broadcast.type)
6599             check_register &= ~(1 << i.broadcast.operand);
6600         }
6601       else
6602         check_register = 0;
6603
6604       overlap0 = operand_type_and (i.types[0], operand_types[0]);
6605       switch (t->operands)
6606         {
6607         case 1:
6608           if (!operand_type_match (overlap0, i.types[0]))
6609             continue;
6610           break;
6611         case 2:
6612           /* xchg %eax, %eax is a special case. It is an alias for nop
6613              only in 32bit mode and we can use opcode 0x90.  In 64bit
6614              mode, we can't use 0x90 for xchg %eax, %eax since it should
6615              zero-extend %eax to %rax.  */
6616           if (flag_code == CODE_64BIT
6617               && t->base_opcode == 0x90
6618               && t->opcode_modifier.opcodespace == SPACE_BASE
6619               && i.types[0].bitfield.instance == Accum
6620               && i.types[0].bitfield.dword
6621               && i.types[1].bitfield.instance == Accum
6622               && i.types[1].bitfield.dword)
6623             continue;
6624           /* xrelease mov %eax, <disp> is another special case. It must not
6625              match the accumulator-only encoding of mov.  */
6626           if (flag_code != CODE_64BIT
6627               && i.hle_prefix
6628               && t->base_opcode == 0xa0
6629               && t->opcode_modifier.opcodespace == SPACE_BASE
6630               && i.types[0].bitfield.instance == Accum
6631               && (i.flags[1] & Operand_Mem))
6632             continue;
6633           /* Fall through.  */
6634
6635         case 3:
6636           if (!(size_match & MATCH_STRAIGHT))
6637             goto check_reverse;
6638           /* Reverse direction of operands if swapping is possible in the first
6639              place (operands need to be symmetric) and
6640              - the load form is requested, and the template is a store form,
6641              - the store form is requested, and the template is a load form,
6642              - the non-default (swapped) form is requested.  */
6643           overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6644           if (t->opcode_modifier.d && i.reg_operands == i.operands
6645               && !operand_type_all_zero (&overlap1))
6646             switch (i.dir_encoding)
6647               {
6648               case dir_encoding_load:
6649                 if (operand_type_check (operand_types[i.operands - 1], anymem)
6650                     || t->opcode_modifier.regmem)
6651                   goto check_reverse;
6652                 break;
6653
6654               case dir_encoding_store:
6655                 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6656                     && !t->opcode_modifier.regmem)
6657                   goto check_reverse;
6658                 break;
6659
6660               case dir_encoding_swap:
6661                 goto check_reverse;
6662
6663               case dir_encoding_default:
6664                 break;
6665               }
6666           /* If we want store form, we skip the current load.  */
6667           if ((i.dir_encoding == dir_encoding_store
6668                || i.dir_encoding == dir_encoding_swap)
6669               && i.mem_operands == 0
6670               && t->opcode_modifier.load)
6671             continue;
6672           /* Fall through.  */
6673         case 4:
6674         case 5:
6675           overlap1 = operand_type_and (i.types[1], operand_types[1]);
6676           if (!operand_type_match (overlap0, i.types[0])
6677               || !operand_type_match (overlap1, i.types[1])
6678               || ((check_register & 3) == 3
6679                   && !operand_type_register_match (i.types[0],
6680                                                    operand_types[0],
6681                                                    i.types[1],
6682                                                    operand_types[1])))
6683             {
6684               /* Check if other direction is valid ...  */
6685               if (!t->opcode_modifier.d)
6686                 continue;
6687
6688             check_reverse:
6689               if (!(size_match & MATCH_REVERSE))
6690                 continue;
6691               /* Try reversing direction of operands.  */
6692               overlap0 = operand_type_and (i.types[0], operand_types[i.operands - 1]);
6693               overlap1 = operand_type_and (i.types[i.operands - 1], operand_types[0]);
6694               if (!operand_type_match (overlap0, i.types[0])
6695                   || !operand_type_match (overlap1, i.types[i.operands - 1])
6696                   || (check_register
6697                       && !operand_type_register_match (i.types[0],
6698                                                        operand_types[i.operands - 1],
6699                                                        i.types[i.operands - 1],
6700                                                        operand_types[0])))
6701                 {
6702                   /* Does not match either direction.  */
6703                   continue;
6704                 }
6705               /* found_reverse_match holds which of D or FloatR
6706                  we've found.  */
6707               if (!t->opcode_modifier.d)
6708                 found_reverse_match = 0;
6709               else if (operand_types[0].bitfield.tbyte)
6710                 found_reverse_match = Opcode_FloatD;
6711               else if (operand_types[0].bitfield.xmmword
6712                        || operand_types[i.operands - 1].bitfield.xmmword
6713                        || operand_types[0].bitfield.class == RegMMX
6714                        || operand_types[i.operands - 1].bitfield.class == RegMMX
6715                        || is_any_vex_encoding(t))
6716                 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
6717                                       ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
6718               else
6719                 found_reverse_match = Opcode_D;
6720               if (t->opcode_modifier.floatr)
6721                 found_reverse_match |= Opcode_FloatR;
6722             }
6723           else
6724             {
6725               /* Found a forward 2 operand match here.  */
6726               switch (t->operands)
6727                 {
6728                 case 5:
6729                   overlap4 = operand_type_and (i.types[4],
6730                                                operand_types[4]);
6731                   /* Fall through.  */
6732                 case 4:
6733                   overlap3 = operand_type_and (i.types[3],
6734                                                operand_types[3]);
6735                   /* Fall through.  */
6736                 case 3:
6737                   overlap2 = operand_type_and (i.types[2],
6738                                                operand_types[2]);
6739                   break;
6740                 }
6741
6742               switch (t->operands)
6743                 {
6744                 case 5:
6745                   if (!operand_type_match (overlap4, i.types[4])
6746                       || !operand_type_register_match (i.types[3],
6747                                                        operand_types[3],
6748                                                        i.types[4],
6749                                                        operand_types[4]))
6750                     continue;
6751                   /* Fall through.  */
6752                 case 4:
6753                   if (!operand_type_match (overlap3, i.types[3])
6754                       || ((check_register & 0xa) == 0xa
6755                           && !operand_type_register_match (i.types[1],
6756                                                             operand_types[1],
6757                                                             i.types[3],
6758                                                             operand_types[3]))
6759                       || ((check_register & 0xc) == 0xc
6760                           && !operand_type_register_match (i.types[2],
6761                                                             operand_types[2],
6762                                                             i.types[3],
6763                                                             operand_types[3])))
6764                     continue;
6765                   /* Fall through.  */
6766                 case 3:
6767                   /* Here we make use of the fact that there are no
6768                      reverse match 3 operand instructions.  */
6769                   if (!operand_type_match (overlap2, i.types[2])
6770                       || ((check_register & 5) == 5
6771                           && !operand_type_register_match (i.types[0],
6772                                                             operand_types[0],
6773                                                             i.types[2],
6774                                                             operand_types[2]))
6775                       || ((check_register & 6) == 6
6776                           && !operand_type_register_match (i.types[1],
6777                                                             operand_types[1],
6778                                                             i.types[2],
6779                                                             operand_types[2])))
6780                     continue;
6781                   break;
6782                 }
6783             }
6784           /* Found either forward/reverse 2, 3 or 4 operand match here:
6785              slip through to break.  */
6786         }
6787
6788       /* Check if vector operands are valid.  */
6789       if (check_VecOperands (t))
6790         {
6791           specific_error = i.error;
6792           continue;
6793         }
6794
6795       /* Check if VEX/EVEX encoding requirements can be satisfied.  */
6796       if (VEX_check_encoding (t))
6797         {
6798           specific_error = i.error;
6799           continue;
6800         }
6801
6802       /* We've found a match; break out of loop.  */
6803       break;
6804     }
6805
6806   if (t == current_templates->end)
6807     {
6808       /* We found no match.  */
6809       const char *err_msg;
6810       switch (specific_error ? specific_error : i.error)
6811         {
6812         default:
6813           abort ();
6814         case operand_size_mismatch:
6815           err_msg = _("operand size mismatch");
6816           break;
6817         case operand_type_mismatch:
6818           err_msg = _("operand type mismatch");
6819           break;
6820         case register_type_mismatch:
6821           err_msg = _("register type mismatch");
6822           break;
6823         case number_of_operands_mismatch:
6824           err_msg = _("number of operands mismatch");
6825           break;
6826         case invalid_instruction_suffix:
6827           err_msg = _("invalid instruction suffix");
6828           break;
6829         case bad_imm4:
6830           err_msg = _("constant doesn't fit in 4 bits");
6831           break;
6832         case unsupported_with_intel_mnemonic:
6833           err_msg = _("unsupported with Intel mnemonic");
6834           break;
6835         case unsupported_syntax:
6836           err_msg = _("unsupported syntax");
6837           break;
6838         case unsupported:
6839           as_bad (_("unsupported instruction `%s'"),
6840                   current_templates->start->name);
6841           return NULL;
6842         case invalid_sib_address:
6843           err_msg = _("invalid SIB address");
6844           break;
6845         case invalid_vsib_address:
6846           err_msg = _("invalid VSIB address");
6847           break;
6848         case invalid_vector_register_set:
6849           err_msg = _("mask, index, and destination registers must be distinct");
6850           break;
6851         case invalid_tmm_register_set:
6852           err_msg = _("all tmm registers must be distinct");
6853           break;
6854         case unsupported_vector_index_register:
6855           err_msg = _("unsupported vector index register");
6856           break;
6857         case unsupported_broadcast:
6858           err_msg = _("unsupported broadcast");
6859           break;
6860         case broadcast_needed:
6861           err_msg = _("broadcast is needed for operand of such type");
6862           break;
6863         case unsupported_masking:
6864           err_msg = _("unsupported masking");
6865           break;
6866         case mask_not_on_destination:
6867           err_msg = _("mask not on destination operand");
6868           break;
6869         case no_default_mask:
6870           err_msg = _("default mask isn't allowed");
6871           break;
6872         case unsupported_rc_sae:
6873           err_msg = _("unsupported static rounding/sae");
6874           break;
6875         case rc_sae_operand_not_last_imm:
6876           if (intel_syntax)
6877             err_msg = _("RC/SAE operand must precede immediate operands");
6878           else
6879             err_msg = _("RC/SAE operand must follow immediate operands");
6880           break;
6881         case invalid_register_operand:
6882           err_msg = _("invalid register operand");
6883           break;
6884         }
6885       as_bad (_("%s for `%s'"), err_msg,
6886               current_templates->start->name);
6887       return NULL;
6888     }
6889
6890   if (!quiet_warnings)
6891     {
6892       if (!intel_syntax
6893           && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
6894         as_warn (_("indirect %s without `*'"), t->name);
6895
6896       if (t->opcode_modifier.isprefix
6897           && t->opcode_modifier.mnemonicsize == IGNORESIZE)
6898         {
6899           /* Warn them that a data or address size prefix doesn't
6900              affect assembly of the next line of code.  */
6901           as_warn (_("stand-alone `%s' prefix"), t->name);
6902         }
6903     }
6904
6905   /* Copy the template we found.  */
6906   install_template (t);
6907
6908   if (addr_prefix_disp != -1)
6909     i.tm.operand_types[addr_prefix_disp]
6910       = operand_types[addr_prefix_disp];
6911
6912   if (found_reverse_match)
6913     {
6914       /* If we found a reverse match we must alter the opcode direction
6915          bit and clear/flip the regmem modifier one.  found_reverse_match
6916          holds bits to change (different for int & float insns).  */
6917
6918       i.tm.base_opcode ^= found_reverse_match;
6919
6920       i.tm.operand_types[0] = operand_types[i.operands - 1];
6921       i.tm.operand_types[i.operands - 1] = operand_types[0];
6922
6923       /* Certain SIMD insns have their load forms specified in the opcode
6924          table, and hence we need to _set_ RegMem instead of clearing it.
6925          We need to avoid setting the bit though on insns like KMOVW.  */
6926       i.tm.opcode_modifier.regmem
6927         = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
6928           && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
6929           && !i.tm.opcode_modifier.regmem;
6930     }
6931
6932   return t;
6933 }
6934
6935 static int
6936 check_string (void)
6937 {
6938   unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
6939   unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
6940
6941   if (i.seg[op] != NULL && i.seg[op] != reg_es)
6942     {
6943       as_bad (_("`%s' operand %u must use `%ses' segment"),
6944               i.tm.name,
6945               intel_syntax ? i.tm.operands - es_op : es_op + 1,
6946               register_prefix);
6947       return 0;
6948     }
6949
6950   /* There's only ever one segment override allowed per instruction.
6951      This instruction possibly has a legal segment override on the
6952      second operand, so copy the segment to where non-string
6953      instructions store it, allowing common code.  */
6954   i.seg[op] = i.seg[1];
6955
6956   return 1;
6957 }
6958
6959 static int
6960 process_suffix (void)
6961 {
6962   bool is_crc32 = false, is_movx = false;
6963
6964   /* If matched instruction specifies an explicit instruction mnemonic
6965      suffix, use it.  */
6966   if (i.tm.opcode_modifier.size == SIZE16)
6967     i.suffix = WORD_MNEM_SUFFIX;
6968   else if (i.tm.opcode_modifier.size == SIZE32)
6969     i.suffix = LONG_MNEM_SUFFIX;
6970   else if (i.tm.opcode_modifier.size == SIZE64)
6971     i.suffix = QWORD_MNEM_SUFFIX;
6972   else if (i.reg_operands
6973            && (i.operands > 1 || i.types[0].bitfield.class == Reg)
6974            && !i.tm.opcode_modifier.addrprefixopreg)
6975     {
6976       unsigned int numop = i.operands;
6977
6978       /* MOVSX/MOVZX */
6979       is_movx = (i.tm.opcode_modifier.opcodespace == SPACE_0F
6980                  && (i.tm.base_opcode | 8) == 0xbe)
6981                 || (i.tm.opcode_modifier.opcodespace == SPACE_BASE
6982                     && i.tm.base_opcode == 0x63
6983                     && i.tm.cpu_flags.bitfield.cpu64);
6984
6985       /* CRC32 */
6986       is_crc32 = (i.tm.base_opcode == 0xf0
6987                   && i.tm.opcode_modifier.opcodespace == SPACE_0F38
6988                   && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2);
6989
6990       /* movsx/movzx want only their source operand considered here, for the
6991          ambiguity checking below.  The suffix will be replaced afterwards
6992          to represent the destination (register).  */
6993       if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
6994         --i.operands;
6995
6996       /* crc32 needs REX.W set regardless of suffix / source operand size.  */
6997       if (is_crc32 && i.tm.operand_types[1].bitfield.qword)
6998         i.rex |= REX_W;
6999
7000       /* If there's no instruction mnemonic suffix we try to invent one
7001          based on GPR operands.  */
7002       if (!i.suffix)
7003         {
7004           /* We take i.suffix from the last register operand specified,
7005              Destination register type is more significant than source
7006              register type.  crc32 in SSE4.2 prefers source register
7007              type. */
7008           unsigned int op = is_crc32 ? 1 : i.operands;
7009
7010           while (op--)
7011             if (i.tm.operand_types[op].bitfield.instance == InstanceNone
7012                 || i.tm.operand_types[op].bitfield.instance == Accum)
7013               {
7014                 if (i.types[op].bitfield.class != Reg)
7015                   continue;
7016                 if (i.types[op].bitfield.byte)
7017                   i.suffix = BYTE_MNEM_SUFFIX;
7018                 else if (i.types[op].bitfield.word)
7019                   i.suffix = WORD_MNEM_SUFFIX;
7020                 else if (i.types[op].bitfield.dword)
7021                   i.suffix = LONG_MNEM_SUFFIX;
7022                 else if (i.types[op].bitfield.qword)
7023                   i.suffix = QWORD_MNEM_SUFFIX;
7024                 else
7025                   continue;
7026                 break;
7027               }
7028
7029           /* As an exception, movsx/movzx silently default to a byte source
7030              in AT&T mode.  */
7031           if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
7032             i.suffix = BYTE_MNEM_SUFFIX;
7033         }
7034       else if (i.suffix == BYTE_MNEM_SUFFIX)
7035         {
7036           if (intel_syntax
7037               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
7038               && i.tm.opcode_modifier.no_bsuf)
7039             i.suffix = 0;
7040           else if (!check_byte_reg ())
7041             return 0;
7042         }
7043       else if (i.suffix == LONG_MNEM_SUFFIX)
7044         {
7045           if (intel_syntax
7046               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
7047               && i.tm.opcode_modifier.no_lsuf
7048               && !i.tm.opcode_modifier.todword
7049               && !i.tm.opcode_modifier.toqword)
7050             i.suffix = 0;
7051           else if (!check_long_reg ())
7052             return 0;
7053         }
7054       else if (i.suffix == QWORD_MNEM_SUFFIX)
7055         {
7056           if (intel_syntax
7057               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
7058               && i.tm.opcode_modifier.no_qsuf
7059               && !i.tm.opcode_modifier.todword
7060               && !i.tm.opcode_modifier.toqword)
7061             i.suffix = 0;
7062           else if (!check_qword_reg ())
7063             return 0;
7064         }
7065       else if (i.suffix == WORD_MNEM_SUFFIX)
7066         {
7067           if (intel_syntax
7068               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
7069               && i.tm.opcode_modifier.no_wsuf)
7070             i.suffix = 0;
7071           else if (!check_word_reg ())
7072             return 0;
7073         }
7074       else if (intel_syntax
7075                && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
7076         /* Do nothing if the instruction is going to ignore the prefix.  */
7077         ;
7078       else
7079         abort ();
7080
7081       /* Undo the movsx/movzx change done above.  */
7082       i.operands = numop;
7083     }
7084   else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
7085            && !i.suffix)
7086     {
7087       i.suffix = stackop_size;
7088       if (stackop_size == LONG_MNEM_SUFFIX)
7089         {
7090           /* stackop_size is set to LONG_MNEM_SUFFIX for the
7091              .code16gcc directive to support 16-bit mode with
7092              32-bit address.  For IRET without a suffix, generate
7093              16-bit IRET (opcode 0xcf) to return from an interrupt
7094              handler.  */
7095           if (i.tm.base_opcode == 0xcf)
7096             {
7097               i.suffix = WORD_MNEM_SUFFIX;
7098               as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
7099             }
7100           /* Warn about changed behavior for segment register push/pop.  */
7101           else if ((i.tm.base_opcode | 1) == 0x07)
7102             as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
7103                      i.tm.name);
7104         }
7105     }
7106   else if (!i.suffix
7107            && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
7108                || i.tm.opcode_modifier.jump == JUMP_BYTE
7109                || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
7110                || (i.tm.opcode_modifier.opcodespace == SPACE_0F
7111                    && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
7112                    && i.tm.extension_opcode <= 3)))
7113     {
7114       switch (flag_code)
7115         {
7116         case CODE_64BIT:
7117           if (!i.tm.opcode_modifier.no_qsuf)
7118             {
7119               if (i.tm.opcode_modifier.jump == JUMP_BYTE
7120                   || i.tm.opcode_modifier.no_lsuf)
7121                 i.suffix = QWORD_MNEM_SUFFIX;
7122               break;
7123             }
7124           /* Fall through.  */
7125         case CODE_32BIT:
7126           if (!i.tm.opcode_modifier.no_lsuf)
7127             i.suffix = LONG_MNEM_SUFFIX;
7128           break;
7129         case CODE_16BIT:
7130           if (!i.tm.opcode_modifier.no_wsuf)
7131             i.suffix = WORD_MNEM_SUFFIX;
7132           break;
7133         }
7134     }
7135
7136   if (!i.suffix
7137       && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7138           /* Also cover lret/retf/iret in 64-bit mode.  */
7139           || (flag_code == CODE_64BIT
7140               && !i.tm.opcode_modifier.no_lsuf
7141               && !i.tm.opcode_modifier.no_qsuf))
7142       && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7143       /* Explicit sizing prefixes are assumed to disambiguate insns.  */
7144       && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
7145       /* Accept FLDENV et al without suffix.  */
7146       && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
7147     {
7148       unsigned int suffixes, evex = 0;
7149
7150       suffixes = !i.tm.opcode_modifier.no_bsuf;
7151       if (!i.tm.opcode_modifier.no_wsuf)
7152         suffixes |= 1 << 1;
7153       if (!i.tm.opcode_modifier.no_lsuf)
7154         suffixes |= 1 << 2;
7155       if (!i.tm.opcode_modifier.no_ldsuf)
7156         suffixes |= 1 << 3;
7157       if (!i.tm.opcode_modifier.no_ssuf)
7158         suffixes |= 1 << 4;
7159       if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
7160         suffixes |= 1 << 5;
7161
7162       /* For [XYZ]MMWORD operands inspect operand sizes.  While generally
7163          also suitable for AT&T syntax mode, it was requested that this be
7164          restricted to just Intel syntax.  */
7165       if (intel_syntax && is_any_vex_encoding (&i.tm) && !i.broadcast.type)
7166         {
7167           unsigned int op;
7168
7169           for (op = 0; op < i.tm.operands; ++op)
7170             {
7171               if (is_evex_encoding (&i.tm)
7172                   && !cpu_arch_flags.bitfield.cpuavx512vl)
7173                 {
7174                   if (i.tm.operand_types[op].bitfield.ymmword)
7175                     i.tm.operand_types[op].bitfield.xmmword = 0;
7176                   if (i.tm.operand_types[op].bitfield.zmmword)
7177                     i.tm.operand_types[op].bitfield.ymmword = 0;
7178                   if (!i.tm.opcode_modifier.evex
7179                       || i.tm.opcode_modifier.evex == EVEXDYN)
7180                     i.tm.opcode_modifier.evex = EVEX512;
7181                 }
7182
7183               if (i.tm.operand_types[op].bitfield.xmmword
7184                   + i.tm.operand_types[op].bitfield.ymmword
7185                   + i.tm.operand_types[op].bitfield.zmmword < 2)
7186                 continue;
7187
7188               /* Any properly sized operand disambiguates the insn.  */
7189               if (i.types[op].bitfield.xmmword
7190                   || i.types[op].bitfield.ymmword
7191                   || i.types[op].bitfield.zmmword)
7192                 {
7193                   suffixes &= ~(7 << 6);
7194                   evex = 0;
7195                   break;
7196                 }
7197
7198               if ((i.flags[op] & Operand_Mem)
7199                   && i.tm.operand_types[op].bitfield.unspecified)
7200                 {
7201                   if (i.tm.operand_types[op].bitfield.xmmword)
7202                     suffixes |= 1 << 6;
7203                   if (i.tm.operand_types[op].bitfield.ymmword)
7204                     suffixes |= 1 << 7;
7205                   if (i.tm.operand_types[op].bitfield.zmmword)
7206                     suffixes |= 1 << 8;
7207                   if (is_evex_encoding (&i.tm))
7208                     evex = EVEX512;
7209                 }
7210             }
7211         }
7212
7213       /* Are multiple suffixes / operand sizes allowed?  */
7214       if (suffixes & (suffixes - 1))
7215         {
7216           if (intel_syntax
7217               && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7218                   || operand_check == check_error))
7219             {
7220               as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
7221               return 0;
7222             }
7223           if (operand_check == check_error)
7224             {
7225               as_bad (_("no instruction mnemonic suffix given and "
7226                         "no register operands; can't size `%s'"), i.tm.name);
7227               return 0;
7228             }
7229           if (operand_check == check_warning)
7230             as_warn (_("%s; using default for `%s'"),
7231                        intel_syntax
7232                        ? _("ambiguous operand size")
7233                        : _("no instruction mnemonic suffix given and "
7234                            "no register operands"),
7235                        i.tm.name);
7236
7237           if (i.tm.opcode_modifier.floatmf)
7238             i.suffix = SHORT_MNEM_SUFFIX;
7239           else if (is_movx)
7240             /* handled below */;
7241           else if (evex)
7242             i.tm.opcode_modifier.evex = evex;
7243           else if (flag_code == CODE_16BIT)
7244             i.suffix = WORD_MNEM_SUFFIX;
7245           else if (!i.tm.opcode_modifier.no_lsuf)
7246             i.suffix = LONG_MNEM_SUFFIX;
7247           else
7248             i.suffix = QWORD_MNEM_SUFFIX;
7249         }
7250     }
7251
7252   if (is_movx)
7253     {
7254       /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7255          In AT&T syntax, if there is no suffix (warned about above), the default
7256          will be byte extension.  */
7257       if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7258         i.tm.base_opcode |= 1;
7259
7260       /* For further processing, the suffix should represent the destination
7261          (register).  This is already the case when one was used with
7262          mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7263          no suffix to begin with.  */
7264       if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7265         {
7266           if (i.types[1].bitfield.word)
7267             i.suffix = WORD_MNEM_SUFFIX;
7268           else if (i.types[1].bitfield.qword)
7269             i.suffix = QWORD_MNEM_SUFFIX;
7270           else
7271             i.suffix = LONG_MNEM_SUFFIX;
7272
7273           i.tm.opcode_modifier.w = 0;
7274         }
7275     }
7276
7277   if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7278     i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7279                    != (i.tm.operand_types[1].bitfield.class == Reg);
7280
7281   /* Change the opcode based on the operand size given by i.suffix.  */
7282   switch (i.suffix)
7283     {
7284     /* Size floating point instruction.  */
7285     case LONG_MNEM_SUFFIX:
7286       if (i.tm.opcode_modifier.floatmf)
7287         {
7288           i.tm.base_opcode ^= 4;
7289           break;
7290         }
7291     /* fall through */
7292     case WORD_MNEM_SUFFIX:
7293     case QWORD_MNEM_SUFFIX:
7294       /* It's not a byte, select word/dword operation.  */
7295       if (i.tm.opcode_modifier.w)
7296         {
7297           if (i.short_form)
7298             i.tm.base_opcode |= 8;
7299           else
7300             i.tm.base_opcode |= 1;
7301         }
7302     /* fall through */
7303     case SHORT_MNEM_SUFFIX:
7304       /* Now select between word & dword operations via the operand
7305          size prefix, except for instructions that will ignore this
7306          prefix anyway.  */
7307       if (i.suffix != QWORD_MNEM_SUFFIX
7308           && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7309           && !i.tm.opcode_modifier.floatmf
7310           && !is_any_vex_encoding (&i.tm)
7311           && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7312               || (flag_code == CODE_64BIT
7313                   && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7314         {
7315           unsigned int prefix = DATA_PREFIX_OPCODE;
7316
7317           if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7318             prefix = ADDR_PREFIX_OPCODE;
7319
7320           if (!add_prefix (prefix))
7321             return 0;
7322         }
7323
7324       /* Set mode64 for an operand.  */
7325       if (i.suffix == QWORD_MNEM_SUFFIX
7326           && flag_code == CODE_64BIT
7327           && !i.tm.opcode_modifier.norex64
7328           && !i.tm.opcode_modifier.vexw
7329           /* Special case for xchg %rax,%rax.  It is NOP and doesn't
7330              need rex64. */
7331           && ! (i.operands == 2
7332                 && i.tm.base_opcode == 0x90
7333                 && i.tm.extension_opcode == None
7334                 && i.types[0].bitfield.instance == Accum
7335                 && i.types[0].bitfield.qword
7336                 && i.types[1].bitfield.instance == Accum
7337                 && i.types[1].bitfield.qword))
7338         i.rex |= REX_W;
7339
7340       break;
7341
7342     case 0:
7343       /* Select word/dword/qword operation with explicit data sizing prefix
7344          when there are no suitable register operands.  */
7345       if (i.tm.opcode_modifier.w
7346           && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7347           && (!i.reg_operands
7348               || (i.reg_operands == 1
7349                       /* ShiftCount */
7350                   && (i.tm.operand_types[0].bitfield.instance == RegC
7351                       /* InOutPortReg */
7352                       || i.tm.operand_types[0].bitfield.instance == RegD
7353                       || i.tm.operand_types[1].bitfield.instance == RegD
7354                       /* CRC32 */
7355                       || is_crc32))))
7356         i.tm.base_opcode |= 1;
7357       break;
7358     }
7359
7360   if (i.tm.opcode_modifier.addrprefixopreg)
7361     {
7362       gas_assert (!i.suffix);
7363       gas_assert (i.reg_operands);
7364
7365       if (i.tm.operand_types[0].bitfield.instance == Accum
7366           || i.operands == 1)
7367         {
7368           /* The address size override prefix changes the size of the
7369              first operand.  */
7370           if (flag_code == CODE_64BIT
7371               && i.op[0].regs->reg_type.bitfield.word)
7372             {
7373               as_bad (_("16-bit addressing unavailable for `%s'"),
7374                       i.tm.name);
7375               return 0;
7376             }
7377
7378           if ((flag_code == CODE_32BIT
7379                ? i.op[0].regs->reg_type.bitfield.word
7380                : i.op[0].regs->reg_type.bitfield.dword)
7381               && !add_prefix (ADDR_PREFIX_OPCODE))
7382             return 0;
7383         }
7384       else
7385         {
7386           /* Check invalid register operand when the address size override
7387              prefix changes the size of register operands.  */
7388           unsigned int op;
7389           enum { need_word, need_dword, need_qword } need;
7390
7391           /* Check the register operand for the address size prefix if
7392              the memory operand has no real registers, like symbol, DISP
7393              or bogus (x32-only) symbol(%rip) when symbol(%eip) is meant.  */
7394           if (i.mem_operands == 1
7395               && i.reg_operands == 1
7396               && i.operands == 2
7397               && i.types[1].bitfield.class == Reg
7398               && (flag_code == CODE_32BIT
7399                   ? i.op[1].regs->reg_type.bitfield.word
7400                   : i.op[1].regs->reg_type.bitfield.dword)
7401               && ((i.base_reg == NULL && i.index_reg == NULL)
7402 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7403                   || (x86_elf_abi == X86_64_X32_ABI
7404                       && i.base_reg
7405                       && i.base_reg->reg_num == RegIP
7406                       && i.base_reg->reg_type.bitfield.qword))
7407 #else
7408                   || 0)
7409 #endif
7410               && !add_prefix (ADDR_PREFIX_OPCODE))
7411             return 0;
7412
7413           if (flag_code == CODE_32BIT)
7414             need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7415           else if (i.prefix[ADDR_PREFIX])
7416             need = need_dword;
7417           else
7418             need = flag_code == CODE_64BIT ? need_qword : need_word;
7419
7420           for (op = 0; op < i.operands; op++)
7421             {
7422               if (i.types[op].bitfield.class != Reg)
7423                 continue;
7424
7425               switch (need)
7426                 {
7427                 case need_word:
7428                   if (i.op[op].regs->reg_type.bitfield.word)
7429                     continue;
7430                   break;
7431                 case need_dword:
7432                   if (i.op[op].regs->reg_type.bitfield.dword)
7433                     continue;
7434                   break;
7435                 case need_qword:
7436                   if (i.op[op].regs->reg_type.bitfield.qword)
7437                     continue;
7438                   break;
7439                 }
7440
7441               as_bad (_("invalid register operand size for `%s'"),
7442                       i.tm.name);
7443               return 0;
7444             }
7445         }
7446     }
7447
7448   return 1;
7449 }
7450
7451 static int
7452 check_byte_reg (void)
7453 {
7454   int op;
7455
7456   for (op = i.operands; --op >= 0;)
7457     {
7458       /* Skip non-register operands. */
7459       if (i.types[op].bitfield.class != Reg)
7460         continue;
7461
7462       /* If this is an eight bit register, it's OK.  If it's the 16 or
7463          32 bit version of an eight bit register, we will just use the
7464          low portion, and that's OK too.  */
7465       if (i.types[op].bitfield.byte)
7466         continue;
7467
7468       /* I/O port address operands are OK too.  */
7469       if (i.tm.operand_types[op].bitfield.instance == RegD
7470           && i.tm.operand_types[op].bitfield.word)
7471         continue;
7472
7473       /* crc32 only wants its source operand checked here.  */
7474       if (i.tm.base_opcode == 0xf0
7475           && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7476           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2
7477           && op != 0)
7478         continue;
7479
7480       /* Any other register is bad.  */
7481       as_bad (_("`%s%s' not allowed with `%s%c'"),
7482               register_prefix, i.op[op].regs->reg_name,
7483               i.tm.name, i.suffix);
7484       return 0;
7485     }
7486   return 1;
7487 }
7488
7489 static int
7490 check_long_reg (void)
7491 {
7492   int op;
7493
7494   for (op = i.operands; --op >= 0;)
7495     /* Skip non-register operands. */
7496     if (i.types[op].bitfield.class != Reg)
7497       continue;
7498     /* Reject eight bit registers, except where the template requires
7499        them. (eg. movzb)  */
7500     else if (i.types[op].bitfield.byte
7501              && (i.tm.operand_types[op].bitfield.class == Reg
7502                  || i.tm.operand_types[op].bitfield.instance == Accum)
7503              && (i.tm.operand_types[op].bitfield.word
7504                  || i.tm.operand_types[op].bitfield.dword))
7505       {
7506         as_bad (_("`%s%s' not allowed with `%s%c'"),
7507                 register_prefix,
7508                 i.op[op].regs->reg_name,
7509                 i.tm.name,
7510                 i.suffix);
7511         return 0;
7512       }
7513     /* Error if the e prefix on a general reg is missing.  */
7514     else if (i.types[op].bitfield.word
7515              && (i.tm.operand_types[op].bitfield.class == Reg
7516                  || i.tm.operand_types[op].bitfield.instance == Accum)
7517              && i.tm.operand_types[op].bitfield.dword)
7518       {
7519         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7520                 register_prefix, i.op[op].regs->reg_name,
7521                 i.suffix);
7522         return 0;
7523       }
7524     /* Warn if the r prefix on a general reg is present.  */
7525     else if (i.types[op].bitfield.qword
7526              && (i.tm.operand_types[op].bitfield.class == Reg
7527                  || i.tm.operand_types[op].bitfield.instance == Accum)
7528              && i.tm.operand_types[op].bitfield.dword)
7529       {
7530         if (intel_syntax
7531             && i.tm.opcode_modifier.toqword
7532             && i.types[0].bitfield.class != RegSIMD)
7533           {
7534             /* Convert to QWORD.  We want REX byte. */
7535             i.suffix = QWORD_MNEM_SUFFIX;
7536           }
7537         else
7538           {
7539             as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7540                     register_prefix, i.op[op].regs->reg_name,
7541                     i.suffix);
7542             return 0;
7543           }
7544       }
7545   return 1;
7546 }
7547
7548 static int
7549 check_qword_reg (void)
7550 {
7551   int op;
7552
7553   for (op = i.operands; --op >= 0; )
7554     /* Skip non-register operands. */
7555     if (i.types[op].bitfield.class != Reg)
7556       continue;
7557     /* Reject eight bit registers, except where the template requires
7558        them. (eg. movzb)  */
7559     else if (i.types[op].bitfield.byte
7560              && (i.tm.operand_types[op].bitfield.class == Reg
7561                  || i.tm.operand_types[op].bitfield.instance == Accum)
7562              && (i.tm.operand_types[op].bitfield.word
7563                  || i.tm.operand_types[op].bitfield.dword))
7564       {
7565         as_bad (_("`%s%s' not allowed with `%s%c'"),
7566                 register_prefix,
7567                 i.op[op].regs->reg_name,
7568                 i.tm.name,
7569                 i.suffix);
7570         return 0;
7571       }
7572     /* Warn if the r prefix on a general reg is missing.  */
7573     else if ((i.types[op].bitfield.word
7574               || i.types[op].bitfield.dword)
7575              && (i.tm.operand_types[op].bitfield.class == Reg
7576                  || i.tm.operand_types[op].bitfield.instance == Accum)
7577              && i.tm.operand_types[op].bitfield.qword)
7578       {
7579         /* Prohibit these changes in the 64bit mode, since the
7580            lowering is more complicated.  */
7581         if (intel_syntax
7582             && i.tm.opcode_modifier.todword
7583             && i.types[0].bitfield.class != RegSIMD)
7584           {
7585             /* Convert to DWORD.  We don't want REX byte. */
7586             i.suffix = LONG_MNEM_SUFFIX;
7587           }
7588         else
7589           {
7590             as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7591                     register_prefix, i.op[op].regs->reg_name,
7592                     i.suffix);
7593             return 0;
7594           }
7595       }
7596   return 1;
7597 }
7598
7599 static int
7600 check_word_reg (void)
7601 {
7602   int op;
7603   for (op = i.operands; --op >= 0;)
7604     /* Skip non-register operands. */
7605     if (i.types[op].bitfield.class != Reg)
7606       continue;
7607     /* Reject eight bit registers, except where the template requires
7608        them. (eg. movzb)  */
7609     else if (i.types[op].bitfield.byte
7610              && (i.tm.operand_types[op].bitfield.class == Reg
7611                  || i.tm.operand_types[op].bitfield.instance == Accum)
7612              && (i.tm.operand_types[op].bitfield.word
7613                  || i.tm.operand_types[op].bitfield.dword))
7614       {
7615         as_bad (_("`%s%s' not allowed with `%s%c'"),
7616                 register_prefix,
7617                 i.op[op].regs->reg_name,
7618                 i.tm.name,
7619                 i.suffix);
7620         return 0;
7621       }
7622     /* Error if the e or r prefix on a general reg is present.  */
7623     else if ((i.types[op].bitfield.dword
7624                  || i.types[op].bitfield.qword)
7625              && (i.tm.operand_types[op].bitfield.class == Reg
7626                  || i.tm.operand_types[op].bitfield.instance == Accum)
7627              && i.tm.operand_types[op].bitfield.word)
7628       {
7629         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7630                 register_prefix, i.op[op].regs->reg_name,
7631                 i.suffix);
7632         return 0;
7633       }
7634   return 1;
7635 }
7636
7637 static int
7638 update_imm (unsigned int j)
7639 {
7640   i386_operand_type overlap = i.types[j];
7641   if ((overlap.bitfield.imm8
7642        || overlap.bitfield.imm8s
7643        || overlap.bitfield.imm16
7644        || overlap.bitfield.imm32
7645        || overlap.bitfield.imm32s
7646        || overlap.bitfield.imm64)
7647       && !operand_type_equal (&overlap, &imm8)
7648       && !operand_type_equal (&overlap, &imm8s)
7649       && !operand_type_equal (&overlap, &imm16)
7650       && !operand_type_equal (&overlap, &imm32)
7651       && !operand_type_equal (&overlap, &imm32s)
7652       && !operand_type_equal (&overlap, &imm64))
7653     {
7654       if (i.suffix)
7655         {
7656           i386_operand_type temp;
7657
7658           operand_type_set (&temp, 0);
7659           if (i.suffix == BYTE_MNEM_SUFFIX)
7660             {
7661               temp.bitfield.imm8 = overlap.bitfield.imm8;
7662               temp.bitfield.imm8s = overlap.bitfield.imm8s;
7663             }
7664           else if (i.suffix == WORD_MNEM_SUFFIX)
7665             temp.bitfield.imm16 = overlap.bitfield.imm16;
7666           else if (i.suffix == QWORD_MNEM_SUFFIX)
7667             {
7668               temp.bitfield.imm64 = overlap.bitfield.imm64;
7669               temp.bitfield.imm32s = overlap.bitfield.imm32s;
7670             }
7671           else
7672             temp.bitfield.imm32 = overlap.bitfield.imm32;
7673           overlap = temp;
7674         }
7675       else if (operand_type_equal (&overlap, &imm16_32_32s)
7676                || operand_type_equal (&overlap, &imm16_32)
7677                || operand_type_equal (&overlap, &imm16_32s))
7678         {
7679           if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
7680             overlap = imm16;
7681           else
7682             overlap = imm32s;
7683         }
7684       else if (i.prefix[REX_PREFIX] & REX_W)
7685         overlap = operand_type_and (overlap, imm32s);
7686       else if (i.prefix[DATA_PREFIX])
7687         overlap = operand_type_and (overlap,
7688                                     flag_code != CODE_16BIT ? imm16 : imm32);
7689       if (!operand_type_equal (&overlap, &imm8)
7690           && !operand_type_equal (&overlap, &imm8s)
7691           && !operand_type_equal (&overlap, &imm16)
7692           && !operand_type_equal (&overlap, &imm32)
7693           && !operand_type_equal (&overlap, &imm32s)
7694           && !operand_type_equal (&overlap, &imm64))
7695         {
7696           as_bad (_("no instruction mnemonic suffix given; "
7697                     "can't determine immediate size"));
7698           return 0;
7699         }
7700     }
7701   i.types[j] = overlap;
7702
7703   return 1;
7704 }
7705
7706 static int
7707 finalize_imm (void)
7708 {
7709   unsigned int j, n;
7710
7711   /* Update the first 2 immediate operands.  */
7712   n = i.operands > 2 ? 2 : i.operands;
7713   if (n)
7714     {
7715       for (j = 0; j < n; j++)
7716         if (update_imm (j) == 0)
7717           return 0;
7718
7719       /* The 3rd operand can't be immediate operand.  */
7720       gas_assert (operand_type_check (i.types[2], imm) == 0);
7721     }
7722
7723   return 1;
7724 }
7725
7726 static int
7727 process_operands (void)
7728 {
7729   /* Default segment register this instruction will use for memory
7730      accesses.  0 means unknown.  This is only for optimizing out
7731      unnecessary segment overrides.  */
7732   const reg_entry *default_seg = NULL;
7733
7734   if (i.tm.opcode_modifier.sse2avx)
7735     {
7736       /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
7737          need converting.  */
7738       i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
7739       i.prefix[REX_PREFIX] = 0;
7740       i.rex_encoding = 0;
7741     }
7742   /* ImmExt should be processed after SSE2AVX.  */
7743   else if (i.tm.opcode_modifier.immext)
7744     process_immext ();
7745
7746   if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
7747     {
7748       unsigned int dupl = i.operands;
7749       unsigned int dest = dupl - 1;
7750       unsigned int j;
7751
7752       /* The destination must be an xmm register.  */
7753       gas_assert (i.reg_operands
7754                   && MAX_OPERANDS > dupl
7755                   && operand_type_equal (&i.types[dest], &regxmm));
7756
7757       if (i.tm.operand_types[0].bitfield.instance == Accum
7758           && i.tm.operand_types[0].bitfield.xmmword)
7759         {
7760           if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
7761             {
7762               /* Keep xmm0 for instructions with VEX prefix and 3
7763                  sources.  */
7764               i.tm.operand_types[0].bitfield.instance = InstanceNone;
7765               i.tm.operand_types[0].bitfield.class = RegSIMD;
7766               goto duplicate;
7767             }
7768           else
7769             {
7770               /* We remove the first xmm0 and keep the number of
7771                  operands unchanged, which in fact duplicates the
7772                  destination.  */
7773               for (j = 1; j < i.operands; j++)
7774                 {
7775                   i.op[j - 1] = i.op[j];
7776                   i.types[j - 1] = i.types[j];
7777                   i.tm.operand_types[j - 1] = i.tm.operand_types[j];
7778                   i.flags[j - 1] = i.flags[j];
7779                 }
7780             }
7781         }
7782       else if (i.tm.opcode_modifier.implicit1stxmm0)
7783         {
7784           gas_assert ((MAX_OPERANDS - 1) > dupl
7785                       && (i.tm.opcode_modifier.vexsources
7786                           == VEX3SOURCES));
7787
7788           /* Add the implicit xmm0 for instructions with VEX prefix
7789              and 3 sources.  */
7790           for (j = i.operands; j > 0; j--)
7791             {
7792               i.op[j] = i.op[j - 1];
7793               i.types[j] = i.types[j - 1];
7794               i.tm.operand_types[j] = i.tm.operand_types[j - 1];
7795               i.flags[j] = i.flags[j - 1];
7796             }
7797           i.op[0].regs
7798             = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
7799           i.types[0] = regxmm;
7800           i.tm.operand_types[0] = regxmm;
7801
7802           i.operands += 2;
7803           i.reg_operands += 2;
7804           i.tm.operands += 2;
7805
7806           dupl++;
7807           dest++;
7808           i.op[dupl] = i.op[dest];
7809           i.types[dupl] = i.types[dest];
7810           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7811           i.flags[dupl] = i.flags[dest];
7812         }
7813       else
7814         {
7815         duplicate:
7816           i.operands++;
7817           i.reg_operands++;
7818           i.tm.operands++;
7819
7820           i.op[dupl] = i.op[dest];
7821           i.types[dupl] = i.types[dest];
7822           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7823           i.flags[dupl] = i.flags[dest];
7824         }
7825
7826        if (i.tm.opcode_modifier.immext)
7827          process_immext ();
7828     }
7829   else if (i.tm.operand_types[0].bitfield.instance == Accum
7830            && i.tm.operand_types[0].bitfield.xmmword)
7831     {
7832       unsigned int j;
7833
7834       for (j = 1; j < i.operands; j++)
7835         {
7836           i.op[j - 1] = i.op[j];
7837           i.types[j - 1] = i.types[j];
7838
7839           /* We need to adjust fields in i.tm since they are used by
7840              build_modrm_byte.  */
7841           i.tm.operand_types [j - 1] = i.tm.operand_types [j];
7842
7843           i.flags[j - 1] = i.flags[j];
7844         }
7845
7846       i.operands--;
7847       i.reg_operands--;
7848       i.tm.operands--;
7849     }
7850   else if (i.tm.opcode_modifier.implicitquadgroup)
7851     {
7852       unsigned int regnum, first_reg_in_group, last_reg_in_group;
7853
7854       /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
7855       gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
7856       regnum = register_number (i.op[1].regs);
7857       first_reg_in_group = regnum & ~3;
7858       last_reg_in_group = first_reg_in_group + 3;
7859       if (regnum != first_reg_in_group)
7860         as_warn (_("source register `%s%s' implicitly denotes"
7861                    " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
7862                  register_prefix, i.op[1].regs->reg_name,
7863                  register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
7864                  register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
7865                  i.tm.name);
7866     }
7867   else if (i.tm.opcode_modifier.regkludge)
7868     {
7869       /* The imul $imm, %reg instruction is converted into
7870          imul $imm, %reg, %reg, and the clr %reg instruction
7871          is converted into xor %reg, %reg.  */
7872
7873       unsigned int first_reg_op;
7874
7875       if (operand_type_check (i.types[0], reg))
7876         first_reg_op = 0;
7877       else
7878         first_reg_op = 1;
7879       /* Pretend we saw the extra register operand.  */
7880       gas_assert (i.reg_operands == 1
7881                   && i.op[first_reg_op + 1].regs == 0);
7882       i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
7883       i.types[first_reg_op + 1] = i.types[first_reg_op];
7884       i.operands++;
7885       i.reg_operands++;
7886     }
7887
7888   if (i.tm.opcode_modifier.modrm)
7889     {
7890       /* The opcode is completed (modulo i.tm.extension_opcode which
7891          must be put into the modrm byte).  Now, we make the modrm and
7892          index base bytes based on all the info we've collected.  */
7893
7894       default_seg = build_modrm_byte ();
7895     }
7896   else if (i.types[0].bitfield.class == SReg)
7897     {
7898       if (flag_code != CODE_64BIT
7899           ? i.tm.base_opcode == POP_SEG_SHORT
7900             && i.op[0].regs->reg_num == 1
7901           : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
7902             && i.op[0].regs->reg_num < 4)
7903         {
7904           as_bad (_("you can't `%s %s%s'"),
7905                   i.tm.name, register_prefix, i.op[0].regs->reg_name);
7906           return 0;
7907         }
7908       if (i.op[0].regs->reg_num > 3
7909           && i.tm.opcode_modifier.opcodespace == SPACE_BASE )
7910         {
7911           i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
7912           i.tm.opcode_modifier.opcodespace = SPACE_0F;
7913         }
7914       i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
7915     }
7916   else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
7917            && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
7918     {
7919       default_seg = reg_ds;
7920     }
7921   else if (i.tm.opcode_modifier.isstring)
7922     {
7923       /* For the string instructions that allow a segment override
7924          on one of their operands, the default segment is ds.  */
7925       default_seg = reg_ds;
7926     }
7927   else if (i.short_form)
7928     {
7929       /* The register or float register operand is in operand
7930          0 or 1.  */
7931       unsigned int op = i.tm.operand_types[0].bitfield.class != Reg;
7932
7933       /* Register goes in low 3 bits of opcode.  */
7934       i.tm.base_opcode |= i.op[op].regs->reg_num;
7935       if ((i.op[op].regs->reg_flags & RegRex) != 0)
7936         i.rex |= REX_B;
7937       if (!quiet_warnings && i.tm.opcode_modifier.ugh)
7938         {
7939           /* Warn about some common errors, but press on regardless.
7940              The first case can be generated by gcc (<= 2.8.1).  */
7941           if (i.operands == 2)
7942             {
7943               /* Reversed arguments on faddp, fsubp, etc.  */
7944               as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
7945                        register_prefix, i.op[!intel_syntax].regs->reg_name,
7946                        register_prefix, i.op[intel_syntax].regs->reg_name);
7947             }
7948           else
7949             {
7950               /* Extraneous `l' suffix on fp insn.  */
7951               as_warn (_("translating to `%s %s%s'"), i.tm.name,
7952                        register_prefix, i.op[0].regs->reg_name);
7953             }
7954         }
7955     }
7956
7957   if ((i.seg[0] || i.prefix[SEG_PREFIX])
7958       && i.tm.base_opcode == 0x8d /* lea */
7959       && i.tm.opcode_modifier.opcodespace == SPACE_BASE
7960       && !is_any_vex_encoding(&i.tm))
7961     {
7962       if (!quiet_warnings)
7963         as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
7964       if (optimize)
7965         {
7966           i.seg[0] = NULL;
7967           i.prefix[SEG_PREFIX] = 0;
7968         }
7969     }
7970
7971   /* If a segment was explicitly specified, and the specified segment
7972      is neither the default nor the one already recorded from a prefix,
7973      use an opcode prefix to select it.  If we never figured out what
7974      the default segment is, then default_seg will be zero at this
7975      point, and the specified segment prefix will always be used.  */
7976   if (i.seg[0]
7977       && i.seg[0] != default_seg
7978       && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
7979     {
7980       if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
7981         return 0;
7982     }
7983   return 1;
7984 }
7985
7986 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
7987                                  bool do_sse2avx)
7988 {
       /* Fold the extension flags of register R into the global
          instruction state `i'.  REX_BIT is the bit to record in i.rex
          and/or i.vrex; the callers in build_modrm_byte pass REX_R,
          REX_B or REX_X.  DO_SSE2AVX enables the VEX register specifier
          fix-up in the `else if' arm below.  Nothing is returned; the
          only diagnostic is issued through as_bad.  */

       /* R is flagged RegRex, so REX_BIT is needed.  Finding the bit
          already set is diagnosed, but the bit is recorded either way.  */
7989   if (r->reg_flags & RegRex)
7990     {
7991       if (i.rex & rex_bit)
7992         as_bad (_("same type of prefix used twice"));
7993       i.rex |= rex_bit;
7994     }
       /* R is not flagged RegRex, yet REX_BIT is already set in i.rex and
          a VEX register specifier has been recorded.  The specifier is
          asserted to be R itself and is then advanced by eight reg_entry
          elements.
          NOTE(review): presumably the bit stems from an explicit REX
          prefix and the +8 selects the register numbered 8 higher,
          relying on consecutive entries in the register table -- confirm
          against the table layout.  */
7995   else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
7996     {
7997       gas_assert (i.vex.register_specifier == r);
7998       i.vex.register_specifier += 8;
7999     }
8000
       /* Independently of the above, a register flagged RegVRex gets the
          same bit recorded in i.vrex.  */
8001   if (r->reg_flags & RegVRex)
8002     i.vrex |= rex_bit;
8003 }
8004
8005 static const reg_entry *
8006 build_modrm_byte (void)
8007 {
8008   const reg_entry *default_seg = NULL;
8009   unsigned int source, dest;
8010   int vex_3_sources;
8011
8012   vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
8013   if (vex_3_sources)
8014     {
8015       unsigned int nds, reg_slot;
8016       expressionS *exp;
8017
8018       dest = i.operands - 1;
8019       nds = dest - 1;
8020
8021       /* There are 2 kinds of instructions:
8022          1. 5 operands: 4 register operands or 3 register operands
8023          plus 1 memory operand plus one Imm4 operand, VexXDS, and
8024          VexW0 or VexW1.  The destination must be either XMM, YMM or
8025          ZMM register.
8026          2. 4 operands: 4 register operands or 3 register operands
8027          plus 1 memory operand, with VexXDS.  */
8028       gas_assert ((i.reg_operands == 4
8029                    || (i.reg_operands == 3 && i.mem_operands == 1))
8030                   && i.tm.opcode_modifier.vexvvvv == VEXXDS
8031                   && i.tm.opcode_modifier.vexw
8032                   && i.tm.operand_types[dest].bitfield.class == RegSIMD);
8033
8034       /* If VexW1 is set, the first non-immediate operand is the source and
8035          the second non-immediate one is encoded in the immediate operand.  */
8036       if (i.tm.opcode_modifier.vexw == VEXW1)
8037         {
8038           source = i.imm_operands;
8039           reg_slot = i.imm_operands + 1;
8040         }
8041       else
8042         {
8043           source = i.imm_operands + 1;
8044           reg_slot = i.imm_operands;
8045         }
8046
8047       if (i.imm_operands == 0)
8048         {
8049           /* When there is no immediate operand, generate an 8bit
8050              immediate operand to encode the first operand.  */
8051           exp = &im_expressions[i.imm_operands++];
8052           i.op[i.operands].imms = exp;
8053           i.types[i.operands] = imm8;
8054           i.operands++;
8055
8056           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8057           exp->X_op = O_constant;
8058           exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
8059           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8060         }
8061       else
8062         {
8063           gas_assert (i.imm_operands == 1);
8064           gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
8065           gas_assert (!i.tm.opcode_modifier.immext);
8066
8067           /* Turn on Imm8 again so that output_imm will generate it.  */
8068           i.types[0].bitfield.imm8 = 1;
8069
8070           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8071           i.op[0].imms->X_add_number
8072               |= register_number (i.op[reg_slot].regs) << 4;
8073           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8074         }
8075
8076       gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
8077       i.vex.register_specifier = i.op[nds].regs;
8078     }
8079   else
8080     source = dest = 0;
8081
8082   /* i.reg_operands MUST be the number of real register operands;
8083      implicit registers do not count.  If there are 3 register
8084      operands, it must be an instruction with VexNDS.  For an
8085      instruction with VexNDD, the destination register is encoded
8086      in VEX prefix.  If there are 4 register operands, it must be
8087      an instruction with VEX prefix and 3 sources.  */
8088   if (i.mem_operands == 0
8089       && ((i.reg_operands == 2
8090            && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
8091           || (i.reg_operands == 3
8092               && i.tm.opcode_modifier.vexvvvv == VEXXDS)
8093           || (i.reg_operands == 4 && vex_3_sources)))
8094     {
8095       switch (i.operands)
8096         {
8097         case 2:
8098           source = 0;
8099           break;
8100         case 3:
8101           /* When there are 3 operands, one of them may be immediate,
8102              which may be the first or the last operand.  Otherwise,
8103              the first operand must be shift count register (cl) or it
8104              is an instruction with VexNDS. */
8105           gas_assert (i.imm_operands == 1
8106                       || (i.imm_operands == 0
8107                           && (i.tm.opcode_modifier.vexvvvv == VEXXDS
8108                               || (i.types[0].bitfield.instance == RegC
8109                                   && i.types[0].bitfield.byte))));
8110           if (operand_type_check (i.types[0], imm)
8111               || (i.types[0].bitfield.instance == RegC
8112                   && i.types[0].bitfield.byte))
8113             source = 1;
8114           else
8115             source = 0;
8116           break;
8117         case 4:
8118           /* When there are 4 operands, the first two must be 8bit
8119              immediate operands. The source operand will be the 3rd
8120              one.
8121
8122              For instructions with VexNDS, if the first operand is
8123              an imm8, the source operand is the 2nd one.  If the last
8124              operand is imm8, the source operand is the first one.  */
8125           gas_assert ((i.imm_operands == 2
8126                        && i.types[0].bitfield.imm8
8127                        && i.types[1].bitfield.imm8)
8128                       || (i.tm.opcode_modifier.vexvvvv == VEXXDS
8129                           && i.imm_operands == 1
8130                           && (i.types[0].bitfield.imm8
8131                               || i.types[i.operands - 1].bitfield.imm8
8132                               || i.rounding.type != rc_none)));
8133           if (i.imm_operands == 2)
8134             source = 2;
8135           else
8136             {
8137               if (i.types[0].bitfield.imm8)
8138                 source = 1;
8139               else
8140                 source = 0;
8141             }
8142           break;
8143         case 5:
8144           if (is_evex_encoding (&i.tm))
8145             {
8146               /* For EVEX instructions, when there are 5 operands, the
8147                  first one must be immediate operand.  If the second one
8148                  is immediate operand, the source operand is the 3rd
8149                  one.  If the last one is immediate operand, the source
8150                  operand is the 2nd one.  */
8151               gas_assert (i.imm_operands == 2
8152                           && i.tm.opcode_modifier.sae
8153                           && operand_type_check (i.types[0], imm));
8154               if (operand_type_check (i.types[1], imm))
8155                 source = 2;
8156               else if (operand_type_check (i.types[4], imm))
8157                 source = 1;
8158               else
8159                 abort ();
8160             }
8161           break;
8162         default:
8163           abort ();
8164         }
8165
8166       if (!vex_3_sources)
8167         {
8168           dest = source + 1;
8169
8170           /* RC/SAE operand could be between DEST and SRC.  That happens
8171              when one operand is GPR and the other one is XMM/YMM/ZMM
8172              register.  */
8173           if (i.rounding.type != rc_none && i.rounding.operand == dest)
8174             dest++;
8175
8176           if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8177             {
8178               /* For instructions with VexNDS, the register-only source
8179                  operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
8180                  register.  It is encoded in VEX prefix.  */
8181
8182               i386_operand_type op;
8183               unsigned int vvvv;
8184
8185               /* Swap two source operands if needed.  */
8186               if (i.tm.opcode_modifier.swapsources)
8187                 {
8188                   vvvv = source;
8189                   source = dest;
8190                 }
8191               else
8192                 vvvv = dest;
8193
8194               op = i.tm.operand_types[vvvv];
8195               if ((dest + 1) >= i.operands
8196                   || ((op.bitfield.class != Reg
8197                        || (!op.bitfield.dword && !op.bitfield.qword))
8198                       && op.bitfield.class != RegSIMD
8199                       && !operand_type_equal (&op, &regmask)))
8200                 abort ();
8201               i.vex.register_specifier = i.op[vvvv].regs;
8202               dest++;
8203             }
8204         }
8205
8206       i.rm.mode = 3;
8207       /* One of the register operands will be encoded in the i.rm.reg
8208          field, the other in the combined i.rm.mode and i.rm.regmem
8209          fields.  If no form of this instruction supports a memory
8210          destination operand, then we assume the source operand may
8211          sometimes be a memory operand and so we need to store the
8212          destination in the i.rm.reg field.  */
8213       if (!i.tm.opcode_modifier.regmem
8214           && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
8215         {
8216           i.rm.reg = i.op[dest].regs->reg_num;
8217           i.rm.regmem = i.op[source].regs->reg_num;
8218           set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8219           set_rex_vrex (i.op[source].regs, REX_B, false);
8220         }
8221       else
8222         {
8223           i.rm.reg = i.op[source].regs->reg_num;
8224           i.rm.regmem = i.op[dest].regs->reg_num;
8225           set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8226           set_rex_vrex (i.op[source].regs, REX_R, false);
8227         }
8228       if (flag_code != CODE_64BIT && (i.rex & REX_R))
8229         {
8230           if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
8231             abort ();
8232           i.rex &= ~REX_R;
8233           add_prefix (LOCK_PREFIX_OPCODE);
8234         }
8235     }
8236   else
8237     {                   /* If it's not 2 reg operands...  */
8238       unsigned int mem;
8239
8240       if (i.mem_operands)
8241         {
8242           unsigned int fake_zero_displacement = 0;
8243           unsigned int op;
8244
8245           for (op = 0; op < i.operands; op++)
8246             if (i.flags[op] & Operand_Mem)
8247               break;
8248           gas_assert (op < i.operands);
8249
8250           if (i.tm.opcode_modifier.sib)
8251             {
8252               /* The index register of VSIB shouldn't be RegIZ.  */
8253               if (i.tm.opcode_modifier.sib != SIBMEM
8254                   && i.index_reg->reg_num == RegIZ)
8255                 abort ();
8256
8257               i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8258               if (!i.base_reg)
8259                 {
8260                   i.sib.base = NO_BASE_REGISTER;
8261                   i.sib.scale = i.log2_scale_factor;
8262                   i.types[op].bitfield.disp8 = 0;
8263                   i.types[op].bitfield.disp16 = 0;
8264                   i.types[op].bitfield.disp64 = 0;
8265                   if (want_disp32 (&i.tm))
8266                     {
8267                       /* Must be 32 bit */
8268                       i.types[op].bitfield.disp32 = 1;
8269                       i.types[op].bitfield.disp32s = 0;
8270                     }
8271                   else
8272                     {
8273                       i.types[op].bitfield.disp32 = 0;
8274                       i.types[op].bitfield.disp32s = 1;
8275                     }
8276                 }
8277
8278               /* Since the mandatory SIB always has index register, so
8279                  the code logic remains unchanged. The non-mandatory SIB
8280                  without index register is allowed and will be handled
8281                  later.  */
8282               if (i.index_reg)
8283                 {
8284                   if (i.index_reg->reg_num == RegIZ)
8285                     i.sib.index = NO_INDEX_REGISTER;
8286                   else
8287                     i.sib.index = i.index_reg->reg_num;
8288                   set_rex_vrex (i.index_reg, REX_X, false);
8289                 }
8290             }
8291
8292           default_seg = reg_ds;
8293
8294           if (i.base_reg == 0)
8295             {
8296               i.rm.mode = 0;
8297               if (!i.disp_operands)
8298                 fake_zero_displacement = 1;
8299               if (i.index_reg == 0)
8300                 {
8301                   i386_operand_type newdisp;
8302
8303                   /* Both check for VSIB and mandatory non-vector SIB. */
8304                   gas_assert (!i.tm.opcode_modifier.sib
8305                               || i.tm.opcode_modifier.sib == SIBMEM);
8306                   /* Operand is just <disp>  */
8307                   if (flag_code == CODE_64BIT)
8308                     {
8309                       /* 64bit mode overwrites the 32bit absolute
8310                          addressing by RIP relative addressing and
8311                          absolute addressing is encoded by one of the
8312                          redundant SIB forms.  */
8313                       i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8314                       i.sib.base = NO_BASE_REGISTER;
8315                       i.sib.index = NO_INDEX_REGISTER;
8316                       newdisp = (want_disp32(&i.tm) ? disp32 : disp32s);
8317                     }
8318                   else if ((flag_code == CODE_16BIT)
8319                            ^ (i.prefix[ADDR_PREFIX] != 0))
8320                     {
8321                       i.rm.regmem = NO_BASE_REGISTER_16;
8322                       newdisp = disp16;
8323                     }
8324                   else
8325                     {
8326                       i.rm.regmem = NO_BASE_REGISTER;
8327                       newdisp = disp32;
8328                     }
8329                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8330                   i.types[op] = operand_type_or (i.types[op], newdisp);
8331                 }
8332               else if (!i.tm.opcode_modifier.sib)
8333                 {
8334                   /* !i.base_reg && i.index_reg  */
8335                   if (i.index_reg->reg_num == RegIZ)
8336                     i.sib.index = NO_INDEX_REGISTER;
8337                   else
8338                     i.sib.index = i.index_reg->reg_num;
8339                   i.sib.base = NO_BASE_REGISTER;
8340                   i.sib.scale = i.log2_scale_factor;
8341                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8342                   i.types[op].bitfield.disp8 = 0;
8343                   i.types[op].bitfield.disp16 = 0;
8344                   i.types[op].bitfield.disp64 = 0;
8345                   if (want_disp32 (&i.tm))
8346                     {
8347                       /* Must be 32 bit */
8348                       i.types[op].bitfield.disp32 = 1;
8349                       i.types[op].bitfield.disp32s = 0;
8350                     }
8351                   else
8352                     {
8353                       i.types[op].bitfield.disp32 = 0;
8354                       i.types[op].bitfield.disp32s = 1;
8355                     }
8356                   if ((i.index_reg->reg_flags & RegRex) != 0)
8357                     i.rex |= REX_X;
8358                 }
8359             }
8360           /* RIP addressing for 64bit mode.  */
8361           else if (i.base_reg->reg_num == RegIP)
8362             {
8363               gas_assert (!i.tm.opcode_modifier.sib);
8364               i.rm.regmem = NO_BASE_REGISTER;
8365               i.types[op].bitfield.disp8 = 0;
8366               i.types[op].bitfield.disp16 = 0;
8367               i.types[op].bitfield.disp32 = 0;
8368               i.types[op].bitfield.disp32s = 1;
8369               i.types[op].bitfield.disp64 = 0;
8370               i.flags[op] |= Operand_PCrel;
8371               if (! i.disp_operands)
8372                 fake_zero_displacement = 1;
8373             }
8374           else if (i.base_reg->reg_type.bitfield.word)
8375             {
8376               gas_assert (!i.tm.opcode_modifier.sib);
8377               switch (i.base_reg->reg_num)
8378                 {
8379                 case 3: /* (%bx)  */
8380                   if (i.index_reg == 0)
8381                     i.rm.regmem = 7;
8382                   else /* (%bx,%si) -> 0, or (%bx,%di) -> 1  */
8383                     i.rm.regmem = i.index_reg->reg_num - 6;
8384                   break;
8385                 case 5: /* (%bp)  */
8386                   default_seg = reg_ss;
8387                   if (i.index_reg == 0)
8388                     {
8389                       i.rm.regmem = 6;
8390                       if (operand_type_check (i.types[op], disp) == 0)
8391                         {
8392                           /* fake (%bp) into 0(%bp)  */
8393                           if (i.disp_encoding == disp_encoding_16bit)
8394                             i.types[op].bitfield.disp16 = 1;
8395                           else
8396                             i.types[op].bitfield.disp8 = 1;
8397                           fake_zero_displacement = 1;
8398                         }
8399                     }
8400                   else /* (%bp,%si) -> 2, or (%bp,%di) -> 3  */
8401                     i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8402                   break;
8403                 default: /* (%si) -> 4 or (%di) -> 5  */
8404                   i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8405                 }
8406               if (!fake_zero_displacement
8407                   && !i.disp_operands
8408                   && i.disp_encoding)
8409                 {
8410                   fake_zero_displacement = 1;
8411                   if (i.disp_encoding == disp_encoding_8bit)
8412                     i.types[op].bitfield.disp8 = 1;
8413                   else
8414                     i.types[op].bitfield.disp16 = 1;
8415                 }
8416               i.rm.mode = mode_from_disp_size (i.types[op]);
8417             }
8418           else /* i.base_reg and 32/64 bit mode  */
8419             {
8420               if (operand_type_check (i.types[op], disp))
8421                 {
8422                   i.types[op].bitfield.disp16 = 0;
8423                   i.types[op].bitfield.disp64 = 0;
8424                   if (!want_disp32 (&i.tm))
8425                     {
8426                       i.types[op].bitfield.disp32 = 0;
8427                       i.types[op].bitfield.disp32s = 1;
8428                     }
8429                   else
8430                     {
8431                       i.types[op].bitfield.disp32 = 1;
8432                       i.types[op].bitfield.disp32s = 0;
8433                     }
8434                 }
8435
8436               if (!i.tm.opcode_modifier.sib)
8437                 i.rm.regmem = i.base_reg->reg_num;
8438               if ((i.base_reg->reg_flags & RegRex) != 0)
8439                 i.rex |= REX_B;
8440               i.sib.base = i.base_reg->reg_num;
8441               /* x86-64 ignores REX prefix bit here to avoid decoder
8442                  complications.  */
8443               if (!(i.base_reg->reg_flags & RegRex)
8444                   && (i.base_reg->reg_num == EBP_REG_NUM
8445                    || i.base_reg->reg_num == ESP_REG_NUM))
8446                   default_seg = reg_ss;
8447               if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8448                 {
8449                   fake_zero_displacement = 1;
8450                   if (i.disp_encoding == disp_encoding_32bit)
8451                     i.types[op].bitfield.disp32 = 1;
8452                   else
8453                     i.types[op].bitfield.disp8 = 1;
8454                 }
8455               i.sib.scale = i.log2_scale_factor;
8456               if (i.index_reg == 0)
8457                 {
8458                   /* Only check for VSIB. */
8459                   gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8460                               && i.tm.opcode_modifier.sib != VECSIB256
8461                               && i.tm.opcode_modifier.sib != VECSIB512);
8462
8463                   /* <disp>(%esp) becomes two byte modrm with no index
8464                      register.  We've already stored the code for esp
8465                      in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8466                      Any base register besides %esp will not use the
8467                      extra modrm byte.  */
8468                   i.sib.index = NO_INDEX_REGISTER;
8469                 }
8470               else if (!i.tm.opcode_modifier.sib)
8471                 {
8472                   if (i.index_reg->reg_num == RegIZ)
8473                     i.sib.index = NO_INDEX_REGISTER;
8474                   else
8475                     i.sib.index = i.index_reg->reg_num;
8476                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8477                   if ((i.index_reg->reg_flags & RegRex) != 0)
8478                     i.rex |= REX_X;
8479                 }
8480
8481               if (i.disp_operands
8482                   && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8483                       || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8484                 i.rm.mode = 0;
8485               else
8486                 {
8487                   if (!fake_zero_displacement
8488                       && !i.disp_operands
8489                       && i.disp_encoding)
8490                     {
8491                       fake_zero_displacement = 1;
8492                       if (i.disp_encoding == disp_encoding_8bit)
8493                         i.types[op].bitfield.disp8 = 1;
8494                       else
8495                         i.types[op].bitfield.disp32 = 1;
8496                     }
8497                   i.rm.mode = mode_from_disp_size (i.types[op]);
8498                 }
8499             }
8500
8501           if (fake_zero_displacement)
8502             {
8503               /* Fakes a zero displacement assuming that i.types[op]
8504                  holds the correct displacement size.  */
8505               expressionS *exp;
8506
8507               gas_assert (i.op[op].disps == 0);
8508               exp = &disp_expressions[i.disp_operands++];
8509               i.op[op].disps = exp;
8510               exp->X_op = O_constant;
8511               exp->X_add_number = 0;
8512               exp->X_add_symbol = (symbolS *) 0;
8513               exp->X_op_symbol = (symbolS *) 0;
8514             }
8515
8516           mem = op;
8517         }
8518       else
8519         mem = ~0;
8520
8521       if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
8522         {
8523           if (operand_type_check (i.types[0], imm))
8524             i.vex.register_specifier = NULL;
8525           else
8526             {
8527               /* VEX.vvvv encodes one of the sources when the first
8528                  operand is not an immediate.  */
8529               if (i.tm.opcode_modifier.vexw == VEXW0)
8530                 i.vex.register_specifier = i.op[0].regs;
8531               else
8532                 i.vex.register_specifier = i.op[1].regs;
8533             }
8534
8535           /* Destination is a XMM register encoded in the ModRM.reg
8536              and VEX.R bit.  */
8537           i.rm.reg = i.op[2].regs->reg_num;
8538           if ((i.op[2].regs->reg_flags & RegRex) != 0)
8539             i.rex |= REX_R;
8540
8541           /* ModRM.rm and VEX.B encodes the other source.  */
8542           if (!i.mem_operands)
8543             {
8544               i.rm.mode = 3;
8545
8546               if (i.tm.opcode_modifier.vexw == VEXW0)
8547                 i.rm.regmem = i.op[1].regs->reg_num;
8548               else
8549                 i.rm.regmem = i.op[0].regs->reg_num;
8550
8551               if ((i.op[1].regs->reg_flags & RegRex) != 0)
8552                 i.rex |= REX_B;
8553             }
8554         }
8555       else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
8556         {
8557           i.vex.register_specifier = i.op[2].regs;
8558           if (!i.mem_operands)
8559             {
8560               i.rm.mode = 3;
8561               i.rm.regmem = i.op[1].regs->reg_num;
8562               if ((i.op[1].regs->reg_flags & RegRex) != 0)
8563                 i.rex |= REX_B;
8564             }
8565         }
8566       /* Fill in i.rm.reg or i.rm.regmem field with register operand
8567          (if any) based on i.tm.extension_opcode.  Again, we must be
8568          careful to make sure that segment/control/debug/test/MMX
8569          registers are coded into the i.rm.reg field.  */
8570       else if (i.reg_operands)
8571         {
8572           unsigned int op;
8573           unsigned int vex_reg = ~0;
8574
8575           for (op = 0; op < i.operands; op++)
8576             if (i.types[op].bitfield.class == Reg
8577                 || i.types[op].bitfield.class == RegBND
8578                 || i.types[op].bitfield.class == RegMask
8579                 || i.types[op].bitfield.class == SReg
8580                 || i.types[op].bitfield.class == RegCR
8581                 || i.types[op].bitfield.class == RegDR
8582                 || i.types[op].bitfield.class == RegTR
8583                 || i.types[op].bitfield.class == RegSIMD
8584                 || i.types[op].bitfield.class == RegMMX)
8585               break;
8586
8587           if (vex_3_sources)
8588             op = dest;
8589           else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8590             {
8591               /* For instructions with VexNDS, the register-only
8592                  source operand is encoded in VEX prefix. */
8593               gas_assert (mem != (unsigned int) ~0);
8594
8595               if (op > mem)
8596                 {
8597                   vex_reg = op++;
8598                   gas_assert (op < i.operands);
8599                 }
8600               else
8601                 {
8602                   /* Check register-only source operand when two source
8603                      operands are swapped.  */
8604                   if (!i.tm.operand_types[op].bitfield.baseindex
8605                       && i.tm.operand_types[op + 1].bitfield.baseindex)
8606                     {
8607                       vex_reg = op;
8608                       op += 2;
8609                       gas_assert (mem == (vex_reg + 1)
8610                                   && op < i.operands);
8611                     }
8612                   else
8613                     {
8614                       vex_reg = op + 1;
8615                       gas_assert (vex_reg < i.operands);
8616                     }
8617                 }
8618             }
8619           else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
8620             {
8621               /* For instructions with VexNDD, the register destination
8622                  is encoded in VEX prefix.  */
8623               if (i.mem_operands == 0)
8624                 {
8625                   /* There is no memory operand.  */
8626                   gas_assert ((op + 2) == i.operands);
8627                   vex_reg = op + 1;
8628                 }
8629               else
8630                 {
8631                   /* There are only 2 non-immediate operands.  */
8632                   gas_assert (op < i.imm_operands + 2
8633                               && i.operands == i.imm_operands + 2);
8634                   vex_reg = i.imm_operands + 1;
8635                 }
8636             }
8637           else
8638             gas_assert (op < i.operands);
8639
8640           if (vex_reg != (unsigned int) ~0)
8641             {
8642               i386_operand_type *type = &i.tm.operand_types[vex_reg];
8643
8644               if ((type->bitfield.class != Reg
8645                    || (!type->bitfield.dword && !type->bitfield.qword))
8646                   && type->bitfield.class != RegSIMD
8647                   && !operand_type_equal (type, &regmask))
8648                 abort ();
8649
8650               i.vex.register_specifier = i.op[vex_reg].regs;
8651             }
8652
8653           /* Don't set OP operand twice.  */
8654           if (vex_reg != op)
8655             {
8656               /* If there is an extension opcode to put here, the
8657                  register number must be put into the regmem field.  */
8658               if (i.tm.extension_opcode != None)
8659                 {
8660                   i.rm.regmem = i.op[op].regs->reg_num;
8661                   set_rex_vrex (i.op[op].regs, REX_B,
8662                                 i.tm.opcode_modifier.sse2avx);
8663                 }
8664               else
8665                 {
8666                   i.rm.reg = i.op[op].regs->reg_num;
8667                   set_rex_vrex (i.op[op].regs, REX_R,
8668                                 i.tm.opcode_modifier.sse2avx);
8669                 }
8670             }
8671
8672           /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
8673              must set it to 3 to indicate this is a register operand
8674              in the regmem field.  */
8675           if (!i.mem_operands)
8676             i.rm.mode = 3;
8677         }
8678
8679       /* Fill in i.rm.reg field with extension opcode (if any).  */
8680       if (i.tm.extension_opcode != None)
8681         i.rm.reg = i.tm.extension_opcode;
8682     }
8683   return default_seg;
8684 }
8685
8686 static INLINE void
8687 frag_opcode_byte (unsigned char byte)
8688 {
8689   if (now_seg != absolute_section)
8690     FRAG_APPEND_1_CHAR (byte);
8691   else
8692     ++abs_section_offset;
8693 }
8694
8695 static unsigned int
8696 flip_code16 (unsigned int code16)
8697 {
8698   gas_assert (i.tm.operands == 1);
8699
8700   return !(i.prefix[REX_PREFIX] & REX_W)
8701          && (code16 ? i.tm.operand_types[0].bitfield.disp32
8702                       || i.tm.operand_types[0].bitfield.disp32s
8703                     : i.tm.operand_types[0].bitfield.disp16)
8704          ? CODE16 : 0;
8705 }
8706
8707 static void
8708 output_branch (void)
8709 {
8710   char *p;
8711   int size;
8712   int code16;
8713   int prefix;
8714   relax_substateT subtype;
8715   symbolS *sym;
8716   offsetT off;
8717
8718   if (now_seg == absolute_section)
8719     {
8720       as_bad (_("relaxable branches not supported in absolute section"));
8721       return;
8722     }
8723
8724   code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8725   size = i.disp_encoding == disp_encoding_32bit ? BIG : SMALL;
8726
8727   prefix = 0;
8728   if (i.prefix[DATA_PREFIX] != 0)
8729     {
8730       prefix = 1;
8731       i.prefixes -= 1;
8732       code16 ^= flip_code16(code16);
8733     }
8734   /* Pentium4 branch hints.  */
8735   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8736       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8737     {
8738       prefix++;
8739       i.prefixes--;
8740     }
8741   if (i.prefix[REX_PREFIX] != 0)
8742     {
8743       prefix++;
8744       i.prefixes--;
8745     }
8746
8747   /* BND prefixed jump.  */
8748   if (i.prefix[BND_PREFIX] != 0)
8749     {
8750       prefix++;
8751       i.prefixes--;
8752     }
8753
8754   if (i.prefixes != 0)
8755     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8756
8757   /* It's always a symbol;  End frag & setup for relax.
8758      Make sure there is enough room in this frag for the largest
8759      instruction we may generate in md_convert_frag.  This is 2
8760      bytes for the opcode and room for the prefix and largest
8761      displacement.  */
8762   frag_grow (prefix + 2 + 4);
8763   /* Prefix and 1 opcode byte go in fr_fix.  */
8764   p = frag_more (prefix + 1);
8765   if (i.prefix[DATA_PREFIX] != 0)
8766     *p++ = DATA_PREFIX_OPCODE;
8767   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8768       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8769     *p++ = i.prefix[SEG_PREFIX];
8770   if (i.prefix[BND_PREFIX] != 0)
8771     *p++ = BND_PREFIX_OPCODE;
8772   if (i.prefix[REX_PREFIX] != 0)
8773     *p++ = i.prefix[REX_PREFIX];
8774   *p = i.tm.base_opcode;
8775
8776   if ((unsigned char) *p == JUMP_PC_RELATIVE)
8777     subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8778   else if (cpu_arch_flags.bitfield.cpui386)
8779     subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8780   else
8781     subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8782   subtype |= code16;
8783
8784   sym = i.op[0].disps->X_add_symbol;
8785   off = i.op[0].disps->X_add_number;
8786
8787   if (i.op[0].disps->X_op != O_constant
8788       && i.op[0].disps->X_op != O_symbol)
8789     {
8790       /* Handle complex expressions.  */
8791       sym = make_expr_symbol (i.op[0].disps);
8792       off = 0;
8793     }
8794
8795   /* 1 possible extra opcode + 4 byte displacement go in var part.
8796      Pass reloc in fr_var.  */
8797   frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8798 }
8799
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* Tell whether a branch to symbol S should be emitted with a PLT32
   relocation.  Returns true or false.  */

static bool
need_plt32_p (symbolS *s)
{
  /* Only ELF has PLT32 relocations.  */
  if (!IS_ELF)
    return false;

#ifdef TE_SOLARIS
  /* Neither the native Solaris linker nor krtld supports PLT32, so
     never emit it there.  */
  return false;
#endif

  /* On x86-64 nothing needs preparing for a PLT branch, so
     R_X86_64_PLT32 can stand in for R_X86_64_PC32 and serve as a marker
     for 32-bit PC-relative branches.  Other object formats, and
     branches without a symbol, do not get it.  */
  if (!object_64bit || s == NULL)
    return false;

  /* Weak and undefined symbols always need PLT32.  */
  if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
    return true;

  /* Of the defined symbols only the global ones need PLT32.  NB:
     symbols with non-default visibility are handled like ordinary
     global symbols so that PLT32 keeps working as a marker for 32-bit
     PC-relative branches, which is useful for linker relaxation.  */
  return S_IS_EXTERNAL (s) != 0;
}
#endif
8841
8842 static void
8843 output_jump (void)
8844 {
8845   char *p;
8846   int size;
8847   fixS *fixP;
8848   bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8849
8850   if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8851     {
8852       /* This is a loop or jecxz type instruction.  */
8853       size = 1;
8854       if (i.prefix[ADDR_PREFIX] != 0)
8855         {
8856           frag_opcode_byte (ADDR_PREFIX_OPCODE);
8857           i.prefixes -= 1;
8858         }
8859       /* Pentium4 branch hints.  */
8860       if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8861           || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8862         {
8863           frag_opcode_byte (i.prefix[SEG_PREFIX]);
8864           i.prefixes--;
8865         }
8866     }
8867   else
8868     {
8869       int code16;
8870
8871       code16 = 0;
8872       if (flag_code == CODE_16BIT)
8873         code16 = CODE16;
8874
8875       if (i.prefix[DATA_PREFIX] != 0)
8876         {
8877           frag_opcode_byte (DATA_PREFIX_OPCODE);
8878           i.prefixes -= 1;
8879           code16 ^= flip_code16(code16);
8880         }
8881
8882       size = 4;
8883       if (code16)
8884         size = 2;
8885     }
8886
8887   /* BND prefixed jump.  */
8888   if (i.prefix[BND_PREFIX] != 0)
8889     {
8890       frag_opcode_byte (i.prefix[BND_PREFIX]);
8891       i.prefixes -= 1;
8892     }
8893
8894   if (i.prefix[REX_PREFIX] != 0)
8895     {
8896       frag_opcode_byte (i.prefix[REX_PREFIX]);
8897       i.prefixes -= 1;
8898     }
8899
8900   if (i.prefixes != 0)
8901     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8902
8903   if (now_seg == absolute_section)
8904     {
8905       abs_section_offset += i.opcode_length + size;
8906       return;
8907     }
8908
8909   p = frag_more (i.opcode_length + size);
8910   switch (i.opcode_length)
8911     {
8912     case 2:
8913       *p++ = i.tm.base_opcode >> 8;
8914       /* Fall through.  */
8915     case 1:
8916       *p++ = i.tm.base_opcode;
8917       break;
8918     default:
8919       abort ();
8920     }
8921
8922 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8923   if (size == 4
8924       && jump_reloc == NO_RELOC
8925       && need_plt32_p (i.op[0].disps->X_add_symbol))
8926     jump_reloc = BFD_RELOC_X86_64_PLT32;
8927 #endif
8928
8929   jump_reloc = reloc (size, 1, 1, jump_reloc);
8930
8931   fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8932                       i.op[0].disps, 1, jump_reloc);
8933
8934   /* All jumps handled here are signed, but don't unconditionally use a
8935      signed limit check for 32 and 16 bit jumps as we want to allow wrap
8936      around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
8937      respectively.  */
8938   switch (size)
8939     {
8940     case 1:
8941       fixP->fx_signed = 1;
8942       break;
8943
8944     case 2:
8945       if (i.tm.base_opcode == 0xc7f8)
8946         fixP->fx_signed = 1;
8947       break;
8948
8949     case 4:
8950       if (flag_code == CODE_64BIT)
8951         fixP->fx_signed = 1;
8952       break;
8953     }
8954 }
8955
8956 static void
8957 output_interseg_jump (void)
8958 {
8959   char *p;
8960   int size;
8961   int prefix;
8962   int code16;
8963
8964   code16 = 0;
8965   if (flag_code == CODE_16BIT)
8966     code16 = CODE16;
8967
8968   prefix = 0;
8969   if (i.prefix[DATA_PREFIX] != 0)
8970     {
8971       prefix = 1;
8972       i.prefixes -= 1;
8973       code16 ^= CODE16;
8974     }
8975
8976   gas_assert (!i.prefix[REX_PREFIX]);
8977
8978   size = 4;
8979   if (code16)
8980     size = 2;
8981
8982   if (i.prefixes != 0)
8983     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8984
8985   if (now_seg == absolute_section)
8986     {
8987       abs_section_offset += prefix + 1 + 2 + size;
8988       return;
8989     }
8990
8991   /* 1 opcode; 2 segment; offset  */
8992   p = frag_more (prefix + 1 + 2 + size);
8993
8994   if (i.prefix[DATA_PREFIX] != 0)
8995     *p++ = DATA_PREFIX_OPCODE;
8996
8997   if (i.prefix[REX_PREFIX] != 0)
8998     *p++ = i.prefix[REX_PREFIX];
8999
9000   *p++ = i.tm.base_opcode;
9001   if (i.op[1].imms->X_op == O_constant)
9002     {
9003       offsetT n = i.op[1].imms->X_add_number;
9004
9005       if (size == 2
9006           && !fits_in_unsigned_word (n)
9007           && !fits_in_signed_word (n))
9008         {
9009           as_bad (_("16-bit jump out of range"));
9010           return;
9011         }
9012       md_number_to_chars (p, n, size);
9013     }
9014   else
9015     fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9016                  i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
9017
9018   p += size;
9019   if (i.op[0].imms->X_op == O_constant)
9020     md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
9021   else
9022     fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
9023                  i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
9024 }
9025
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* Emit the .note.gnu.property section, holding a single note with the
   GNU_PROPERTY_X86_ISA_1_USED and GNU_PROPERTY_X86_FEATURE_2_USED
   properties.  Nothing is done unless the output is ELF and
   x86_used_note is set.  */

void
x86_cleanup (void)
{
  asection *saved_seg = now_seg;
  subsegT saved_subseg = now_subseg;
  asection *sec;
  char *p, *prop;
  unsigned int alignment, align_size_1;
  unsigned int isa_1_descsz_raw, isa_1_descsz;
  unsigned int feature_2_descsz_raw, feature_2_descsz;
  unsigned int descsz, padding;

  if (!IS_ELF || !x86_used_note)
    return;

  x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;

  /* Layout of the .note.gnu.property section:

     Field      Length          Contents
     ----       ----            ----
     n_namsz    4               4
     n_descsz   4               The note descriptor size
     n_type     4               NT_GNU_PROPERTY_TYPE_0
     n_name     4               "GNU"
     n_desc     n_descsz        The program property array
     ....       ....            ....
   */

  /* Create the section and make it current.  */
  sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
  bfd_set_section_flags (sec,
                         (SEC_ALLOC
                          | SEC_LOAD
                          | SEC_DATA
                          | SEC_HAS_CONTENTS
                          | SEC_READONLY));

  /* Properties are aligned to 8 bytes for ELFCLASS64 and to 4 bytes
     otherwise.  */
  alignment = (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64
               ? 3 : 2);
  align_size_1 = (1u << alignment) - 1;

  bfd_set_section_alignment (sec, alignment);
  elf_section_type (sec) = SHT_NOTE;

  /* Each property is: 4-byte type + 4-byte data size + 4-byte data,
     rounded up to the alignment.  */
  isa_1_descsz_raw = 4 + 4 + 4;
  isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;

  feature_2_descsz_raw = isa_1_descsz + 4 + 4 + 4;
  feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
                      & ~align_size_1);

  descsz = feature_2_descsz;

  /* Section size: n_namsz + n_descsz + n_type + n_name + n_descsz.  */
  p = frag_more (4 + 4 + 4 + 4 + descsz);

  /* Note header: n_namsz, n_descsz, n_type, n_name.  */
  md_number_to_chars (p, (valueT) 4, 4);
  md_number_to_chars (p + 4, (valueT) descsz, 4);
  md_number_to_chars (p + 8, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
  memcpy (p + 12, "GNU", 4);

  /* GNU_PROPERTY_X86_ISA_1_USED: type, data size, data, then zeroed
     padding if any.  */
  prop = p + 16;
  md_number_to_chars (prop, (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
  md_number_to_chars (prop + 4, (valueT) 4, 4);
  md_number_to_chars (prop + 8, (valueT) x86_isa_1_used, 4);
  padding = isa_1_descsz - isa_1_descsz_raw;
  if (padding)
    memset (prop + 12, 0, padding);

  /* GNU_PROPERTY_X86_FEATURE_2_USED follows, in the same shape.  */
  prop += isa_1_descsz;
  md_number_to_chars (prop, (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
  md_number_to_chars (prop + 4, (valueT) 4, 4);
  md_number_to_chars (prop + 8, (valueT) x86_feature_2_used, 4);
  padding = feature_2_descsz - feature_2_descsz_raw;
  if (padding)
    memset (prop + 12, 0, padding);

  /* Restoring the previous segment is probably not possible, as there
     likely is none yet; only do it when both values are nonzero.  */
  if (saved_seg && saved_subseg)
    subseg_set (saved_seg, saved_subseg);
}
#endif
9146
9147 static unsigned int
9148 encoding_length (const fragS *start_frag, offsetT start_off,
9149                  const char *frag_now_ptr)
9150 {
9151   unsigned int len = 0;
9152
9153   if (start_frag != frag_now)
9154     {
9155       const fragS *fr = start_frag;
9156
9157       do {
9158         len += fr->fr_fix;
9159         fr = fr->fr_next;
9160       } while (fr && fr != frag_now);
9161     }
9162
9163   return len - start_off + (frag_now_ptr - frag_now->fr_literal);
9164 }
9165
9166 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
9167    be macro-fused with conditional jumps.
9168    NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
9169    or is one of the following format:
9170
9171     cmp m, imm
9172     add m, imm
9173     sub m, imm
9174    test m, imm
9175     and m, imm
9176     inc m
9177     dec m
9178
9179    it is unfusible.  */
9180
9181 static int
9182 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
9183 {
9184   /* No RIP address.  */
9185   if (i.base_reg && i.base_reg->reg_num == RegIP)
9186     return 0;
9187
9188   /* No opcodes outside of base encoding space.  */
9189   if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9190     return 0;
9191
9192   /* add, sub without add/sub m, imm.  */
9193   if (i.tm.base_opcode <= 5
9194       || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
9195       || ((i.tm.base_opcode | 3) == 0x83
9196           && (i.tm.extension_opcode == 0x5
9197               || i.tm.extension_opcode == 0x0)))
9198     {
9199       *mf_cmp_p = mf_cmp_alu_cmp;
9200       return !(i.mem_operands && i.imm_operands);
9201     }
9202
9203   /* and without and m, imm.  */
9204   if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
9205       || ((i.tm.base_opcode | 3) == 0x83
9206           && i.tm.extension_opcode == 0x4))
9207     {
9208       *mf_cmp_p = mf_cmp_test_and;
9209       return !(i.mem_operands && i.imm_operands);
9210     }
9211
9212   /* test without test m imm.  */
9213   if ((i.tm.base_opcode | 1) == 0x85
9214       || (i.tm.base_opcode | 1) == 0xa9
9215       || ((i.tm.base_opcode | 1) == 0xf7
9216           && i.tm.extension_opcode == 0))
9217     {
9218       *mf_cmp_p = mf_cmp_test_and;
9219       return !(i.mem_operands && i.imm_operands);
9220     }
9221
9222   /* cmp without cmp m, imm.  */
9223   if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
9224       || ((i.tm.base_opcode | 3) == 0x83
9225           && (i.tm.extension_opcode == 0x7)))
9226     {
9227       *mf_cmp_p = mf_cmp_alu_cmp;
9228       return !(i.mem_operands && i.imm_operands);
9229     }
9230
9231   /* inc, dec without inc/dec m.   */
9232   if ((i.tm.cpu_flags.bitfield.cpuno64
9233        && (i.tm.base_opcode | 0xf) == 0x4f)
9234       || ((i.tm.base_opcode | 1) == 0xff
9235           && i.tm.extension_opcode <= 0x1))
9236     {
9237       *mf_cmp_p = mf_cmp_incdec;
9238       return !i.mem_operands;
9239     }
9240
9241   return 0;
9242 }
9243
9244 /* Return 1 if a FUSED_JCC_PADDING frag should be generated.  */
9245
9246 static int
9247 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9248 {
9249   /* NB: Don't work with COND_JUMP86 without i386.  */
9250   if (!align_branch_power
9251       || now_seg == absolute_section
9252       || !cpu_arch_flags.bitfield.cpui386
9253       || !(align_branch & align_branch_fused_bit))
9254     return 0;
9255
9256   if (maybe_fused_with_jcc_p (mf_cmp_p))
9257     {
9258       if (last_insn.kind == last_insn_other
9259           || last_insn.seg != now_seg)
9260         return 1;
9261       if (flag_debug)
9262         as_warn_where (last_insn.file, last_insn.line,
9263                        _("`%s` skips -malign-branch-boundary on `%s`"),
9264                        last_insn.name, i.tm.name);
9265     }
9266
9267   return 0;
9268 }
9269
9270 /* Return 1 if a BRANCH_PREFIX frag should be generated.  */
9271
9272 static int
9273 add_branch_prefix_frag_p (void)
9274 {
9275   /* NB: Don't work with COND_JUMP86 without i386.  Don't add prefix
9276      to PadLock instructions since they include prefixes in opcode.  */
9277   if (!align_branch_power
9278       || !align_branch_prefix_size
9279       || now_seg == absolute_section
9280       || i.tm.cpu_flags.bitfield.cpupadlock
9281       || !cpu_arch_flags.bitfield.cpui386)
9282     return 0;
9283
9284   /* Don't add prefix if it is a prefix or there is no operand in case
9285      that segment prefix is special.  */
9286   if (!i.operands || i.tm.opcode_modifier.isprefix)
9287     return 0;
9288
9289   if (last_insn.kind == last_insn_other
9290       || last_insn.seg != now_seg)
9291     return 1;
9292
9293   if (flag_debug)
9294     as_warn_where (last_insn.file, last_insn.line,
9295                    _("`%s` skips -malign-branch-boundary on `%s`"),
9296                    last_insn.name, i.tm.name);
9297
9298   return 0;
9299 }
9300
9301 /* Return 1 if a BRANCH_PADDING frag should be generated.  */
9302
9303 static int
9304 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9305                            enum mf_jcc_kind *mf_jcc_p)
9306 {
9307   int add_padding;
9308
9309   /* NB: Don't work with COND_JUMP86 without i386.  */
9310   if (!align_branch_power
9311       || now_seg == absolute_section
9312       || !cpu_arch_flags.bitfield.cpui386
9313       || i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9314     return 0;
9315
9316   add_padding = 0;
9317
9318   /* Check for jcc and direct jmp.  */
9319   if (i.tm.opcode_modifier.jump == JUMP)
9320     {
9321       if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9322         {
9323           *branch_p = align_branch_jmp;
9324           add_padding = align_branch & align_branch_jmp_bit;
9325         }
9326       else
9327         {
9328           /* Because J<cc> and JN<cc> share same group in macro-fusible table,
9329              igore the lowest bit.  */
9330           *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9331           *branch_p = align_branch_jcc;
9332           if ((align_branch & align_branch_jcc_bit))
9333             add_padding = 1;
9334         }
9335     }
9336   else if ((i.tm.base_opcode | 1) == 0xc3)
9337     {
9338       /* Near ret.  */
9339       *branch_p = align_branch_ret;
9340       if ((align_branch & align_branch_ret_bit))
9341         add_padding = 1;
9342     }
9343   else
9344     {
9345       /* Check for indirect jmp, direct and indirect calls.  */
9346       if (i.tm.base_opcode == 0xe8)
9347         {
9348           /* Direct call.  */
9349           *branch_p = align_branch_call;
9350           if ((align_branch & align_branch_call_bit))
9351             add_padding = 1;
9352         }
9353       else if (i.tm.base_opcode == 0xff
9354                && (i.tm.extension_opcode == 2
9355                    || i.tm.extension_opcode == 4))
9356         {
9357           /* Indirect call and jmp.  */
9358           *branch_p = align_branch_indirect;
9359           if ((align_branch & align_branch_indirect_bit))
9360             add_padding = 1;
9361         }
9362
9363       if (add_padding
9364           && i.disp_operands
9365           && tls_get_addr
9366           && (i.op[0].disps->X_op == O_symbol
9367               || (i.op[0].disps->X_op == O_subtract
9368                   && i.op[0].disps->X_op_symbol == GOT_symbol)))
9369         {
9370           symbolS *s = i.op[0].disps->X_add_symbol;
9371           /* No padding to call to global or undefined tls_get_addr.  */
9372           if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9373               && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9374             return 0;
9375         }
9376     }
9377
9378   if (add_padding
9379       && last_insn.kind != last_insn_other
9380       && last_insn.seg == now_seg)
9381     {
9382       if (flag_debug)
9383         as_warn_where (last_insn.file, last_insn.line,
9384                        _("`%s` skips -malign-branch-boundary on `%s`"),
9385                        last_insn.name, i.tm.name);
9386       return 0;
9387     }
9388
9389   return add_padding;
9390 }
9391
9392 static void
9393 output_insn (void)
9394 {
9395   fragS *insn_start_frag;
9396   offsetT insn_start_off;
9397   fragS *fragP = NULL;
9398   enum align_branch_kind branch = align_branch_none;
9399   /* The initializer is arbitrary just to avoid uninitialized error.
9400      it's actually either assigned in add_branch_padding_frag_p
9401      or never be used.  */
9402   enum mf_jcc_kind mf_jcc = mf_jcc_jo;
9403
9404 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9405   if (IS_ELF && x86_used_note && now_seg != absolute_section)
9406     {
9407       if ((i.xstate & xstate_tmm) == xstate_tmm
9408           || i.tm.cpu_flags.bitfield.cpuamx_tile)
9409         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
9410
9411       if (i.tm.cpu_flags.bitfield.cpu8087
9412           || i.tm.cpu_flags.bitfield.cpu287
9413           || i.tm.cpu_flags.bitfield.cpu387
9414           || i.tm.cpu_flags.bitfield.cpu687
9415           || i.tm.cpu_flags.bitfield.cpufisttp)
9416         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
9417
9418       if ((i.xstate & xstate_mmx)
9419           || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9420               && !is_any_vex_encoding (&i.tm)
9421               && (i.tm.base_opcode == 0x77 /* emms */
9422                   || i.tm.base_opcode == 0x0e /* femms */)))
9423         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
9424
9425       if (i.index_reg)
9426         {
9427           if (i.index_reg->reg_type.bitfield.zmmword)
9428             i.xstate |= xstate_zmm;
9429           else if (i.index_reg->reg_type.bitfield.ymmword)
9430             i.xstate |= xstate_ymm;
9431           else if (i.index_reg->reg_type.bitfield.xmmword)
9432             i.xstate |= xstate_xmm;
9433         }
9434
9435       /* vzeroall / vzeroupper */
9436       if (i.tm.base_opcode == 0x77 && i.tm.cpu_flags.bitfield.cpuavx)
9437         i.xstate |= xstate_ymm;
9438
9439       if ((i.xstate & xstate_xmm)
9440           /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
9441           || (i.tm.base_opcode == 0xae
9442               && (i.tm.cpu_flags.bitfield.cpusse
9443                   || i.tm.cpu_flags.bitfield.cpuavx))
9444           || i.tm.cpu_flags.bitfield.cpuwidekl
9445           || i.tm.cpu_flags.bitfield.cpukl)
9446         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
9447
9448       if ((i.xstate & xstate_ymm) == xstate_ymm)
9449         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
9450       if ((i.xstate & xstate_zmm) == xstate_zmm)
9451         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
9452       if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
9453         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
9454       if (i.tm.cpu_flags.bitfield.cpufxsr)
9455         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
9456       if (i.tm.cpu_flags.bitfield.cpuxsave)
9457         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
9458       if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
9459         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
9460       if (i.tm.cpu_flags.bitfield.cpuxsavec)
9461         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
9462
9463       if (x86_feature_2_used
9464           || i.tm.cpu_flags.bitfield.cpucmov
9465           || i.tm.cpu_flags.bitfield.cpusyscall
9466           || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9467               && i.tm.base_opcode == 0xc7
9468               && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
9469               && i.tm.extension_opcode == 1) /* cmpxchg8b */)
9470         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
9471       if (i.tm.cpu_flags.bitfield.cpusse3
9472           || i.tm.cpu_flags.bitfield.cpussse3
9473           || i.tm.cpu_flags.bitfield.cpusse4_1
9474           || i.tm.cpu_flags.bitfield.cpusse4_2
9475           || i.tm.cpu_flags.bitfield.cpucx16
9476           || i.tm.cpu_flags.bitfield.cpupopcnt
9477           /* LAHF-SAHF insns in 64-bit mode.  */
9478           || (flag_code == CODE_64BIT
9479               && (i.tm.base_opcode | 1) == 0x9f
9480               && i.tm.opcode_modifier.opcodespace == SPACE_BASE))
9481         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
9482       if (i.tm.cpu_flags.bitfield.cpuavx
9483           || i.tm.cpu_flags.bitfield.cpuavx2
9484           /* Any VEX encoded insns execpt for CpuAVX512F, CpuAVX512BW,
9485              CpuAVX512DQ, LPW, TBM and AMX.  */
9486           || (i.tm.opcode_modifier.vex
9487               && !i.tm.cpu_flags.bitfield.cpuavx512f
9488               && !i.tm.cpu_flags.bitfield.cpuavx512bw
9489               && !i.tm.cpu_flags.bitfield.cpuavx512dq
9490               && !i.tm.cpu_flags.bitfield.cpulwp
9491               && !i.tm.cpu_flags.bitfield.cputbm
9492               && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
9493           || i.tm.cpu_flags.bitfield.cpuf16c
9494           || i.tm.cpu_flags.bitfield.cpufma
9495           || i.tm.cpu_flags.bitfield.cpulzcnt
9496           || i.tm.cpu_flags.bitfield.cpumovbe
9497           || i.tm.cpu_flags.bitfield.cpuxsaves
9498           || (x86_feature_2_used
9499               & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
9500                  | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
9501                  | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
9502         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
9503       if (i.tm.cpu_flags.bitfield.cpuavx512f
9504           || i.tm.cpu_flags.bitfield.cpuavx512bw
9505           || i.tm.cpu_flags.bitfield.cpuavx512dq
9506           || i.tm.cpu_flags.bitfield.cpuavx512vl
9507           /* Any EVEX encoded insns except for AVX512ER, AVX512PF and
9508              VNNIW.  */
9509           || (i.tm.opcode_modifier.evex
9510               && !i.tm.cpu_flags.bitfield.cpuavx512er
9511               && !i.tm.cpu_flags.bitfield.cpuavx512pf
9512               && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
9513         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
9514     }
9515 #endif
9516
9517   /* Tie dwarf2 debug info to the address at the start of the insn.
9518      We can't do this after the insn has been output as the current
9519      frag may have been closed off.  eg. by frag_var.  */
9520   dwarf2_emit_insn (0);
9521
9522   insn_start_frag = frag_now;
9523   insn_start_off = frag_now_fix ();
9524
9525   if (add_branch_padding_frag_p (&branch, &mf_jcc))
9526     {
9527       char *p;
9528       /* Branch can be 8 bytes.  Leave some room for prefixes.  */
9529       unsigned int max_branch_padding_size = 14;
9530
9531       /* Align section to boundary.  */
9532       record_alignment (now_seg, align_branch_power);
9533
9534       /* Make room for padding.  */
9535       frag_grow (max_branch_padding_size);
9536
9537       /* Start of the padding.  */
9538       p = frag_more (0);
9539
9540       fragP = frag_now;
9541
9542       frag_var (rs_machine_dependent, max_branch_padding_size, 0,
9543                 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
9544                 NULL, 0, p);
9545
9546       fragP->tc_frag_data.mf_type = mf_jcc;
9547       fragP->tc_frag_data.branch_type = branch;
9548       fragP->tc_frag_data.max_bytes = max_branch_padding_size;
9549     }
9550
9551   /* Output jumps.  */
9552   if (i.tm.opcode_modifier.jump == JUMP)
9553     output_branch ();
9554   else if (i.tm.opcode_modifier.jump == JUMP_BYTE
9555            || i.tm.opcode_modifier.jump == JUMP_DWORD)
9556     output_jump ();
9557   else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
9558     output_interseg_jump ();
9559   else
9560     {
9561       /* Output normal instructions here.  */
9562       char *p;
9563       unsigned char *q;
9564       unsigned int j;
9565       enum mf_cmp_kind mf_cmp;
9566
9567       if (avoid_fence
9568           && (i.tm.base_opcode == 0xaee8
9569               || i.tm.base_opcode == 0xaef0
9570               || i.tm.base_opcode == 0xaef8))
9571         {
9572           /* Encode lfence, mfence, and sfence as
9573              f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
9574           if (now_seg != absolute_section)
9575             {
9576               offsetT val = 0x240483f0ULL;
9577
9578               p = frag_more (5);
9579               md_number_to_chars (p, val, 5);
9580             }
9581           else
9582             abs_section_offset += 5;
9583           return;
9584         }
9585
9586       /* Some processors fail on the LOCK prefix.  This option makes the
9587          assembler ignore the LOCK prefix and serves as a workaround.  */
9588       if (omit_lock_prefix)
9589         {
9590           if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
9591               && i.tm.opcode_modifier.isprefix)
9592             return;
9593           i.prefix[LOCK_PREFIX] = 0;
9594         }
9595
9596       if (branch)
9597         /* Skip if this is a branch.  */
9598         ;
9599       else if (add_fused_jcc_padding_frag_p (&mf_cmp))
9600         {
9601           /* Make room for padding.  */
9602           frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
9603           p = frag_more (0);
9604
9605           fragP = frag_now;
9606
9607           frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
9608                     ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
9609                     NULL, 0, p);
9610
9611           fragP->tc_frag_data.mf_type = mf_cmp;
9612           fragP->tc_frag_data.branch_type = align_branch_fused;
9613           fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
9614         }
9615       else if (add_branch_prefix_frag_p ())
9616         {
9617           unsigned int max_prefix_size = align_branch_prefix_size;
9618
9619           /* Make room for padding.  */
9620           frag_grow (max_prefix_size);
9621           p = frag_more (0);
9622
9623           fragP = frag_now;
9624
9625           frag_var (rs_machine_dependent, max_prefix_size, 0,
9626                     ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
9627                     NULL, 0, p);
9628
9629           fragP->tc_frag_data.max_bytes = max_prefix_size;
9630         }
9631
9632       /* Since the VEX/EVEX prefix contains the implicit prefix, we
9633          don't need the explicit prefix.  */
9634       if (!i.tm.opcode_modifier.vex && !i.tm.opcode_modifier.evex)
9635         {
9636           switch (i.tm.opcode_modifier.opcodeprefix)
9637             {
9638             case PREFIX_0X66:
9639               add_prefix (0x66);
9640               break;
9641             case PREFIX_0XF2:
9642               add_prefix (0xf2);
9643               break;
9644             case PREFIX_0XF3:
9645               if (!i.tm.cpu_flags.bitfield.cpupadlock
9646                   || (i.prefix[REP_PREFIX] != 0xf3))
9647                 add_prefix (0xf3);
9648               break;
9649             case PREFIX_NONE:
9650               switch (i.opcode_length)
9651                 {
9652                 case 2:
9653                   break;
9654                 case 1:
9655                   /* Check for pseudo prefixes.  */
9656                   if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
9657                     break;
9658                   as_bad_where (insn_start_frag->fr_file,
9659                                 insn_start_frag->fr_line,
9660                                 _("pseudo prefix without instruction"));
9661                   return;
9662                 default:
9663                   abort ();
9664                 }
9665               break;
9666             default:
9667               abort ();
9668             }
9669
9670 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9671           /* For x32, add a dummy REX_OPCODE prefix for mov/add with
9672              R_X86_64_GOTTPOFF relocation so that linker can safely
9673              perform IE->LE optimization.  A dummy REX_OPCODE prefix
9674              is also needed for lea with R_X86_64_GOTPC32_TLSDESC
9675              relocation for GDesc -> IE/LE optimization.  */
9676           if (x86_elf_abi == X86_64_X32_ABI
9677               && i.operands == 2
9678               && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
9679                   || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
9680               && i.prefix[REX_PREFIX] == 0)
9681             add_prefix (REX_OPCODE);
9682 #endif
9683
9684           /* The prefix bytes.  */
9685           for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
9686             if (*q)
9687               frag_opcode_byte (*q);
9688         }
9689       else
9690         {
9691           for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
9692             if (*q)
9693               switch (j)
9694                 {
9695                 case SEG_PREFIX:
9696                 case ADDR_PREFIX:
9697                   frag_opcode_byte (*q);
9698                   break;
9699                 default:
9700                   /* There should be no other prefixes for instructions
9701                      with VEX prefix.  */
9702                   abort ();
9703                 }
9704
9705           /* For EVEX instructions i.vrex should become 0 after
9706              build_evex_prefix.  For VEX instructions upper 16 registers
9707              aren't available, so VREX should be 0.  */
9708           if (i.vrex)
9709             abort ();
9710           /* Now the VEX prefix.  */
9711           if (now_seg != absolute_section)
9712             {
9713               p = frag_more (i.vex.length);
9714               for (j = 0; j < i.vex.length; j++)
9715                 p[j] = i.vex.bytes[j];
9716             }
9717           else
9718             abs_section_offset += i.vex.length;
9719         }
9720
9721       /* Now the opcode; be careful about word order here!  */
9722       j = i.opcode_length;
9723       if (!i.vex.length)
9724         switch (i.tm.opcode_modifier.opcodespace)
9725           {
9726           case SPACE_BASE:
9727             break;
9728           case SPACE_0F:
9729             ++j;
9730             break;
9731           case SPACE_0F38:
9732           case SPACE_0F3A:
9733             j += 2;
9734             break;
9735           default:
9736             abort ();
9737           }
9738
9739       if (now_seg == absolute_section)
9740         abs_section_offset += j;
9741       else if (j == 1)
9742         {
9743           FRAG_APPEND_1_CHAR (i.tm.base_opcode);
9744         }
9745       else
9746         {
9747           p = frag_more (j);
9748           if (!i.vex.length
9749               && i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9750             {
9751               *p++ = 0x0f;
9752               if (i.tm.opcode_modifier.opcodespace != SPACE_0F)
9753                 *p++ = i.tm.opcode_modifier.opcodespace == SPACE_0F38
9754                        ? 0x38 : 0x3a;
9755             }
9756
9757           switch (i.opcode_length)
9758             {
9759             case 2:
9760               /* Put out high byte first: can't use md_number_to_chars!  */
9761               *p++ = (i.tm.base_opcode >> 8) & 0xff;
9762               /* Fall through.  */
9763             case 1:
9764               *p = i.tm.base_opcode & 0xff;
9765               break;
9766             default:
9767               abort ();
9768               break;
9769             }
9770
9771         }
9772
9773       /* Now the modrm byte and sib byte (if present).  */
9774       if (i.tm.opcode_modifier.modrm)
9775         {
9776           frag_opcode_byte ((i.rm.regmem << 0)
9777                              | (i.rm.reg << 3)
9778                              | (i.rm.mode << 6));
9779           /* If i.rm.regmem == ESP (4)
9780              && i.rm.mode != (Register mode)
9781              && not 16 bit
9782              ==> need second modrm byte.  */
9783           if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
9784               && i.rm.mode != 3
9785               && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
9786             frag_opcode_byte ((i.sib.base << 0)
9787                               | (i.sib.index << 3)
9788                               | (i.sib.scale << 6));
9789         }
9790
9791       if (i.disp_operands)
9792         output_disp (insn_start_frag, insn_start_off);
9793
9794       if (i.imm_operands)
9795         output_imm (insn_start_frag, insn_start_off);
9796
9797       /*
9798        * frag_now_fix () returning plain abs_section_offset when we're in the
9799        * absolute section, and abs_section_offset not getting updated as data
9800        * gets added to the frag breaks the logic below.
9801        */
9802       if (now_seg != absolute_section)
9803         {
9804           j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
9805           if (j > 15)
9806             as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
9807                      j);
9808           else if (fragP)
9809             {
9810               /* NB: Don't add prefix with GOTPC relocation since
9811                  output_disp() above depends on the fixed encoding
9812                  length.  Can't add prefix with TLS relocation since
9813                  it breaks TLS linker optimization.  */
9814               unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
9815               /* Prefix count on the current instruction.  */
9816               unsigned int count = i.vex.length;
9817               unsigned int k;
9818               for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
9819                 /* REX byte is encoded in VEX/EVEX prefix.  */
9820                 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
9821                   count++;
9822
9823               /* Count prefixes for extended opcode maps.  */
9824               if (!i.vex.length)
9825                 switch (i.tm.opcode_modifier.opcodespace)
9826                   {
9827                   case SPACE_BASE:
9828                     break;
9829                   case SPACE_0F:
9830                     count++;
9831                     break;
9832                   case SPACE_0F38:
9833                   case SPACE_0F3A:
9834                     count += 2;
9835                     break;
9836                   default:
9837                     abort ();
9838                   }
9839
9840               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
9841                   == BRANCH_PREFIX)
9842                 {
9843                   /* Set the maximum prefix size in BRANCH_PREFIX
9844                      frag.  */
9845                   if (fragP->tc_frag_data.max_bytes > max)
9846                     fragP->tc_frag_data.max_bytes = max;
9847                   if (fragP->tc_frag_data.max_bytes > count)
9848                     fragP->tc_frag_data.max_bytes -= count;
9849                   else
9850                     fragP->tc_frag_data.max_bytes = 0;
9851                 }
9852               else
9853                 {
9854                   /* Remember the maximum prefix size in FUSED_JCC_PADDING
9855                      frag.  */
9856                   unsigned int max_prefix_size;
9857                   if (align_branch_prefix_size > max)
9858                     max_prefix_size = max;
9859                   else
9860                     max_prefix_size = align_branch_prefix_size;
9861                   if (max_prefix_size > count)
9862                     fragP->tc_frag_data.max_prefix_length
9863                       = max_prefix_size - count;
9864                 }
9865
9866               /* Use existing segment prefix if possible.  Use CS
9867                  segment prefix in 64-bit mode.  In 32-bit mode, use SS
9868                  segment prefix with ESP/EBP base register and use DS
9869                  segment prefix without ESP/EBP base register.  */
9870               if (i.prefix[SEG_PREFIX])
9871                 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
9872               else if (flag_code == CODE_64BIT)
9873                 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
9874               else if (i.base_reg
9875                        && (i.base_reg->reg_num == 4
9876                            || i.base_reg->reg_num == 5))
9877                 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
9878               else
9879                 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
9880             }
9881         }
9882     }
9883
9884   /* NB: Don't work with COND_JUMP86 without i386.  */
9885   if (align_branch_power
9886       && now_seg != absolute_section
9887       && cpu_arch_flags.bitfield.cpui386)
9888     {
9889       /* Terminate each frag so that we can add prefix and check for
9890          fused jcc.  */
9891       frag_wane (frag_now);
9892       frag_new (0);
9893     }
9894
9895 #ifdef DEBUG386
9896   if (flag_debug)
9897     {
9898       pi ("" /*line*/, &i);
9899     }
9900 #endif /* DEBUG386  */
9901 }
9902
9903 /* Return the size of the displacement operand N.  */
9904
9905 static int
9906 disp_size (unsigned int n)
9907 {
9908   int size = 4;
9909
9910   if (i.types[n].bitfield.disp64)
9911     size = 8;
9912   else if (i.types[n].bitfield.disp8)
9913     size = 1;
9914   else if (i.types[n].bitfield.disp16)
9915     size = 2;
9916   return size;
9917 }
9918
9919 /* Return the size of the immediate operand N.  */
9920
9921 static int
9922 imm_size (unsigned int n)
9923 {
9924   int size = 4;
9925   if (i.types[n].bitfield.imm64)
9926     size = 8;
9927   else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
9928     size = 1;
9929   else if (i.types[n].bitfield.imm16)
9930     size = 2;
9931   return size;
9932 }
9933
9934 static void
9935 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
9936 {
9937   char *p;
9938   unsigned int n;
9939
9940   for (n = 0; n < i.operands; n++)
9941     {
9942       if (operand_type_check (i.types[n], disp))
9943         {
9944           int size = disp_size (n);
9945
9946           if (now_seg == absolute_section)
9947             abs_section_offset += size;
9948           else if (i.op[n].disps->X_op == O_constant)
9949             {
9950               offsetT val = i.op[n].disps->X_add_number;
9951
9952               val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
9953                                      size);
9954               p = frag_more (size);
9955               md_number_to_chars (p, val, size);
9956             }
9957           else
9958             {
9959               enum bfd_reloc_code_real reloc_type;
9960               int sign = i.types[n].bitfield.disp32s;
9961               int pcrel = (i.flags[n] & Operand_PCrel) != 0;
9962               fixS *fixP;
9963
9964               /* We can't have 8 bit displacement here.  */
9965               gas_assert (!i.types[n].bitfield.disp8);
9966
9967               /* The PC relative address is computed relative
9968                  to the instruction boundary, so in case immediate
9969                  fields follows, we need to adjust the value.  */
9970               if (pcrel && i.imm_operands)
9971                 {
9972                   unsigned int n1;
9973                   int sz = 0;
9974
9975                   for (n1 = 0; n1 < i.operands; n1++)
9976                     if (operand_type_check (i.types[n1], imm))
9977                       {
9978                         /* Only one immediate is allowed for PC
9979                            relative address.  */
9980                         gas_assert (sz == 0);
9981                         sz = imm_size (n1);
9982                         i.op[n].disps->X_add_number -= sz;
9983                       }
9984                   /* We should find the immediate.  */
9985                   gas_assert (sz != 0);
9986                 }
9987
9988               p = frag_more (size);
9989               reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
9990               if (GOT_symbol
9991                   && GOT_symbol == i.op[n].disps->X_add_symbol
9992                   && (((reloc_type == BFD_RELOC_32
9993                         || reloc_type == BFD_RELOC_X86_64_32S
9994                         || (reloc_type == BFD_RELOC_64
9995                             && object_64bit))
9996                        && (i.op[n].disps->X_op == O_symbol
9997                            || (i.op[n].disps->X_op == O_add
9998                                && ((symbol_get_value_expression
9999                                     (i.op[n].disps->X_op_symbol)->X_op)
10000                                    == O_subtract))))
10001                       || reloc_type == BFD_RELOC_32_PCREL))
10002                 {
10003                   if (!object_64bit)
10004                     {
10005                       reloc_type = BFD_RELOC_386_GOTPC;
10006                       i.has_gotpc_tls_reloc = true;
10007                       i.op[n].disps->X_add_number +=
10008                         encoding_length (insn_start_frag, insn_start_off, p);
10009                     }
10010                   else if (reloc_type == BFD_RELOC_64)
10011                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
10012                   else
10013                     /* Don't do the adjustment for x86-64, as there
10014                        the pcrel addressing is relative to the _next_
10015                        insn, and that is taken care of in other code.  */
10016                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
10017                 }
10018               else if (align_branch_power)
10019                 {
10020                   switch (reloc_type)
10021                     {
10022                     case BFD_RELOC_386_TLS_GD:
10023                     case BFD_RELOC_386_TLS_LDM:
10024                     case BFD_RELOC_386_TLS_IE:
10025                     case BFD_RELOC_386_TLS_IE_32:
10026                     case BFD_RELOC_386_TLS_GOTIE:
10027                     case BFD_RELOC_386_TLS_GOTDESC:
10028                     case BFD_RELOC_386_TLS_DESC_CALL:
10029                     case BFD_RELOC_X86_64_TLSGD:
10030                     case BFD_RELOC_X86_64_TLSLD:
10031                     case BFD_RELOC_X86_64_GOTTPOFF:
10032                     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
10033                     case BFD_RELOC_X86_64_TLSDESC_CALL:
10034                       i.has_gotpc_tls_reloc = true;
10035                     default:
10036                       break;
10037                     }
10038                 }
10039               fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
10040                                   size, i.op[n].disps, pcrel,
10041                                   reloc_type);
10042
10043               if (flag_code == CODE_64BIT && size == 4 && pcrel
10044                   && !i.prefix[ADDR_PREFIX])
10045                 fixP->fx_signed = 1;
10046
10047               /* Check for "call/jmp *mem", "mov mem, %reg",
10048                  "test %reg, mem" and "binop mem, %reg" where binop
10049                  is one of adc, add, and, cmp, or, sbb, sub, xor
10050                  instructions without data prefix.  Always generate
10051                  R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
10052               if (i.prefix[DATA_PREFIX] == 0
10053                   && (generate_relax_relocations
10054                       || (!object_64bit
10055                           && i.rm.mode == 0
10056                           && i.rm.regmem == 5))
10057                   && (i.rm.mode == 2
10058                       || (i.rm.mode == 0 && i.rm.regmem == 5))
10059                   && i.tm.opcode_modifier.opcodespace == SPACE_BASE
10060                   && ((i.operands == 1
10061                        && i.tm.base_opcode == 0xff
10062                        && (i.rm.reg == 2 || i.rm.reg == 4))
10063                       || (i.operands == 2
10064                           && (i.tm.base_opcode == 0x8b
10065                               || i.tm.base_opcode == 0x85
10066                               || (i.tm.base_opcode & ~0x38) == 0x03))))
10067                 {
10068                   if (object_64bit)
10069                     {
10070                       fixP->fx_tcbit = i.rex != 0;
10071                       if (i.base_reg
10072                           && (i.base_reg->reg_num == RegIP))
10073                       fixP->fx_tcbit2 = 1;
10074                     }
10075                   else
10076                     fixP->fx_tcbit2 = 1;
10077                 }
10078             }
10079         }
10080     }
10081 }
10082
10083 static void
10084 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
10085 {
10086   char *p;
10087   unsigned int n;
10088
10089   for (n = 0; n < i.operands; n++)
10090     {
10091       /* Skip SAE/RC Imm operand in EVEX.  They are already handled.  */
10092       if (i.rounding.type != rc_none && n == i.rounding.operand)
10093         continue;
10094
10095       if (operand_type_check (i.types[n], imm))
10096         {
10097           int size = imm_size (n);
10098
10099           if (now_seg == absolute_section)
10100             abs_section_offset += size;
10101           else if (i.op[n].imms->X_op == O_constant)
10102             {
10103               offsetT val;
10104
10105               val = offset_in_range (i.op[n].imms->X_add_number,
10106                                      size);
10107               p = frag_more (size);
10108               md_number_to_chars (p, val, size);
10109             }
10110           else
10111             {
10112               /* Not absolute_section.
10113                  Need a 32-bit fixup (don't support 8bit
10114                  non-absolute imms).  Try to support other
10115                  sizes ...  */
10116               enum bfd_reloc_code_real reloc_type;
10117               int sign;
10118
10119               if (i.types[n].bitfield.imm32s
10120                   && (i.suffix == QWORD_MNEM_SUFFIX
10121                       || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
10122                 sign = 1;
10123               else
10124                 sign = 0;
10125
10126               p = frag_more (size);
10127               reloc_type = reloc (size, 0, sign, i.reloc[n]);
10128
10129               /*   This is tough to explain.  We end up with this one if we
10130                * have operands that look like
10131                * "_GLOBAL_OFFSET_TABLE_+[.-.L284]".  The goal here is to
10132                * obtain the absolute address of the GOT, and it is strongly
10133                * preferable from a performance point of view to avoid using
10134                * a runtime relocation for this.  The actual sequence of
10135                * instructions often look something like:
10136                *
10137                *        call    .L66
10138                * .L66:
10139                *        popl    %ebx
10140                *        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
10141                *
10142                *   The call and pop essentially return the absolute address
10143                * of the label .L66 and store it in %ebx.  The linker itself
10144                * will ultimately change the first operand of the addl so
10145                * that %ebx points to the GOT, but to keep things simple, the
10146                * .o file must have this operand set so that it generates not
10147                * the absolute address of .L66, but the absolute address of
10148                * itself.  This allows the linker itself simply treat a GOTPC
10149                * relocation as asking for a pcrel offset to the GOT to be
10150                * added in, and the addend of the relocation is stored in the
10151                * operand field for the instruction itself.
10152                *
10153                *   Our job here is to fix the operand so that it would add
10154                * the correct offset so that %ebx would point to itself.  The
10155                * thing that is tricky is that .-.L66 will point to the
10156                * beginning of the instruction, so we need to further modify
10157                * the operand so that it will point to itself.  There are
10158                * other cases where you have something like:
10159                *
10160                *        .long   $_GLOBAL_OFFSET_TABLE_+[.-.L66]
10161                *
10162                * and here no correction would be required.  Internally in
10163                * the assembler we treat operands of this form as not being
10164                * pcrel since the '.' is explicitly mentioned, and I wonder
10165                * whether it would simplify matters to do it this way.  Who
10166                * knows.  In earlier versions of the PIC patches, the
10167                * pcrel_adjust field was used to store the correction, but
10168                * since the expression is not pcrel, I felt it would be
10169                * confusing to do it this way.  */
10170
10171               if ((reloc_type == BFD_RELOC_32
10172                    || reloc_type == BFD_RELOC_X86_64_32S
10173                    || reloc_type == BFD_RELOC_64)
10174                   && GOT_symbol
10175                   && GOT_symbol == i.op[n].imms->X_add_symbol
10176                   && (i.op[n].imms->X_op == O_symbol
10177                       || (i.op[n].imms->X_op == O_add
10178                           && ((symbol_get_value_expression
10179                                (i.op[n].imms->X_op_symbol)->X_op)
10180                               == O_subtract))))
10181                 {
10182                   if (!object_64bit)
10183                     reloc_type = BFD_RELOC_386_GOTPC;
10184                   else if (size == 4)
10185                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
10186                   else if (size == 8)
10187                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
10188                   i.has_gotpc_tls_reloc = true;
10189                   i.op[n].imms->X_add_number +=
10190                     encoding_length (insn_start_frag, insn_start_off, p);
10191                 }
10192               fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10193                            i.op[n].imms, 0, reloc_type);
10194             }
10195         }
10196     }
10197 }
10198 \f
10199 /* x86_cons_fix_new is called via the expression parsing code when a
10200    reloc is needed.  We use this hook to get the correct .got reloc.  */
10201 static int cons_sign = -1;
10202
10203 void
10204 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
10205                   expressionS *exp, bfd_reloc_code_real_type r)
10206 {
10207   r = reloc (len, 0, cons_sign, r);
10208
10209 #ifdef TE_PE
10210   if (exp->X_op == O_secrel)
10211     {
10212       exp->X_op = O_symbol;
10213       r = BFD_RELOC_32_SECREL;
10214     }
10215 #endif
10216
10217   fix_new_exp (frag, off, len, exp, 0, r);
10218 }
10219
10220 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
10221    purpose of the `.dc.a' internal pseudo-op.  */
10222
10223 int
10224 x86_address_bytes (void)
10225 {
10226   if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
10227     return 4;
10228   return stdoutput->arch_info->bits_per_address / 8;
10229 }
10230
10231 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
10232      || defined (LEX_AT)) && !defined (TE_PE)
10233 # define lex_got(reloc, adjust, types) NULL
10234 #else
10235 /* Parse operands of the form
10236    <symbol>@GOTOFF+<nnn>
10237    and similar .plt or .got references.
10238
10239    If we find one, set up the correct relocation in RELOC and copy the
10240    input string, minus the `@GOTOFF' into a malloc'd buffer for
10241    parsing by the calling routine.  Return this buffer, and if ADJUST
10242    is non-null set it to the length of the string we removed from the
10243    input line.  Otherwise return NULL.  */
10244 static char *
10245 lex_got (enum bfd_reloc_code_real *rel,
10246          int *adjust,
10247          i386_operand_type *types)
10248 {
10249   /* Some of the relocations depend on the size of what field is to
10250      be relocated.  But in our callers i386_immediate and i386_displacement
10251      we don't yet know the operand size (this will be set by insn
10252      matching).  Hence we record the word32 relocation here,
10253      and adjust the reloc according to the real size in reloc().  */
10254   static const struct {
10255     const char *str;
10256     int len;
10257     const enum bfd_reloc_code_real rel[2];
10258     const i386_operand_type types64;
10259     bool need_GOT_symbol;
10260   } gotrel[] = {
10261 #ifndef TE_PE
10262 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10263     { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
10264                                         BFD_RELOC_SIZE32 },
10265       OPERAND_TYPE_IMM32_64, false },
10266 #endif
10267     { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
10268                                        BFD_RELOC_X86_64_PLTOFF64 },
10269       OPERAND_TYPE_IMM64, true },
10270     { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
10271                                        BFD_RELOC_X86_64_PLT32    },
10272       OPERAND_TYPE_IMM32_32S_DISP32, false },
10273     { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
10274                                        BFD_RELOC_X86_64_GOTPLT64 },
10275       OPERAND_TYPE_IMM64_DISP64, true },
10276     { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
10277                                        BFD_RELOC_X86_64_GOTOFF64 },
10278       OPERAND_TYPE_IMM64_DISP64, true },
10279     { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
10280                                        BFD_RELOC_X86_64_GOTPCREL },
10281       OPERAND_TYPE_IMM32_32S_DISP32, true },
10282     { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
10283                                        BFD_RELOC_X86_64_TLSGD    },
10284       OPERAND_TYPE_IMM32_32S_DISP32, true },
10285     { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
10286                                        _dummy_first_bfd_reloc_code_real },
10287       OPERAND_TYPE_NONE, true },
10288     { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
10289                                        BFD_RELOC_X86_64_TLSLD    },
10290       OPERAND_TYPE_IMM32_32S_DISP32, true },
10291     { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
10292                                        BFD_RELOC_X86_64_GOTTPOFF },
10293       OPERAND_TYPE_IMM32_32S_DISP32, true },
10294     { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
10295                                        BFD_RELOC_X86_64_TPOFF32  },
10296       OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10297     { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
10298                                        _dummy_first_bfd_reloc_code_real },
10299       OPERAND_TYPE_NONE, true },
10300     { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
10301                                        BFD_RELOC_X86_64_DTPOFF32 },
10302       OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10303     { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
10304                                        _dummy_first_bfd_reloc_code_real },
10305       OPERAND_TYPE_NONE, true },
10306     { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
10307                                        _dummy_first_bfd_reloc_code_real },
10308       OPERAND_TYPE_NONE, true },
10309     { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
10310                                        BFD_RELOC_X86_64_GOT32    },
10311       OPERAND_TYPE_IMM32_32S_64_DISP32, true },
10312     { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
10313                                        BFD_RELOC_X86_64_GOTPC32_TLSDESC },
10314       OPERAND_TYPE_IMM32_32S_DISP32, true },
10315     { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
10316                                        BFD_RELOC_X86_64_TLSDESC_CALL },
10317       OPERAND_TYPE_IMM32_32S_DISP32, true },
10318 #else /* TE_PE */
10319     { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
10320                                        BFD_RELOC_32_SECREL },
10321       OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
10322 #endif
10323   };
10324   char *cp;
10325   unsigned int j;
10326
10327 #if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
10328   if (!IS_ELF)
10329     return NULL;
10330 #endif
10331
10332   for (cp = input_line_pointer; *cp != '@'; cp++)
10333     if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10334       return NULL;
10335
10336   for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10337     {
10338       int len = gotrel[j].len;
10339       if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10340         {
10341           if (gotrel[j].rel[object_64bit] != 0)
10342             {
10343               int first, second;
10344               char *tmpbuf, *past_reloc;
10345
10346               *rel = gotrel[j].rel[object_64bit];
10347
10348               if (types)
10349                 {
10350                   if (flag_code != CODE_64BIT)
10351                     {
10352                       types->bitfield.imm32 = 1;
10353                       types->bitfield.disp32 = 1;
10354                     }
10355                   else
10356                     *types = gotrel[j].types64;
10357                 }
10358
10359               if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
10360                 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
10361
10362               /* The length of the first part of our input line.  */
10363               first = cp - input_line_pointer;
10364
10365               /* The second part goes from after the reloc token until
10366                  (and including) an end_of_line char or comma.  */
10367               past_reloc = cp + 1 + len;
10368               cp = past_reloc;
10369               while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10370                 ++cp;
10371               second = cp + 1 - past_reloc;
10372
10373               /* Allocate and copy string.  The trailing NUL shouldn't
10374                  be necessary, but be safe.  */
10375               tmpbuf = XNEWVEC (char, first + second + 2);
10376               memcpy (tmpbuf, input_line_pointer, first);
10377               if (second != 0 && *past_reloc != ' ')
10378                 /* Replace the relocation token with ' ', so that
10379                    errors like foo@GOTOFF1 will be detected.  */
10380                 tmpbuf[first++] = ' ';
10381               else
10382                 /* Increment length by 1 if the relocation token is
10383                    removed.  */
10384                 len++;
10385               if (adjust)
10386                 *adjust = len;
10387               memcpy (tmpbuf + first, past_reloc, second);
10388               tmpbuf[first + second] = '\0';
10389               return tmpbuf;
10390             }
10391
10392           as_bad (_("@%s reloc is not supported with %d-bit output format"),
10393                   gotrel[j].str, 1 << (5 + object_64bit));
10394           return NULL;
10395         }
10396     }
10397
10398   /* Might be a symbol version string.  Don't as_bad here.  */
10399   return NULL;
10400 }
10401 #endif
10402
10403 bfd_reloc_code_real_type
10404 x86_cons (expressionS *exp, int size)
10405 {
10406   bfd_reloc_code_real_type got_reloc = NO_RELOC;
10407
10408   intel_syntax = -intel_syntax;
10409
10410   exp->X_md = 0;
10411   if (size == 4 || (object_64bit && size == 8))
10412     {
10413       /* Handle @GOTOFF and the like in an expression.  */
10414       char *save;
10415       char *gotfree_input_line;
10416       int adjust = 0;
10417
10418       save = input_line_pointer;
10419       gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10420       if (gotfree_input_line)
10421         input_line_pointer = gotfree_input_line;
10422
10423       expression (exp);
10424
10425       if (gotfree_input_line)
10426         {
10427           /* expression () has merrily parsed up to the end of line,
10428              or a comma - in the wrong buffer.  Transfer how far
10429              input_line_pointer has moved to the right buffer.  */
10430           input_line_pointer = (save
10431                                 + (input_line_pointer - gotfree_input_line)
10432                                 + adjust);
10433           free (gotfree_input_line);
10434           if (exp->X_op == O_constant
10435               || exp->X_op == O_absent
10436               || exp->X_op == O_illegal
10437               || exp->X_op == O_register
10438               || exp->X_op == O_big)
10439             {
10440               char c = *input_line_pointer;
10441               *input_line_pointer = 0;
10442               as_bad (_("missing or invalid expression `%s'"), save);
10443               *input_line_pointer = c;
10444             }
10445           else if ((got_reloc == BFD_RELOC_386_PLT32
10446                     || got_reloc == BFD_RELOC_X86_64_PLT32)
10447                    && exp->X_op != O_symbol)
10448             {
10449               char c = *input_line_pointer;
10450               *input_line_pointer = 0;
10451               as_bad (_("invalid PLT expression `%s'"), save);
10452               *input_line_pointer = c;
10453             }
10454         }
10455     }
10456   else
10457     expression (exp);
10458
10459   intel_syntax = -intel_syntax;
10460
10461   if (intel_syntax)
10462     i386_intel_simplify (exp);
10463
10464   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
10465   if (size == 4 && exp->X_op == O_constant && !object_64bit)
10466     exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10467
10468   return got_reloc;
10469 }
10470
10471 static void
10472 signed_cons (int size)
10473 {
10474   if (object_64bit)
10475     cons_sign = 1;
10476   cons (size);
10477   cons_sign = -1;
10478 }
10479
10480 #ifdef TE_PE
10481 static void
10482 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
10483 {
10484   expressionS exp;
10485
10486   do
10487     {
10488       expression (&exp);
10489       if (exp.X_op == O_symbol)
10490         exp.X_op = O_secrel;
10491
10492       emit_expr (&exp, 4);
10493     }
10494   while (*input_line_pointer++ == ',');
10495
10496   input_line_pointer--;
10497   demand_empty_rest_of_line ();
10498 }
10499 #endif
10500
10501 /* Handle Vector operations.  */
10502
10503 static char *
10504 check_VecOperations (char *op_string)
10505 {
10506   const reg_entry *mask;
10507   const char *saved;
10508   char *end_op;
10509
10510   while (*op_string)
10511     {
10512       saved = op_string;
10513       if (*op_string == '{')
10514         {
10515           op_string++;
10516
10517           /* Check broadcasts.  */
10518           if (startswith (op_string, "1to"))
10519             {
10520               unsigned int bcst_type;
10521
10522               if (i.broadcast.type)
10523                 goto duplicated_vec_op;
10524
10525               op_string += 3;
10526               if (*op_string == '8')
10527                 bcst_type = 8;
10528               else if (*op_string == '4')
10529                 bcst_type = 4;
10530               else if (*op_string == '2')
10531                 bcst_type = 2;
10532               else if (*op_string == '1'
10533                        && *(op_string+1) == '6')
10534                 {
10535                   bcst_type = 16;
10536                   op_string++;
10537                 }
10538               else
10539                 {
10540                   as_bad (_("Unsupported broadcast: `%s'"), saved);
10541                   return NULL;
10542                 }
10543               op_string++;
10544
10545               i.broadcast.type = bcst_type;
10546               i.broadcast.operand = this_operand;
10547             }
10548           /* Check masking operation.  */
10549           else if ((mask = parse_register (op_string, &end_op)) != NULL)
10550             {
10551               if (mask == &bad_reg)
10552                 return NULL;
10553
10554               /* k0 can't be used for write mask.  */
10555               if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
10556                 {
10557                   as_bad (_("`%s%s' can't be used for write mask"),
10558                           register_prefix, mask->reg_name);
10559                   return NULL;
10560                 }
10561
10562               if (!i.mask.reg)
10563                 {
10564                   i.mask.reg = mask;
10565                   i.mask.operand = this_operand;
10566                 }
10567               else if (i.mask.reg->reg_num)
10568                 goto duplicated_vec_op;
10569               else
10570                 {
10571                   i.mask.reg = mask;
10572
10573                   /* Only "{z}" is allowed here.  No need to check
10574                      zeroing mask explicitly.  */
10575                   if (i.mask.operand != (unsigned int) this_operand)
10576                     {
10577                       as_bad (_("invalid write mask `%s'"), saved);
10578                       return NULL;
10579                     }
10580                 }
10581
10582               op_string = end_op;
10583             }
10584           /* Check zeroing-flag for masking operation.  */
10585           else if (*op_string == 'z')
10586             {
10587               if (!i.mask.reg)
10588                 {
10589                   i.mask.reg = reg_k0;
10590                   i.mask.zeroing = 1;
10591                   i.mask.operand = this_operand;
10592                 }
10593               else
10594                 {
10595                   if (i.mask.zeroing)
10596                     {
10597                     duplicated_vec_op:
10598                       as_bad (_("duplicated `%s'"), saved);
10599                       return NULL;
10600                     }
10601
10602                   i.mask.zeroing = 1;
10603
10604                   /* Only "{%k}" is allowed here.  No need to check mask
10605                      register explicitly.  */
10606                   if (i.mask.operand != (unsigned int) this_operand)
10607                     {
10608                       as_bad (_("invalid zeroing-masking `%s'"),
10609                               saved);
10610                       return NULL;
10611                     }
10612                 }
10613
10614               op_string++;
10615             }
10616           else
10617             goto unknown_vec_op;
10618
10619           if (*op_string != '}')
10620             {
10621               as_bad (_("missing `}' in `%s'"), saved);
10622               return NULL;
10623             }
10624           op_string++;
10625
10626           /* Strip whitespace since the addition of pseudo prefixes
10627              changed how the scrubber treats '{'.  */
10628           if (is_space_char (*op_string))
10629             ++op_string;
10630
10631           continue;
10632         }
10633     unknown_vec_op:
10634       /* We don't know this one.  */
10635       as_bad (_("unknown vector operation: `%s'"), saved);
10636       return NULL;
10637     }
10638
10639   if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
10640     {
10641       as_bad (_("zeroing-masking only allowed with write mask"));
10642       return NULL;
10643     }
10644
10645   return op_string;
10646 }
10647
10648 static int
10649 i386_immediate (char *imm_start)
10650 {
10651   char *save_input_line_pointer;
10652   char *gotfree_input_line;
10653   segT exp_seg = 0;
10654   expressionS *exp;
10655   i386_operand_type types;
10656
10657   operand_type_set (&types, ~0);
10658
10659   if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
10660     {
10661       as_bad (_("at most %d immediate operands are allowed"),
10662               MAX_IMMEDIATE_OPERANDS);
10663       return 0;
10664     }
10665
10666   exp = &im_expressions[i.imm_operands++];
10667   i.op[this_operand].imms = exp;
10668
10669   if (is_space_char (*imm_start))
10670     ++imm_start;
10671
10672   save_input_line_pointer = input_line_pointer;
10673   input_line_pointer = imm_start;
10674
10675   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10676   if (gotfree_input_line)
10677     input_line_pointer = gotfree_input_line;
10678
10679   exp_seg = expression (exp);
10680
10681   SKIP_WHITESPACE ();
10682   if (*input_line_pointer)
10683     as_bad (_("junk `%s' after expression"), input_line_pointer);
10684
10685   input_line_pointer = save_input_line_pointer;
10686   if (gotfree_input_line)
10687     {
10688       free (gotfree_input_line);
10689
10690       if (exp->X_op == O_constant)
10691         exp->X_op = O_illegal;
10692     }
10693
10694   if (exp_seg == reg_section)
10695     {
10696       as_bad (_("illegal immediate register operand %s"), imm_start);
10697       return 0;
10698     }
10699
10700   return i386_finalize_immediate (exp_seg, exp, types, imm_start);
10701 }
10702
10703 static int
10704 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10705                          i386_operand_type types, const char *imm_start)
10706 {
10707   if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
10708     {
10709       if (imm_start)
10710         as_bad (_("missing or invalid immediate expression `%s'"),
10711                 imm_start);
10712       return 0;
10713     }
10714   else if (exp->X_op == O_constant)
10715     {
10716       /* Size it properly later.  */
10717       i.types[this_operand].bitfield.imm64 = 1;
10718
10719       /* If not 64bit, sign/zero extend val, to account for wraparound
10720          when !BFD64.  */
10721       if (flag_code != CODE_64BIT)
10722         exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10723     }
10724 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10725   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
10726            && exp_seg != absolute_section
10727            && exp_seg != text_section
10728            && exp_seg != data_section
10729            && exp_seg != bss_section
10730            && exp_seg != undefined_section
10731            && !bfd_is_com_section (exp_seg))
10732     {
10733       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10734       return 0;
10735     }
10736 #endif
10737   else
10738     {
10739       /* This is an address.  The size of the address will be
10740          determined later, depending on destination register,
10741          suffix, or the default for the section.  */
10742       i.types[this_operand].bitfield.imm8 = 1;
10743       i.types[this_operand].bitfield.imm16 = 1;
10744       i.types[this_operand].bitfield.imm32 = 1;
10745       i.types[this_operand].bitfield.imm32s = 1;
10746       i.types[this_operand].bitfield.imm64 = 1;
10747       i.types[this_operand] = operand_type_and (i.types[this_operand],
10748                                                 types);
10749     }
10750
10751   return 1;
10752 }
10753
10754 static char *
10755 i386_scale (char *scale)
10756 {
10757   offsetT val;
10758   char *save = input_line_pointer;
10759
10760   input_line_pointer = scale;
10761   val = get_absolute_expression ();
10762
10763   switch (val)
10764     {
10765     case 1:
10766       i.log2_scale_factor = 0;
10767       break;
10768     case 2:
10769       i.log2_scale_factor = 1;
10770       break;
10771     case 4:
10772       i.log2_scale_factor = 2;
10773       break;
10774     case 8:
10775       i.log2_scale_factor = 3;
10776       break;
10777     default:
10778       {
10779         char sep = *input_line_pointer;
10780
10781         *input_line_pointer = '\0';
10782         as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
10783                 scale);
10784         *input_line_pointer = sep;
10785         input_line_pointer = save;
10786         return NULL;
10787       }
10788     }
10789   if (i.log2_scale_factor != 0 && i.index_reg == 0)
10790     {
10791       as_warn (_("scale factor of %d without an index register"),
10792                1 << i.log2_scale_factor);
10793       i.log2_scale_factor = 0;
10794     }
10795   scale = input_line_pointer;
10796   input_line_pointer = save;
10797   return scale;
10798 }
10799
10800 static int
10801 i386_displacement (char *disp_start, char *disp_end)
10802 {
10803   expressionS *exp;
10804   segT exp_seg = 0;
10805   char *save_input_line_pointer;
10806   char *gotfree_input_line;
10807   int override;
10808   i386_operand_type bigdisp, types = anydisp;
10809   int ret;
10810
10811   if (i.disp_operands == MAX_MEMORY_OPERANDS)
10812     {
10813       as_bad (_("at most %d displacement operands are allowed"),
10814               MAX_MEMORY_OPERANDS);
10815       return 0;
10816     }
10817
10818   operand_type_set (&bigdisp, 0);
10819   if (i.jumpabsolute
10820       || i.types[this_operand].bitfield.baseindex
10821       || (current_templates->start->opcode_modifier.jump != JUMP
10822           && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
10823     {
10824       i386_addressing_mode ();
10825       override = (i.prefix[ADDR_PREFIX] != 0);
10826       if (flag_code == CODE_64BIT)
10827         {
10828           if (!override)
10829             {
10830               bigdisp.bitfield.disp32s = 1;
10831               bigdisp.bitfield.disp64 = 1;
10832             }
10833           else
10834             bigdisp.bitfield.disp32 = 1;
10835         }
10836       else if ((flag_code == CODE_16BIT) ^ override)
10837           bigdisp.bitfield.disp16 = 1;
10838       else
10839           bigdisp.bitfield.disp32 = 1;
10840     }
10841   else
10842     {
10843       /* For PC-relative branches, the width of the displacement may be
10844          dependent upon data size, but is never dependent upon address size.
10845          Also make sure to not unintentionally match against a non-PC-relative
10846          branch template.  */
10847       static templates aux_templates;
10848       const insn_template *t = current_templates->start;
10849       bool has_intel64 = false;
10850
10851       aux_templates.start = t;
10852       while (++t < current_templates->end)
10853         {
10854           if (t->opcode_modifier.jump
10855               != current_templates->start->opcode_modifier.jump)
10856             break;
10857           if ((t->opcode_modifier.isa64 >= INTEL64))
10858             has_intel64 = true;
10859         }
10860       if (t < current_templates->end)
10861         {
10862           aux_templates.end = t;
10863           current_templates = &aux_templates;
10864         }
10865
10866       override = (i.prefix[DATA_PREFIX] != 0);
10867       if (flag_code == CODE_64BIT)
10868         {
10869           if ((override || i.suffix == WORD_MNEM_SUFFIX)
10870               && (!intel64 || !has_intel64))
10871             bigdisp.bitfield.disp16 = 1;
10872           else
10873             bigdisp.bitfield.disp32s = 1;
10874         }
10875       else
10876         {
10877           if (!override)
10878             override = (i.suffix == (flag_code != CODE_16BIT
10879                                      ? WORD_MNEM_SUFFIX
10880                                      : LONG_MNEM_SUFFIX));
10881           bigdisp.bitfield.disp32 = 1;
10882           if ((flag_code == CODE_16BIT) ^ override)
10883             {
10884               bigdisp.bitfield.disp32 = 0;
10885               bigdisp.bitfield.disp16 = 1;
10886             }
10887         }
10888     }
10889   i.types[this_operand] = operand_type_or (i.types[this_operand],
10890                                            bigdisp);
10891
10892   exp = &disp_expressions[i.disp_operands];
10893   i.op[this_operand].disps = exp;
10894   i.disp_operands++;
10895   save_input_line_pointer = input_line_pointer;
10896   input_line_pointer = disp_start;
10897   END_STRING_AND_SAVE (disp_end);
10898
10899 #ifndef GCC_ASM_O_HACK
10900 #define GCC_ASM_O_HACK 0
10901 #endif
10902 #if GCC_ASM_O_HACK
10903   END_STRING_AND_SAVE (disp_end + 1);
10904   if (i.types[this_operand].bitfield.baseIndex
10905       && displacement_string_end[-1] == '+')
10906     {
10907       /* This hack is to avoid a warning when using the "o"
10908          constraint within gcc asm statements.
10909          For instance:
10910
10911          #define _set_tssldt_desc(n,addr,limit,type) \
10912          __asm__ __volatile__ ( \
10913          "movw %w2,%0\n\t" \
10914          "movw %w1,2+%0\n\t" \
10915          "rorl $16,%1\n\t" \
10916          "movb %b1,4+%0\n\t" \
10917          "movb %4,5+%0\n\t" \
10918          "movb $0,6+%0\n\t" \
10919          "movb %h1,7+%0\n\t" \
10920          "rorl $16,%1" \
10921          : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
10922
10923          This works great except that the output assembler ends
10924          up looking a bit weird if it turns out that there is
10925          no offset.  You end up producing code that looks like:
10926
10927          #APP
10928          movw $235,(%eax)
10929          movw %dx,2+(%eax)
10930          rorl $16,%edx
10931          movb %dl,4+(%eax)
10932          movb $137,5+(%eax)
10933          movb $0,6+(%eax)
10934          movb %dh,7+(%eax)
10935          rorl $16,%edx
10936          #NO_APP
10937
10938          So here we provide the missing zero.  */
10939
10940       *displacement_string_end = '0';
10941     }
10942 #endif
10943   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10944   if (gotfree_input_line)
10945     input_line_pointer = gotfree_input_line;
10946
10947   exp_seg = expression (exp);
10948
10949   SKIP_WHITESPACE ();
10950   if (*input_line_pointer)
10951     as_bad (_("junk `%s' after expression"), input_line_pointer);
10952 #if GCC_ASM_O_HACK
10953   RESTORE_END_STRING (disp_end + 1);
10954 #endif
10955   input_line_pointer = save_input_line_pointer;
10956   if (gotfree_input_line)
10957     {
10958       free (gotfree_input_line);
10959
10960       if (exp->X_op == O_constant || exp->X_op == O_register)
10961         exp->X_op = O_illegal;
10962     }
10963
10964   ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
10965
10966   RESTORE_END_STRING (disp_end);
10967
10968   return ret;
10969 }
10970
10971 static int
10972 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10973                             i386_operand_type types, const char *disp_start)
10974 {
10975   i386_operand_type bigdisp;
10976   int ret = 1;
10977
10978   /* We do this to make sure that the section symbol is in
10979      the symbol table.  We will ultimately change the relocation
10980      to be relative to the beginning of the section.  */
10981   if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
10982       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
10983       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
10984     {
10985       if (exp->X_op != O_symbol)
10986         goto inv_disp;
10987
10988       if (S_IS_LOCAL (exp->X_add_symbol)
10989           && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
10990           && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
10991         section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
10992       exp->X_op = O_subtract;
10993       exp->X_op_symbol = GOT_symbol;
10994       if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
10995         i.reloc[this_operand] = BFD_RELOC_32_PCREL;
10996       else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
10997         i.reloc[this_operand] = BFD_RELOC_64;
10998       else
10999         i.reloc[this_operand] = BFD_RELOC_32;
11000     }
11001
11002   else if (exp->X_op == O_absent
11003            || exp->X_op == O_illegal
11004            || exp->X_op == O_big)
11005     {
11006     inv_disp:
11007       as_bad (_("missing or invalid displacement expression `%s'"),
11008               disp_start);
11009       ret = 0;
11010     }
11011
11012 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11013   else if (exp->X_op != O_constant
11014            && OUTPUT_FLAVOR == bfd_target_aout_flavour
11015            && exp_seg != absolute_section
11016            && exp_seg != text_section
11017            && exp_seg != data_section
11018            && exp_seg != bss_section
11019            && exp_seg != undefined_section
11020            && !bfd_is_com_section (exp_seg))
11021     {
11022       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11023       ret = 0;
11024     }
11025 #endif
11026
11027   if (current_templates->start->opcode_modifier.jump == JUMP_BYTE
11028       /* Constants get taken care of by optimize_disp().  */
11029       && exp->X_op != O_constant)
11030     i.types[this_operand].bitfield.disp8 = 1;
11031
11032   /* Check if this is a displacement only operand.  */
11033   bigdisp = i.types[this_operand];
11034   bigdisp.bitfield.disp8 = 0;
11035   bigdisp.bitfield.disp16 = 0;
11036   bigdisp.bitfield.disp32 = 0;
11037   bigdisp.bitfield.disp32s = 0;
11038   bigdisp.bitfield.disp64 = 0;
11039   if (operand_type_all_zero (&bigdisp))
11040     i.types[this_operand] = operand_type_and (i.types[this_operand],
11041                                               types);
11042
11043   return ret;
11044 }
11045
11046 /* Return the active addressing mode, taking address override and
11047    registers forming the address into consideration.  Update the
11048    address override prefix if necessary.  */
11049
11050 static enum flag_code
11051 i386_addressing_mode (void)
11052 {
11053   enum flag_code addr_mode;
11054
11055   if (i.prefix[ADDR_PREFIX])
11056     addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
11057   else if (flag_code == CODE_16BIT
11058            && current_templates->start->cpu_flags.bitfield.cpumpx
11059            /* Avoid replacing the "16-bit addressing not allowed" diagnostic
11060               from md_assemble() by "is not a valid base/index expression"
11061               when there is a base and/or index.  */
11062            && !i.types[this_operand].bitfield.baseindex)
11063     {
11064       /* MPX insn memory operands with neither base nor index must be forced
11065          to use 32-bit addressing in 16-bit mode.  */
11066       addr_mode = CODE_32BIT;
11067       i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11068       ++i.prefixes;
11069       gas_assert (!i.types[this_operand].bitfield.disp16);
11070       gas_assert (!i.types[this_operand].bitfield.disp32);
11071     }
11072   else
11073     {
11074       addr_mode = flag_code;
11075
11076 #if INFER_ADDR_PREFIX
11077       if (i.mem_operands == 0)
11078         {
11079           /* Infer address prefix from the first memory operand.  */
11080           const reg_entry *addr_reg = i.base_reg;
11081
11082           if (addr_reg == NULL)
11083             addr_reg = i.index_reg;
11084
11085           if (addr_reg)
11086             {
11087               if (addr_reg->reg_type.bitfield.dword)
11088                 addr_mode = CODE_32BIT;
11089               else if (flag_code != CODE_64BIT
11090                        && addr_reg->reg_type.bitfield.word)
11091                 addr_mode = CODE_16BIT;
11092
11093               if (addr_mode != flag_code)
11094                 {
11095                   i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11096                   i.prefixes += 1;
11097                   /* Change the size of any displacement too.  At most one
11098                      of Disp16 or Disp32 is set.
11099                      FIXME.  There doesn't seem to be any real need for
11100                      separate Disp16 and Disp32 flags.  The same goes for
11101                      Imm16 and Imm32.  Removing them would probably clean
11102                      up the code quite a lot.  */
11103                   if (flag_code != CODE_64BIT
11104                       && (i.types[this_operand].bitfield.disp16
11105                           || i.types[this_operand].bitfield.disp32))
11106                     i.types[this_operand]
11107                       = operand_type_xor (i.types[this_operand], disp16_32);
11108                 }
11109             }
11110         }
11111 #endif
11112     }
11113
11114   return addr_mode;
11115 }
11116
/* Make sure the memory operand we've been dealt is valid.

   OPERAND_STRING is the source text of the operand; it is only used
   in diagnostics.  The operand itself is taken from the global insn
   state (i.base_reg, i.index_reg, i.log2_scale_factor,
   i.disp_encoding and i.types[this_operand]) and is checked against
   the first template in current_templates, using the address size
   reported by i386_addressing_mode ().

   Return 1 on success, 0 on a failure (after calling as_bad).  Note
   that a string insn given an unexpected address register of the
   right size only draws a warning and is still accepted.  */

static int
i386_index_check (const char *operand_string)
{
  /* Operand category named by the "not a valid %s expression"
     diagnostic below.  */
  const char *kind = "base/index";
  enum flag_code addr_mode = i386_addressing_mode ();
  const insn_template *t = current_templates->start;

  /* Take the string insn path when the first template is a non-PadLock
     string insn and either the last template is a string insn as well
     or a memory operand has already been recorded.  */
  if (t->opcode_modifier.isstring
      && !t->cpu_flags.bitfield.cpupadlock
      && (current_templates->end[-1].opcode_modifier.isstring
          || i.mem_operands))
    {
      /* Memory operands of string insns are special in that they only allow
         a single register (rDI, rSI, or rBX) as their memory address.  */
      const reg_entry *expected_reg;
      /* Both tables are indexed by addr_mode; NOTE(review): the row
         order assumes enum flag_code is 32-bit, 16-bit, 64-bit --
         confirm against its declaration.  */
      static const char *di_si[][2] =
        {
          { "esi", "edi" },
          { "si", "di" },
          { "rsi", "rdi" }
        };
      static const char *bx[] = { "ebx", "bx", "rbx" };

      kind = "string address";

      if (t->opcode_modifier.prefixok == PrefixRep)
        {
          /* Index of the operand addressed through %es, derived from
             the last template's isstring value.  */
          int es_op = current_templates->end[-1].opcode_modifier.isstring
                      - IS_STRING_ES_OP0;
          /* Template operand index this memory operand corresponds
             to.  */
          int op = 0;

          if (!current_templates->end[-1].operand_types[0].bitfield.baseindex
              || ((!i.mem_operands != !intel_syntax)
                  && current_templates->end[-1].operand_types[1]
                     .bitfield.baseindex))
            op = 1;
          /* Column 1 (the DI flavour) is picked when OP is the %es
             operand, column 0 (the SI flavour) otherwise.  */
          expected_reg
            = (const reg_entry *) str_hash_find (reg_hash,
                                                 di_si[addr_mode][op == es_op]);
        }
      else
        /* String insns without PrefixRep expect rBX here.  */
        expected_reg
          = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);

      /* Anything other than the bare expected register (a different
         base, any index, or a displacement) is suspicious.  */
      if (i.base_reg != expected_reg
          || i.index_reg
          || operand_type_check (i.types[this_operand], disp))
        {
          /* The second memory operand must have the same size as
             the first one.  A size mismatch is a hard error; every
             other deviation is merely warned about.  */
          if (i.mem_operands
              && i.base_reg
              && !((addr_mode == CODE_64BIT
                    && i.base_reg->reg_type.bitfield.qword)
                   || (addr_mode == CODE_32BIT
                       ? i.base_reg->reg_type.bitfield.dword
                       : i.base_reg->reg_type.bitfield.word)))
            goto bad_address;

          as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
                   operand_string,
                   intel_syntax ? '[' : '(',
                   register_prefix,
                   expected_reg->reg_name,
                   intel_syntax ? ']' : ')');
          return 1;
        }
      else
        return 1;

      /* Shared error exit; also the target of gotos from the
         non-string checks in the else branch below.  */
    bad_address:
      as_bad (_("`%s' is not a valid %s expression"),
              operand_string, kind);
      return 0;
    }
  else
    {
      if (addr_mode != CODE_16BIT)
        {
          /* 32-bit/64-bit checks.  */
          if (i.disp_encoding == disp_encoding_16bit)
            {
              /* Also entered by goto from the 16-bit checks, hence the
                 message is selected by addr_mode.  */
            bad_disp:
              as_bad (_("invalid `%s' prefix"),
                      addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
              return 0;
            }

          /* Reject a base register of the wrong width, RegIP combined
             with an index, RegIZ as base, and a non-vector index
             register which has the wrong width or is not usable as
             base/index.  */
          if ((i.base_reg
               && ((addr_mode == CODE_64BIT
                    ? !i.base_reg->reg_type.bitfield.qword
                    : !i.base_reg->reg_type.bitfield.dword)
                   || (i.index_reg && i.base_reg->reg_num == RegIP)
                   || i.base_reg->reg_num == RegIZ))
              || (i.index_reg
                  && !i.index_reg->reg_type.bitfield.xmmword
                  && !i.index_reg->reg_type.bitfield.ymmword
                  && !i.index_reg->reg_type.bitfield.zmmword
                  && ((addr_mode == CODE_64BIT
                       ? !i.index_reg->reg_type.bitfield.qword
                       : !i.index_reg->reg_type.bitfield.dword)
                      || !i.index_reg->reg_type.bitfield.baseindex)))
            goto bad_address;

          /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
          if ((t->opcode_modifier.opcodeprefix == PREFIX_0XF3
               && t->opcode_modifier.opcodespace == SPACE_0F
               && t->base_opcode == 0x1b)
              || (t->opcode_modifier.opcodeprefix == PREFIX_NONE
                  && t->opcode_modifier.opcodespace == SPACE_0F
                  && (t->base_opcode & ~1) == 0x1a)
              || t->opcode_modifier.sib == SIBMEM)
            {
              /* They cannot use RIP-relative addressing. */
              if (i.base_reg && i.base_reg->reg_num == RegIP)
                {
                  as_bad (_("`%s' cannot be used here"), operand_string);
                  return 0;
                }

              /* bndldx and bndstx ignore their scale factor. */
              if (t->opcode_modifier.opcodeprefix == PREFIX_NONE
                  && t->opcode_modifier.opcodespace == SPACE_0F
                  && (t->base_opcode & ~1) == 0x1a
                  && i.log2_scale_factor)
                as_warn (_("register scaling is being ignored here"));
            }
        }
      else
        {
          /* 16-bit checks.  */
          if (i.disp_encoding == disp_encoding_32bit)
            goto bad_disp;

          /* Base and index must both be 16-bit base/index registers.
             An index additionally requires a base with reg_num < 6,
             an index with reg_num >= 6 and no scaling.  */
          if ((i.base_reg
               && (!i.base_reg->reg_type.bitfield.word
                   || !i.base_reg->reg_type.bitfield.baseindex))
              || (i.index_reg
                  && (!i.index_reg->reg_type.bitfield.word
                      || !i.index_reg->reg_type.bitfield.baseindex
                      || !(i.base_reg
                           && i.base_reg->reg_num < 6
                           && i.index_reg->reg_num >= 6
                           && i.log2_scale_factor == 0))))
            goto bad_address;
        }
    }
  return 1;
}
11269
11270 /* Handle vector immediates.  */
11271
11272 static int
11273 RC_SAE_immediate (const char *imm_start)
11274 {
11275   unsigned int match_found, j;
11276   const char *pstr = imm_start;
11277   expressionS *exp;
11278
11279   if (*pstr != '{')
11280     return 0;
11281
11282   pstr++;
11283   match_found = 0;
11284   for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
11285     {
11286       if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
11287         {
11288           if (i.rounding.type != rc_none)
11289             {
11290               as_bad (_("duplicated `%s'"), imm_start);
11291               return 0;
11292             }
11293
11294           i.rounding.type = RC_NamesTable[j].type;
11295           i.rounding.operand = this_operand;
11296
11297           pstr += RC_NamesTable[j].len;
11298           match_found = 1;
11299           break;
11300         }
11301     }
11302   if (!match_found)
11303     return 0;
11304
11305   if (*pstr++ != '}')
11306     {
11307       as_bad (_("Missing '}': '%s'"), imm_start);
11308       return 0;
11309     }
11310   /* RC/SAE immediate string should contain nothing more.  */;
11311   if (*pstr != 0)
11312     {
11313       as_bad (_("Junk after '}': '%s'"), imm_start);
11314       return 0;
11315     }
11316
11317   exp = &im_expressions[i.imm_operands++];
11318   i.op[this_operand].imms = exp;
11319
11320   exp->X_op = O_constant;
11321   exp->X_add_number = 0;
11322   exp->X_add_symbol = (symbolS *) 0;
11323   exp->X_op_symbol = (symbolS *) 0;
11324
11325   i.types[this_operand].bitfield.imm8 = 1;
11326   return 1;
11327 }
11328
11329 /* Only string instructions can have a second memory operand, so
11330    reduce current_templates to just those if it contains any.  */
11331 static int
11332 maybe_adjust_templates (void)
11333 {
11334   const insn_template *t;
11335
11336   gas_assert (i.mem_operands == 1);
11337
11338   for (t = current_templates->start; t < current_templates->end; ++t)
11339     if (t->opcode_modifier.isstring)
11340       break;
11341
11342   if (t < current_templates->end)
11343     {
11344       static templates aux_templates;
11345       bool recheck;
11346
11347       aux_templates.start = t;
11348       for (; t < current_templates->end; ++t)
11349         if (!t->opcode_modifier.isstring)
11350           break;
11351       aux_templates.end = t;
11352
11353       /* Determine whether to re-check the first memory operand.  */
11354       recheck = (aux_templates.start != current_templates->start
11355                  || t != current_templates->end);
11356
11357       current_templates = &aux_templates;
11358
11359       if (recheck)
11360         {
11361           i.mem_operands = 0;
11362           if (i.memop1_string != NULL
11363               && i386_index_check (i.memop1_string) == 0)
11364             return 0;
11365           i.mem_operands = 1;
11366         }
11367     }
11368
11369   return 1;
11370 }
11371
11372 static INLINE bool starts_memory_operand (char c)
11373 {
11374   return ISDIGIT (c)
11375          || is_identifier_char (c)
11376          || strchr ("([\"+-!~", c);
11377 }
11378
/* Parse OPERAND_STRING into the i386_insn structure I.  Returns zero
   on error.

   The operand is tried, in this order, as: a register (a segment
   register followed by `:' instead introduces a memory reference), an
   immediate introduced by IMMEDIATE_PREFIX, an RC/SAE pseudo
   immediate, and finally a memory reference.  A leading
   ABSOLUTE_PREFIX sets i.jumpabsolute.  Note that OPERAND_STRING may
   be modified: when diagnosing a bad register name inside a
   base/index expression the text is cut at the following `,'.  */

static int
i386_att_operand (char *operand_string)
{
  const reg_entry *r;
  /* Set by parse_register () to just past the text it consumed.  */
  char *end_op;
  /* Parse position within OPERAND_STRING.  */
  char *op_string = operand_string;

  if (is_space_char (*op_string))
    ++op_string;

  /* We check for an absolute prefix (differentiating,
     for example, 'jmp pc_relative_label' from 'jmp *absolute_label'.  */
  if (*op_string == ABSOLUTE_PREFIX)
    {
      ++op_string;
      if (is_space_char (*op_string))
        ++op_string;
      i.jumpabsolute = true;
    }

  /* Check if operand is a register.  */
  if ((r = parse_register (op_string, &end_op)) != NULL)
    {
      i386_operand_type temp;

      /* parse_register () hands back &bad_reg for a register that is
         recognized but not acceptable; give up on the operand.  */
      if (r == &bad_reg)
        return 0;

      /* Check for a segment override by searching for ':' after a
         segment register.  */
      op_string = end_op;
      if (is_space_char (*op_string))
        ++op_string;
      if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
        {
          /* Record the override for the memory operand about to be
             parsed.  */
          i.seg[i.mem_operands] = r;

          /* Skip the ':' and whitespace.  */
          ++op_string;
          if (is_space_char (*op_string))
            ++op_string;

          /* Handle case of %es:*foo.  */
          if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX)
            {
              ++op_string;
              if (is_space_char (*op_string))
                ++op_string;
              i.jumpabsolute = true;
            }

          if (!starts_memory_operand (*op_string))
            {
              as_bad (_("bad memory operand `%s'"), op_string);
              return 0;
            }
          /* Continue in the memory reference branch further down.  */
          goto do_memory_reference;
        }

      /* Handle vector operations.  */
      if (*op_string == '{')
        {
          op_string = check_VecOperations (op_string);
          if (op_string == NULL)
            return 0;
        }

      if (*op_string)
        {
          as_bad (_("junk `%s' after register"), op_string);
          return 0;
        }
      /* Record a plain register operand; the baseindex attribute is
         dropped since the register is not used for addressing here.  */
      temp = r->reg_type;
      temp.bitfield.baseindex = 0;
      i.types[this_operand] = operand_type_or (i.types[this_operand],
                                               temp);
      i.types[this_operand].bitfield.unspecified = 0;
      i.op[this_operand].regs = r;
      i.reg_operands++;
    }
  else if (*op_string == REGISTER_PREFIX)
    {
      /* Looked like a register, but parse_register () did not
         recognize it.  */
      as_bad (_("bad register name `%s'"), op_string);
      return 0;
    }
  else if (*op_string == IMMEDIATE_PREFIX)
    {
      ++op_string;
      if (i.jumpabsolute)
        {
          as_bad (_("immediate operand illegal with absolute jump"));
          return 0;
        }
      if (!i386_immediate (op_string))
        return 0;
    }
  else if (RC_SAE_immediate (operand_string))
    {
      /* If it is a RC or SAE immediate, do nothing.  */
      ;
    }
  else if (starts_memory_operand (*op_string))
    {
      /* This is a memory reference of some sort.  */
      /* Scan position; starts at the end of the operand and is moved
         backwards to find the base/index part.  */
      char *base_string;

      /* Start and end of displacement string expression (if found).  */
      char *displacement_string_start;
      char *displacement_string_end;

      /* Also entered by goto from the segment override handling
         above.  */
    do_memory_reference:
      /* A second memory operand is only possible for string insns;
         narrow the templates accordingly before deciding.  */
      if (i.mem_operands == 1 && !maybe_adjust_templates ())
        return 0;
      if ((i.mem_operands == 1
           && !current_templates->start->opcode_modifier.isstring)
          || i.mem_operands == 2)
        {
          as_bad (_("too many memory references for `%s'"),
                  current_templates->start->name);
          return 0;
        }

      /* Check for base index form.  We detect the base index form by
         looking for an ')' at the end of the operand, searching
         for the '(' matching it, and finding a REGISTER_PREFIX or ','
         after the '('.  */
      base_string = op_string + strlen (op_string);

      /* Handle vector operations.  */
      --base_string;
      if (is_space_char (*base_string))
        --base_string;

      if (*base_string == '}')
        {
          /* Start of the leftmost of the trailing `{...}' groups.  */
          char *vop_start = NULL;

          /* Walk backwards over all trailing brace groups, stopping
             at a double quote or when the character ahead of a `{'
             is not another `}'.  */
          while (base_string-- > op_string)
            {
              if (*base_string == '"')
                break;
              if (*base_string != '{')
                continue;

              vop_start = base_string;

              --base_string;
              if (is_space_char (*base_string))
                --base_string;

              if (*base_string != '}')
                break;

              /* Another group precedes this one; keep scanning.  */
              vop_start = NULL;
            }

          if (!vop_start)
            {
              as_bad (_("unbalanced figure braces"));
              return 0;
            }

          if (check_VecOperations (vop_start) == NULL)
            return 0;
        }

      /* If we only have a displacement, set-up for it to be parsed later.  */
      displacement_string_start = op_string;
      displacement_string_end = base_string + 1;

      if (*base_string == ')')
        {
          /* Position of the delimiter found by the backwards scan;
             the displacement, if any, ends here.  */
          char *temp_string;

          /* We've already checked that the number of left & right ()'s are
             equal, so this loop will not be infinite.  */
          do
            {
              base_string--;
            }
          while (*base_string != '(' && *base_string != ')'
                 && *base_string != '"');

          temp_string = base_string;

          /* Skip past '(' and whitespace.  */
          if (*base_string == '(')
            ++base_string;
          if (is_space_char (*base_string))
            ++base_string;

          /* A `,' (no base register) or a register right after the
             parenthesis marks this as a base/index expression.  */
          if (*base_string == ','
              || ((i.base_reg = parse_register (base_string, &end_op))
                  != NULL))
            {
              displacement_string_end = temp_string;

              i.types[this_operand].bitfield.baseindex = 1;

              if (i.base_reg)
                {
                  if (i.base_reg == &bad_reg)
                    return 0;
                  base_string = end_op;
                  if (is_space_char (*base_string))
                    ++base_string;
                }

              /* There may be an index reg or scale factor here.  */
              if (*base_string == ',')
                {
                  ++base_string;
                  if (is_space_char (*base_string))
                    ++base_string;

                  if ((i.index_reg = parse_register (base_string, &end_op))
                      != NULL)
                    {
                      if (i.index_reg == &bad_reg)
                        return 0;
                      base_string = end_op;
                      if (is_space_char (*base_string))
                        ++base_string;
                      if (*base_string == ',')
                        {
                          ++base_string;
                          if (is_space_char (*base_string))
                            ++base_string;
                        }
                      else if (*base_string != ')')
                        {
                          as_bad (_("expecting `,' or `)' "
                                    "after index register in `%s'"),
                                  operand_string);
                          return 0;
                        }
                    }
                  else if (*base_string == REGISTER_PREFIX)
                    {
                      /* Cut the text at the next `,' so that only the
                         offending register name is reported.  */
                      end_op = strchr (base_string, ',');
                      if (end_op)
                        *end_op = '\0';
                      as_bad (_("bad register name `%s'"), base_string);
                      return 0;
                    }

                  /* Check for scale factor.  */
                  if (*base_string != ')')
                    {
                      char *end_scale = i386_scale (base_string);

                      if (!end_scale)
                        return 0;

                      base_string = end_scale;
                      if (is_space_char (*base_string))
                        ++base_string;
                      if (*base_string != ')')
                        {
                          as_bad (_("expecting `)' "
                                    "after scale factor in `%s'"),
                                  operand_string);
                          return 0;
                        }
                    }
                  else if (!i.index_reg)
                    {
                      as_bad (_("expecting index register or scale factor "
                                "after `,'; got '%c'"),
                              *base_string);
                      return 0;
                    }
                }
              else if (*base_string != ')')
                {
                  as_bad (_("expecting `,' or `)' "
                            "after base register in `%s'"),
                          operand_string);
                  return 0;
                }
            }
          else if (*base_string == REGISTER_PREFIX)
            {
              /* Cut the text at the next `,' so that only the
                 offending register name is reported.  */
              end_op = strchr (base_string, ',');
              if (end_op)
                *end_op = '\0';
              as_bad (_("bad register name `%s'"), base_string);
              return 0;
            }
        }

      /* If there's an expression beginning the operand, parse it,
         assuming displacement_string_start and
         displacement_string_end are meaningful.  */
      if (displacement_string_start != displacement_string_end)
        {
          if (!i386_displacement (displacement_string_start,
                                  displacement_string_end))
            return 0;
        }

      /* Special case for (%dx) while doing input/output op.  The
         operand then takes the type of the base register and is not
         counted as a memory operand.  */
      if (i.base_reg
          && i.base_reg->reg_type.bitfield.instance == RegD
          && i.base_reg->reg_type.bitfield.word
          && i.index_reg == 0
          && i.log2_scale_factor == 0
          && i.seg[i.mem_operands] == 0
          && !operand_type_check (i.types[this_operand], disp))
        {
          i.types[this_operand] = i.base_reg->reg_type;
          return 1;
        }

      if (i386_index_check (operand_string) == 0)
        return 0;
      i.flags[this_operand] |= Operand_Mem;
      /* Keep a copy of the first memory operand's text; it is used by
         maybe_adjust_templates () to re-validate that operand.  */
      if (i.mem_operands == 0)
        i.memop1_string = xstrdup (operand_string);
      i.mem_operands++;
    }
  else
    {
      /* It's not a memory operand; argh!  */
      as_bad (_("invalid char %s beginning operand %d `%s'"),
              output_invalid (*op_string),
              this_operand + 1,
              op_string);
      return 0;
    }
  return 1;                     /* Normal return.  */
}
11714 \f
11715 /* Calculate the maximum variable size (i.e., excluding fr_fix)
11716    that an rs_machine_dependent frag may reach.  */
11717
11718 unsigned int
11719 i386_frag_max_var (fragS *frag)
11720 {
11721   /* The only relaxable frags are for jumps.
11722      Unconditional jumps can grow by 4 bytes and others by 5 bytes.  */
11723   gas_assert (frag->fr_type == rs_machine_dependent);
11724   return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
11725 }
11726
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* Return 1 if a reference to FR_SYMBOL can be resolved at assembly
   time, 0 if it has to be left for the linker.  FR_VAR is the
   relocation attached to the reference, or NO_RELOC.  Any relocation
   other than the two PLT32 ones is unexpected for a global symbol
   with default visibility and aborts.  */
static int
elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
{
  /* STT_GNU_IFUNC symbol must go through PLT.  */
  if (symbol_get_bfdsym (fr_symbol)->flags & BSF_GNU_INDIRECT_FUNCTION)
    return 0;

  /* A non-external symbol may be weak or local; only a weak one
     can still be overridden.  */
  if (!S_IS_EXTERNAL (fr_symbol))
    return S_IS_WEAK (fr_symbol) ? 0 : 1;

  /* Global symbols with non-default visibility can't be preempted. */
  if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
    return 1;

  if (fr_var != NO_RELOC)
    {
      enum bfd_reloc_code_real reloc = (enum bfd_reloc_code_real) fr_var;

      /* Symbol with PLT relocation may be preempted. */
      if (reloc == BFD_RELOC_386_PLT32
          || reloc == BFD_RELOC_X86_64_PLT32)
        return 0;

      abort ();
    }

  /* Global symbols with default visibility in a shared library may be
     preempted by another definition.  */
  return !shared;
}
#endif
11760
11761 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
11762    Note also work for Skylake and Cascadelake.
11763 ---------------------------------------------------------------------
11764 |   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
11765 | ------  | ----------- | ------- | -------- |
11766 |   Jo    |      N      |    N    |     Y    |
11767 |   Jno   |      N      |    N    |     Y    |
11768 |  Jc/Jb  |      Y      |    N    |     Y    |
11769 | Jae/Jnb |      Y      |    N    |     Y    |
11770 |  Je/Jz  |      Y      |    Y    |     Y    |
11771 | Jne/Jnz |      Y      |    Y    |     Y    |
11772 | Jna/Jbe |      Y      |    N    |     Y    |
11773 | Ja/Jnbe |      Y      |    N    |     Y    |
11774 |   Js    |      N      |    N    |     Y    |
11775 |   Jns   |      N      |    N    |     Y    |
11776 |  Jp/Jpe |      N      |    N    |     Y    |
11777 | Jnp/Jpo |      N      |    N    |     Y    |
11778 | Jl/Jnge |      Y      |    Y    |     Y    |
11779 | Jge/Jnl |      Y      |    Y    |     Y    |
11780 | Jle/Jng |      Y      |    Y    |     Y    |
11781 | Jg/Jnle |      Y      |    Y    |     Y    |
11782 ---------------------------------------------------------------------  */
11783 static int
11784 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
11785 {
11786   if (mf_cmp == mf_cmp_alu_cmp)
11787     return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
11788             || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
11789   if (mf_cmp == mf_cmp_incdec)
11790     return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
11791             || mf_jcc == mf_jcc_jle);
11792   if (mf_cmp == mf_cmp_test_and)
11793     return 1;
11794   return 0;
11795 }
11796
11797 /* Return the next non-empty frag.  */
11798
11799 static fragS *
11800 i386_next_non_empty_frag (fragS *fragP)
11801 {
11802   /* There may be a frag with a ".fill 0" when there is no room in
11803      the current frag for frag_grow in output_insn.  */
11804   for (fragP = fragP->fr_next;
11805        (fragP != NULL
11806         && fragP->fr_type == rs_fill
11807         && fragP->fr_fix == 0);
11808        fragP = fragP->fr_next)
11809     ;
11810   return fragP;
11811 }
11812
11813 /* Return the next jcc frag after BRANCH_PADDING.  */
11814
11815 static fragS *
11816 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
11817 {
11818   fragS *branch_fragP;
11819   if (!pad_fragP)
11820     return NULL;
11821
11822   if (pad_fragP->fr_type == rs_machine_dependent
11823       && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
11824           == BRANCH_PADDING))
11825     {
11826       branch_fragP = i386_next_non_empty_frag (pad_fragP);
11827       if (branch_fragP->fr_type != rs_machine_dependent)
11828         return NULL;
11829       if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
11830           && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
11831                                    pad_fragP->tc_frag_data.mf_type))
11832         return branch_fragP;
11833     }
11834
11835   return NULL;
11836 }
11837
11838 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags.  */
11839
11840 static void
11841 i386_classify_machine_dependent_frag (fragS *fragP)
11842 {
11843   fragS *cmp_fragP;
11844   fragS *pad_fragP;
11845   fragS *branch_fragP;
11846   fragS *next_fragP;
11847   unsigned int max_prefix_length;
11848
11849   if (fragP->tc_frag_data.classified)
11850     return;
11851
11852   /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING.  Convert
11853      FUSED_JCC_PADDING and merge BRANCH_PADDING.  */
11854   for (next_fragP = fragP;
11855        next_fragP != NULL;
11856        next_fragP = next_fragP->fr_next)
11857     {
11858       next_fragP->tc_frag_data.classified = 1;
11859       if (next_fragP->fr_type == rs_machine_dependent)
11860         switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
11861           {
11862           case BRANCH_PADDING:
11863             /* The BRANCH_PADDING frag must be followed by a branch
11864                frag.  */
11865             branch_fragP = i386_next_non_empty_frag (next_fragP);
11866             next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11867             break;
11868           case FUSED_JCC_PADDING:
11869             /* Check if this is a fused jcc:
11870                FUSED_JCC_PADDING
11871                CMP like instruction
11872                BRANCH_PADDING
11873                COND_JUMP
11874                */
11875             cmp_fragP = i386_next_non_empty_frag (next_fragP);
11876             pad_fragP = i386_next_non_empty_frag (cmp_fragP);
11877             branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
11878             if (branch_fragP)
11879               {
11880                 /* The BRANCH_PADDING frag is merged with the
11881                    FUSED_JCC_PADDING frag.  */
11882                 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11883                 /* CMP like instruction size.  */
11884                 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
11885                 frag_wane (pad_fragP);
11886                 /* Skip to branch_fragP.  */
11887                 next_fragP = branch_fragP;
11888               }
11889             else if (next_fragP->tc_frag_data.max_prefix_length)
11890               {
11891                 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
11892                    a fused jcc.  */
11893                 next_fragP->fr_subtype
11894                   = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
11895                 next_fragP->tc_frag_data.max_bytes
11896                   = next_fragP->tc_frag_data.max_prefix_length;
11897                 /* This will be updated in the BRANCH_PREFIX scan.  */
11898                 next_fragP->tc_frag_data.max_prefix_length = 0;
11899               }
11900             else
11901               frag_wane (next_fragP);
11902             break;
11903           }
11904     }
11905
11906   /* Stop if there is no BRANCH_PREFIX.  */
11907   if (!align_branch_prefix_size)
11908     return;
11909
11910   /* Scan for BRANCH_PREFIX.  */
11911   for (; fragP != NULL; fragP = fragP->fr_next)
11912     {
11913       if (fragP->fr_type != rs_machine_dependent
11914           || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
11915               != BRANCH_PREFIX))
11916         continue;
11917
11918       /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
11919          COND_JUMP_PREFIX.  */
11920       max_prefix_length = 0;
11921       for (next_fragP = fragP;
11922            next_fragP != NULL;
11923            next_fragP = next_fragP->fr_next)
11924         {
11925           if (next_fragP->fr_type == rs_fill)
11926             /* Skip rs_fill frags.  */
11927             continue;
11928           else if (next_fragP->fr_type != rs_machine_dependent)
11929             /* Stop for all other frags.  */
11930             break;
11931
11932           /* rs_machine_dependent frags.  */
11933           if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11934               == BRANCH_PREFIX)
11935             {
11936               /* Count BRANCH_PREFIX frags.  */
11937               if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
11938                 {
11939                   max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
11940                   frag_wane (next_fragP);
11941                 }
11942               else
11943                 max_prefix_length
11944                   += next_fragP->tc_frag_data.max_bytes;
11945             }
11946           else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11947                     == BRANCH_PADDING)
11948                    || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11949                        == FUSED_JCC_PADDING))
11950             {
11951               /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING.  */
11952               fragP->tc_frag_data.u.padding_fragP = next_fragP;
11953               break;
11954             }
11955           else
11956             /* Stop for other rs_machine_dependent frags.  */
11957             break;
11958         }
11959
11960       fragP->tc_frag_data.max_prefix_length = max_prefix_length;
11961
11962       /* Skip to the next frag.  */
11963       fragP = next_fragP;
11964     }
11965 }
11966
11967 /* Compute padding size for
11968
11969         FUSED_JCC_PADDING
11970         CMP like instruction
11971         BRANCH_PADDING
11972         COND_JUMP/UNCOND_JUMP
11973
11974    or
11975
11976         BRANCH_PADDING
11977         COND_JUMP/UNCOND_JUMP
11978  */
11979
11980 static int
11981 i386_branch_padding_size (fragS *fragP, offsetT address)
11982 {
11983   unsigned int offset, size, padding_size;
11984   fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
11985
11986   /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag.  */
11987   if (!address)
11988     address = fragP->fr_address;
11989   address += fragP->fr_fix;
11990
11991   /* CMP like instrunction size.  */
11992   size = fragP->tc_frag_data.cmp_size;
11993
11994   /* The base size of the branch frag.  */
11995   size += branch_fragP->fr_fix;
11996
11997   /* Add opcode and displacement bytes for the rs_machine_dependent
11998      branch frag.  */
11999   if (branch_fragP->fr_type == rs_machine_dependent)
12000     size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
12001
12002   /* Check if branch is within boundary and doesn't end at the last
12003      byte.  */
12004   offset = address & ((1U << align_branch_power) - 1);
12005   if ((offset + size) >= (1U << align_branch_power))
12006     /* Padding needed to avoid crossing boundary.  */
12007     padding_size = (1U << align_branch_power) - offset;
12008   else
12009     /* No padding needed.  */
12010     padding_size = 0;
12011
12012   /* The return value may be saved in tc_frag_data.length which is
12013      unsigned byte.  */
12014   if (!fits_in_unsigned_byte (padding_size))
12015     abort ();
12016
12017   return padding_size;
12018 }
12019
12020 /* i386_generic_table_relax_frag()
12021
12022    Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
12023    grow/shrink padding to align branch frags.  Hand others to
12024    relax_frag().  */
12025
12026 long
12027 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
12028 {
12029   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12030       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12031     {
12032       long padding_size = i386_branch_padding_size (fragP, 0);
12033       long grow = padding_size - fragP->tc_frag_data.length;
12034
12035       /* When the BRANCH_PREFIX frag is used, the computed address
12036          must match the actual address and there should be no padding.  */
12037       if (fragP->tc_frag_data.padding_address
12038           && (fragP->tc_frag_data.padding_address != fragP->fr_address
12039               || padding_size))
12040         abort ();
12041
12042       /* Update the padding size.  */
12043       if (grow)
12044         fragP->tc_frag_data.length = padding_size;
12045
12046       return grow;
12047     }
12048   else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12049     {
12050       fragS *padding_fragP, *next_fragP;
12051       long padding_size, left_size, last_size;
12052
12053       padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12054       if (!padding_fragP)
12055         /* Use the padding set by the leading BRANCH_PREFIX frag.  */
12056         return (fragP->tc_frag_data.length
12057                 - fragP->tc_frag_data.last_length);
12058
12059       /* Compute the relative address of the padding frag in the very
12060         first time where the BRANCH_PREFIX frag sizes are zero.  */
12061       if (!fragP->tc_frag_data.padding_address)
12062         fragP->tc_frag_data.padding_address
12063           = padding_fragP->fr_address - (fragP->fr_address - stretch);
12064
12065       /* First update the last length from the previous interation.  */
12066       left_size = fragP->tc_frag_data.prefix_length;
12067       for (next_fragP = fragP;
12068            next_fragP != padding_fragP;
12069            next_fragP = next_fragP->fr_next)
12070         if (next_fragP->fr_type == rs_machine_dependent
12071             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12072                 == BRANCH_PREFIX))
12073           {
12074             if (left_size)
12075               {
12076                 int max = next_fragP->tc_frag_data.max_bytes;
12077                 if (max)
12078                   {
12079                     int size;
12080                     if (max > left_size)
12081                       size = left_size;
12082                     else
12083                       size = max;
12084                     left_size -= size;
12085                     next_fragP->tc_frag_data.last_length = size;
12086                   }
12087               }
12088             else
12089               next_fragP->tc_frag_data.last_length = 0;
12090           }
12091
12092       /* Check the padding size for the padding frag.  */
12093       padding_size = i386_branch_padding_size
12094         (padding_fragP, (fragP->fr_address
12095                          + fragP->tc_frag_data.padding_address));
12096
12097       last_size = fragP->tc_frag_data.prefix_length;
12098       /* Check if there is change from the last interation.  */
12099       if (padding_size == last_size)
12100         {
12101           /* Update the expected address of the padding frag.  */
12102           padding_fragP->tc_frag_data.padding_address
12103             = (fragP->fr_address + padding_size
12104                + fragP->tc_frag_data.padding_address);
12105           return 0;
12106         }
12107
12108       if (padding_size > fragP->tc_frag_data.max_prefix_length)
12109         {
12110           /* No padding if there is no sufficient room.  Clear the
12111              expected address of the padding frag.  */
12112           padding_fragP->tc_frag_data.padding_address = 0;
12113           padding_size = 0;
12114         }
12115       else
12116         /* Store the expected address of the padding frag.  */
12117         padding_fragP->tc_frag_data.padding_address
12118           = (fragP->fr_address + padding_size
12119              + fragP->tc_frag_data.padding_address);
12120
12121       fragP->tc_frag_data.prefix_length = padding_size;
12122
12123       /* Update the length for the current interation.  */
12124       left_size = padding_size;
12125       for (next_fragP = fragP;
12126            next_fragP != padding_fragP;
12127            next_fragP = next_fragP->fr_next)
12128         if (next_fragP->fr_type == rs_machine_dependent
12129             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12130                 == BRANCH_PREFIX))
12131           {
12132             if (left_size)
12133               {
12134                 int max = next_fragP->tc_frag_data.max_bytes;
12135                 if (max)
12136                   {
12137                     int size;
12138                     if (max > left_size)
12139                       size = left_size;
12140                     else
12141                       size = max;
12142                     left_size -= size;
12143                     next_fragP->tc_frag_data.length = size;
12144                   }
12145               }
12146             else
12147               next_fragP->tc_frag_data.length = 0;
12148           }
12149
12150       return (fragP->tc_frag_data.length
12151               - fragP->tc_frag_data.last_length);
12152     }
12153   return relax_frag (segment, fragP, stretch);
12154 }
12155
12156 /* md_estimate_size_before_relax()
12157
12158    Called just before relax() for rs_machine_dependent frags.  The x86
12159    assembler uses these frags to handle variable size jump
12160    instructions.
12161
12162    Any symbol that is now undefined will not become defined.
12163    Return the correct fr_subtype in the frag.
12164    Return the initial "guess for variable size of frag" to caller.
12165    The guess is actually the growth beyond the fixed part.  Whatever
12166    we do to grow the fixed or variable part contributes to our
12167    returned value.  */
12168
12169 int
12170 md_estimate_size_before_relax (fragS *fragP, segT segment)
12171 {
12172   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12173       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
12174       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12175     {
12176       i386_classify_machine_dependent_frag (fragP);
12177       return fragP->tc_frag_data.length;
12178     }
12179
12180   /* We've already got fragP->fr_subtype right;  all we have to do is
12181      check for un-relaxable symbols.  On an ELF system, we can't relax
12182      an externally visible symbol, because it may be overridden by a
12183      shared library.  */
12184   if (S_GET_SEGMENT (fragP->fr_symbol) != segment
12185 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12186       || (IS_ELF
12187           && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
12188                                                 fragP->fr_var))
12189 #endif
12190 #if defined (OBJ_COFF) && defined (TE_PE)
12191       || (OUTPUT_FLAVOR == bfd_target_coff_flavour
12192           && S_IS_WEAK (fragP->fr_symbol))
12193 #endif
12194       )
12195     {
12196       /* Symbol is undefined in this segment, or we need to keep a
12197          reloc so that weak symbols can be overridden.  */
12198       int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
12199       enum bfd_reloc_code_real reloc_type;
12200       unsigned char *opcode;
12201       int old_fr_fix;
12202       fixS *fixP = NULL;
12203
12204       if (fragP->fr_var != NO_RELOC)
12205         reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
12206       else if (size == 2)
12207         reloc_type = BFD_RELOC_16_PCREL;
12208 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12209       else if (need_plt32_p (fragP->fr_symbol))
12210         reloc_type = BFD_RELOC_X86_64_PLT32;
12211 #endif
12212       else
12213         reloc_type = BFD_RELOC_32_PCREL;
12214
12215       old_fr_fix = fragP->fr_fix;
12216       opcode = (unsigned char *) fragP->fr_opcode;
12217
12218       switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
12219         {
12220         case UNCOND_JUMP:
12221           /* Make jmp (0xeb) a (d)word displacement jump.  */
12222           opcode[0] = 0xe9;
12223           fragP->fr_fix += size;
12224           fixP = fix_new (fragP, old_fr_fix, size,
12225                           fragP->fr_symbol,
12226                           fragP->fr_offset, 1,
12227                           reloc_type);
12228           break;
12229
12230         case COND_JUMP86:
12231           if (size == 2
12232               && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
12233             {
12234               /* Negate the condition, and branch past an
12235                  unconditional jump.  */
12236               opcode[0] ^= 1;
12237               opcode[1] = 3;
12238               /* Insert an unconditional jump.  */
12239               opcode[2] = 0xe9;
12240               /* We added two extra opcode bytes, and have a two byte
12241                  offset.  */
12242               fragP->fr_fix += 2 + 2;
12243               fix_new (fragP, old_fr_fix + 2, 2,
12244                        fragP->fr_symbol,
12245                        fragP->fr_offset, 1,
12246                        reloc_type);
12247               break;
12248             }
12249           /* Fall through.  */
12250
12251         case COND_JUMP:
12252           if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
12253             {
12254               fragP->fr_fix += 1;
12255               fixP = fix_new (fragP, old_fr_fix, 1,
12256                               fragP->fr_symbol,
12257                               fragP->fr_offset, 1,
12258                               BFD_RELOC_8_PCREL);
12259               fixP->fx_signed = 1;
12260               break;
12261             }
12262
12263           /* This changes the byte-displacement jump 0x7N
12264              to the (d)word-displacement jump 0x0f,0x8N.  */
12265           opcode[1] = opcode[0] + 0x10;
12266           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12267           /* We've added an opcode byte.  */
12268           fragP->fr_fix += 1 + size;
12269           fixP = fix_new (fragP, old_fr_fix + 1, size,
12270                           fragP->fr_symbol,
12271                           fragP->fr_offset, 1,
12272                           reloc_type);
12273           break;
12274
12275         default:
12276           BAD_CASE (fragP->fr_subtype);
12277           break;
12278         }
12279
12280       /* All jumps handled here are signed, but don't unconditionally use a
12281          signed limit check for 32 and 16 bit jumps as we want to allow wrap
12282          around at 4G (outside of 64-bit mode) and 64k.  */
12283       if (size == 4 && flag_code == CODE_64BIT)
12284         fixP->fx_signed = 1;
12285
12286       frag_wane (fragP);
12287       return fragP->fr_fix - old_fr_fix;
12288     }
12289
12290   /* Guess size depending on current relax state.  Initially the relax
12291      state will correspond to a short jump and we return 1, because
12292      the variable part of the frag (the branch offset) is one byte
12293      long.  However, we can relax a section more than once and in that
12294      case we must either set fr_subtype back to the unrelaxed state,
12295      or return the value for the appropriate branch.  */
12296   return md_relax_table[fragP->fr_subtype].rlx_length;
12297 }
12298
12299 /* Called after relax() is finished.
12300
12301    In:  Address of frag.
12302         fr_type == rs_machine_dependent.
12303         fr_subtype is what the address relaxed to.
12304
12305    Out: Any fixSs and constants are set up.
12306         Caller will turn frag into a ".space 0".  */
12307
12308 void
12309 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
12310                  fragS *fragP)
12311 {
12312   unsigned char *opcode;
12313   unsigned char *where_to_put_displacement = NULL;
12314   offsetT target_address;
12315   offsetT opcode_address;
12316   unsigned int extension = 0;
12317   offsetT displacement_from_opcode_start;
12318
12319   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12320       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
12321       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12322     {
12323       /* Generate nop padding.  */
12324       unsigned int size = fragP->tc_frag_data.length;
12325       if (size)
12326         {
12327           if (size > fragP->tc_frag_data.max_bytes)
12328             abort ();
12329
12330           if (flag_debug)
12331             {
12332               const char *msg;
12333               const char *branch = "branch";
12334               const char *prefix = "";
12335               fragS *padding_fragP;
12336               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12337                   == BRANCH_PREFIX)
12338                 {
12339                   padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12340                   switch (fragP->tc_frag_data.default_prefix)
12341                     {
12342                     default:
12343                       abort ();
12344                       break;
12345                     case CS_PREFIX_OPCODE:
12346                       prefix = " cs";
12347                       break;
12348                     case DS_PREFIX_OPCODE:
12349                       prefix = " ds";
12350                       break;
12351                     case ES_PREFIX_OPCODE:
12352                       prefix = " es";
12353                       break;
12354                     case FS_PREFIX_OPCODE:
12355                       prefix = " fs";
12356                       break;
12357                     case GS_PREFIX_OPCODE:
12358                       prefix = " gs";
12359                       break;
12360                     case SS_PREFIX_OPCODE:
12361                       prefix = " ss";
12362                       break;
12363                     }
12364                   if (padding_fragP)
12365                     msg = _("%s:%u: add %d%s at 0x%llx to align "
12366                             "%s within %d-byte boundary\n");
12367                   else
12368                     msg = _("%s:%u: add additional %d%s at 0x%llx to "
12369                             "align %s within %d-byte boundary\n");
12370                 }
12371               else
12372                 {
12373                   padding_fragP = fragP;
12374                   msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
12375                           "%s within %d-byte boundary\n");
12376                 }
12377
12378               if (padding_fragP)
12379                 switch (padding_fragP->tc_frag_data.branch_type)
12380                   {
12381                   case align_branch_jcc:
12382                     branch = "jcc";
12383                     break;
12384                   case align_branch_fused:
12385                     branch = "fused jcc";
12386                     break;
12387                   case align_branch_jmp:
12388                     branch = "jmp";
12389                     break;
12390                   case align_branch_call:
12391                     branch = "call";
12392                     break;
12393                   case align_branch_indirect:
12394                     branch = "indiret branch";
12395                     break;
12396                   case align_branch_ret:
12397                     branch = "ret";
12398                     break;
12399                   default:
12400                     break;
12401                   }
12402
12403               fprintf (stdout, msg,
12404                        fragP->fr_file, fragP->fr_line, size, prefix,
12405                        (long long) fragP->fr_address, branch,
12406                        1 << align_branch_power);
12407             }
12408           if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12409             memset (fragP->fr_opcode,
12410                     fragP->tc_frag_data.default_prefix, size);
12411           else
12412             i386_generate_nops (fragP, (char *) fragP->fr_opcode,
12413                                 size, 0);
12414           fragP->fr_fix += size;
12415         }
12416       return;
12417     }
12418
12419   opcode = (unsigned char *) fragP->fr_opcode;
12420
12421   /* Address we want to reach in file space.  */
12422   target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
12423
12424   /* Address opcode resides at in file space.  */
12425   opcode_address = fragP->fr_address + fragP->fr_fix;
12426
12427   /* Displacement from opcode start to fill into instruction.  */
12428   displacement_from_opcode_start = target_address - opcode_address;
12429
12430   if ((fragP->fr_subtype & BIG) == 0)
12431     {
12432       /* Don't have to change opcode.  */
12433       extension = 1;            /* 1 opcode + 1 displacement  */
12434       where_to_put_displacement = &opcode[1];
12435     }
12436   else
12437     {
12438       if (no_cond_jump_promotion
12439           && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
12440         as_warn_where (fragP->fr_file, fragP->fr_line,
12441                        _("long jump required"));
12442
12443       switch (fragP->fr_subtype)
12444         {
12445         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
12446           extension = 4;                /* 1 opcode + 4 displacement  */
12447           opcode[0] = 0xe9;
12448           where_to_put_displacement = &opcode[1];
12449           break;
12450
12451         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
12452           extension = 2;                /* 1 opcode + 2 displacement  */
12453           opcode[0] = 0xe9;
12454           where_to_put_displacement = &opcode[1];
12455           break;
12456
12457         case ENCODE_RELAX_STATE (COND_JUMP, BIG):
12458         case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
12459           extension = 5;                /* 2 opcode + 4 displacement  */
12460           opcode[1] = opcode[0] + 0x10;
12461           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12462           where_to_put_displacement = &opcode[2];
12463           break;
12464
12465         case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
12466           extension = 3;                /* 2 opcode + 2 displacement  */
12467           opcode[1] = opcode[0] + 0x10;
12468           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12469           where_to_put_displacement = &opcode[2];
12470           break;
12471
12472         case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
12473           extension = 4;
12474           opcode[0] ^= 1;
12475           opcode[1] = 3;
12476           opcode[2] = 0xe9;
12477           where_to_put_displacement = &opcode[3];
12478           break;
12479
12480         default:
12481           BAD_CASE (fragP->fr_subtype);
12482           break;
12483         }
12484     }
12485
12486   /* If size if less then four we are sure that the operand fits,
12487      but if it's 4, then it could be that the displacement is larger
12488      then -/+ 2GB.  */
12489   if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
12490       && object_64bit
12491       && ((addressT) (displacement_from_opcode_start - extension
12492                       + ((addressT) 1 << 31))
12493           > (((addressT) 2 << 31) - 1)))
12494     {
12495       as_bad_where (fragP->fr_file, fragP->fr_line,
12496                     _("jump target out of range"));
12497       /* Make us emit 0.  */
12498       displacement_from_opcode_start = extension;
12499     }
12500   /* Now put displacement after opcode.  */
12501   md_number_to_chars ((char *) where_to_put_displacement,
12502                       (valueT) (displacement_from_opcode_start - extension),
12503                       DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
12504   fragP->fr_fix += extension;
12505 }
12506 \f
12507 /* Apply a fixup (fixP) to segment data, once it has been determined
12508    by our caller that we have all the info we need to fix it up.
12509
12510    Parameter valP is the pointer to the value of the bits.
12511
12512    On the 386, immediates, displacements, and data pointers are all in
12513    the same (little-endian) format, so we don't need to care about which
12514    we are handling.  */
12515
12516 void
12517 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
12518 {
12519   char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
12520   valueT value = *valP;
12521
12522 #if !defined (TE_Mach)
12523   if (fixP->fx_pcrel)
12524     {
12525       switch (fixP->fx_r_type)
12526         {
12527         default:
12528           break;
12529
12530         case BFD_RELOC_64:
12531           fixP->fx_r_type = BFD_RELOC_64_PCREL;
12532           break;
12533         case BFD_RELOC_32:
12534         case BFD_RELOC_X86_64_32S:
12535           fixP->fx_r_type = BFD_RELOC_32_PCREL;
12536           break;
12537         case BFD_RELOC_16:
12538           fixP->fx_r_type = BFD_RELOC_16_PCREL;
12539           break;
12540         case BFD_RELOC_8:
12541           fixP->fx_r_type = BFD_RELOC_8_PCREL;
12542           break;
12543         }
12544     }
12545
12546   if (fixP->fx_addsy != NULL
12547       && (fixP->fx_r_type == BFD_RELOC_32_PCREL
12548           || fixP->fx_r_type == BFD_RELOC_64_PCREL
12549           || fixP->fx_r_type == BFD_RELOC_16_PCREL
12550           || fixP->fx_r_type == BFD_RELOC_8_PCREL)
12551       && !use_rela_relocations)
12552     {
12553       /* This is a hack.  There should be a better way to handle this.
12554          This covers for the fact that bfd_install_relocation will
12555          subtract the current location (for partial_inplace, PC relative
12556          relocations); see more below.  */
12557 #ifndef OBJ_AOUT
12558       if (IS_ELF
12559 #ifdef TE_PE
12560           || OUTPUT_FLAVOR == bfd_target_coff_flavour
12561 #endif
12562           )
12563         value += fixP->fx_where + fixP->fx_frag->fr_address;
12564 #endif
12565 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12566       if (IS_ELF)
12567         {
12568           segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
12569
12570           if ((sym_seg == seg
12571                || (symbol_section_p (fixP->fx_addsy)
12572                    && sym_seg != absolute_section))
12573               && !generic_force_reloc (fixP))
12574             {
12575               /* Yes, we add the values in twice.  This is because
12576                  bfd_install_relocation subtracts them out again.  I think
12577                  bfd_install_relocation is broken, but I don't dare change
12578                  it.  FIXME.  */
12579               value += fixP->fx_where + fixP->fx_frag->fr_address;
12580             }
12581         }
12582 #endif
12583 #if defined (OBJ_COFF) && defined (TE_PE)
12584       /* For some reason, the PE format does not store a
12585          section address offset for a PC relative symbol.  */
12586       if (S_GET_SEGMENT (fixP->fx_addsy) != seg
12587           || S_IS_WEAK (fixP->fx_addsy))
12588         value += md_pcrel_from (fixP);
12589 #endif
12590     }
12591 #if defined (OBJ_COFF) && defined (TE_PE)
12592   if (fixP->fx_addsy != NULL
12593       && S_IS_WEAK (fixP->fx_addsy)
12594       /* PR 16858: Do not modify weak function references.  */
12595       && ! fixP->fx_pcrel)
12596     {
12597 #if !defined (TE_PEP)
12598       /* For x86 PE weak function symbols are neither PC-relative
12599          nor do they set S_IS_FUNCTION.  So the only reliable way
12600          to detect them is to check the flags of their containing
12601          section.  */
12602       if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
12603           && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
12604         ;
12605       else
12606 #endif
12607       value -= S_GET_VALUE (fixP->fx_addsy);
12608     }
12609 #endif
12610
12611   /* Fix a few things - the dynamic linker expects certain values here,
12612      and we must not disappoint it.  */
12613 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12614   if (IS_ELF && fixP->fx_addsy)
12615     switch (fixP->fx_r_type)
12616       {
12617       case BFD_RELOC_386_PLT32:
12618       case BFD_RELOC_X86_64_PLT32:
12619         /* Make the jump instruction point to the address of the operand.
12620            At runtime we merely add the offset to the actual PLT entry.
12621            NB: Subtract the offset size only for jump instructions.  */
12622         if (fixP->fx_pcrel)
12623           value = -4;
12624         break;
12625
12626       case BFD_RELOC_386_TLS_GD:
12627       case BFD_RELOC_386_TLS_LDM:
12628       case BFD_RELOC_386_TLS_IE_32:
12629       case BFD_RELOC_386_TLS_IE:
12630       case BFD_RELOC_386_TLS_GOTIE:
12631       case BFD_RELOC_386_TLS_GOTDESC:
12632       case BFD_RELOC_X86_64_TLSGD:
12633       case BFD_RELOC_X86_64_TLSLD:
12634       case BFD_RELOC_X86_64_GOTTPOFF:
12635       case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
12636         value = 0; /* Fully resolved at runtime.  No addend.  */
12637         /* Fallthrough */
12638       case BFD_RELOC_386_TLS_LE:
12639       case BFD_RELOC_386_TLS_LDO_32:
12640       case BFD_RELOC_386_TLS_LE_32:
12641       case BFD_RELOC_X86_64_DTPOFF32:
12642       case BFD_RELOC_X86_64_DTPOFF64:
12643       case BFD_RELOC_X86_64_TPOFF32:
12644       case BFD_RELOC_X86_64_TPOFF64:
12645         S_SET_THREAD_LOCAL (fixP->fx_addsy);
12646         break;
12647
12648       case BFD_RELOC_386_TLS_DESC_CALL:
12649       case BFD_RELOC_X86_64_TLSDESC_CALL:
12650         value = 0; /* Fully resolved at runtime.  No addend.  */
12651         S_SET_THREAD_LOCAL (fixP->fx_addsy);
12652         fixP->fx_done = 0;
12653         return;
12654
12655       case BFD_RELOC_VTABLE_INHERIT:
12656       case BFD_RELOC_VTABLE_ENTRY:
12657         fixP->fx_done = 0;
12658         return;
12659
12660       default:
12661         break;
12662       }
12663 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)  */
12664
12665   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
12666   if (!object_64bit)
12667     value = extend_to_32bit_address (value);
12668
12669   *valP = value;
12670 #endif /* !defined (TE_Mach)  */
12671
12672   /* Are we finished with this relocation now?  */
12673   if (fixP->fx_addsy == NULL)
12674     {
12675       fixP->fx_done = 1;
12676       switch (fixP->fx_r_type)
12677         {
12678         case BFD_RELOC_X86_64_32S:
12679           fixP->fx_signed = 1;
12680           break;
12681
12682         default:
12683           break;
12684         }
12685     }
12686 #if defined (OBJ_COFF) && defined (TE_PE)
12687   else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
12688     {
12689       fixP->fx_done = 0;
12690       /* Remember value for tc_gen_reloc.  */
12691       fixP->fx_addnumber = value;
12692       /* Clear out the frag for now.  */
12693       value = 0;
12694     }
12695 #endif
12696   else if (use_rela_relocations)
12697     {
12698       fixP->fx_no_overflow = 1;
12699       /* Remember value for tc_gen_reloc.  */
12700       fixP->fx_addnumber = value;
12701       value = 0;
12702     }
12703
12704   md_number_to_chars (p, value, fixP->fx_size);
12705 }
12706 \f
/* Convert a floating point constant of kind TYPE by delegating to
   ieee_md_atof, passing LITP and SIZEP straight through and returning
   its result.

   NOTE(review): the final `false' argument presumably selects
   little-endian LITTLENUM order, which is what the little-endian x86
   needs -- confirm against the declaration of ieee_md_atof.  */

const char *
md_atof (int type, char *litP, int *sizeP)
{
  const bool big_wordian = false;

  return ieee_md_atof (type, litP, sizeP, big_wordian);
}
12714 \f
/* Scratch buffer for output_invalid.  The longest text written is
   "(0x" + two hex digits + ")" + NUL.  */
static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];

/* Format the character C for use in a diagnostic: quoted when it is
   printable, as a parenthesized hex byte otherwise.  The result lives
   in a static buffer that the next call overwrites.  */

static char *
output_invalid (int c)
{
  const size_t buf_size = sizeof (output_invalid_buf);

  if (!ISPRINT (c))
    snprintf (output_invalid_buf, buf_size, "(0x%x)", (unsigned char) c);
  else
    snprintf (output_invalid_buf, buf_size, "'%c'", c);

  return output_invalid_buf;
}
12728
12729 /* Verify that @r can be used in the current context.  */
12730
12731 static bool check_register (const reg_entry *r)
12732 {
12733   if (allow_pseudo_reg)
12734     return true;
12735
12736   if (operand_type_all_zero (&r->reg_type))
12737     return false;
12738
12739   if ((r->reg_type.bitfield.dword
12740        || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
12741        || r->reg_type.bitfield.class == RegCR
12742        || r->reg_type.bitfield.class == RegDR)
12743       && !cpu_arch_flags.bitfield.cpui386)
12744     return false;
12745
12746   if (r->reg_type.bitfield.class == RegTR
12747       && (flag_code == CODE_64BIT
12748           || !cpu_arch_flags.bitfield.cpui386
12749           || cpu_arch_isa_flags.bitfield.cpui586
12750           || cpu_arch_isa_flags.bitfield.cpui686))
12751     return false;
12752
12753   if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
12754     return false;
12755
12756   if (!cpu_arch_flags.bitfield.cpuavx512f)
12757     {
12758       if (r->reg_type.bitfield.zmmword
12759           || r->reg_type.bitfield.class == RegMask)
12760         return false;
12761
12762       if (!cpu_arch_flags.bitfield.cpuavx)
12763         {
12764           if (r->reg_type.bitfield.ymmword)
12765             return false;
12766
12767           if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
12768             return false;
12769         }
12770     }
12771
12772   if (r->reg_type.bitfield.tmmword
12773       && (!cpu_arch_flags.bitfield.cpuamx_tile
12774           || flag_code != CODE_64BIT))
12775     return false;
12776
12777   if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
12778     return false;
12779
12780   /* Don't allow fake index register unless allow_index_reg isn't 0. */
12781   if (!allow_index_reg && r->reg_num == RegIZ)
12782     return false;
12783
12784   /* Upper 16 vector registers are only available with VREX in 64bit
12785      mode, and require EVEX encoding.  */
12786   if (r->reg_flags & RegVRex)
12787     {
12788       if (!cpu_arch_flags.bitfield.cpuavx512f
12789           || flag_code != CODE_64BIT)
12790         return false;
12791
12792       if (i.vec_encoding == vex_encoding_default)
12793         i.vec_encoding = vex_encoding_evex;
12794       else if (i.vec_encoding != vex_encoding_evex)
12795         i.vec_encoding = vex_encoding_error;
12796     }
12797
12798   if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
12799       && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
12800       && flag_code != CODE_64BIT)
12801     return false;
12802
12803   if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
12804       && !intel_syntax)
12805     return false;
12806
12807   return true;
12808 }
12809
12810 /* REG_STRING starts *before* REGISTER_PREFIX.  */
12811
12812 static const reg_entry *
12813 parse_real_register (char *reg_string, char **end_op)
12814 {
12815   char *s = reg_string;
12816   char *p;
12817   char reg_name_given[MAX_REG_NAME_SIZE + 1];
12818   const reg_entry *r;
12819
12820   /* Skip possible REGISTER_PREFIX and possible whitespace.  */
12821   if (*s == REGISTER_PREFIX)
12822     ++s;
12823
12824   if (is_space_char (*s))
12825     ++s;
12826
12827   p = reg_name_given;
12828   while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
12829     {
12830       if (p >= reg_name_given + MAX_REG_NAME_SIZE)
12831         return (const reg_entry *) NULL;
12832       s++;
12833     }
12834
12835   /* For naked regs, make sure that we are not dealing with an identifier.
12836      This prevents confusing an identifier like `eax_var' with register
12837      `eax'.  */
12838   if (allow_naked_reg && identifier_chars[(unsigned char) *s])
12839     return (const reg_entry *) NULL;
12840
12841   *end_op = s;
12842
12843   r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
12844
12845   /* Handle floating point regs, allowing spaces in the (i) part.  */
12846   if (r == reg_st0)
12847     {
12848       if (!cpu_arch_flags.bitfield.cpu8087
12849           && !cpu_arch_flags.bitfield.cpu287
12850           && !cpu_arch_flags.bitfield.cpu387
12851           && !allow_pseudo_reg)
12852         return (const reg_entry *) NULL;
12853
12854       if (is_space_char (*s))
12855         ++s;
12856       if (*s == '(')
12857         {
12858           ++s;
12859           if (is_space_char (*s))
12860             ++s;
12861           if (*s >= '0' && *s <= '7')
12862             {
12863               int fpr = *s - '0';
12864               ++s;
12865               if (is_space_char (*s))
12866                 ++s;
12867               if (*s == ')')
12868                 {
12869                   *end_op = s + 1;
12870                   know (r[fpr].reg_num == fpr);
12871                   return r + fpr;
12872                 }
12873             }
12874           /* We have "%st(" then garbage.  */
12875           return (const reg_entry *) NULL;
12876         }
12877     }
12878
12879   return r && check_register (r) ? r : NULL;
12880 }
12881
12882 /* REG_STRING starts *before* REGISTER_PREFIX.  */
12883
12884 static const reg_entry *
12885 parse_register (char *reg_string, char **end_op)
12886 {
12887   const reg_entry *r;
12888
12889   if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
12890     r = parse_real_register (reg_string, end_op);
12891   else
12892     r = NULL;
12893   if (!r)
12894     {
12895       char *save = input_line_pointer;
12896       char c;
12897       symbolS *symbolP;
12898
12899       input_line_pointer = reg_string;
12900       c = get_symbol_name (&reg_string);
12901       symbolP = symbol_find (reg_string);
12902       if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
12903         {
12904           const expressionS *e = symbol_get_value_expression (symbolP);
12905
12906           know (e->X_op == O_register);
12907           know (e->X_add_number >= 0
12908                 && (valueT) e->X_add_number < i386_regtab_size);
12909           r = i386_regtab + e->X_add_number;
12910           if (!check_register (r))
12911             {
12912               as_bad (_("register '%s%s' cannot be used here"),
12913                       register_prefix, r->reg_name);
12914               r = &bad_reg;
12915             }
12916           *end_op = input_line_pointer;
12917         }
12918       *input_line_pointer = c;
12919       input_line_pointer = save;
12920     }
12921   return r;
12922 }
12923
12924 int
12925 i386_parse_name (char *name, expressionS *e, char *nextcharP)
12926 {
12927   const reg_entry *r;
12928   char *end = input_line_pointer;
12929
12930   *end = *nextcharP;
12931   r = parse_register (name, &input_line_pointer);
12932   if (r && end <= input_line_pointer)
12933     {
12934       *nextcharP = *input_line_pointer;
12935       *input_line_pointer = 0;
12936       if (r != &bad_reg)
12937         {
12938           e->X_op = O_register;
12939           e->X_add_number = r - i386_regtab;
12940         }
12941       else
12942           e->X_op = O_illegal;
12943       return 1;
12944     }
12945   input_line_pointer = end;
12946   *end = 0;
12947   return intel_syntax ? i386_intel_parse_name (name, e) : 0;
12948 }
12949
12950 void
12951 md_operand (expressionS *e)
12952 {
12953   char *end;
12954   const reg_entry *r;
12955
12956   switch (*input_line_pointer)
12957     {
12958     case REGISTER_PREFIX:
12959       r = parse_real_register (input_line_pointer, &end);
12960       if (r)
12961         {
12962           e->X_op = O_register;
12963           e->X_add_number = r - i386_regtab;
12964           input_line_pointer = end;
12965         }
12966       break;
12967
12968     case '[':
12969       gas_assert (intel_syntax);
12970       end = input_line_pointer++;
12971       expression (e);
12972       if (*input_line_pointer == ']')
12973         {
12974           ++input_line_pointer;
12975           e->X_op_symbol = make_expr_symbol (e);
12976           e->X_add_symbol = NULL;
12977           e->X_add_number = 0;
12978           e->X_op = O_index;
12979         }
12980       else
12981         {
12982           e->X_op = O_absent;
12983           input_line_pointer = end;
12984         }
12985       break;
12986     }
12987 }
12988
12989 \f
/* Single-letter options handled by md_parse_option.  Builds with ELF
   support additionally list `k', `V', `Q' and `s'; every build lists
   `q', `n' and `O'.
   NOTE(review): this looks like a getopt-style option string, where
   `:' marks a required argument (-Q) and `::' an optional one (-O).
   md_parse_option is consistent with that reading (it dereferences ARG
   for 'Q' and tests it against NULL for 'O') -- confirm against the
   generic option parser.  */
12990 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12991 const char *md_shortopts = "kVQ:sqnO::";
12992 #else
12993 const char *md_shortopts = "qnO::";
12994 #endif
12995
/* Codes for the long-only options listed in md_longopts, numbered
   consecutively from OPTION_MD_BASE.  Each code appears as the last
   member of one md_longopts row and as a case label in
   md_parse_option.  The values must stay distinct; the order of the
   definitions below simply follows the numbering.  */
12996 #define OPTION_32 (OPTION_MD_BASE + 0)
12997 #define OPTION_64 (OPTION_MD_BASE + 1)
12998 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
12999 #define OPTION_MARCH (OPTION_MD_BASE + 3)
13000 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
13001 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
13002 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
13003 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
13004 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
13005 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
13006 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
13007 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
13008 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
13009 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
13010 #define OPTION_X32 (OPTION_MD_BASE + 14)
13011 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
13012 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
13013 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
13014 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
13015 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
13016 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
13017 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
13018 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
13019 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
13020 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
13021 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
13022 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
13023 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
13024 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
13025 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
13026 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
13027 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
13028 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
13029 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
13030
/* Table of the target-specific long options.  Each row gives the
   option name, whether it takes an argument (no_argument or
   required_argument), a NULL flag pointer, and the OPTION_* code that
   md_parse_option receives for it.  The row with a NULL name ends the
   table.

   Some rows exist only in certain configurations: "64" needs an ELF,
   PE/PE+ or Mach-O target; "x32", "mshared" and "mx86-used-note" need
   ELF support; "mbig-obj" needs a PE/PE+ target.  */
13031 struct option md_longopts[] =
13032 {
13033   {"32", no_argument, NULL, OPTION_32},
13034 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13035      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13036   {"64", no_argument, NULL, OPTION_64},
13037 #endif
13038 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13039   {"x32", no_argument, NULL, OPTION_X32},
13040   {"mshared", no_argument, NULL, OPTION_MSHARED},
13041   {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
13042 #endif
13043   {"divide", no_argument, NULL, OPTION_DIVIDE},
13044   {"march", required_argument, NULL, OPTION_MARCH},
13045   {"mtune", required_argument, NULL, OPTION_MTUNE},
13046   {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
13047   {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
13048   {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
13049   {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
13050   {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
13051   {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
13052   {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
13053   {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
13054   {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
13055   {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
13056   {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
13057   {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
13058 # if defined (TE_PE) || defined (TE_PEP)
13059   {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
13060 #endif
13061   {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
13062   {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
13063   {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
13064   {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
13065   {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
13066   {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
13067   {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
13068   {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
13069   {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
13070   {"mlfence-before-indirect-branch", required_argument, NULL,
13071    OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
13072   {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
13073   {"mamd64", no_argument, NULL, OPTION_MAMD64},
13074   {"mintel64", no_argument, NULL, OPTION_MINTEL64},
13075   {NULL, no_argument, NULL, 0}
13076 };
/* Size of the whole table in bytes (not the number of rows).  */
13077 size_t md_longopts_size = sizeof (md_longopts);
13078
13079 int
13080 md_parse_option (int c, const char *arg)
13081 {
13082   unsigned int j;
13083   char *arch, *next, *saved, *type;
13084
13085   switch (c)
13086     {
13087     case 'n':
13088       optimize_align_code = 0;
13089       break;
13090
13091     case 'q':
13092       quiet_warnings = 1;
13093       break;
13094
13095 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13096       /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
13097          should be emitted or not.  FIXME: Not implemented.  */
13098     case 'Q':
13099       if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
13100         return 0;
13101       break;
13102
13103       /* -V: SVR4 argument to print version ID.  */
13104     case 'V':
13105       print_version_id ();
13106       break;
13107
13108       /* -k: Ignore for FreeBSD compatibility.  */
13109     case 'k':
13110       break;
13111
13112     case 's':
13113       /* -s: On i386 Solaris, this tells the native assembler to use
13114          .stab instead of .stab.excl.  We always use .stab anyhow.  */
13115       break;
13116
13117     case OPTION_MSHARED:
13118       shared = 1;
13119       break;
13120
13121     case OPTION_X86_USED_NOTE:
13122       if (strcasecmp (arg, "yes") == 0)
13123         x86_used_note = 1;
13124       else if (strcasecmp (arg, "no") == 0)
13125         x86_used_note = 0;
13126       else
13127         as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
13128       break;
13129
13130
13131 #endif
13132 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13133      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13134     case OPTION_64:
13135       {
13136         const char **list, **l;
13137
13138         list = bfd_target_list ();
13139         for (l = list; *l != NULL; l++)
13140           if (startswith (*l, "elf64-x86-64")
13141               || strcmp (*l, "coff-x86-64") == 0
13142               || strcmp (*l, "pe-x86-64") == 0
13143               || strcmp (*l, "pei-x86-64") == 0
13144               || strcmp (*l, "mach-o-x86-64") == 0)
13145             {
13146               default_arch = "x86_64";
13147               break;
13148             }
13149         if (*l == NULL)
13150           as_fatal (_("no compiled in support for x86_64"));
13151         free (list);
13152       }
13153       break;
13154 #endif
13155
13156 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13157     case OPTION_X32:
13158       if (IS_ELF)
13159         {
13160           const char **list, **l;
13161
13162           list = bfd_target_list ();
13163           for (l = list; *l != NULL; l++)
13164             if (startswith (*l, "elf32-x86-64"))
13165               {
13166                 default_arch = "x86_64:32";
13167                 break;
13168               }
13169           if (*l == NULL)
13170             as_fatal (_("no compiled in support for 32bit x86_64"));
13171           free (list);
13172         }
13173       else
13174         as_fatal (_("32bit x86_64 is only supported for ELF"));
13175       break;
13176 #endif
13177
13178     case OPTION_32:
13179       default_arch = "i386";
13180       break;
13181
13182     case OPTION_DIVIDE:
13183 #ifdef SVR4_COMMENT_CHARS
13184       {
13185         char *n, *t;
13186         const char *s;
13187
13188         n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
13189         t = n;
13190         for (s = i386_comment_chars; *s != '\0'; s++)
13191           if (*s != '/')
13192             *t++ = *s;
13193         *t = '\0';
13194         i386_comment_chars = n;
13195       }
13196 #endif
13197       break;
13198
13199     case OPTION_MARCH:
13200       saved = xstrdup (arg);
13201       arch = saved;
13202       /* Allow -march=+nosse.  */
13203       if (*arch == '+')
13204         arch++;
13205       do
13206         {
13207           if (*arch == '.')
13208             as_fatal (_("invalid -march= option: `%s'"), arg);
13209           next = strchr (arch, '+');
13210           if (next)
13211             *next++ = '\0';
13212           for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13213             {
13214               if (strcmp (arch, cpu_arch [j].name) == 0)
13215                 {
13216                   /* Processor.  */
13217                   if (! cpu_arch[j].flags.bitfield.cpui386)
13218                     continue;
13219
13220                   cpu_arch_name = cpu_arch[j].name;
13221                   cpu_sub_arch_name = NULL;
13222                   cpu_arch_flags = cpu_arch[j].flags;
13223                   cpu_arch_isa = cpu_arch[j].type;
13224                   cpu_arch_isa_flags = cpu_arch[j].flags;
13225                   if (!cpu_arch_tune_set)
13226                     {
13227                       cpu_arch_tune = cpu_arch_isa;
13228                       cpu_arch_tune_flags = cpu_arch_isa_flags;
13229                     }
13230                   break;
13231                 }
13232               else if (*cpu_arch [j].name == '.'
13233                        && strcmp (arch, cpu_arch [j].name + 1) == 0)
13234                 {
13235                   /* ISA extension.  */
13236                   i386_cpu_flags flags;
13237
13238                   flags = cpu_flags_or (cpu_arch_flags,
13239                                         cpu_arch[j].flags);
13240
13241                   if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13242                     {
13243                       if (cpu_sub_arch_name)
13244                         {
13245                           char *name = cpu_sub_arch_name;
13246                           cpu_sub_arch_name = concat (name,
13247                                                       cpu_arch[j].name,
13248                                                       (const char *) NULL);
13249                           free (name);
13250                         }
13251                       else
13252                         cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
13253                       cpu_arch_flags = flags;
13254                       cpu_arch_isa_flags = flags;
13255                     }
13256                   else
13257                     cpu_arch_isa_flags
13258                       = cpu_flags_or (cpu_arch_isa_flags,
13259                                       cpu_arch[j].flags);
13260                   break;
13261                 }
13262             }
13263
13264           if (j >= ARRAY_SIZE (cpu_arch))
13265             {
13266               /* Disable an ISA extension.  */
13267               for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
13268                 if (strcmp (arch, cpu_noarch [j].name) == 0)
13269                   {
13270                     i386_cpu_flags flags;
13271
13272                     flags = cpu_flags_and_not (cpu_arch_flags,
13273                                                cpu_noarch[j].flags);
13274                     if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13275                       {
13276                         if (cpu_sub_arch_name)
13277                           {
13278                             char *name = cpu_sub_arch_name;
13279                             cpu_sub_arch_name = concat (arch,
13280                                                         (const char *) NULL);
13281                             free (name);
13282                           }
13283                         else
13284                           cpu_sub_arch_name = xstrdup (arch);
13285                         cpu_arch_flags = flags;
13286                         cpu_arch_isa_flags = flags;
13287                       }
13288                     break;
13289                   }
13290
13291               if (j >= ARRAY_SIZE (cpu_noarch))
13292                 j = ARRAY_SIZE (cpu_arch);
13293             }
13294
13295           if (j >= ARRAY_SIZE (cpu_arch))
13296             as_fatal (_("invalid -march= option: `%s'"), arg);
13297
13298           arch = next;
13299         }
13300       while (next != NULL);
13301       free (saved);
13302       break;
13303
13304     case OPTION_MTUNE:
13305       if (*arg == '.')
13306         as_fatal (_("invalid -mtune= option: `%s'"), arg);
13307       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13308         {
13309           if (strcmp (arg, cpu_arch [j].name) == 0)
13310             {
13311               cpu_arch_tune_set = 1;
13312               cpu_arch_tune = cpu_arch [j].type;
13313               cpu_arch_tune_flags = cpu_arch[j].flags;
13314               break;
13315             }
13316         }
13317       if (j >= ARRAY_SIZE (cpu_arch))
13318         as_fatal (_("invalid -mtune= option: `%s'"), arg);
13319       break;
13320
13321     case OPTION_MMNEMONIC:
13322       if (strcasecmp (arg, "att") == 0)
13323         intel_mnemonic = 0;
13324       else if (strcasecmp (arg, "intel") == 0)
13325         intel_mnemonic = 1;
13326       else
13327         as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
13328       break;
13329
13330     case OPTION_MSYNTAX:
13331       if (strcasecmp (arg, "att") == 0)
13332         intel_syntax = 0;
13333       else if (strcasecmp (arg, "intel") == 0)
13334         intel_syntax = 1;
13335       else
13336         as_fatal (_("invalid -msyntax= option: `%s'"), arg);
13337       break;
13338
13339     case OPTION_MINDEX_REG:
13340       allow_index_reg = 1;
13341       break;
13342
13343     case OPTION_MNAKED_REG:
13344       allow_naked_reg = 1;
13345       break;
13346
13347     case OPTION_MSSE2AVX:
13348       sse2avx = 1;
13349       break;
13350
13351     case OPTION_MSSE_CHECK:
13352       if (strcasecmp (arg, "error") == 0)
13353         sse_check = check_error;
13354       else if (strcasecmp (arg, "warning") == 0)
13355         sse_check = check_warning;
13356       else if (strcasecmp (arg, "none") == 0)
13357         sse_check = check_none;
13358       else
13359         as_fatal (_("invalid -msse-check= option: `%s'"), arg);
13360       break;
13361
13362     case OPTION_MOPERAND_CHECK:
13363       if (strcasecmp (arg, "error") == 0)
13364         operand_check = check_error;
13365       else if (strcasecmp (arg, "warning") == 0)
13366         operand_check = check_warning;
13367       else if (strcasecmp (arg, "none") == 0)
13368         operand_check = check_none;
13369       else
13370         as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
13371       break;
13372
13373     case OPTION_MAVXSCALAR:
13374       if (strcasecmp (arg, "128") == 0)
13375         avxscalar = vex128;
13376       else if (strcasecmp (arg, "256") == 0)
13377         avxscalar = vex256;
13378       else
13379         as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
13380       break;
13381
13382     case OPTION_MVEXWIG:
13383       if (strcmp (arg, "0") == 0)
13384         vexwig = vexw0;
13385       else if (strcmp (arg, "1") == 0)
13386         vexwig = vexw1;
13387       else
13388         as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
13389       break;
13390
13391     case OPTION_MADD_BND_PREFIX:
13392       add_bnd_prefix = 1;
13393       break;
13394
13395     case OPTION_MEVEXLIG:
13396       if (strcmp (arg, "128") == 0)
13397         evexlig = evexl128;
13398       else if (strcmp (arg, "256") == 0)
13399         evexlig = evexl256;
13400       else  if (strcmp (arg, "512") == 0)
13401         evexlig = evexl512;
13402       else
13403         as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
13404       break;
13405
13406     case OPTION_MEVEXRCIG:
13407       if (strcmp (arg, "rne") == 0)
13408         evexrcig = rne;
13409       else if (strcmp (arg, "rd") == 0)
13410         evexrcig = rd;
13411       else if (strcmp (arg, "ru") == 0)
13412         evexrcig = ru;
13413       else if (strcmp (arg, "rz") == 0)
13414         evexrcig = rz;
13415       else
13416         as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
13417       break;
13418
13419     case OPTION_MEVEXWIG:
13420       if (strcmp (arg, "0") == 0)
13421         evexwig = evexw0;
13422       else if (strcmp (arg, "1") == 0)
13423         evexwig = evexw1;
13424       else
13425         as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
13426       break;
13427
13428 # if defined (TE_PE) || defined (TE_PEP)
13429     case OPTION_MBIG_OBJ:
13430       use_big_obj = 1;
13431       break;
13432 #endif
13433
13434     case OPTION_MOMIT_LOCK_PREFIX:
13435       if (strcasecmp (arg, "yes") == 0)
13436         omit_lock_prefix = 1;
13437       else if (strcasecmp (arg, "no") == 0)
13438         omit_lock_prefix = 0;
13439       else
13440         as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
13441       break;
13442
13443     case OPTION_MFENCE_AS_LOCK_ADD:
13444       if (strcasecmp (arg, "yes") == 0)
13445         avoid_fence = 1;
13446       else if (strcasecmp (arg, "no") == 0)
13447         avoid_fence = 0;
13448       else
13449         as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
13450       break;
13451
13452     case OPTION_MLFENCE_AFTER_LOAD:
13453       if (strcasecmp (arg, "yes") == 0)
13454         lfence_after_load = 1;
13455       else if (strcasecmp (arg, "no") == 0)
13456         lfence_after_load = 0;
13457       else
13458         as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
13459       break;
13460
13461     case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
13462       if (strcasecmp (arg, "all") == 0)
13463         {
13464           lfence_before_indirect_branch = lfence_branch_all;
13465           if (lfence_before_ret == lfence_before_ret_none)
13466             lfence_before_ret = lfence_before_ret_shl;
13467         }
13468       else if (strcasecmp (arg, "memory") == 0)
13469         lfence_before_indirect_branch = lfence_branch_memory;
13470       else if (strcasecmp (arg, "register") == 0)
13471         lfence_before_indirect_branch = lfence_branch_register;
13472       else if (strcasecmp (arg, "none") == 0)
13473         lfence_before_indirect_branch = lfence_branch_none;
13474       else
13475         as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
13476                   arg);
13477       break;
13478
13479     case OPTION_MLFENCE_BEFORE_RET:
13480       if (strcasecmp (arg, "or") == 0)
13481         lfence_before_ret = lfence_before_ret_or;
13482       else if (strcasecmp (arg, "not") == 0)
13483         lfence_before_ret = lfence_before_ret_not;
13484       else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
13485         lfence_before_ret = lfence_before_ret_shl;
13486       else if (strcasecmp (arg, "none") == 0)
13487         lfence_before_ret = lfence_before_ret_none;
13488       else
13489         as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
13490                   arg);
13491       break;
13492
13493     case OPTION_MRELAX_RELOCATIONS:
13494       if (strcasecmp (arg, "yes") == 0)
13495         generate_relax_relocations = 1;
13496       else if (strcasecmp (arg, "no") == 0)
13497         generate_relax_relocations = 0;
13498       else
13499         as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
13500       break;
13501
13502     case OPTION_MALIGN_BRANCH_BOUNDARY:
13503       {
13504         char *end;
13505         long int align = strtoul (arg, &end, 0);
13506         if (*end == '\0')
13507           {
13508             if (align == 0)
13509               {
13510                 align_branch_power = 0;
13511                 break;
13512               }
13513             else if (align >= 16)
13514               {
13515                 int align_power;
13516                 for (align_power = 0;
13517                      (align & 1) == 0;
13518                      align >>= 1, align_power++)
13519                   continue;
13520                 /* Limit alignment power to 31.  */
13521                 if (align == 1 && align_power < 32)
13522                   {
13523                     align_branch_power = align_power;
13524                     break;
13525                   }
13526               }
13527           }
13528         as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
13529       }
13530       break;
13531
13532     case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
13533       {
13534         char *end;
13535         int align = strtoul (arg, &end, 0);
13536         /* Some processors only support 5 prefixes.  */
13537         if (*end == '\0' && align >= 0 && align < 6)
13538           {
13539             align_branch_prefix_size = align;
13540             break;
13541           }
13542         as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
13543                   arg);
13544       }
13545       break;
13546
13547     case OPTION_MALIGN_BRANCH:
13548       align_branch = 0;
13549       saved = xstrdup (arg);
13550       type = saved;
13551       do
13552         {
13553           next = strchr (type, '+');
13554           if (next)
13555             *next++ = '\0';
13556           if (strcasecmp (type, "jcc") == 0)
13557             align_branch |= align_branch_jcc_bit;
13558           else if (strcasecmp (type, "fused") == 0)
13559             align_branch |= align_branch_fused_bit;
13560           else if (strcasecmp (type, "jmp") == 0)
13561             align_branch |= align_branch_jmp_bit;
13562           else if (strcasecmp (type, "call") == 0)
13563             align_branch |= align_branch_call_bit;
13564           else if (strcasecmp (type, "ret") == 0)
13565             align_branch |= align_branch_ret_bit;
13566           else if (strcasecmp (type, "indirect") == 0)
13567             align_branch |= align_branch_indirect_bit;
13568           else
13569             as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
13570           type = next;
13571         }
13572       while (next != NULL);
13573       free (saved);
13574       break;
13575
13576     case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
13577       align_branch_power = 5;
13578       align_branch_prefix_size = 5;
13579       align_branch = (align_branch_jcc_bit
13580                       | align_branch_fused_bit
13581                       | align_branch_jmp_bit);
13582       break;
13583
13584     case OPTION_MAMD64:
13585       isa64 = amd64;
13586       break;
13587
13588     case OPTION_MINTEL64:
13589       isa64 = intel64;
13590       break;
13591
13592     case 'O':
13593       if (arg == NULL)
13594         {
13595           optimize = 1;
13596           /* Turn off -Os.  */
13597           optimize_for_space = 0;
13598         }
13599       else if (*arg == 's')
13600         {
13601           optimize_for_space = 1;
13602           /* Turn on all encoding optimizations.  */
13603           optimize = INT_MAX;
13604         }
13605       else
13606         {
13607           optimize = atoi (arg);
13608           /* Turn off -Os.  */
13609           optimize_for_space = 0;
13610         }
13611       break;
13612
13613     default:
13614       return 0;
13615     }
13616   return 1;
13617 }
13618
13619 #define MESSAGE_TEMPLATE \
13620 "                                                                                "
13621
13622 static char *
13623 output_message (FILE *stream, char *p, char *message, char *start,
13624                 int *left_p, const char *name, int len)
13625 {
13626   int size = sizeof (MESSAGE_TEMPLATE);
13627   int left = *left_p;
13628
13629   /* Reserve 2 spaces for ", " or ",\0" */
13630   left -= len + 2;
13631
13632   /* Check if there is any room.  */
13633   if (left >= 0)
13634     {
13635       if (p != start)
13636         {
13637           *p++ = ',';
13638           *p++ = ' ';
13639         }
13640       p = mempcpy (p, name, len);
13641     }
13642   else
13643     {
13644       /* Output the current message now and start a new one.  */
13645       *p++ = ',';
13646       *p = '\0';
13647       fprintf (stream, "%s\n", message);
13648       p = start;
13649       left = size - (start - message) - len - 2;
13650
13651       gas_assert (left >= 0);
13652
13653       p = mempcpy (p, name, len);
13654     }
13655
13656   *left_p = left;
13657   return p;
13658 }
13659
13660 static void
13661 show_arch (FILE *stream, int ext, int check)
13662 {
13663   static char message[] = MESSAGE_TEMPLATE;
13664   char *start = message + 27;
13665   char *p;
13666   int size = sizeof (MESSAGE_TEMPLATE);
13667   int left;
13668   const char *name;
13669   int len;
13670   unsigned int j;
13671
13672   p = start;
13673   left = size - (start - message);
13674   for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13675     {
13676       /* Should it be skipped?  */
13677       if (cpu_arch [j].skip)
13678         continue;
13679
13680       name = cpu_arch [j].name;
13681       len = cpu_arch [j].len;
13682       if (*name == '.')
13683         {
13684           /* It is an extension.  Skip if we aren't asked to show it.  */
13685           if (ext)
13686             {
13687               name++;
13688               len--;
13689             }
13690           else
13691             continue;
13692         }
13693       else if (ext)
13694         {
13695           /* It is an processor.  Skip if we show only extension.  */
13696           continue;
13697         }
13698       else if (check && ! cpu_arch[j].flags.bitfield.cpui386)
13699         {
13700           /* It is an impossible processor - skip.  */
13701           continue;
13702         }
13703
13704       p = output_message (stream, p, message, start, &left, name, len);
13705     }
13706
13707   /* Display disabled extensions.  */
13708   if (ext)
13709     for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
13710       {
13711         name = cpu_noarch [j].name;
13712         len = cpu_noarch [j].len;
13713         p = output_message (stream, p, message, start, &left, name,
13714                             len);
13715       }
13716
13717   *p = '\0';
13718   fprintf (stream, "%s\n", message);
13719 }
13720
/* Print the x86-specific command-line option summary to STREAM.
   Several entries are emitted only when the corresponding object
   format or target macro is defined, and a few advertise a default
   that is taken from a configuration macro.  */

void
md_show_usage (FILE *stream)
{
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -Qy, -Qn                ignored\n\
  -V                      print assembler version number\n\
  -k                      ignored\n"));
#endif
  fprintf (stream, _("\
  -n                      Do not optimize code alignment\n\
  -q                      quieten some warnings\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -s                      ignored\n"));
#endif
#if defined BFD64 && (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
                      || defined (TE_PE) || defined (TE_PEP))
  fprintf (stream, _("\
  --32/--64/--x32         generate 32bit/64bit/x32 code\n"));
#endif
#ifdef SVR4_COMMENT_CHARS
  fprintf (stream, _("\
  --divide                do not treat `/' as a comment character\n"));
#else
  fprintf (stream, _("\
  --divide                ignored\n"));
#endif
  fprintf (stream, _("\
  -march=CPU[,+EXTENSION...]\n\
                          generate code for CPU and EXTENSION, CPU is one of:\n"));
  /* Processors only; CHECK is set, so processors whose flags lack
     cpui386 are omitted.  */
  show_arch (stream, 0, 1);
  fprintf (stream, _("\
                          EXTENSION is combination of:\n"));
  /* Extensions, followed by the cpu_noarch entries.  */
  show_arch (stream, 1, 0);
  fprintf (stream, _("\
  -mtune=CPU              optimize for CPU, CPU is one of:\n"));
  /* Every processor that is not marked as skipped.  */
  show_arch (stream, 0, 0);
  fprintf (stream, _("\
  -msse2avx               encode SSE instructions with VEX prefix\n"));
  fprintf (stream, _("\
  -msse-check=[none|error|warning] (default: warning)\n\
                          check SSE instructions\n"));
  fprintf (stream, _("\
  -moperand-check=[none|error|warning] (default: warning)\n\
                          check operand combinations for validity\n"));
  fprintf (stream, _("\
  -mavxscalar=[128|256] (default: 128)\n\
                          encode scalar AVX instructions with specific vector\n\
                           length\n"));
  fprintf (stream, _("\
  -mvexwig=[0|1] (default: 0)\n\
                          encode VEX instructions with specific VEX.W value\n\
                           for VEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexlig=[128|256|512] (default: 128)\n\
                          encode scalar EVEX instructions with specific vector\n\
                           length\n"));
  fprintf (stream, _("\
  -mevexwig=[0|1] (default: 0)\n\
                          encode EVEX instructions with specific EVEX.W value\n\
                           for EVEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
                          encode EVEX instructions with specific EVEX.RC value\n\
                           for SAE-only ignored instructions\n"));
  /* The option line is printed in pieces so that the advertised default
     can follow SYSV386_COMPAT.  */
  fprintf (stream, _("\
  -mmnemonic=[att|intel] "));
  if (SYSV386_COMPAT)
    fprintf (stream, _("(default: att)\n"));
  else
    fprintf (stream, _("(default: intel)\n"));
  fprintf (stream, _("\
                          use AT&T/Intel mnemonic\n"));
  fprintf (stream, _("\
  -msyntax=[att|intel] (default: att)\n\
                          use AT&T/Intel syntax\n"));
  fprintf (stream, _("\
  -mindex-reg             support pseudo index registers\n"));
  /* "%%" is a format escape here and prints a single '%'.  */
  fprintf (stream, _("\
  -mnaked-reg             don't require `%%' prefix for registers\n"));
  fprintf (stream, _("\
  -madd-bnd-prefix        add BND prefix for all valid branches\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -mshared                disable branch optimization for shared code\n"));
  /* The advertised default follows DEFAULT_X86_USED_NOTE.  */
  fprintf (stream, _("\
  -mx86-used-note=[no|yes] "));
  if (DEFAULT_X86_USED_NOTE)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate x86 used ISA and feature properties\n"));
#endif
#if defined (TE_PE) || defined (TE_PEP)
  fprintf (stream, _("\
  -mbig-obj               generate big object files\n"));
#endif
  fprintf (stream, _("\
  -momit-lock-prefix=[no|yes] (default: no)\n\
                          strip all lock prefixes\n"));
  fprintf (stream, _("\
  -mfence-as-lock-add=[no|yes] (default: no)\n\
                          encode lfence, mfence and sfence as\n\
                           lock addl $0x0, (%%{re}sp)\n"));
  /* The advertised default follows
     DEFAULT_GENERATE_X86_RELAX_RELOCATIONS.  */
  fprintf (stream, _("\
  -mrelax-relocations=[no|yes] "));
  if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate relax relocations\n"));
  fprintf (stream, _("\
  -malign-branch-boundary=NUM (default: 0)\n\
                          align branches within NUM byte boundary\n"));
  fprintf (stream, _("\
  -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
                          TYPE is combination of jcc, fused, jmp, call, ret,\n\
                           indirect\n\
                          specify types of branches to align\n"));
  fprintf (stream, _("\
  -malign-branch-prefix-size=NUM (default: 5)\n\
                          align branches with NUM prefixes per instruction\n"));
  fprintf (stream, _("\
  -mbranches-within-32B-boundaries\n\
                          align branches within 32 byte boundary\n"));
  fprintf (stream, _("\
  -mlfence-after-load=[no|yes] (default: no)\n\
                          generate lfence after load\n"));
  fprintf (stream, _("\
  -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
                          generate lfence before indirect near branch\n"));
  fprintf (stream, _("\
  -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
                          generate lfence before ret\n"));
  fprintf (stream, _("\
  -mamd64                 accept only AMD64 ISA [default]\n"));
  fprintf (stream, _("\
  -mintel64               accept only Intel64 ISA\n"));
}
13863
13864 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
13865      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13866      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13867
13868 /* Pick the target format to use.  */
13869
13870 const char *
13871 i386_target_format (void)
13872 {
13873   if (startswith (default_arch, "x86_64"))
13874     {
13875       update_code_flag (CODE_64BIT, 1);
13876       if (default_arch[6] == '\0')
13877         x86_elf_abi = X86_64_ABI;
13878       else
13879         x86_elf_abi = X86_64_X32_ABI;
13880     }
13881   else if (!strcmp (default_arch, "i386"))
13882     update_code_flag (CODE_32BIT, 1);
13883   else if (!strcmp (default_arch, "iamcu"))
13884     {
13885       update_code_flag (CODE_32BIT, 1);
13886       if (cpu_arch_isa == PROCESSOR_UNKNOWN)
13887         {
13888           static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
13889           cpu_arch_name = "iamcu";
13890           cpu_sub_arch_name = NULL;
13891           cpu_arch_flags = iamcu_flags;
13892           cpu_arch_isa = PROCESSOR_IAMCU;
13893           cpu_arch_isa_flags = iamcu_flags;
13894           if (!cpu_arch_tune_set)
13895             {
13896               cpu_arch_tune = cpu_arch_isa;
13897               cpu_arch_tune_flags = cpu_arch_isa_flags;
13898             }
13899         }
13900       else if (cpu_arch_isa != PROCESSOR_IAMCU)
13901         as_fatal (_("Intel MCU doesn't support `%s' architecture"),
13902                   cpu_arch_name);
13903     }
13904   else
13905     as_fatal (_("unknown architecture"));
13906
13907   if (cpu_flags_all_zero (&cpu_arch_isa_flags))
13908     cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].flags;
13909   if (cpu_flags_all_zero (&cpu_arch_tune_flags))
13910     cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].flags;
13911
13912   switch (OUTPUT_FLAVOR)
13913     {
13914 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
13915     case bfd_target_aout_flavour:
13916       return AOUT_TARGET_FORMAT;
13917 #endif
13918 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
13919 # if defined (TE_PE) || defined (TE_PEP)
13920     case bfd_target_coff_flavour:
13921       if (flag_code == CODE_64BIT)
13922         {
13923           object_64bit = 1;
13924           return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
13925         }
13926       return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
13927 # elif defined (TE_GO32)
13928     case bfd_target_coff_flavour:
13929       return "coff-go32";
13930 # else
13931     case bfd_target_coff_flavour:
13932       return "coff-i386";
13933 # endif
13934 #endif
13935 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
13936     case bfd_target_elf_flavour:
13937       {
13938         const char *format;
13939
13940         switch (x86_elf_abi)
13941           {
13942           default:
13943             format = ELF_TARGET_FORMAT;
13944 #ifndef TE_SOLARIS
13945             tls_get_addr = "___tls_get_addr";
13946 #endif
13947             break;
13948           case X86_64_ABI:
13949             use_rela_relocations = 1;
13950             object_64bit = 1;
13951 #ifndef TE_SOLARIS
13952             tls_get_addr = "__tls_get_addr";
13953 #endif
13954             format = ELF_TARGET_FORMAT64;
13955             break;
13956           case X86_64_X32_ABI:
13957             use_rela_relocations = 1;
13958             object_64bit = 1;
13959 #ifndef TE_SOLARIS
13960             tls_get_addr = "__tls_get_addr";
13961 #endif
13962             disallow_64bit_reloc = 1;
13963             format = ELF_TARGET_FORMAT32;
13964             break;
13965           }
13966         if (cpu_arch_isa == PROCESSOR_L1OM)
13967           {
13968             if (x86_elf_abi != X86_64_ABI)
13969               as_fatal (_("Intel L1OM is 64bit only"));
13970             return ELF_TARGET_L1OM_FORMAT;
13971           }
13972         else if (cpu_arch_isa == PROCESSOR_K1OM)
13973           {
13974             if (x86_elf_abi != X86_64_ABI)
13975               as_fatal (_("Intel K1OM is 64bit only"));
13976             return ELF_TARGET_K1OM_FORMAT;
13977           }
13978         else if (cpu_arch_isa == PROCESSOR_IAMCU)
13979           {
13980             if (x86_elf_abi != I386_ABI)
13981               as_fatal (_("Intel MCU is 32bit only"));
13982             return ELF_TARGET_IAMCU_FORMAT;
13983           }
13984         else
13985           return format;
13986       }
13987 #endif
13988 #if defined (OBJ_MACH_O)
13989     case bfd_target_mach_o_flavour:
13990       if (flag_code == CODE_64BIT)
13991         {
13992           use_rela_relocations = 1;
13993           object_64bit = 1;
13994           return "mach-o-x86-64";
13995         }
13996       else
13997         return "mach-o-i386";
13998 #endif
13999     default:
14000       abort ();
14001       return NULL;
14002     }
14003 }
14004
14005 #endif /* OBJ_MAYBE_ more than one  */
14006 \f
14007 symbolS *
14008 md_undefined_symbol (char *name)
14009 {
14010   if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
14011       && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
14012       && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
14013       && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
14014     {
14015       if (!GOT_symbol)
14016         {
14017           if (symbol_find (name))
14018             as_bad (_("GOT already in symbol table"));
14019           GOT_symbol = symbol_new (name, undefined_section,
14020                                    &zero_address_frag, 0);
14021         };
14022       return GOT_symbol;
14023     }
14024   return 0;
14025 }
14026
14027 /* Round up a section size to the appropriate boundary.  */
14028
14029 valueT
14030 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
14031 {
14032 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
14033   if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
14034     {
14035       /* For a.out, force the section size to be aligned.  If we don't do
14036          this, BFD will align it for us, but it will not write out the
14037          final bytes of the section.  This may be a bug in BFD, but it is
14038          easier to fix it here since that is how the other a.out targets
14039          work.  */
14040       int align;
14041
14042       align = bfd_section_alignment (segment);
14043       size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
14044     }
14045 #endif
14046
14047   return size;
14048 }
14049
14050 /* On the i386, PC-relative offsets are relative to the start of the
14051    next instruction.  That is, the address of the offset, plus its
14052    size, since the offset is always the last part of the insn.  */
14053
14054 long
14055 md_pcrel_from (fixS *fixP)
14056 {
14057   return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
14058 }
14059
14060 #ifndef I386COFF
14061
14062 static void
14063 s_bss (int ignore ATTRIBUTE_UNUSED)
14064 {
14065   int temp;
14066
14067 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14068   if (IS_ELF)
14069     obj_elf_section_change_hook ();
14070 #endif
14071   temp = get_absolute_expression ();
14072   subseg_set (bss_section, (subsegT) temp);
14073   demand_empty_rest_of_line ();
14074 }
14075
14076 #endif
14077
14078 /* Remember constant directive.  */
14079
14080 void
14081 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
14082 {
14083   if (last_insn.kind != last_insn_directive
14084       && (bfd_section_flags (now_seg) & SEC_CODE))
14085     {
14086       last_insn.seg = now_seg;
14087       last_insn.kind = last_insn_directive;
14088       last_insn.name = "constant directive";
14089       last_insn.file = as_where (&last_insn.line);
14090       if (lfence_before_ret != lfence_before_ret_none)
14091         {
14092           if (lfence_before_indirect_branch != lfence_branch_none)
14093             as_warn (_("constant directive skips -mlfence-before-ret "
14094                        "and -mlfence-before-indirect-branch"));
14095           else
14096             as_warn (_("constant directive skips -mlfence-before-ret"));
14097         }
14098       else if (lfence_before_indirect_branch != lfence_branch_none)
14099         as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
14100     }
14101 }
14102
14103 int
14104 i386_validate_fix (fixS *fixp)
14105 {
14106 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14107   if (fixp->fx_r_type == BFD_RELOC_SIZE32
14108       || fixp->fx_r_type == BFD_RELOC_SIZE64)
14109     return IS_ELF && fixp->fx_addsy
14110            && (!S_IS_DEFINED (fixp->fx_addsy)
14111                || S_IS_EXTERNAL (fixp->fx_addsy));
14112 #endif
14113
14114   if (fixp->fx_subsy)
14115     {
14116       if (fixp->fx_subsy == GOT_symbol)
14117         {
14118           if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
14119             {
14120               if (!object_64bit)
14121                 abort ();
14122 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14123               if (fixp->fx_tcbit2)
14124                 fixp->fx_r_type = (fixp->fx_tcbit
14125                                    ? BFD_RELOC_X86_64_REX_GOTPCRELX
14126                                    : BFD_RELOC_X86_64_GOTPCRELX);
14127               else
14128 #endif
14129                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
14130             }
14131           else
14132             {
14133               if (!object_64bit)
14134                 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
14135               else
14136                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
14137             }
14138           fixp->fx_subsy = 0;
14139         }
14140     }
14141 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14142   else
14143     {
14144       /* NB: Commit 292676c1 resolved PLT32 reloc aganst local symbol
14145          to section.  Since PLT32 relocation must be against symbols,
14146          turn such PLT32 relocation into PC32 relocation.  */
14147       if (fixp->fx_addsy
14148           && (fixp->fx_r_type == BFD_RELOC_386_PLT32
14149               || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
14150           && symbol_section_p (fixp->fx_addsy))
14151         fixp->fx_r_type = BFD_RELOC_32_PCREL;
14152       if (!object_64bit)
14153         {
14154           if (fixp->fx_r_type == BFD_RELOC_386_GOT32
14155               && fixp->fx_tcbit2)
14156             fixp->fx_r_type = BFD_RELOC_386_GOT32X;
14157         }
14158     }
14159 #endif
14160
14161   return 1;
14162 }
14163
/* Translate the fixup FIXP into a BFD relocation entry.

   Returns a newly allocated arelent.  NULL is returned instead when a
   size relocation against a defined, non-external ELF symbol has been
   resolved in place through md_apply_fix, or after an unsupported
   @size expression has been diagnosed.  */

arelent *
tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
{
  arelent *rel;
  bfd_reloc_code_real_type code;

  /* First decide which BFD relocation code to use.  */
  switch (fixp->fx_r_type)
    {
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      symbolS *sym;

    case BFD_RELOC_SIZE32:
    case BFD_RELOC_SIZE64:
      /* Select the symbol whose size is wanted: whichever of the add
         and subtract symbols is not in the absolute section, provided
         the other one is missing or absolute.  Otherwise there is
         none.  */
      if (fixp->fx_addsy
          && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
          && (!fixp->fx_subsy
              || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
        sym = fixp->fx_addsy;
      else if (fixp->fx_subsy
               && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
               && (!fixp->fx_addsy
                   || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
        sym = fixp->fx_subsy;
      else
        sym = NULL;
      if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
        {
          /* Resolve size relocation against local symbol to size of
             the symbol plus addend.  */
          valueT value = S_GET_SIZE (sym);

          /* A section symbol stands for the size of its section.  */
          if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
            value = bfd_section_size (S_GET_SEGMENT (sym));
          if (sym == fixp->fx_subsy)
            {
              /* The sized symbol is the one being subtracted, so its
                 size enters negated.  */
              value = -value;
              if (fixp->fx_addsy)
                value += S_GET_VALUE (fixp->fx_addsy);
            }
          else if (fixp->fx_subsy)
            value -= S_GET_VALUE (fixp->fx_subsy);
          value += fixp->fx_offset;
          if (fixp->fx_r_type == BFD_RELOC_SIZE32
              && object_64bit
              && !fits_in_unsigned_long (value))
            as_bad_where (fixp->fx_file, fixp->fx_line,
                          _("symbol size computation overflow"));
          /* The value is applied directly, so no relocation entry is
             produced for this fixup.  */
          fixp->fx_addsy = NULL;
          fixp->fx_subsy = NULL;
          md_apply_fix (fixp, (valueT *) &value, NULL);
          return NULL;
        }
      if (!fixp->fx_addsy || fixp->fx_subsy)
        {
          as_bad_where (fixp->fx_file, fixp->fx_line,
                        "unsupported expression involving @size");
          return NULL;
        }
#endif
      /* Fall through.  */

    /* The relocation types below are emitted exactly as requested.  */
    case BFD_RELOC_X86_64_PLT32:
    case BFD_RELOC_X86_64_GOT32:
    case BFD_RELOC_X86_64_GOTPCREL:
    case BFD_RELOC_X86_64_GOTPCRELX:
    case BFD_RELOC_X86_64_REX_GOTPCRELX:
    case BFD_RELOC_386_PLT32:
    case BFD_RELOC_386_GOT32:
    case BFD_RELOC_386_GOT32X:
    case BFD_RELOC_386_GOTOFF:
    case BFD_RELOC_386_GOTPC:
    case BFD_RELOC_386_TLS_GD:
    case BFD_RELOC_386_TLS_LDM:
    case BFD_RELOC_386_TLS_LDO_32:
    case BFD_RELOC_386_TLS_IE_32:
    case BFD_RELOC_386_TLS_IE:
    case BFD_RELOC_386_TLS_GOTIE:
    case BFD_RELOC_386_TLS_LE_32:
    case BFD_RELOC_386_TLS_LE:
    case BFD_RELOC_386_TLS_GOTDESC:
    case BFD_RELOC_386_TLS_DESC_CALL:
    case BFD_RELOC_X86_64_TLSGD:
    case BFD_RELOC_X86_64_TLSLD:
    case BFD_RELOC_X86_64_DTPOFF32:
    case BFD_RELOC_X86_64_DTPOFF64:
    case BFD_RELOC_X86_64_GOTTPOFF:
    case BFD_RELOC_X86_64_TPOFF32:
    case BFD_RELOC_X86_64_TPOFF64:
    case BFD_RELOC_X86_64_GOTOFF64:
    case BFD_RELOC_X86_64_GOTPC32:
    case BFD_RELOC_X86_64_GOT64:
    case BFD_RELOC_X86_64_GOTPCREL64:
    case BFD_RELOC_X86_64_GOTPC64:
    case BFD_RELOC_X86_64_GOTPLT64:
    case BFD_RELOC_X86_64_PLTOFF64:
    case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
    case BFD_RELOC_X86_64_TLSDESC_CALL:
    case BFD_RELOC_RVA:
    case BFD_RELOC_VTABLE_ENTRY:
    case BFD_RELOC_VTABLE_INHERIT:
#ifdef TE_PE
    case BFD_RELOC_32_SECREL:
#endif
      code = fixp->fx_r_type;
      break;
    case BFD_RELOC_X86_64_32S:
      if (!fixp->fx_pcrel)
        {
          /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
          code = fixp->fx_r_type;
          break;
        }
      /* Fall through.  */
    default:
      /* Everything else maps to a generic relocation chosen from the
         fixup size and whether it is pc-relative.  An unsupported size
         is diagnosed and the 32-bit variant is used so that processing
         can continue.  */
      if (fixp->fx_pcrel)
        {
          switch (fixp->fx_size)
            {
            default:
              as_bad_where (fixp->fx_file, fixp->fx_line,
                            _("can not do %d byte pc-relative relocation"),
                            fixp->fx_size);
              code = BFD_RELOC_32_PCREL;
              break;
            case 1: code = BFD_RELOC_8_PCREL;  break;
            case 2: code = BFD_RELOC_16_PCREL; break;
            case 4: code = BFD_RELOC_32_PCREL; break;
#ifdef BFD64
            case 8: code = BFD_RELOC_64_PCREL; break;
#endif
            }
        }
      else
        {
          switch (fixp->fx_size)
            {
            default:
              as_bad_where (fixp->fx_file, fixp->fx_line,
                            _("can not do %d byte relocation"),
                            fixp->fx_size);
              code = BFD_RELOC_32;
              break;
            case 1: code = BFD_RELOC_8;  break;
            case 2: code = BFD_RELOC_16; break;
            case 4: code = BFD_RELOC_32; break;
#ifdef BFD64
            case 8: code = BFD_RELOC_64; break;
#endif
            }
        }
      break;
    }

  /* A generic 32-bit relocation against the GOT symbol itself becomes
     the GOTPC relocation for the current object size.  */
  if ((code == BFD_RELOC_32
       || code == BFD_RELOC_32_PCREL
       || code == BFD_RELOC_X86_64_32S)
      && GOT_symbol
      && fixp->fx_addsy == GOT_symbol)
    {
      if (!object_64bit)
        code = BFD_RELOC_386_GOTPC;
      else
        code = BFD_RELOC_X86_64_GOTPC32;
    }
  /* Likewise for the generic 64-bit relocations.  */
  if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
      && GOT_symbol
      && fixp->fx_addsy == GOT_symbol)
    {
      code = BFD_RELOC_X86_64_GOTPC64;
    }

  /* Allocate the relocation entry and point it at the BFD symbol of the
     fixup's add symbol.  */
  rel = XNEW (arelent);
  rel->sym_ptr_ptr = XNEW (asymbol *);
  *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);

  rel->address = fixp->fx_frag->fr_address + fixp->fx_where;

  if (!use_rela_relocations)
    {
      /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
         vtable entry to be used in the relocation's section offset.  */
      if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
        rel->address = fixp->fx_offset;
      /* NOTE: without OBJ_COFF && TE_PE the assignment of 0 below is
         unconditional; with them it is the final arm of the if/else
         chain started above.  */
#if defined (OBJ_COFF) && defined (TE_PE)
      else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
        rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
      else
#endif
      rel->addend = 0;
    }
  /* Use the rela in 64bit mode.  */
  else
    {
      /* When 64-bit relocations are disallowed, reject the relocation
         types listed here; the message refers to x32 mode.  */
      if (disallow_64bit_reloc)
        switch (code)
          {
          case BFD_RELOC_X86_64_DTPOFF64:
          case BFD_RELOC_X86_64_TPOFF64:
          case BFD_RELOC_64_PCREL:
          case BFD_RELOC_X86_64_GOTOFF64:
          case BFD_RELOC_X86_64_GOT64:
          case BFD_RELOC_X86_64_GOTPCREL64:
          case BFD_RELOC_X86_64_GOTPC64:
          case BFD_RELOC_X86_64_GOTPLT64:
          case BFD_RELOC_X86_64_PLTOFF64:
            as_bad_where (fixp->fx_file, fixp->fx_line,
                          _("cannot represent relocation type %s in x32 mode"),
                          bfd_get_reloc_code_name (code));
            break;
          default:
            break;
          }

      if (!fixp->fx_pcrel)
        rel->addend = fixp->fx_offset;
      else
        switch (code)
          {
          /* For these types the addend is the fixup offset less the
             size of the field.  */
          case BFD_RELOC_X86_64_PLT32:
          case BFD_RELOC_X86_64_GOT32:
          case BFD_RELOC_X86_64_GOTPCREL:
          case BFD_RELOC_X86_64_GOTPCRELX:
          case BFD_RELOC_X86_64_REX_GOTPCRELX:
          case BFD_RELOC_X86_64_TLSGD:
          case BFD_RELOC_X86_64_TLSLD:
          case BFD_RELOC_X86_64_GOTTPOFF:
          case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
          case BFD_RELOC_X86_64_TLSDESC_CALL:
            rel->addend = fixp->fx_offset - fixp->fx_size;
            break;
          default:
            rel->addend = (section->vma
                           - fixp->fx_size
                           + fixp->fx_addnumber
                           + md_pcrel_from (fixp));
            break;
          }
    }

  /* Look up how the output BFD represents the chosen code.  */
  rel->howto = bfd_reloc_type_lookup (stdoutput, code);
  if (rel->howto == NULL)
    {
      as_bad_where (fixp->fx_file, fixp->fx_line,
                    _("cannot represent relocation type %s"),
                    bfd_get_reloc_code_name (code));
      /* Set howto to a garbage value so that we can keep going.  */
      rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
      gas_assert (rel->howto != NULL);
    }

  return rel;
}
14416
14417 #include "tc-i386-intel.c"
14418
14419 void
14420 tc_x86_parse_to_dw2regnum (expressionS *exp)
14421 {
14422   int saved_naked_reg;
14423   char saved_register_dot;
14424
14425   saved_naked_reg = allow_naked_reg;
14426   allow_naked_reg = 1;
14427   saved_register_dot = register_chars['.'];
14428   register_chars['.'] = '.';
14429   allow_pseudo_reg = 1;
14430   expression_and_evaluate (exp);
14431   allow_pseudo_reg = 0;
14432   register_chars['.'] = saved_register_dot;
14433   allow_naked_reg = saved_naked_reg;
14434
14435   if (exp->X_op == O_register && exp->X_add_number >= 0)
14436     {
14437       if ((addressT) exp->X_add_number < i386_regtab_size)
14438         {
14439           exp->X_op = O_constant;
14440           exp->X_add_number = i386_regtab[exp->X_add_number]
14441                               .dw2_regnum[flag_code >> 1];
14442         }
14443       else
14444         exp->X_op = O_illegal;
14445     }
14446 }
14447
14448 void
14449 tc_x86_frame_initial_instructions (void)
14450 {
14451   static unsigned int sp_regno[2];
14452
14453   if (!sp_regno[flag_code >> 1])
14454     {
14455       char *saved_input = input_line_pointer;
14456       char sp[][4] = {"esp", "rsp"};
14457       expressionS exp;
14458
14459       input_line_pointer = sp[flag_code >> 1];
14460       tc_x86_parse_to_dw2regnum (&exp);
14461       gas_assert (exp.X_op == O_constant);
14462       sp_regno[flag_code >> 1] = exp.X_add_number;
14463       input_line_pointer = saved_input;
14464     }
14465
14466   cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
14467   cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
14468 }
14469
14470 int
14471 x86_dwarf2_addr_size (void)
14472 {
14473 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14474   if (x86_elf_abi == X86_64_X32_ABI)
14475     return 4;
14476 #endif
14477   return bfd_arch_bits_per_address (stdoutput) / 8;
14478 }
14479
14480 int
14481 i386_elf_section_type (const char *str, size_t len)
14482 {
14483   if (flag_code == CODE_64BIT
14484       && len == sizeof ("unwind") - 1
14485       && startswith (str, "unwind"))
14486     return SHT_X86_64_UNWIND;
14487
14488   return -1;
14489 }
14490
14491 #ifdef TE_SOLARIS
14492 void
14493 i386_solaris_fix_up_eh_frame (segT sec)
14494 {
14495   if (flag_code == CODE_64BIT)
14496     elf_section_type (sec) = SHT_X86_64_UNWIND;
14497 }
14498 #endif
14499
14500 #ifdef TE_PE
14501 void
14502 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
14503 {
14504   expressionS exp;
14505
14506   exp.X_op = O_secrel;
14507   exp.X_add_symbol = symbol;
14508   exp.X_add_number = 0;
14509   emit_expr (&exp, size);
14510 }
14511 #endif
14512
14513 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14514 /* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */
14515
14516 bfd_vma
14517 x86_64_section_letter (int letter, const char **ptr_msg)
14518 {
14519   if (flag_code == CODE_64BIT)
14520     {
14521       if (letter == 'l')
14522         return SHF_X86_64_LARGE;
14523
14524       *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
14525     }
14526   else
14527     *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
14528   return -1;
14529 }
14530
14531 bfd_vma
14532 x86_64_section_word (char *str, size_t len)
14533 {
14534   if (len == 5 && flag_code == CODE_64BIT && startswith (str, "large"))
14535     return SHF_X86_64_LARGE;
14536
14537   return -1;
14538 }
14539
14540 static void
14541 handle_large_common (int small ATTRIBUTE_UNUSED)
14542 {
14543   if (flag_code != CODE_64BIT)
14544     {
14545       s_comm_internal (0, elf_common_parse);
14546       as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
14547     }
14548   else
14549     {
14550       static segT lbss_section;
14551       asection *saved_com_section_ptr = elf_com_section_ptr;
14552       asection *saved_bss_section = bss_section;
14553
14554       if (lbss_section == NULL)
14555         {
14556           flagword applicable;
14557           segT seg = now_seg;
14558           subsegT subseg = now_subseg;
14559
14560           /* The .lbss section is for local .largecomm symbols.  */
14561           lbss_section = subseg_new (".lbss", 0);
14562           applicable = bfd_applicable_section_flags (stdoutput);
14563           bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
14564           seg_info (lbss_section)->bss = 1;
14565
14566           subseg_set (seg, subseg);
14567         }
14568
14569       elf_com_section_ptr = &_bfd_elf_large_com_section;
14570       bss_section = lbss_section;
14571
14572       s_comm_internal (0, elf_common_parse);
14573
14574       elf_com_section_ptr = saved_com_section_ptr;
14575       bss_section = saved_bss_section;
14576     }
14577 }
14578 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */