gas/config/tc-i386.c

   1 /* tc-i386.c -- Assemble code for the Intel 80386
   2    Copyright (C) 1989-2023 Free Software Foundation, Inc.
   3
   4    This file is part of GAS, the GNU Assembler.
   5
   6    GAS is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GAS is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GAS; see the file COPYING.  If not, write to the Free
  18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
  19    02110-1301, USA.  */
  20
  21 /* Intel 80386 machine specific gas.
  22    Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
  23    x86_64 support by Jan Hubicka (jh@suse.cz)
  24    VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
  25    Bugs & suggestions are completely welcome.  This is free software.
  26    Please help us make it better.  */
  27
  28 #include "as.h"
  29 #include "safe-ctype.h"
  30 #include "subsegs.h"
  31 #include "dwarf2dbg.h"
  32 #include "dw2gencfi.h"
  33 #include "gen-sframe.h"
  34 #include "sframe.h"
  35 #include "elf/x86-64.h"
  36 #include "opcodes/i386-init.h"
  37 #include <limits.h>
  38
  39 #ifndef INFER_ADDR_PREFIX
  40 #define INFER_ADDR_PREFIX 1
  41 #endif
  42
  43 #ifndef DEFAULT_ARCH
  44 #define DEFAULT_ARCH "i386"
  45 #endif
  46
  47 #ifndef INLINE
  48 #if __GNUC__ >= 2
  49 #define INLINE __inline__
  50 #else
  51 #define INLINE
  52 #endif
  53 #endif
  54
  55 /* Prefixes will be emitted in the order defined below.
  56    WAIT_PREFIX must be the first prefix since FWAIT really is an
  57    instruction, and so must come before any prefixes.
  58    The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
  59    REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
  60 #define WAIT_PREFIX     0
  61 #define SEG_PREFIX      1
  62 #define ADDR_PREFIX     2
  63 #define DATA_PREFIX     3
  64 #define REP_PREFIX      4
  65 #define HLE_PREFIX      REP_PREFIX
  66 #define BND_PREFIX      REP_PREFIX
  67 #define LOCK_PREFIX     5
  68 #define REX_PREFIX      6       /* must come last.  */
  69 #define MAX_PREFIXES    7       /* max prefixes per opcode */
  70
  71 /* we define the syntax here (modulo base,index,scale syntax) */
  72 #define REGISTER_PREFIX '%'
  73 #define IMMEDIATE_PREFIX '$'
  74 #define ABSOLUTE_PREFIX '*'
  75
  76 /* these are the instruction mnemonic suffixes in AT&T syntax or
  77    memory operand size in Intel syntax.  */
  78 #define WORD_MNEM_SUFFIX  'w'
  79 #define BYTE_MNEM_SUFFIX  'b'
  80 #define SHORT_MNEM_SUFFIX 's'
  81 #define LONG_MNEM_SUFFIX  'l'
  82 #define QWORD_MNEM_SUFFIX  'q'
  83
  84 #define END_OF_INSN '\0'
  85
  86 #define OPERAND_TYPE_NONE { .bitfield = { .class = ClassNone } }
  87
  88 /* This matches the C -> StaticRounding alias in the opcode table.  */
  89 #define commutative staticrounding
  90
  91 /*
  92   'templates' is for grouping together 'template' structures for opcodes
  93   of the same name.  This is only used for storing the insns in the grand
  94   ole hash table of insns.
  95   The templates themselves start at START and range up to (but not including)
  96   END.
  97   */
  98 typedef struct
  99 {
 100   const insn_template *start;
 101   const insn_template *end;
 102 }
 103 templates;
 104
 105 /* 386 operand encoding bytes:  see 386 book for details of this.  */
 106 typedef struct
 107 {
 108   unsigned int regmem;  /* codes register or memory operand */
 109   unsigned int reg;     /* codes register operand (or extended opcode) */
 110   unsigned int mode;    /* how to interpret regmem & reg */
 111 }
 112 modrm_byte;
 113
 114 /* x86-64 extension prefix.  */
 115 typedef int rex_byte;
 116
 117 /* 386 opcode byte to code indirect addressing.  */
 118 typedef struct
 119 {
 120   unsigned base;
 121   unsigned index;
 122   unsigned scale;
 123 }
 124 sib_byte;
 125
 126 /* x86 arch names, types and features */
 127 typedef struct
 128 {
 129   const char *name;             /* arch name */
 130   unsigned int len:8;           /* arch string length */
 131   bool skip:1;                  /* show_arch should skip this. */
 132   enum processor_type type;     /* arch type */
 133   i386_cpu_flags enable;                /* cpu feature enable flags */
 134   i386_cpu_flags disable;       /* cpu feature disable flags */
 135 }
 136 arch_entry;
 137
 138 static void update_code_flag (int, int);
 139 static void set_code_flag (int);
 140 static void set_16bit_gcc_code_flag (int);
 141 static void set_intel_syntax (int);
 142 static void set_intel_mnemonic (int);
 143 static void set_allow_index_reg (int);
 144 static void set_check (int);
 145 static void set_cpu_arch (int);
 146 #ifdef TE_PE
 147 static void pe_directive_secrel (int);
 148 static void pe_directive_secidx (int);
 149 #endif
 150 static void signed_cons (int);
 151 static char *output_invalid (int c);
 152 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
 153                                     const char *);
 154 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
 155                                        const char *);
 156 static int i386_att_operand (char *);
 157 static int i386_intel_operand (char *, int);
 158 static int i386_intel_simplify (expressionS *);
 159 static int i386_intel_parse_name (const char *, expressionS *);
 160 static const reg_entry *parse_register (char *, char **);
 161 static const char *parse_insn (const char *, char *);
 162 static char *parse_operands (char *, const char *);
 163 static void swap_operands (void);
 164 static void swap_2_operands (unsigned int, unsigned int);
 165 static enum flag_code i386_addressing_mode (void);
 166 static void optimize_imm (void);
 167 static void optimize_disp (void);
 168 static const insn_template *match_template (char);
 169 static int check_string (void);
 170 static int process_suffix (void);
 171 static int check_byte_reg (void);
 172 static int check_long_reg (void);
 173 static int check_qword_reg (void);
 174 static int check_word_reg (void);
 175 static int finalize_imm (void);
 176 static int process_operands (void);
 177 static const reg_entry *build_modrm_byte (void);
 178 static void output_insn (void);
 179 static void output_imm (fragS *, offsetT);
 180 static void output_disp (fragS *, offsetT);
 181 #ifndef I386COFF
 182 static void s_bss (int);
 183 #endif
 184 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 185 static void handle_large_common (int small ATTRIBUTE_UNUSED);
 186
 187 /* GNU_PROPERTY_X86_ISA_1_USED.  */
 188 static unsigned int x86_isa_1_used;
 189 /* GNU_PROPERTY_X86_FEATURE_2_USED.  */
 190 static unsigned int x86_feature_2_used;
 191 /* Generate x86 used ISA and feature properties.  */
 192 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
 193 #endif
 194
 195 static const char *default_arch = DEFAULT_ARCH;
 196
 197 /* parse_register() returns this when a register alias cannot be used.  */
 198 static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
 199                                    { Dw2Inval, Dw2Inval } };
 200
 201 static const reg_entry *reg_eax;
 202 static const reg_entry *reg_ds;
 203 static const reg_entry *reg_es;
 204 static const reg_entry *reg_ss;
 205 static const reg_entry *reg_st0;
 206 static const reg_entry *reg_k0;
 207
 208 /* VEX prefix.  */
 209 typedef struct
 210 {
 211   /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
 212   unsigned char bytes[4];
 213   unsigned int length;
 214   /* Destination or source register specifier.  */
 215   const reg_entry *register_specifier;
 216 } vex_prefix;
 217
 218 /* 'md_assemble ()' gathers together information and puts it into a
 219    i386_insn.  */
 220
 221 union i386_op
 222   {
 223     expressionS *disps;
 224     expressionS *imms;
 225     const reg_entry *regs;
 226   };
 227
 228 enum i386_error
 229   {
 230     no_error, /* Must be first.  */
 231     operand_size_mismatch,
 232     operand_type_mismatch,
 233     register_type_mismatch,
 234     number_of_operands_mismatch,
 235     invalid_instruction_suffix,
 236     bad_imm4,
 237     unsupported_with_intel_mnemonic,
 238     unsupported_syntax,
 239     unsupported,
 240     unsupported_on_arch,
 241     unsupported_64bit,
 242     invalid_sib_address,
 243     invalid_vsib_address,
 244     invalid_vector_register_set,
 245     invalid_tmm_register_set,
 246     invalid_dest_and_src_register_set,
 247     unsupported_vector_index_register,
 248     unsupported_broadcast,
 249     broadcast_needed,
 250     unsupported_masking,
 251     mask_not_on_destination,
 252     no_default_mask,
 253     unsupported_rc_sae,
 254     invalid_register_operand,
 255   };
 256
 257 struct _i386_insn
 258   {
 259     /* TM holds the template for the insn we're currently assembling.  */
 260     insn_template tm;
 261
 262     /* SUFFIX holds the instruction size suffix for byte, word, dword
 263        or qword, if given.  */
 264     char suffix;
 265
 266     /* OPCODE_LENGTH holds the number of base opcode bytes.  */
 267     unsigned char opcode_length;
 268
 269     /* OPERANDS gives the number of given operands.  */
 270     unsigned int operands;
 271
 272     /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
 273        of given register, displacement, memory operands and immediate
 274        operands.  */
 275     unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
 276
 277     /* TYPES [i] is the type (see above #defines) which tells us how to
 278        use OP[i] for the corresponding operand.  */
 279     i386_operand_type types[MAX_OPERANDS];
 280
 281     /* Displacement expression, immediate expression, or register for each
 282        operand.  */
 283     union i386_op op[MAX_OPERANDS];
 284
 285     /* Flags for operands.  */
 286     unsigned int flags[MAX_OPERANDS];
 287 #define Operand_PCrel 1
 288 #define Operand_Mem   2
 289
 290     /* Relocation type for operand */
 291     enum bfd_reloc_code_real reloc[MAX_OPERANDS];
 292
 293     /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
 294        the base index byte below.  */
 295     const reg_entry *base_reg;
 296     const reg_entry *index_reg;
 297     unsigned int log2_scale_factor;
 298
 299     /* SEG gives the seg_entries of this insn.  They are zero unless
 300        explicit segment overrides are given.  */
 301     const reg_entry *seg[2];
 302
 303     /* PREFIX holds all the given prefix opcodes (usually null).
 304        PREFIXES is the number of prefix opcodes.  */
 305     unsigned int prefixes;
 306     unsigned char prefix[MAX_PREFIXES];
 307
 308     /* Register is in low 3 bits of opcode.  */
 309     bool short_form;
 310
 311     /* The operand to a branch insn indicates an absolute branch.  */
 312     bool jumpabsolute;
 313
 314     /* The operand to a branch insn indicates a far branch.  */
 315     bool far_branch;
 316
 317     /* There is a memory operand of (%dx) which should be only used
 318        with input/output instructions.  */
 319     bool input_output_operand;
 320
 321     /* Extended states.  */
 322     enum
 323       {
 324         /* Use MMX state.  */
 325         xstate_mmx = 1 << 0,
 326         /* Use XMM state.  */
 327         xstate_xmm = 1 << 1,
 328         /* Use YMM state.  */
 329         xstate_ymm = 1 << 2 | xstate_xmm,
 330         /* Use ZMM state.  */
 331         xstate_zmm = 1 << 3 | xstate_ymm,
 332         /* Use TMM state.  */
 333         xstate_tmm = 1 << 4,
 334         /* Use MASK state.  */
 335         xstate_mask = 1 << 5
 336       } xstate;
 337
 338     /* Has GOTPC or TLS relocation.  */
 339     bool has_gotpc_tls_reloc;
 340
 341     /* RM and SIB are the modrm byte and the sib byte where the
 342        addressing modes of this insn are encoded.  */
 343     modrm_byte rm;
 344     rex_byte rex;
 345     rex_byte vrex;
 346     sib_byte sib;
 347     vex_prefix vex;
 348
 349     /* Masking attributes.
 350
 351        The struct describes masking, applied to OPERAND in the instruction.
 352        REG is a pointer to the corresponding mask register.  ZEROING tells
 353        whether merging or zeroing mask is used.  */
 354     struct Mask_Operation
 355     {
 356       const reg_entry *reg;
 357       unsigned int zeroing;
 358       /* The operand where this operation is associated.  */
 359       unsigned int operand;
 360     } mask;
 361
 362     /* Rounding control and SAE attributes.  */
 363     struct RC_Operation
 364     {
 365       enum rc_type
 366         {
 367           rc_none = -1,
 368           rne,
 369           rd,
 370           ru,
 371           rz,
 372           saeonly
 373         } type;
 374       /* In Intel syntax the operand modifier form is supposed to be used, but
 375          we continue to accept the immediate forms as well.  */
 376       bool modifier;
 377     } rounding;
 378
 379     /* Broadcasting attributes.
 380
 381        The struct describes broadcasting, applied to OPERAND.  TYPE
 382        expresses the broadcast factor.  */
 383     struct Broadcast_Operation
 384     {
 385       /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}.  */
 386       unsigned int type;
 387
 388       /* Index of broadcasted operand.  */
 389       unsigned int operand;
 390
 391       /* Number of bytes to broadcast.  */
 392       unsigned int bytes;
 393     } broadcast;
 394
 395     /* Compressed disp8*N attribute.  */
 396     unsigned int memshift;
 397
 398     /* Prefer load or store in encoding.  */
 399     enum
 400       {
 401         dir_encoding_default = 0,
 402         dir_encoding_load,
 403         dir_encoding_store,
 404         dir_encoding_swap
 405       } dir_encoding;
 406
 407     /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
 408     enum
 409       {
 410         disp_encoding_default = 0,
 411         disp_encoding_8bit,
 412         disp_encoding_16bit,
 413         disp_encoding_32bit
 414       } disp_encoding;
 415
 416     /* Prefer the REX byte in encoding.  */
 417     bool rex_encoding;
 418
 419     /* Disable instruction size optimization.  */
 420     bool no_optimize;
 421
 422     /* How to encode vector instructions.  */
 423     enum
 424       {
 425         vex_encoding_default = 0,
 426         vex_encoding_vex,
 427         vex_encoding_vex3,
 428         vex_encoding_evex,
 429         vex_encoding_error
 430       } vec_encoding;
 431
 432     /* REP prefix.  */
 433     const char *rep_prefix;
 434
 435     /* HLE prefix.  */
 436     const char *hle_prefix;
 437
 438     /* Have BND prefix.  */
 439     const char *bnd_prefix;
 440
 441     /* Have NOTRACK prefix.  */
 442     const char *notrack_prefix;
 443
 444     /* Error message.  */
 445     enum i386_error error;
 446   };
 447
 448 typedef struct _i386_insn i386_insn;
 449
 450 /* Link RC type with corresponding string, that'll be looked for in
 451    asm.  */
 452 struct RC_name
 453 {
 454   enum rc_type type;
 455   const char *name;
 456   unsigned int len;
 457 };
 458
 459 static const struct RC_name RC_NamesTable[] =
 460 {
 461   {  rne, STRING_COMMA_LEN ("rn-sae") },
 462   {  rd,  STRING_COMMA_LEN ("rd-sae") },
 463   {  ru,  STRING_COMMA_LEN ("ru-sae") },
 464   {  rz,  STRING_COMMA_LEN ("rz-sae") },
 465   {  saeonly,  STRING_COMMA_LEN ("sae") },
 466 };
 467
 468 /* To be indexed by segment register number.  */
 469 static const unsigned char i386_seg_prefixes[] = {
 470   ES_PREFIX_OPCODE,
 471   CS_PREFIX_OPCODE,
 472   SS_PREFIX_OPCODE,
 473   DS_PREFIX_OPCODE,
 474   FS_PREFIX_OPCODE,
 475   GS_PREFIX_OPCODE
 476 };
 477
 478 /* List of chars besides those in app.c:symbol_chars that can start an
 479    operand.  Used to prevent the scrubber eating vital white-space.  */
 480 const char extra_symbol_chars[] = "*%-([{}"
 481 #ifdef LEX_AT
 482         "@"
 483 #endif
 484 #ifdef LEX_QM
 485         "?"
 486 #endif
 487         ;
 488
 489 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF))     \
 490      && !defined (TE_GNU)                               \
 491      && !defined (TE_LINUX)                             \
 492      && !defined (TE_Haiku)                             \
 493      && !defined (TE_FreeBSD)                           \
 494      && !defined (TE_DragonFly)                         \
 495      && !defined (TE_NetBSD))
 496 /* This array holds the chars that always start a comment.  If the
 497    pre-processor is disabled, these aren't very useful.  The option
 498    --divide will remove '/' from this list.  */
 499 const char *i386_comment_chars = "#/";
 500 #define SVR4_COMMENT_CHARS 1
 501 #define PREFIX_SEPARATOR '\\'
 502
 503 #else
 504 const char *i386_comment_chars = "#";
 505 #define PREFIX_SEPARATOR '/'
 506 #endif
 507
 508 /* This array holds the chars that only start a comment at the beginning of
 509    a line.  If the line seems to have the form '# 123 filename'
 510    .line and .file directives will appear in the pre-processed output.
 511    Note that input_file.c hand checks for '#' at the beginning of the
 512    first line of the input file.  This is because the compiler outputs
 513    #NO_APP at the beginning of its output.
 514    Also note that comments started like this one will always work if
 515    '/' isn't otherwise defined.  */
 516 const char line_comment_chars[] = "#/";
 517
 518 const char line_separator_chars[] = ";";
 519
 520 /* Chars that can be used to separate mant from exp in floating point
 521    nums.  */
 522 const char EXP_CHARS[] = "eE";
 523
 524 /* Chars that mean this number is a floating point constant
 525    As in 0f12.456
 526    or    0d1.2345e12.  */
 527 const char FLT_CHARS[] = "fFdDxXhHbB";
 528
 529 /* Tables for lexical analysis.  */
 530 static char mnemonic_chars[256];
 531 static char register_chars[256];
 532 static char operand_chars[256];
 533 static char identifier_chars[256];
 534
 535 /* Lexical macros.  The table-driven tests index the matching 256-entry
        table with X converted to unsigned char; is_space_char accepts only a
        plain blank.  NOTE(review): X is not parenthesized after the cast, so
        the cast binds only to the first operand of a compound argument --
        pass simple expressions only.  */
 536 #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
 537 #define is_operand_char(x) (operand_chars[(unsigned char) x])
 538 #define is_register_char(x) (register_chars[(unsigned char) x])
 539 #define is_space_char(x) ((x) == ' ')
 540 #define is_identifier_char(x) (identifier_chars[(unsigned char) x])
 541
 542 /* All non-digit non-letter characters that may occur in an operand.  */
 543 static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
 544
 545 /* md_assemble() always leaves the strings it's passed unaltered.  To
 546    effect this we maintain a stack of saved characters that we've smashed
 547    with '\0's (indicating end of strings for various sub-fields of the
 548    assembler instruction).
        END_STRING_AND_SAVE (S) pushes the character at S and overwrites it
        with '\0'; RESTORE_END_STRING (S) pops the most recently saved
        character back into S, so the two must be used in matching,
        last-in-first-out pairs.  Neither macro checks SAVE_STACK_P against
        the 32-entry bound of SAVE_STACK.  */
 549 static char save_stack[32];
 550 static char *save_stack_p;
 551 #define END_STRING_AND_SAVE(s) \
 552         do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
 553 #define RESTORE_END_STRING(s) \
 554         do { *(s) = *--save_stack_p; } while (0)
 555
 556 /* The instruction we're assembling.  */
 557 static i386_insn i;
 558
 559 /* Possible templates for current insn.  */
 560 static const templates *current_templates;
 561
 562 /* Per instruction expressionS buffers: max displacements & immediates.  */
 563 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
 564 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
 565
 566 /* Current operand we are working on.  */
 567 static int this_operand = -1;
 568
 569 /* We support three different code modes.  The FLAG_CODE variable is used to
 570    distinguish these.  */
 571
 572 enum flag_code {
 573         CODE_32BIT,
 574         CODE_16BIT,
 575         CODE_64BIT };
 576
 577 static enum flag_code flag_code;
 578 static unsigned int object_64bit;
 579 static unsigned int disallow_64bit_reloc;
 580 static int use_rela_relocations = 0;
 581 /* __tls_get_addr/___tls_get_addr symbol for TLS.  */
 582 static const char *tls_get_addr;
 583
 584 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
 585      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
 586      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
 587
 588 /* The ELF ABI to use.  */
 589 enum x86_elf_abi
 590 {
 591   I386_ABI,
 592   X86_64_ABI,
 593   X86_64_X32_ABI
 594 };
 595
 596 static enum x86_elf_abi x86_elf_abi = I386_ABI;
 597 #endif
 598
 599 #if defined (TE_PE) || defined (TE_PEP)
 600 /* Use big object file format.  */
 601 static int use_big_obj = 0;
 602 #endif
 603
 604 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 605 /* 1 if generating code for a shared library.  */
 606 static int shared = 0;
 607
 608 unsigned int x86_sframe_cfa_sp_reg;
 609 /* The other CFA base register for SFrame unwind info.  */
 610 unsigned int x86_sframe_cfa_fp_reg;
 611 unsigned int x86_sframe_cfa_ra_reg;
 612
 613 #endif
 614
 615 /* 1 for intel syntax,
 616    0 if att syntax.  */
 617 static int intel_syntax = 0;
 618
 619 static enum x86_64_isa
 620 {
 621   amd64 = 1,    /* AMD64 ISA.  */
 622   intel64       /* Intel64 ISA.  */
 623 } isa64;
 624
 625 /* 1 for intel mnemonic,
 626    0 if att mnemonic.  */
 627 static int intel_mnemonic = !SYSV386_COMPAT;
 628
 629 /* 1 if pseudo registers are permitted.  */
 630 static int allow_pseudo_reg = 0;
 631
 632 /* 1 if register prefix % not required.  */
 633 static int allow_naked_reg = 0;
 634
 635 /* 1 if the assembler should add BND prefix for all control-transferring
 636    instructions supporting it, even if this prefix wasn't specified
 637    explicitly.  */
 638 static int add_bnd_prefix = 0;
 639
 640 /* 1 if pseudo index register, eiz/riz, is allowed.  */
 641 static int allow_index_reg = 0;
 642
 643 /* 1 if the assembler should ignore LOCK prefix, even if it was
 644    specified explicitly.  */
 645 static int omit_lock_prefix = 0;
 646
 647 /* 1 if the assembler should encode lfence, mfence, and sfence as
 648    "lock addl $0, (%{re}sp)".  */
 649 static int avoid_fence = 0;
 650
 651 /* 1 if lfence should be inserted after every load.  */
 652 static int lfence_after_load = 0;
 653
 654 /* Non-zero if lfence should be inserted before indirect branch.  */
 655 static enum lfence_before_indirect_branch_kind
 656   {
 657     lfence_branch_none = 0,
 658     lfence_branch_register,
 659     lfence_branch_memory,
 660     lfence_branch_all
 661   }
 662 lfence_before_indirect_branch;
 663
 664 /* Non-zero if lfence should be inserted before ret.  */
 665 static enum lfence_before_ret_kind
 666   {
 667     lfence_before_ret_none = 0,
 668     lfence_before_ret_not,
 669     lfence_before_ret_or,
 670     lfence_before_ret_shl
 671   }
 672 lfence_before_ret;
 673
 674 /* Whether the previous instruction was a directive (e.g. .byte) or a prefix.  */
 675 static struct
 676   {
 677     segT seg;
 678     const char *file;
 679     const char *name;
 680     unsigned int line;
 681     enum last_insn_kind
 682       {
 683         last_insn_other = 0,
 684         last_insn_directive,
 685         last_insn_prefix
 686       } kind;
 687   } last_insn;
 688
 689 /* 1 if the assembler should generate relax relocations.  */
 690
 691 static int generate_relax_relocations
 692   = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
 693
 694 static enum check_kind
 695   {
 696     check_none = 0,
 697     check_warning,
 698     check_error
 699   }
 700 sse_check, operand_check = check_warning;
 701
 702 /* Non-zero if branches should be aligned within power of 2 boundary.  */
 703 static int align_branch_power = 0;
 704
 705 /* Types of branches to align.  */
 706 enum align_branch_kind
 707   {
 708     align_branch_none = 0,
 709     align_branch_jcc = 1,
 710     align_branch_fused = 2,
 711     align_branch_jmp = 3,
 712     align_branch_call = 4,
 713     align_branch_indirect = 5,
 714     align_branch_ret = 6
 715   };
 716
 717 /* Type bits of branches to align.  */
 718 enum align_branch_bit
 719   {
 720     align_branch_jcc_bit = 1 << align_branch_jcc,
 721     align_branch_fused_bit = 1 << align_branch_fused,
 722     align_branch_jmp_bit = 1 << align_branch_jmp,
 723     align_branch_call_bit = 1 << align_branch_call,
 724     align_branch_indirect_bit = 1 << align_branch_indirect,
 725     align_branch_ret_bit = 1 << align_branch_ret
 726   };
 727
 728 static unsigned int align_branch = (align_branch_jcc_bit
 729                                     | align_branch_fused_bit
 730                                     | align_branch_jmp_bit);
 731
 732 /* Types of condition jump used by macro-fusion.  */
 733 enum mf_jcc_kind
 734   {
 735     mf_jcc_jo = 0,  /* base opcode 0x70  */
 736     mf_jcc_jc,      /* base opcode 0x72  */
 737     mf_jcc_je,      /* base opcode 0x74  */
 738     mf_jcc_jna,     /* base opcode 0x76  */
 739     mf_jcc_js,      /* base opcode 0x78  */
 740     mf_jcc_jp,      /* base opcode 0x7a  */
 741     mf_jcc_jl,      /* base opcode 0x7c  */
 742     mf_jcc_jle,     /* base opcode 0x7e  */
 743   };
 744
 745 /* Types of compare flag-modifying instructions used by macro-fusion.  */
 746 enum mf_cmp_kind
 747   {
 748     mf_cmp_test_and,  /* test/and */
 749     mf_cmp_alu_cmp,  /* add/sub/cmp */
 750     mf_cmp_incdec  /* inc/dec */
 751   };
 752
 753 /* The maximum padding size for fused jcc.  CMP like instruction can
 754    be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
 755    prefixes.   */
 756 #define MAX_FUSED_JCC_PADDING_SIZE 20
 757
 758 /* The maximum number of prefixes added for an instruction.  */
 759 static unsigned int align_branch_prefix_size = 5;
 760
 761 /* Optimization:
 762    1. Clear the REX_W bit with register operand if possible.
 763    2. Above plus use 128bit vector instruction to clear the full vector
 764       register.
 765  */
 766 static int optimize = 0;
 767
 768 /* Optimization:
 769    1. Clear the REX_W bit with register operand if possible.
 770    2. Above plus use 128bit vector instruction to clear the full vector
 771       register.
 772    3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
 773       "testb $imm7,%r8".
 774  */
 775 static int optimize_for_space = 0;
 776
 777 /* Register prefix used for error message.  */
 778 static const char *register_prefix = "%";
 779
 780 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
 781    leave, push, and pop instructions so that gcc has the same stack
 782    frame as in 32 bit mode.  */
 783 static char stackop_size = '\0';
 784
 785 /* Non-zero to optimize code alignment.  */
 786 int optimize_align_code = 1;
 787
 788 /* Non-zero to quieten some warnings.  */
 789 static int quiet_warnings = 0;
 790
 791 /* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs.  */
 792 static bool pre_386_16bit_warned;
 793
 794 /* CPU name.  */
 795 static const char *cpu_arch_name = NULL;
 796 static char *cpu_sub_arch_name = NULL;
 797
 798 /* CPU feature flags.  */
 799 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
 800
 801 /* If we have selected a cpu we are generating instructions for.  */
 802 static int cpu_arch_tune_set = 0;
 803
 804 /* Cpu we are generating instructions for.  */
 805 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
 806
 807 /* CPU feature flags of cpu we are generating instructions for.  */
 808 static i386_cpu_flags cpu_arch_tune_flags;
 809
 810 /* CPU instruction set architecture used.  */
 811 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
 812
 813 /* CPU feature flags of instruction set architecture used.  */
 814 i386_cpu_flags cpu_arch_isa_flags;
 815
 816 /* If set, conditional jumps are not automatically promoted to handle
 817    larger than a byte offset.  */
 818 static bool no_cond_jump_promotion = false;
 819
 820 /* Encode SSE instructions with VEX prefix.  */
 821 static unsigned int sse2avx;
 822
 823 /* Encode aligned vector move as unaligned vector move.  */
 824 static unsigned int use_unaligned_vector_move;
 825
 826 /* Encode scalar AVX instructions with specific vector length.  */
 827 static enum
 828   {
 829     vex128 = 0,
 830     vex256
 831   } avxscalar;
 832
 833 /* Encode VEX WIG instructions with specific vex.w.  */
 834 static enum
 835   {
 836     vexw0 = 0,
 837     vexw1
 838   } vexwig;
 839
 840 /* Encode scalar EVEX LIG instructions with specific vector length.  */
 841 static enum
 842   {
 843     evexl128 = 0,
 844     evexl256,
 845     evexl512
 846   } evexlig;
 847
 848 /* Encode EVEX WIG instructions with specific evex.w.  */
 849 static enum
 850   {
 851     evexw0 = 0,
 852     evexw1
 853   } evexwig;
 854
 855 /* Value to encode in EVEX RC bits, for SAE-only instructions.  */
 856 static enum rc_type evexrcig = rne;
 857
 858 /* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
 859 static symbolS *GOT_symbol;
 860
 861 /* The dwarf2 return column, adjusted for 32 or 64 bit.  */
 862 unsigned int x86_dwarf2_return_column;
 863
 864 /* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
 865 int x86_cie_data_alignment;
 866
 867 /* Interface to relax_segment.
 868    There are 3 major relax states for 386 jump insns because the
 869    different types of jumps add different sizes to frags when we're
 870    figuring out what sort of jump to choose to reach a given label.
 871
 872    BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
 873    branches which are handled by md_estimate_size_before_relax() and
 874    i386_generic_table_relax_frag().  */
 875
 876 /* Types.  */
 877 #define UNCOND_JUMP 0
 878 #define COND_JUMP 1
 879 #define COND_JUMP86 2
 880 #define BRANCH_PADDING 3
 881 #define BRANCH_PREFIX 4
 882 #define FUSED_JCC_PADDING 5
 883
 884 /* Sizes.  */
 885 #define CODE16  1
 886 #define SMALL   0
 887 #define SMALL16 (SMALL | CODE16)
 888 #define BIG     2
 889 #define BIG16   (BIG | CODE16)
 890
     /* NOTE(review): INLINE already receives a definition from the
        "#ifndef INLINE" block near the top of this file, so this second
        fallback appears never to take effect -- confirm before removing.  */
 891 #ifndef INLINE
 892 #ifdef __GNUC__
 893 #define INLINE __inline__
 894 #else
 895 #define INLINE
 896 #endif
 897 #endif
 898
     /* A relax state packs the jump type above a two-bit size code.
        ENCODE_RELAX_STATE builds the state from TYPE and SIZE,
        TYPE_FROM_RELAX_STATE recovers the type, and
        DISP_SIZE_FROM_RELAX_STATE maps the size code to a displacement
        width in bytes: 4 for BIG, 2 for BIG16, and 1 otherwise.  */
 899 #define ENCODE_RELAX_STATE(type, size) \
 900   ((relax_substateT) (((type) << 2) | (size)))
 901 #define TYPE_FROM_RELAX_STATE(s) \
 902   ((s) >> 2)
 903 #define DISP_SIZE_FROM_RELAX_STATE(s) \
 904     ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
 905
 906 /* This table is used by relax_frag to promote short jumps to long
 907    ones where necessary.  SMALL (short) jumps may be promoted to BIG
 908    (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
 909    don't allow a short jump in a 32 bit code segment to be promoted to
 910    a 16 bit offset jump because it's slower (requires data size
 911    prefix), and doesn't work, unless the destination is in the bottom
 912    64k of the code segment (The top 16 bits of eip are zeroed).  */
 913
 914 const relax_typeS md_relax_table[] =
 915 {
 916   /* The fields are:
 917      1) most positive reach of this state,
 918      2) most negative reach of this state,
 919      3) how many bytes this mode will have in the variable part of the frag
 920      4) which index into the table to try if we can't fit into this one.
          Entries are indexed by ENCODE_RELAX_STATE (type, size), so each
          group below lists the SMALL, SMALL16, BIG and BIG16 states in that
          order.  */
 921
 922   /* UNCOND_JUMP states.  */
 923   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
 924   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
 925   /* dword jmp adds 4 bytes to frag:
 926      0 extra opcode bytes, 4 displacement bytes.  */
 927   {0, 0, 4, 0},
 928   /* word jmp adds 2 bytes to frag:
 929      0 extra opcode bytes, 2 displacement bytes.  */
 930   {0, 0, 2, 0},
 931
 932   /* COND_JUMP states.  */
 933   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
 934   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
 935   /* dword conditionals add 5 bytes to frag:
 936      1 extra opcode byte, 4 displacement bytes.  */
 937   {0, 0, 5, 0},
 938   /* word conditionals add 3 bytes to frag:
 939      1 extra opcode byte, 2 displacement bytes.  */
 940   {0, 0, 3, 0},
 941
 942   /* COND_JUMP86 states.  */
 943   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
 944   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
 945   /* dword conditionals add 5 bytes to frag:
 946      1 extra opcode byte, 4 displacement bytes.  */
 947   {0, 0, 5, 0},
 948   /* word conditionals add 4 bytes to frag:
 949      1 displacement byte and a 3 byte long branch insn.  */
 950   {0, 0, 4, 0}
 951 };
 952
/* Helpers for building cpu_arch[] entries.  Both stringify N as the
   entry name (STRING_COMMA_LEN presumably also supplies its length)
   and pass S through unchanged as the second initializer.

   ARCH describes a processor: it selects PROCESSOR_<T>, uses
   CPU_<F>_FLAGS as the first flag set and CPU_NONE_FLAGS as the
   second.

   SUBARCH describes an ISA extension: the processor is always
   PROCESSOR_NONE, CPU_<E>_FLAGS is the first flag set and
   CPU_<D>_FLAGS the second (presumably the flags to enable and to
   disable respectively -- confirm against the arch_entry
   declaration).  */
#define ARCH(n, t, f, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, CPU_ ## f ## _FLAGS, \
    CPU_NONE_FLAGS }
#define SUBARCH(n, e, d, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, CPU_ ## e ## _FLAGS, \
    CPU_ ## d ## _FLAGS }

/* Table of known processors (ARCH entries) followed by known ISA
   extensions (SUBARCH entries).  */
static const arch_entry cpu_arch[] =
{
  /* Do not replace the first two entries - i386_target_format() and
     set_cpu_arch() rely on them being there in this order.  */
  ARCH (generic32, GENERIC32, GENERIC32, false),
  ARCH (generic64, GENERIC64, GENERIC64, false),
  ARCH (i8086, UNKNOWN, NONE, false),
  ARCH (i186, UNKNOWN, 186, false),
  ARCH (i286, UNKNOWN, 286, false),
  ARCH (i386, I386, 386, false),
  ARCH (i486, I486, 486, false),
  ARCH (i586, PENTIUM, 586, false),
  ARCH (i686, PENTIUMPRO, 686, false),
  ARCH (pentium, PENTIUM, 586, false),
  ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
  ARCH (pentiumii, PENTIUMPRO, P2, false),
  ARCH (pentiumiii, PENTIUMPRO, P3, false),
  ARCH (pentium4, PENTIUM4, P4, false),
  ARCH (prescott, NOCONA, CORE, false),
  ARCH (nocona, NOCONA, NOCONA, false),
  ARCH (yonah, CORE, CORE, true),
  ARCH (core, CORE, CORE, false),
  ARCH (merom, CORE2, CORE2, true),
  ARCH (core2, CORE2, CORE2, false),
  ARCH (corei7, COREI7, COREI7, false),
  ARCH (iamcu, IAMCU, IAMCU, false),
  ARCH (k6, K6, K6, false),
  ARCH (k6_2, K6, K6_2, false),
  ARCH (athlon, ATHLON, ATHLON, false),
  ARCH (sledgehammer, K8, K8, true),
  ARCH (opteron, K8, K8, false),
  ARCH (k8, K8, K8, false),
  ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
  ARCH (bdver1, BD, BDVER1, false),
  ARCH (bdver2, BD, BDVER2, false),
  ARCH (bdver3, BD, BDVER3, false),
  ARCH (bdver4, BD, BDVER4, false),
  ARCH (znver1, ZNVER, ZNVER1, false),
  ARCH (znver2, ZNVER, ZNVER2, false),
  ARCH (znver3, ZNVER, ZNVER3, false),
  ARCH (znver4, ZNVER, ZNVER4, false),
  ARCH (btver1, BT, BTVER1, false),
  ARCH (btver2, BT, BTVER2, false),

  SUBARCH (8087, 8087, ANY_8087, false),
  SUBARCH (87, NONE, ANY_8087, false), /* Disable only!  */
  SUBARCH (287, 287, ANY_287, false),
  SUBARCH (387, 387, ANY_387, false),
  SUBARCH (687, 687, ANY_687, false),
  SUBARCH (cmov, CMOV, CMOV, false),
  SUBARCH (fxsr, FXSR, ANY_FXSR, false),
  SUBARCH (mmx, MMX, ANY_MMX, false),
  SUBARCH (sse, SSE, ANY_SSE, false),
  SUBARCH (sse2, SSE2, ANY_SSE2, false),
  SUBARCH (sse3, SSE3, ANY_SSE3, false),
  SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
  SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
  SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
  SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
  SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
  SUBARCH (avx, AVX, ANY_AVX, false),
  SUBARCH (avx2, AVX2, ANY_AVX2, false),
  SUBARCH (avx512f, AVX512F, ANY_AVX512F, false),
  SUBARCH (avx512cd, AVX512CD, ANY_AVX512CD, false),
  SUBARCH (avx512er, AVX512ER, ANY_AVX512ER, false),
  SUBARCH (avx512pf, AVX512PF, ANY_AVX512PF, false),
  SUBARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, false),
  SUBARCH (avx512bw, AVX512BW, ANY_AVX512BW, false),
  SUBARCH (avx512vl, AVX512VL, ANY_AVX512VL, false),
  SUBARCH (vmx, VMX, ANY_VMX, false),
  SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
  SUBARCH (smx, SMX, SMX, false),
  SUBARCH (xsave, XSAVE, ANY_XSAVE, false),
  SUBARCH (xsaveopt, XSAVEOPT, ANY_XSAVEOPT, false),
  SUBARCH (xsavec, XSAVEC, ANY_XSAVEC, false),
  SUBARCH (xsaves, XSAVES, ANY_XSAVES, false),
  SUBARCH (aes, AES, ANY_AES, false),
  SUBARCH (pclmul, PCLMUL, ANY_PCLMUL, false),
  SUBARCH (clmul, PCLMUL, ANY_PCLMUL, true),
  SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
  SUBARCH (rdrnd, RDRND, RDRND, false),
  SUBARCH (f16c, F16C, ANY_F16C, false),
  SUBARCH (bmi2, BMI2, BMI2, false),
  SUBARCH (fma, FMA, ANY_FMA, false),
  SUBARCH (fma4, FMA4, ANY_FMA4, false),
  SUBARCH (xop, XOP, ANY_XOP, false),
  SUBARCH (lwp, LWP, ANY_LWP, false),
  SUBARCH (movbe, MOVBE, MOVBE, false),
  SUBARCH (cx16, CX16, CX16, false),
  SUBARCH (ept, EPT, ANY_EPT, false),
  SUBARCH (lzcnt, LZCNT, LZCNT, false),
  SUBARCH (popcnt, POPCNT, POPCNT, false),
  SUBARCH (hle, HLE, HLE, false),
  SUBARCH (rtm, RTM, ANY_RTM, false),
  SUBARCH (tsx, TSX, TSX, false),
  SUBARCH (invpcid, INVPCID, INVPCID, false),
  SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
  SUBARCH (nop, NOP, NOP, false),
  SUBARCH (syscall, SYSCALL, SYSCALL, false),
  SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
  SUBARCH (3dnow, 3DNOW, ANY_3DNOW, false),
  SUBARCH (3dnowa, 3DNOWA, ANY_3DNOWA, false),
  SUBARCH (padlock, PADLOCK, PADLOCK, false),
  SUBARCH (pacifica, SVME, ANY_SVME, true),
  SUBARCH (svme, SVME, ANY_SVME, false),
  SUBARCH (abm, ABM, ABM, false),
  SUBARCH (bmi, BMI, BMI, false),
  SUBARCH (tbm, TBM, TBM, false),
  SUBARCH (adx, ADX, ADX, false),
  SUBARCH (rdseed, RDSEED, RDSEED, false),
  SUBARCH (prfchw, PRFCHW, PRFCHW, false),
  SUBARCH (smap, SMAP, SMAP, false),
  SUBARCH (mpx, MPX, ANY_MPX, false),
  SUBARCH (sha, SHA, ANY_SHA, false),
  SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
  SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
  SUBARCH (se1, SE1, SE1, false),
  SUBARCH (clwb, CLWB, CLWB, false),
  SUBARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, false),
  SUBARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, false),
  SUBARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, false),
  SUBARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, false),
  SUBARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, false),
  SUBARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, false),
  SUBARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, false),
  SUBARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, false),
  SUBARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, false),
  SUBARCH (clzero, CLZERO, CLZERO, false),
  SUBARCH (mwaitx, MWAITX, MWAITX, false),
  SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
  SUBARCH (rdpid, RDPID, RDPID, false),
  SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
  SUBARCH (ibt, IBT, IBT, false),
  SUBARCH (shstk, SHSTK, SHSTK, false),
  SUBARCH (gfni, GFNI, ANY_GFNI, false),
  SUBARCH (vaes, VAES, ANY_VAES, false),
  SUBARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, false),
  SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
  SUBARCH (pconfig, PCONFIG, PCONFIG, false),
  SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
  SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
  SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
  SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
  SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
  SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
  SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
  SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
  SUBARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, false),
  SUBARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
           ANY_AVX512_VP2INTERSECT, false),
  SUBARCH (tdx, TDX, TDX, false),
  SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
  SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
  SUBARCH (rdpru, RDPRU, RDPRU, false),
  SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
  SUBARCH (sev_es, SEV_ES, ANY_SEV_ES, false),
  SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
  SUBARCH (kl, KL, ANY_KL, false),
  SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
  SUBARCH (uintr, UINTR, UINTR, false),
  SUBARCH (hreset, HRESET, HRESET, false),
  SUBARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, false),
  SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
  SUBARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, false),
  SUBARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, false),
  SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
  SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
  SUBARCH (msrlist, MSRLIST, MSRLIST, false),
  SUBARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, false),
  SUBARCH (rao_int, RAO_INT, RAO_INT, false),
  SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
};

/* The helper macros are only meant for the table above.  */
#undef SUBARCH
#undef ARCH
1135
1136 #ifdef I386COFF
1137 /* Like s_lcomm_internal in gas/read.c but the alignment string
1138    is allowed to be optional.  */
1139
1140 static symbolS *
1141 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1142 {
1143   addressT align = 0;
1144
1145   SKIP_WHITESPACE ();
1146
1147   if (needs_align
1148       && *input_line_pointer == ',')
1149     {
1150       align = parse_align (needs_align - 1);
1151
1152       if (align == (addressT) -1)
1153         return NULL;
1154     }
1155   else
1156     {
1157       if (size >= 8)
1158         align = 3;
1159       else if (size >= 4)
1160         align = 2;
1161       else if (size >= 2)
1162         align = 1;
1163       else
1164         align = 0;
1165     }
1166
1167   bss_alloc (symbolP, size, align);
1168   return symbolP;
1169 }
1170
1171 static void
1172 pe_lcomm (int needs_align)
1173 {
1174   s_comm_internal (needs_align * 2, pe_lcomm_internal);
1175 }
1176 #endif
1177
/* Machine dependent pseudo-ops.  Each entry holds the directive name,
   the function handling it and an integer value (presumably passed to
   the handler as its argument -- confirm against pseudo_typeS).  The
   table ends with an all-zero entry.  */
const pseudo_typeS md_pseudo_table[] =
{
  /* ".align" takes a byte count unless this is an a.out target or
     USE_ALIGN_PTWO is defined, in which case it takes a power of
     two.  */
#if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
  {"align", s_align_bytes, 0},
#else
  {"align", s_align_ptwo, 0},
#endif
  {"arch", set_cpu_arch, 0},
  /* COFF targets get ".lcomm" with optional alignment instead of
     ".bss".  */
#ifndef I386COFF
  {"bss", s_bss, 0},
#else
  {"lcomm", pe_lcomm, 1},
#endif
  {"ffloat", float_cons, 'f'},
  {"dfloat", float_cons, 'd'},
  {"tfloat", float_cons, 'x'},
  {"hfloat", float_cons, 'h'},
  {"bfloat16", float_cons, 'b'},
  {"value", cons, 2},
  {"slong", signed_cons, 4},
  {"noopt", s_ignore, 0},
  {"optim", s_ignore, 0},
  {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
  {"code16", set_code_flag, CODE_16BIT},
  {"code32", set_code_flag, CODE_32BIT},
  /* ".code64" is only available when BFD64 is defined.  */
#ifdef BFD64
  {"code64", set_code_flag, CODE_64BIT},
#endif
  {"intel_syntax", set_intel_syntax, 1},
  {"att_syntax", set_intel_syntax, 0},
  {"intel_mnemonic", set_intel_mnemonic, 1},
  {"att_mnemonic", set_intel_mnemonic, 0},
  {"allow_index_reg", set_allow_index_reg, 1},
  {"disallow_index_reg", set_allow_index_reg, 0},
  {"sse_check", set_check, 0},
  {"operand_check", set_check, 1},
  /* ELF targets add ".largecomm"; all others get the DWARF 2 line
     number directives from here.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"largecomm", handle_large_common, 0},
#else
  {"file", dwarf2_directive_file, 0},
  {"loc", dwarf2_directive_loc, 0},
  {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
#endif
#ifdef TE_PE
  {"secrel32", pe_directive_secrel, 0},
  {"secidx", pe_directive_secidx, 0},
#endif
  /* End of table marker.  */
  {0, 0, 0}
};
1227
/* For interface with expression ().  Points into the input text being
   parsed; pe_lcomm_internal () inspects the character it refers to.  */
extern char *input_line_pointer;

/* Hash table for instruction mnemonic lookup.  */
static htab_t op_hash;

/* Hash table for register lookup.  */
static htab_t reg_hash;
1236 \f
  /* Various efficient no-op patterns for aligning code labels.
     Note: Don't try to assemble the instructions in the comments.
     0L and 0w are not legal.

     The digit in each array name is the length of the pattern in
     bytes.  The *_patt tables below are indexed by length minus one,
     which is how i386_output_nops () looks patterns up.  */
static const unsigned char f32_1[] =
  {0x90};                               /* nop                  */
static const unsigned char f32_2[] =
  {0x66,0x90};                          /* xchg %ax,%ax         */
static const unsigned char f32_3[] =
  {0x8d,0x76,0x00};                     /* leal 0(%esi),%esi    */
static const unsigned char f32_4[] =
  {0x8d,0x74,0x26,0x00};                /* leal 0(%esi,1),%esi  */
static const unsigned char f32_6[] =
  {0x8d,0xb6,0x00,0x00,0x00,0x00};      /* leal 0L(%esi),%esi   */
static const unsigned char f32_7[] =
  {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
static const unsigned char f16_3[] =
  {0x8d,0x74,0x00};                     /* lea 0(%si),%si       */
static const unsigned char f16_4[] =
  {0x8d,0xb4,0x00,0x00};                /* lea 0W(%si),%si      */
/* Opcode bytes of the jumps i386_generate_nops () emits to skip over
   long runs of padding; the displacement is written separately.  */
static const unsigned char jump_disp8[] =
  {0xeb};                               /* jmp disp8           */
static const unsigned char jump32_disp32[] =
  {0xe9};                               /* jmp disp32          */
static const unsigned char jump16_disp32[] =
  {0x66,0xe9};                          /* jmp disp32          */
/* 32-bit NOPs patterns.  There is no 5-byte pattern, hence the NULL
   slot; i386_output_nops () falls back to a shorter pattern for it.  */
static const unsigned char *const f32_patt[] = {
  f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
};
/* 16-bit NOPs patterns.  The 1 and 2 byte patterns are shared with the
   32-bit table.  */
static const unsigned char *const f16_patt[] = {
  f32_1, f32_2, f16_3, f16_4
};
/* nopl (%[re]ax) */
static const unsigned char alt_3[] =
  {0x0f,0x1f,0x00};
/* nopl 0(%[re]ax) */
static const unsigned char alt_4[] =
  {0x0f,0x1f,0x40,0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_5[] =
  {0x0f,0x1f,0x44,0x00,0x00};
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] =
  {0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopl 0L(%[re]ax) */
static const unsigned char alt_7[] =
  {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_8[] =
  {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
  {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_10[] =
  {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* data16 nopw %cs:0L(%eax,%eax,1) */
static const unsigned char alt_11[] =
  {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* 32-bit and 64-bit NOPs patterns.  Every length from 1 to 11 bytes
   has an entry; the 1 and 2 byte patterns are shared with f32_patt.  */
static const unsigned char *const alt_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt_9, alt_10, alt_11
};
1302
/* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
   size of a single NOP instruction MAX_SINGLE_NOP_SIZE.  PATT[N - 1]
   is the N byte pattern, or NULL when no pattern of that size exists.
   The longest NOPs are placed first, followed by one shorter NOP (or
   a shorter NOP plus a one-byte NOP) covering the remainder.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
                  int count, int max_single_nop_size)
{
  const unsigned char *pattern;
  int tail;
  int pos = 0;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
                max_single_nop_size);
      return;
    }

  /* Step down one size if the requested one isn't available.  */
  pattern = patt[max_single_nop_size - 1];
  if (pattern == NULL)
    pattern = patt[--max_single_nop_size - 1];

  /* Split COUNT into a run of full-size NOPs and a shorter tail.  */
  tail = count % max_single_nop_size;
  count -= tail;

  while (pos < count)
    {
      memcpy (where + pos, pattern, max_single_nop_size);
      pos += max_single_nop_size;
    }

  if (tail == 0)
    return;

  pattern = patt[tail - 1];
  if (pattern != NULL)
    {
      memcpy (where + pos, pattern, tail);
      return;
    }

  /* No pattern of exactly TAIL bytes: use the next smaller one and
     finish with a one-byte NOP.  */
  tail--;
  memcpy (where + pos, patt[tail - 1], tail);
  where[pos + tail] = *patt[0];
}
1354
1355 static INLINE int
1356 fits_in_imm7 (offsetT num)
1357 {
1358   return (num & 0x7f) == num;
1359 }
1360
1361 static INLINE int
1362 fits_in_imm31 (offsetT num)
1363 {
1364   return (num & 0x7fffffff) == num;
1365 }
1366
/* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
   single NOP instruction LIMIT.  A LIMIT of 0 selects the largest
   pattern of the chosen pattern table.

   Nothing is emitted unless FRAGP is an rs_fill_nop or rs_align_code
   frag, or an rs_machine_dependent frag whose relax type is
   BRANCH_PADDING or FUSED_JCC_PADDING.  When COUNT divided by the
   largest pattern size exceeds a per-table threshold, a jump over the
   padding is emitted first and the remaining bytes are filled with
   NOPs.  */

void
i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
{
  const unsigned char *const *patt = NULL;
  int max_single_nop_size;
  /* Maximum number of NOPs before switching to jump over NOPs.  */
  int max_number_of_nops;

  switch (fragP->fr_type)
    {
    case rs_fill_nop:
    case rs_align_code:
      break;
    case rs_machine_dependent:
      /* Allow NOP padding for jumps and calls.  */
      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
          || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
        break;
      /* Fall through.  */
    default:
      return;
    }

  /* We need to decide which NOP sequence to use for 32bit and
     64bit. When -mtune= is used:

     1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM,
     PROCESSOR_PENTIUMPRO, PROCESSOR_IAMCU and PROCESSOR_GENERIC32,
     f32_patt will be used.
     2. For the rest, alt_patt will be used.

     When -mtune= isn't used, alt_patt will be used if
     cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
     be used.

     When -march= or .arch is used, we can't use anything beyond
     cpu_arch_isa_flags.   */

  if (flag_code == CODE_16BIT)
    {
      patt = f16_patt;
      max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
      /* Limit number of NOPs to 2 in 16-bit mode.  */
      max_number_of_nops = 2;
    }
  else
    {
      if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
        {
          /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
          switch (cpu_arch_tune)
            {
            case PROCESSOR_UNKNOWN:
              /* We use cpu_arch_isa_flags to check if we SHOULD
                 optimize with nops.  */
              if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
                patt = alt_patt;
              else
                patt = f32_patt;
              break;
            case PROCESSOR_PENTIUM4:
            case PROCESSOR_NOCONA:
            case PROCESSOR_CORE:
            case PROCESSOR_CORE2:
            case PROCESSOR_COREI7:
            case PROCESSOR_GENERIC64:
            case PROCESSOR_K6:
            case PROCESSOR_ATHLON:
            case PROCESSOR_K8:
            case PROCESSOR_AMDFAM10:
            case PROCESSOR_BD:
            case PROCESSOR_ZNVER:
            case PROCESSOR_BT:
              patt = alt_patt;
              break;
            case PROCESSOR_I386:
            case PROCESSOR_I486:
            case PROCESSOR_PENTIUM:
            case PROCESSOR_PENTIUMPRO:
            case PROCESSOR_IAMCU:
            case PROCESSOR_GENERIC32:
              patt = f32_patt;
              break;
            case PROCESSOR_NONE:
              abort ();
            }
        }
      else
        {
          switch (fragP->tc_frag_data.tune)
            {
            case PROCESSOR_UNKNOWN:
              /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
                 PROCESSOR_UNKNOWN.  */
              abort ();
              break;

            case PROCESSOR_I386:
            case PROCESSOR_I486:
            case PROCESSOR_PENTIUM:
            case PROCESSOR_IAMCU:
            case PROCESSOR_K6:
            case PROCESSOR_ATHLON:
            case PROCESSOR_K8:
            case PROCESSOR_AMDFAM10:
            case PROCESSOR_BD:
            case PROCESSOR_ZNVER:
            case PROCESSOR_BT:
            case PROCESSOR_GENERIC32:
              /* We use cpu_arch_isa_flags to check if we CAN optimize
                 with nops.  */
              if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
                patt = alt_patt;
              else
                patt = f32_patt;
              break;
            /* This group makes the same CpuNop based choice as the
               group above.  */
            case PROCESSOR_PENTIUMPRO:
            case PROCESSOR_PENTIUM4:
            case PROCESSOR_NOCONA:
            case PROCESSOR_CORE:
            case PROCESSOR_CORE2:
            case PROCESSOR_COREI7:
              if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
                patt = alt_patt;
              else
                patt = f32_patt;
              break;
            case PROCESSOR_GENERIC64:
              patt = alt_patt;
              break;
            case PROCESSOR_NONE:
              abort ();
            }
        }

      if (patt == f32_patt)
        {
          max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
          /* Limit number of NOPs to 2 for older processors.  */
          max_number_of_nops = 2;
        }
      else
        {
          max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
          /* Limit number of NOPs to 7 for newer processors.  */
          max_number_of_nops = 7;
        }
    }

  /* No explicit limit: allow the largest pattern of the table.  */
  if (limit == 0)
    limit = max_single_nop_size;

  if (fragP->fr_type == rs_fill_nop)
    {
      /* Output NOPs for .nop directive.  */
      if (limit > max_single_nop_size)
        {
          as_bad_where (fragP->fr_file, fragP->fr_line,
                        _("invalid single nop size: %d "
                          "(expect within [0, %d])"),
                        limit, max_single_nop_size);
          return;
        }
    }
  else if (fragP->fr_type != rs_machine_dependent)
    fragP->fr_var = count;

  if ((count / max_single_nop_size) > max_number_of_nops)
    {
      /* Generate jump over NOPs.  The two bytes of a "jmp disp8" are
         part of the padding, so the jump skips COUNT - 2 bytes.  */
      offsetT disp = count - 2;
      if (fits_in_imm7 (disp))
        {
          /* Use "jmp disp8" if possible.  */
          count = disp;
          where[0] = jump_disp8[0];
          where[1] = count;
          where += 2;
        }
      else
        {
          unsigned int size_of_jump;

          if (flag_code == CODE_16BIT)
            {
              where[0] = jump16_disp32[0];
              where[1] = jump16_disp32[1];
              size_of_jump = 2;
            }
          else
            {
              where[0] = jump32_disp32[0];
              size_of_jump = 1;
            }

          /* The jump's opcode bytes and its 4 byte displacement are
             part of the padding too.  */
          count -= size_of_jump + 4;
          if (!fits_in_imm31 (count))
            {
              as_bad_where (fragP->fr_file, fragP->fr_line,
                            _("jump over nop padding out of range"));
              return;
            }

          md_number_to_chars (where + size_of_jump, count, 4);
          where += size_of_jump + 4;
        }
    }

  /* Generate multiple NOPs.  */
  i386_output_nops (where, patt, count, limit);
}
1580
1581 static INLINE int
1582 operand_type_all_zero (const union i386_operand_type *x)
1583 {
1584   switch (ARRAY_SIZE(x->array))
1585     {
1586     case 3:
1587       if (x->array[2])
1588         return 0;
1589       /* Fall through.  */
1590     case 2:
1591       if (x->array[1])
1592         return 0;
1593       /* Fall through.  */
1594     case 1:
1595       return !x->array[0];
1596     default:
1597       abort ();
1598     }
1599 }
1600
1601 static INLINE void
1602 operand_type_set (union i386_operand_type *x, unsigned int v)
1603 {
1604   switch (ARRAY_SIZE(x->array))
1605     {
1606     case 3:
1607       x->array[2] = v;
1608       /* Fall through.  */
1609     case 2:
1610       x->array[1] = v;
1611       /* Fall through.  */
1612     case 1:
1613       x->array[0] = v;
1614       /* Fall through.  */
1615       break;
1616     default:
1617       abort ();
1618     }
1619
1620   x->bitfield.class = ClassNone;
1621   x->bitfield.instance = InstanceNone;
1622 }
1623
1624 static INLINE int
1625 operand_type_equal (const union i386_operand_type *x,
1626                     const union i386_operand_type *y)
1627 {
1628   switch (ARRAY_SIZE(x->array))
1629     {
1630     case 3:
1631       if (x->array[2] != y->array[2])
1632         return 0;
1633       /* Fall through.  */
1634     case 2:
1635       if (x->array[1] != y->array[1])
1636         return 0;
1637       /* Fall through.  */
1638     case 1:
1639       return x->array[0] == y->array[0];
1640       break;
1641     default:
1642       abort ();
1643     }
1644 }
1645
1646 static INLINE int
1647 cpu_flags_all_zero (const union i386_cpu_flags *x)
1648 {
1649   switch (ARRAY_SIZE(x->array))
1650     {
1651     case 5:
1652       if (x->array[4])
1653         return 0;
1654       /* Fall through.  */
1655     case 4:
1656       if (x->array[3])
1657         return 0;
1658       /* Fall through.  */
1659     case 3:
1660       if (x->array[2])
1661         return 0;
1662       /* Fall through.  */
1663     case 2:
1664       if (x->array[1])
1665         return 0;
1666       /* Fall through.  */
1667     case 1:
1668       return !x->array[0];
1669     default:
1670       abort ();
1671     }
1672 }
1673
1674 static INLINE int
1675 cpu_flags_equal (const union i386_cpu_flags *x,
1676                  const union i386_cpu_flags *y)
1677 {
1678   switch (ARRAY_SIZE(x->array))
1679     {
1680     case 5:
1681       if (x->array[4] != y->array[4])
1682         return 0;
1683       /* Fall through.  */
1684     case 4:
1685       if (x->array[3] != y->array[3])
1686         return 0;
1687       /* Fall through.  */
1688     case 3:
1689       if (x->array[2] != y->array[2])
1690         return 0;
1691       /* Fall through.  */
1692     case 2:
1693       if (x->array[1] != y->array[1])
1694         return 0;
1695       /* Fall through.  */
1696     case 1:
1697       return x->array[0] == y->array[0];
1698       break;
1699     default:
1700       abort ();
1701     }
1702 }
1703
1704 static INLINE int
1705 cpu_flags_check_cpu64 (i386_cpu_flags f)
1706 {
1707   return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1708            || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1709 }
1710
1711 static INLINE i386_cpu_flags
1712 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1713 {
1714   switch (ARRAY_SIZE (x.array))
1715     {
1716     case 5:
1717       x.array [4] &= y.array [4];
1718       /* Fall through.  */
1719     case 4:
1720       x.array [3] &= y.array [3];
1721       /* Fall through.  */
1722     case 3:
1723       x.array [2] &= y.array [2];
1724       /* Fall through.  */
1725     case 2:
1726       x.array [1] &= y.array [1];
1727       /* Fall through.  */
1728     case 1:
1729       x.array [0] &= y.array [0];
1730       break;
1731     default:
1732       abort ();
1733     }
1734   return x;
1735 }
1736
1737 static INLINE i386_cpu_flags
1738 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1739 {
1740   switch (ARRAY_SIZE (x.array))
1741     {
1742     case 5:
1743       x.array [4] |= y.array [4];
1744       /* Fall through.  */
1745     case 4:
1746       x.array [3] |= y.array [3];
1747       /* Fall through.  */
1748     case 3:
1749       x.array [2] |= y.array [2];
1750       /* Fall through.  */
1751     case 2:
1752       x.array [1] |= y.array [1];
1753       /* Fall through.  */
1754     case 1:
1755       x.array [0] |= y.array [0];
1756       break;
1757     default:
1758       abort ();
1759     }
1760   return x;
1761 }
1762
1763 static INLINE i386_cpu_flags
1764 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1765 {
1766   switch (ARRAY_SIZE (x.array))
1767     {
1768     case 5:
1769       x.array [4] &= ~y.array [4];
1770       /* Fall through.  */
1771     case 4:
1772       x.array [3] &= ~y.array [3];
1773       /* Fall through.  */
1774     case 3:
1775       x.array [2] &= ~y.array [2];
1776       /* Fall through.  */
1777     case 2:
1778       x.array [1] &= ~y.array [1];
1779       /* Fall through.  */
1780     case 1:
1781       x.array [0] &= ~y.array [0];
1782       break;
1783     default:
1784       abort ();
1785     }
1786   return x;
1787 }
1788
/* CPU flag set initialized from CPU_ANY_AVX512F_FLAGS.  */
static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;

/* Bits of the value returned by cpu_flags_match ().  */
/* Set when the template's CPU feature requirements are met.  */
#define CPU_FLAGS_ARCH_MATCH            0x1
/* Set when the template passes cpu_flags_check_cpu64 ().  */
#define CPU_FLAGS_64BIT_MATCH           0x2

/* Both match bits set.  */
#define CPU_FLAGS_PERFECT_MATCH \
  (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1796
1797 /* Return CPU flags match bits. */
1798
1799 static int
1800 cpu_flags_match (const insn_template *t)
1801 {
1802   i386_cpu_flags x = t->cpu_flags;
1803   int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1804
1805   x.bitfield.cpu64 = 0;
1806   x.bitfield.cpuno64 = 0;
1807
1808   if (cpu_flags_all_zero (&x))
1809     {
1810       /* This instruction is available on all archs.  */
1811       match |= CPU_FLAGS_ARCH_MATCH;
1812     }
1813   else
1814     {
1815       /* This instruction is available only on some archs.  */
1816       i386_cpu_flags cpu = cpu_arch_flags;
1817
1818       /* AVX512VL is no standalone feature - match it and then strip it.  */
1819       if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1820         return match;
1821       x.bitfield.cpuavx512vl = 0;
1822
1823       /* AVX and AVX2 present at the same time express an operand size
1824          dependency - strip AVX2 for the purposes here.  The operand size
1825          dependent check occurs in check_vecOperands().  */
1826       if (x.bitfield.cpuavx && x.bitfield.cpuavx2)
1827         x.bitfield.cpuavx2 = 0;
1828
1829       cpu = cpu_flags_and (x, cpu);
1830       if (!cpu_flags_all_zero (&cpu))
1831         {
1832           if (x.bitfield.cpuavx)
1833             {
1834               /* We need to check a few extra flags with AVX.  */
1835               if (cpu.bitfield.cpuavx
1836                   && (!t->opcode_modifier.sse2avx
1837                       || (sse2avx && !i.prefix[DATA_PREFIX]))
1838                   && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1839                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1840                   && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1841                 match |= CPU_FLAGS_ARCH_MATCH;
1842             }
1843           else if (x.bitfield.cpuavx512f)
1844             {
1845               /* We need to check a few extra flags with AVX512F.  */
1846               if (cpu.bitfield.cpuavx512f
1847                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1848                   && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1849                   && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1850                 match |= CPU_FLAGS_ARCH_MATCH;
1851             }
1852           else
1853             match |= CPU_FLAGS_ARCH_MATCH;
1854         }
1855     }
1856   return match;
1857 }
1858
1859 static INLINE i386_operand_type
1860 operand_type_and (i386_operand_type x, i386_operand_type y)
1861 {
1862   if (x.bitfield.class != y.bitfield.class)
1863     x.bitfield.class = ClassNone;
1864   if (x.bitfield.instance != y.bitfield.instance)
1865     x.bitfield.instance = InstanceNone;
1866
1867   switch (ARRAY_SIZE (x.array))
1868     {
1869     case 3:
1870       x.array [2] &= y.array [2];
1871       /* Fall through.  */
1872     case 2:
1873       x.array [1] &= y.array [1];
1874       /* Fall through.  */
1875     case 1:
1876       x.array [0] &= y.array [0];
1877       break;
1878     default:
1879       abort ();
1880     }
1881   return x;
1882 }
1883
1884 static INLINE i386_operand_type
1885 operand_type_and_not (i386_operand_type x, i386_operand_type y)
1886 {
1887   gas_assert (y.bitfield.class == ClassNone);
1888   gas_assert (y.bitfield.instance == InstanceNone);
1889
1890   switch (ARRAY_SIZE (x.array))
1891     {
1892     case 3:
1893       x.array [2] &= ~y.array [2];
1894       /* Fall through.  */
1895     case 2:
1896       x.array [1] &= ~y.array [1];
1897       /* Fall through.  */
1898     case 1:
1899       x.array [0] &= ~y.array [0];
1900       break;
1901     default:
1902       abort ();
1903     }
1904   return x;
1905 }
1906
1907 static INLINE i386_operand_type
1908 operand_type_or (i386_operand_type x, i386_operand_type y)
1909 {
1910   gas_assert (x.bitfield.class == ClassNone ||
1911               y.bitfield.class == ClassNone ||
1912               x.bitfield.class == y.bitfield.class);
1913   gas_assert (x.bitfield.instance == InstanceNone ||
1914               y.bitfield.instance == InstanceNone ||
1915               x.bitfield.instance == y.bitfield.instance);
1916
1917   switch (ARRAY_SIZE (x.array))
1918     {
1919     case 3:
1920       x.array [2] |= y.array [2];
1921       /* Fall through.  */
1922     case 2:
1923       x.array [1] |= y.array [1];
1924       /* Fall through.  */
1925     case 1:
1926       x.array [0] |= y.array [0];
1927       break;
1928     default:
1929       abort ();
1930     }
1931   return x;
1932 }
1933
1934 static INLINE i386_operand_type
1935 operand_type_xor (i386_operand_type x, i386_operand_type y)
1936 {
1937   gas_assert (y.bitfield.class == ClassNone);
1938   gas_assert (y.bitfield.instance == InstanceNone);
1939
1940   switch (ARRAY_SIZE (x.array))
1941     {
1942     case 3:
1943       x.array [2] ^= y.array [2];
1944       /* Fall through.  */
1945     case 2:
1946       x.array [1] ^= y.array [1];
1947       /* Fall through.  */
1948     case 1:
1949       x.array [0] ^= y.array [0];
1950       break;
1951     default:
1952       abort ();
1953     }
1954   return x;
1955 }
1956
1957 static const i386_operand_type anydisp = {
1958   .bitfield = { .disp8 = 1, .disp16 = 1, .disp32 = 1, .disp64 = 1 }
1959 };
1960
1961 enum operand_type
1962 {
1963   reg,
1964   imm,
1965   disp,
1966   anymem
1967 };
1968
1969 static INLINE int
1970 operand_type_check (i386_operand_type t, enum operand_type c)
1971 {
1972   switch (c)
1973     {
1974     case reg:
1975       return t.bitfield.class == Reg;
1976
1977     case imm:
1978       return (t.bitfield.imm8
1979               || t.bitfield.imm8s
1980               || t.bitfield.imm16
1981               || t.bitfield.imm32
1982               || t.bitfield.imm32s
1983               || t.bitfield.imm64);
1984
1985     case disp:
1986       return (t.bitfield.disp8
1987               || t.bitfield.disp16
1988               || t.bitfield.disp32
1989               || t.bitfield.disp64);
1990
1991     case anymem:
1992       return (t.bitfield.disp8
1993               || t.bitfield.disp16
1994               || t.bitfield.disp32
1995               || t.bitfield.disp64
1996               || t.bitfield.baseindex);
1997
1998     default:
1999       abort ();
2000     }
2001
2002   return 0;
2003 }
2004
2005 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2006    between operand GIVEN and opeand WANTED for instruction template T.  */
2007
2008 static INLINE int
2009 match_operand_size (const insn_template *t, unsigned int wanted,
2010                     unsigned int given)
2011 {
2012   return !((i.types[given].bitfield.byte
2013             && !t->operand_types[wanted].bitfield.byte)
2014            || (i.types[given].bitfield.word
2015                && !t->operand_types[wanted].bitfield.word)
2016            || (i.types[given].bitfield.dword
2017                && !t->operand_types[wanted].bitfield.dword)
2018            || (i.types[given].bitfield.qword
2019                && (!t->operand_types[wanted].bitfield.qword
2020                    /* Don't allow 64-bit (memory) operands outside of 64-bit
2021                       mode, when they're used where a 64-bit GPR could also
2022                       be used.  Checking is needed for Intel Syntax only.  */
2023                    || (intel_syntax
2024                        && flag_code != CODE_64BIT
2025                        && (t->operand_types[wanted].bitfield.class == Reg
2026                            || t->operand_types[wanted].bitfield.class == Accum
2027                            || t->opcode_modifier.isstring))))
2028            || (i.types[given].bitfield.tbyte
2029                && !t->operand_types[wanted].bitfield.tbyte));
2030 }
2031
2032 /* Return 1 if there is no conflict in SIMD register between operand
2033    GIVEN and opeand WANTED for instruction template T.  */
2034
2035 static INLINE int
2036 match_simd_size (const insn_template *t, unsigned int wanted,
2037                  unsigned int given)
2038 {
2039   return !((i.types[given].bitfield.xmmword
2040             && !t->operand_types[wanted].bitfield.xmmword)
2041            || (i.types[given].bitfield.ymmword
2042                && !t->operand_types[wanted].bitfield.ymmword)
2043            || (i.types[given].bitfield.zmmword
2044                && !t->operand_types[wanted].bitfield.zmmword)
2045            || (i.types[given].bitfield.tmmword
2046                && !t->operand_types[wanted].bitfield.tmmword));
2047 }
2048
2049 /* Return 1 if there is no conflict in any size between operand GIVEN
2050    and opeand WANTED for instruction template T.  */
2051
2052 static INLINE int
2053 match_mem_size (const insn_template *t, unsigned int wanted,
2054                 unsigned int given)
2055 {
2056   return (match_operand_size (t, wanted, given)
2057           && !((i.types[given].bitfield.unspecified
2058                 && !i.broadcast.type
2059                 && !i.broadcast.bytes
2060                 && !t->operand_types[wanted].bitfield.unspecified)
2061                || (i.types[given].bitfield.fword
2062                    && !t->operand_types[wanted].bitfield.fword)
2063                /* For scalar opcode templates to allow register and memory
2064                   operands at the same time, some special casing is needed
2065                   here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2066                   down-conversion vpmov*.  */
2067                || ((t->operand_types[wanted].bitfield.class == RegSIMD
2068                     && t->operand_types[wanted].bitfield.byte
2069                        + t->operand_types[wanted].bitfield.word
2070                        + t->operand_types[wanted].bitfield.dword
2071                        + t->operand_types[wanted].bitfield.qword
2072                        > !!t->opcode_modifier.broadcast)
2073                    ? (i.types[given].bitfield.xmmword
2074                       || i.types[given].bitfield.ymmword
2075                       || i.types[given].bitfield.zmmword)
2076                    : !match_simd_size(t, wanted, given))));
2077 }
2078
2079 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2080    operands for instruction template T, and it has MATCH_REVERSE set if there
2081    is no size conflict on any operands for the template with operands reversed
2082    (and the template allows for reversing in the first place).  */
2083
2084 #define MATCH_STRAIGHT 1
2085 #define MATCH_REVERSE  2
2086
2087 static INLINE unsigned int
2088 operand_size_match (const insn_template *t)
2089 {
2090   unsigned int j, match = MATCH_STRAIGHT;
2091
2092   /* Don't check non-absolute jump instructions.  */
2093   if (t->opcode_modifier.jump
2094       && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2095     return match;
2096
2097   /* Check memory and accumulator operand size.  */
2098   for (j = 0; j < i.operands; j++)
2099     {
2100       if (i.types[j].bitfield.class != Reg
2101           && i.types[j].bitfield.class != RegSIMD
2102           && t->opcode_modifier.operandconstraint == ANY_SIZE)
2103         continue;
2104
2105       if (t->operand_types[j].bitfield.class == Reg
2106           && !match_operand_size (t, j, j))
2107         {
2108           match = 0;
2109           break;
2110         }
2111
2112       if (t->operand_types[j].bitfield.class == RegSIMD
2113           && !match_simd_size (t, j, j))
2114         {
2115           match = 0;
2116           break;
2117         }
2118
2119       if (t->operand_types[j].bitfield.instance == Accum
2120           && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2121         {
2122           match = 0;
2123           break;
2124         }
2125
2126       if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2127         {
2128           match = 0;
2129           break;
2130         }
2131     }
2132
2133   if (!t->opcode_modifier.d)
2134     return match;
2135
2136   /* Check reverse.  */
2137   gas_assert ((i.operands >= 2 && i.operands <= 3)
2138               || t->opcode_modifier.vexsources);
2139
2140   for (j = 0; j < i.operands; j++)
2141     {
2142       unsigned int given = i.operands - j - 1;
2143
2144       /* For 4- and 5-operand insns VEX.W controls just the first two
2145          register operands.  */
2146       if (t->opcode_modifier.vexsources)
2147         given = j < 2 ? 1 - j : j;
2148
2149       if (t->operand_types[j].bitfield.class == Reg
2150           && !match_operand_size (t, j, given))
2151         return match;
2152
2153       if (t->operand_types[j].bitfield.class == RegSIMD
2154           && !match_simd_size (t, j, given))
2155         return match;
2156
2157       if (t->operand_types[j].bitfield.instance == Accum
2158           && (!match_operand_size (t, j, given)
2159               || !match_simd_size (t, j, given)))
2160         return match;
2161
2162       if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2163         return match;
2164     }
2165
2166   return match | MATCH_REVERSE;
2167 }
2168
2169 static INLINE int
2170 operand_type_match (i386_operand_type overlap,
2171                     i386_operand_type given)
2172 {
2173   i386_operand_type temp = overlap;
2174
2175   temp.bitfield.unspecified = 0;
2176   temp.bitfield.byte = 0;
2177   temp.bitfield.word = 0;
2178   temp.bitfield.dword = 0;
2179   temp.bitfield.fword = 0;
2180   temp.bitfield.qword = 0;
2181   temp.bitfield.tbyte = 0;
2182   temp.bitfield.xmmword = 0;
2183   temp.bitfield.ymmword = 0;
2184   temp.bitfield.zmmword = 0;
2185   temp.bitfield.tmmword = 0;
2186   if (operand_type_all_zero (&temp))
2187     goto mismatch;
2188
2189   if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2190     return 1;
2191
2192  mismatch:
2193   i.error = operand_type_mismatch;
2194   return 0;
2195 }
2196
2197 /* If given types g0 and g1 are registers they must be of the same type
2198    unless the expected operand type register overlap is null.
2199    Intel syntax sized memory operands are also checked here.  */
2200
2201 static INLINE int
2202 operand_type_register_match (i386_operand_type g0,
2203                              i386_operand_type t0,
2204                              i386_operand_type g1,
2205                              i386_operand_type t1)
2206 {
2207   if (g0.bitfield.class != Reg
2208       && g0.bitfield.class != RegSIMD
2209       && (g0.bitfield.unspecified
2210           || !operand_type_check (g0, anymem)))
2211     return 1;
2212
2213   if (g1.bitfield.class != Reg
2214       && g1.bitfield.class != RegSIMD
2215       && (g1.bitfield.unspecified
2216           || !operand_type_check (g1, anymem)))
2217     return 1;
2218
2219   if (g0.bitfield.byte == g1.bitfield.byte
2220       && g0.bitfield.word == g1.bitfield.word
2221       && g0.bitfield.dword == g1.bitfield.dword
2222       && g0.bitfield.qword == g1.bitfield.qword
2223       && g0.bitfield.xmmword == g1.bitfield.xmmword
2224       && g0.bitfield.ymmword == g1.bitfield.ymmword
2225       && g0.bitfield.zmmword == g1.bitfield.zmmword)
2226     return 1;
2227
2228   /* If expectations overlap in no more than a single size, all is fine. */
2229   g0 = operand_type_and (t0, t1);
2230   if (g0.bitfield.byte
2231       + g0.bitfield.word
2232       + g0.bitfield.dword
2233       + g0.bitfield.qword
2234       + g0.bitfield.xmmword
2235       + g0.bitfield.ymmword
2236       + g0.bitfield.zmmword <= 1)
2237     return 1;
2238
2239   i.error = register_type_mismatch;
2240
2241   return 0;
2242 }
2243
2244 static INLINE unsigned int
2245 register_number (const reg_entry *r)
2246 {
2247   unsigned int nr = r->reg_num;
2248
2249   if (r->reg_flags & RegRex)
2250     nr += 8;
2251
2252   if (r->reg_flags & RegVRex)
2253     nr += 16;
2254
2255   return nr;
2256 }
2257
2258 static INLINE unsigned int
2259 mode_from_disp_size (i386_operand_type t)
2260 {
2261   if (t.bitfield.disp8)
2262     return 1;
2263   else if (t.bitfield.disp16
2264            || t.bitfield.disp32)
2265     return 2;
2266   else
2267     return 0;
2268 }
2269
2270 static INLINE int
2271 fits_in_signed_byte (addressT num)
2272 {
2273   return num + 0x80 <= 0xff;
2274 }
2275
2276 static INLINE int
2277 fits_in_unsigned_byte (addressT num)
2278 {
2279   return num <= 0xff;
2280 }
2281
2282 static INLINE int
2283 fits_in_unsigned_word (addressT num)
2284 {
2285   return num <= 0xffff;
2286 }
2287
2288 static INLINE int
2289 fits_in_signed_word (addressT num)
2290 {
2291   return num + 0x8000 <= 0xffff;
2292 }
2293
2294 static INLINE int
2295 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2296 {
2297 #ifndef BFD64
2298   return 1;
2299 #else
2300   return num + 0x80000000 <= 0xffffffff;
2301 #endif
2302 }                               /* fits_in_signed_long() */
2303
2304 static INLINE int
2305 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2306 {
2307 #ifndef BFD64
2308   return 1;
2309 #else
2310   return num <= 0xffffffff;
2311 #endif
2312 }                               /* fits_in_unsigned_long() */
2313
2314 static INLINE valueT extend_to_32bit_address (addressT num)
2315 {
2316 #ifdef BFD64
2317   if (fits_in_unsigned_long(num))
2318     return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2319
2320   if (!fits_in_signed_long (num))
2321     return num & 0xffffffff;
2322 #endif
2323
2324   return num;
2325 }
2326
2327 static INLINE int
2328 fits_in_disp8 (offsetT num)
2329 {
2330   int shift = i.memshift;
2331   unsigned int mask;
2332
2333   if (shift == -1)
2334     abort ();
2335
2336   mask = (1 << shift) - 1;
2337
2338   /* Return 0 if NUM isn't properly aligned.  */
2339   if ((num & mask))
2340     return 0;
2341
2342   /* Check if NUM will fit in 8bit after shift.  */
2343   return fits_in_signed_byte (num >> shift);
2344 }
2345
2346 static INLINE int
2347 fits_in_imm4 (offsetT num)
2348 {
2349   return (num & 0xf) == num;
2350 }
2351
2352 static i386_operand_type
2353 smallest_imm_type (offsetT num)
2354 {
2355   i386_operand_type t;
2356
2357   operand_type_set (&t, 0);
2358   t.bitfield.imm64 = 1;
2359
2360   if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2361     {
2362       /* This code is disabled on the 486 because all the Imm1 forms
2363          in the opcode table are slower on the i486.  They're the
2364          versions with the implicitly specified single-position
2365          displacement, which has another syntax if you really want to
2366          use that form.  */
2367       t.bitfield.imm1 = 1;
2368       t.bitfield.imm8 = 1;
2369       t.bitfield.imm8s = 1;
2370       t.bitfield.imm16 = 1;
2371       t.bitfield.imm32 = 1;
2372       t.bitfield.imm32s = 1;
2373     }
2374   else if (fits_in_signed_byte (num))
2375     {
2376       t.bitfield.imm8 = 1;
2377       t.bitfield.imm8s = 1;
2378       t.bitfield.imm16 = 1;
2379       t.bitfield.imm32 = 1;
2380       t.bitfield.imm32s = 1;
2381     }
2382   else if (fits_in_unsigned_byte (num))
2383     {
2384       t.bitfield.imm8 = 1;
2385       t.bitfield.imm16 = 1;
2386       t.bitfield.imm32 = 1;
2387       t.bitfield.imm32s = 1;
2388     }
2389   else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2390     {
2391       t.bitfield.imm16 = 1;
2392       t.bitfield.imm32 = 1;
2393       t.bitfield.imm32s = 1;
2394     }
2395   else if (fits_in_signed_long (num))
2396     {
2397       t.bitfield.imm32 = 1;
2398       t.bitfield.imm32s = 1;
2399     }
2400   else if (fits_in_unsigned_long (num))
2401     t.bitfield.imm32 = 1;
2402
2403   return t;
2404 }
2405
2406 static offsetT
2407 offset_in_range (offsetT val, int size)
2408 {
2409   addressT mask;
2410
2411   switch (size)
2412     {
2413     case 1: mask = ((addressT) 1 <<  8) - 1; break;
2414     case 2: mask = ((addressT) 1 << 16) - 1; break;
2415 #ifdef BFD64
2416     case 4: mask = ((addressT) 1 << 32) - 1; break;
2417 #endif
2418     case sizeof (val): return val;
2419     default: abort ();
2420     }
2421
2422   if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2423     as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2424              (uint64_t) val, (uint64_t) (val & mask));
2425
2426   return val & mask;
2427 }
2428
2429 enum PREFIX_GROUP
2430 {
2431   PREFIX_EXIST = 0,
2432   PREFIX_LOCK,
2433   PREFIX_REP,
2434   PREFIX_DS,
2435   PREFIX_OTHER
2436 };
2437
2438 /* Returns
2439    a. PREFIX_EXIST if attempting to add a prefix where one from the
2440    same class already exists.
2441    b. PREFIX_LOCK if lock prefix is added.
2442    c. PREFIX_REP if rep/repne prefix is added.
2443    d. PREFIX_DS if ds prefix is added.
2444    e. PREFIX_OTHER if other prefix is added.
2445  */
2446
2447 static enum PREFIX_GROUP
2448 add_prefix (unsigned int prefix)
2449 {
2450   enum PREFIX_GROUP ret = PREFIX_OTHER;
2451   unsigned int q;
2452
2453   if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2454       && flag_code == CODE_64BIT)
2455     {
2456       if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2457           || (i.prefix[REX_PREFIX] & prefix & REX_R)
2458           || (i.prefix[REX_PREFIX] & prefix & REX_X)
2459           || (i.prefix[REX_PREFIX] & prefix & REX_B))
2460         ret = PREFIX_EXIST;
2461       q = REX_PREFIX;
2462     }
2463   else
2464     {
2465       switch (prefix)
2466         {
2467         default:
2468           abort ();
2469
2470         case DS_PREFIX_OPCODE:
2471           ret = PREFIX_DS;
2472           /* Fall through.  */
2473         case CS_PREFIX_OPCODE:
2474         case ES_PREFIX_OPCODE:
2475         case FS_PREFIX_OPCODE:
2476         case GS_PREFIX_OPCODE:
2477         case SS_PREFIX_OPCODE:
2478           q = SEG_PREFIX;
2479           break;
2480
2481         case REPNE_PREFIX_OPCODE:
2482         case REPE_PREFIX_OPCODE:
2483           q = REP_PREFIX;
2484           ret = PREFIX_REP;
2485           break;
2486
2487         case LOCK_PREFIX_OPCODE:
2488           q = LOCK_PREFIX;
2489           ret = PREFIX_LOCK;
2490           break;
2491
2492         case FWAIT_OPCODE:
2493           q = WAIT_PREFIX;
2494           break;
2495
2496         case ADDR_PREFIX_OPCODE:
2497           q = ADDR_PREFIX;
2498           break;
2499
2500         case DATA_PREFIX_OPCODE:
2501           q = DATA_PREFIX;
2502           break;
2503         }
2504       if (i.prefix[q] != 0)
2505         ret = PREFIX_EXIST;
2506     }
2507
2508   if (ret)
2509     {
2510       if (!i.prefix[q])
2511         ++i.prefixes;
2512       i.prefix[q] |= prefix;
2513     }
2514   else
2515     as_bad (_("same type of prefix used twice"));
2516
2517   return ret;
2518 }
2519
2520 static void
2521 update_code_flag (int value, int check)
2522 {
2523   PRINTF_LIKE ((*as_error));
2524
2525   flag_code = (enum flag_code) value;
2526   if (flag_code == CODE_64BIT)
2527     {
2528       cpu_arch_flags.bitfield.cpu64 = 1;
2529       cpu_arch_flags.bitfield.cpuno64 = 0;
2530     }
2531   else
2532     {
2533       cpu_arch_flags.bitfield.cpu64 = 0;
2534       cpu_arch_flags.bitfield.cpuno64 = 1;
2535     }
2536   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
2537     {
2538       if (check)
2539         as_error = as_fatal;
2540       else
2541         as_error = as_bad;
2542       (*as_error) (_("64bit mode not supported on `%s'."),
2543                    cpu_arch_name ? cpu_arch_name : default_arch);
2544     }
2545   if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2546     {
2547       if (check)
2548         as_error = as_fatal;
2549       else
2550         as_error = as_bad;
2551       (*as_error) (_("32bit mode not supported on `%s'."),
2552                    cpu_arch_name ? cpu_arch_name : default_arch);
2553     }
2554   stackop_size = '\0';
2555 }
2556
/* Switch the code size mode to VALUE by calling update_code_flag () with
   CHECK == 0, so an unsupported mode is reported through as_bad () rather
   than as_fatal ().  */

static void
set_code_flag (int value)
{
  update_code_flag (value, 0);
}
2562
2563 static void
2564 set_16bit_gcc_code_flag (int new_code_flag)
2565 {
2566   flag_code = (enum flag_code) new_code_flag;
2567   if (flag_code != CODE_16BIT)
2568     abort ();
2569   cpu_arch_flags.bitfield.cpu64 = 0;
2570   cpu_arch_flags.bitfield.cpuno64 = 1;
2571   stackop_size = LONG_MNEM_SUFFIX;
2572 }
2573
2574 static void
2575 set_intel_syntax (int syntax_flag)
2576 {
2577   /* Find out if register prefixing is specified.  */
2578   int ask_naked_reg = 0;
2579
2580   SKIP_WHITESPACE ();
2581   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2582     {
2583       char *string;
2584       int e = get_symbol_name (&string);
2585
2586       if (strcmp (string, "prefix") == 0)
2587         ask_naked_reg = 1;
2588       else if (strcmp (string, "noprefix") == 0)
2589         ask_naked_reg = -1;
2590       else
2591         as_bad (_("bad argument to syntax directive."));
2592       (void) restore_line_pointer (e);
2593     }
2594   demand_empty_rest_of_line ();
2595
2596   intel_syntax = syntax_flag;
2597
2598   if (ask_naked_reg == 0)
2599     allow_naked_reg = (intel_syntax
2600                        && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2601   else
2602     allow_naked_reg = (ask_naked_reg < 0);
2603
2604   expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2605
2606   identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2607   identifier_chars['$'] = intel_syntax ? '$' : 0;
2608   register_prefix = allow_naked_reg ? "" : "%";
2609 }
2610
/* Store MNEMONIC_FLAG in the file-level intel_mnemonic setting.  */

static void
set_intel_mnemonic (int mnemonic_flag)
{
  intel_mnemonic = mnemonic_flag;
}
2616
/* Store FLAG in the file-level allow_index_reg setting.  */

static void
set_allow_index_reg (int flag)
{
  allow_index_reg = flag;
}
2622
2623 static void
2624 set_check (int what)
2625 {
2626   enum check_kind *kind;
2627   const char *str;
2628
2629   if (what)
2630     {
2631       kind = &operand_check;
2632       str = "operand";
2633     }
2634   else
2635     {
2636       kind = &sse_check;
2637       str = "sse";
2638     }
2639
2640   SKIP_WHITESPACE ();
2641
2642   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2643     {
2644       char *string;
2645       int e = get_symbol_name (&string);
2646
2647       if (strcmp (string, "none") == 0)
2648         *kind = check_none;
2649       else if (strcmp (string, "warning") == 0)
2650         *kind = check_warning;
2651       else if (strcmp (string, "error") == 0)
2652         *kind = check_error;
2653       else
2654         as_bad (_("bad argument to %s_check directive."), str);
2655       (void) restore_line_pointer (e);
2656     }
2657   else
2658     as_bad (_("missing argument for %s_check directive"), str);
2659
2660   demand_empty_rest_of_line ();
2661 }
2662
2663 static void
2664 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2665                            i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2666 {
2667 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2668   static const char *arch;
2669
2670   /* Intel MCU is only supported on ELF.  */
2671   if (!IS_ELF)
2672     return;
2673
2674   if (!arch)
2675     {
2676       /* Use cpu_arch_name if it is set in md_parse_option.  Otherwise
2677          use default_arch.  */
2678       arch = cpu_arch_name;
2679       if (!arch)
2680         arch = default_arch;
2681     }
2682
2683   /* If we are targeting Intel MCU, we must enable it.  */
2684   if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2685       == new_flag.bitfield.cpuiamcu)
2686     return;
2687
2688   as_bad (_("`%s' is not supported on `%s'"), name, arch);
2689 #endif
2690 }
2691
2692 static void
2693 extend_cpu_sub_arch_name (const char *name)
2694 {
2695   if (cpu_sub_arch_name)
2696     cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
2697                                   ".", name, (const char *) NULL);
2698   else
2699     cpu_sub_arch_name = concat (".", name, (const char *) NULL);
2700 }
2701
2702 static void
2703 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2704 {
2705   typedef struct arch_stack_entry
2706   {
2707     const struct arch_stack_entry *prev;
2708     const char *name;
2709     char *sub_name;
2710     i386_cpu_flags flags;
2711     i386_cpu_flags isa_flags;
2712     enum processor_type isa;
2713     enum flag_code flag_code;
2714     char stackop_size;
2715     bool no_cond_jump_promotion;
2716   } arch_stack_entry;
2717   static const arch_stack_entry *arch_stack_top;
2718
2719   SKIP_WHITESPACE ();
2720
2721   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2722     {
2723       char *s;
2724       int e = get_symbol_name (&s);
2725       const char *string = s;
2726       unsigned int j = 0;
2727       i386_cpu_flags flags;
2728
2729       if (strcmp (string, "default") == 0)
2730         {
2731           if (strcmp (default_arch, "iamcu") == 0)
2732             string = default_arch;
2733           else
2734             {
2735               static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
2736
2737               cpu_arch_name = NULL;
2738               free (cpu_sub_arch_name);
2739               cpu_sub_arch_name = NULL;
2740               cpu_arch_flags = cpu_unknown_flags;
2741               if (flag_code == CODE_64BIT)
2742                 {
2743                   cpu_arch_flags.bitfield.cpu64 = 1;
2744                   cpu_arch_flags.bitfield.cpuno64 = 0;
2745                 }
2746               else
2747                 {
2748                   cpu_arch_flags.bitfield.cpu64 = 0;
2749                   cpu_arch_flags.bitfield.cpuno64 = 1;
2750                 }
2751               cpu_arch_isa = PROCESSOR_UNKNOWN;
2752               cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
2753               if (!cpu_arch_tune_set)
2754                 {
2755                   cpu_arch_tune = cpu_arch_isa;
2756                   cpu_arch_tune_flags = cpu_arch_isa_flags;
2757                 }
2758
2759               j = ARRAY_SIZE (cpu_arch) + 1;
2760             }
2761         }
2762       else if (strcmp (string, "push") == 0)
2763         {
2764           arch_stack_entry *top = XNEW (arch_stack_entry);
2765
2766           top->name = cpu_arch_name;
2767           if (cpu_sub_arch_name)
2768             top->sub_name = xstrdup (cpu_sub_arch_name);
2769           else
2770             top->sub_name = NULL;
2771           top->flags = cpu_arch_flags;
2772           top->isa = cpu_arch_isa;
2773           top->isa_flags = cpu_arch_isa_flags;
2774           top->flag_code = flag_code;
2775           top->stackop_size = stackop_size;
2776           top->no_cond_jump_promotion = no_cond_jump_promotion;
2777
2778           top->prev = arch_stack_top;
2779           arch_stack_top = top;
2780
2781           (void) restore_line_pointer (e);
2782           demand_empty_rest_of_line ();
2783           return;
2784         }
2785       else if (strcmp (string, "pop") == 0)
2786         {
2787           const arch_stack_entry *top = arch_stack_top;
2788
2789           if (!top)
2790             as_bad (_(".arch stack is empty"));
2791           else if (top->flag_code != flag_code
2792                    || top->stackop_size != stackop_size)
2793             {
2794               static const unsigned int bits[] = {
2795                 [CODE_16BIT] = 16,
2796                 [CODE_32BIT] = 32,
2797                 [CODE_64BIT] = 64,
2798               };
2799
2800               as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
2801                       bits[top->flag_code],
2802                       top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
2803             }
2804           else
2805             {
2806               arch_stack_top = top->prev;
2807
2808               cpu_arch_name = top->name;
2809               free (cpu_sub_arch_name);
2810               cpu_sub_arch_name = top->sub_name;
2811               cpu_arch_flags = top->flags;
2812               cpu_arch_isa = top->isa;
2813               cpu_arch_isa_flags = top->isa_flags;
2814               no_cond_jump_promotion = top->no_cond_jump_promotion;
2815
2816               XDELETE (top);
2817             }
2818
2819           (void) restore_line_pointer (e);
2820           demand_empty_rest_of_line ();
2821           return;
2822         }
2823
2824       for (; j < ARRAY_SIZE (cpu_arch); j++)
2825         {
2826           if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
2827              && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
2828             {
2829               if (*string != '.')
2830                 {
2831                   check_cpu_arch_compatible (string, cpu_arch[j].enable);
2832
2833                   cpu_arch_name = cpu_arch[j].name;
2834                   free (cpu_sub_arch_name);
2835                   cpu_sub_arch_name = NULL;
2836                   cpu_arch_flags = cpu_arch[j].enable;
2837                   if (flag_code == CODE_64BIT)
2838                     {
2839                       cpu_arch_flags.bitfield.cpu64 = 1;
2840                       cpu_arch_flags.bitfield.cpuno64 = 0;
2841                     }
2842                   else
2843                     {
2844                       cpu_arch_flags.bitfield.cpu64 = 0;
2845                       cpu_arch_flags.bitfield.cpuno64 = 1;
2846                     }
2847                   cpu_arch_isa = cpu_arch[j].type;
2848                   cpu_arch_isa_flags = cpu_arch[j].enable;
2849                   if (!cpu_arch_tune_set)
2850                     {
2851                       cpu_arch_tune = cpu_arch_isa;
2852                       cpu_arch_tune_flags = cpu_arch_isa_flags;
2853                     }
2854                   pre_386_16bit_warned = false;
2855                   break;
2856                 }
2857
2858               if (cpu_flags_all_zero (&cpu_arch[j].enable))
2859                 continue;
2860
2861               flags = cpu_flags_or (cpu_arch_flags,
2862                                     cpu_arch[j].enable);
2863
2864               if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2865                 {
2866                   extend_cpu_sub_arch_name (string + 1);
2867                   cpu_arch_flags = flags;
2868                   cpu_arch_isa_flags = flags;
2869                 }
2870               else
2871                 cpu_arch_isa_flags
2872                   = cpu_flags_or (cpu_arch_isa_flags,
2873                                   cpu_arch[j].enable);
2874               (void) restore_line_pointer (e);
2875               demand_empty_rest_of_line ();
2876               return;
2877             }
2878         }
2879
2880       if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
2881         {
2882           /* Disable an ISA extension.  */
2883           for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2884             if (cpu_arch[j].type == PROCESSOR_NONE
2885                 && strcmp (string + 3, cpu_arch[j].name) == 0)
2886               {
2887                 flags = cpu_flags_and_not (cpu_arch_flags,
2888                                            cpu_arch[j].disable);
2889                 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2890                   {
2891                     extend_cpu_sub_arch_name (string + 1);
2892                     cpu_arch_flags = flags;
2893                     cpu_arch_isa_flags = flags;
2894                   }
2895                 (void) restore_line_pointer (e);
2896                 demand_empty_rest_of_line ();
2897                 return;
2898               }
2899         }
2900
2901       if (j == ARRAY_SIZE (cpu_arch))
2902         as_bad (_("no such architecture: `%s'"), string);
2903
2904       *input_line_pointer = e;
2905     }
2906   else
2907     as_bad (_("missing cpu architecture"));
2908
2909   no_cond_jump_promotion = 0;
2910   if (*input_line_pointer == ','
2911       && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2912     {
2913       char *string;
2914       char e;
2915
2916       ++input_line_pointer;
2917       e = get_symbol_name (&string);
2918
2919       if (strcmp (string, "nojumps") == 0)
2920         no_cond_jump_promotion = 1;
2921       else if (strcmp (string, "jumps") == 0)
2922         ;
2923       else
2924         as_bad (_("no such architecture modifier: `%s'"), string);
2925
2926       (void) restore_line_pointer (e);
2927     }
2928
2929   demand_empty_rest_of_line ();
2930 }
2931
2932 enum bfd_architecture
2933 i386_arch (void)
2934 {
2935   if (cpu_arch_isa == PROCESSOR_IAMCU)
2936     {
2937       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2938           || flag_code == CODE_64BIT)
2939         as_fatal (_("Intel MCU is 32bit ELF only"));
2940       return bfd_arch_iamcu;
2941     }
2942   else
2943     return bfd_arch_i386;
2944 }
2945
2946 unsigned long
2947 i386_mach (void)
2948 {
2949   if (startswith (default_arch, "x86_64"))
2950     {
2951       if (default_arch[6] == '\0')
2952         return bfd_mach_x86_64;
2953       else
2954         return bfd_mach_x64_32;
2955     }
2956   else if (!strcmp (default_arch, "i386")
2957            || !strcmp (default_arch, "iamcu"))
2958     {
2959       if (cpu_arch_isa == PROCESSOR_IAMCU)
2960         {
2961           if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
2962             as_fatal (_("Intel MCU is 32bit ELF only"));
2963           return bfd_mach_i386_iamcu;
2964         }
2965       else
2966         return bfd_mach_i386_i386;
2967     }
2968   else
2969     as_fatal (_("unknown architecture"));
2970 }
2971 \f
2972 #include "opcodes/i386-tbl.h"
2973
/* Set up the tables this file relies on: the opcode hash (op_hash), the
   register hash (reg_hash), the character classification tables
   (mnemonic_chars, register_chars, operand_chars, identifier_chars), and
   the unwind parameters that depend on flag_code.  Duplicate hash keys
   are reported via as_fatal.  */

void
md_begin (void)
{
  /* Support pseudo prefixes like {disp32}.  */
  lex_type ['{'] = LEX_BEGIN_NAME;

  /* Initialize op_hash hash table.  */
  op_hash = str_htab_create ();

  {
    const insn_template *const *sets = i386_op_sets;
    /* The last element of i386_op_sets is deliberately not inserted.
       NOTE(review): presumably it only marks where the final template
       set ends -- confirm against the table generator.  */
    const insn_template *const *end = sets + ARRAY_SIZE (i386_op_sets) - 1;

    /* Type checks to compensate for the conversion through void * which
       occurs during hash table insertion / lookup.  */
    (void) sizeof (sets == &current_templates->start);
    (void) sizeof (end == &current_templates->end);
    /* Key each entry by the name of the template it points at.  */
    for (; sets < end; ++sets)
      if (str_hash_insert (op_hash, (*sets)->name, sets, 0))
        as_fatal (_("duplicate %s"), (*sets)->name);
  }

  /* Initialize reg_hash hash table.  */
  reg_hash = str_htab_create ();
  {
    const reg_entry *regtab;
    unsigned int regtab_size = i386_regtab_size;

    /* While walking the register table, also remember a few specific
       entries (reg_eax, reg_st0, reg_es, reg_ss, reg_ds, reg_k0).  */
    for (regtab = i386_regtab; regtab_size--; regtab++)
      {
        switch (regtab->reg_type.bitfield.class)
          {
          case Reg:
            if (regtab->reg_type.bitfield.dword)
              {
                /* The dword accumulator.  */
                if (regtab->reg_type.bitfield.instance == Accum)
                  reg_eax = regtab;
              }
            else if (regtab->reg_type.bitfield.tbyte)
              {
                /* There's no point inserting st(<N>) in the hash table, as
                   parentheses aren't included in register_chars[] anyway.  */
                if (regtab->reg_type.bitfield.instance != Accum)
                  continue;
                reg_st0 = regtab;
              }
            break;

          case SReg:
            /* Segment registers number 0, 2 and 3 are recorded as
               reg_es, reg_ss and reg_ds respectively.  */
            switch (regtab->reg_num)
              {
              case 0: reg_es = regtab; break;
              case 2: reg_ss = regtab; break;
              case 3: reg_ds = regtab; break;
              }
            break;

          case RegMask:
            /* Mask register number 0.  */
            if (!regtab->reg_num)
              reg_k0 = regtab;
            break;
          }

        if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
          as_fatal (_("duplicate %s"), regtab->reg_name);
      }
  }

  /* Fill in lexical tables:  mnemonic_chars, operand_chars.  */
  {
    int c;
    char *p;

    for (c = 0; c < 256; c++)
      {
        if (ISDIGIT (c) || ISLOWER (c))
          {
            mnemonic_chars[c] = c;
            register_chars[c] = c;
            operand_chars[c] = c;
          }
        else if (ISUPPER (c))
          {
            /* Upper case maps to lower case in mnemonics and register
               names, but is kept as-is in operands.  */
            mnemonic_chars[c] = TOLOWER (c);
            register_chars[c] = mnemonic_chars[c];
            operand_chars[c] = c;
          }
        else if (c == '{' || c == '}')
          {
            mnemonic_chars[c] = c;
            operand_chars[c] = c;
          }
#ifdef SVR4_COMMENT_CHARS
        else if (c == '\\' && strchr (i386_comment_chars, '/'))
          operand_chars[c] = c;
#endif

        if (ISALPHA (c) || ISDIGIT (c))
          identifier_chars[c] = c;
        else if (c >= 128)
          {
            /* Bytes with the top bit set are accepted both in
               identifiers and in operands.  */
            identifier_chars[c] = c;
            operand_chars[c] = c;
          }
      }

#ifdef LEX_AT
    identifier_chars['@'] = '@';
#endif
#ifdef LEX_QM
    identifier_chars['?'] = '?';
    operand_chars['?'] = '?';
#endif
    mnemonic_chars['_'] = '_';
    mnemonic_chars['-'] = '-';
    mnemonic_chars['.'] = '.';
    identifier_chars['_'] = '_';
    identifier_chars['.'] = '.';

    /* Every character listed in operand_special_chars is valid inside
       an operand.  */
    for (p = operand_special_chars; *p != '\0'; p++)
      operand_chars[(unsigned char) *p] = *p;
  }

  /* Unwind parameters differ between 64-bit and 16-/32-bit code.  */
  if (flag_code == CODE_64BIT)
    {
#if defined (OBJ_COFF) && defined (TE_PE)
      x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
                                  ? 32 : 16);
#else
      x86_dwarf2_return_column = 16;
#endif
      x86_cie_data_alignment = -8;
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      x86_sframe_cfa_sp_reg = 7;
      x86_sframe_cfa_fp_reg = 6;
#endif
    }
  else
    {
      x86_dwarf2_return_column = 8;
      x86_cie_data_alignment = -4;
    }

  /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
     can be turned into BRANCH_PREFIX frag.  */
  if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
    abort ();
}
3122
/* Print hash table statistics for the opcode table and then the register
   table to FILE, labelled "i386 opcode" and "i386 register".  */

void
i386_print_statistics (FILE *file)
{
  htab_print_statistics (file, "i386 opcode", op_hash);
  htab_print_statistics (file, "i386 register", reg_hash);
}
3129
/* Release the opcode and register hash tables (op_hash and reg_hash).  */

void
i386_md_end (void)
{
  htab_delete (op_hash);
  htab_delete (reg_hash);
}
3136 \f
3137 #ifdef DEBUG386
3138
3139 /* Debugging routines for md_assemble.  */
3140 static void pte (insn_template *);
3141 static void pt (i386_operand_type);
3142 static void pe (expressionS *);
3143 static void ps (symbolS *);
3144
3145 static void
3146 pi (const char *line, i386_insn *x)
3147 {
3148   unsigned int j;
3149
3150   fprintf (stdout, "%s: template ", line);
3151   pte (&x->tm);
3152   fprintf (stdout, "  address: base %s  index %s  scale %x\n",
3153            x->base_reg ? x->base_reg->reg_name : "none",
3154            x->index_reg ? x->index_reg->reg_name : "none",
3155            x->log2_scale_factor);
3156   fprintf (stdout, "  modrm:  mode %x  reg %x  reg/mem %x\n",
3157            x->rm.mode, x->rm.reg, x->rm.regmem);
3158   fprintf (stdout, "  sib:  base %x  index %x  scale %x\n",
3159            x->sib.base, x->sib.index, x->sib.scale);
3160   fprintf (stdout, "  rex: 64bit %x  extX %x  extY %x  extZ %x\n",
3161            (x->rex & REX_W) != 0,
3162            (x->rex & REX_R) != 0,
3163            (x->rex & REX_X) != 0,
3164            (x->rex & REX_B) != 0);
3165   for (j = 0; j < x->operands; j++)
3166     {
3167       fprintf (stdout, "    #%d:  ", j + 1);
3168       pt (x->types[j]);
3169       fprintf (stdout, "\n");
3170       if (x->types[j].bitfield.class == Reg
3171           || x->types[j].bitfield.class == RegMMX
3172           || x->types[j].bitfield.class == RegSIMD
3173           || x->types[j].bitfield.class == RegMask
3174           || x->types[j].bitfield.class == SReg
3175           || x->types[j].bitfield.class == RegCR
3176           || x->types[j].bitfield.class == RegDR
3177           || x->types[j].bitfield.class == RegTR
3178           || x->types[j].bitfield.class == RegBND)
3179         fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3180       if (operand_type_check (x->types[j], imm))
3181         pe (x->op[j].imms);
3182       if (operand_type_check (x->types[j], disp))
3183         pe (x->op[j].disps);
3184     }
3185 }
3186
3187 static void
3188 pte (insn_template *t)
3189 {
3190   static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3191   static const char *const opc_spc[] = {
3192     NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3193     "XOP08", "XOP09", "XOP0A",
3194   };
3195   unsigned int j;
3196
3197   fprintf (stdout, " %d operands ", t->operands);
3198   if (opc_pfx[t->opcode_modifier.opcodeprefix])
3199     fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3200   if (opc_spc[t->opcode_modifier.opcodespace])
3201     fprintf (stdout, "space %s ", opc_spc[t->opcode_modifier.opcodespace]);
3202   fprintf (stdout, "opcode %x ", t->base_opcode);
3203   if (t->extension_opcode != None)
3204     fprintf (stdout, "ext %x ", t->extension_opcode);
3205   if (t->opcode_modifier.d)
3206     fprintf (stdout, "D");
3207   if (t->opcode_modifier.w)
3208     fprintf (stdout, "W");
3209   fprintf (stdout, "\n");
3210   for (j = 0; j < t->operands; j++)
3211     {
3212       fprintf (stdout, "    #%d type ", j + 1);
3213       pt (t->operand_types[j]);
3214       fprintf (stdout, "\n");
3215     }
3216 }
3217
3218 static void
3219 pe (expressionS *e)
3220 {
3221   fprintf (stdout, "    operation     %d\n", e->X_op);
3222   fprintf (stdout, "    add_number    %" PRId64 " (%" PRIx64 ")\n",
3223            (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3224   if (e->X_add_symbol)
3225     {
3226       fprintf (stdout, "    add_symbol    ");
3227       ps (e->X_add_symbol);
3228       fprintf (stdout, "\n");
3229     }
3230   if (e->X_op_symbol)
3231     {
3232       fprintf (stdout, "    op_symbol    ");
3233       ps (e->X_op_symbol);
3234       fprintf (stdout, "\n");
3235     }
3236 }
3237
3238 static void
3239 ps (symbolS *s)
3240 {
3241   fprintf (stdout, "%s type %s%s",
3242            S_GET_NAME (s),
3243            S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3244            segment_name (S_GET_SEGMENT (s)));
3245 }
3246
/* Table pairing an operand type bit pattern (MASK) with a short printable
   NAME.  pt () walks it and prints the name of every entry whose mask
   bits are all present in the operand type being dumped, so one operand
   may match several entries.  */

static struct type_name
  {
    i386_operand_type mask;
    const char *name;
  }
const type_names[] =
{
  /* General registers by size.  */
  { { .bitfield = { .class = Reg, .byte = 1 } }, "r8" },
  { { .bitfield = { .class = Reg, .word = 1 } }, "r16" },
  { { .bitfield = { .class = Reg, .dword = 1 } }, "r32" },
  { { .bitfield = { .class = Reg, .qword = 1 } }, "r64" },
  /* Accumulator instances by size.  */
  { { .bitfield = { .instance = Accum, .byte = 1 } }, "acc8" },
  { { .bitfield = { .instance = Accum, .word = 1 } }, "acc16" },
  { { .bitfield = { .instance = Accum, .dword = 1 } }, "acc32" },
  { { .bitfield = { .instance = Accum, .qword = 1 } }, "acc64" },
  /* Immediates.  */
  { { .bitfield = { .imm8 = 1 } }, "i8" },
  { { .bitfield = { .imm8s = 1 } }, "i8s" },
  { { .bitfield = { .imm16 = 1 } }, "i16" },
  { { .bitfield = { .imm32 = 1 } }, "i32" },
  { { .bitfield = { .imm32s = 1 } }, "i32s" },
  { { .bitfield = { .imm64 = 1 } }, "i64" },
  { { .bitfield = { .imm1 = 1 } }, "i1" },
  /* Memory addressing and displacements.  */
  { { .bitfield = { .baseindex = 1 } }, "BaseIndex" },
  { { .bitfield = { .disp8 = 1 } }, "d8" },
  { { .bitfield = { .disp16 = 1 } }, "d16" },
  { { .bitfield = { .disp32 = 1 } }, "d32" },
  { { .bitfield = { .disp64 = 1 } }, "d64" },
  /* Specific register instances.  */
  { { .bitfield = { .instance = RegD, .word = 1 } }, "InOutPortReg" },
  { { .bitfield = { .instance = RegC, .byte = 1 } }, "ShiftCount" },
  /* Remaining register classes.  */
  { { .bitfield = { .class = RegCR } }, "control reg" },
  { { .bitfield = { .class = RegTR } }, "test reg" },
  { { .bitfield = { .class = RegDR } }, "debug reg" },
  { { .bitfield = { .class = Reg, .tbyte = 1 } }, "FReg" },
  { { .bitfield = { .instance = Accum, .tbyte = 1 } }, "FAcc" },
  { { .bitfield = { .class = SReg } }, "SReg" },
  { { .bitfield = { .class = RegMMX } }, "rMMX" },
  { { .bitfield = { .class = RegSIMD, .xmmword = 1 } }, "rXMM" },
  { { .bitfield = { .class = RegSIMD, .ymmword = 1 } }, "rYMM" },
  { { .bitfield = { .class = RegSIMD, .zmmword = 1 } }, "rZMM" },
  { { .bitfield = { .class = RegSIMD, .tmmword = 1 } }, "rTMM" },
  { { .bitfield = { .class = RegMask } }, "Mask reg" },
};
3289
3290 static void
3291 pt (i386_operand_type t)
3292 {
3293   unsigned int j;
3294   i386_operand_type a;
3295
3296   for (j = 0; j < ARRAY_SIZE (type_names); j++)
3297     {
3298       a = operand_type_and (t, type_names[j].mask);
3299       if (operand_type_equal (&a, &type_names[j].mask))
3300         fprintf (stdout, "%s, ",  type_names[j].name);
3301     }
3302   fflush (stdout);
3303 }
3304
3305 #endif /* DEBUG386 */
3306 \f
3307 static bfd_reloc_code_real_type
3308 reloc (unsigned int size,
3309        int pcrel,
3310        int sign,
3311        bfd_reloc_code_real_type other)
3312 {
3313   if (other != NO_RELOC)
3314     {
3315       reloc_howto_type *rel;
3316
3317       if (size == 8)
3318         switch (other)
3319           {
3320           case BFD_RELOC_X86_64_GOT32:
3321             return BFD_RELOC_X86_64_GOT64;
3322             break;
3323           case BFD_RELOC_X86_64_GOTPLT64:
3324             return BFD_RELOC_X86_64_GOTPLT64;
3325             break;
3326           case BFD_RELOC_X86_64_PLTOFF64:
3327             return BFD_RELOC_X86_64_PLTOFF64;
3328             break;
3329           case BFD_RELOC_X86_64_GOTPC32:
3330             other = BFD_RELOC_X86_64_GOTPC64;
3331             break;
3332           case BFD_RELOC_X86_64_GOTPCREL:
3333             other = BFD_RELOC_X86_64_GOTPCREL64;
3334             break;
3335           case BFD_RELOC_X86_64_TPOFF32:
3336             other = BFD_RELOC_X86_64_TPOFF64;
3337             break;
3338           case BFD_RELOC_X86_64_DTPOFF32:
3339             other = BFD_RELOC_X86_64_DTPOFF64;
3340             break;
3341           default:
3342             break;
3343           }
3344
3345 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3346       if (other == BFD_RELOC_SIZE32)
3347         {
3348           if (size == 8)
3349             other = BFD_RELOC_SIZE64;
3350           if (pcrel)
3351             {
3352               as_bad (_("there are no pc-relative size relocations"));
3353               return NO_RELOC;
3354             }
3355         }
3356 #endif
3357
3358       /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless.  */
3359       if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3360         sign = -1;
3361
3362       rel = bfd_reloc_type_lookup (stdoutput, other);
3363       if (!rel)
3364         as_bad (_("unknown relocation (%u)"), other);
3365       else if (size != bfd_get_reloc_size (rel))
3366         as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3367                 bfd_get_reloc_size (rel),
3368                 size);
3369       else if (pcrel && !rel->pc_relative)
3370         as_bad (_("non-pc-relative relocation for pc-relative field"));
3371       else if ((rel->complain_on_overflow == complain_overflow_signed
3372                 && !sign)
3373                || (rel->complain_on_overflow == complain_overflow_unsigned
3374                    && sign > 0))
3375         as_bad (_("relocated field and relocation type differ in signedness"));
3376       else
3377         return other;
3378       return NO_RELOC;
3379     }
3380
3381   if (pcrel)
3382     {
3383       if (!sign)
3384         as_bad (_("there are no unsigned pc-relative relocations"));
3385       switch (size)
3386         {
3387         case 1: return BFD_RELOC_8_PCREL;
3388         case 2: return BFD_RELOC_16_PCREL;
3389         case 4: return BFD_RELOC_32_PCREL;
3390         case 8: return BFD_RELOC_64_PCREL;
3391         }
3392       as_bad (_("cannot do %u byte pc-relative relocation"), size);
3393     }
3394   else
3395     {
3396       if (sign > 0)
3397         switch (size)
3398           {
3399           case 4: return BFD_RELOC_X86_64_32S;
3400           }
3401       else
3402         switch (size)
3403           {
3404           case 1: return BFD_RELOC_8;
3405           case 2: return BFD_RELOC_16;
3406           case 4: return BFD_RELOC_32;
3407           case 8: return BFD_RELOC_64;
3408           }
3409       as_bad (_("cannot do %s %u byte relocation"),
3410               sign > 0 ? "signed" : "unsigned", size);
3411     }
3412
3413   return NO_RELOC;
3414 }
3415
3416 /* Here we decide which fixups can be adjusted to make them relative to
3417    the beginning of the section instead of the symbol.  Basically we need
3418    to make sure that the dynamic relocations are done correctly, so in
3419    some cases we force the original symbol to be used.  */
3420
3421 int
3422 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3423 {
3424 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3425   if (!IS_ELF)
3426     return 1;
3427
3428   /* Don't adjust pc-relative references to merge sections in 64-bit
3429      mode.  */
3430   if (use_rela_relocations
3431       && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3432       && fixP->fx_pcrel)
3433     return 0;
3434
3435   /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3436      and changed later by validate_fix.  */
3437   if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3438       && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3439     return 0;
3440
3441   /* Adjust_reloc_syms doesn't know about the GOT.  Need to keep symbol
3442      for size relocations.  */
3443   if (fixP->fx_r_type == BFD_RELOC_SIZE32
3444       || fixP->fx_r_type == BFD_RELOC_SIZE64
3445       || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3446       || fixP->fx_r_type == BFD_RELOC_386_GOT32
3447       || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3448       || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3449       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3450       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3451       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3452       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3453       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3454       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3455       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3456       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3457       || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3458       || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3459       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3460       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3461       || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3462       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3463       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3464       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3465       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3466       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3467       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3468       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3469       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3470       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3471       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3472       || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3473       || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3474     return 0;
3475 #endif
3476   return 1;
3477 }
3478
3479 static INLINE bool
3480 want_disp32 (const insn_template *t)
3481 {
3482   return flag_code != CODE_64BIT
3483          || i.prefix[ADDR_PREFIX]
3484          || (t->base_opcode == 0x8d
3485              && t->opcode_modifier.opcodespace == SPACE_BASE
3486              && (!i.types[1].bitfield.qword
3487                 || t->opcode_modifier.size == SIZE32));
3488 }
3489
/* Classify MNEMONIC by its spelling alone.  Returns
     0  when it is not a math op (no leading 'f', or fxsave / fxrstor),
     1  for an ordinary floating point op,
     2  for an integer op ("fi..."),
     3  for a control / state op (fldcw, fldenv, non-waiting "fn..."
        forms other than fnop, frstor, frstpm, fsave, fstcw, fstdw,
        fstenv, fsts[gw]).

   Note that the value returned is meaningful only for opcodes with
   (memory) operands, hence the code here is free to improperly handle
   opcodes that have no operands (for better performance and smaller
   code).  fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm,
   and the fs segment override prefix are not handled because no call
   path can make opcodes without operands get here.

   Characters past the second one are only read once the earlier ones
   have matched, so short mnemonics are never read past their
   terminator.  */

static int
intel_float_operand (const char *mnemonic)
{
  char family;

  if (mnemonic[0] != 'f')
    return 0; /* non-math */

  family = mnemonic[1];

  if (family == 'i')
    return 2; /* integer op */

  if (family == 'l')
    {
      /* fldcw/fldenv */
      if (mnemonic[2] == 'd'
          && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
        return 3;
      return 1;
    }

  if (family == 'n')
    /* Everything but fnop is a non-waiting control op.  */
    return mnemonic[2] != 'o' ? 3 : 1;

  if (family == 'r')
    /* frstor/frstpm */
    return mnemonic[2] == 's' ? 3 : 1;

  if (family == 's')
    {
      if (mnemonic[2] == 'a')
        return 3; /* fsave */

      /* fstcw, fstdw, fstenv, fsts[gw] */
      if (mnemonic[2] == 't'
          && (mnemonic[3] == 'c'
              || mnemonic[3] == 'd'
              || mnemonic[3] == 'e'
              || mnemonic[3] == 's'))
        return 3;

      return 1;
    }

  if (family == 'x')
    /* fxsave/fxrstor are not really math ops */
    return (mnemonic[2] == 'r' || mnemonic[2] == 's') ? 0 : 1;

  return 1;
}
3542
3543 static INLINE void
3544 install_template (const insn_template *t)
3545 {
3546   unsigned int l;
3547
3548   i.tm = *t;
3549
3550   /* Note that for pseudo prefixes this produces a length of 1. But for them
3551      the length isn't interesting at all.  */
3552   for (l = 1; l < 4; ++l)
3553     if (!(t->base_opcode >> (8 * l)))
3554       break;
3555
3556   i.opcode_length = l;
3557 }
3558
/* Build the VEX prefix.

   Fills in i.vex.length and i.vex.bytes[] for the current instruction.
   T is the template the instruction was matched against.  On the way,
   operands (together with i.rm, i.rex and, where needed, the installed
   template) may be swapped so that the shorter 2-byte form of the prefix
   becomes usable.  */

static void
build_vex_prefix (const insn_template *t)
{
  unsigned int register_specifier;
  unsigned int vector_length;
  unsigned int w;

  /* Check register specifier.  */
  if (i.vex.register_specifier)
    {
      /* The prefix stores the register number inverted, in 4 bits.  */
      register_specifier =
        ~register_number (i.vex.register_specifier) & 0xf;
      gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
    }
  else
    register_specifier = 0xf;

  /* Use 2-byte VEX prefix by swapping destination and source operand
     if there are more than 1 register operand.  */
  if (i.reg_operands > 1
      && i.vec_encoding != vex_encoding_vex3
      && i.dir_encoding == dir_encoding_default
      && i.operands == i.reg_operands
      && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
      && i.tm.opcode_modifier.opcodespace == SPACE_0F
      && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
      && i.rex == REX_B)
    {
      unsigned int xchg = i.operands - 1;
      union i386_op temp_op;
      i386_operand_type temp_type;

      /* Exchange the first and last operand (types and values).  */
      temp_type = i.types[xchg];
      i.types[xchg] = i.types[0];
      i.types[0] = temp_type;
      temp_op = i.op[xchg];
      i.op[xchg] = i.op[0];
      i.op[0] = temp_op;

      gas_assert (i.rm.mode == 3);

      /* With ModRM.reg and ModRM.rm exchanged, the extension bit moves
         from REX_B to REX_R.  From here on xchg is reused as scratch.  */
      i.rex = REX_R;
      xchg = i.rm.regmem;
      i.rm.regmem = i.rm.reg;
      i.rm.reg = xchg;

      /* Select the opposite-direction encoding: either flip the
         direction bit in the opcode or switch to the template that
         follows T.  */
      if (i.tm.opcode_modifier.d)
        i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
                            ? Opcode_ExtD : Opcode_SIMD_IntD;
      else /* Use the next insn.  */
        install_template (&t[1]);
    }

  /* Use 2-byte VEX prefix by swapping commutative source operands if there
     are no memory operands and at least 3 register ones.  */
  if (i.reg_operands >= 3
      && i.vec_encoding != vex_encoding_vex3
      && i.reg_operands == i.operands - i.imm_operands
      && i.tm.opcode_modifier.vex
      && i.tm.opcode_modifier.commutative
      && (i.tm.opcode_modifier.sse2avx || optimize > 1)
      && i.rex == REX_B
      && i.vex.register_specifier
      && !(i.vex.register_specifier->reg_flags & RegRex))
    {
      /* Index of the first register operand (immediates come first).  */
      unsigned int xchg = i.operands - i.reg_operands;
      union i386_op temp_op;
      i386_operand_type temp_type;

      gas_assert (i.tm.opcode_modifier.opcodespace == SPACE_0F);
      gas_assert (!i.tm.opcode_modifier.sae);
      gas_assert (operand_type_equal (&i.types[i.operands - 2],
                                      &i.types[i.operands - 3]));
      gas_assert (i.rm.mode == 3);

      /* Exchange the two source operands (types and values).  */
      temp_type = i.types[xchg];
      i.types[xchg] = i.types[xchg + 1];
      i.types[xchg + 1] = temp_type;
      temp_op = i.op[xchg];
      i.op[xchg] = i.op[xchg + 1];
      i.op[xchg + 1] = temp_op;

      /* The register formerly in ModRM.rm (number 8 or above, since
         REX_B was set) moves into the register specifier, and the
         former specifier register (below 8) moves into ModRM.rm, so no
         REX bit is needed any more.  xchg is reused to hold the full
         number of the register leaving ModRM.rm.  */
      i.rex = 0;
      xchg = i.rm.regmem | 8;
      i.rm.regmem = ~register_specifier & 0xf;
      gas_assert (!(i.rm.regmem & 8));
      /* Re-point the specifier by the difference in register numbers.
         NOTE(review): this relies on the register entries being laid
         out consecutively by number -- confirm against the register
         table.  */
      i.vex.register_specifier += xchg - i.rm.regmem;
      register_specifier = ~xchg & 0xf;
    }

  if (i.tm.opcode_modifier.vex == VEXScalar)
    vector_length = avxscalar;
  else if (i.tm.opcode_modifier.vex == VEX256)
    vector_length = 1;
  else
    {
      unsigned int op;

      /* Determine vector length from the last multi-length vector
         operand.  */
      vector_length = 0;
      for (op = t->operands; op--;)
        if (t->operand_types[op].bitfield.xmmword
            && t->operand_types[op].bitfield.ymmword
            && i.types[op].bitfield.ymmword)
          {
            vector_length = 1;
            break;
          }
    }

  /* Check the REX.W bit and VEXW.  */
  if (i.tm.opcode_modifier.vexw == VEXWIG)
    w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
  else if (i.tm.opcode_modifier.vexw)
    w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
  else
    w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;

  /* Use 2-byte VEX prefix if possible.  */
  if (w == 0
      && i.vec_encoding != vex_encoding_vex3
      && i.tm.opcode_modifier.opcodespace == SPACE_0F
      && (i.rex & (REX_W | REX_X | REX_B)) == 0)
    {
      /* 2-byte VEX prefix.  */
      unsigned int r;

      i.vex.length = 2;
      i.vex.bytes[0] = 0xc5;

      /* Check the REX.R bit.  It is stored inverted.  */
      r = (i.rex & REX_R) ? 0 : 1;
      i.vex.bytes[1] = (r << 7
                        | register_specifier << 3
                        | vector_length << 2
                        | i.tm.opcode_modifier.opcodeprefix);
    }
  else
    {
      /* 3-byte VEX prefix.  */
      i.vex.length = 3;

      /* The first byte distinguishes VEX (0xc4) from XOP (0x8f); any
         other opcode space is not expected here.  */
      switch (i.tm.opcode_modifier.opcodespace)
        {
        case SPACE_0F:
        case SPACE_0F38:
        case SPACE_0F3A:
          i.vex.bytes[0] = 0xc4;
          break;
        case SPACE_XOP08:
        case SPACE_XOP09:
        case SPACE_XOP0A:
          i.vex.bytes[0] = 0x8f;
          break;
        default:
          abort ();
        }

      /* The high 3 bits of the second VEX byte are 1's complement
         of RXB bits from REX.  */
      i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;

      i.vex.bytes[2] = (w << 7
                        | register_specifier << 3
                        | vector_length << 2
                        | i.tm.opcode_modifier.opcodeprefix);
    }
}
3730
3731 static INLINE bool
3732 is_evex_encoding (const insn_template *t)
3733 {
3734   return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3735          || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3736          || t->opcode_modifier.sae;
3737 }
3738
3739 static INLINE bool
3740 is_any_vex_encoding (const insn_template *t)
3741 {
3742   return t->opcode_modifier.vex || is_evex_encoding (t);
3743 }
3744
/* Return the total number of bytes covered by the broadcast memory
   operand of the current instruction when matched against template T.

   When i.broadcast.type is set, the size is computed from the
   template's broadcast modifier and the type, stored in
   i.broadcast.bytes, and returned.  Otherwise (only reached in Intel
   syntax, which is asserted) the size is derived from the template's
   operand types, starting from the element size already recorded in
   i.broadcast.bytes.

   DIAG requests a warning when the size had to be guessed.  */

static unsigned int
get_broadcast_bytes (const insn_template *t, bool diag)
{
  unsigned int op, bytes;
  const i386_operand_type *types;

  if (i.broadcast.type)
    return i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
                                * i.broadcast.type);

  gas_assert (intel_syntax);

  /* Locate the memory operand: the first template operand that allows
     base/index addressing.  */
  for (op = 0; op < t->operands; ++op)
    if (t->operand_types[op].bitfield.baseindex)
      break;

  gas_assert (op < t->operands);

  /* With a fixed EVEX length in the template, return the smallest size
     the memory operand permits that is larger than the element size in
     i.broadcast.bytes; each case deliberately falls through to the next
     larger candidate.  */
  if (t->opcode_modifier.evex
      && t->opcode_modifier.evex != EVEXDYN)
    switch (i.broadcast.bytes)
      {
      case 1:
        if (t->operand_types[op].bitfield.word)
          return 2;
      /* Fall through.  */
      case 2:
        if (t->operand_types[op].bitfield.dword)
          return 4;
      /* Fall through.  */
      case 4:
        if (t->operand_types[op].bitfield.qword)
          return 8;
      /* Fall through.  */
      case 8:
        if (t->operand_types[op].bitfield.xmmword)
          return 16;
        if (t->operand_types[op].bitfield.ymmword)
          return 32;
        if (t->operand_types[op].bitfield.zmmword)
          return 64;
      /* Fall through.  */
      default:
        abort ();
      }

  gas_assert (op + 1 < t->operands);

  /* If the operand following the memory operand allows more than one
     vector size in the template, the size actually used for it in the
     instruction decides, and no warning is needed.  */
  if (t->operand_types[op + 1].bitfield.xmmword
      + t->operand_types[op + 1].bitfield.ymmword
      + t->operand_types[op + 1].bitfield.zmmword > 1)
    {
      types = &i.types[op + 1];
      diag = false;
    }
  else /* Ambiguous - guess with a preference to non-AVX512VL forms.  */
    types = &t->operand_types[op];

  /* Take the largest vector size permitted by the chosen type.  */
  if (types->bitfield.zmmword)
    bytes = 64;
  else if (types->bitfield.ymmword)
    bytes = 32;
  else
    bytes = 16;

  if (diag)
    as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
             t->name, bytes * 8);

  return bytes;
}
3816
3817 /* Build the EVEX prefix.  */
3818
3819 static void
3820 build_evex_prefix (void)
3821 {
3822   unsigned int register_specifier, w;
3823   rex_byte vrex_used = 0;
3824
3825   /* Check register specifier.  */
3826   if (i.vex.register_specifier)
3827     {
3828       gas_assert ((i.vrex & REX_X) == 0);
3829
3830       register_specifier = i.vex.register_specifier->reg_num;
3831       if ((i.vex.register_specifier->reg_flags & RegRex))
3832         register_specifier += 8;
3833       /* The upper 16 registers are encoded in the fourth byte of the
3834          EVEX prefix.  */
3835       if (!(i.vex.register_specifier->reg_flags & RegVRex))
3836         i.vex.bytes[3] = 0x8;
3837       register_specifier = ~register_specifier & 0xf;
3838     }
3839   else
3840     {
3841       register_specifier = 0xf;
3842
3843       /* Encode upper 16 vector index register in the fourth byte of
3844          the EVEX prefix.  */
3845       if (!(i.vrex & REX_X))
3846         i.vex.bytes[3] = 0x8;
3847       else
3848         vrex_used |= REX_X;
3849     }
3850
3851   /* 4 byte EVEX prefix.  */
3852   i.vex.length = 4;
3853   i.vex.bytes[0] = 0x62;
3854
3855   /* The high 3 bits of the second EVEX byte are 1's compliment of RXB
3856      bits from REX.  */
3857   gas_assert (i.tm.opcode_modifier.opcodespace >= SPACE_0F);
3858   gas_assert (i.tm.opcode_modifier.opcodespace <= SPACE_EVEXMAP6);
3859   i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3860
3861   /* The fifth bit of the second EVEX byte is 1's compliment of the
3862      REX_R bit in VREX.  */
3863   if (!(i.vrex & REX_R))
3864     i.vex.bytes[1] |= 0x10;
3865   else
3866     vrex_used |= REX_R;
3867
3868   if ((i.reg_operands + i.imm_operands) == i.operands)
3869     {
3870       /* When all operands are registers, the REX_X bit in REX is not
3871          used.  We reuse it to encode the upper 16 registers, which is
3872          indicated by the REX_B bit in VREX.  The REX_X bit is encoded
3873          as 1's compliment.  */
3874       if ((i.vrex & REX_B))
3875         {
3876           vrex_used |= REX_B;
3877           i.vex.bytes[1] &= ~0x40;
3878         }
3879     }
3880
3881   /* EVEX instructions shouldn't need the REX prefix.  */
3882   i.vrex &= ~vrex_used;
3883   gas_assert (i.vrex == 0);
3884
3885   /* Check the REX.W bit and VEXW.  */
3886   if (i.tm.opcode_modifier.vexw == VEXWIG)
3887     w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3888   else if (i.tm.opcode_modifier.vexw)
3889     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3890   else
3891     w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3892
3893   /* The third byte of the EVEX prefix.  */
3894   i.vex.bytes[2] = ((w << 7)
3895                     | (register_specifier << 3)
3896                     | 4 /* Encode the U bit.  */
3897                     | i.tm.opcode_modifier.opcodeprefix);
3898
3899   /* The fourth byte of the EVEX prefix.  */
3900   /* The zeroing-masking bit.  */
3901   if (i.mask.reg && i.mask.zeroing)
3902     i.vex.bytes[3] |= 0x80;
3903
3904   /* Don't always set the broadcast bit if there is no RC.  */
3905   if (i.rounding.type == rc_none)
3906     {
3907       /* Encode the vector length.  */
3908       unsigned int vec_length;
3909
3910       if (!i.tm.opcode_modifier.evex
3911           || i.tm.opcode_modifier.evex == EVEXDYN)
3912         {
3913           unsigned int op;
3914
3915           /* Determine vector length from the last multi-length vector
3916              operand.  */
3917           for (op = i.operands; op--;)
3918             if (i.tm.operand_types[op].bitfield.xmmword
3919                 + i.tm.operand_types[op].bitfield.ymmword
3920                 + i.tm.operand_types[op].bitfield.zmmword > 1)
3921               {
3922                 if (i.types[op].bitfield.zmmword)
3923                   {
3924                     i.tm.opcode_modifier.evex = EVEX512;
3925                     break;
3926                   }
3927                 else if (i.types[op].bitfield.ymmword)
3928                   {
3929                     i.tm.opcode_modifier.evex = EVEX256;
3930                     break;
3931                   }
3932                 else if (i.types[op].bitfield.xmmword)
3933                   {
3934                     i.tm.opcode_modifier.evex = EVEX128;
3935                     break;
3936                   }
3937                 else if (i.broadcast.bytes && op == i.broadcast.operand)
3938                   {
3939                     switch (get_broadcast_bytes (&i.tm, true))
3940                       {
3941                         case 64:
3942                           i.tm.opcode_modifier.evex = EVEX512;
3943                           break;
3944                         case 32:
3945                           i.tm.opcode_modifier.evex = EVEX256;
3946                           break;
3947                         case 16:
3948                           i.tm.opcode_modifier.evex = EVEX128;
3949                           break;
3950                         default:
3951                           abort ();
3952                       }
3953                     break;
3954                   }
3955               }
3956
3957           if (op >= MAX_OPERANDS)
3958             abort ();
3959         }
3960
3961       switch (i.tm.opcode_modifier.evex)
3962         {
3963         case EVEXLIG: /* LL' is ignored */
3964           vec_length = evexlig << 5;
3965           break;
3966         case EVEX128:
3967           vec_length = 0 << 5;
3968           break;
3969         case EVEX256:
3970           vec_length = 1 << 5;
3971           break;
3972         case EVEX512:
3973           vec_length = 2 << 5;
3974           break;
3975         default:
3976           abort ();
3977           break;
3978         }
3979       i.vex.bytes[3] |= vec_length;
3980       /* Encode the broadcast bit.  */
3981       if (i.broadcast.bytes)
3982         i.vex.bytes[3] |= 0x10;
3983     }
3984   else if (i.rounding.type != saeonly)
3985     i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
3986   else
3987     i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3988
3989   if (i.mask.reg)
3990     i.vex.bytes[3] |= i.mask.reg->reg_num;
3991 }
3992
3993 static void
3994 process_immext (void)
3995 {
3996   expressionS *exp;
3997
3998   /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
3999      which is coded in the same place as an 8-bit immediate field
4000      would be.  Here we fake an 8-bit immediate operand from the
4001      opcode suffix stored in tm.extension_opcode.
4002
4003      AVX instructions also use this encoding, for some of
4004      3 argument instructions.  */
4005
4006   gas_assert (i.imm_operands <= 1
4007               && (i.operands <= 2
4008                   || (is_any_vex_encoding (&i.tm)
4009                       && i.operands <= 4)));
4010
4011   exp = &im_expressions[i.imm_operands++];
4012   i.op[i.operands].imms = exp;
4013   i.types[i.operands].bitfield.imm8 = 1;
4014   i.operands++;
4015   exp->X_op = O_constant;
4016   exp->X_add_number = i.tm.extension_opcode;
4017   i.tm.extension_opcode = None;
4018 }
4019
4020
4021 static int
4022 check_hle (void)
4023 {
4024   switch (i.tm.opcode_modifier.prefixok)
4025     {
4026     default:
4027       abort ();
4028     case PrefixLock:
4029     case PrefixNone:
4030     case PrefixNoTrack:
4031     case PrefixRep:
4032       as_bad (_("invalid instruction `%s' after `%s'"),
4033               i.tm.name, i.hle_prefix);
4034       return 0;
4035     case PrefixHLELock:
4036       if (i.prefix[LOCK_PREFIX])
4037         return 1;
4038       as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4039       return 0;
4040     case PrefixHLEAny:
4041       return 1;
4042     case PrefixHLERelease:
4043       if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4044         {
4045           as_bad (_("instruction `%s' after `xacquire' not allowed"),
4046                   i.tm.name);
4047           return 0;
4048         }
4049       if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4050         {
4051           as_bad (_("memory destination needed for instruction `%s'"
4052                     " after `xrelease'"), i.tm.name);
4053           return 0;
4054         }
4055       return 1;
4056     }
4057 }
4058
4059 /* Encode aligned vector move as unaligned vector move.  */
4060
4061 static void
4062 encode_with_unaligned_vector_move (void)
4063 {
4064   switch (i.tm.base_opcode)
4065     {
4066     case 0x28:  /* Load instructions.  */
4067     case 0x29:  /* Store instructions.  */
4068       /* movaps/movapd/vmovaps/vmovapd.  */
4069       if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4070           && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4071         i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4072       break;
4073     case 0x6f:  /* Load instructions.  */
4074     case 0x7f:  /* Store instructions.  */
4075       /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4076       if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4077           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4078         i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4079       break;
4080     default:
4081       break;
4082     }
4083 }
4084
4085 /* Try the shortest encoding by shortening operand size.

        Works in place on the global instruction state `i' (template,
        operand types, operands and prefixes); it takes no arguments and
        returns nothing.  The LEA rewrite is attempted first, as a separate
        step, so that its result can be shrunk further by the if/else-if
        chain that follows; at most one arm of that chain applies.  Several
        paths return early, which abandons any later rewrite.  */
4086
4087 static void
4088 optimize_encoding (void)
4089 {
4090   unsigned int j;
4091
4092   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4093       && i.tm.base_opcode == 0x8d)
4094     {
4095       /* Optimize: -O:
4096            lea symbol, %rN    -> mov $symbol, %rN
4097            lea (%rM), %rN     -> mov %rM, %rN
4098            lea (,%rM,1), %rN  -> mov %rM, %rN
4099
4100            and in 32-bit mode for 16-bit addressing
4101
4102            lea (%rM), %rN     -> movzx %rM, %rN
4103
4104            and in 64-bit mode zap 32-bit addressing in favor of using a
4105            32-bit (or less) destination.
4106        */
4107       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4108         {
4109           if (!i.op[1].regs->reg_type.bitfield.word)
4110             i.tm.opcode_modifier.size = SIZE32;
4111           i.prefix[ADDR_PREFIX] = 0;
4112         }
4113
4114       if (!i.index_reg && !i.base_reg)
4115         {
4116           /* Handle:
4117                lea symbol, %rN    -> mov $symbol, %rN
4118            */
4119           if (flag_code == CODE_64BIT)
4120             {
4121               /* Don't transform a relocation to a 16-bit one.  */
4122               if (i.op[0].disps
4123                   && i.op[0].disps->X_op != O_constant
4124                   && i.op[1].regs->reg_type.bitfield.word)
4125                 return;
4126
4127               if (!i.op[1].regs->reg_type.bitfield.qword
4128                   || i.tm.opcode_modifier.size == SIZE32)
4129                 {
4130                   i.tm.base_opcode = 0xb8;
4131                   i.tm.opcode_modifier.modrm = 0;
4132                   if (!i.op[1].regs->reg_type.bitfield.word)
4133                     i.types[0].bitfield.imm32 = 1;
4134                   else
4135                     {
4136                       i.tm.opcode_modifier.size = SIZE16;
4137                       i.types[0].bitfield.imm16 = 1;
4138                     }
4139                 }
4140               else
4141                 {
4142                   /* Subject to further optimization below.  */
4143                   i.tm.base_opcode = 0xc7;
4144                   i.tm.extension_opcode = 0;
4145                   i.types[0].bitfield.imm32s = 1;
4146                   i.types[0].bitfield.baseindex = 0;
4147                 }
4148             }
4149           /* Outside of 64-bit mode address and operand sizes have to match if
4150              a relocation is involved, as otherwise we wouldn't (currently) or
4151              even couldn't express the relocation correctly.  */
4152           else if (i.op[0].disps
4153                    && i.op[0].disps->X_op != O_constant
4154                    && ((!i.prefix[ADDR_PREFIX])
4155                        != (flag_code == CODE_32BIT
4156                            ? i.op[1].regs->reg_type.bitfield.dword
4157                            : i.op[1].regs->reg_type.bitfield.word)))
4158             return;
4159           /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4160              destination is going to grow encoding size.  */
4161           else if (flag_code == CODE_16BIT
4162                    && (optimize <= 1 || optimize_for_space)
4163                    && !i.prefix[ADDR_PREFIX]
4164                    && i.op[1].regs->reg_type.bitfield.dword)
4165             return;
4166           else
4167             {
4168               i.tm.base_opcode = 0xb8;
4169               i.tm.opcode_modifier.modrm = 0;
4170               if (i.op[1].regs->reg_type.bitfield.dword)
4171                 i.types[0].bitfield.imm32 = 1;
4172               else
4173                 i.types[0].bitfield.imm16 = 1;
4174
                   /* A constant address computed with 16-bit addressing
                      but stored to a 32-bit destination keeps only its low
                      16 bits.  */
4175               if (i.op[0].disps
4176                   && i.op[0].disps->X_op == O_constant
4177                   && i.op[1].regs->reg_type.bitfield.dword
4178                   /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4179                      GCC 5. */
4180                   && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4181                 i.op[0].disps->X_add_number &= 0xffff;
4182             }
4183
               /* Operand 0 is an immediate from here on; make the template
                  agree with the actual operand type and provide an
                  immediate expression if none is attached yet.  */
4184           i.tm.operand_types[0] = i.types[0];
4185           i.imm_operands = 1;
4186           if (!i.op[0].imms)
4187             {
4188               i.op[0].imms = &im_expressions[0];
4189               i.op[0].imms->X_op = O_absent;
4190             }
4191         }
           /* With a base or index register present, only a missing or
              zero constant displacement can be optimized.  */
4192       else if (i.op[0].disps
4193                   && (i.op[0].disps->X_op != O_constant
4194                       || i.op[0].disps->X_add_number))
4195         return;
4196       else
4197         {
4198           /* Handle:
4199                lea (%rM), %rN     -> mov %rM, %rN
4200                lea (,%rM,1), %rN  -> mov %rM, %rN
4201                lea (%rM), %rN     -> movzx %rM, %rN
4202            */
4203           const reg_entry *addr_reg;
4204
4205           if (!i.index_reg && i.base_reg->reg_num != RegIP)
4206             addr_reg = i.base_reg;
4207           else if (!i.base_reg
4208                    && i.index_reg->reg_num != RegIZ
4209                    && !i.log2_scale_factor)
4210             addr_reg = i.index_reg;
4211           else
4212             return;
4213
4214           if (addr_reg->reg_type.bitfield.word
4215               && i.op[1].regs->reg_type.bitfield.dword)
4216             {
4217               if (flag_code != CODE_32BIT)
4218                 return;
4219               i.tm.opcode_modifier.opcodespace = SPACE_0F;
4220               i.tm.base_opcode = 0xb7;
4221             }
4222           else
4223             i.tm.base_opcode = 0x8b;
4224
4225           if (addr_reg->reg_type.bitfield.dword
4226               && i.op[1].regs->reg_type.bitfield.qword)
4227             i.tm.opcode_modifier.size = SIZE32;
4228
4229           i.op[0].regs = addr_reg;
4230           i.reg_operands = 2;
4231         }
4232
           /* The memory operand was replaced by an immediate or a register
              above, so clear what is left of the addressing state.  */
4233       i.mem_operands = 0;
4234       i.disp_operands = 0;
4235       i.prefix[ADDR_PREFIX] = 0;
4236       i.prefix[SEG_PREFIX] = 0;
4237       i.seg[0] = NULL;
4238     }
4239
4240   if (optimize_for_space
4241       && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4242       && i.reg_operands == 1
4243       && i.imm_operands == 1
4244       && !i.types[1].bitfield.byte
4245       && i.op[0].imms->X_op == O_constant
4246       && fits_in_imm7 (i.op[0].imms->X_add_number)
4247       && (i.tm.base_opcode == 0xa8
4248           || (i.tm.base_opcode == 0xf6
4249               && i.tm.extension_opcode == 0x0)))
4250     {
4251       /* Optimize: -Os:
4252            test $imm7, %r64/%r32/%r16  -> test $imm7, %r8
4253        */
4254       unsigned int base_regnum = i.op[1].regs->reg_num;
4255       if (flag_code == CODE_64BIT || base_regnum < 4)
4256         {
4257           i.types[1].bitfield.byte = 1;
4258           /* Ignore the suffix.  */
4259           i.suffix = 0;
4260           /* Convert to byte registers.  */
4261           if (i.types[1].bitfield.word)
4262             j = 16;
4263           else if (i.types[1].bitfield.dword)
4264             j = 32;
4265           else
4266             j = 48;
4267           if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4268             j += 8;
               /* Step back J entries from the current register entry to
                  reach its byte-register counterpart.  NOTE(review): this
                  depends on how the register table is laid out -- confirm
                  against the table definition.  */
4269           i.op[1].regs -= j;
4270         }
4271     }
4272   else if (flag_code == CODE_64BIT
4273            && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4274            && ((i.types[1].bitfield.qword
4275                 && i.reg_operands == 1
4276                 && i.imm_operands == 1
4277                 && i.op[0].imms->X_op == O_constant
4278                 && ((i.tm.base_opcode == 0xb8
4279                      && i.tm.extension_opcode == None
4280                      && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4281                     || (fits_in_imm31 (i.op[0].imms->X_add_number)
4282                         && ((i.tm.base_opcode == 0x24
4283                              || i.tm.base_opcode == 0xa8)
4284                             || (i.tm.base_opcode == 0x80
4285                                 && i.tm.extension_opcode == 0x4)
4286                             || ((i.tm.base_opcode == 0xf6
4287                                  || (i.tm.base_opcode | 1) == 0xc7)
4288                                 && i.tm.extension_opcode == 0x0)))
4289                     || (fits_in_imm7 (i.op[0].imms->X_add_number)
4290                         && i.tm.base_opcode == 0x83
4291                         && i.tm.extension_opcode == 0x4)))
4292                || (i.types[0].bitfield.qword
4293                    && ((i.reg_operands == 2
4294                         && i.op[0].regs == i.op[1].regs
4295                         && (i.tm.base_opcode == 0x30
4296                             || i.tm.base_opcode == 0x28))
4297                        || (i.reg_operands == 1
4298                            && i.operands == 1
4299                            && i.tm.base_opcode == 0x30)))))
4300     {
4301       /* Optimize: -O:
4302            andq $imm31, %r64   -> andl $imm31, %r32
4303            andq $imm7, %r64    -> andl $imm7, %r32
4304            testq $imm31, %r64  -> testl $imm31, %r32
4305            xorq %r64, %r64     -> xorl %r32, %r32
4306            subq %r64, %r64     -> subl %r32, %r32
4307            movq $imm31, %r64   -> movl $imm31, %r32
4308            movq $imm32, %r64   -> movl $imm32, %r32
4309         */
4310       i.tm.opcode_modifier.size = SIZE32;
4311       if (i.imm_operands)
4312         {
4313           i.types[0].bitfield.imm32 = 1;
4314           i.types[0].bitfield.imm32s = 0;
4315           i.types[0].bitfield.imm64 = 0;
4316         }
4317       else
4318         {
4319           i.types[0].bitfield.dword = 1;
4320           i.types[0].bitfield.qword = 0;
4321         }
4322       i.types[1].bitfield.dword = 1;
4323       i.types[1].bitfield.qword = 0;
4324       if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
4325         {
4326           /* Handle
4327                movq $imm31, %r64   -> movl $imm31, %r32
4328                movq $imm32, %r64   -> movl $imm32, %r32
4329            */
4330           i.tm.operand_types[0].bitfield.imm32 = 1;
4331           i.tm.operand_types[0].bitfield.imm32s = 0;
4332           i.tm.operand_types[0].bitfield.imm64 = 0;
4333           if ((i.tm.base_opcode | 1) == 0xc7)
4334             {
4335               /* Handle
4336                    movq $imm31, %r64   -> movl $imm31, %r32
4337                */
4338               i.tm.base_opcode = 0xb8;
4339               i.tm.extension_opcode = None;
4340               i.tm.opcode_modifier.w = 0;
4341               i.tm.opcode_modifier.modrm = 0;
4342             }
4343         }
4344     }
4345   else if (optimize > 1
4346            && !optimize_for_space
4347            && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4348            && i.reg_operands == 2
4349            && i.op[0].regs == i.op[1].regs
4350            && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
4351                || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
4352            && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4353     {
4354       /* Optimize: -O2:
4355            andb %rN, %rN  -> testb %rN, %rN
4356            andw %rN, %rN  -> testw %rN, %rN
4357            andq %rN, %rN  -> testq %rN, %rN
4358            orb %rN, %rN   -> testb %rN, %rN
4359            orw %rN, %rN   -> testw %rN, %rN
4360            orq %rN, %rN   -> testq %rN, %rN
4361
4362            and outside of 64-bit mode
4363
4364            andl %rN, %rN  -> testl %rN, %rN
4365            orl %rN, %rN   -> testl %rN, %rN
4366        */
4367       i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4368     }
4369   else if (i.reg_operands == 3
4370            && i.op[0].regs == i.op[1].regs
4371            && !i.types[2].bitfield.xmmword
4372            && (i.tm.opcode_modifier.vex
4373                || ((!i.mask.reg || i.mask.zeroing)
4374                    && is_evex_encoding (&i.tm)
4375                    && (i.vec_encoding != vex_encoding_evex
4376                        || cpu_arch_isa_flags.bitfield.cpuavx512vl
4377                        || i.tm.cpu_flags.bitfield.cpuavx512vl
4378                        || (i.tm.operand_types[2].bitfield.zmmword
4379                            && i.types[2].bitfield.ymmword))))
4380            && i.tm.opcode_modifier.opcodespace == SPACE_0F
4381            && ((i.tm.base_opcode | 2) == 0x57
4382                || i.tm.base_opcode == 0xdf
4383                || i.tm.base_opcode == 0xef
4384                || (i.tm.base_opcode | 3) == 0xfb
4385                || i.tm.base_opcode == 0x42
4386                || i.tm.base_opcode == 0x47))
4387     {
4388       /* Optimize: -O1:
4389            VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4390            vpsubq and vpsubw:
4391              EVEX VOP %zmmM, %zmmM, %zmmN
4392                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4393                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4394              EVEX VOP %ymmM, %ymmM, %ymmN
4395                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4396                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4397              VEX VOP %ymmM, %ymmM, %ymmN
4398                -> VEX VOP %xmmM, %xmmM, %xmmN
4399            VOP, one of vpandn and vpxor:
4400              VEX VOP %ymmM, %ymmM, %ymmN
4401                -> VEX VOP %xmmM, %xmmM, %xmmN
4402            VOP, one of vpandnd and vpandnq:
4403              EVEX VOP %zmmM, %zmmM, %zmmN
4404                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4405                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4406              EVEX VOP %ymmM, %ymmM, %ymmN
4407                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4408                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4409            VOP, one of vpxord and vpxorq:
4410              EVEX VOP %zmmM, %zmmM, %zmmN
4411                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4412                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4413              EVEX VOP %ymmM, %ymmM, %ymmN
4414                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4415                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4416            VOP, one of kxord and kxorq:
4417              VEX VOP %kM, %kM, %kN
4418                -> VEX kxorw %kM, %kM, %kN
4419            VOP, one of kandnd and kandnq:
4420              VEX VOP %kM, %kM, %kN
4421                -> VEX kandnw %kM, %kM, %kN
4422        */
4423       if (is_evex_encoding (&i.tm))
4424         {
4425           if (i.vec_encoding != vex_encoding_evex)
4426             {
4427               i.tm.opcode_modifier.vex = VEX128;
4428               i.tm.opcode_modifier.vexw = VEXW0;
4429               i.tm.opcode_modifier.evex = 0;
4430             }
4431           else if (optimize > 1)
4432             i.tm.opcode_modifier.evex = EVEX128;
4433           else
4434             return;
4435         }
4436       else if (i.tm.operand_types[0].bitfield.class == RegMask)
4437         {
4438           i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4439           i.tm.opcode_modifier.vexw = VEXW0;
4440         }
4441       else
4442         i.tm.opcode_modifier.vex = VEX128;
4443
           /* When the result is VEX encoded, mark all three operands as
              XMM registers.  */
4444       if (i.tm.opcode_modifier.vex)
4445         for (j = 0; j < 3; j++)
4446           {
4447             i.types[j].bitfield.xmmword = 1;
4448             i.types[j].bitfield.ymmword = 0;
4449           }
4450     }
4451   else if (i.vec_encoding != vex_encoding_evex
4452            && !i.types[0].bitfield.zmmword
4453            && !i.types[1].bitfield.zmmword
4454            && !i.mask.reg
4455            && !i.broadcast.bytes
4456            && is_evex_encoding (&i.tm)
4457            && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4458                || (i.tm.base_opcode & ~4) == 0xdb
4459                || (i.tm.base_opcode & ~4) == 0xeb)
4460            && i.tm.extension_opcode == None)
4461     {
4462       /* Optimize: -O1:
4463            VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4464            vmovdqu32 and vmovdqu64:
4465              EVEX VOP %xmmM, %xmmN
4466                -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4467              EVEX VOP %ymmM, %ymmN
4468                -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4469              EVEX VOP %xmmM, mem
4470                -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4471              EVEX VOP %ymmM, mem
4472                -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4473              EVEX VOP mem, %xmmN
4474                -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4475              EVEX VOP mem, %ymmN
4476                -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4477            VOP, one of vpand, vpandn, vpor, vpxor:
4478              EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4479                -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4480              EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4481                -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4482              EVEX VOP{d,q} mem, %xmmM, %xmmN
4483                -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4484              EVEX VOP{d,q} mem, %ymmM, %ymmN
4485                -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4486        */
4487       for (j = 0; j < i.operands; j++)
4488         if (operand_type_check (i.types[j], disp)
4489             && i.op[j].disps->X_op == O_constant)
4490           {
4491             /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4492                has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4493                bytes, we choose EVEX Disp8 over VEX Disp32.  */
4494             int evex_disp8, vex_disp8;
4495             unsigned int memshift = i.memshift;
4496             offsetT n = i.op[j].disps->X_add_number;
4497
                 /* Test N twice: once with the current i.memshift and once
                    with i.memshift forced to 0.  NOTE(review): presumably
                    fits_in_disp8 () scales by i.memshift -- confirm.  If
                    the two answers differ, restore i.memshift and keep the
                    EVEX form.  */
4498             evex_disp8 = fits_in_disp8 (n);
4499             i.memshift = 0;
4500             vex_disp8 = fits_in_disp8 (n);
4501             if (evex_disp8 != vex_disp8)
4502               {
4503                 i.memshift = memshift;
4504                 return;
4505               }
4506
4507             i.types[j].bitfield.disp8 = vex_disp8;
4508             break;
4509           }
4510       if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4511           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4512         i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4513       i.tm.opcode_modifier.vex
4514         = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4515       i.tm.opcode_modifier.vexw = VEXW0;
4516       /* VPAND, VPOR, and VPXOR are commutative.  */
4517       if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4518         i.tm.opcode_modifier.commutative = 1;
4519       i.tm.opcode_modifier.evex = 0;
4520       i.tm.opcode_modifier.masking = 0;
4521       i.tm.opcode_modifier.broadcast = 0;
4522       i.tm.opcode_modifier.disp8memshift = 0;
4523       i.memshift = 0;
           /* J is below i.operands only when the loop above stopped at an
              operand with a constant displacement; recompute its disp8
              flag now that i.memshift is 0.  */
4524       if (j < i.operands)
4525         i.types[j].bitfield.disp8
4526           = fits_in_disp8 (i.op[j].disps->X_add_number);
4527     }
4528 }
4529
4530 /* Return non-zero for load instruction.  */
4531
4532 static int
4533 load_insn_p (void)
4534 {
4535   unsigned int dest;
4536   int any_vex_p = is_any_vex_encoding (&i.tm);
4537   unsigned int base_opcode = i.tm.base_opcode | 1;
4538
4539   if (!any_vex_p)
4540     {
4541       /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
4542          bndcn, bndstx, bndldx, clflushopt, clwb, cldemote.  */
4543       if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
4544         return 0;
4545
4546       /* pop.   */
4547       if (strcmp (i.tm.name, "pop") == 0)
4548         return 1;
4549     }
4550
4551   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4552     {
4553       /* popf, popa.   */
4554       if (i.tm.base_opcode == 0x9d
4555           || i.tm.base_opcode == 0x61)
4556         return 1;
4557
4558       /* movs, cmps, lods, scas.  */
4559       if ((i.tm.base_opcode | 0xb) == 0xaf)
4560         return 1;
4561
4562       /* outs, xlatb.  */
4563       if (base_opcode == 0x6f
4564           || i.tm.base_opcode == 0xd7)
4565         return 1;
4566       /* NB: For AMD-specific insns with implicit memory operands,
4567          they're intentionally not covered.  */
4568     }
4569
4570   /* No memory operand.  */
4571   if (!i.mem_operands)
4572     return 0;
4573
4574   if (any_vex_p)
4575     {
4576       /* vldmxcsr.  */
4577       if (i.tm.base_opcode == 0xae
4578           && i.tm.opcode_modifier.vex
4579           && i.tm.opcode_modifier.opcodespace == SPACE_0F
4580           && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4581           && i.tm.extension_opcode == 2)
4582         return 1;
4583     }
4584   else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4585     {
4586       /* test, not, neg, mul, imul, div, idiv.  */
4587       if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
4588           && i.tm.extension_opcode != 1)
4589         return 1;
4590
4591       /* inc, dec.  */
4592       if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4593         return 1;
4594
4595       /* add, or, adc, sbb, and, sub, xor, cmp.  */
4596       if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4597         return 1;
4598
4599       /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4600       if ((base_opcode == 0xc1
4601            || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
4602           && i.tm.extension_opcode != 6)
4603         return 1;
4604
4605       /* Check for x87 instructions.  */
4606       if (base_opcode >= 0xd8 && base_opcode <= 0xdf)
4607         {
4608           /* Skip fst, fstp, fstenv, fstcw.  */
4609           if (i.tm.base_opcode == 0xd9
4610               && (i.tm.extension_opcode == 2
4611                   || i.tm.extension_opcode == 3
4612                   || i.tm.extension_opcode == 6
4613                   || i.tm.extension_opcode == 7))
4614             return 0;
4615
4616           /* Skip fisttp, fist, fistp, fstp.  */
4617           if (i.tm.base_opcode == 0xdb
4618               && (i.tm.extension_opcode == 1
4619                   || i.tm.extension_opcode == 2
4620                   || i.tm.extension_opcode == 3
4621                   || i.tm.extension_opcode == 7))
4622             return 0;
4623
4624           /* Skip fisttp, fst, fstp, fsave, fstsw.  */
4625           if (i.tm.base_opcode == 0xdd
4626               && (i.tm.extension_opcode == 1
4627                   || i.tm.extension_opcode == 2
4628                   || i.tm.extension_opcode == 3
4629                   || i.tm.extension_opcode == 6
4630                   || i.tm.extension_opcode == 7))
4631             return 0;
4632
4633           /* Skip fisttp, fist, fistp, fbstp, fistp.  */
4634           if (i.tm.base_opcode == 0xdf
4635               && (i.tm.extension_opcode == 1
4636                   || i.tm.extension_opcode == 2
4637                   || i.tm.extension_opcode == 3
4638                   || i.tm.extension_opcode == 6
4639                   || i.tm.extension_opcode == 7))
4640             return 0;
4641
4642           return 1;
4643         }
4644     }
4645   else if (i.tm.opcode_modifier.opcodespace == SPACE_0F)
4646     {
4647       /* bt, bts, btr, btc.  */
4648       if (i.tm.base_opcode == 0xba
4649           && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
4650         return 1;
4651
4652       /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld.  */
4653       if (i.tm.base_opcode == 0xc7
4654           && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4655           && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
4656               || i.tm.extension_opcode == 6))
4657         return 1;
4658
4659       /* fxrstor, ldmxcsr, xrstor.  */
4660       if (i.tm.base_opcode == 0xae
4661           && (i.tm.extension_opcode == 1
4662               || i.tm.extension_opcode == 2
4663               || i.tm.extension_opcode == 5))
4664         return 1;
4665
4666       /* lgdt, lidt, lmsw.  */
4667       if (i.tm.base_opcode == 0x01
4668           && (i.tm.extension_opcode == 2
4669               || i.tm.extension_opcode == 3
4670               || i.tm.extension_opcode == 6))
4671         return 1;
4672     }
4673
4674   dest = i.operands - 1;
4675
4676   /* Check fake imm8 operand and 3 source operands.  */
4677   if ((i.tm.opcode_modifier.immext
4678        || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
4679       && i.types[dest].bitfield.imm8)
4680     dest--;
4681
4682   /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg.  */
4683   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4684       && (base_opcode == 0x1
4685           || base_opcode == 0x9
4686           || base_opcode == 0x11
4687           || base_opcode == 0x19
4688           || base_opcode == 0x21
4689           || base_opcode == 0x29
4690           || base_opcode == 0x31
4691           || base_opcode == 0x39
4692           || (base_opcode | 2) == 0x87))
4693     return 1;
4694
4695   /* xadd.  */
4696   if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4697       && base_opcode == 0xc1)
4698     return 1;
4699
4700   /* Check for load instruction.  */
4701   return (i.types[dest].bitfield.class != ClassNone
4702           || i.types[dest].bitfield.instance == Accum);
4703 }
4704
4705 /* Output lfence, 0xfaee8, after instruction.  */
4706
4707 static void
4708 insert_lfence_after (void)
4709 {
4710   if (lfence_after_load && load_insn_p ())
4711     {
4712       /* There are also two REP string instructions that require
4713          special treatment. Specifically, the compare string (CMPS)
4714          and scan string (SCAS) instructions set EFLAGS in a manner
4715          that depends on the data being compared/scanned. When used
4716          with a REP prefix, the number of iterations may therefore
4717          vary depending on this data. If the data is a program secret
4718          chosen by the adversary using an LVI method,
4719          then this data-dependent behavior may leak some aspect
4720          of the secret.  */
4721       if (((i.tm.base_opcode | 0x1) == 0xa7
4722            || (i.tm.base_opcode | 0x1) == 0xaf)
4723           && i.prefix[REP_PREFIX])
4724         {
4725             as_warn (_("`%s` changes flags which would affect control flow behavior"),
4726                      i.tm.name);
4727         }
4728       char *p = frag_more (3);
4729       *p++ = 0xf;
4730       *p++ = 0xae;
4731       *p = 0xe8;
4732     }
4733 }
4734
4735 /* Output lfence, 0xfaee8, before instruction.  */
4736
4737 static void
4738 insert_lfence_before (void)
4739 {
4740   char *p;
4741
4742   if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
4743     return;
4744
4745   if (i.tm.base_opcode == 0xff
4746       && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4747     {
4748       /* Insert lfence before indirect branch if needed.  */
4749
4750       if (lfence_before_indirect_branch == lfence_branch_none)
4751         return;
4752
4753       if (i.operands != 1)
4754         abort ();
4755
4756       if (i.reg_operands == 1)
4757         {
4758           /* Indirect branch via register.  Don't insert lfence with
4759              -mlfence-after-load=yes.  */
4760           if (lfence_after_load
4761               || lfence_before_indirect_branch == lfence_branch_memory)
4762             return;
4763         }
4764       else if (i.mem_operands == 1
4765                && lfence_before_indirect_branch != lfence_branch_register)
4766         {
4767           as_warn (_("indirect `%s` with memory operand should be avoided"),
4768                    i.tm.name);
4769           return;
4770         }
4771       else
4772         return;
4773
4774       if (last_insn.kind != last_insn_other
4775           && last_insn.seg == now_seg)
4776         {
4777           as_warn_where (last_insn.file, last_insn.line,
4778                          _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4779                          last_insn.name, i.tm.name);
4780           return;
4781         }
4782
4783       p = frag_more (3);
4784       *p++ = 0xf;
4785       *p++ = 0xae;
4786       *p = 0xe8;
4787       return;
4788     }
4789
4790   /* Output or/not/shl and lfence before near ret.  */
4791   if (lfence_before_ret != lfence_before_ret_none
4792       && (i.tm.base_opcode == 0xc2
4793           || i.tm.base_opcode == 0xc3))
4794     {
4795       if (last_insn.kind != last_insn_other
4796           && last_insn.seg == now_seg)
4797         {
4798           as_warn_where (last_insn.file, last_insn.line,
4799                          _("`%s` skips -mlfence-before-ret on `%s`"),
4800                          last_insn.name, i.tm.name);
4801           return;
4802         }
4803
4804       /* Near ret ingore operand size override under CPU64.  */
4805       char prefix = flag_code == CODE_64BIT
4806                     ? 0x48
4807                     : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
4808
4809       if (lfence_before_ret == lfence_before_ret_not)
4810         {
4811           /* not: 0xf71424, may add prefix
4812              for operand size override or 64-bit code.  */
4813           p = frag_more ((prefix ? 2 : 0) + 6 + 3);
4814           if (prefix)
4815             *p++ = prefix;
4816           *p++ = 0xf7;
4817           *p++ = 0x14;
4818           *p++ = 0x24;
4819           if (prefix)
4820             *p++ = prefix;
4821           *p++ = 0xf7;
4822           *p++ = 0x14;
4823           *p++ = 0x24;
4824         }
4825       else
4826         {
4827           p = frag_more ((prefix ? 1 : 0) + 4 + 3);
4828           if (prefix)
4829             *p++ = prefix;
4830           if (lfence_before_ret == lfence_before_ret_or)
4831             {
4832               /* or: 0x830c2400, may add prefix
4833                  for operand size override or 64-bit code.  */
4834               *p++ = 0x83;
4835               *p++ = 0x0c;
4836             }
4837           else
4838             {
4839               /* shl: 0xc1242400, may add prefix
4840                  for operand size override or 64-bit code.  */
4841               *p++ = 0xc1;
4842               *p++ = 0x24;
4843             }
4844
4845           *p++ = 0x24;
4846           *p++ = 0x0;
4847         }
4848
4849       *p++ = 0xf;
4850       *p++ = 0xae;
4851       *p = 0xe8;
4852     }
4853 }
4854
4855 /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
4856    parsing pass. Instead of introducing a rarely use new insn attribute this
4857    utilizes a common pattern between affected templates. It is deemed
4858    acceptable that this will lead to unnecessary pass 2 preparations in a
4859    limited set of cases.  */
4860 static INLINE bool may_need_pass2 (const insn_template *t)
4861 {
4862   return t->opcode_modifier.sse2avx
4863          /* Note that all SSE2AVX templates have at least one operand.  */
4864          ? t->operand_types[t->operands - 1].bitfield.class == RegSIMD
4865          : (t->opcode_modifier.opcodespace == SPACE_0F
4866             && (t->base_opcode | 1) == 0xbf)
4867            || (t->opcode_modifier.opcodespace == SPACE_BASE
4868                && t->base_opcode == 0x63);
4869 }
4870
4871 /* This is the guts of the machine-dependent assembler.  LINE points to a
4872    machine dependent instruction.  This function is supposed to emit
4873    the frags/bytes it assembles to.  */
4874
4875 void
4876 md_assemble (char *line)
4877 {
4878   unsigned int j;
4879   char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
4880   const char *end, *pass1_mnem = NULL;
4881   enum i386_error pass1_err = 0;
4882   const insn_template *t;
4883
4884   /* Initialize globals.  */
4885   current_templates = NULL;
4886  retry:
4887   memset (&i, '\0', sizeof (i));
4888   i.rounding.type = rc_none;
4889   for (j = 0; j < MAX_OPERANDS; j++)
4890     i.reloc[j] = NO_RELOC;
4891   memset (disp_expressions, '\0', sizeof (disp_expressions));
4892   memset (im_expressions, '\0', sizeof (im_expressions));
4893   save_stack_p = save_stack;
4894
4895   /* First parse an instruction mnemonic & call i386_operand for the operands.
4896      We assume that the scrubber has arranged it so that line[0] is the valid
4897      start of a (possibly prefixed) mnemonic.  */
4898
4899   end = parse_insn (line, mnemonic);
4900   if (end == NULL)
4901     {
4902       if (pass1_mnem != NULL)
4903         goto match_error;
4904       if (i.error != no_error)
4905         {
4906           gas_assert (current_templates != NULL);
4907           if (may_need_pass2 (current_templates->start) && !i.suffix)
4908             goto no_match;
4909           /* No point in trying a 2nd pass - it'll only find the same suffix
4910              again.  */
4911           mnem_suffix = i.suffix;
4912           goto match_error;
4913         }
4914       return;
4915     }
4916   if (may_need_pass2 (current_templates->start))
4917     {
4918       /* Make a copy of the full line in case we need to retry.  */
4919       copy = xstrdup (line);
4920     }
4921   line += end - line;
4922   mnem_suffix = i.suffix;
4923
4924   line = parse_operands (line, mnemonic);
4925   this_operand = -1;
4926   if (line == NULL)
4927     {
4928       free (copy);
4929       return;
4930     }
4931
4932   /* Now we've parsed the mnemonic into a set of templates, and have the
4933      operands at hand.  */
4934
4935   /* All Intel opcodes have reversed operands except for "bound", "enter",
4936      "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
4937      "rmpadjust", "rmpupdate", and "rmpquery".  We also don't reverse
4938      intersegment "jmp" and "call" instructions with 2 immediate operands so
4939      that the immediate segment precedes the offset consistently in Intel and
4940      AT&T modes.  */
4941   if (intel_syntax
4942       && i.operands > 1
4943       && (strcmp (mnemonic, "bound") != 0)
4944       && (strncmp (mnemonic, "invlpg", 6) != 0)
4945       && !startswith (mnemonic, "monitor")
4946       && !startswith (mnemonic, "mwait")
4947       && (strcmp (mnemonic, "pvalidate") != 0)
4948       && !startswith (mnemonic, "rmp")
4949       && (strcmp (mnemonic, "tpause") != 0)
4950       && (strcmp (mnemonic, "umwait") != 0)
4951       && !(i.operands == 2
4952            && operand_type_check (i.types[0], imm)
4953            && operand_type_check (i.types[1], imm)))
4954     swap_operands ();
4955
4956   /* The order of the immediates should be reversed
4957      for 2 immediates extrq and insertq instructions */
4958   if (i.imm_operands == 2
4959       && (strcmp (mnemonic, "extrq") == 0
4960           || strcmp (mnemonic, "insertq") == 0))
4961       swap_2_operands (0, 1);
4962
4963   if (i.imm_operands)
4964     optimize_imm ();
4965
4966   if (i.disp_operands && !want_disp32 (current_templates->start)
4967       && (!current_templates->start->opcode_modifier.jump
4968           || i.jumpabsolute || i.types[0].bitfield.baseindex))
4969     {
4970       for (j = 0; j < i.operands; ++j)
4971         {
4972           const expressionS *exp = i.op[j].disps;
4973
4974           if (!operand_type_check (i.types[j], disp))
4975             continue;
4976
4977           if (exp->X_op != O_constant)
4978             continue;
4979
4980           /* Since displacement is signed extended to 64bit, don't allow
4981              disp32 if it is out of range.  */
4982           if (fits_in_signed_long (exp->X_add_number))
4983             continue;
4984
4985           i.types[j].bitfield.disp32 = 0;
4986           if (i.types[j].bitfield.baseindex)
4987             {
4988               as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
4989                       (uint64_t) exp->X_add_number);
4990               return;
4991             }
4992         }
4993     }
4994
4995   /* Don't optimize displacement for movabs since it only takes 64bit
4996      displacement.  */
4997   if (i.disp_operands
4998       && i.disp_encoding <= disp_encoding_8bit
4999       && (flag_code != CODE_64BIT
5000           || strcmp (mnemonic, "movabs") != 0))
5001     optimize_disp ();
5002
5003   /* Next, we find a template that matches the given insn,
5004      making sure the overlap of the given operands types is consistent
5005      with the template operand types.  */
5006
5007   if (!(t = match_template (mnem_suffix)))
5008     {
5009       const char *err_msg;
5010
5011       if (copy && !mnem_suffix)
5012         {
5013           line = copy;
5014           copy = NULL;
5015   no_match:
5016           pass1_err = i.error;
5017           pass1_mnem = current_templates->start->name;
5018           goto retry;
5019         }
5020
5021       /* If a non-/only-64bit template (group) was found in pass 1, and if
5022          _some_ template (group) was found in pass 2, squash pass 1's
5023          error.  */
5024       if (pass1_err == unsupported_64bit)
5025         pass1_mnem = NULL;
5026
5027   match_error:
5028       free (copy);
5029
5030       switch (pass1_mnem ? pass1_err : i.error)
5031         {
5032         default:
5033           abort ();
5034         case operand_size_mismatch:
5035           err_msg = _("operand size mismatch");
5036           break;
5037         case operand_type_mismatch:
5038           err_msg = _("operand type mismatch");
5039           break;
5040         case register_type_mismatch:
5041           err_msg = _("register type mismatch");
5042           break;
5043         case number_of_operands_mismatch:
5044           err_msg = _("number of operands mismatch");
5045           break;
5046         case invalid_instruction_suffix:
5047           err_msg = _("invalid instruction suffix");
5048           break;
5049         case bad_imm4:
5050           err_msg = _("constant doesn't fit in 4 bits");
5051           break;
5052         case unsupported_with_intel_mnemonic:
5053           err_msg = _("unsupported with Intel mnemonic");
5054           break;
5055         case unsupported_syntax:
5056           err_msg = _("unsupported syntax");
5057           break;
5058         case unsupported:
5059           as_bad (_("unsupported instruction `%s'"),
5060                   pass1_mnem ? pass1_mnem : current_templates->start->name);
5061           return;
5062         case unsupported_on_arch:
5063           as_bad (_("`%s' is not supported on `%s%s'"),
5064                   pass1_mnem ? pass1_mnem : current_templates->start->name,
5065                   cpu_arch_name ? cpu_arch_name : default_arch,
5066                   cpu_sub_arch_name ? cpu_sub_arch_name : "");
5067           return;
5068         case unsupported_64bit:
5069           if (ISLOWER (mnem_suffix))
5070             as_bad (_("`%s%c' is %s supported in 64-bit mode"),
5071                     pass1_mnem ? pass1_mnem : current_templates->start->name,
5072                     mnem_suffix,
5073                     flag_code == CODE_64BIT ? _("not") : _("only"));
5074           else
5075             as_bad (_("`%s' is %s supported in 64-bit mode"),
5076                     pass1_mnem ? pass1_mnem : current_templates->start->name,
5077                     flag_code == CODE_64BIT ? _("not") : _("only"));
5078           return;
5079         case invalid_sib_address:
5080           err_msg = _("invalid SIB address");
5081           break;
5082         case invalid_vsib_address:
5083           err_msg = _("invalid VSIB address");
5084           break;
5085         case invalid_vector_register_set:
5086           err_msg = _("mask, index, and destination registers must be distinct");
5087           break;
5088         case invalid_tmm_register_set:
5089           err_msg = _("all tmm registers must be distinct");
5090           break;
5091         case invalid_dest_and_src_register_set:
5092           err_msg = _("destination and source registers must be distinct");
5093           break;
5094         case unsupported_vector_index_register:
5095           err_msg = _("unsupported vector index register");
5096           break;
5097         case unsupported_broadcast:
5098           err_msg = _("unsupported broadcast");
5099           break;
5100         case broadcast_needed:
5101           err_msg = _("broadcast is needed for operand of such type");
5102           break;
5103         case unsupported_masking:
5104           err_msg = _("unsupported masking");
5105           break;
5106         case mask_not_on_destination:
5107           err_msg = _("mask not on destination operand");
5108           break;
5109         case no_default_mask:
5110           err_msg = _("default mask isn't allowed");
5111           break;
5112         case unsupported_rc_sae:
5113           err_msg = _("unsupported static rounding/sae");
5114           break;
5115         case invalid_register_operand:
5116           err_msg = _("invalid register operand");
5117           break;
5118         }
5119       as_bad (_("%s for `%s'"), err_msg,
5120               pass1_mnem ? pass1_mnem : current_templates->start->name);
5121       return;
5122     }
5123
5124   free (copy);
5125
5126   if (sse_check != check_none
5127       /* The opcode space check isn't strictly needed; it's there only to
5128          bypass the logic below when easily possible.  */
5129       && t->opcode_modifier.opcodespace >= SPACE_0F
5130       && t->opcode_modifier.opcodespace <= SPACE_0F3A
5131       && !i.tm.cpu_flags.bitfield.cpusse4a
5132       && !is_any_vex_encoding (t))
5133     {
5134       bool simd = false;
5135
5136       for (j = 0; j < t->operands; ++j)
5137         {
5138           if (t->operand_types[j].bitfield.class == RegMMX)
5139             break;
5140           if (t->operand_types[j].bitfield.class == RegSIMD)
5141             simd = true;
5142         }
5143
5144       if (j >= t->operands && simd)
5145         (sse_check == check_warning
5146          ? as_warn
5147          : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
5148     }
5149
5150   if (i.tm.opcode_modifier.fwait)
5151     if (!add_prefix (FWAIT_OPCODE))
5152       return;
5153
5154   /* Check if REP prefix is OK.  */
5155   if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
5156     {
5157       as_bad (_("invalid instruction `%s' after `%s'"),
5158                 i.tm.name, i.rep_prefix);
5159       return;
5160     }
5161
5162   /* Check for lock without a lockable instruction.  Destination operand
5163      must be memory unless it is xchg (0x86).  */
5164   if (i.prefix[LOCK_PREFIX]
5165       && (i.tm.opcode_modifier.prefixok < PrefixLock
5166           || i.mem_operands == 0
5167           || (i.tm.base_opcode != 0x86
5168               && !(i.flags[i.operands - 1] & Operand_Mem))))
5169     {
5170       as_bad (_("expecting lockable instruction after `lock'"));
5171       return;
5172     }
5173
5174   if (is_any_vex_encoding (&i.tm)
5175       || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
5176       || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
5177     {
5178       /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns.  */
5179       if (i.prefix[DATA_PREFIX])
5180         {
5181           as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
5182           return;
5183         }
5184
5185       /* Don't allow e.g. KMOV in TLS code sequences.  */
5186       for (j = i.imm_operands; j < i.operands; ++j)
5187         switch (i.reloc[j])
5188           {
5189           case BFD_RELOC_386_TLS_GOTIE:
5190           case BFD_RELOC_386_TLS_LE_32:
5191           case BFD_RELOC_X86_64_GOTTPOFF:
5192           case BFD_RELOC_X86_64_TLSLD:
5193             as_bad (_("TLS relocation cannot be used with `%s'"), i.tm.name);
5194             return;
5195           default:
5196             break;
5197           }
5198     }
5199
5200   /* Check if HLE prefix is OK.  */
5201   if (i.hle_prefix && !check_hle ())
5202     return;
5203
5204   /* Check BND prefix.  */
5205   if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
5206     as_bad (_("expecting valid branch instruction after `bnd'"));
5207
5208   /* Check NOTRACK prefix.  */
5209   if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
5210     as_bad (_("expecting indirect branch instruction after `notrack'"));
5211
5212   if (i.tm.cpu_flags.bitfield.cpumpx)
5213     {
5214       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
5215         as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
5216       else if (flag_code != CODE_16BIT
5217                ? i.prefix[ADDR_PREFIX]
5218                : i.mem_operands && !i.prefix[ADDR_PREFIX])
5219         as_bad (_("16-bit address isn't allowed in MPX instructions"));
5220     }
5221
5222   /* Insert BND prefix.  */
5223   if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
5224     {
5225       if (!i.prefix[BND_PREFIX])
5226         add_prefix (BND_PREFIX_OPCODE);
5227       else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
5228         {
5229           as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
5230           i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
5231         }
5232     }
5233
5234   /* Check string instruction segment overrides.  */
5235   if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
5236     {
5237       gas_assert (i.mem_operands);
5238       if (!check_string ())
5239         return;
5240       i.disp_operands = 0;
5241     }
5242
5243   /* The memory operand of (%dx) should be only used with input/output
5244      instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee).  */
5245   if (i.input_output_operand
5246       && ((i.tm.base_opcode | 0x82) != 0xee
5247           || i.tm.opcode_modifier.opcodespace != SPACE_BASE))
5248     {
5249       as_bad (_("input/output port address isn't allowed with `%s'"),
5250               i.tm.name);
5251       return;
5252     }
5253
5254   if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
5255     optimize_encoding ();
5256
5257   if (use_unaligned_vector_move)
5258     encode_with_unaligned_vector_move ();
5259
5260   if (!process_suffix ())
5261     return;
5262
5263   /* Check if IP-relative addressing requirements can be satisfied.  */
5264   if (i.tm.cpu_flags.bitfield.cpuprefetchi
5265       && !(i.base_reg && i.base_reg->reg_num == RegIP))
5266     as_warn (_("'%s' only supports RIP-relative address"), i.tm.name);
5267
5268   /* Update operand types and check extended states.  */
5269   for (j = 0; j < i.operands; j++)
5270     {
5271       i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
5272       switch (i.tm.operand_types[j].bitfield.class)
5273         {
5274         default:
5275           break;
5276         case RegMMX:
5277           i.xstate |= xstate_mmx;
5278           break;
5279         case RegMask:
5280           i.xstate |= xstate_mask;
5281           break;
5282         case RegSIMD:
5283           if (i.tm.operand_types[j].bitfield.tmmword)
5284             i.xstate |= xstate_tmm;
5285           else if (i.tm.operand_types[j].bitfield.zmmword)
5286             i.xstate |= xstate_zmm;
5287           else if (i.tm.operand_types[j].bitfield.ymmword)
5288             i.xstate |= xstate_ymm;
5289           else if (i.tm.operand_types[j].bitfield.xmmword)
5290             i.xstate |= xstate_xmm;
5291           break;
5292         }
5293     }
5294
5295   /* Make still unresolved immediate matches conform to size of immediate
5296      given in i.suffix.  */
5297   if (!finalize_imm ())
5298     return;
5299
5300   if (i.types[0].bitfield.imm1)
5301     i.imm_operands = 0; /* kludge for shift insns.  */
5302
5303   /* We only need to check those implicit registers for instructions
5304      with 3 operands or less.  */
5305   if (i.operands <= 3)
5306     for (j = 0; j < i.operands; j++)
5307       if (i.types[j].bitfield.instance != InstanceNone
5308           && !i.types[j].bitfield.xmmword)
5309         i.reg_operands--;
5310
5311   /* For insns with operands there are more diddles to do to the opcode.  */
5312   if (i.operands)
5313     {
5314       if (!process_operands ())
5315         return;
5316     }
5317   else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
5318     {
5319       /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc.  */
5320       as_warn (_("translating to `%sp'"), i.tm.name);
5321     }
5322
5323   if (is_any_vex_encoding (&i.tm))
5324     {
5325       if (!cpu_arch_flags.bitfield.cpui286)
5326         {
5327           as_bad (_("instruction `%s' isn't supported outside of protected mode."),
5328                   i.tm.name);
5329           return;
5330         }
5331
5332       /* Check for explicit REX prefix.  */
5333       if (i.prefix[REX_PREFIX] || i.rex_encoding)
5334         {
5335           as_bad (_("REX prefix invalid with `%s'"), i.tm.name);
5336           return;
5337         }
5338
5339       if (i.tm.opcode_modifier.vex)
5340         build_vex_prefix (t);
5341       else
5342         build_evex_prefix ();
5343
5344       /* The individual REX.RXBW bits got consumed.  */
5345       i.rex &= REX_OPCODE;
5346     }
5347
5348   /* Handle conversion of 'int $3' --> special int3 insn.  XOP or FMA4
5349      instructions may define INT_OPCODE as well, so avoid this corner
5350      case for those instructions that use MODRM.  */
5351   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
5352       && i.tm.base_opcode == INT_OPCODE
5353       && !i.tm.opcode_modifier.modrm
5354       && i.op[0].imms->X_add_number == 3)
5355     {
5356       i.tm.base_opcode = INT3_OPCODE;
5357       i.imm_operands = 0;
5358     }
5359
5360   if ((i.tm.opcode_modifier.jump == JUMP
5361        || i.tm.opcode_modifier.jump == JUMP_BYTE
5362        || i.tm.opcode_modifier.jump == JUMP_DWORD)
5363       && i.op[0].disps->X_op == O_constant)
5364     {
5365       /* Convert "jmp constant" (and "call constant") to a jump (call) to
5366          the absolute address given by the constant.  Since ix86 jumps and
5367          calls are pc relative, we need to generate a reloc.  */
5368       i.op[0].disps->X_add_symbol = &abs_symbol;
5369       i.op[0].disps->X_op = O_symbol;
5370     }
5371
5372   /* For 8 bit registers we need an empty rex prefix.  Also if the
5373      instruction already has a prefix, we need to convert old
5374      registers to new ones.  */
5375
5376   if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
5377        && (i.op[0].regs->reg_flags & RegRex64) != 0)
5378       || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
5379           && (i.op[1].regs->reg_flags & RegRex64) != 0)
5380       || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
5381            || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
5382           && i.rex != 0))
5383     {
5384       int x;
5385
5386       i.rex |= REX_OPCODE;
5387       for (x = 0; x < 2; x++)
5388         {
5389           /* Look for 8 bit operand that uses old registers.  */
5390           if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
5391               && (i.op[x].regs->reg_flags & RegRex64) == 0)
5392             {
5393               gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5394               /* In case it is "hi" register, give up.  */
5395               if (i.op[x].regs->reg_num > 3)
5396                 as_bad (_("can't encode register '%s%s' in an "
5397                           "instruction requiring REX prefix."),
5398                         register_prefix, i.op[x].regs->reg_name);
5399
5400               /* Otherwise it is equivalent to the extended register.
5401                  Since the encoding doesn't change this is merely
5402                  cosmetic cleanup for debug output.  */
5403
5404               i.op[x].regs = i.op[x].regs + 8;
5405             }
5406         }
5407     }
5408
5409   if (i.rex == 0 && i.rex_encoding)
5410     {
5411       /* Check if we can add a REX_OPCODE byte.  Look for 8 bit operand
5412          that uses legacy register.  If it is "hi" register, don't add
5413          the REX_OPCODE byte.  */
5414       int x;
5415       for (x = 0; x < 2; x++)
5416         if (i.types[x].bitfield.class == Reg
5417             && i.types[x].bitfield.byte
5418             && (i.op[x].regs->reg_flags & RegRex64) == 0
5419             && i.op[x].regs->reg_num > 3)
5420           {
5421             gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5422             i.rex_encoding = false;
5423             break;
5424           }
5425
5426       if (i.rex_encoding)
5427         i.rex = REX_OPCODE;
5428     }
5429
5430   if (i.rex != 0)
5431     add_prefix (REX_OPCODE | i.rex);
5432
5433   insert_lfence_before ();
5434
5435   /* We are ready to output the insn.  */
5436   output_insn ();
5437
5438   insert_lfence_after ();
5439
5440   last_insn.seg = now_seg;
5441
5442   if (i.tm.opcode_modifier.isprefix)
5443     {
5444       last_insn.kind = last_insn_prefix;
5445       last_insn.name = i.tm.name;
5446       last_insn.file = as_where (&last_insn.line);
5447     }
5448   else
5449     last_insn.kind = last_insn_other;
5450 }
5451
5452 /* The Q suffix is generally valid only in 64-bit mode, with very few
5453    exceptions: fild, fistp, fisttp, and cmpxchg8b.  Note that for fild
5454    and fisttp only one of their two templates is matched below: That's
5455    sufficient since other relevant attributes are the same between both
5456    respective templates.  */
5457 static INLINE bool q_suffix_allowed(const insn_template *t)
5458 {
5459   return flag_code == CODE_64BIT
5460          || (t->opcode_modifier.opcodespace == SPACE_BASE
5461              && t->base_opcode == 0xdf
5462              && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
5463          || (t->opcode_modifier.opcodespace == SPACE_0F
5464              && t->base_opcode == 0xc7
5465              && t->opcode_modifier.opcodeprefix == PREFIX_NONE
5466              && t->extension_opcode == 1) /* cmpxchg8b */;
5467 }
5468
5469 static const char *
5470 parse_insn (const char *line, char *mnemonic)
5471 {
5472   const char *l = line, *token_start = l;
5473   char *mnem_p;
5474   bool pass1 = !current_templates;
5475   int supported;
5476   const insn_template *t;
5477   char *dot_p = NULL;
5478
5479   while (1)
5480     {
5481       mnem_p = mnemonic;
5482       while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5483         {
5484           if (*mnem_p == '.')
5485             dot_p = mnem_p;
5486           mnem_p++;
5487           if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5488             {
5489               as_bad (_("no such instruction: `%s'"), token_start);
5490               return NULL;
5491             }
5492           l++;
5493         }
5494       if (!is_space_char (*l)
5495           && *l != END_OF_INSN
5496           && (intel_syntax
5497               || (*l != PREFIX_SEPARATOR
5498                   && *l != ',')))
5499         {
5500           as_bad (_("invalid character %s in mnemonic"),
5501                   output_invalid (*l));
5502           return NULL;
5503         }
5504       if (token_start == l)
5505         {
5506           if (!intel_syntax && *l == PREFIX_SEPARATOR)
5507             as_bad (_("expecting prefix; got nothing"));
5508           else
5509             as_bad (_("expecting mnemonic; got nothing"));
5510           return NULL;
5511         }
5512
5513       /* Look up instruction (or prefix) via hash table.  */
5514       current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5515
5516       if (*l != END_OF_INSN
5517           && (!is_space_char (*l) || l[1] != END_OF_INSN)
5518           && current_templates
5519           && current_templates->start->opcode_modifier.isprefix)
5520         {
5521           if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
5522             {
5523               as_bad ((flag_code != CODE_64BIT
5524                        ? _("`%s' is only supported in 64-bit mode")
5525                        : _("`%s' is not supported in 64-bit mode")),
5526                       current_templates->start->name);
5527               return NULL;
5528             }
5529           /* If we are in 16-bit mode, do not allow addr16 or data16.
5530              Similarly, in 32-bit mode, do not allow addr32 or data32.  */
5531           if ((current_templates->start->opcode_modifier.size == SIZE16
5532                || current_templates->start->opcode_modifier.size == SIZE32)
5533               && flag_code != CODE_64BIT
5534               && ((current_templates->start->opcode_modifier.size == SIZE32)
5535                   ^ (flag_code == CODE_16BIT)))
5536             {
5537               as_bad (_("redundant %s prefix"),
5538                       current_templates->start->name);
5539               return NULL;
5540             }
5541
5542           if (current_templates->start->base_opcode == PSEUDO_PREFIX)
5543             {
5544               /* Handle pseudo prefixes.  */
5545               switch (current_templates->start->extension_opcode)
5546                 {
5547                 case Prefix_Disp8:
5548                   /* {disp8} */
5549                   i.disp_encoding = disp_encoding_8bit;
5550                   break;
5551                 case Prefix_Disp16:
5552                   /* {disp16} */
5553                   i.disp_encoding = disp_encoding_16bit;
5554                   break;
5555                 case Prefix_Disp32:
5556                   /* {disp32} */
5557                   i.disp_encoding = disp_encoding_32bit;
5558                   break;
5559                 case Prefix_Load:
5560                   /* {load} */
5561                   i.dir_encoding = dir_encoding_load;
5562                   break;
5563                 case Prefix_Store:
5564                   /* {store} */
5565                   i.dir_encoding = dir_encoding_store;
5566                   break;
5567                 case Prefix_VEX:
5568                   /* {vex} */
5569                   i.vec_encoding = vex_encoding_vex;
5570                   break;
5571                 case Prefix_VEX3:
5572                   /* {vex3} */
5573                   i.vec_encoding = vex_encoding_vex3;
5574                   break;
5575                 case Prefix_EVEX:
5576                   /* {evex} */
5577                   i.vec_encoding = vex_encoding_evex;
5578                   break;
5579                 case Prefix_REX:
5580                   /* {rex} */
5581                   i.rex_encoding = true;
5582                   break;
5583                 case Prefix_NoOptimize:
5584                   /* {nooptimize} */
5585                   i.no_optimize = true;
5586                   break;
5587                 default:
5588                   abort ();
5589                 }
5590             }
5591           else
5592             {
5593               /* Add prefix, checking for repeated prefixes.  */
5594               switch (add_prefix (current_templates->start->base_opcode))
5595                 {
5596                 case PREFIX_EXIST:
5597                   return NULL;
5598                 case PREFIX_DS:
5599                   if (current_templates->start->cpu_flags.bitfield.cpuibt)
5600                     i.notrack_prefix = current_templates->start->name;
5601                   break;
5602                 case PREFIX_REP:
5603                   if (current_templates->start->cpu_flags.bitfield.cpuhle)
5604                     i.hle_prefix = current_templates->start->name;
5605                   else if (current_templates->start->cpu_flags.bitfield.cpumpx)
5606                     i.bnd_prefix = current_templates->start->name;
5607                   else
5608                     i.rep_prefix = current_templates->start->name;
5609                   break;
5610                 default:
5611                   break;
5612                 }
5613             }
5614           /* Skip past PREFIX_SEPARATOR and reset token_start.  */
5615           token_start = ++l;
5616         }
5617       else
5618         break;
5619     }
5620
5621   if (!current_templates)
5622     {
5623       /* Deprecated functionality (new code should use pseudo-prefixes instead):
5624          Check if we should swap operand or force 32bit displacement in
5625          encoding.  */
5626       if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5627         i.dir_encoding = dir_encoding_swap;
5628       else if (mnem_p - 3 == dot_p
5629                && dot_p[1] == 'd'
5630                && dot_p[2] == '8')
5631         i.disp_encoding = disp_encoding_8bit;
5632       else if (mnem_p - 4 == dot_p
5633                && dot_p[1] == 'd'
5634                && dot_p[2] == '3'
5635                && dot_p[3] == '2')
5636         i.disp_encoding = disp_encoding_32bit;
5637       else
5638         goto check_suffix;
5639       mnem_p = dot_p;
5640       *dot_p = '\0';
5641       current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5642     }
5643
5644   if (!current_templates || !pass1)
5645     {
5646       current_templates = NULL;
5647
5648     check_suffix:
5649       if (mnem_p > mnemonic)
5650         {
5651           /* See if we can get a match by trimming off a suffix.  */
5652           switch (mnem_p[-1])
5653             {
5654             case WORD_MNEM_SUFFIX:
5655               if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5656                 i.suffix = SHORT_MNEM_SUFFIX;
5657               else
5658                 /* Fall through.  */
5659               case BYTE_MNEM_SUFFIX:
5660               case QWORD_MNEM_SUFFIX:
5661                 i.suffix = mnem_p[-1];
5662               mnem_p[-1] = '\0';
5663               current_templates
5664                 = (const templates *) str_hash_find (op_hash, mnemonic);
5665               break;
5666             case SHORT_MNEM_SUFFIX:
5667             case LONG_MNEM_SUFFIX:
5668               if (!intel_syntax)
5669                 {
5670                   i.suffix = mnem_p[-1];
5671                   mnem_p[-1] = '\0';
5672                   current_templates
5673                     = (const templates *) str_hash_find (op_hash, mnemonic);
5674                 }
5675               break;
5676
5677               /* Intel Syntax.  */
5678             case 'd':
5679               if (intel_syntax)
5680                 {
5681                   if (intel_float_operand (mnemonic) == 1)
5682                     i.suffix = SHORT_MNEM_SUFFIX;
5683                   else
5684                     i.suffix = LONG_MNEM_SUFFIX;
5685                   mnem_p[-1] = '\0';
5686                   current_templates
5687                     = (const templates *) str_hash_find (op_hash, mnemonic);
5688                 }
5689               /* For compatibility reasons accept MOVSD and CMPSD without
5690                  operands even in AT&T mode.  */
5691               else if (*l == END_OF_INSN
5692                        || (is_space_char (*l) && l[1] == END_OF_INSN))
5693                 {
5694                   mnem_p[-1] = '\0';
5695                   current_templates
5696                     = (const templates *) str_hash_find (op_hash, mnemonic);
5697                   if (current_templates != NULL
5698                       /* MOVS or CMPS */
5699                       && (current_templates->start->base_opcode | 2) == 0xa6
5700                       && current_templates->start->opcode_modifier.opcodespace
5701                          == SPACE_BASE
5702                       && mnem_p[-2] == 's')
5703                     {
5704                       as_warn (_("found `%sd'; assuming `%sl' was meant"),
5705                                mnemonic, mnemonic);
5706                       i.suffix = LONG_MNEM_SUFFIX;
5707                     }
5708                   else
5709                     {
5710                       current_templates = NULL;
5711                       mnem_p[-1] = 'd';
5712                     }
5713                 }
5714               break;
5715             }
5716         }
5717
5718       if (!current_templates)
5719         {
5720           if (pass1)
5721             as_bad (_("no such instruction: `%s'"), token_start);
5722           return NULL;
5723         }
5724     }
5725
5726   if (current_templates->start->opcode_modifier.jump == JUMP
5727       || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5728     {
5729       /* Check for a branch hint.  We allow ",pt" and ",pn" for
5730          predict taken and predict not taken respectively.
5731          I'm not sure that branch hints actually do anything on loop
5732          and jcxz insns (JumpByte) for current Pentium4 chips.  They
5733          may work in the future and it doesn't hurt to accept them
5734          now.  */
5735       if (l[0] == ',' && l[1] == 'p')
5736         {
5737           if (l[2] == 't')
5738             {
5739               if (!add_prefix (DS_PREFIX_OPCODE))
5740                 return NULL;
5741               l += 3;
5742             }
5743           else if (l[2] == 'n')
5744             {
5745               if (!add_prefix (CS_PREFIX_OPCODE))
5746                 return NULL;
5747               l += 3;
5748             }
5749         }
5750     }
5751   /* Any other comma loses.  */
5752   if (*l == ',')
5753     {
5754       as_bad (_("invalid character %s in mnemonic"),
5755               output_invalid (*l));
5756       return NULL;
5757     }
5758
5759   /* Check if instruction is supported on specified architecture.  */
5760   supported = 0;
5761   for (t = current_templates->start; t < current_templates->end; ++t)
5762     {
5763       supported |= cpu_flags_match (t);
5764
5765       if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
5766         supported &= ~CPU_FLAGS_64BIT_MATCH;
5767
5768       if (supported == CPU_FLAGS_PERFECT_MATCH)
5769         return l;
5770     }
5771
5772   if (pass1)
5773     {
5774       if (supported & CPU_FLAGS_64BIT_MATCH)
5775         i.error = unsupported_on_arch;
5776       else
5777         i.error = unsupported_64bit;
5778     }
5779
5780   return NULL;
5781 }
5782
5783 static char *
5784 parse_operands (char *l, const char *mnemonic)
5785 {
5786   char *token_start;
5787
5788   /* 1 if operand is pending after ','.  */
5789   unsigned int expecting_operand = 0;
5790
5791   while (*l != END_OF_INSN)
5792     {
5793       /* Non-zero if operand parens not balanced.  */
5794       unsigned int paren_not_balanced = 0;
5795       /* True if inside double quotes.  */
5796       bool in_quotes = false;
5797
5798       /* Skip optional white space before operand.  */
5799       if (is_space_char (*l))
5800         ++l;
5801       if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5802         {
5803           as_bad (_("invalid character %s before operand %d"),
5804                   output_invalid (*l),
5805                   i.operands + 1);
5806           return NULL;
5807         }
5808       token_start = l;  /* After white space.  */
5809       while (in_quotes || paren_not_balanced || *l != ',')
5810         {
5811           if (*l == END_OF_INSN)
5812             {
5813               if (in_quotes)
5814                 {
5815                   as_bad (_("unbalanced double quotes in operand %d."),
5816                           i.operands + 1);
5817                   return NULL;
5818                 }
5819               if (paren_not_balanced)
5820                 {
5821                   know (!intel_syntax);
5822                   as_bad (_("unbalanced parenthesis in operand %d."),
5823                           i.operands + 1);
5824                   return NULL;
5825                 }
5826               else
5827                 break;  /* we are done */
5828             }
5829           else if (*l == '\\' && l[1] == '"')
5830             ++l;
5831           else if (*l == '"')
5832             in_quotes = !in_quotes;
5833           else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
5834             {
5835               as_bad (_("invalid character %s in operand %d"),
5836                       output_invalid (*l),
5837                       i.operands + 1);
5838               return NULL;
5839             }
5840           if (!intel_syntax && !in_quotes)
5841             {
5842               if (*l == '(')
5843                 ++paren_not_balanced;
5844               if (*l == ')')
5845                 --paren_not_balanced;
5846             }
5847           l++;
5848         }
5849       if (l != token_start)
5850         {                       /* Yes, we've read in another operand.  */
5851           unsigned int operand_ok;
5852           this_operand = i.operands++;
5853           if (i.operands > MAX_OPERANDS)
5854             {
5855               as_bad (_("spurious operands; (%d operands/instruction max)"),
5856                       MAX_OPERANDS);
5857               return NULL;
5858             }
5859           i.types[this_operand].bitfield.unspecified = 1;
5860           /* Now parse operand adding info to 'i' as we go along.  */
5861           END_STRING_AND_SAVE (l);
5862
5863           if (i.mem_operands > 1)
5864             {
5865               as_bad (_("too many memory references for `%s'"),
5866                       mnemonic);
5867               return 0;
5868             }
5869
5870           if (intel_syntax)
5871             operand_ok =
5872               i386_intel_operand (token_start,
5873                                   intel_float_operand (mnemonic));
5874           else
5875             operand_ok = i386_att_operand (token_start);
5876
5877           RESTORE_END_STRING (l);
5878           if (!operand_ok)
5879             return NULL;
5880         }
5881       else
5882         {
5883           if (expecting_operand)
5884             {
5885             expecting_operand_after_comma:
5886               as_bad (_("expecting operand after ','; got nothing"));
5887               return NULL;
5888             }
5889           if (*l == ',')
5890             {
5891               as_bad (_("expecting operand before ','; got nothing"));
5892               return NULL;
5893             }
5894         }
5895
5896       /* Now *l must be either ',' or END_OF_INSN.  */
5897       if (*l == ',')
5898         {
5899           if (*++l == END_OF_INSN)
5900             {
5901               /* Just skip it, if it's \n complain.  */
5902               goto expecting_operand_after_comma;
5903             }
5904           expecting_operand = 1;
5905         }
5906     }
5907   return l;
5908 }
5909
5910 static void
5911 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
5912 {
5913   union i386_op temp_op;
5914   i386_operand_type temp_type;
5915   unsigned int temp_flags;
5916   enum bfd_reloc_code_real temp_reloc;
5917
5918   temp_type = i.types[xchg2];
5919   i.types[xchg2] = i.types[xchg1];
5920   i.types[xchg1] = temp_type;
5921
5922   temp_flags = i.flags[xchg2];
5923   i.flags[xchg2] = i.flags[xchg1];
5924   i.flags[xchg1] = temp_flags;
5925
5926   temp_op = i.op[xchg2];
5927   i.op[xchg2] = i.op[xchg1];
5928   i.op[xchg1] = temp_op;
5929
5930   temp_reloc = i.reloc[xchg2];
5931   i.reloc[xchg2] = i.reloc[xchg1];
5932   i.reloc[xchg1] = temp_reloc;
5933
5934   if (i.mask.reg)
5935     {
5936       if (i.mask.operand == xchg1)
5937         i.mask.operand = xchg2;
5938       else if (i.mask.operand == xchg2)
5939         i.mask.operand = xchg1;
5940     }
5941   if (i.broadcast.type || i.broadcast.bytes)
5942     {
5943       if (i.broadcast.operand == xchg1)
5944         i.broadcast.operand = xchg2;
5945       else if (i.broadcast.operand == xchg2)
5946         i.broadcast.operand = xchg1;
5947     }
5948 }
5949
5950 static void
5951 swap_operands (void)
5952 {
5953   switch (i.operands)
5954     {
5955     case 5:
5956     case 4:
5957       swap_2_operands (1, i.operands - 2);
5958       /* Fall through.  */
5959     case 3:
5960     case 2:
5961       swap_2_operands (0, i.operands - 1);
5962       break;
5963     default:
5964       abort ();
5965     }
5966
5967   if (i.mem_operands == 2)
5968     {
5969       const reg_entry *temp_seg;
5970       temp_seg = i.seg[0];
5971       i.seg[0] = i.seg[1];
5972       i.seg[1] = temp_seg;
5973     }
5974 }
5975
5976 /* Try to ensure constant immediates are represented in the smallest
5977    opcode possible.  */
5978 static void
5979 optimize_imm (void)
5980 {
5981   char guess_suffix = 0;
5982   int op;
5983
5984   if (i.suffix)
5985     guess_suffix = i.suffix;
5986   else if (i.reg_operands)
5987     {
5988       /* Figure out a suffix from the last register operand specified.
5989          We can't do this properly yet, i.e. excluding special register
5990          instances, but the following works for instructions with
5991          immediates.  In any case, we can't set i.suffix yet.  */
5992       for (op = i.operands; --op >= 0;)
5993         if (i.types[op].bitfield.class != Reg)
5994           continue;
5995         else if (i.types[op].bitfield.byte)
5996           {
5997             guess_suffix = BYTE_MNEM_SUFFIX;
5998             break;
5999           }
6000         else if (i.types[op].bitfield.word)
6001           {
6002             guess_suffix = WORD_MNEM_SUFFIX;
6003             break;
6004           }
6005         else if (i.types[op].bitfield.dword)
6006           {
6007             guess_suffix = LONG_MNEM_SUFFIX;
6008             break;
6009           }
6010         else if (i.types[op].bitfield.qword)
6011           {
6012             guess_suffix = QWORD_MNEM_SUFFIX;
6013             break;
6014           }
6015     }
6016   else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
6017     guess_suffix = WORD_MNEM_SUFFIX;
6018
6019   for (op = i.operands; --op >= 0;)
6020     if (operand_type_check (i.types[op], imm))
6021       {
6022         switch (i.op[op].imms->X_op)
6023           {
6024           case O_constant:
6025             /* If a suffix is given, this operand may be shortened.  */
6026             switch (guess_suffix)
6027               {
6028               case LONG_MNEM_SUFFIX:
6029                 i.types[op].bitfield.imm32 = 1;
6030                 i.types[op].bitfield.imm64 = 1;
6031                 break;
6032               case WORD_MNEM_SUFFIX:
6033                 i.types[op].bitfield.imm16 = 1;
6034                 i.types[op].bitfield.imm32 = 1;
6035                 i.types[op].bitfield.imm32s = 1;
6036                 i.types[op].bitfield.imm64 = 1;
6037                 break;
6038               case BYTE_MNEM_SUFFIX:
6039                 i.types[op].bitfield.imm8 = 1;
6040                 i.types[op].bitfield.imm8s = 1;
6041                 i.types[op].bitfield.imm16 = 1;
6042                 i.types[op].bitfield.imm32 = 1;
6043                 i.types[op].bitfield.imm32s = 1;
6044                 i.types[op].bitfield.imm64 = 1;
6045                 break;
6046               }
6047
6048             /* If this operand is at most 16 bits, convert it
6049                to a signed 16 bit number before trying to see
6050                whether it will fit in an even smaller size.
6051                This allows a 16-bit operand such as $0xffe0 to
6052                be recognised as within Imm8S range.  */
6053             if ((i.types[op].bitfield.imm16)
6054                 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
6055               {
6056                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6057                                                 ^ 0x8000) - 0x8000);
6058               }
6059 #ifdef BFD64
6060             /* Store 32-bit immediate in 64-bit for 64-bit BFD.  */
6061             if ((i.types[op].bitfield.imm32)
6062                 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
6063               {
6064                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6065                                                 ^ ((offsetT) 1 << 31))
6066                                                - ((offsetT) 1 << 31));
6067               }
6068 #endif
6069             i.types[op]
6070               = operand_type_or (i.types[op],
6071                                  smallest_imm_type (i.op[op].imms->X_add_number));
6072
6073             /* We must avoid matching of Imm32 templates when 64bit
6074                only immediate is available.  */
6075             if (guess_suffix == QWORD_MNEM_SUFFIX)
6076               i.types[op].bitfield.imm32 = 0;
6077             break;
6078
6079           case O_absent:
6080           case O_register:
6081             abort ();
6082
6083             /* Symbols and expressions.  */
6084           default:
6085             /* Convert symbolic operand to proper sizes for matching, but don't
6086                prevent matching a set of insns that only supports sizes other
6087                than those matching the insn suffix.  */
6088             {
6089               i386_operand_type mask, allowed;
6090               const insn_template *t = current_templates->start;
6091
6092               operand_type_set (&mask, 0);
6093               switch (guess_suffix)
6094                 {
6095                 case QWORD_MNEM_SUFFIX:
6096                   mask.bitfield.imm64 = 1;
6097                   mask.bitfield.imm32s = 1;
6098                   break;
6099                 case LONG_MNEM_SUFFIX:
6100                   mask.bitfield.imm32 = 1;
6101                   break;
6102                 case WORD_MNEM_SUFFIX:
6103                   mask.bitfield.imm16 = 1;
6104                   break;
6105                 case BYTE_MNEM_SUFFIX:
6106                   mask.bitfield.imm8 = 1;
6107                   break;
6108                 default:
6109                   break;
6110                 }
6111
6112               allowed = operand_type_and (t->operand_types[op], mask);
6113               while (++t < current_templates->end)
6114                 {
6115                   allowed = operand_type_or (allowed, t->operand_types[op]);
6116                   allowed = operand_type_and (allowed, mask);
6117                 }
6118
6119               if (!operand_type_all_zero (&allowed))
6120                 i.types[op] = operand_type_and (i.types[op], mask);
6121             }
6122             break;
6123           }
6124       }
6125 }
6126
6127 /* Try to use the smallest displacement type too.  */
6128 static void
6129 optimize_disp (void)
6130 {
6131   int op;
6132
6133   for (op = i.operands; --op >= 0;)
6134     if (operand_type_check (i.types[op], disp))
6135       {
6136         if (i.op[op].disps->X_op == O_constant)
6137           {
6138             offsetT op_disp = i.op[op].disps->X_add_number;
6139
6140             if (!op_disp && i.types[op].bitfield.baseindex)
6141               {
6142                 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6143                 i.op[op].disps = NULL;
6144                 i.disp_operands--;
6145                 continue;
6146               }
6147
6148             if (i.types[op].bitfield.disp16
6149                 && fits_in_unsigned_word (op_disp))
6150               {
6151                 /* If this operand is at most 16 bits, convert
6152                    to a signed 16 bit number and don't use 64bit
6153                    displacement.  */
6154                 op_disp = ((op_disp ^ 0x8000) - 0x8000);
6155                 i.types[op].bitfield.disp64 = 0;
6156               }
6157
6158 #ifdef BFD64
6159             /* Optimize 64-bit displacement to 32-bit for 64-bit BFD.  */
6160             if ((flag_code != CODE_64BIT
6161                  ? i.types[op].bitfield.disp32
6162                  : want_disp32 (current_templates->start)
6163                    && (!current_templates->start->opcode_modifier.jump
6164                        || i.jumpabsolute || i.types[op].bitfield.baseindex))
6165                 && fits_in_unsigned_long (op_disp))
6166               {
6167                 /* If this operand is at most 32 bits, convert
6168                    to a signed 32 bit number and don't use 64bit
6169                    displacement.  */
6170                 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
6171                 i.types[op].bitfield.disp64 = 0;
6172                 i.types[op].bitfield.disp32 = 1;
6173               }
6174
6175             if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
6176               {
6177                 i.types[op].bitfield.disp64 = 0;
6178                 i.types[op].bitfield.disp32 = 1;
6179               }
6180 #endif
6181             if ((i.types[op].bitfield.disp32
6182                  || i.types[op].bitfield.disp16)
6183                 && fits_in_disp8 (op_disp))
6184               i.types[op].bitfield.disp8 = 1;
6185
6186             i.op[op].disps->X_add_number = op_disp;
6187           }
6188         else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
6189                  || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
6190           {
6191             fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
6192                          i.op[op].disps, 0, i.reloc[op]);
6193             i.types[op] = operand_type_and_not (i.types[op], anydisp);
6194           }
6195         else
6196           /* We only support 64bit displacement on constants.  */
6197           i.types[op].bitfield.disp64 = 0;
6198       }
6199 }
6200
6201 /* Return 1 if there is a match in broadcast bytes between operand
6202    GIVEN and instruction template T.   */
6203
6204 static INLINE int
6205 match_broadcast_size (const insn_template *t, unsigned int given)
6206 {
6207   return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
6208            && i.types[given].bitfield.byte)
6209           || (t->opcode_modifier.broadcast == WORD_BROADCAST
6210               && i.types[given].bitfield.word)
6211           || (t->opcode_modifier.broadcast == DWORD_BROADCAST
6212               && i.types[given].bitfield.dword)
6213           || (t->opcode_modifier.broadcast == QWORD_BROADCAST
6214               && i.types[given].bitfield.qword));
6215 }
6216
6217 /* Check if operands are valid for the instruction.  */
6218
6219 static int
6220 check_VecOperands (const insn_template *t)
6221 {
6222   unsigned int op;
6223   i386_cpu_flags cpu;
6224
6225   /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
6226      any one operand are implicity requiring AVX512VL support if the actual
6227      operand size is YMMword or XMMword.  Since this function runs after
6228      template matching, there's no need to check for YMMword/XMMword in
6229      the template.  */
6230   cpu = cpu_flags_and (t->cpu_flags, avx512);
6231   if (!cpu_flags_all_zero (&cpu)
6232       && !t->cpu_flags.bitfield.cpuavx512vl
6233       && !cpu_arch_flags.bitfield.cpuavx512vl)
6234     {
6235       for (op = 0; op < t->operands; ++op)
6236         {
6237           if (t->operand_types[op].bitfield.zmmword
6238               && (i.types[op].bitfield.ymmword
6239                   || i.types[op].bitfield.xmmword))
6240             {
6241               i.error = unsupported;
6242               return 1;
6243             }
6244         }
6245     }
6246
6247   /* Somewhat similarly, templates specifying both AVX and AVX2 are
6248      requiring AVX2 support if the actual operand size is YMMword.  */
6249   if (t->cpu_flags.bitfield.cpuavx
6250       && t->cpu_flags.bitfield.cpuavx2
6251       && !cpu_arch_flags.bitfield.cpuavx2)
6252     {
6253       for (op = 0; op < t->operands; ++op)
6254         {
6255           if (t->operand_types[op].bitfield.xmmword
6256               && i.types[op].bitfield.ymmword)
6257             {
6258               i.error = unsupported;
6259               return 1;
6260             }
6261         }
6262     }
6263
6264   /* Without VSIB byte, we can't have a vector register for index.  */
6265   if (!t->opcode_modifier.sib
6266       && i.index_reg
6267       && (i.index_reg->reg_type.bitfield.xmmword
6268           || i.index_reg->reg_type.bitfield.ymmword
6269           || i.index_reg->reg_type.bitfield.zmmword))
6270     {
6271       i.error = unsupported_vector_index_register;
6272       return 1;
6273     }
6274
6275   /* Check if default mask is allowed.  */
6276   if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
6277       && (!i.mask.reg || i.mask.reg->reg_num == 0))
6278     {
6279       i.error = no_default_mask;
6280       return 1;
6281     }
6282
6283   /* For VSIB byte, we need a vector register for index, and all vector
6284      registers must be distinct.  */
6285   if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
6286     {
6287       if (!i.index_reg
6288           || !((t->opcode_modifier.sib == VECSIB128
6289                 && i.index_reg->reg_type.bitfield.xmmword)
6290                || (t->opcode_modifier.sib == VECSIB256
6291                    && i.index_reg->reg_type.bitfield.ymmword)
6292                || (t->opcode_modifier.sib == VECSIB512
6293                    && i.index_reg->reg_type.bitfield.zmmword)))
6294       {
6295         i.error = invalid_vsib_address;
6296         return 1;
6297       }
6298
6299       gas_assert (i.reg_operands == 2 || i.mask.reg);
6300       if (i.reg_operands == 2 && !i.mask.reg)
6301         {
6302           gas_assert (i.types[0].bitfield.class == RegSIMD);
6303           gas_assert (i.types[0].bitfield.xmmword
6304                       || i.types[0].bitfield.ymmword);
6305           gas_assert (i.types[2].bitfield.class == RegSIMD);
6306           gas_assert (i.types[2].bitfield.xmmword
6307                       || i.types[2].bitfield.ymmword);
6308           if (operand_check == check_none)
6309             return 0;
6310           if (register_number (i.op[0].regs)
6311               != register_number (i.index_reg)
6312               && register_number (i.op[2].regs)
6313                  != register_number (i.index_reg)
6314               && register_number (i.op[0].regs)
6315                  != register_number (i.op[2].regs))
6316             return 0;
6317           if (operand_check == check_error)
6318             {
6319               i.error = invalid_vector_register_set;
6320               return 1;
6321             }
6322           as_warn (_("mask, index, and destination registers should be distinct"));
6323         }
6324       else if (i.reg_operands == 1 && i.mask.reg)
6325         {
6326           if (i.types[1].bitfield.class == RegSIMD
6327               && (i.types[1].bitfield.xmmword
6328                   || i.types[1].bitfield.ymmword
6329                   || i.types[1].bitfield.zmmword)
6330               && (register_number (i.op[1].regs)
6331                   == register_number (i.index_reg)))
6332             {
6333               if (operand_check == check_error)
6334                 {
6335                   i.error = invalid_vector_register_set;
6336                   return 1;
6337                 }
6338               if (operand_check != check_none)
6339                 as_warn (_("index and destination registers should be distinct"));
6340             }
6341         }
6342     }
6343
6344   /* For AMX instructions with 3 TMM register operands, all operands
6345       must be distinct.  */
6346   if (i.reg_operands == 3
6347       && t->operand_types[0].bitfield.tmmword
6348       && (i.op[0].regs == i.op[1].regs
6349           || i.op[0].regs == i.op[2].regs
6350           || i.op[1].regs == i.op[2].regs))
6351     {
6352       i.error = invalid_tmm_register_set;
6353       return 1;
6354     }
6355
6356   /* Some special instructions require that the destination be distinct
6357      from the source registers.  */
6358   if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
6359     {
6360       unsigned int dest_reg = i.operands - 1;
6361
6362       know (i.operands >= 3);
6363
6364       /* #UD if dest_reg == src1_reg or dest_reg == src2_reg.  */
6365       if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
6366           || (i.reg_operands > 2
6367               && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
6368         {
6369           i.error = invalid_dest_and_src_register_set;
6370           return 1;
6371         }
6372     }
6373
6374   /* Check if broadcast is supported by the instruction and is applied
6375      to the memory operand.  */
6376   if (i.broadcast.type || i.broadcast.bytes)
6377     {
6378       i386_operand_type type, overlap;
6379
6380       /* Check if specified broadcast is supported in this instruction,
6381          and its broadcast bytes match the memory operand.  */
6382       op = i.broadcast.operand;
6383       if (!t->opcode_modifier.broadcast
6384           || !(i.flags[op] & Operand_Mem)
6385           || (!i.types[op].bitfield.unspecified
6386               && !match_broadcast_size (t, op)))
6387         {
6388         bad_broadcast:
6389           i.error = unsupported_broadcast;
6390           return 1;
6391         }
6392
6393       if (i.broadcast.type)
6394         i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
6395                              * i.broadcast.type);
6396       operand_type_set (&type, 0);
6397       switch (get_broadcast_bytes (t, false))
6398         {
6399         case 2:
6400           type.bitfield.word = 1;
6401           break;
6402         case 4:
6403           type.bitfield.dword = 1;
6404           break;
6405         case 8:
6406           type.bitfield.qword = 1;
6407           break;
6408         case 16:
6409           type.bitfield.xmmword = 1;
6410           break;
6411         case 32:
6412           type.bitfield.ymmword = 1;
6413           break;
6414         case 64:
6415           type.bitfield.zmmword = 1;
6416           break;
6417         default:
6418           goto bad_broadcast;
6419         }
6420
6421       overlap = operand_type_and (type, t->operand_types[op]);
6422       if (t->operand_types[op].bitfield.class == RegSIMD
6423           && t->operand_types[op].bitfield.byte
6424              + t->operand_types[op].bitfield.word
6425              + t->operand_types[op].bitfield.dword
6426              + t->operand_types[op].bitfield.qword > 1)
6427         {
6428           overlap.bitfield.xmmword = 0;
6429           overlap.bitfield.ymmword = 0;
6430           overlap.bitfield.zmmword = 0;
6431         }
6432       if (operand_type_all_zero (&overlap))
6433           goto bad_broadcast;
6434
6435       if (t->opcode_modifier.checkoperandsize)
6436         {
6437           unsigned int j;
6438
6439           type.bitfield.baseindex = 1;
6440           for (j = 0; j < i.operands; ++j)
6441             {
6442               if (j != op
6443                   && !operand_type_register_match(i.types[j],
6444                                                   t->operand_types[j],
6445                                                   type,
6446                                                   t->operand_types[op]))
6447                 goto bad_broadcast;
6448             }
6449         }
6450     }
6451   /* If broadcast is supported in this instruction, we need to check if
6452      operand of one-element size isn't specified without broadcast.  */
6453   else if (t->opcode_modifier.broadcast && i.mem_operands)
6454     {
6455       /* Find memory operand.  */
6456       for (op = 0; op < i.operands; op++)
6457         if (i.flags[op] & Operand_Mem)
6458           break;
6459       gas_assert (op < i.operands);
6460       /* Check size of the memory operand.  */
6461       if (match_broadcast_size (t, op))
6462         {
6463           i.error = broadcast_needed;
6464           return 1;
6465         }
6466     }
6467   else
6468     op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning.  */
6469
6470   /* Check if requested masking is supported.  */
6471   if (i.mask.reg)
6472     {
6473       switch (t->opcode_modifier.masking)
6474         {
6475         case BOTH_MASKING:
6476           break;
6477         case MERGING_MASKING:
6478           if (i.mask.zeroing)
6479             {
6480         case 0:
6481               i.error = unsupported_masking;
6482               return 1;
6483             }
6484           break;
6485         case DYNAMIC_MASKING:
6486           /* Memory destinations allow only merging masking.  */
6487           if (i.mask.zeroing && i.mem_operands)
6488             {
6489               /* Find memory operand.  */
6490               for (op = 0; op < i.operands; op++)
6491                 if (i.flags[op] & Operand_Mem)
6492                   break;
6493               gas_assert (op < i.operands);
6494               if (op == i.operands - 1)
6495                 {
6496                   i.error = unsupported_masking;
6497                   return 1;
6498                 }
6499             }
6500           break;
6501         default:
6502           abort ();
6503         }
6504     }
6505
6506   /* Check if masking is applied to dest operand.  */
6507   if (i.mask.reg && (i.mask.operand != i.operands - 1))
6508     {
6509       i.error = mask_not_on_destination;
6510       return 1;
6511     }
6512
6513   /* Check RC/SAE.  */
6514   if (i.rounding.type != rc_none)
6515     {
6516       if (!t->opcode_modifier.sae
6517           || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
6518           || i.mem_operands)
6519         {
6520           i.error = unsupported_rc_sae;
6521           return 1;
6522         }
6523
6524       /* Non-EVEX.LIG forms need to have a ZMM register as at least one
6525          operand.  */
6526       if (t->opcode_modifier.evex != EVEXLIG)
6527         {
6528           for (op = 0; op < t->operands; ++op)
6529             if (i.types[op].bitfield.zmmword)
6530               break;
6531           if (op >= t->operands)
6532             {
6533               i.error = operand_size_mismatch;
6534               return 1;
6535             }
6536         }
6537     }
6538
6539   /* Check the special Imm4 cases; must be the first operand.  */
6540   if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
6541     {
6542       if (i.op[0].imms->X_op != O_constant
6543           || !fits_in_imm4 (i.op[0].imms->X_add_number))
6544         {
6545           i.error = bad_imm4;
6546           return 1;
6547         }
6548
6549       /* Turn off Imm<N> so that update_imm won't complain.  */
6550       operand_type_set (&i.types[0], 0);
6551     }
6552
6553   /* Check vector Disp8 operand.  */
6554   if (t->opcode_modifier.disp8memshift
6555       && i.disp_encoding <= disp_encoding_8bit)
6556     {
6557       if (i.broadcast.bytes)
6558         i.memshift = t->opcode_modifier.broadcast - 1;
6559       else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6560         i.memshift = t->opcode_modifier.disp8memshift;
6561       else
6562         {
6563           const i386_operand_type *type = NULL, *fallback = NULL;
6564
6565           i.memshift = 0;
6566           for (op = 0; op < i.operands; op++)
6567             if (i.flags[op] & Operand_Mem)
6568               {
6569                 if (t->opcode_modifier.evex == EVEXLIG)
6570                   i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6571                 else if (t->operand_types[op].bitfield.xmmword
6572                          + t->operand_types[op].bitfield.ymmword
6573                          + t->operand_types[op].bitfield.zmmword <= 1)
6574                   type = &t->operand_types[op];
6575                 else if (!i.types[op].bitfield.unspecified)
6576                   type = &i.types[op];
6577                 else /* Ambiguities get resolved elsewhere.  */
6578                   fallback = &t->operand_types[op];
6579               }
6580             else if (i.types[op].bitfield.class == RegSIMD
6581                      && t->opcode_modifier.evex != EVEXLIG)
6582               {
6583                 if (i.types[op].bitfield.zmmword)
6584                   i.memshift = 6;
6585                 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6586                   i.memshift = 5;
6587                 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6588                   i.memshift = 4;
6589               }
6590
6591           if (!type && !i.memshift)
6592             type = fallback;
6593           if (type)
6594             {
6595               if (type->bitfield.zmmword)
6596                 i.memshift = 6;
6597               else if (type->bitfield.ymmword)
6598                 i.memshift = 5;
6599               else if (type->bitfield.xmmword)
6600                 i.memshift = 4;
6601             }
6602
6603           /* For the check in fits_in_disp8().  */
6604           if (i.memshift == 0)
6605             i.memshift = -1;
6606         }
6607
6608       for (op = 0; op < i.operands; op++)
6609         if (operand_type_check (i.types[op], disp)
6610             && i.op[op].disps->X_op == O_constant)
6611           {
6612             if (fits_in_disp8 (i.op[op].disps->X_add_number))
6613               {
6614                 i.types[op].bitfield.disp8 = 1;
6615                 return 0;
6616               }
6617             i.types[op].bitfield.disp8 = 0;
6618           }
6619     }
6620
6621   i.memshift = 0;
6622
6623   return 0;
6624 }
6625
6626 /* Check if encoding requirements are met by the instruction.  */
6627
6628 static int
6629 VEX_check_encoding (const insn_template *t)
6630 {
6631   if (i.vec_encoding == vex_encoding_error)
6632     {
6633       i.error = unsupported;
6634       return 1;
6635     }
6636
6637   if (i.vec_encoding == vex_encoding_evex)
6638     {
6639       /* This instruction must be encoded with EVEX prefix.  */
6640       if (!is_evex_encoding (t))
6641         {
6642           i.error = unsupported;
6643           return 1;
6644         }
6645       return 0;
6646     }
6647
6648   if (!t->opcode_modifier.vex)
6649     {
6650       /* This instruction template doesn't have VEX prefix.  */
6651       if (i.vec_encoding != vex_encoding_default)
6652         {
6653           i.error = unsupported;
6654           return 1;
6655         }
6656       return 0;
6657     }
6658
6659   return 0;
6660 }
6661
6662 /* Helper function for the progress() macro in match_template().  */
6663 static INLINE enum i386_error progress (enum i386_error new,
6664                                         enum i386_error last,
6665                                         unsigned int line, unsigned int *line_p)
6666 {
6667   if (line <= *line_p)
6668     return last;
6669   *line_p = line;
6670   return new;
6671 }
6672
6673 static const insn_template *
6674 match_template (char mnem_suffix)
6675 {
6676   /* Points to template once we've found it.  */
6677   const insn_template *t;
6678   i386_operand_type overlap0, overlap1, overlap2, overlap3;
6679   i386_operand_type overlap4;
6680   unsigned int found_reverse_match;
6681   i386_operand_type operand_types [MAX_OPERANDS];
6682   int addr_prefix_disp;
6683   unsigned int j, size_match, check_register, errline = __LINE__;
6684   enum i386_error specific_error = number_of_operands_mismatch;
6685 #define progress(err) progress (err, specific_error, __LINE__, &errline)
6686
6687 #if MAX_OPERANDS != 5
6688 # error "MAX_OPERANDS must be 5."
6689 #endif
6690
6691   found_reverse_match = 0;
6692   addr_prefix_disp = -1;
6693
6694   for (t = current_templates->start; t < current_templates->end; t++)
6695     {
6696       addr_prefix_disp = -1;
6697       found_reverse_match = 0;
6698
6699       /* Must have right number of operands.  */
6700       if (i.operands != t->operands)
6701         continue;
6702
6703       /* Check processor support.  */
6704       specific_error = progress (unsupported);
6705       if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6706         continue;
6707
6708       /* Check AT&T mnemonic.   */
6709       specific_error = progress (unsupported_with_intel_mnemonic);
6710       if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6711         continue;
6712
6713       /* Check AT&T/Intel syntax.  */
6714       specific_error = progress (unsupported_syntax);
6715       if ((intel_syntax && t->opcode_modifier.attsyntax)
6716           || (!intel_syntax && t->opcode_modifier.intelsyntax))
6717         continue;
6718
6719       /* Check Intel64/AMD64 ISA.   */
6720       switch (isa64)
6721         {
6722         default:
6723           /* Default: Don't accept Intel64.  */
6724           if (t->opcode_modifier.isa64 == INTEL64)
6725             continue;
6726           break;
6727         case amd64:
6728           /* -mamd64: Don't accept Intel64 and Intel64 only.  */
6729           if (t->opcode_modifier.isa64 >= INTEL64)
6730             continue;
6731           break;
6732         case intel64:
6733           /* -mintel64: Don't accept AMD64.  */
6734           if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6735             continue;
6736           break;
6737         }
6738
6739       /* Check the suffix.  */
6740       specific_error = progress (invalid_instruction_suffix);
6741       if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
6742           || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
6743           || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
6744           || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
6745           || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
6746         continue;
6747
6748       specific_error = progress (operand_size_mismatch);
6749       size_match = operand_size_match (t);
6750       if (!size_match)
6751         continue;
6752
6753       /* This is intentionally not
6754
6755          if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6756
6757          as the case of a missing * on the operand is accepted (perhaps with
6758          a warning, issued further down).  */
6759       specific_error = progress (operand_type_mismatch);
6760       if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6761         continue;
6762
6763       /* In Intel syntax, normally we can check for memory operand size when
6764          there is no mnemonic suffix.  But jmp and call have 2 different
6765          encodings with Dword memory operand size.  Skip the "near" one
6766          (permitting a register operand) when "far" was requested.  */
6767       if (i.far_branch
6768           && t->opcode_modifier.jump == JUMP_ABSOLUTE
6769           && t->operand_types[0].bitfield.class == Reg)
6770         continue;
6771
6772       for (j = 0; j < MAX_OPERANDS; j++)
6773         operand_types[j] = t->operand_types[j];
6774
6775       /* In general, don't allow 32-bit operands on pre-386.  */
6776       specific_error = progress (mnem_suffix ? invalid_instruction_suffix
6777                                              : operand_size_mismatch);
6778       j = i.imm_operands + (t->operands > i.imm_operands + 1);
6779       if (i.suffix == LONG_MNEM_SUFFIX
6780           && !cpu_arch_flags.bitfield.cpui386
6781           && (intel_syntax
6782               ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6783                  && !intel_float_operand (t->name))
6784               : intel_float_operand (t->name) != 2)
6785           && (t->operands == i.imm_operands
6786               || (operand_types[i.imm_operands].bitfield.class != RegMMX
6787                && operand_types[i.imm_operands].bitfield.class != RegSIMD
6788                && operand_types[i.imm_operands].bitfield.class != RegMask)
6789               || (operand_types[j].bitfield.class != RegMMX
6790                   && operand_types[j].bitfield.class != RegSIMD
6791                   && operand_types[j].bitfield.class != RegMask))
6792           && !t->opcode_modifier.sib)
6793         continue;
6794
6795       /* Do not verify operands when there are none.  */
6796       if (!t->operands)
6797         {
6798           if (VEX_check_encoding (t))
6799             {
6800               specific_error = progress (i.error);
6801               continue;
6802             }
6803
6804           /* We've found a match; break out of loop.  */
6805           break;
6806         }
6807
6808       if (!t->opcode_modifier.jump
6809           || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6810         {
6811           /* There should be only one Disp operand.  */
6812           for (j = 0; j < MAX_OPERANDS; j++)
6813             if (operand_type_check (operand_types[j], disp))
6814               break;
6815           if (j < MAX_OPERANDS)
6816             {
6817               bool override = (i.prefix[ADDR_PREFIX] != 0);
6818
6819               addr_prefix_disp = j;
6820
6821               /* Address size prefix will turn Disp64 operand into Disp32 and
6822                  Disp32/Disp16 one into Disp16/Disp32 respectively.  */
6823               switch (flag_code)
6824                 {
6825                 case CODE_16BIT:
6826                   override = !override;
6827                   /* Fall through.  */
6828                 case CODE_32BIT:
6829                   if (operand_types[j].bitfield.disp32
6830                       && operand_types[j].bitfield.disp16)
6831                     {
6832                       operand_types[j].bitfield.disp16 = override;
6833                       operand_types[j].bitfield.disp32 = !override;
6834                     }
6835                   gas_assert (!operand_types[j].bitfield.disp64);
6836                   break;
6837
6838                 case CODE_64BIT:
6839                   if (operand_types[j].bitfield.disp64)
6840                     {
6841                       gas_assert (!operand_types[j].bitfield.disp32);
6842                       operand_types[j].bitfield.disp32 = override;
6843                       operand_types[j].bitfield.disp64 = !override;
6844                     }
6845                   operand_types[j].bitfield.disp16 = 0;
6846                   break;
6847                 }
6848             }
6849         }
6850
6851       /* We check register size if needed.  */
6852       if (t->opcode_modifier.checkoperandsize)
6853         {
6854           check_register = (1 << t->operands) - 1;
6855           if (i.broadcast.type || i.broadcast.bytes)
6856             check_register &= ~(1 << i.broadcast.operand);
6857         }
6858       else
6859         check_register = 0;
6860
6861       overlap0 = operand_type_and (i.types[0], operand_types[0]);
6862       switch (t->operands)
6863         {
6864         case 1:
6865           if (!operand_type_match (overlap0, i.types[0]))
6866             continue;
6867           break;
6868         case 2:
6869           /* xchg %eax, %eax is a special case. It is an alias for nop
6870              only in 32bit mode and we can use opcode 0x90.  In 64bit
6871              mode, we can't use 0x90 for xchg %eax, %eax since it should
6872              zero-extend %eax to %rax.  */
6873           if (flag_code == CODE_64BIT
6874               && t->base_opcode == 0x90
6875               && t->opcode_modifier.opcodespace == SPACE_BASE
6876               && i.types[0].bitfield.instance == Accum
6877               && i.types[0].bitfield.dword
6878               && i.types[1].bitfield.instance == Accum)
6879             continue;
6880
6881           if (t->base_opcode == MOV_AX_DISP32
6882               && t->opcode_modifier.opcodespace == SPACE_BASE)
6883             {
6884               /* Force 0x8b encoding for "mov foo@GOT, %eax".  */
6885               if (i.reloc[0] == BFD_RELOC_386_GOT32)
6886                 continue;
6887
6888               /* xrelease mov %eax, <disp> is another special case. It must not
6889                  match the accumulator-only encoding of mov.  */
6890               if (i.hle_prefix)
6891                 continue;
6892             }
6893           /* Fall through.  */
6894
6895         case 3:
6896           if (!(size_match & MATCH_STRAIGHT))
6897             goto check_reverse;
6898           /* Reverse direction of operands if swapping is possible in the first
6899              place (operands need to be symmetric) and
6900              - the load form is requested, and the template is a store form,
6901              - the store form is requested, and the template is a load form,
6902              - the non-default (swapped) form is requested.  */
6903           overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6904           if (t->opcode_modifier.d && i.reg_operands == i.operands
6905               && !operand_type_all_zero (&overlap1))
6906             switch (i.dir_encoding)
6907               {
6908               case dir_encoding_load:
6909                 if (operand_type_check (operand_types[i.operands - 1], anymem)
6910                     || t->opcode_modifier.regmem)
6911                   goto check_reverse;
6912                 break;
6913
6914               case dir_encoding_store:
6915                 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6916                     && !t->opcode_modifier.regmem)
6917                   goto check_reverse;
6918                 break;
6919
6920               case dir_encoding_swap:
6921                 goto check_reverse;
6922
6923               case dir_encoding_default:
6924                 break;
6925               }
6926           /* If we want store form, we skip the current load.  */
6927           if ((i.dir_encoding == dir_encoding_store
6928                || i.dir_encoding == dir_encoding_swap)
6929               && i.mem_operands == 0
6930               && t->opcode_modifier.load)
6931             continue;
6932           /* Fall through.  */
6933         case 4:
6934         case 5:
6935           overlap1 = operand_type_and (i.types[1], operand_types[1]);
6936           if (!operand_type_match (overlap0, i.types[0])
6937               || !operand_type_match (overlap1, i.types[1])
6938               || ((check_register & 3) == 3
6939                   && !operand_type_register_match (i.types[0],
6940                                                    operand_types[0],
6941                                                    i.types[1],
6942                                                    operand_types[1])))
6943             {
6944               specific_error = progress (i.error);
6945
6946               /* Check if other direction is valid ...  */
6947               if (!t->opcode_modifier.d)
6948                 continue;
6949
6950             check_reverse:
6951               if (!(size_match & MATCH_REVERSE))
6952                 continue;
6953               /* Try reversing direction of operands.  */
6954               j = t->opcode_modifier.vexsources ? 1 : i.operands - 1;
6955               overlap0 = operand_type_and (i.types[0], operand_types[j]);
6956               overlap1 = operand_type_and (i.types[j], operand_types[0]);
6957               overlap2 = operand_type_and (i.types[1], operand_types[1]);
6958               gas_assert (t->operands != 3 || !check_register);
6959               if (!operand_type_match (overlap0, i.types[0])
6960                   || !operand_type_match (overlap1, i.types[j])
6961                   || (t->operands == 3
6962                       && !operand_type_match (overlap2, i.types[1]))
6963                   || (check_register
6964                       && !operand_type_register_match (i.types[0],
6965                                                        operand_types[j],
6966                                                        i.types[j],
6967                                                        operand_types[0])))
6968                 {
6969                   /* Does not match either direction.  */
6970                   specific_error = progress (i.error);
6971                   continue;
6972                 }
6973               /* found_reverse_match holds which variant of D
6974                  we've found.  */
6975               if (!t->opcode_modifier.d)
6976                 found_reverse_match = 0;
6977               else if (operand_types[0].bitfield.tbyte)
6978                 {
6979                   if (t->opcode_modifier.operandconstraint != UGH)
6980                     found_reverse_match = Opcode_FloatD;
6981                   /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped.  */
6982                   if ((t->base_opcode & 0x20)
6983                       && (intel_syntax || intel_mnemonic))
6984                     found_reverse_match |= Opcode_FloatR;
6985                 }
6986               else if (t->opcode_modifier.vexsources)
6987                 {
6988                   found_reverse_match = Opcode_VexW;
6989                   goto check_operands_345;
6990                 }
6991               else if (t->opcode_modifier.opcodespace != SPACE_BASE
6992                        && (t->opcode_modifier.opcodespace != SPACE_0F
6993                            /* MOV to/from CR/DR/TR, as an exception, follow
6994                               the base opcode space encoding model.  */
6995                            || (t->base_opcode | 7) != 0x27))
6996                 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
6997                                       ? Opcode_ExtD : Opcode_SIMD_IntD;
6998               else if (!t->opcode_modifier.commutative)
6999                 found_reverse_match = Opcode_D;
7000               else
7001                 found_reverse_match = ~0;
7002             }
7003           else
7004             {
7005               /* Found a forward 2 operand match here.  */
7006             check_operands_345:
7007               switch (t->operands)
7008                 {
7009                 case 5:
7010                   overlap4 = operand_type_and (i.types[4], operand_types[4]);
7011                   if (!operand_type_match (overlap4, i.types[4])
7012                       || !operand_type_register_match (i.types[3],
7013                                                        operand_types[3],
7014                                                        i.types[4],
7015                                                        operand_types[4]))
7016                     {
7017                       specific_error = progress (i.error);
7018                       continue;
7019                     }
7020                   /* Fall through.  */
7021                 case 4:
7022                   overlap3 = operand_type_and (i.types[3], operand_types[3]);
7023                   if (!operand_type_match (overlap3, i.types[3])
7024                       || ((check_register & 0xa) == 0xa
7025                           && !operand_type_register_match (i.types[1],
7026                                                             operand_types[1],
7027                                                             i.types[3],
7028                                                             operand_types[3]))
7029                       || ((check_register & 0xc) == 0xc
7030                           && !operand_type_register_match (i.types[2],
7031                                                             operand_types[2],
7032                                                             i.types[3],
7033                                                             operand_types[3])))
7034                     {
7035                       specific_error = progress (i.error);
7036                       continue;
7037                     }
7038                   /* Fall through.  */
7039                 case 3:
7040                   overlap2 = operand_type_and (i.types[2], operand_types[2]);
7041                   if (!operand_type_match (overlap2, i.types[2])
7042                       || ((check_register & 5) == 5
7043                           && !operand_type_register_match (i.types[0],
7044                                                             operand_types[0],
7045                                                             i.types[2],
7046                                                             operand_types[2]))
7047                       || ((check_register & 6) == 6
7048                           && !operand_type_register_match (i.types[1],
7049                                                             operand_types[1],
7050                                                             i.types[2],
7051                                                             operand_types[2])))
7052                     {
7053                       specific_error = progress (i.error);
7054                       continue;
7055                     }
7056                   break;
7057                 }
7058             }
7059           /* Found either forward/reverse 2, 3 or 4 operand match here:
7060              slip through to break.  */
7061         }
7062
7063       /* Check if VEX/EVEX encoding requirements can be satisfied.  */
7064       if (VEX_check_encoding (t))
7065         {
7066           specific_error = progress (i.error);
7067           continue;
7068         }
7069
7070       /* Check if vector operands are valid.  */
7071       if (check_VecOperands (t))
7072         {
7073           specific_error = progress (i.error);
7074           continue;
7075         }
7076
7077       /* We've found a match; break out of loop.  */
7078       break;
7079     }
7080
7081 #undef progress
7082
7083   if (t == current_templates->end)
7084     {
7085       /* We found no match.  */
7086       i.error = specific_error;
7087       return NULL;
7088     }
7089
7090   if (!quiet_warnings)
7091     {
7092       if (!intel_syntax
7093           && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
7094         as_warn (_("indirect %s without `*'"), t->name);
7095
7096       if (t->opcode_modifier.isprefix
7097           && t->opcode_modifier.mnemonicsize == IGNORESIZE)
7098         {
7099           /* Warn them that a data or address size prefix doesn't
7100              affect assembly of the next line of code.  */
7101           as_warn (_("stand-alone `%s' prefix"), t->name);
7102         }
7103     }
7104
7105   /* Copy the template we found.  */
7106   install_template (t);
7107
7108   if (addr_prefix_disp != -1)
7109     i.tm.operand_types[addr_prefix_disp]
7110       = operand_types[addr_prefix_disp];
7111
7112   switch (found_reverse_match)
7113     {
7114     case 0:
7115       break;
7116
7117     default:
7118       /* If we found a reverse match we must alter the opcode direction
7119          bit and clear/flip the regmem modifier one.  found_reverse_match
7120          holds bits to change (different for int & float insns).  */
7121
7122       i.tm.base_opcode ^= found_reverse_match;
7123
7124       /* Certain SIMD insns have their load forms specified in the opcode
7125          table, and hence we need to _set_ RegMem instead of clearing it.
7126          We need to avoid setting the bit though on insns like KMOVW.  */
7127       i.tm.opcode_modifier.regmem
7128         = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
7129           && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
7130           && !i.tm.opcode_modifier.regmem;
7131
7132       /* Fall through.  */
7133     case ~0:
7134       i.tm.operand_types[0] = operand_types[i.operands - 1];
7135       i.tm.operand_types[i.operands - 1] = operand_types[0];
7136       break;
7137
7138     case Opcode_VexW:
7139       /* Only the first two register operands need reversing, alongside
7140          flipping VEX.W.  */
7141       i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
7142
7143       j = i.tm.operand_types[0].bitfield.imm8;
7144       i.tm.operand_types[j] = operand_types[j + 1];
7145       i.tm.operand_types[j + 1] = operand_types[j];
7146       break;
7147     }
7148
7149   return t;
7150 }
7151
7152 static int
7153 check_string (void)
7154 {
7155   unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
7156   unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
7157
7158   if (i.seg[op] != NULL && i.seg[op] != reg_es)
7159     {
7160       as_bad (_("`%s' operand %u must use `%ses' segment"),
7161               i.tm.name,
7162               intel_syntax ? i.tm.operands - es_op : es_op + 1,
7163               register_prefix);
7164       return 0;
7165     }
7166
7167   /* There's only ever one segment override allowed per instruction.
7168      This instruction possibly has a legal segment override on the
7169      second operand, so copy the segment to where non-string
7170      instructions store it, allowing common code.  */
7171   i.seg[op] = i.seg[1];
7172
7173   return 1;
7174 }
7175
/* Settle the operand size suffix (i.suffix) of the insn whose matched
   template was installed in i.tm, and adjust opcode, prefixes and REX bits
   to match.

   The suffix is taken, in order of preference, from an explicit Size16/32/64
   template attribute, from the general register operands (an explicitly
   given suffix is validated against them via check_*_reg ()), from
   stackop_size for DefaultSize templates, or from the current code size for
   certain jumps and for 0F 01 /0 ... /3.  When no suffix results and the
   template permits more than one operand size, the ambiguity is diagnosed
   according to operand_check (always an error in Intel syntax unless the
   template is a DefaultSize one) and a default is picked where that is not
   an error.

   Returns 1 on success, 0 when an error was reported here or by one of the
   check_*_reg () helpers, or when add_prefix () failed.  */
7176 static int
7177 process_suffix (void)
7178 {
7179   bool is_crc32 = false, is_movx = false;
7180
7181   /* If matched instruction specifies an explicit instruction mnemonic
7182      suffix, use it.  */
7183   if (i.tm.opcode_modifier.size == SIZE16)
7184     i.suffix = WORD_MNEM_SUFFIX;
7185   else if (i.tm.opcode_modifier.size == SIZE32)
7186     i.suffix = LONG_MNEM_SUFFIX;
7187   else if (i.tm.opcode_modifier.size == SIZE64)
7188     i.suffix = QWORD_MNEM_SUFFIX;
       /* Otherwise, with register operands present (and the insn not being
          one whose register size is governed by the address size prefix),
          derive the suffix from the registers or validate the given one
          against them.  */
7189   else if (i.reg_operands
7190            && (i.operands > 1 || i.types[0].bitfield.class == Reg)
7191            && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
7192     {
7193       unsigned int numop = i.operands;
7194
7195       /* MOVSX/MOVZX */
7196       is_movx = (i.tm.opcode_modifier.opcodespace == SPACE_0F
7197                  && (i.tm.base_opcode | 8) == 0xbe)
7198                 || (i.tm.opcode_modifier.opcodespace == SPACE_BASE
7199                     && i.tm.base_opcode == 0x63
7200                     && i.tm.cpu_flags.bitfield.cpu64);
7201
7202       /* CRC32 */
7203       is_crc32 = (i.tm.base_opcode == 0xf0
7204                   && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7205                   && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2);
7206
7207       /* movsx/movzx want only their source operand considered here, for the
7208          ambiguity checking below.  The suffix will be replaced afterwards
7209          to represent the destination (register).  */
7210       if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
7211         --i.operands;
7212
7213       /* crc32 needs REX.W set regardless of suffix / source operand size.  */
7214       if (is_crc32 && i.tm.operand_types[1].bitfield.qword)
7215         i.rex |= REX_W;
7216
7217       /* If there's no instruction mnemonic suffix we try to invent one
7218          based on GPR operands.  */
7219       if (!i.suffix)
7220         {
7221           /* We take i.suffix from the last register operand specified,
7222              Destination register type is more significant than source
7223              register type.  crc32 in SSE4.2 prefers source register
7224              type. */
7225           unsigned int op = is_crc32 ? 1 : i.operands;
7226
               /* Scan downwards from the last operand considered; only
                  template operands with no specific register instance, or
                  with the accumulator one, take part.  */
7227           while (op--)
7228             if (i.tm.operand_types[op].bitfield.instance == InstanceNone
7229                 || i.tm.operand_types[op].bitfield.instance == Accum)
7230               {
7231                 if (i.types[op].bitfield.class != Reg)
7232                   continue;
7233                 if (i.types[op].bitfield.byte)
7234                   i.suffix = BYTE_MNEM_SUFFIX;
7235                 else if (i.types[op].bitfield.word)
7236                   i.suffix = WORD_MNEM_SUFFIX;
7237                 else if (i.types[op].bitfield.dword)
7238                   i.suffix = LONG_MNEM_SUFFIX;
7239                 else if (i.types[op].bitfield.qword)
7240                   i.suffix = QWORD_MNEM_SUFFIX;
7241                 else
7242                   continue;
7243                 break;
7244               }
7245
7246           /* As an exception, movsx/movzx silently default to a byte source
7247              in AT&T mode.  */
7248           if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
7249             i.suffix = BYTE_MNEM_SUFFIX;
7250         }
7251       else if (i.suffix == BYTE_MNEM_SUFFIX)
7252         {
7253           if (!check_byte_reg ())
7254             return 0;
7255         }
7256       else if (i.suffix == LONG_MNEM_SUFFIX)
7257         {
7258           if (!check_long_reg ())
7259             return 0;
7260         }
7261       else if (i.suffix == QWORD_MNEM_SUFFIX)
7262         {
7263           if (!check_qword_reg ())
7264             return 0;
7265         }
7266       else if (i.suffix == WORD_MNEM_SUFFIX)
7267         {
7268           if (!check_word_reg ())
7269             return 0;
7270         }
7271       else if (intel_syntax
7272                && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
7273         /* Do nothing if the instruction is going to ignore the prefix.  */
7274         ;
7275       else
7276         abort ();
7277
7278       /* Undo the movsx/movzx change done above.  */
7279       i.operands = numop;
7280     }
       /* Suffix-less DefaultSize insns take the stack operation size.  */
7281   else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
7282            && !i.suffix)
7283     {
7284       i.suffix = stackop_size;
7285       if (stackop_size == LONG_MNEM_SUFFIX)
7286         {
7287           /* stackop_size is set to LONG_MNEM_SUFFIX for the
7288              .code16gcc directive to support 16-bit mode with
7289              32-bit address.  For IRET without a suffix, generate
7290              16-bit IRET (opcode 0xcf) to return from an interrupt
7291              handler.  */
7292           if (i.tm.base_opcode == 0xcf)
7293             {
7294               i.suffix = WORD_MNEM_SUFFIX;
7295               as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
7296             }
7297           /* Warn about changed behavior for segment register push/pop.  */
7298           else if ((i.tm.base_opcode | 1) == 0x07)
7299             as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
7300                      i.tm.name);
7301         }
7302     }
       /* Suffix-less absolute, byte and intersegment jumps as well as
          0F 01 /0 ... /3 get a suffix matching the current code size, as far
          as the template permits that suffix.  */
7303   else if (!i.suffix
7304            && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
7305                || i.tm.opcode_modifier.jump == JUMP_BYTE
7306                || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
7307                || (i.tm.opcode_modifier.opcodespace == SPACE_0F
7308                    && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
7309                    && i.tm.extension_opcode <= 3)))
7310     {
7311       switch (flag_code)
7312         {
7313         case CODE_64BIT:
7314           if (!i.tm.opcode_modifier.no_qsuf)
7315             {
7316               if (i.tm.opcode_modifier.jump == JUMP_BYTE
7317                   || i.tm.opcode_modifier.no_lsuf)
7318                 i.suffix = QWORD_MNEM_SUFFIX;
7319               break;
7320             }
7321           /* Fall through.  */
7322         case CODE_32BIT:
7323           if (!i.tm.opcode_modifier.no_lsuf)
7324             i.suffix = LONG_MNEM_SUFFIX;
7325           break;
7326         case CODE_16BIT:
7327           if (!i.tm.opcode_modifier.no_wsuf)
7328             i.suffix = WORD_MNEM_SUFFIX;
7329           break;
7330         }
7331     }
7332
       /* Still no suffix: work out which operand sizes the template would
          accept, and diagnose (or pick a default) when there is more than
          one.  */
7333   if (!i.suffix
7334       && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7335           /* Also cover lret/retf/iret in 64-bit mode.  */
7336           || (flag_code == CODE_64BIT
7337               && !i.tm.opcode_modifier.no_lsuf
7338               && !i.tm.opcode_modifier.no_qsuf))
7339       && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7340       /* Explicit sizing prefixes are assumed to disambiguate insns.  */
7341       && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
7342       /* Accept FLDENV et al without suffix.  */
7343       && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
7344     {
7345       unsigned int suffixes, evex = 0;
7346
           /* Bit mask of the sizes the template permits: bit 0 - byte,
              bit 1 - word, bit 2 - long, bit 4 - short, bit 5 - qword (64-bit
              mode only).  Bits 6, 7 and 8 get used further down for
              xmmword, ymmword and zmmword memory operands.  */
7347       suffixes = !i.tm.opcode_modifier.no_bsuf;
7348       if (!i.tm.opcode_modifier.no_wsuf)
7349         suffixes |= 1 << 1;
7350       if (!i.tm.opcode_modifier.no_lsuf)
7351         suffixes |= 1 << 2;
7352       if (!i.tm.opcode_modifier.no_ssuf)
7353         suffixes |= 1 << 4;
7354       if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
7355         suffixes |= 1 << 5;
7356
7357       /* For [XYZ]MMWORD operands inspect operand sizes.  While generally
7358          also suitable for AT&T syntax mode, it was requested that this be
7359          restricted to just Intel syntax.  */
7360       if (intel_syntax && is_any_vex_encoding (&i.tm)
7361           && !i.broadcast.type && !i.broadcast.bytes)
7362         {
7363           unsigned int op;
7364
7365           for (op = 0; op < i.tm.operands; ++op)
7366             {
                   /* For EVEX templates with AVX512VL not enabled, drop the
                      smaller of two adjacent vector sizes from the template
                      operand and settle on EVEX512 unless the template
                      already names a specific EVEX length.  */
7367               if (is_evex_encoding (&i.tm)
7368                   && !cpu_arch_flags.bitfield.cpuavx512vl)
7369                 {
7370                   if (i.tm.operand_types[op].bitfield.ymmword)
7371                     i.tm.operand_types[op].bitfield.xmmword = 0;
7372                   if (i.tm.operand_types[op].bitfield.zmmword)
7373                     i.tm.operand_types[op].bitfield.ymmword = 0;
7374                   if (!i.tm.opcode_modifier.evex
7375                       || i.tm.opcode_modifier.evex == EVEXDYN)
7376                     i.tm.opcode_modifier.evex = EVEX512;
7377                 }
7378
                   /* Template operands allowing fewer than two vector sizes
                      cannot be ambiguous.  */
7379               if (i.tm.operand_types[op].bitfield.xmmword
7380                   + i.tm.operand_types[op].bitfield.ymmword
7381                   + i.tm.operand_types[op].bitfield.zmmword < 2)
7382                 continue;
7383
7384               /* Any properly sized operand disambiguates the insn.  */
7385               if (i.types[op].bitfield.xmmword
7386                   || i.types[op].bitfield.ymmword
7387                   || i.types[op].bitfield.zmmword)
7388                 {
7389                   suffixes &= ~(7 << 6);
7390                   evex = 0;
7391                   break;
7392                 }
7393
                   /* An unsized memory operand makes every vector size the
                      template permits a candidate.  */
7394               if ((i.flags[op] & Operand_Mem)
7395                   && i.tm.operand_types[op].bitfield.unspecified)
7396                 {
7397                   if (i.tm.operand_types[op].bitfield.xmmword)
7398                     suffixes |= 1 << 6;
7399                   if (i.tm.operand_types[op].bitfield.ymmword)
7400                     suffixes |= 1 << 7;
7401                   if (i.tm.operand_types[op].bitfield.zmmword)
7402                     suffixes |= 1 << 8;
7403                   if (is_evex_encoding (&i.tm))
7404                     evex = EVEX512;
7405                 }
7406             }
7407         }
7408
7409       /* Are multiple suffixes / operand sizes allowed?  */
           /* (x & (x - 1)) is non-zero exactly when x has more than one bit
              set.  */
7410       if (suffixes & (suffixes - 1))
7411         {
7412           if (intel_syntax
7413               && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7414                   || operand_check == check_error))
7415             {
7416               as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
7417               return 0;
7418             }
7419           if (operand_check == check_error)
7420             {
7421               as_bad (_("no instruction mnemonic suffix given and "
7422                         "no register operands; can't size `%s'"), i.tm.name);
7423               return 0;
7424             }
7425           if (operand_check == check_warning)
7426             as_warn (_("%s; using default for `%s'"),
7427                        intel_syntax
7428                        ? _("ambiguous operand size")
7429                        : _("no instruction mnemonic suffix given and "
7430                            "no register operands"),
7431                        i.tm.name);
7432
               /* The ambiguity was not treated as an error: fall back to a
                  default operand size.  */
7433           if (i.tm.opcode_modifier.floatmf)
7434             i.suffix = SHORT_MNEM_SUFFIX;
7435           else if (is_movx)
7436             /* handled below */;
7437           else if (evex)
7438             i.tm.opcode_modifier.evex = evex;
7439           else if (flag_code == CODE_16BIT)
7440             i.suffix = WORD_MNEM_SUFFIX;
7441           else if (!i.tm.opcode_modifier.no_lsuf)
7442             i.suffix = LONG_MNEM_SUFFIX;
7443           else
7444             i.suffix = QWORD_MNEM_SUFFIX;
7445         }
7446     }
7447
7448   if (is_movx)
7449     {
7450       /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7451          In AT&T syntax, if there is no suffix (warned about above), the default
7452          will be byte extension.  */
7453       if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7454         i.tm.base_opcode |= 1;
7455
7456       /* For further processing, the suffix should represent the destination
7457          (register).  This is already the case when one was used with
7458          mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7459          no suffix to begin with.  */
7460       if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7461         {
7462           if (i.types[1].bitfield.word)
7463             i.suffix = WORD_MNEM_SUFFIX;
7464           else if (i.types[1].bitfield.qword)
7465             i.suffix = QWORD_MNEM_SUFFIX;
7466           else
7467             i.suffix = LONG_MNEM_SUFFIX;
7468
               /* The opcode's low bit was dealt with above already; clearing
                  W keeps the switch below from setting it again.  */
7469           i.tm.opcode_modifier.w = 0;
7470         }
7471     }
7472
       /* For insns without ModRM byte that have register operands and fewer
          than three template operands, record whether exactly one of the
          first two template operands is a general register.  */
7473   if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7474     i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7475                    != (i.tm.operand_types[1].bitfield.class == Reg);
7476
7477   /* Change the opcode based on the operand size given by i.suffix.  */
7478   switch (i.suffix)
7479     {
7480     /* Size floating point instruction.  */
7481     case LONG_MNEM_SUFFIX:
7482       if (i.tm.opcode_modifier.floatmf)
7483         {
               /* NOTE(review): flipping bit 2 presumably selects the opcode
                  of the other memory operand size of the FP insn - confirm
                  against the opcode table.  */
7484           i.tm.base_opcode ^= 4;
7485           break;
7486         }
7487     /* fall through */
7488     case WORD_MNEM_SUFFIX:
7489     case QWORD_MNEM_SUFFIX:
7490       /* It's not a byte, select word/dword operation.  */
7491       if (i.tm.opcode_modifier.w)
7492         {
               /* The W bit lives in bit 3 of short form opcodes and in bit 0
                  otherwise.  */
7493           if (i.short_form)
7494             i.tm.base_opcode |= 8;
7495           else
7496             i.tm.base_opcode |= 1;
7497         }
7498     /* fall through */
7499     case SHORT_MNEM_SUFFIX:
7500       /* Now select between word & dword operations via the operand
7501          size prefix, except for instructions that will ignore this
7502          prefix anyway.  */
7503       if (i.suffix != QWORD_MNEM_SUFFIX
7504           && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7505           && !i.tm.opcode_modifier.floatmf
7506           && !is_any_vex_encoding (&i.tm)
7507           && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7508               || (flag_code == CODE_64BIT
7509                   && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7510         {
7511           unsigned int prefix = DATA_PREFIX_OPCODE;
7512
7513           if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7514             prefix = ADDR_PREFIX_OPCODE;
7515
7516           if (!add_prefix (prefix))
7517             return 0;
7518         }
7519
7520       /* Set mode64 for an operand.  */
7521       if (i.suffix == QWORD_MNEM_SUFFIX
7522           && flag_code == CODE_64BIT
7523           && !i.tm.opcode_modifier.norex64
7524           && !i.tm.opcode_modifier.vexw
7525           /* Special case for xchg %rax,%rax.  It is NOP and doesn't
7526              need rex64. */
7527           && ! (i.operands == 2
7528                 && i.tm.base_opcode == 0x90
7529                 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
7530                 && i.types[0].bitfield.instance == Accum
7531                 && i.types[0].bitfield.qword
7532                 && i.types[1].bitfield.instance == Accum))
7533         i.rex |= REX_W;
7534
7535       break;
7536
7537     case 0:
7538       /* Select word/dword/qword operation with explicit data sizing prefix
7539          when there are no suitable register operands.  */
7540       if (i.tm.opcode_modifier.w
7541           && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7542           && (!i.reg_operands
7543               || (i.reg_operands == 1
7544                       /* ShiftCount */
7545                   && (i.tm.operand_types[0].bitfield.instance == RegC
7546                       /* InOutPortReg */
7547                       || i.tm.operand_types[0].bitfield.instance == RegD
7548                       || i.tm.operand_types[1].bitfield.instance == RegD
7549                       /* CRC32 */
7550                       || is_crc32))))
7551         i.tm.base_opcode |= 1;
7552       break;
7553     }
7554
       /* Insns whose register operand size is governed by the address size
          prefix.  These were excluded from suffix derivation above, hence no
          suffix is expected to be in effect for them.  */
7555   if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
7556     {
7557       gas_assert (!i.suffix);
7558       gas_assert (i.reg_operands);
7559
7560       if (i.tm.operand_types[0].bitfield.instance == Accum
7561           || i.operands == 1)
7562         {
7563           /* The address size override prefix changes the size of the
7564              first operand.  */
7565           if (flag_code == CODE_64BIT
7566               && i.op[0].regs->reg_type.bitfield.word)
7567             {
7568               as_bad (_("16-bit addressing unavailable for `%s'"),
7569                       i.tm.name);
7570               return 0;
7571             }
7572
               /* A 16-bit register in 32-bit mode, or a 32-bit one in the
                  other modes, requires the address size prefix.  */
7573           if ((flag_code == CODE_32BIT
7574                ? i.op[0].regs->reg_type.bitfield.word
7575                : i.op[0].regs->reg_type.bitfield.dword)
7576               && !add_prefix (ADDR_PREFIX_OPCODE))
7577             return 0;
7578         }
7579       else
7580         {
7581           /* Check invalid register operand when the address size override
7582              prefix changes the size of register operands.  */
7583           unsigned int op;
7584           enum { need_word, need_dword, need_qword } need;
7585
7586           /* Check the register operand for the address size prefix if
7587              the memory operand has no real registers, like symbol, DISP
7588              or bogus (x32-only) symbol(%rip) when symbol(%eip) is meant.  */
7589           if (i.mem_operands == 1
7590               && i.reg_operands == 1
7591               && i.operands == 2
7592               && i.types[1].bitfield.class == Reg
7593               && (flag_code == CODE_32BIT
7594                   ? i.op[1].regs->reg_type.bitfield.word
7595                   : i.op[1].regs->reg_type.bitfield.dword)
7596               && ((i.base_reg == NULL && i.index_reg == NULL)
7597 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7598                   || (x86_elf_abi == X86_64_X32_ABI
7599                       && i.base_reg
7600                       && i.base_reg->reg_num == RegIP
7601                       && i.base_reg->reg_type.bitfield.qword))
7602 #else
7603                   || 0)
7604 #endif
7605               && !add_prefix (ADDR_PREFIX_OPCODE))
7606             return 0;
7607
               /* Work out the register width required in the current mode,
                  taking a possible address size prefix into account.  */
7608           if (flag_code == CODE_32BIT)
7609             need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7610           else if (i.prefix[ADDR_PREFIX])
7611             need = need_dword;
7612           else
7613             need = flag_code == CODE_64BIT ? need_qword : need_word;
7614
               /* Every general register operand has to be of that width.  */
7615           for (op = 0; op < i.operands; op++)
7616             {
7617               if (i.types[op].bitfield.class != Reg)
7618                 continue;
7619
7620               switch (need)
7621                 {
7622                 case need_word:
7623                   if (i.op[op].regs->reg_type.bitfield.word)
7624                     continue;
7625                   break;
7626                 case need_dword:
7627                   if (i.op[op].regs->reg_type.bitfield.dword)
7628                     continue;
7629                   break;
7630                 case need_qword:
7631                   if (i.op[op].regs->reg_type.bitfield.qword)
7632                     continue;
7633                   break;
7634                 }
7635
7636               as_bad (_("invalid register operand size for `%s'"),
7637                       i.tm.name);
7638               return 0;
7639             }
7640         }
7641     }
7642
7643   return 1;
7644 }
7645
7646 static int
7647 check_byte_reg (void)
7648 {
7649   int op;
7650
7651   for (op = i.operands; --op >= 0;)
7652     {
7653       /* Skip non-register operands. */
7654       if (i.types[op].bitfield.class != Reg)
7655         continue;
7656
7657       /* If this is an eight bit register, it's OK.  If it's the 16 or
7658          32 bit version of an eight bit register, we will just use the
7659          low portion, and that's OK too.  */
7660       if (i.types[op].bitfield.byte)
7661         continue;
7662
7663       /* I/O port address operands are OK too.  */
7664       if (i.tm.operand_types[op].bitfield.instance == RegD
7665           && i.tm.operand_types[op].bitfield.word)
7666         continue;
7667
7668       /* crc32 only wants its source operand checked here.  */
7669       if (i.tm.base_opcode == 0xf0
7670           && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7671           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2
7672           && op != 0)
7673         continue;
7674
7675       /* Any other register is bad.  */
7676       as_bad (_("`%s%s' not allowed with `%s%c'"),
7677               register_prefix, i.op[op].regs->reg_name,
7678               i.tm.name, i.suffix);
7679       return 0;
7680     }
7681   return 1;
7682 }
7683
7684 static int
7685 check_long_reg (void)
7686 {
7687   int op;
7688
7689   for (op = i.operands; --op >= 0;)
7690     /* Skip non-register operands. */
7691     if (i.types[op].bitfield.class != Reg)
7692       continue;
7693     /* Reject eight bit registers, except where the template requires
7694        them. (eg. movzb)  */
7695     else if (i.types[op].bitfield.byte
7696              && (i.tm.operand_types[op].bitfield.class == Reg
7697                  || i.tm.operand_types[op].bitfield.instance == Accum)
7698              && (i.tm.operand_types[op].bitfield.word
7699                  || i.tm.operand_types[op].bitfield.dword))
7700       {
7701         as_bad (_("`%s%s' not allowed with `%s%c'"),
7702                 register_prefix,
7703                 i.op[op].regs->reg_name,
7704                 i.tm.name,
7705                 i.suffix);
7706         return 0;
7707       }
7708     /* Error if the e prefix on a general reg is missing.  */
7709     else if (i.types[op].bitfield.word
7710              && (i.tm.operand_types[op].bitfield.class == Reg
7711                  || i.tm.operand_types[op].bitfield.instance == Accum)
7712              && i.tm.operand_types[op].bitfield.dword)
7713       {
7714         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7715                 register_prefix, i.op[op].regs->reg_name,
7716                 i.suffix);
7717         return 0;
7718       }
7719     /* Warn if the r prefix on a general reg is present.  */
7720     else if (i.types[op].bitfield.qword
7721              && (i.tm.operand_types[op].bitfield.class == Reg
7722                  || i.tm.operand_types[op].bitfield.instance == Accum)
7723              && i.tm.operand_types[op].bitfield.dword)
7724       {
7725         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7726                 register_prefix, i.op[op].regs->reg_name, i.suffix);
7727         return 0;
7728       }
7729   return 1;
7730 }
7731
7732 static int
7733 check_qword_reg (void)
7734 {
7735   int op;
7736
7737   for (op = i.operands; --op >= 0; )
7738     /* Skip non-register operands. */
7739     if (i.types[op].bitfield.class != Reg)
7740       continue;
7741     /* Reject eight bit registers, except where the template requires
7742        them. (eg. movzb)  */
7743     else if (i.types[op].bitfield.byte
7744              && (i.tm.operand_types[op].bitfield.class == Reg
7745                  || i.tm.operand_types[op].bitfield.instance == Accum)
7746              && (i.tm.operand_types[op].bitfield.word
7747                  || i.tm.operand_types[op].bitfield.dword))
7748       {
7749         as_bad (_("`%s%s' not allowed with `%s%c'"),
7750                 register_prefix,
7751                 i.op[op].regs->reg_name,
7752                 i.tm.name,
7753                 i.suffix);
7754         return 0;
7755       }
7756     /* Warn if the r prefix on a general reg is missing.  */
7757     else if ((i.types[op].bitfield.word
7758               || i.types[op].bitfield.dword)
7759              && (i.tm.operand_types[op].bitfield.class == Reg
7760                  || i.tm.operand_types[op].bitfield.instance == Accum)
7761              && i.tm.operand_types[op].bitfield.qword)
7762       {
7763         /* Prohibit these changes in the 64bit mode, since the
7764            lowering is more complicated.  */
7765         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7766                 register_prefix, i.op[op].regs->reg_name, i.suffix);
7767         return 0;
7768       }
7769   return 1;
7770 }
7771
7772 static int
7773 check_word_reg (void)
7774 {
7775   int op;
7776   for (op = i.operands; --op >= 0;)
7777     /* Skip non-register operands. */
7778     if (i.types[op].bitfield.class != Reg)
7779       continue;
7780     /* Reject eight bit registers, except where the template requires
7781        them. (eg. movzb)  */
7782     else if (i.types[op].bitfield.byte
7783              && (i.tm.operand_types[op].bitfield.class == Reg
7784                  || i.tm.operand_types[op].bitfield.instance == Accum)
7785              && (i.tm.operand_types[op].bitfield.word
7786                  || i.tm.operand_types[op].bitfield.dword))
7787       {
7788         as_bad (_("`%s%s' not allowed with `%s%c'"),
7789                 register_prefix,
7790                 i.op[op].regs->reg_name,
7791                 i.tm.name,
7792                 i.suffix);
7793         return 0;
7794       }
7795     /* Error if the e or r prefix on a general reg is present.  */
7796     else if ((i.types[op].bitfield.dword
7797                  || i.types[op].bitfield.qword)
7798              && (i.tm.operand_types[op].bitfield.class == Reg
7799                  || i.tm.operand_types[op].bitfield.instance == Accum)
7800              && i.tm.operand_types[op].bitfield.word)
7801       {
7802         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7803                 register_prefix, i.op[op].regs->reg_name,
7804                 i.suffix);
7805         return 0;
7806       }
7807   return 1;
7808 }
7809
7810 static int
7811 update_imm (unsigned int j)
7812 {
7813   i386_operand_type overlap = i.types[j];
7814   if (overlap.bitfield.imm8
7815       + overlap.bitfield.imm8s
7816       + overlap.bitfield.imm16
7817       + overlap.bitfield.imm32
7818       + overlap.bitfield.imm32s
7819       + overlap.bitfield.imm64 > 1)
7820     {
7821       static const i386_operand_type imm16 = { .bitfield = { .imm16 = 1 } };
7822       static const i386_operand_type imm32 = { .bitfield = { .imm32 = 1 } };
7823       static const i386_operand_type imm32s = { .bitfield = { .imm32s = 1 } };
7824       static const i386_operand_type imm16_32 = { .bitfield =
7825         { .imm16 = 1, .imm32 = 1 }
7826       };
7827       static const i386_operand_type imm16_32s =  { .bitfield =
7828         { .imm16 = 1, .imm32s = 1 }
7829       };
7830       static const i386_operand_type imm16_32_32s = { .bitfield =
7831         { .imm16 = 1, .imm32 = 1, .imm32s = 1 }
7832       };
7833
7834       if (i.suffix)
7835         {
7836           i386_operand_type temp;
7837
7838           operand_type_set (&temp, 0);
7839           if (i.suffix == BYTE_MNEM_SUFFIX)
7840             {
7841               temp.bitfield.imm8 = overlap.bitfield.imm8;
7842               temp.bitfield.imm8s = overlap.bitfield.imm8s;
7843             }
7844           else if (i.suffix == WORD_MNEM_SUFFIX)
7845             temp.bitfield.imm16 = overlap.bitfield.imm16;
7846           else if (i.suffix == QWORD_MNEM_SUFFIX)
7847             {
7848               temp.bitfield.imm64 = overlap.bitfield.imm64;
7849               temp.bitfield.imm32s = overlap.bitfield.imm32s;
7850             }
7851           else
7852             temp.bitfield.imm32 = overlap.bitfield.imm32;
7853           overlap = temp;
7854         }
7855       else if (operand_type_equal (&overlap, &imm16_32_32s)
7856                || operand_type_equal (&overlap, &imm16_32)
7857                || operand_type_equal (&overlap, &imm16_32s))
7858         {
7859           if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
7860             overlap = imm16;
7861           else
7862             overlap = imm32s;
7863         }
7864       else if (i.prefix[REX_PREFIX] & REX_W)
7865         overlap = operand_type_and (overlap, imm32s);
7866       else if (i.prefix[DATA_PREFIX])
7867         overlap = operand_type_and (overlap,
7868                                     flag_code != CODE_16BIT ? imm16 : imm32);
7869       if (overlap.bitfield.imm8
7870           + overlap.bitfield.imm8s
7871           + overlap.bitfield.imm16
7872           + overlap.bitfield.imm32
7873           + overlap.bitfield.imm32s
7874           + overlap.bitfield.imm64 != 1)
7875         {
7876           as_bad (_("no instruction mnemonic suffix given; "
7877                     "can't determine immediate size"));
7878           return 0;
7879         }
7880     }
7881   i.types[j] = overlap;
7882
7883   return 1;
7884 }
7885
7886 /* Run update_imm on operands 0 and 1 (as many as exist).  Returns 0 as
7887    soon as one of them fails, 1 otherwise.  */
7888 static int
7889 finalize_imm (void)
7890 {
7891   unsigned int limit = i.operands < 2 ? i.operands : 2;
7892   unsigned int idx;
7893
7894   if (limit == 0)
7895     return 1;
7896
7897   for (idx = 0; idx < limit; idx++)
7898     if (!update_imm (idx))
7899       return 0;
7900
7901   /* With at least one operand, slot 2 must not hold an immediate.  */
7902   gas_assert (!operand_type_check (i.types[2], imm));
7903   return 1;
7904 }
7905
/* Finish operand processing for the instruction held in the global I:
   - for SSE2AVX templates, fold explicit REX prefix bits into I.REX;
     otherwise run process_immext when the template has ImmExt set;
   - insert, drop or duplicate register operands (implicit xmm0, the
     duplicated SSE2AVX destination, the REG_KLUDGE register);
   - fill in ModR/M via build_modrm_byte, or merge the segment or
     short-form register number into I.TM.BASE_OPCODE;
   - warn about a segment override on `lea' and, when optimizing, drop it;
   - add a segment prefix if the explicit segment I.SEG[0] is neither the
     default segment nor the one already in I.PREFIX[SEG_PREFIX].

   Returns 1 on success, 0 after a diagnosed error or when add_prefix
   fails.  */
7906 static int
7907 process_operands (void)
7908 {
7909   /* Default segment register this instruction will use for memory
7910      accesses.  NULL means unknown.  This is only for optimizing out
7911      unnecessary segment overrides.  */
7912   const reg_entry *default_seg = NULL;
7913
7914   if (i.tm.opcode_modifier.sse2avx)
7915     {
7916       /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
7917          need converting.  */
7918       i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
7919       i.prefix[REX_PREFIX] = 0;
7920       i.rex_encoding = 0;
7921     }
7922   /* ImmExt should be processed after SSE2AVX.  */
7923   else if (i.tm.opcode_modifier.immext)
7924     process_immext ();
7925
7926   if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
7927     {
7928       static const i386_operand_type regxmm = {
7929         .bitfield = { .class = RegSIMD, .xmmword = 1 }
7930       };
7931       unsigned int dupl = i.operands;   /* First unused operand slot.  */
7932       unsigned int dest = dupl - 1;     /* Current last operand.  */
7933       unsigned int j;
7934
7935       /* The destination must be an xmm register.  */
7936       gas_assert (i.reg_operands
7937                   && MAX_OPERANDS > dupl
7938                   && operand_type_equal (&i.types[dest], &regxmm));
7939
7940       if (i.tm.operand_types[0].bitfield.instance == Accum
7941           && i.tm.operand_types[0].bitfield.xmmword)
7942         {
7943           if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
7944             {
7945               /* Keep xmm0 for instructions with VEX prefix and 3
7946                  sources.  */
7947               i.tm.operand_types[0].bitfield.instance = InstanceNone;
7948               i.tm.operand_types[0].bitfield.class = RegSIMD;
7949               goto duplicate;
7950             }
7951           else
7952             {
7953               /* We remove the first xmm0 and keep the number of
7954                  operands unchanged, which in fact duplicates the
7955                  destination.  */
7956               for (j = 1; j < i.operands; j++)
7957                 {
7958                   i.op[j - 1] = i.op[j];
7959                   i.types[j - 1] = i.types[j];
7960                   i.tm.operand_types[j - 1] = i.tm.operand_types[j];
7961                   i.flags[j - 1] = i.flags[j];
7962                 }
7963             }
7964         }
7965       else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
7966         {
7967           gas_assert ((MAX_OPERANDS - 1) > dupl
7968                       && (i.tm.opcode_modifier.vexsources
7969                           == VEX3SOURCES));
7970
7971           /* Add the implicit xmm0 (as operand 0) for instructions with VEX
7972              prefix and 3 sources; the destination is duplicated below.  */
7973           for (j = i.operands; j > 0; j--)
7974             {
7975               i.op[j] = i.op[j - 1];
7976               i.types[j] = i.types[j - 1];
7977               i.tm.operand_types[j] = i.tm.operand_types[j - 1];
7978               i.flags[j] = i.flags[j - 1];
7979             }
7980           i.op[0].regs
7981             = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
7982           i.types[0] = regxmm;
7983           i.tm.operand_types[0] = regxmm;
7984
7985           i.operands += 2;              /* xmm0 plus the duplicate.  */
7986           i.reg_operands += 2;
7987           i.tm.operands += 2;
7988
7989           dupl++;
7990           dest++;
7991           i.op[dupl] = i.op[dest];
7992           i.types[dupl] = i.types[dest];
7993           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7994           i.flags[dupl] = i.flags[dest];
7995         }
7996       else
7997         {
7998         duplicate:                      /* Append a copy of operand DEST.  */
7999           i.operands++;
8000           i.reg_operands++;
8001           i.tm.operands++;
8002
8003           i.op[dupl] = i.op[dest];
8004           i.types[dupl] = i.types[dest];
8005           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8006           i.flags[dupl] = i.flags[dest];
8007         }
8008
8009        if (i.tm.opcode_modifier.immext)  /* Skipped above for SSE2AVX.  */
8010          process_immext ();
8011     }
8012   else if (i.tm.operand_types[0].bitfield.instance == Accum
8013            && i.tm.operand_types[0].bitfield.xmmword)
8014     {
8015       unsigned int j;
8016
8017       for (j = 1; j < i.operands; j++)
8018         {
8019           i.op[j - 1] = i.op[j];
8020           i.types[j - 1] = i.types[j];
8021
8022           /* We need to adjust fields in i.tm since they are used by
8023              build_modrm_byte.  */
8024           i.tm.operand_types [j - 1] = i.tm.operand_types [j];
8025
8026           i.flags[j - 1] = i.flags[j];
8027         }
8028
8029       i.operands--;                     /* Operand 0 has been dropped.  */
8030       i.reg_operands--;
8031       i.tm.operands--;
8032     }
8033   else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_QUAD_GROUP)
8034     {
8035       unsigned int regnum, first_reg_in_group, last_reg_in_group;
8036
8037       /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
8038       gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
8039       regnum = register_number (i.op[1].regs);
8040       first_reg_in_group = regnum & ~3;  /* Round down to a multiple of 4.  */
8041       last_reg_in_group = first_reg_in_group + 3;
8042       if (regnum != first_reg_in_group)
8043         as_warn (_("source register `%s%s' implicitly denotes"
8044                    " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
8045                  register_prefix, i.op[1].regs->reg_name,
8046                  register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
8047                  register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
8048                  i.tm.name);
8049     }
8050   else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
8051     {
8052       /* The imul $imm, %reg instruction is converted into
8053          imul $imm, %reg, %reg, and the clr %reg instruction
8054          is converted into xor %reg, %reg.  */
8055
8056       unsigned int first_reg_op;
8057
8058       if (operand_type_check (i.types[0], reg))
8059         first_reg_op = 0;
8060       else
8061         first_reg_op = 1;
8062       /* Pretend we saw the extra register operand.  */
8063       gas_assert (i.reg_operands == 1
8064                   && i.op[first_reg_op + 1].regs == 0);
8065       i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
8066       i.types[first_reg_op + 1] = i.types[first_reg_op];
8067       i.operands++;
8068       i.reg_operands++;
8069     }
8070
8071   if (i.tm.opcode_modifier.modrm)
8072     {
8073       /* The opcode is completed (modulo i.tm.extension_opcode which
8074          must be put into the modrm byte).  Now, we make the modrm and
8075          index base bytes based on all the info we've collected.  */
8076
8077       default_seg = build_modrm_byte ();
8078     }
8079   else if (i.types[0].bitfield.class == SReg)
8080     {
           /* Outside 64-bit mode reject POP_SEG_SHORT with segment register
              number 1; in 64-bit mode reject the POP_SEG386_SHORT opcode byte
              (bit 0 ignored) with segment register numbers below 4.  */
8081       if (flag_code != CODE_64BIT
8082           ? i.tm.base_opcode == POP_SEG_SHORT
8083             && i.op[0].regs->reg_num == 1
8084           : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
8085             && i.op[0].regs->reg_num < 4)
8086         {
8087           as_bad (_("you can't `%s %s%s'"),
8088                   i.tm.name, register_prefix, i.op[0].regs->reg_name);
8089           return 0;
8090         }
           /* Segment register numbers above 3 switch a SPACE_BASE opcode
              to its SPACE_0F form.  */
8091       if (i.op[0].regs->reg_num > 3
8092           && i.tm.opcode_modifier.opcodespace == SPACE_BASE )
8093         {
8094           i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
8095           i.tm.opcode_modifier.opcodespace = SPACE_0F;
8096         }
           /* The segment register number goes into opcode bits 3 and up.  */
8097       i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
8098     }
8099   else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
8100            && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
8101     {
8102       default_seg = reg_ds;
8103     }
8104   else if (i.tm.opcode_modifier.isstring)
8105     {
8106       /* For the string instructions that allow a segment override
8107          on one of their operands, the default segment is ds.  */
8108       default_seg = reg_ds;
8109     }
8110   else if (i.short_form)
8111     {
8112       /* The register or float register operand is in operand
8113          0 or 1.  */
8114       const reg_entry *r = i.op[0].regs;
8115
           /* Take operand 1 when there is an immediate operand, or when
              operand 0 is the accumulator and operand 1 is a register.  */
8116       if (i.imm_operands
8117           || (r->reg_type.bitfield.instance == Accum && i.op[1].regs))
8118         r = i.op[1].regs;
8119       /* Register goes in low 3 bits of opcode.  */
8120       i.tm.base_opcode |= r->reg_num;
8121       if ((r->reg_flags & RegRex) != 0)
8122         i.rex |= REX_B;
8123       if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
8124         {
8125           /* Warn about some common errors, but press on regardless.  */
8126           if (i.operands != 2)
8127             {
8128               /* Extraneous `l' suffix on fp insn.  */
8129               as_warn (_("translating to `%s %s%s'"), i.tm.name,
8130                        register_prefix, i.op[0].regs->reg_name);
8131             }
8132           else if (i.op[0].regs->reg_type.bitfield.instance != Accum)
8133             {
8134               /* Reversed arguments on faddp or fmulp.  */
8135               as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
8136                        register_prefix, i.op[!intel_syntax].regs->reg_name,
8137                        register_prefix, i.op[intel_syntax].regs->reg_name);
8138             }
8139         }
8140     }
8141
       /* A segment override on a non-VEX `lea' has no effect: warn unless
          warnings are quiet, and remove the override when optimizing.  */
8142   if ((i.seg[0] || i.prefix[SEG_PREFIX])
8143       && i.tm.base_opcode == 0x8d /* lea */
8144       && i.tm.opcode_modifier.opcodespace == SPACE_BASE
8145       && !is_any_vex_encoding(&i.tm))
8146     {
8147       if (!quiet_warnings)
8148         as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
8149       if (optimize)
8150         {
8151           i.seg[0] = NULL;
8152           i.prefix[SEG_PREFIX] = 0;
8153         }
8154     }
8155
8156   /* If a segment was explicitly specified, and the specified segment
8157      is neither the default nor the one already recorded from a prefix,
8158      use an opcode prefix to select it.  If we never figured out what
8159      the default segment is, then default_seg will be NULL at this
8160      point, and the specified segment prefix will always be used.  */
8161   if (i.seg[0]
8162       && i.seg[0] != default_seg
8163       && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
8164     {
8165       if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
8166         return 0;
8167     }
8168   return 1;
8169 }
8170
8171 /* Record in I.REX and I.VREX that register R needs extension bit REX_BIT.
8172    With DO_SSE2AVX, if R is no REX register but REX_BIT is already set, a
8173    non-null I.VEX.REGISTER_SPECIFIER (asserted == R) is advanced by 8.  */
8174 static INLINE void
8175 set_rex_vrex (const reg_entry *r, unsigned int rex_bit, bool do_sse2avx)
8176 {
8177   if ((r->reg_flags & RegRex) != 0 && (i.rex & rex_bit) != 0)
8178     as_bad (_("same type of prefix used twice"));
8179   if ((r->reg_flags & RegRex) != 0)
8180     i.rex |= rex_bit;
8181   else if (do_sse2avx && (i.rex & rex_bit) != 0 && i.vex.register_specifier)
8182     {
8183       gas_assert (i.vex.register_specifier == r);
8184       i.vex.register_specifier += 8;
8185     }
8186   if ((r->reg_flags & RegVRex) != 0)
8187     i.vrex |= rex_bit;
8188 }
8189
8190 static const reg_entry *
8191 build_modrm_byte (void)
8192 {
8193   const reg_entry *default_seg = NULL;
8194   unsigned int source, dest;
8195   int vex_3_sources;
8196
8197   vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
8198   if (vex_3_sources)
8199     {
8200       unsigned int nds, reg_slot;
8201       expressionS *exp;
8202
8203       dest = i.operands - 1;
8204       nds = dest - 1;
8205
8206       /* There are 2 kinds of instructions:
8207          1. 5 operands: 4 register operands or 3 register operands
8208          plus 1 memory operand plus one Imm4 operand, VexXDS, and
8209          VexW0 or VexW1.  The destination must be either XMM, YMM or
8210          ZMM register.
8211          2. 4 operands: 4 register operands or 3 register operands
8212          plus 1 memory operand, with VexXDS.  */
8213       gas_assert ((i.reg_operands == 4
8214                    || (i.reg_operands == 3 && i.mem_operands == 1))
8215                   && i.tm.opcode_modifier.vexvvvv == VEXXDS
8216                   && i.tm.opcode_modifier.vexw
8217                   && i.tm.operand_types[dest].bitfield.class == RegSIMD);
8218
8219       /* If VexW1 is set, the first non-immediate operand is the source and
8220          the second non-immediate one is encoded in the immediate operand.  */
8221       if (i.tm.opcode_modifier.vexw == VEXW1)
8222         {
8223           source = i.imm_operands;
8224           reg_slot = i.imm_operands + 1;
8225         }
8226       else
8227         {
8228           source = i.imm_operands + 1;
8229           reg_slot = i.imm_operands;
8230         }
8231
8232       if (i.imm_operands == 0)
8233         {
8234           /* When there is no immediate operand, generate an 8bit
8235              immediate operand to encode the first operand.  */
8236           exp = &im_expressions[i.imm_operands++];
8237           i.op[i.operands].imms = exp;
8238           i.types[i.operands].bitfield.imm8 = 1;
8239           i.operands++;
8240
8241           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8242           exp->X_op = O_constant;
8243           exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
8244           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8245         }
8246       else
8247         {
8248           gas_assert (i.imm_operands == 1);
8249           gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
8250           gas_assert (!i.tm.opcode_modifier.immext);
8251
8252           /* Turn on Imm8 again so that output_imm will generate it.  */
8253           i.types[0].bitfield.imm8 = 1;
8254
8255           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8256           i.op[0].imms->X_add_number
8257               |= register_number (i.op[reg_slot].regs) << 4;
8258           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8259         }
8260
8261       gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
8262       i.vex.register_specifier = i.op[nds].regs;
8263     }
8264   else
8265     source = dest = 0;
8266
8267   /* i.reg_operands MUST be the number of real register operands;
8268      implicit registers do not count.  If there are 3 register
8269      operands, it must be a instruction with VexNDS.  For a
8270      instruction with VexNDD, the destination register is encoded
8271      in VEX prefix.  If there are 4 register operands, it must be
8272      a instruction with VEX prefix and 3 sources.  */
8273   if (i.mem_operands == 0
8274       && ((i.reg_operands == 2
8275            && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
8276           || (i.reg_operands == 3
8277               && i.tm.opcode_modifier.vexvvvv == VEXXDS)
8278           || (i.reg_operands == 4 && vex_3_sources)))
8279     {
8280       switch (i.operands)
8281         {
8282         case 2:
8283           source = 0;
8284           break;
8285         case 3:
8286           /* When there are 3 operands, one of them may be immediate,
8287              which may be the first or the last operand.  Otherwise,
8288              the first operand must be shift count register (cl) or it
8289              is an instruction with VexNDS. */
8290           gas_assert (i.imm_operands == 1
8291                       || (i.imm_operands == 0
8292                           && (i.tm.opcode_modifier.vexvvvv == VEXXDS
8293                               || (i.types[0].bitfield.instance == RegC
8294                                   && i.types[0].bitfield.byte))));
8295           if (operand_type_check (i.types[0], imm)
8296               || (i.types[0].bitfield.instance == RegC
8297                   && i.types[0].bitfield.byte))
8298             source = 1;
8299           else
8300             source = 0;
8301           break;
8302         case 4:
8303           /* When there are 4 operands, the first two must be 8bit
8304              immediate operands. The source operand will be the 3rd
8305              one.
8306
8307              For instructions with VexNDS, if the first operand
8308              an imm8, the source operand is the 2nd one.  If the last
8309              operand is imm8, the source operand is the first one.  */
8310           gas_assert ((i.imm_operands == 2
8311                        && i.types[0].bitfield.imm8
8312                        && i.types[1].bitfield.imm8)
8313                       || (i.tm.opcode_modifier.vexvvvv == VEXXDS
8314                           && i.imm_operands == 1
8315                           && (i.types[0].bitfield.imm8
8316                               || i.types[i.operands - 1].bitfield.imm8)));
8317           if (i.imm_operands == 2)
8318             source = 2;
8319           else
8320             {
8321               if (i.types[0].bitfield.imm8)
8322                 source = 1;
8323               else
8324                 source = 0;
8325             }
8326           break;
8327         case 5:
8328           gas_assert (!is_evex_encoding (&i.tm));
8329           gas_assert (i.imm_operands == 1 && vex_3_sources);
8330           break;
8331         default:
8332           abort ();
8333         }
8334
8335       if (!vex_3_sources)
8336         {
8337           dest = source + 1;
8338
8339           if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8340             {
8341               /* For instructions with VexNDS, the register-only source
8342                  operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
8343                  register.  It is encoded in VEX prefix.  */
8344
8345               i386_operand_type op;
8346               unsigned int vvvv;
8347
8348               /* Swap two source operands if needed.  */
8349               if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES)
8350                 {
8351                   vvvv = source;
8352                   source = dest;
8353                 }
8354               else
8355                 vvvv = dest;
8356
8357               op = i.tm.operand_types[vvvv];
8358               if ((dest + 1) >= i.operands
8359                   || ((op.bitfield.class != Reg
8360                        || (!op.bitfield.dword && !op.bitfield.qword))
8361                       && op.bitfield.class != RegSIMD
8362                       && op.bitfield.class != RegMask))
8363                 abort ();
8364               i.vex.register_specifier = i.op[vvvv].regs;
8365               dest++;
8366             }
8367         }
8368
8369       i.rm.mode = 3;
8370       /* One of the register operands will be encoded in the i.rm.reg
8371          field, the other in the combined i.rm.mode and i.rm.regmem
8372          fields.  If no form of this instruction supports a memory
8373          destination operand, then we assume the source operand may
8374          sometimes be a memory operand and so we need to store the
8375          destination in the i.rm.reg field.  */
8376       if (!i.tm.opcode_modifier.regmem
8377           && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
8378         {
8379           i.rm.reg = i.op[dest].regs->reg_num;
8380           i.rm.regmem = i.op[source].regs->reg_num;
8381           set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8382           set_rex_vrex (i.op[source].regs, REX_B, false);
8383         }
8384       else
8385         {
8386           i.rm.reg = i.op[source].regs->reg_num;
8387           i.rm.regmem = i.op[dest].regs->reg_num;
8388           set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8389           set_rex_vrex (i.op[source].regs, REX_R, false);
8390         }
8391       if (flag_code != CODE_64BIT && (i.rex & REX_R))
8392         {
8393           if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
8394             abort ();
8395           i.rex &= ~REX_R;
8396           add_prefix (LOCK_PREFIX_OPCODE);
8397         }
8398     }
8399   else
8400     {                   /* If it's not 2 reg operands...  */
8401       unsigned int mem;
8402
8403       if (i.mem_operands)
8404         {
8405           unsigned int fake_zero_displacement = 0;
8406           unsigned int op;
8407
8408           for (op = 0; op < i.operands; op++)
8409             if (i.flags[op] & Operand_Mem)
8410               break;
8411           gas_assert (op < i.operands);
8412
8413           if (i.tm.opcode_modifier.sib)
8414             {
8415               /* The index register of VSIB shouldn't be RegIZ.  */
8416               if (i.tm.opcode_modifier.sib != SIBMEM
8417                   && i.index_reg->reg_num == RegIZ)
8418                 abort ();
8419
8420               i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8421               if (!i.base_reg)
8422                 {
8423                   i.sib.base = NO_BASE_REGISTER;
8424                   i.sib.scale = i.log2_scale_factor;
8425                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8426                   i.types[op].bitfield.disp32 = 1;
8427                 }
8428
8429               /* Since the mandatory SIB always has index register, so
8430                  the code logic remains unchanged. The non-mandatory SIB
8431                  without index register is allowed and will be handled
8432                  later.  */
8433               if (i.index_reg)
8434                 {
8435                   if (i.index_reg->reg_num == RegIZ)
8436                     i.sib.index = NO_INDEX_REGISTER;
8437                   else
8438                     i.sib.index = i.index_reg->reg_num;
8439                   set_rex_vrex (i.index_reg, REX_X, false);
8440                 }
8441             }
8442
8443           default_seg = reg_ds;
8444
8445           if (i.base_reg == 0)
8446             {
8447               i.rm.mode = 0;
8448               if (!i.disp_operands)
8449                 fake_zero_displacement = 1;
8450               if (i.index_reg == 0)
8451                 {
8452                   /* Both check for VSIB and mandatory non-vector SIB. */
8453                   gas_assert (!i.tm.opcode_modifier.sib
8454                               || i.tm.opcode_modifier.sib == SIBMEM);
8455                   /* Operand is just <disp>  */
8456                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8457                   if (flag_code == CODE_64BIT)
8458                     {
8459                       /* 64bit mode overwrites the 32bit absolute
8460                          addressing by RIP relative addressing and
8461                          absolute addressing is encoded by one of the
8462                          redundant SIB forms.  */
8463                       i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8464                       i.sib.base = NO_BASE_REGISTER;
8465                       i.sib.index = NO_INDEX_REGISTER;
8466                       i.types[op].bitfield.disp32 = 1;
8467                     }
8468                   else if ((flag_code == CODE_16BIT)
8469                            ^ (i.prefix[ADDR_PREFIX] != 0))
8470                     {
8471                       i.rm.regmem = NO_BASE_REGISTER_16;
8472                       i.types[op].bitfield.disp16 = 1;
8473                     }
8474                   else
8475                     {
8476                       i.rm.regmem = NO_BASE_REGISTER;
8477                       i.types[op].bitfield.disp32 = 1;
8478                     }
8479                 }
8480               else if (!i.tm.opcode_modifier.sib)
8481                 {
8482                   /* !i.base_reg && i.index_reg  */
8483                   if (i.index_reg->reg_num == RegIZ)
8484                     i.sib.index = NO_INDEX_REGISTER;
8485                   else
8486                     i.sib.index = i.index_reg->reg_num;
8487                   i.sib.base = NO_BASE_REGISTER;
8488                   i.sib.scale = i.log2_scale_factor;
8489                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8490                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8491                   i.types[op].bitfield.disp32 = 1;
8492                   if ((i.index_reg->reg_flags & RegRex) != 0)
8493                     i.rex |= REX_X;
8494                 }
8495             }
8496           /* RIP addressing for 64bit mode.  */
8497           else if (i.base_reg->reg_num == RegIP)
8498             {
8499               gas_assert (!i.tm.opcode_modifier.sib);
8500               i.rm.regmem = NO_BASE_REGISTER;
8501               i.types[op].bitfield.disp8 = 0;
8502               i.types[op].bitfield.disp16 = 0;
8503               i.types[op].bitfield.disp32 = 1;
8504               i.types[op].bitfield.disp64 = 0;
8505               i.flags[op] |= Operand_PCrel;
8506               if (! i.disp_operands)
8507                 fake_zero_displacement = 1;
8508             }
8509           else if (i.base_reg->reg_type.bitfield.word)
8510             {
8511               gas_assert (!i.tm.opcode_modifier.sib);
8512               switch (i.base_reg->reg_num)
8513                 {
8514                 case 3: /* (%bx)  */
8515                   if (i.index_reg == 0)
8516                     i.rm.regmem = 7;
8517                   else /* (%bx,%si) -> 0, or (%bx,%di) -> 1  */
8518                     i.rm.regmem = i.index_reg->reg_num - 6;
8519                   break;
8520                 case 5: /* (%bp)  */
8521                   default_seg = reg_ss;
8522                   if (i.index_reg == 0)
8523                     {
8524                       i.rm.regmem = 6;
8525                       if (operand_type_check (i.types[op], disp) == 0)
8526                         {
8527                           /* fake (%bp) into 0(%bp)  */
8528                           if (i.disp_encoding == disp_encoding_16bit)
8529                             i.types[op].bitfield.disp16 = 1;
8530                           else
8531                             i.types[op].bitfield.disp8 = 1;
8532                           fake_zero_displacement = 1;
8533                         }
8534                     }
8535                   else /* (%bp,%si) -> 2, or (%bp,%di) -> 3  */
8536                     i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8537                   break;
8538                 default: /* (%si) -> 4 or (%di) -> 5  */
8539                   i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8540                 }
8541               if (!fake_zero_displacement
8542                   && !i.disp_operands
8543                   && i.disp_encoding)
8544                 {
8545                   fake_zero_displacement = 1;
8546                   if (i.disp_encoding == disp_encoding_8bit)
8547                     i.types[op].bitfield.disp8 = 1;
8548                   else
8549                     i.types[op].bitfield.disp16 = 1;
8550                 }
8551               i.rm.mode = mode_from_disp_size (i.types[op]);
8552             }
8553           else /* i.base_reg and 32/64 bit mode  */
8554             {
8555               if (operand_type_check (i.types[op], disp))
8556                 {
8557                   i.types[op].bitfield.disp16 = 0;
8558                   i.types[op].bitfield.disp64 = 0;
8559                   i.types[op].bitfield.disp32 = 1;
8560                 }
8561
8562               if (!i.tm.opcode_modifier.sib)
8563                 i.rm.regmem = i.base_reg->reg_num;
8564               if ((i.base_reg->reg_flags & RegRex) != 0)
8565                 i.rex |= REX_B;
8566               i.sib.base = i.base_reg->reg_num;
8567               /* x86-64 ignores REX prefix bit here to avoid decoder
8568                  complications.  */
8569               if (!(i.base_reg->reg_flags & RegRex)
8570                   && (i.base_reg->reg_num == EBP_REG_NUM
8571                    || i.base_reg->reg_num == ESP_REG_NUM))
8572                   default_seg = reg_ss;
8573               if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8574                 {
8575                   fake_zero_displacement = 1;
8576                   if (i.disp_encoding == disp_encoding_32bit)
8577                     i.types[op].bitfield.disp32 = 1;
8578                   else
8579                     i.types[op].bitfield.disp8 = 1;
8580                 }
8581               i.sib.scale = i.log2_scale_factor;
8582               if (i.index_reg == 0)
8583                 {
8584                   /* Only check for VSIB. */
8585                   gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8586                               && i.tm.opcode_modifier.sib != VECSIB256
8587                               && i.tm.opcode_modifier.sib != VECSIB512);
8588
8589                   /* <disp>(%esp) becomes two byte modrm with no index
8590                      register.  We've already stored the code for esp
8591                      in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8592                      Any base register besides %esp will not use the
8593                      extra modrm byte.  */
8594                   i.sib.index = NO_INDEX_REGISTER;
8595                 }
8596               else if (!i.tm.opcode_modifier.sib)
8597                 {
8598                   if (i.index_reg->reg_num == RegIZ)
8599                     i.sib.index = NO_INDEX_REGISTER;
8600                   else
8601                     i.sib.index = i.index_reg->reg_num;
8602                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8603                   if ((i.index_reg->reg_flags & RegRex) != 0)
8604                     i.rex |= REX_X;
8605                 }
8606
8607               if (i.disp_operands
8608                   && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8609                       || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8610                 i.rm.mode = 0;
8611               else
8612                 {
8613                   if (!fake_zero_displacement
8614                       && !i.disp_operands
8615                       && i.disp_encoding)
8616                     {
8617                       fake_zero_displacement = 1;
8618                       if (i.disp_encoding == disp_encoding_8bit)
8619                         i.types[op].bitfield.disp8 = 1;
8620                       else
8621                         i.types[op].bitfield.disp32 = 1;
8622                     }
8623                   i.rm.mode = mode_from_disp_size (i.types[op]);
8624                 }
8625             }
8626
8627           if (fake_zero_displacement)
8628             {
8629               /* Fakes a zero displacement assuming that i.types[op]
8630                  holds the correct displacement size.  */
8631               expressionS *exp;
8632
8633               gas_assert (i.op[op].disps == 0);
8634               exp = &disp_expressions[i.disp_operands++];
8635               i.op[op].disps = exp;
8636               exp->X_op = O_constant;
8637               exp->X_add_number = 0;
8638               exp->X_add_symbol = (symbolS *) 0;
8639               exp->X_op_symbol = (symbolS *) 0;
8640             }
8641
8642           mem = op;
8643         }
8644       else
8645         mem = ~0;
8646
8647       if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
8648         {
8649           if (operand_type_check (i.types[0], imm))
8650             i.vex.register_specifier = NULL;
8651           else
8652             {
8653               /* VEX.vvvv encodes one of the sources when the first
8654                  operand is not an immediate.  */
8655               if (i.tm.opcode_modifier.vexw == VEXW0)
8656                 i.vex.register_specifier = i.op[0].regs;
8657               else
8658                 i.vex.register_specifier = i.op[1].regs;
8659             }
8660
8661           /* Destination is a XMM register encoded in the ModRM.reg
8662              and VEX.R bit.  */
8663           i.rm.reg = i.op[2].regs->reg_num;
8664           if ((i.op[2].regs->reg_flags & RegRex) != 0)
8665             i.rex |= REX_R;
8666
8667           /* ModRM.rm and VEX.B encodes the other source.  */
8668           if (!i.mem_operands)
8669             {
8670               i.rm.mode = 3;
8671
8672               if (i.tm.opcode_modifier.vexw == VEXW0)
8673                 i.rm.regmem = i.op[1].regs->reg_num;
8674               else
8675                 i.rm.regmem = i.op[0].regs->reg_num;
8676
8677               if ((i.op[1].regs->reg_flags & RegRex) != 0)
8678                 i.rex |= REX_B;
8679             }
8680         }
8681       else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
8682         {
8683           i.vex.register_specifier = i.op[2].regs;
8684           if (!i.mem_operands)
8685             {
8686               i.rm.mode = 3;
8687               i.rm.regmem = i.op[1].regs->reg_num;
8688               if ((i.op[1].regs->reg_flags & RegRex) != 0)
8689                 i.rex |= REX_B;
8690             }
8691         }
8692       /* Fill in i.rm.reg or i.rm.regmem field with register operand
8693          (if any) based on i.tm.extension_opcode.  Again, we must be
8694          careful to make sure that segment/control/debug/test/MMX
8695          registers are coded into the i.rm.reg field.  */
8696       else if (i.reg_operands)
8697         {
8698           unsigned int op;
8699           unsigned int vex_reg = ~0;
8700
8701           for (op = 0; op < i.operands; op++)
8702             if (i.types[op].bitfield.class == Reg
8703                 || i.types[op].bitfield.class == RegBND
8704                 || i.types[op].bitfield.class == RegMask
8705                 || i.types[op].bitfield.class == SReg
8706                 || i.types[op].bitfield.class == RegCR
8707                 || i.types[op].bitfield.class == RegDR
8708                 || i.types[op].bitfield.class == RegTR
8709                 || i.types[op].bitfield.class == RegSIMD
8710                 || i.types[op].bitfield.class == RegMMX)
8711               break;
8712
8713           if (vex_3_sources)
8714             op = dest;
8715           else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8716             {
8717               /* For instructions with VexNDS, the register-only
8718                  source operand is encoded in VEX prefix. */
8719               gas_assert (mem != (unsigned int) ~0);
8720
8721               if (op > mem || i.tm.cpu_flags.bitfield.cpucmpccxadd)
8722                 {
8723                   vex_reg = op++;
8724                   gas_assert (op < i.operands);
8725                 }
8726               else
8727                 {
8728                   /* Check register-only source operand when two source
8729                      operands are swapped.  */
8730                   if (!i.tm.operand_types[op].bitfield.baseindex
8731                       && i.tm.operand_types[op + 1].bitfield.baseindex)
8732                     {
8733                       vex_reg = op;
8734                       op += 2;
8735                       gas_assert (mem == (vex_reg + 1)
8736                                   && op < i.operands);
8737                     }
8738                   else
8739                     {
8740                       vex_reg = op + 1;
8741                       gas_assert (vex_reg < i.operands);
8742                     }
8743                 }
8744             }
8745           else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
8746             {
8747               /* For instructions with VexNDD, the register destination
8748                  is encoded in VEX prefix.  */
8749               if (i.mem_operands == 0)
8750                 {
8751                   /* There is no memory operand.  */
8752                   gas_assert ((op + 2) == i.operands);
8753                   vex_reg = op + 1;
8754                 }
8755               else
8756                 {
8757                   /* There are only 2 non-immediate operands.  */
8758                   gas_assert (op < i.imm_operands + 2
8759                               && i.operands == i.imm_operands + 2);
8760                   vex_reg = i.imm_operands + 1;
8761                 }
8762             }
8763           else
8764             gas_assert (op < i.operands);
8765
8766           if (vex_reg != (unsigned int) ~0)
8767             {
8768               i386_operand_type *type = &i.tm.operand_types[vex_reg];
8769
8770               if ((type->bitfield.class != Reg
8771                    || (!type->bitfield.dword && !type->bitfield.qword))
8772                   && type->bitfield.class != RegSIMD
8773                   && type->bitfield.class != RegMask)
8774                 abort ();
8775
8776               i.vex.register_specifier = i.op[vex_reg].regs;
8777             }
8778
8779           /* Don't set OP operand twice.  */
8780           if (vex_reg != op)
8781             {
8782               /* If there is an extension opcode to put here, the
8783                  register number must be put into the regmem field.  */
8784               if (i.tm.extension_opcode != None)
8785                 {
8786                   i.rm.regmem = i.op[op].regs->reg_num;
8787                   set_rex_vrex (i.op[op].regs, REX_B,
8788                                 i.tm.opcode_modifier.sse2avx);
8789                 }
8790               else
8791                 {
8792                   i.rm.reg = i.op[op].regs->reg_num;
8793                   set_rex_vrex (i.op[op].regs, REX_R,
8794                                 i.tm.opcode_modifier.sse2avx);
8795                 }
8796             }
8797
8798           /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
8799              must set it to 3 to indicate this is a register operand
8800              in the regmem field.  */
8801           if (!i.mem_operands)
8802             i.rm.mode = 3;
8803         }
8804
8805       /* Fill in i.rm.reg field with extension opcode (if any).  */
8806       if (i.tm.extension_opcode != None)
8807         i.rm.reg = i.tm.extension_opcode;
8808     }
8809   return default_seg;
8810 }
8811
8812 static INLINE void
8813 frag_opcode_byte (unsigned char byte)
8814 {
8815   if (now_seg != absolute_section)
8816     FRAG_APPEND_1_CHAR (byte);
8817   else
8818     ++abs_section_offset;
8819 }
8820
8821 static unsigned int
8822 flip_code16 (unsigned int code16)
8823 {
8824   gas_assert (i.tm.operands == 1);
8825
8826   return !(i.prefix[REX_PREFIX] & REX_W)
8827          && (code16 ? i.tm.operand_types[0].bitfield.disp32
8828                     : i.tm.operand_types[0].bitfield.disp16)
8829          ? CODE16 : 0;
8830 }
8831
8832 static void
8833 output_branch (void)
8834 {
8835   char *p;
8836   int size;
8837   int code16;
8838   int prefix;
8839   relax_substateT subtype;
8840   symbolS *sym;
8841   offsetT off;
8842
8843   if (now_seg == absolute_section)
8844     {
8845       as_bad (_("relaxable branches not supported in absolute section"));
8846       return;
8847     }
8848
8849   code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8850   size = i.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
8851
8852   prefix = 0;
8853   if (i.prefix[DATA_PREFIX] != 0)
8854     {
8855       prefix = 1;
8856       i.prefixes -= 1;
8857       code16 ^= flip_code16(code16);
8858     }
8859   /* Pentium4 branch hints.  */
8860   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8861       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8862     {
8863       prefix++;
8864       i.prefixes--;
8865     }
8866   if (i.prefix[REX_PREFIX] != 0)
8867     {
8868       prefix++;
8869       i.prefixes--;
8870     }
8871
8872   /* BND prefixed jump.  */
8873   if (i.prefix[BND_PREFIX] != 0)
8874     {
8875       prefix++;
8876       i.prefixes--;
8877     }
8878
8879   if (i.prefixes != 0)
8880     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8881
8882   /* It's always a symbol;  End frag & setup for relax.
8883      Make sure there is enough room in this frag for the largest
8884      instruction we may generate in md_convert_frag.  This is 2
8885      bytes for the opcode and room for the prefix and largest
8886      displacement.  */
8887   frag_grow (prefix + 2 + 4);
8888   /* Prefix and 1 opcode byte go in fr_fix.  */
8889   p = frag_more (prefix + 1);
8890   if (i.prefix[DATA_PREFIX] != 0)
8891     *p++ = DATA_PREFIX_OPCODE;
8892   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8893       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8894     *p++ = i.prefix[SEG_PREFIX];
8895   if (i.prefix[BND_PREFIX] != 0)
8896     *p++ = BND_PREFIX_OPCODE;
8897   if (i.prefix[REX_PREFIX] != 0)
8898     *p++ = i.prefix[REX_PREFIX];
8899   *p = i.tm.base_opcode;
8900
8901   if ((unsigned char) *p == JUMP_PC_RELATIVE)
8902     subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8903   else if (cpu_arch_flags.bitfield.cpui386)
8904     subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8905   else
8906     subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8907   subtype |= code16;
8908
8909   sym = i.op[0].disps->X_add_symbol;
8910   off = i.op[0].disps->X_add_number;
8911
8912   if (i.op[0].disps->X_op != O_constant
8913       && i.op[0].disps->X_op != O_symbol)
8914     {
8915       /* Handle complex expressions.  */
8916       sym = make_expr_symbol (i.op[0].disps);
8917       off = 0;
8918     }
8919
8920   frag_now->tc_frag_data.code64 = flag_code == CODE_64BIT;
8921
8922   /* 1 possible extra opcode + 4 byte displacement go in var part.
8923      Pass reloc in fr_var.  */
8924   frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8925 }
8926
8927 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8928 /* Return TRUE iff PLT32 relocation should be used for branching to
8929    symbol S.  */
8930
8931 static bool
8932 need_plt32_p (symbolS *s)
8933 {
8934   /* PLT32 relocation is ELF only.  */
8935   if (!IS_ELF)
8936     return false;
8937
8938 #ifdef TE_SOLARIS
8939   /* Don't emit PLT32 relocation on Solaris: neither native linker nor
8940      krtld support it.  */
8941   return false;
8942 #endif
8943
8944   /* Since there is no need to prepare for PLT branch on x86-64, we
8945      can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
8946      be used as a marker for 32-bit PC-relative branches.  */
8947   if (!object_64bit)
8948     return false;
8949
8950   if (s == NULL)
8951     return false;
8952
8953   /* Weak or undefined symbol need PLT32 relocation.  */
8954   if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
8955     return true;
8956
8957   /* Non-global symbol doesn't need PLT32 relocation.  */
8958   if (! S_IS_EXTERNAL (s))
8959     return false;
8960
8961   /* Other global symbols need PLT32 relocation.  NB: Symbol with
8962      non-default visibilities are treated as normal global symbol
8963      so that PLT32 relocation can be used as a marker for 32-bit
8964      PC-relative branches.  It is useful for linker relaxation.  */
8965   return true;
8966 }
8967 #endif
8968
8969 static void
8970 output_jump (void)
8971 {
8972   char *p;
8973   int size;
8974   fixS *fixP;
8975   bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8976
8977   if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8978     {
8979       /* This is a loop or jecxz type instruction.  */
8980       size = 1;
8981       if (i.prefix[ADDR_PREFIX] != 0)
8982         {
8983           frag_opcode_byte (ADDR_PREFIX_OPCODE);
8984           i.prefixes -= 1;
8985         }
8986       /* Pentium4 branch hints.  */
8987       if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8988           || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8989         {
8990           frag_opcode_byte (i.prefix[SEG_PREFIX]);
8991           i.prefixes--;
8992         }
8993     }
8994   else
8995     {
8996       int code16;
8997
8998       code16 = 0;
8999       if (flag_code == CODE_16BIT)
9000         code16 = CODE16;
9001
9002       if (i.prefix[DATA_PREFIX] != 0)
9003         {
9004           frag_opcode_byte (DATA_PREFIX_OPCODE);
9005           i.prefixes -= 1;
9006           code16 ^= flip_code16(code16);
9007         }
9008
9009       size = 4;
9010       if (code16)
9011         size = 2;
9012     }
9013
9014   /* BND prefixed jump.  */
9015   if (i.prefix[BND_PREFIX] != 0)
9016     {
9017       frag_opcode_byte (i.prefix[BND_PREFIX]);
9018       i.prefixes -= 1;
9019     }
9020
9021   if (i.prefix[REX_PREFIX] != 0)
9022     {
9023       frag_opcode_byte (i.prefix[REX_PREFIX]);
9024       i.prefixes -= 1;
9025     }
9026
9027   if (i.prefixes != 0)
9028     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
9029
9030   if (now_seg == absolute_section)
9031     {
9032       abs_section_offset += i.opcode_length + size;
9033       return;
9034     }
9035
9036   p = frag_more (i.opcode_length + size);
9037   switch (i.opcode_length)
9038     {
9039     case 2:
9040       *p++ = i.tm.base_opcode >> 8;
9041       /* Fall through.  */
9042     case 1:
9043       *p++ = i.tm.base_opcode;
9044       break;
9045     default:
9046       abort ();
9047     }
9048
9049 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9050   if (flag_code == CODE_64BIT && size == 4
9051       && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
9052       && need_plt32_p (i.op[0].disps->X_add_symbol))
9053     jump_reloc = BFD_RELOC_X86_64_PLT32;
9054 #endif
9055
9056   jump_reloc = reloc (size, 1, 1, jump_reloc);
9057
9058   fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9059                       i.op[0].disps, 1, jump_reloc);
9060
9061   /* All jumps handled here are signed, but don't unconditionally use a
9062      signed limit check for 32 and 16 bit jumps as we want to allow wrap
9063      around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
9064      respectively.  */
9065   switch (size)
9066     {
9067     case 1:
9068       fixP->fx_signed = 1;
9069       break;
9070
9071     case 2:
9072       if (i.tm.base_opcode == 0xc7f8)
9073         fixP->fx_signed = 1;
9074       break;
9075
9076     case 4:
9077       if (flag_code == CODE_64BIT)
9078         fixP->fx_signed = 1;
9079       break;
9080     }
9081 }
9082
9083 static void
9084 output_interseg_jump (void)
9085 {
9086   char *p;
9087   int size;
9088   int prefix;
9089   int code16;
9090
9091   code16 = 0;
9092   if (flag_code == CODE_16BIT)
9093     code16 = CODE16;
9094
9095   prefix = 0;
9096   if (i.prefix[DATA_PREFIX] != 0)
9097     {
9098       prefix = 1;
9099       i.prefixes -= 1;
9100       code16 ^= CODE16;
9101     }
9102
9103   gas_assert (!i.prefix[REX_PREFIX]);
9104
9105   size = 4;
9106   if (code16)
9107     size = 2;
9108
9109   if (i.prefixes != 0)
9110     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
9111
9112   if (now_seg == absolute_section)
9113     {
9114       abs_section_offset += prefix + 1 + 2 + size;
9115       return;
9116     }
9117
9118   /* 1 opcode; 2 segment; offset  */
9119   p = frag_more (prefix + 1 + 2 + size);
9120
9121   if (i.prefix[DATA_PREFIX] != 0)
9122     *p++ = DATA_PREFIX_OPCODE;
9123
9124   if (i.prefix[REX_PREFIX] != 0)
9125     *p++ = i.prefix[REX_PREFIX];
9126
9127   *p++ = i.tm.base_opcode;
9128   if (i.op[1].imms->X_op == O_constant)
9129     {
9130       offsetT n = i.op[1].imms->X_add_number;
9131
9132       if (size == 2
9133           && !fits_in_unsigned_word (n)
9134           && !fits_in_signed_word (n))
9135         {
9136           as_bad (_("16-bit jump out of range"));
9137           return;
9138         }
9139       md_number_to_chars (p, n, size);
9140     }
9141   else
9142     fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9143                  i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
9144
9145   p += size;
9146   if (i.op[0].imms->X_op == O_constant)
9147     md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
9148   else
9149     fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
9150                  i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
9151 }
9152
9153 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9154 void
9155 x86_cleanup (void)
9156 {
9157   char *p;
9158   asection *seg = now_seg;
9159   subsegT subseg = now_subseg;
9160   asection *sec;
9161   unsigned int alignment, align_size_1;
9162   unsigned int isa_1_descsz, feature_2_descsz, descsz;
9163   unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
9164   unsigned int padding;
9165
9166   if (!IS_ELF || !x86_used_note)
9167     return;
9168
9169   x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
9170
9171   /* The .note.gnu.property section layout:
9172
9173      Field      Length          Contents
9174      ----       ----            ----
9175      n_namsz    4               4
9176      n_descsz   4               The note descriptor size
9177      n_type     4               NT_GNU_PROPERTY_TYPE_0
9178      n_name     4               "GNU"
9179      n_desc     n_descsz        The program property array
9180      ....       ....            ....
9181    */
9182
9183   /* Create the .note.gnu.property section.  */
9184   sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
9185   bfd_set_section_flags (sec,
9186                          (SEC_ALLOC
9187                           | SEC_LOAD
9188                           | SEC_DATA
9189                           | SEC_HAS_CONTENTS
9190                           | SEC_READONLY));
9191
9192   if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
9193     {
9194       align_size_1 = 7;
9195       alignment = 3;
9196     }
9197   else
9198     {
9199       align_size_1 = 3;
9200       alignment = 2;
9201     }
9202
9203   bfd_set_section_alignment (sec, alignment);
9204   elf_section_type (sec) = SHT_NOTE;
9205
9206   /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
9207                                   + 4-byte data  */
9208   isa_1_descsz_raw = 4 + 4 + 4;
9209   /* Align GNU_PROPERTY_X86_ISA_1_USED.  */
9210   isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
9211
9212   feature_2_descsz_raw = isa_1_descsz;
9213   /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
9214                                       + 4-byte data  */
9215   feature_2_descsz_raw += 4 + 4 + 4;
9216   /* Align GNU_PROPERTY_X86_FEATURE_2_USED.  */
9217   feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
9218                       & ~align_size_1);
9219
9220   descsz = feature_2_descsz;
9221   /* Section size: n_namsz + n_descsz + n_type + n_name + n_descsz.  */
9222   p = frag_more (4 + 4 + 4 + 4 + descsz);
9223
9224   /* Write n_namsz.  */
9225   md_number_to_chars (p, (valueT) 4, 4);
9226
9227   /* Write n_descsz.  */
9228   md_number_to_chars (p + 4, (valueT) descsz, 4);
9229
9230   /* Write n_type.  */
9231   md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
9232
9233   /* Write n_name.  */
9234   memcpy (p + 4 * 3, "GNU", 4);
9235
9236   /* Write 4-byte type.  */
9237   md_number_to_chars (p + 4 * 4,
9238                       (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
9239
9240   /* Write 4-byte data size.  */
9241   md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
9242
9243   /* Write 4-byte data.  */
9244   md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
9245
9246   /* Zero out paddings.  */
9247   padding = isa_1_descsz - isa_1_descsz_raw;
9248   if (padding)
9249     memset (p + 4 * 7, 0, padding);
9250
9251   /* Write 4-byte type.  */
9252   md_number_to_chars (p + isa_1_descsz + 4 * 4,
9253                       (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
9254
9255   /* Write 4-byte data size.  */
9256   md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
9257
9258   /* Write 4-byte data.  */
9259   md_number_to_chars (p + isa_1_descsz + 4 * 6,
9260                       (valueT) x86_feature_2_used, 4);
9261
9262   /* Zero out paddings.  */
9263   padding = feature_2_descsz - feature_2_descsz_raw;
9264   if (padding)
9265     memset (p + isa_1_descsz + 4 * 7, 0, padding);
9266
9267   /* We probably can't restore the current segment, for there likely
9268      isn't one yet...  */
9269   if (seg && subseg)
9270     subseg_set (seg, subseg);
9271 }
9272
9273 bool
9274 x86_support_sframe_p (void)
9275 {
9276   /* At this time, SFrame unwind is supported for AMD64 ABI only.  */
9277   return (x86_elf_abi == X86_64_ABI);
9278 }
9279
/* Return true if the return address needs explicit tracking in SFrame
   Frame Row Entries.  On AMD64 it always lives on the stack at a fixed
   offset from the CFA (see x86_sframe_cfa_ra_offset), hence no
   tracking is needed.  */

bool
x86_sframe_ra_tracking_p (void)
{
  return false;
}
9288
9289 offsetT
9290 x86_sframe_cfa_ra_offset (void)
9291 {
9292   gas_assert (x86_elf_abi == X86_64_ABI);
9293   return (offsetT) -8;
9294 }
9295
9296 unsigned char
9297 x86_sframe_get_abi_arch (void)
9298 {
9299   unsigned char sframe_abi_arch = 0;
9300
9301   if (x86_support_sframe_p ())
9302     {
9303       gas_assert (!target_big_endian);
9304       sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
9305     }
9306
9307   return sframe_abi_arch;
9308 }
9309
9310 #endif
9311
9312 static unsigned int
9313 encoding_length (const fragS *start_frag, offsetT start_off,
9314                  const char *frag_now_ptr)
9315 {
9316   unsigned int len = 0;
9317
9318   if (start_frag != frag_now)
9319     {
9320       const fragS *fr = start_frag;
9321
9322       do {
9323         len += fr->fr_fix;
9324         fr = fr->fr_next;
9325       } while (fr && fr != frag_now);
9326     }
9327
9328   return len - start_off + (frag_now_ptr - frag_now->fr_literal);
9329 }
9330
9331 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
9332    be macro-fused with conditional jumps.
9333    NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
9334    or is one of the following format:
9335
9336     cmp m, imm
9337     add m, imm
9338     sub m, imm
9339    test m, imm
9340     and m, imm
9341     inc m
9342     dec m
9343
9344    it is unfusible.  */
9345
9346 static int
9347 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
9348 {
9349   /* No RIP address.  */
9350   if (i.base_reg && i.base_reg->reg_num == RegIP)
9351     return 0;
9352
9353   /* No opcodes outside of base encoding space.  */
9354   if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9355     return 0;
9356
9357   /* add, sub without add/sub m, imm.  */
9358   if (i.tm.base_opcode <= 5
9359       || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
9360       || ((i.tm.base_opcode | 3) == 0x83
9361           && (i.tm.extension_opcode == 0x5
9362               || i.tm.extension_opcode == 0x0)))
9363     {
9364       *mf_cmp_p = mf_cmp_alu_cmp;
9365       return !(i.mem_operands && i.imm_operands);
9366     }
9367
9368   /* and without and m, imm.  */
9369   if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
9370       || ((i.tm.base_opcode | 3) == 0x83
9371           && i.tm.extension_opcode == 0x4))
9372     {
9373       *mf_cmp_p = mf_cmp_test_and;
9374       return !(i.mem_operands && i.imm_operands);
9375     }
9376
9377   /* test without test m imm.  */
9378   if ((i.tm.base_opcode | 1) == 0x85
9379       || (i.tm.base_opcode | 1) == 0xa9
9380       || ((i.tm.base_opcode | 1) == 0xf7
9381           && i.tm.extension_opcode == 0))
9382     {
9383       *mf_cmp_p = mf_cmp_test_and;
9384       return !(i.mem_operands && i.imm_operands);
9385     }
9386
9387   /* cmp without cmp m, imm.  */
9388   if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
9389       || ((i.tm.base_opcode | 3) == 0x83
9390           && (i.tm.extension_opcode == 0x7)))
9391     {
9392       *mf_cmp_p = mf_cmp_alu_cmp;
9393       return !(i.mem_operands && i.imm_operands);
9394     }
9395
9396   /* inc, dec without inc/dec m.   */
9397   if ((i.tm.cpu_flags.bitfield.cpuno64
9398        && (i.tm.base_opcode | 0xf) == 0x4f)
9399       || ((i.tm.base_opcode | 1) == 0xff
9400           && i.tm.extension_opcode <= 0x1))
9401     {
9402       *mf_cmp_p = mf_cmp_incdec;
9403       return !i.mem_operands;
9404     }
9405
9406   return 0;
9407 }
9408
9409 /* Return 1 if a FUSED_JCC_PADDING frag should be generated.  */
9410
9411 static int
9412 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9413 {
9414   /* NB: Don't work with COND_JUMP86 without i386.  */
9415   if (!align_branch_power
9416       || now_seg == absolute_section
9417       || !cpu_arch_flags.bitfield.cpui386
9418       || !(align_branch & align_branch_fused_bit))
9419     return 0;
9420
9421   if (maybe_fused_with_jcc_p (mf_cmp_p))
9422     {
9423       if (last_insn.kind == last_insn_other
9424           || last_insn.seg != now_seg)
9425         return 1;
9426       if (flag_debug)
9427         as_warn_where (last_insn.file, last_insn.line,
9428                        _("`%s` skips -malign-branch-boundary on `%s`"),
9429                        last_insn.name, i.tm.name);
9430     }
9431
9432   return 0;
9433 }
9434
9435 /* Return 1 if a BRANCH_PREFIX frag should be generated.  */
9436
9437 static int
9438 add_branch_prefix_frag_p (void)
9439 {
9440   /* NB: Don't work with COND_JUMP86 without i386.  Don't add prefix
9441      to PadLock instructions since they include prefixes in opcode.  */
9442   if (!align_branch_power
9443       || !align_branch_prefix_size
9444       || now_seg == absolute_section
9445       || i.tm.cpu_flags.bitfield.cpupadlock
9446       || !cpu_arch_flags.bitfield.cpui386)
9447     return 0;
9448
9449   /* Don't add prefix if it is a prefix or there is no operand in case
9450      that segment prefix is special.  */
9451   if (!i.operands || i.tm.opcode_modifier.isprefix)
9452     return 0;
9453
9454   if (last_insn.kind == last_insn_other
9455       || last_insn.seg != now_seg)
9456     return 1;
9457
9458   if (flag_debug)
9459     as_warn_where (last_insn.file, last_insn.line,
9460                    _("`%s` skips -malign-branch-boundary on `%s`"),
9461                    last_insn.name, i.tm.name);
9462
9463   return 0;
9464 }
9465
9466 /* Return 1 if a BRANCH_PADDING frag should be generated.  */
9467
9468 static int
9469 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9470                            enum mf_jcc_kind *mf_jcc_p)
9471 {
9472   int add_padding;
9473
9474   /* NB: Don't work with COND_JUMP86 without i386.  */
9475   if (!align_branch_power
9476       || now_seg == absolute_section
9477       || !cpu_arch_flags.bitfield.cpui386
9478       || i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9479     return 0;
9480
9481   add_padding = 0;
9482
9483   /* Check for jcc and direct jmp.  */
9484   if (i.tm.opcode_modifier.jump == JUMP)
9485     {
9486       if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9487         {
9488           *branch_p = align_branch_jmp;
9489           add_padding = align_branch & align_branch_jmp_bit;
9490         }
9491       else
9492         {
9493           /* Because J<cc> and JN<cc> share same group in macro-fusible table,
9494              igore the lowest bit.  */
9495           *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9496           *branch_p = align_branch_jcc;
9497           if ((align_branch & align_branch_jcc_bit))
9498             add_padding = 1;
9499         }
9500     }
9501   else if ((i.tm.base_opcode | 1) == 0xc3)
9502     {
9503       /* Near ret.  */
9504       *branch_p = align_branch_ret;
9505       if ((align_branch & align_branch_ret_bit))
9506         add_padding = 1;
9507     }
9508   else
9509     {
9510       /* Check for indirect jmp, direct and indirect calls.  */
9511       if (i.tm.base_opcode == 0xe8)
9512         {
9513           /* Direct call.  */
9514           *branch_p = align_branch_call;
9515           if ((align_branch & align_branch_call_bit))
9516             add_padding = 1;
9517         }
9518       else if (i.tm.base_opcode == 0xff
9519                && (i.tm.extension_opcode == 2
9520                    || i.tm.extension_opcode == 4))
9521         {
9522           /* Indirect call and jmp.  */
9523           *branch_p = align_branch_indirect;
9524           if ((align_branch & align_branch_indirect_bit))
9525             add_padding = 1;
9526         }
9527
9528       if (add_padding
9529           && i.disp_operands
9530           && tls_get_addr
9531           && (i.op[0].disps->X_op == O_symbol
9532               || (i.op[0].disps->X_op == O_subtract
9533                   && i.op[0].disps->X_op_symbol == GOT_symbol)))
9534         {
9535           symbolS *s = i.op[0].disps->X_add_symbol;
9536           /* No padding to call to global or undefined tls_get_addr.  */
9537           if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9538               && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9539             return 0;
9540         }
9541     }
9542
9543   if (add_padding
9544       && last_insn.kind != last_insn_other
9545       && last_insn.seg == now_seg)
9546     {
9547       if (flag_debug)
9548         as_warn_where (last_insn.file, last_insn.line,
9549                        _("`%s` skips -malign-branch-boundary on `%s`"),
9550                        last_insn.name, i.tm.name);
9551       return 0;
9552     }
9553
9554   return add_padding;
9555 }
9556
/* Emit the encoding of the current instruction, described by the global
   `i', at the current position in the current frag.

   In order, this function:
     - for ELF output with x86_used_note set, accumulates the GNU
       property bits in x86_feature_2_used and x86_isa_1_used that the
       insn implies;
     - ties DWARF line info to the insn start via dwarf2_emit_insn;
     - when add_branch_padding_frag_p says so, creates a BRANCH_PADDING
       machine dependent frag ahead of the insn;
     - hands jumps over to output_branch, output_jump or
       output_interseg_jump; for every other insn emits prefixes (or the
       VEX prefix bytes), opcode bytes, ModRM/SIB, displacements and
       immediates, possibly preceded by a FUSED_JCC_PADDING or
       BRANCH_PREFIX frag whose limits are filled in once the insn length
       is known;
     - when align_branch_power is set, closes the current frag so that
       each insn sits in a frag of its own.

   In the absolute section no bytes are written; only abs_section_offset
   is advanced.  */
static void
output_insn (void)
{
  /* Frag and offset at which this insn starts; passed on to output_disp,
     output_imm and encoding_length.  */
  fragS *insn_start_frag;
  offsetT insn_start_off;
  /* The padding/prefix frag created for this insn, if any.  */
  fragS *fragP = NULL;
  enum align_branch_kind branch = align_branch_none;
  /* The initializer is arbitrary just to avoid uninitialized error.
     it's actually either assigned in add_branch_padding_frag_p
     or never be used.  */
  enum mf_jcc_kind mf_jcc = mf_jcc_jo;

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  /* Accumulate the GNU property "used" bits implied by this insn.  Note
     that this also updates i.xstate along the way.  */
  if (IS_ELF && x86_used_note && now_seg != absolute_section)
    {
      if ((i.xstate & xstate_tmm) == xstate_tmm
          || i.tm.cpu_flags.bitfield.cpuamx_tile)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;

      if (i.tm.cpu_flags.bitfield.cpu8087
          || i.tm.cpu_flags.bitfield.cpu287
          || i.tm.cpu_flags.bitfield.cpu387
          || i.tm.cpu_flags.bitfield.cpu687
          || i.tm.cpu_flags.bitfield.cpufisttp)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;

      if ((i.xstate & xstate_mmx)
          || (i.tm.opcode_modifier.opcodespace == SPACE_0F
              && !is_any_vex_encoding (&i.tm)
              && (i.tm.base_opcode == 0x77 /* emms */
                  || i.tm.base_opcode == 0x0e /* femms */)))
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;

      /* A vector register used as index register contributes to the
         vector state in use as well.  */
      if (i.index_reg)
        {
          if (i.index_reg->reg_type.bitfield.zmmword)
            i.xstate |= xstate_zmm;
          else if (i.index_reg->reg_type.bitfield.ymmword)
            i.xstate |= xstate_ymm;
          else if (i.index_reg->reg_type.bitfield.xmmword)
            i.xstate |= xstate_xmm;
        }

      /* vzeroall / vzeroupper */
      if (i.tm.base_opcode == 0x77 && i.tm.cpu_flags.bitfield.cpuavx)
        i.xstate |= xstate_ymm;

      if ((i.xstate & xstate_xmm)
          /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
          || (i.tm.base_opcode == 0xae
              && (i.tm.cpu_flags.bitfield.cpusse
                  || i.tm.cpu_flags.bitfield.cpuavx))
          || i.tm.cpu_flags.bitfield.cpuwidekl
          || i.tm.cpu_flags.bitfield.cpukl)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;

      if ((i.xstate & xstate_ymm) == xstate_ymm)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
      if ((i.xstate & xstate_zmm) == xstate_zmm)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
      if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
      if (i.tm.cpu_flags.bitfield.cpufxsr)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
      if (i.tm.cpu_flags.bitfield.cpuxsave)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
      if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
      if (i.tm.cpu_flags.bitfield.cpuxsavec)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;

      /* Any feature bit recorded so far (by this or an earlier insn)
         also marks the baseline ISA level as used.  */
      if (x86_feature_2_used
          || i.tm.cpu_flags.bitfield.cpucmov
          || i.tm.cpu_flags.bitfield.cpusyscall
          || (i.tm.opcode_modifier.opcodespace == SPACE_0F
              && i.tm.base_opcode == 0xc7
              && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
              && i.tm.extension_opcode == 1) /* cmpxchg8b */)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
      if (i.tm.cpu_flags.bitfield.cpusse3
          || i.tm.cpu_flags.bitfield.cpussse3
          || i.tm.cpu_flags.bitfield.cpusse4_1
          || i.tm.cpu_flags.bitfield.cpusse4_2
          || i.tm.cpu_flags.bitfield.cpucx16
          || i.tm.cpu_flags.bitfield.cpupopcnt
          /* LAHF-SAHF insns in 64-bit mode.  */
          || (flag_code == CODE_64BIT
              && (i.tm.base_opcode | 1) == 0x9f
              && i.tm.opcode_modifier.opcodespace == SPACE_BASE))
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
      if (i.tm.cpu_flags.bitfield.cpuavx
          || i.tm.cpu_flags.bitfield.cpuavx2
          /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
             XOP, FMA4, LWP, TBM, and AMX.  */
          || (i.tm.opcode_modifier.vex
              && !i.tm.cpu_flags.bitfield.cpuavx512f
              && !i.tm.cpu_flags.bitfield.cpuavx512bw
              && !i.tm.cpu_flags.bitfield.cpuavx512dq
              && !i.tm.cpu_flags.bitfield.cpuxop
              && !i.tm.cpu_flags.bitfield.cpufma4
              && !i.tm.cpu_flags.bitfield.cpulwp
              && !i.tm.cpu_flags.bitfield.cputbm
              && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
          || i.tm.cpu_flags.bitfield.cpuf16c
          || i.tm.cpu_flags.bitfield.cpufma
          || i.tm.cpu_flags.bitfield.cpulzcnt
          || i.tm.cpu_flags.bitfield.cpumovbe
          || i.tm.cpu_flags.bitfield.cpuxsaves
          || (x86_feature_2_used
              & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
                 | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
                 | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
      if (i.tm.cpu_flags.bitfield.cpuavx512f
          || i.tm.cpu_flags.bitfield.cpuavx512bw
          || i.tm.cpu_flags.bitfield.cpuavx512dq
          || i.tm.cpu_flags.bitfield.cpuavx512vl
          /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
             AVX512-4FMAPS, and AVX512-4VNNIW.  */
          || (i.tm.opcode_modifier.evex
              && !i.tm.cpu_flags.bitfield.cpuavx512er
              && !i.tm.cpu_flags.bitfield.cpuavx512pf
              && !i.tm.cpu_flags.bitfield.cpuavx512_4fmaps
              && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
    }
#endif

  /* Tie dwarf2 debug info to the address at the start of the insn.
     We can't do this after the insn has been output as the current
     frag may have been closed off.  eg. by frag_var.  */
  dwarf2_emit_insn (0);

  insn_start_frag = frag_now;
  insn_start_off = frag_now_fix ();

  /* For a branch that is to be aligned, put a variable-size padding frag
     in front of it.  BRANCH and MF_JCC are filled in by the predicate.  */
  if (add_branch_padding_frag_p (&branch, &mf_jcc))
    {
      char *p;
      /* Branch can be 8 bytes.  Leave some room for prefixes.  */
      unsigned int max_branch_padding_size = 14;

      /* Align section to boundary.  */
      record_alignment (now_seg, align_branch_power);

      /* Make room for padding.  */
      frag_grow (max_branch_padding_size);

      /* Start of the padding.  */
      p = frag_more (0);

      /* Grab the frag before frag_var is called, so that the
         tc_frag_data fields below are set on the padding frag.  */
      fragP = frag_now;

      frag_var (rs_machine_dependent, max_branch_padding_size, 0,
                ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
                NULL, 0, p);

      fragP->tc_frag_data.mf_type = mf_jcc;
      fragP->tc_frag_data.branch_type = branch;
      fragP->tc_frag_data.max_bytes = max_branch_padding_size;
    }

  /* Warn, once only, when assembling for a pre-386 CPU outside of 16-bit
     mode.  */
  if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
      && !pre_386_16bit_warned)
    {
      as_warn (_("use .code16 to ensure correct addressing mode"));
      pre_386_16bit_warned = true;
    }

  /* Output jumps.  */
  if (i.tm.opcode_modifier.jump == JUMP)
    output_branch ();
  else if (i.tm.opcode_modifier.jump == JUMP_BYTE
           || i.tm.opcode_modifier.jump == JUMP_DWORD)
    output_jump ();
  else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
    output_interseg_jump ();
  else
    {
      /* Output normal instructions here.  */
      char *p;
      unsigned char *q;
      unsigned int j;
      enum mf_cmp_kind mf_cmp;

      if (avoid_fence
          && (i.tm.base_opcode == 0xaee8
              || i.tm.base_opcode == 0xaef0
              || i.tm.base_opcode == 0xaef8))
        {
          /* Encode lfence, mfence, and sfence as
             f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
          if (flag_code == CODE_16BIT)
            as_bad (_("Cannot convert `%s' in 16-bit mode"), i.tm.name);
          else if (omit_lock_prefix)
            as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
                    i.tm.name);
          else if (now_seg != absolute_section)
            {
              /* The low four bytes of the replacement; the fifth byte
                 written below comes out as zero.  */
              offsetT val = 0x240483f0ULL;

              p = frag_more (5);
              md_number_to_chars (p, val, 5);
            }
          else
            abs_section_offset += 5;
          /* Nothing else is emitted for the fence, and the frag
             termination at the end of this function is skipped too.  */
          return;
        }

      /* Some processors fail on LOCK prefix.  This option makes
         assembler ignore LOCK prefix and serves as a workaround.  */
      if (omit_lock_prefix)
        {
          /* A standalone LOCK prefix insn is dropped altogether.  */
          if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
              && i.tm.opcode_modifier.isprefix)
            return;
          i.prefix[LOCK_PREFIX] = 0;
        }

      if (branch)
        /* Skip if this is a branch.  */
        ;
      else if (add_fused_jcc_padding_frag_p (&mf_cmp))
        {
          /* Make room for padding.  */
          frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
          p = frag_more (0);

          fragP = frag_now;

          frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
                    ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
                    NULL, 0, p);

          fragP->tc_frag_data.mf_type = mf_cmp;
          fragP->tc_frag_data.branch_type = align_branch_fused;
          fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
        }
      else if (add_branch_prefix_frag_p ())
        {
          /* Provisional upper bound; it is tightened further down once
             the length of the insn is known.  */
          unsigned int max_prefix_size = align_branch_prefix_size;

          /* Make room for padding.  */
          frag_grow (max_prefix_size);
          p = frag_more (0);

          fragP = frag_now;

          frag_var (rs_machine_dependent, max_prefix_size, 0,
                    ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
                    NULL, 0, p);

          fragP->tc_frag_data.max_bytes = max_prefix_size;
        }

      /* Since the VEX/EVEX prefix contains the implicit prefix, we
         don't need the explicit prefix.  */
      if (!is_any_vex_encoding (&i.tm))
        {
          switch (i.tm.opcode_modifier.opcodeprefix)
            {
            case PREFIX_0X66:
              add_prefix (0x66);
              break;
            case PREFIX_0XF2:
              add_prefix (0xf2);
              break;
            case PREFIX_0XF3:
              /* For PadLock insns don't add 0xf3 again when it is
                 already present as REP prefix.  */
              if (!i.tm.cpu_flags.bitfield.cpupadlock
                  || (i.prefix[REP_PREFIX] != 0xf3))
                add_prefix (0xf3);
              break;
            case PREFIX_NONE:
              switch (i.opcode_length)
                {
                case 2:
                  break;
                case 1:
                  /* Check for pseudo prefixes.  */
                  if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
                    break;
                  as_bad_where (insn_start_frag->fr_file,
                                insn_start_frag->fr_line,
                                _("pseudo prefix without instruction"));
                  return;
                default:
                  abort ();
                }
              break;
            default:
              abort ();
            }

#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
          /* For x32, add a dummy REX_OPCODE prefix for mov/add with
             R_X86_64_GOTTPOFF relocation so that linker can safely
             perform IE->LE optimization.  A dummy REX_OPCODE prefix
             is also needed for lea with R_X86_64_GOTPC32_TLSDESC
             relocation for GDesc -> IE/LE optimization.  */
          if (x86_elf_abi == X86_64_X32_ABI
              && i.operands == 2
              && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
                  || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
              && i.prefix[REX_PREFIX] == 0)
            add_prefix (REX_OPCODE);
#endif

          /* The prefix bytes.  J merely counts iterations here; Q walks
             i.prefix from its first to its last slot, so the prefixes
             are emitted in array order.  */
          for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
            if (*q)
              frag_opcode_byte (*q);
        }
      else
        {
          /* Only segment and address size prefixes may accompany a
             VEX/EVEX encoding as separate bytes.  */
          for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
            if (*q)
              switch (j)
                {
                case SEG_PREFIX:
                case ADDR_PREFIX:
                  frag_opcode_byte (*q);
                  break;
                default:
                  /* There should be no other prefixes for instructions
                     with VEX prefix.  */
                  abort ();
                }

          /* For EVEX instructions i.vrex should become 0 after
             build_evex_prefix.  For VEX instructions upper 16 registers
             aren't available, so VREX should be 0.  */
          if (i.vrex)
            abort ();
          /* Now the VEX prefix.  */
          if (now_seg != absolute_section)
            {
              p = frag_more (i.vex.length);
              for (j = 0; j < i.vex.length; j++)
                p[j] = i.vex.bytes[j];
            }
          else
            abs_section_offset += i.vex.length;
        }

      /* Now the opcode; be careful about word order here!  */
      j = i.opcode_length;
      /* Without a VEX/EVEX prefix the opcode space is expressed through
         explicit escape bytes (0x0f, optionally followed by 0x38 or
         0x3a); include them in the byte count.  */
      if (!i.vex.length)
        switch (i.tm.opcode_modifier.opcodespace)
          {
          case SPACE_BASE:
            break;
          case SPACE_0F:
            ++j;
            break;
          case SPACE_0F38:
          case SPACE_0F3A:
            j += 2;
            break;
          default:
            abort ();
          }

      if (now_seg == absolute_section)
        abs_section_offset += j;
      else if (j == 1)
        {
          FRAG_APPEND_1_CHAR (i.tm.base_opcode);
        }
      else
        {
          p = frag_more (j);
          /* Escape byte(s) first, matching the count computed above.  */
          if (!i.vex.length
              && i.tm.opcode_modifier.opcodespace != SPACE_BASE)
            {
              *p++ = 0x0f;
              if (i.tm.opcode_modifier.opcodespace != SPACE_0F)
                *p++ = i.tm.opcode_modifier.opcodespace == SPACE_0F38
                       ? 0x38 : 0x3a;
            }

          switch (i.opcode_length)
            {
            case 2:
              /* Put out high byte first: can't use md_number_to_chars!  */
              *p++ = (i.tm.base_opcode >> 8) & 0xff;
              /* Fall through.  */
            case 1:
              *p = i.tm.base_opcode & 0xff;
              break;
            default:
              abort ();
              break;
            }

        }

      /* Now the modrm byte and sib byte (if present).  */
      if (i.tm.opcode_modifier.modrm)
        {
          frag_opcode_byte ((i.rm.regmem << 0)
                             | (i.rm.reg << 3)
                             | (i.rm.mode << 6));
          /* If i.rm.regmem == ESP (4)
             && i.rm.mode != (Register mode)
             && not 16 bit
             ==> need second modrm byte.  */
          if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
              && i.rm.mode != 3
              && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
            frag_opcode_byte ((i.sib.base << 0)
                              | (i.sib.index << 3)
                              | (i.sib.scale << 6));
        }

      if (i.disp_operands)
        output_disp (insn_start_frag, insn_start_off);

      if (i.imm_operands)
        output_imm (insn_start_frag, insn_start_off);

      /*
       * frag_now_fix () returning plain abs_section_offset when we're in the
       * absolute section, and abs_section_offset not getting updated as data
       * gets added to the frag breaks the logic below.
       */
      if (now_seg != absolute_section)
        {
          /* From here on J is the total encoded length of the insn.  */
          j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
          if (j > 15)
            as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
                     j);
          else if (fragP)
            {
              /* NB: Don't add prefix with GOTPC relocation since
                 output_disp() above depends on the fixed encoding
                 length.  Can't add prefix with TLS relocation since
                 it breaks TLS linker optimization.  */
              unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
              /* Prefix count on the current instruction.  */
              unsigned int count = i.vex.length;
              unsigned int k;
              for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
                /* REX byte is encoded in VEX/EVEX prefix.  */
                if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
                  count++;

              /* Count prefixes for extended opcode maps.  */
              if (!i.vex.length)
                switch (i.tm.opcode_modifier.opcodespace)
                  {
                  case SPACE_BASE:
                    break;
                  case SPACE_0F:
                    count++;
                    break;
                  case SPACE_0F38:
                  case SPACE_0F3A:
                    count += 2;
                    break;
                  default:
                    abort ();
                  }

              if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
                  == BRANCH_PREFIX)
                {
                  /* Set the maximum prefix size in BRANCH_PREFIX
                     frag.  */
                  if (fragP->tc_frag_data.max_bytes > max)
                    fragP->tc_frag_data.max_bytes = max;
                  if (fragP->tc_frag_data.max_bytes > count)
                    fragP->tc_frag_data.max_bytes -= count;
                  else
                    fragP->tc_frag_data.max_bytes = 0;
                }
              else
                {
                  /* Remember the maximum prefix size in FUSED_JCC_PADDING
                     frag.  */
                  unsigned int max_prefix_size;
                  if (align_branch_prefix_size > max)
                    max_prefix_size = max;
                  else
                    max_prefix_size = align_branch_prefix_size;
                  if (max_prefix_size > count)
                    fragP->tc_frag_data.max_prefix_length
                      = max_prefix_size - count;
                }

              /* Use existing segment prefix if possible.  Use CS
                 segment prefix in 64-bit mode.  In 32-bit mode, use SS
                 segment prefix with ESP/EBP base register and use DS
                 segment prefix without ESP/EBP base register.  */
              if (i.prefix[SEG_PREFIX])
                fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
              else if (flag_code == CODE_64BIT)
                fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
              else if (i.base_reg
                       && (i.base_reg->reg_num == 4
                           || i.base_reg->reg_num == 5))
                fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
              else
                fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
            }
        }
    }

  /* NB: Don't work with COND_JUMP86 without i386.  */
  if (align_branch_power
      && now_seg != absolute_section
      && cpu_arch_flags.bitfield.cpui386)
    {
      /* Terminate each frag so that we can add prefix and check for
         fused jcc.  */
      frag_wane (frag_now);
      frag_new (0);
    }

#ifdef DEBUG386
  if (flag_debug)
    {
      pi ("" /*line*/, &i);
    }
#endif /* DEBUG386  */
}
10082
10083 /* Return the size of the displacement operand N.  */
10084
10085 static int
10086 disp_size (unsigned int n)
10087 {
10088   int size = 4;
10089
10090   if (i.types[n].bitfield.disp64)
10091     size = 8;
10092   else if (i.types[n].bitfield.disp8)
10093     size = 1;
10094   else if (i.types[n].bitfield.disp16)
10095     size = 2;
10096   return size;
10097 }
10098
10099 /* Return the size of the immediate operand N.  */
10100
10101 static int
10102 imm_size (unsigned int n)
10103 {
10104   int size = 4;
10105   if (i.types[n].bitfield.imm64)
10106     size = 8;
10107   else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
10108     size = 1;
10109   else if (i.types[n].bitfield.imm16)
10110     size = 2;
10111   return size;
10112 }
10113
10114 static void
10115 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
10116 {
10117   char *p;
10118   unsigned int n;
10119
10120   for (n = 0; n < i.operands; n++)
10121     {
10122       if (operand_type_check (i.types[n], disp))
10123         {
10124           int size = disp_size (n);
10125
10126           if (now_seg == absolute_section)
10127             abs_section_offset += size;
10128           else if (i.op[n].disps->X_op == O_constant)
10129             {
10130               offsetT val = i.op[n].disps->X_add_number;
10131
10132               val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
10133                                      size);
10134               p = frag_more (size);
10135               md_number_to_chars (p, val, size);
10136             }
10137           else
10138             {
10139               enum bfd_reloc_code_real reloc_type;
10140               bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
10141               bool sign = (flag_code == CODE_64BIT && size == 4
10142                            && (!want_disp32 (&i.tm)
10143                                || (i.tm.opcode_modifier.jump && !i.jumpabsolute
10144                                    && !i.types[n].bitfield.baseindex)))
10145                           || pcrel;
10146               fixS *fixP;
10147
10148               /* We can't have 8 bit displacement here.  */
10149               gas_assert (!i.types[n].bitfield.disp8);
10150
10151               /* The PC relative address is computed relative
10152                  to the instruction boundary, so in case immediate
10153                  fields follows, we need to adjust the value.  */
10154               if (pcrel && i.imm_operands)
10155                 {
10156                   unsigned int n1;
10157                   int sz = 0;
10158
10159                   for (n1 = 0; n1 < i.operands; n1++)
10160                     if (operand_type_check (i.types[n1], imm))
10161                       {
10162                         /* Only one immediate is allowed for PC
10163                            relative address.  */
10164                         gas_assert (sz == 0);
10165                         sz = imm_size (n1);
10166                         i.op[n].disps->X_add_number -= sz;
10167                       }
10168                   /* We should find the immediate.  */
10169                   gas_assert (sz != 0);
10170                 }
10171
10172               p = frag_more (size);
10173               reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
10174               if (GOT_symbol
10175                   && GOT_symbol == i.op[n].disps->X_add_symbol
10176                   && (((reloc_type == BFD_RELOC_32
10177                         || reloc_type == BFD_RELOC_X86_64_32S
10178                         || (reloc_type == BFD_RELOC_64
10179                             && object_64bit))
10180                        && (i.op[n].disps->X_op == O_symbol
10181                            || (i.op[n].disps->X_op == O_add
10182                                && ((symbol_get_value_expression
10183                                     (i.op[n].disps->X_op_symbol)->X_op)
10184                                    == O_subtract))))
10185                       || reloc_type == BFD_RELOC_32_PCREL))
10186                 {
10187                   if (!object_64bit)
10188                     {
10189                       reloc_type = BFD_RELOC_386_GOTPC;
10190                       i.has_gotpc_tls_reloc = true;
10191                       i.op[n].disps->X_add_number +=
10192                         encoding_length (insn_start_frag, insn_start_off, p);
10193                     }
10194                   else if (reloc_type == BFD_RELOC_64)
10195                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
10196                   else
10197                     /* Don't do the adjustment for x86-64, as there
10198                        the pcrel addressing is relative to the _next_
10199                        insn, and that is taken care of in other code.  */
10200                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
10201                 }
10202               else if (align_branch_power)
10203                 {
10204                   switch (reloc_type)
10205                     {
10206                     case BFD_RELOC_386_TLS_GD:
10207                     case BFD_RELOC_386_TLS_LDM:
10208                     case BFD_RELOC_386_TLS_IE:
10209                     case BFD_RELOC_386_TLS_IE_32:
10210                     case BFD_RELOC_386_TLS_GOTIE:
10211                     case BFD_RELOC_386_TLS_GOTDESC:
10212                     case BFD_RELOC_386_TLS_DESC_CALL:
10213                     case BFD_RELOC_X86_64_TLSGD:
10214                     case BFD_RELOC_X86_64_TLSLD:
10215                     case BFD_RELOC_X86_64_GOTTPOFF:
10216                     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
10217                     case BFD_RELOC_X86_64_TLSDESC_CALL:
10218                       i.has_gotpc_tls_reloc = true;
10219                     default:
10220                       break;
10221                     }
10222                 }
10223               fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
10224                                   size, i.op[n].disps, pcrel,
10225                                   reloc_type);
10226
10227               if (flag_code == CODE_64BIT && size == 4 && pcrel
10228                   && !i.prefix[ADDR_PREFIX])
10229                 fixP->fx_signed = 1;
10230
10231               /* Check for "call/jmp *mem", "mov mem, %reg",
10232                  "test %reg, mem" and "binop mem, %reg" where binop
10233                  is one of adc, add, and, cmp, or, sbb, sub, xor
10234                  instructions without data prefix.  Always generate
10235                  R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
10236               if (i.prefix[DATA_PREFIX] == 0
10237                   && (generate_relax_relocations
10238                       || (!object_64bit
10239                           && i.rm.mode == 0
10240                           && i.rm.regmem == 5))
10241                   && (i.rm.mode == 2
10242                       || (i.rm.mode == 0 && i.rm.regmem == 5))
10243                   && i.tm.opcode_modifier.opcodespace == SPACE_BASE
10244                   && ((i.operands == 1
10245                        && i.tm.base_opcode == 0xff
10246                        && (i.rm.reg == 2 || i.rm.reg == 4))
10247                       || (i.operands == 2
10248                           && (i.tm.base_opcode == 0x8b
10249                               || i.tm.base_opcode == 0x85
10250                               || (i.tm.base_opcode & ~0x38) == 0x03))))
10251                 {
10252                   if (object_64bit)
10253                     {
10254                       fixP->fx_tcbit = i.rex != 0;
10255                       if (i.base_reg
10256                           && (i.base_reg->reg_num == RegIP))
10257                       fixP->fx_tcbit2 = 1;
10258                     }
10259                   else
10260                     fixP->fx_tcbit2 = 1;
10261                 }
10262             }
10263         }
10264     }
10265 }
10266
10267 static void
10268 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
10269 {
10270   char *p;
10271   unsigned int n;
10272
10273   for (n = 0; n < i.operands; n++)
10274     {
10275       if (operand_type_check (i.types[n], imm))
10276         {
10277           int size = imm_size (n);
10278
10279           if (now_seg == absolute_section)
10280             abs_section_offset += size;
10281           else if (i.op[n].imms->X_op == O_constant)
10282             {
10283               offsetT val;
10284
10285               val = offset_in_range (i.op[n].imms->X_add_number,
10286                                      size);
10287               p = frag_more (size);
10288               md_number_to_chars (p, val, size);
10289             }
10290           else
10291             {
10292               /* Not absolute_section.
10293                  Need a 32-bit fixup (don't support 8bit
10294                  non-absolute imms).  Try to support other
10295                  sizes ...  */
10296               enum bfd_reloc_code_real reloc_type;
10297               int sign;
10298
10299               if (i.types[n].bitfield.imm32s
10300                   && (i.suffix == QWORD_MNEM_SUFFIX
10301                       || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
10302                 sign = 1;
10303               else
10304                 sign = 0;
10305
10306               p = frag_more (size);
10307               reloc_type = reloc (size, 0, sign, i.reloc[n]);
10308
10309               /*   This is tough to explain.  We end up with this one if we
10310                * have operands that look like
10311                * "_GLOBAL_OFFSET_TABLE_+[.-.L284]".  The goal here is to
10312                * obtain the absolute address of the GOT, and it is strongly
10313                * preferable from a performance point of view to avoid using
10314                * a runtime relocation for this.  The actual sequence of
10315                * instructions often look something like:
10316                *
10317                *        call    .L66
10318                * .L66:
10319                *        popl    %ebx
10320                *        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
10321                *
10322                *   The call and pop essentially return the absolute address
10323                * of the label .L66 and store it in %ebx.  The linker itself
10324                * will ultimately change the first operand of the addl so
10325                * that %ebx points to the GOT, but to keep things simple, the
10326                * .o file must have this operand set so that it generates not
10327                * the absolute address of .L66, but the absolute address of
10328                * itself.  This allows the linker itself simply treat a GOTPC
10329                * relocation as asking for a pcrel offset to the GOT to be
10330                * added in, and the addend of the relocation is stored in the
10331                * operand field for the instruction itself.
10332                *
10333                *   Our job here is to fix the operand so that it would add
10334                * the correct offset so that %ebx would point to itself.  The
10335                * thing that is tricky is that .-.L66 will point to the
10336                * beginning of the instruction, so we need to further modify
10337                * the operand so that it will point to itself.  There are
10338                * other cases where you have something like:
10339                *
10340                *        .long   $_GLOBAL_OFFSET_TABLE_+[.-.L66]
10341                *
10342                * and here no correction would be required.  Internally in
10343                * the assembler we treat operands of this form as not being
10344                * pcrel since the '.' is explicitly mentioned, and I wonder
10345                * whether it would simplify matters to do it this way.  Who
10346                * knows.  In earlier versions of the PIC patches, the
10347                * pcrel_adjust field was used to store the correction, but
10348                * since the expression is not pcrel, I felt it would be
10349                * confusing to do it this way.  */
10350
10351               if ((reloc_type == BFD_RELOC_32
10352                    || reloc_type == BFD_RELOC_X86_64_32S
10353                    || reloc_type == BFD_RELOC_64)
10354                   && GOT_symbol
10355                   && GOT_symbol == i.op[n].imms->X_add_symbol
10356                   && (i.op[n].imms->X_op == O_symbol
10357                       || (i.op[n].imms->X_op == O_add
10358                           && ((symbol_get_value_expression
10359                                (i.op[n].imms->X_op_symbol)->X_op)
10360                               == O_subtract))))
10361                 {
10362                   if (!object_64bit)
10363                     reloc_type = BFD_RELOC_386_GOTPC;
10364                   else if (size == 4)
10365                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
10366                   else if (size == 8)
10367                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
10368                   i.has_gotpc_tls_reloc = true;
10369                   i.op[n].imms->X_add_number +=
10370                     encoding_length (insn_start_frag, insn_start_off, p);
10371                 }
10372               fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10373                            i.op[n].imms, 0, reloc_type);
10374             }
10375         }
10376     }
10377 }
10378 \f
10379 /* x86_cons_fix_new is called via the expression parsing code when a
10380    reloc is needed.  We use this hook to get the correct .got reloc.  */
10381 static int cons_sign = -1;
10382
10383 void
10384 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
10385                   expressionS *exp, bfd_reloc_code_real_type r)
10386 {
10387   r = reloc (len, 0, cons_sign, r);
10388
10389 #ifdef TE_PE
10390   if (exp->X_op == O_secrel)
10391     {
10392       exp->X_op = O_symbol;
10393       r = BFD_RELOC_32_SECREL;
10394     }
10395   else if (exp->X_op == O_secidx)
10396     r = BFD_RELOC_16_SECIDX;
10397 #endif
10398
10399   fix_new_exp (frag, off, len, exp, 0, r);
10400 }
10401
10402 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
10403    purpose of the `.dc.a' internal pseudo-op.  */
10404
10405 int
10406 x86_address_bytes (void)
10407 {
10408   if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
10409     return 4;
10410   return stdoutput->arch_info->bits_per_address / 8;
10411 }
10412
10413 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
10414      || defined (LEX_AT)) && !defined (TE_PE)
10415 # define lex_got(reloc, adjust, types) NULL
10416 #else
/* Parse operands of the form
   <symbol>@GOTOFF+<nnn>
   and similar .plt or .got references.

   The text starting at input_line_pointer is scanned for the first `@'
   that precedes any end-of-line character or comma, and what follows
   the `@' is matched case-insensitively against the GOTREL table below.

   If we find one, store the relocation for the current object format
   (selected by object_64bit) in *REL and copy the input string, minus
   the `@GOTOFF', into a malloc'd buffer for parsing by the calling
   routine.  Return this buffer (the callers release it with free ()),
   and if ADJUST is non-null set it to the length of the string we
   removed from the input line.  If TYPES is non-null it is updated with
   the operand types usable with the relocation.

   Otherwise return NULL.  An error is reported only when the suffix is
   recognised but has no relocation for the current output format; an
   unrecognised suffix is silently ignored since it may be a symbol
   version string.  */
static char *
lex_got (enum bfd_reloc_code_real *rel,
         int *adjust,
         i386_operand_type *types)
{
  /* Some of the relocations depend on the size of what field is to
     be relocated.  But in our callers i386_immediate and i386_displacement
     we don't yet know the operand size (this will be set by insn
     matching).  Hence we record the word32 relocation here,
     and adjust the reloc according to the real size in reloc().  */
  static const struct
  {
    /* Suffix name (without the `@') and its length.  */
    const char *str;
    int len;
    /* Relocation to use, indexed by object_64bit.  An entry that
       compares equal to 0 below (the tables use
       _dummy_first_bfd_reloc_code_real for this) means the suffix is
       not supported for that output format.  */
    const enum bfd_reloc_code_real rel[2];
    /* Operand types stored into *TYPES when flag_code is CODE_64BIT.  */
    const i386_operand_type types64;
    /* Whether GOT_symbol must exist once this suffix has been seen.  */
    bool need_GOT_symbol;
  }
    gotrel[] =
  {

    /* NOTE: entries are tried in order and compared with strncasecmp
       over the entry's own length, i.e. as prefixes.  A name that is a
       prefix of another one (e.g. "PLT" / "PLTOFF", "GOT" / "GOTOFF",
       "TLSLD" / "TLSLDM") therefore has to come after the longer
       name.  */

#define OPERAND_TYPE_IMM32_32S_DISP32 { .bitfield = \
  { .imm32 = 1, .imm32s = 1, .disp32 = 1 } }
#define OPERAND_TYPE_IMM32_32S_64_DISP32 { .bitfield = \
  { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1 } }
#define OPERAND_TYPE_IMM32_32S_64_DISP32_64 { .bitfield = \
  { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1, .disp64 = 1 } }
#define OPERAND_TYPE_IMM64_DISP64 { .bitfield = \
  { .imm64 = 1, .disp64 = 1 } }

#ifndef TE_PE
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
    { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
                                        BFD_RELOC_SIZE32 },
      { .bitfield = { .imm32 = 1, .imm64 = 1 } }, false },
#endif
    { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_PLTOFF64 },
      { .bitfield = { .imm64 = 1 } }, true },
    { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
                                       BFD_RELOC_X86_64_PLT32    },
      OPERAND_TYPE_IMM32_32S_DISP32, false },
    { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_GOTPLT64 },
      OPERAND_TYPE_IMM64_DISP64, true },
    { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
                                       BFD_RELOC_X86_64_GOTOFF64 },
      OPERAND_TYPE_IMM64_DISP64, true },
    { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_GOTPCREL },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
                                       BFD_RELOC_X86_64_TLSGD    },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE, true },
    { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_TLSLD    },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
                                       BFD_RELOC_X86_64_GOTTPOFF },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
                                       BFD_RELOC_X86_64_TPOFF32  },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
    { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE, true },
    { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
                                       BFD_RELOC_X86_64_DTPOFF32 },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
    { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE, true },
    { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE, true },
    { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
                                       BFD_RELOC_X86_64_GOT32    },
      OPERAND_TYPE_IMM32_32S_64_DISP32, true },
    { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
                                       BFD_RELOC_X86_64_GOTPC32_TLSDESC },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
                                       BFD_RELOC_X86_64_TLSDESC_CALL },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
#else /* TE_PE */
    { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
                                       BFD_RELOC_32_SECREL },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
#endif

#undef OPERAND_TYPE_IMM32_32S_DISP32
#undef OPERAND_TYPE_IMM32_32S_64_DISP32
#undef OPERAND_TYPE_IMM32_32S_64_DISP32_64
#undef OPERAND_TYPE_IMM64_DISP64

  };
  char *cp;
  unsigned int j;

#if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
  if (!IS_ELF)
    return NULL;
#endif

  /* Find the first `@' in the current operand; give up if the operand
     (terminated by end of line or a comma) has none.  */
  for (cp = input_line_pointer; *cp != '@'; cp++)
    if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
      return NULL;

  for (j = 0; j < ARRAY_SIZE (gotrel); j++)
    {
      int len = gotrel[j].len;
      if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
        {
          if (gotrel[j].rel[object_64bit] != 0)
            {
              int first, second;
              char *tmpbuf, *past_reloc;

              *rel = gotrel[j].rel[object_64bit];

              if (types)
                {
                  /* Outside 64-bit code only the imm32 and disp32 bits
                     are set (nothing is cleared); in 64-bit code *TYPES
                     is replaced by the table entry.  */
                  if (flag_code != CODE_64BIT)
                    {
                      types->bitfield.imm32 = 1;
                      types->bitfield.disp32 = 1;
                    }
                  else
                    *types = gotrel[j].types64;
                }

              if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
                GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);

              /* The length of the first part of our input line.  */
              first = cp - input_line_pointer;

              /* The second part goes from after the reloc token until
                 (and including) an end_of_line char or comma.  */
              past_reloc = cp + 1 + len;
              cp = past_reloc;
              while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
                ++cp;
              second = cp + 1 - past_reloc;

              /* Allocate and copy string.  The trailing NUL shouldn't
                 be necessary, but be safe.  */
              tmpbuf = XNEWVEC (char, first + second + 2);
              memcpy (tmpbuf, input_line_pointer, first);
              if (second != 0 && *past_reloc != ' ')
                /* Replace the relocation token with ' ', so that
                   errors like foo@GOTOFF1 will be detected.  */
                tmpbuf[first++] = ' ';
              else
                /* Increment length by 1 if the relocation token is
                   removed.  */
                len++;
              /* LEN now says how much shorter the copy is than the
                 original text; callers that need to map a position in
                 the copy back to the original add it on.  */
              if (adjust)
                *adjust = len;
              memcpy (tmpbuf + first, past_reloc, second);
              tmpbuf[first + second] = '\0';
              return tmpbuf;
            }

          /* Known suffix, but no relocation for this output format.
             The shift yields 32 or 64 depending on object_64bit.  */
          as_bad (_("@%s reloc is not supported with %d-bit output format"),
                  gotrel[j].str, 1 << (5 + object_64bit));
          return NULL;
        }
    }

  /* Might be a symbol version string.  Don't as_bad here.  */
  return NULL;
}
10602 #endif
10603
10604 bfd_reloc_code_real_type
10605 x86_cons (expressionS *exp, int size)
10606 {
10607   bfd_reloc_code_real_type got_reloc = NO_RELOC;
10608
10609 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
10610       && !defined (LEX_AT)) \
10611     || defined (TE_PE)
10612   intel_syntax = -intel_syntax;
10613
10614   exp->X_md = 0;
10615   if (size == 4 || (object_64bit && size == 8))
10616     {
10617       /* Handle @GOTOFF and the like in an expression.  */
10618       char *save;
10619       char *gotfree_input_line;
10620       int adjust = 0;
10621
10622       save = input_line_pointer;
10623       gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10624       if (gotfree_input_line)
10625         input_line_pointer = gotfree_input_line;
10626
10627       expression (exp);
10628
10629       if (gotfree_input_line)
10630         {
10631           /* expression () has merrily parsed up to the end of line,
10632              or a comma - in the wrong buffer.  Transfer how far
10633              input_line_pointer has moved to the right buffer.  */
10634           input_line_pointer = (save
10635                                 + (input_line_pointer - gotfree_input_line)
10636                                 + adjust);
10637           free (gotfree_input_line);
10638           if (exp->X_op == O_constant
10639               || exp->X_op == O_absent
10640               || exp->X_op == O_illegal
10641               || exp->X_op == O_register
10642               || exp->X_op == O_big)
10643             {
10644               char c = *input_line_pointer;
10645               *input_line_pointer = 0;
10646               as_bad (_("missing or invalid expression `%s'"), save);
10647               *input_line_pointer = c;
10648             }
10649           else if ((got_reloc == BFD_RELOC_386_PLT32
10650                     || got_reloc == BFD_RELOC_X86_64_PLT32)
10651                    && exp->X_op != O_symbol)
10652             {
10653               char c = *input_line_pointer;
10654               *input_line_pointer = 0;
10655               as_bad (_("invalid PLT expression `%s'"), save);
10656               *input_line_pointer = c;
10657             }
10658         }
10659     }
10660   else
10661     expression (exp);
10662
10663   intel_syntax = -intel_syntax;
10664
10665   if (intel_syntax)
10666     i386_intel_simplify (exp);
10667 #else
10668   expression (exp);
10669 #endif
10670
10671   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
10672   if (size == 4 && exp->X_op == O_constant && !object_64bit)
10673     exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10674
10675   return got_reloc;
10676 }
10677
10678 static void
10679 signed_cons (int size)
10680 {
10681   if (object_64bit)
10682     cons_sign = 1;
10683   cons (size);
10684   cons_sign = -1;
10685 }
10686
10687 #ifdef TE_PE
10688 static void
10689 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
10690 {
10691   expressionS exp;
10692
10693   do
10694     {
10695       expression (&exp);
10696       if (exp.X_op == O_symbol)
10697         exp.X_op = O_secrel;
10698
10699       emit_expr (&exp, 4);
10700     }
10701   while (*input_line_pointer++ == ',');
10702
10703   input_line_pointer--;
10704   demand_empty_rest_of_line ();
10705 }
10706
10707 static void
10708 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
10709 {
10710   expressionS exp;
10711
10712   do
10713     {
10714       expression (&exp);
10715       if (exp.X_op == O_symbol)
10716         exp.X_op = O_secidx;
10717
10718       emit_expr (&exp, 2);
10719     }
10720   while (*input_line_pointer++ == ',');
10721
10722   input_line_pointer--;
10723   demand_empty_rest_of_line ();
10724 }
10725 #endif
10726
10727 /* Handle Rounding Control / SAE specifiers.  */
10728
10729 static char *
10730 RC_SAE_specifier (const char *pstr)
10731 {
10732   unsigned int j;
10733
10734   for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
10735     {
10736       if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
10737         {
10738           if (i.rounding.type != rc_none)
10739             {
10740               as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
10741               return NULL;
10742             }
10743
10744           i.rounding.type = RC_NamesTable[j].type;
10745
10746           return (char *)(pstr + RC_NamesTable[j].len);
10747         }
10748     }
10749
10750   return NULL;
10751 }
10752
/* Handle Vector operations.

   Parse the run of `{...}' decorations starting at OP_STRING and record
   them for operand THIS_OPERAND: a broadcast (`{1toN}') in i.broadcast,
   a write mask register and/or the zeroing flag (`{z}') in i.mask and,
   in Intel syntax only, a rounding control / SAE specifier in
   i.rounding.

   Return a pointer to the terminating NUL of OP_STRING once everything
   has been consumed, or NULL after issuing a diagnostic.  */

static char *
check_VecOperations (char *op_string)
{
  const reg_entry *mask;
  const char *saved;
  char *end_op;

  while (*op_string)
    {
      /* Start of the current decoration, for use in diagnostics.  */
      saved = op_string;
      if (*op_string == '{')
        {
          op_string++;

          /* Check broadcasts.  */
          if (startswith (op_string, "1to"))
            {
              unsigned int bcst_type;

              if (i.broadcast.type)
                goto duplicated_vec_op;

              /* Accept 2, 4, 8, 16 or 32.  The two-digit forms step
                 over their first digit here; the last digit of every
                 form is consumed by the common increment below.  */
              op_string += 3;
              if (*op_string == '8')
                bcst_type = 8;
              else if (*op_string == '4')
                bcst_type = 4;
              else if (*op_string == '2')
                bcst_type = 2;
              else if (*op_string == '1'
                       && *(op_string+1) == '6')
                {
                  bcst_type = 16;
                  op_string++;
                }
              else if (*op_string == '3'
                       && *(op_string+1) == '2')
                {
                  bcst_type = 32;
                  op_string++;
                }
              else
                {
                  as_bad (_("Unsupported broadcast: `%s'"), saved);
                  return NULL;
                }
              op_string++;

              i.broadcast.type = bcst_type;
              i.broadcast.operand = this_operand;
            }
          /* Check masking operation.  */
          else if ((mask = parse_register (op_string, &end_op)) != NULL)
            {
              if (mask == &bad_reg)
                return NULL;

              /* k0 can't be used for write mask.  */
              if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
                {
                  as_bad (_("`%s%s' can't be used for write mask"),
                          register_prefix, mask->reg_name);
                  return NULL;
                }

              if (!i.mask.reg)
                {
                  i.mask.reg = mask;
                  i.mask.operand = this_operand;
                }
              /* A mask register with a non-zero number was already
                 recorded.  */
              else if (i.mask.reg->reg_num)
                goto duplicated_vec_op;
              else
                {
                  /* The recorded register has number zero, i.e. it is
                     the reg_k0 placeholder installed by the `{z}'
                     branch below; replace it with the real mask.  */
                  i.mask.reg = mask;

                  /* Only "{z}" is allowed here.  No need to check
                     zeroing mask explicitly.  */
                  if (i.mask.operand != (unsigned int) this_operand)
                    {
                      as_bad (_("invalid write mask `%s'"), saved);
                      return NULL;
                    }
                }

              op_string = end_op;
            }
          /* Check zeroing-flag for masking operation.  */
          else if (*op_string == 'z')
            {
              if (!i.mask.reg)
                {
                  /* No mask seen yet: record reg_k0 as a placeholder so
                     that a later mask register can still be attached.  */
                  i.mask.reg = reg_k0;
                  i.mask.zeroing = 1;
                  i.mask.operand = this_operand;
                }
              else
                {
                  if (i.mask.zeroing)
                    {
                    /* Shared error exit; also reached by goto from the
                       broadcast and mask branches above.  */
                    duplicated_vec_op:
                      as_bad (_("duplicated `%s'"), saved);
                      return NULL;
                    }

                  i.mask.zeroing = 1;

                  /* Only "{%k}" is allowed here.  No need to check mask
                     register explicitly.  */
                  if (i.mask.operand != (unsigned int) this_operand)
                    {
                      as_bad (_("invalid zeroing-masking `%s'"),
                              saved);
                      return NULL;
                    }
                }

              op_string++;
            }
          /* Note that op_string is overwritten with NULL here when no
             specifier matches; the unknown_vec_op path below only uses
             SAVED, so that is harmless.  */
          else if (intel_syntax
                   && (op_string = RC_SAE_specifier (op_string)) != NULL)
            i.rounding.modifier = true;
          else
            goto unknown_vec_op;

          if (*op_string != '}')
            {
              as_bad (_("missing `}' in `%s'"), saved);
              return NULL;
            }
          op_string++;

          /* Strip whitespace since the addition of pseudo prefixes
             changed how the scrubber treats '{'.  */
          if (is_space_char (*op_string))
            ++op_string;

          continue;
        }
    unknown_vec_op:
      /* We don't know this one.  */
      as_bad (_("unknown vector operation: `%s'"), saved);
      return NULL;
    }

  /* `{z}' was given but the mask is still the register-number-zero
     placeholder, i.e. no real write mask accompanied it.  */
  if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
    {
      as_bad (_("zeroing-masking only allowed with write mask"));
      return NULL;
    }

  return op_string;
}
10908
10909 static int
10910 i386_immediate (char *imm_start)
10911 {
10912   char *save_input_line_pointer;
10913   char *gotfree_input_line;
10914   segT exp_seg = 0;
10915   expressionS *exp;
10916   i386_operand_type types;
10917
10918   operand_type_set (&types, ~0);
10919
10920   if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
10921     {
10922       as_bad (_("at most %d immediate operands are allowed"),
10923               MAX_IMMEDIATE_OPERANDS);
10924       return 0;
10925     }
10926
10927   exp = &im_expressions[i.imm_operands++];
10928   i.op[this_operand].imms = exp;
10929
10930   if (is_space_char (*imm_start))
10931     ++imm_start;
10932
10933   save_input_line_pointer = input_line_pointer;
10934   input_line_pointer = imm_start;
10935
10936   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10937   if (gotfree_input_line)
10938     input_line_pointer = gotfree_input_line;
10939
10940   exp_seg = expression (exp);
10941
10942   SKIP_WHITESPACE ();
10943   if (*input_line_pointer)
10944     as_bad (_("junk `%s' after expression"), input_line_pointer);
10945
10946   input_line_pointer = save_input_line_pointer;
10947   if (gotfree_input_line)
10948     {
10949       free (gotfree_input_line);
10950
10951       if (exp->X_op == O_constant)
10952         exp->X_op = O_illegal;
10953     }
10954
10955   if (exp_seg == reg_section)
10956     {
10957       as_bad (_("illegal immediate register operand %s"), imm_start);
10958       return 0;
10959     }
10960
10961   return i386_finalize_immediate (exp_seg, exp, types, imm_start);
10962 }
10963
10964 static int
10965 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10966                          i386_operand_type types, const char *imm_start)
10967 {
10968   if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
10969     {
10970       if (imm_start)
10971         as_bad (_("missing or invalid immediate expression `%s'"),
10972                 imm_start);
10973       return 0;
10974     }
10975   else if (exp->X_op == O_constant)
10976     {
10977       /* Size it properly later.  */
10978       i.types[this_operand].bitfield.imm64 = 1;
10979
10980       /* If not 64bit, sign/zero extend val, to account for wraparound
10981          when !BFD64.  */
10982       if (flag_code != CODE_64BIT)
10983         exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10984     }
10985 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10986   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
10987            && exp_seg != absolute_section
10988            && exp_seg != text_section
10989            && exp_seg != data_section
10990            && exp_seg != bss_section
10991            && exp_seg != undefined_section
10992            && !bfd_is_com_section (exp_seg))
10993     {
10994       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10995       return 0;
10996     }
10997 #endif
10998   else
10999     {
11000       /* This is an address.  The size of the address will be
11001          determined later, depending on destination register,
11002          suffix, or the default for the section.  */
11003       i.types[this_operand].bitfield.imm8 = 1;
11004       i.types[this_operand].bitfield.imm16 = 1;
11005       i.types[this_operand].bitfield.imm32 = 1;
11006       i.types[this_operand].bitfield.imm32s = 1;
11007       i.types[this_operand].bitfield.imm64 = 1;
11008       i.types[this_operand] = operand_type_and (i.types[this_operand],
11009                                                 types);
11010     }
11011
11012   return 1;
11013 }
11014
11015 static char *
11016 i386_scale (char *scale)
11017 {
11018   offsetT val;
11019   char *save = input_line_pointer;
11020
11021   input_line_pointer = scale;
11022   val = get_absolute_expression ();
11023
11024   switch (val)
11025     {
11026     case 1:
11027       i.log2_scale_factor = 0;
11028       break;
11029     case 2:
11030       i.log2_scale_factor = 1;
11031       break;
11032     case 4:
11033       i.log2_scale_factor = 2;
11034       break;
11035     case 8:
11036       i.log2_scale_factor = 3;
11037       break;
11038     default:
11039       {
11040         char sep = *input_line_pointer;
11041
11042         *input_line_pointer = '\0';
11043         as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
11044                 scale);
11045         *input_line_pointer = sep;
11046         input_line_pointer = save;
11047         return NULL;
11048       }
11049     }
11050   if (i.log2_scale_factor != 0 && i.index_reg == 0)
11051     {
11052       as_warn (_("scale factor of %d without an index register"),
11053                1 << i.log2_scale_factor);
11054       i.log2_scale_factor = 0;
11055     }
11056   scale = input_line_pointer;
11057   input_line_pointer = save;
11058   return scale;
11059 }
11060
/* Parse the displacement expression that starts at DISP_START and ends
   just before DISP_END, recording it for operand THIS_OPERAND.

   The candidate displacement widths are worked out first and ORed into
   i.types[this_operand].  The expression is then evaluated into the
   next free disp_expressions slot, after lex_got has stripped any
   `@reloc' suffix and recorded the relocation in i.reloc[this_operand].

   Returns 0 after a diagnostic when there are too many displacement
   operands, otherwise the result of i386_finalize_displacement.  */
static int
i386_displacement (char *disp_start, char *disp_end)
{
  expressionS *exp;
  segT exp_seg = 0;
  char *save_input_line_pointer;
  char *gotfree_input_line;
  int override;
  i386_operand_type bigdisp, types = anydisp;
  int ret;

  if (i.disp_operands == MAX_MEMORY_OPERANDS)
    {
      as_bad (_("at most %d displacement operands are allowed"),
              MAX_MEMORY_OPERANDS);
      return 0;
    }

  operand_type_set (&bigdisp, 0);
  /* Absolute jumps, base/index operands and every template that is not
     a JUMP / JUMP_DWORD branch: the width depends on the code mode and
     the address size prefix.  */
  if (i.jumpabsolute
      || i.types[this_operand].bitfield.baseindex
      || (current_templates->start->opcode_modifier.jump != JUMP
          && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
    {
      i386_addressing_mode ();
      override = (i.prefix[ADDR_PREFIX] != 0);
      if (flag_code == CODE_64BIT)
        {
          bigdisp.bitfield.disp32 = 1;
          if (!override)
            bigdisp.bitfield.disp64 = 1;
        }
      else if ((flag_code == CODE_16BIT) ^ override)
          bigdisp.bitfield.disp16 = 1;
      else
          bigdisp.bitfield.disp32 = 1;
    }
  else
    {
      /* For PC-relative branches, the width of the displacement may be
         dependent upon data size, but is never dependent upon address size.
         Also make sure to not unintentionally match against a non-PC-relative
         branch template.  */
      /* Static because current_templates may be left pointing at it
         after this function returns.  */
      static templates aux_templates;
      const insn_template *t = current_templates->start;
      bool has_intel64 = false;

      /* Walk the templates following the first one until the jump kind
         changes, noting whether any of them has isa64 >= INTEL64.  */
      aux_templates.start = t;
      while (++t < current_templates->end)
        {
          if (t->opcode_modifier.jump
              != current_templates->start->opcode_modifier.jump)
            break;
          if ((t->opcode_modifier.isa64 >= INTEL64))
            has_intel64 = true;
        }
      /* Stopped early: restrict matching to the leading templates that
         share the first template's jump kind.  */
      if (t < current_templates->end)
        {
          aux_templates.end = t;
          current_templates = &aux_templates;
        }

      override = (i.prefix[DATA_PREFIX] != 0);
      if (flag_code == CODE_64BIT)
        {
          if ((override || i.suffix == WORD_MNEM_SUFFIX)
              && (!intel64 || !has_intel64))
            bigdisp.bitfield.disp16 = 1;
          else
            bigdisp.bitfield.disp32 = 1;
        }
      else
        {
          /* Without a data size prefix, a mnemonic suffix naming the
             non-default operand size acts as the override.  */
          if (!override)
            override = (i.suffix == (flag_code != CODE_16BIT
                                     ? WORD_MNEM_SUFFIX
                                     : LONG_MNEM_SUFFIX));
          bigdisp.bitfield.disp32 = 1;
          if ((flag_code == CODE_16BIT) ^ override)
            {
              bigdisp.bitfield.disp32 = 0;
              bigdisp.bitfield.disp16 = 1;
            }
        }
    }
  i.types[this_operand] = operand_type_or (i.types[this_operand],
                                           bigdisp);

  exp = &disp_expressions[i.disp_operands];
  i.op[this_operand].disps = exp;
  i.disp_operands++;
  save_input_line_pointer = input_line_pointer;
  input_line_pointer = disp_start;
  /* Presumably this temporarily NUL-terminates the text at DISP_END
     until the matching RESTORE_END_STRING below -- the macros are
     defined outside this view; confirm there.  */
  END_STRING_AND_SAVE (disp_end);

#ifndef GCC_ASM_O_HACK
#define GCC_ASM_O_HACK 0
#endif
  /* NOTE(review): the block below is disabled by default and looks
     stale.  It tests `bitfield.baseIndex' whereas the live code above
     spells the field `baseindex', and it uses `displacement_string_end',
     which is not declared in this function.  Confirm and repair before
     enabling GCC_ASM_O_HACK.  */
#if GCC_ASM_O_HACK
  END_STRING_AND_SAVE (disp_end + 1);
  if (i.types[this_operand].bitfield.baseIndex
      && displacement_string_end[-1] == '+')
    {
      /* This hack is to avoid a warning when using the "o"
         constraint within gcc asm statements.
         For instance:

         #define _set_tssldt_desc(n,addr,limit,type) \
         __asm__ __volatile__ ( \
         "movw %w2,%0\n\t" \
         "movw %w1,2+%0\n\t" \
         "rorl $16,%1\n\t" \
         "movb %b1,4+%0\n\t" \
         "movb %4,5+%0\n\t" \
         "movb $0,6+%0\n\t" \
         "movb %h1,7+%0\n\t" \
         "rorl $16,%1" \
         : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))

         This works great except that the output assembler ends
         up looking a bit weird if it turns out that there is
         no offset.  You end up producing code that looks like:

         #APP
         movw $235,(%eax)
         movw %dx,2+(%eax)
         rorl $16,%edx
         movb %dl,4+(%eax)
         movb $137,5+(%eax)
         movb $0,6+(%eax)
         movb %dh,7+(%eax)
         rorl $16,%edx
         #NO_APP

         So here we provide the missing zero.  */

      *displacement_string_end = '0';
    }
#endif
  /* Strip any `@reloc' suffix; when one is found, parse the returned
     suffix-free copy instead of the original text.  */
  gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
  if (gotfree_input_line)
    input_line_pointer = gotfree_input_line;

  exp_seg = expression (exp);

  SKIP_WHITESPACE ();
  if (*input_line_pointer)
    as_bad (_("junk `%s' after expression"), input_line_pointer);
#if GCC_ASM_O_HACK
  RESTORE_END_STRING (disp_end + 1);
#endif
  input_line_pointer = save_input_line_pointer;
  if (gotfree_input_line)
    {
      free (gotfree_input_line);

      /* A bare constant or register carrying a relocation suffix is
         flagged as illegal -- presumably rejected later by
         i386_finalize_displacement; confirm there.  */
      if (exp->X_op == O_constant || exp->X_op == O_register)
        exp->X_op = O_illegal;
    }

  /* Called before the end of the string is restored, so DISP_START is
     still cut off at DISP_END for any diagnostics (assuming the macro
     behaves as described above).  */
  ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);

  RESTORE_END_STRING (disp_end);

  return ret;
}
11227
11228 static int
11229 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
11230                             i386_operand_type types, const char *disp_start)
11231 {
11232   int ret = 1;
11233
11234   /* We do this to make sure that the section symbol is in
11235      the symbol table.  We will ultimately change the relocation
11236      to be relative to the beginning of the section.  */
11237   if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
11238       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
11239       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11240     {
11241       if (exp->X_op != O_symbol)
11242         goto inv_disp;
11243
11244       if (S_IS_LOCAL (exp->X_add_symbol)
11245           && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
11246           && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
11247         section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
11248       exp->X_op = O_subtract;
11249       exp->X_op_symbol = GOT_symbol;
11250       if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
11251         i.reloc[this_operand] = BFD_RELOC_32_PCREL;
11252       else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11253         i.reloc[this_operand] = BFD_RELOC_64;
11254       else
11255         i.reloc[this_operand] = BFD_RELOC_32;
11256     }
11257
11258   else if (exp->X_op == O_absent
11259            || exp->X_op == O_illegal
11260            || exp->X_op == O_big)
11261     {
11262     inv_disp:
11263       as_bad (_("missing or invalid displacement expression `%s'"),
11264               disp_start);
11265       ret = 0;
11266     }
11267
11268   else if (exp->X_op == O_constant)
11269     {
11270       /* Sizing gets taken care of by optimize_disp().
11271
11272          If not 64bit, sign/zero extend val, to account for wraparound
11273          when !BFD64.  */
11274       if (flag_code != CODE_64BIT)
11275         exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11276     }
11277
11278 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11279   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11280            && exp_seg != absolute_section
11281            && exp_seg != text_section
11282            && exp_seg != data_section
11283            && exp_seg != bss_section
11284            && exp_seg != undefined_section
11285            && !bfd_is_com_section (exp_seg))
11286     {
11287       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11288       ret = 0;
11289     }
11290 #endif
11291
11292   else if (current_templates->start->opcode_modifier.jump == JUMP_BYTE)
11293     i.types[this_operand].bitfield.disp8 = 1;
11294
11295   /* Check if this is a displacement only operand.  */
11296   if (!i.types[this_operand].bitfield.baseindex)
11297     i.types[this_operand] =
11298       operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
11299                        operand_type_and (i.types[this_operand], types));
11300
11301   return ret;
11302 }
11303
11304 /* Return the active addressing mode, taking address override and
11305    registers forming the address into consideration.  Update the
11306    address override prefix if necessary.  */
11307
11308 static enum flag_code
11309 i386_addressing_mode (void)
11310 {
11311   enum flag_code addr_mode;
11312
11313   if (i.prefix[ADDR_PREFIX])
11314     addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
11315   else if (flag_code == CODE_16BIT
11316            && current_templates->start->cpu_flags.bitfield.cpumpx
11317            /* Avoid replacing the "16-bit addressing not allowed" diagnostic
11318               from md_assemble() by "is not a valid base/index expression"
11319               when there is a base and/or index.  */
11320            && !i.types[this_operand].bitfield.baseindex)
11321     {
11322       /* MPX insn memory operands with neither base nor index must be forced
11323          to use 32-bit addressing in 16-bit mode.  */
11324       addr_mode = CODE_32BIT;
11325       i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11326       ++i.prefixes;
11327       gas_assert (!i.types[this_operand].bitfield.disp16);
11328       gas_assert (!i.types[this_operand].bitfield.disp32);
11329     }
11330   else
11331     {
11332       addr_mode = flag_code;
11333
11334 #if INFER_ADDR_PREFIX
11335       if (i.mem_operands == 0)
11336         {
11337           /* Infer address prefix from the first memory operand.  */
11338           const reg_entry *addr_reg = i.base_reg;
11339
11340           if (addr_reg == NULL)
11341             addr_reg = i.index_reg;
11342
11343           if (addr_reg)
11344             {
11345               if (addr_reg->reg_type.bitfield.dword)
11346                 addr_mode = CODE_32BIT;
11347               else if (flag_code != CODE_64BIT
11348                        && addr_reg->reg_type.bitfield.word)
11349                 addr_mode = CODE_16BIT;
11350
11351               if (addr_mode != flag_code)
11352                 {
11353                   i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11354                   i.prefixes += 1;
11355                   /* Change the size of any displacement too.  At most one
11356                      of Disp16 or Disp32 is set.
11357                      FIXME.  There doesn't seem to be any real need for
11358                      separate Disp16 and Disp32 flags.  The same goes for
11359                      Imm16 and Imm32.  Removing them would probably clean
11360                      up the code quite a lot.  */
11361                   if (flag_code != CODE_64BIT
11362                       && (i.types[this_operand].bitfield.disp16
11363                           || i.types[this_operand].bitfield.disp32))
11364                     {
11365                       static const i386_operand_type disp16_32 = {
11366                         .bitfield = { .disp16 = 1, .disp32 = 1 }
11367                       };
11368
11369                       i.types[this_operand]
11370                         = operand_type_xor (i.types[this_operand], disp16_32);
11371                     }
11372                 }
11373             }
11374         }
11375 #endif
11376     }
11377
11378   return addr_mode;
11379 }
11380
/* Make sure the memory operand we've been dealt is valid.
   Return 1 on success, 0 on a failure.

   OPERAND_STRING is the operand's source text; it is only used in
   diagnostics.  The operand itself is described by the globals
   i.base_reg, i.index_reg, i.log2_scale_factor, i.disp_encoding and
   i.types[this_operand].

   Note that the call to i386_addressing_mode () can, as a side effect,
   record an address size prefix for the insn being assembled.  */

static int
i386_index_check (const char *operand_string)
{
  /* Used to complete the "is not a valid ... expression" diagnostic.  */
  const char *kind = "base/index";
  enum flag_code addr_mode = i386_addressing_mode ();
  /* The string insn test looks at the last candidate template; the
     non-string path below switches to the first one.  */
  const insn_template *t = current_templates->end - 1;

  if (t->opcode_modifier.isstring)
    {
      /* Memory operands of string insns are special in that they only allow
         a single register (rDI, rSI, or rBX) as their memory address.  */
      const reg_entry *expected_reg;
      /* Both tables are indexed by ADDR_MODE; the row order presumably
         mirrors the enumerator order of enum flag_code (32-bit, 16-bit,
         64-bit) -- confirm against its declaration.  */
      static const char *di_si[][2] =
        {
          { "esi", "edi" },
          { "si", "di" },
          { "rsi", "rdi" }
        };
      static const char *bx[] = { "ebx", "bx", "rbx" };

      kind = "string address";

      if (t->opcode_modifier.prefixok == PrefixRep)
        {
          /* Index of the template operand tied to the rDI column below;
             judging by IS_STRING_ES_OP0 this is presumably the operand
             addressed through %es -- confirm.  */
          int es_op = t->opcode_modifier.isstring - IS_STRING_ES_OP0;
          /* Template operand index the memory operand being checked
             corresponds to.  */
          int op = 0;

          if (!t->operand_types[0].bitfield.baseindex
              || ((!i.mem_operands != !intel_syntax)
                  && t->operand_types[1].bitfield.baseindex))
            op = 1;
          /* Column 1 (the rDI name) is selected when OP is the ES operand,
             column 0 (the rSI name) otherwise.  */
          expected_reg
            = (const reg_entry *) str_hash_find (reg_hash,
                                                 di_si[addr_mode][op == es_op]);
        }
      else
        /* String insns whose template does not have PrefixRep are expected
           to address memory through rBX.  */
        expected_reg
          = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);

      if (i.base_reg != expected_reg
          || i.index_reg
          || operand_type_check (i.types[this_operand], disp))
        {
          /* The second memory operand must have the same size as
             the first one.  */
          if (i.mem_operands
              && i.base_reg
              && !((addr_mode == CODE_64BIT
                    && i.base_reg->reg_type.bitfield.qword)
                   || (addr_mode == CODE_32BIT
                       ? i.base_reg->reg_type.bitfield.dword
                       : i.base_reg->reg_type.bitfield.word)))
            goto bad_address;

          /* Any other mismatch is only warned about; the operand is still
             accepted.  */
          as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
                   operand_string,
                   intel_syntax ? '[' : '(',
                   register_prefix,
                   expected_reg->reg_name,
                   intel_syntax ? ']' : ')');
          return 1;
        }
      else
        return 1;

      /* Common error exit; also the target of the gotos in the non-string
         branch below, where KIND still reads "base/index".  */
    bad_address:
      as_bad (_("`%s' is not a valid %s expression"),
              operand_string, kind);
      return 0;
    }
  else
    {
      t = current_templates->start;

      if (addr_mode != CODE_16BIT)
        {
          /* 32-bit/64-bit checks.  */
          if (i.disp_encoding == disp_encoding_16bit)
            {
              /* Also reached from the 16-bit branch below, hence the
                 selection of the prefix name by ADDR_MODE.  */
            bad_disp:
              as_bad (_("invalid `%s' prefix"),
                      addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
              return 0;
            }

          /* Reject a base register of the wrong width, RegIP as base
             together with an index, RegIZ as base, and a non-vector index
             register which is of the wrong width or not usable as
             base/index.  */
          if ((i.base_reg
               && ((addr_mode == CODE_64BIT
                    ? !i.base_reg->reg_type.bitfield.qword
                    : !i.base_reg->reg_type.bitfield.dword)
                   || (i.index_reg && i.base_reg->reg_num == RegIP)
                   || i.base_reg->reg_num == RegIZ))
              || (i.index_reg
                  && !i.index_reg->reg_type.bitfield.xmmword
                  && !i.index_reg->reg_type.bitfield.ymmword
                  && !i.index_reg->reg_type.bitfield.zmmword
                  && ((addr_mode == CODE_64BIT
                       ? !i.index_reg->reg_type.bitfield.qword
                       : !i.index_reg->reg_type.bitfield.dword)
                      || !i.index_reg->reg_type.bitfield.baseindex)))
            goto bad_address;

          /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special
             restrictions.  The templates are recognized by opcode: 0x1b
             with an F3 prefix, and 0x1a/0x1b without prefix, all in the 0F
             opcode space.  */
          if ((t->opcode_modifier.opcodeprefix == PREFIX_0XF3
               && t->opcode_modifier.opcodespace == SPACE_0F
               && t->base_opcode == 0x1b)
              || (t->opcode_modifier.opcodeprefix == PREFIX_NONE
                  && t->opcode_modifier.opcodespace == SPACE_0F
                  && (t->base_opcode & ~1) == 0x1a)
              || t->opcode_modifier.sib == SIBMEM)
            {
              /* They cannot use RIP-relative addressing.  */
              if (i.base_reg && i.base_reg->reg_num == RegIP)
                {
                  as_bad (_("`%s' cannot be used here"), operand_string);
                  return 0;
                }

              /* bndldx and bndstx ignore their scale factor.  */
              if (t->opcode_modifier.opcodeprefix == PREFIX_NONE
                  && t->opcode_modifier.opcodespace == SPACE_0F
                  && (t->base_opcode & ~1) == 0x1a
                  && i.log2_scale_factor)
                as_warn (_("register scaling is being ignored here"));
            }
        }
      else
        {
          /* 16-bit checks.  */
          if (i.disp_encoding == disp_encoding_32bit)
            goto bad_disp;

          /* Base and index must both be 16-bit registers usable for
             addressing.  An index is furthermore only accepted together
             with a base, without scaling, and with the base's register
             number below 6 and the index's at or above 6 -- presumably
             %bx/%bp as base and %si/%di as index; confirm against the
             register table.  */
          if ((i.base_reg
               && (!i.base_reg->reg_type.bitfield.word
                   || !i.base_reg->reg_type.bitfield.baseindex))
              || (i.index_reg
                  && (!i.index_reg->reg_type.bitfield.word
                      || !i.index_reg->reg_type.bitfield.baseindex
                      || !(i.base_reg
                           && i.base_reg->reg_num < 6
                           && i.index_reg->reg_num >= 6
                           && i.log2_scale_factor == 0))))
            goto bad_address;
        }
    }
  return 1;
}
11530
11531 /* Handle vector immediates.  */
11532
11533 static int
11534 RC_SAE_immediate (const char *imm_start)
11535 {
11536   const char *pstr = imm_start;
11537
11538   if (*pstr != '{')
11539     return 0;
11540
11541   pstr = RC_SAE_specifier (pstr + 1);
11542   if (pstr == NULL)
11543     return 0;
11544
11545   if (*pstr++ != '}')
11546     {
11547       as_bad (_("Missing '}': '%s'"), imm_start);
11548       return 0;
11549     }
11550   /* RC/SAE immediate string should contain nothing more.  */;
11551   if (*pstr != 0)
11552     {
11553       as_bad (_("Junk after '}': '%s'"), imm_start);
11554       return 0;
11555     }
11556
11557   /* Internally this doesn't count as an operand.  */
11558   --i.operands;
11559
11560   return 1;
11561 }
11562
11563 static INLINE bool starts_memory_operand (char c)
11564 {
11565   return ISDIGIT (c)
11566          || is_identifier_char (c)
11567          || strchr ("([\"+-!~", c);
11568 }
11569
/* Parse OPERAND_STRING into the i386_insn structure I.  Returns zero
   on error.

   The operand is classified, in this order, as
     - a register (which, when it is a segment register followed by ':',
       acts as segment override of a memory reference instead),
     - an immediate (introduced by IMMEDIATE_PREFIX),
     - an RC/SAE pseudo-operand ("{...}"), or
     - a memory reference.
   On success the fields and counters of I which belong to THIS_OPERAND
   have been updated.  On failure a diagnostic is issued here or,
   presumably, by the helper which rejected the operand.

   OPERAND_STRING may get modified: when a bad register name inside
   parentheses is reported, a following ',' is overwritten by a NUL.  */

static int
i386_att_operand (char *operand_string)
{
  const reg_entry *r;
  char *end_op;
  char *op_string = operand_string;

  /* At most one blank is skipped here and everywhere below.  */
  if (is_space_char (*op_string))
    ++op_string;

  /* We check for an absolute prefix (differentiating,
     for example, 'jmp pc_relative_label' from 'jmp *absolute_label').  */
  if (*op_string == ABSOLUTE_PREFIX)
    {
      ++op_string;
      if (is_space_char (*op_string))
        ++op_string;
      i.jumpabsolute = true;
    }

  /* Check if operand is a register.  */
  if ((r = parse_register (op_string, &end_op)) != NULL)
    {
      i386_operand_type temp;

      /* No diagnostic is issued here for a bad register.  */
      if (r == &bad_reg)
        return 0;

      /* Check for a segment override by searching for ':' after a
         segment register.  */
      op_string = end_op;
      if (is_space_char (*op_string))
        ++op_string;
      if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
        {
          i.seg[i.mem_operands] = r;

          /* Skip the ':' and whitespace.  */
          ++op_string;
          if (is_space_char (*op_string))
            ++op_string;

          /* Handle case of %es:*foo.  */
          if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX)
            {
              ++op_string;
              if (is_space_char (*op_string))
                ++op_string;
              i.jumpabsolute = true;
            }

          if (!starts_memory_operand (*op_string))
            {
              as_bad (_("bad memory operand `%s'"), op_string);
              return 0;
            }
          /* Jumps into the memory reference branch further down, with
             OP_STRING pointing past the segment override.  */
          goto do_memory_reference;
        }

      /* Handle vector operations.  */
      if (*op_string == '{')
        {
          op_string = check_VecOperations (op_string);
          if (op_string == NULL)
            return 0;
        }

      if (*op_string)
        {
          as_bad (_("junk `%s' after register"), op_string);
          return 0;
        }
      /* Record the register's type, with the BaseIndex attribute cleared.  */
      temp = r->reg_type;
      temp.bitfield.baseindex = 0;
      i.types[this_operand] = operand_type_or (i.types[this_operand],
                                               temp);
      i.types[this_operand].bitfield.unspecified = 0;
      i.op[this_operand].regs = r;
      i.reg_operands++;

      /* A GPR may follow an RC or SAE immediate only if a (vector) register
         operand was also present earlier on.  */
      if (i.rounding.type != rc_none && temp.bitfield.class == Reg
          && i.reg_operands == 1)
        {
          unsigned int j;

          /* Look up the name of the rounding specifier for the diagnostic.
             NOTE(review): J is used as index without checking that the
             loop found a match; this relies on every possible value of
             i.rounding.type being listed in RC_NamesTable -- confirm.  */
          for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
            if (i.rounding.type == RC_NamesTable[j].type)
              break;
          as_bad (_("`%s': misplaced `{%s}'"),
                  current_templates->start->name, RC_NamesTable[j].name);
          return 0;
        }
    }
  else if (*op_string == REGISTER_PREFIX)
    {
      as_bad (_("bad register name `%s'"), op_string);
      return 0;
    }
  else if (*op_string == IMMEDIATE_PREFIX)
    {
      ++op_string;
      if (i.jumpabsolute)
        {
          as_bad (_("immediate operand illegal with absolute jump"));
          return 0;
        }
      if (!i386_immediate (op_string))
        return 0;
      if (i.rounding.type != rc_none)
        {
          as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
                  current_templates->start->name);
          return 0;
        }
    }
  /* Note that this test is passed the operand from its very beginning,
     not OP_STRING.  */
  else if (RC_SAE_immediate (operand_string))
    {
      /* If it is a RC or SAE immediate, do the necessary placement check:
         Only another immediate or a GPR may precede it.  */
      if (i.mem_operands || i.reg_operands + i.imm_operands > 1
          || (i.reg_operands == 1
              && i.op[0].regs->reg_type.bitfield.class != Reg))
        {
          as_bad (_("`%s': misplaced `%s'"),
                  current_templates->start->name, operand_string);
          return 0;
        }
    }
  else if (starts_memory_operand (*op_string))
    {
      /* This is a memory reference of some sort.  */
      char *base_string;

      /* Start and end of displacement string expression (if found).  */
      char *displacement_string_start;
      char *displacement_string_end;

    do_memory_reference:
      /* Check for base index form.  We detect the base index form by
         looking for an ')' at the end of the operand, searching
         for the '(' matching it, and finding a REGISTER_PREFIX or ','
         after the '('.  */
      base_string = op_string + strlen (op_string);

      /* Handle vector operations.  First step back to the operand's last
         character, ignoring one trailing blank.  */
      --base_string;
      if (is_space_char (*base_string))
        --base_string;

      if (*base_string == '}')
        {
          char *vop_start = NULL;

          /* Scan backwards over all trailing "{...}" groups.  Afterwards
             VOP_START points at the '{' of the leftmost of them and
             BASE_STRING at the last character ahead of it.  VOP_START
             remaining NULL means that no opening brace was found before
             hitting a double quote or the start of the operand.  */
          while (base_string-- > op_string)
            {
              if (*base_string == '"')
                break;
              if (*base_string != '{')
                continue;

              vop_start = base_string;

              --base_string;
              if (is_space_char (*base_string))
                --base_string;

              /* Done unless yet another group precedes this one.  */
              if (*base_string != '}')
                break;

              vop_start = NULL;
            }

          if (!vop_start)
            {
              as_bad (_("unbalanced figure braces"));
              return 0;
            }

          if (check_VecOperations (vop_start) == NULL)
            return 0;
        }

      /* If we only have a displacement, set-up for it to be parsed later.  */
      displacement_string_start = op_string;
      displacement_string_end = base_string + 1;

      if (*base_string == ')')
        {
          char *temp_string;
          unsigned int parens_not_balanced = 0;
          bool in_quotes = false;

          /* We've already checked that the number of left & right ()'s are
             equal, and that there's a matching set of double quotes.

             Walk forward up to the final ')' and make BASE_STRING point at
             the last '(' which is seen at nesting depth zero outside of
             double quotes; a backslash-escaped double quote doesn't toggle
             the quoting state.  */
          end_op = base_string;
          for (temp_string = op_string; temp_string < end_op; temp_string++)
            {
              if (*temp_string == '\\' && temp_string[1] == '"')
                ++temp_string;
              else if (*temp_string == '"')
                in_quotes = !in_quotes;
              else if (!in_quotes)
                {
                  if (*temp_string == '(' && !parens_not_balanced++)
                    base_string = temp_string;
                  if (*temp_string == ')')
                    --parens_not_balanced;
                }
            }

          /* Remember where the '(' is: in base/index form this is where
             the displacement ends.  */
          temp_string = base_string;

          /* Skip past '(' and whitespace.  */
          gas_assert (*base_string == '(');
          ++base_string;
          if (is_space_char (*base_string))
            ++base_string;

          /* A ',' or a register right after the '(' indicates base/index
             form.  Otherwise the parenthesized text is left to be parsed
             as part of the displacement expression.  */
          if (*base_string == ','
              || ((i.base_reg = parse_register (base_string, &end_op))
                  != NULL))
            {
              displacement_string_end = temp_string;

              i.types[this_operand].bitfield.baseindex = 1;

              if (i.base_reg)
                {
                  if (i.base_reg == &bad_reg)
                    return 0;
                  base_string = end_op;
                  if (is_space_char (*base_string))
                    ++base_string;
                }

              /* There may be an index reg or scale factor here.  */
              if (*base_string == ',')
                {
                  ++base_string;
                  if (is_space_char (*base_string))
                    ++base_string;

                  if ((i.index_reg = parse_register (base_string, &end_op))
                      != NULL)
                    {
                      if (i.index_reg == &bad_reg)
                        return 0;
                      base_string = end_op;
                      if (is_space_char (*base_string))
                        ++base_string;
                      if (*base_string == ',')
                        {
                          ++base_string;
                          if (is_space_char (*base_string))
                            ++base_string;
                        }
                      else if (*base_string != ')')
                        {
                          as_bad (_("expecting `,' or `)' "
                                    "after index register in `%s'"),
                                  operand_string);
                          return 0;
                        }
                    }
                  else if (*base_string == REGISTER_PREFIX)
                    {
                      /* Cut the text at the next ',' so that just the
                         offending name gets reported.  */
                      end_op = strchr (base_string, ',');
                      if (end_op)
                        *end_op = '\0';
                      as_bad (_("bad register name `%s'"), base_string);
                      return 0;
                    }

                  /* Check for scale factor.  */
                  if (*base_string != ')')
                    {
                      char *end_scale = i386_scale (base_string);

                      if (!end_scale)
                        return 0;

                      base_string = end_scale;
                      if (is_space_char (*base_string))
                        ++base_string;
                      if (*base_string != ')')
                        {
                          as_bad (_("expecting `)' "
                                    "after scale factor in `%s'"),
                                  operand_string);
                          return 0;
                        }
                    }
                  else if (!i.index_reg)
                    {
                      as_bad (_("expecting index register or scale factor "
                                "after `,'; got '%c'"),
                              *base_string);
                      return 0;
                    }
                }
              else if (*base_string != ')')
                {
                  as_bad (_("expecting `,' or `)' "
                            "after base register in `%s'"),
                          operand_string);
                  return 0;
                }
            }
          else if (*base_string == REGISTER_PREFIX)
            {
              /* Cut the text at the next ',' so that just the offending
                 name gets reported.  */
              end_op = strchr (base_string, ',');
              if (end_op)
                *end_op = '\0';
              as_bad (_("bad register name `%s'"), base_string);
              return 0;
            }
        }

      /* If there's an expression beginning the operand, parse it,
         assuming displacement_string_start and
         displacement_string_end are meaningful.  */
      if (displacement_string_start != displacement_string_end)
        {
          if (!i386_displacement (displacement_string_start,
                                  displacement_string_end))
            return 0;
        }

      /* Special case for (%dx) while doing input/output op.  The operand
         then takes on the register's type and is neither validated nor
         counted as memory operand.  */
      if (i.base_reg
          && i.base_reg->reg_type.bitfield.instance == RegD
          && i.base_reg->reg_type.bitfield.word
          && i.index_reg == 0
          && i.log2_scale_factor == 0
          && i.seg[i.mem_operands] == 0
          && !operand_type_check (i.types[this_operand], disp))
        {
          i.types[this_operand] = i.base_reg->reg_type;
          i.input_output_operand = true;
          return 1;
        }

      if (i386_index_check (operand_string) == 0)
        return 0;
      i.flags[this_operand] |= Operand_Mem;
      i.mem_operands++;
    }
  else
    {
      /* It's not a memory operand; argh!  */
      as_bad (_("invalid char %s beginning operand %d `%s'"),
              output_invalid (*op_string),
              this_operand + 1,
              op_string);
      return 0;
    }
  return 1;                     /* Normal return.  */
}
11933 \f
11934 /* Calculate the maximum variable size (i.e., excluding fr_fix)
11935    that an rs_machine_dependent frag may reach.  */
11936
11937 unsigned int
11938 i386_frag_max_var (fragS *frag)
11939 {
11940   /* The only relaxable frags are for jumps.
11941      Unconditional jumps can grow by 4 bytes and others by 5 bytes.  */
11942   gas_assert (frag->fr_type == rs_machine_dependent);
11943   return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
11944 }
11945
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* Return 1 if a reference to FR_SYMBOL is known to bind to the
   definition seen by the assembler, 0 if it may be preempted or has to
   go through the PLT.  FR_VAR is either NO_RELOC or a relocation code;
   the only codes accepted for a global symbol of default visibility are
   the two PLT32 ones -- anything else aborts.  */
static int
elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
{
  /* STT_GNU_IFUNC symbol must go through PLT.  */
  if (symbol_get_bfdsym (fr_symbol)->flags & BSF_GNU_INDIRECT_FUNCTION)
    return 0;

  /* A non-external symbol may be weak or local; only a weak one can't be
     resolved here.  */
  if (!S_IS_EXTERNAL (fr_symbol))
    return S_IS_WEAK (fr_symbol) ? 0 : 1;

  /* Global symbols with non-default visibility can't be preempted.  */
  if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
    return 1;

  if (fr_var != NO_RELOC)
    {
      const enum bfd_reloc_code_real reloc
        = (enum bfd_reloc_code_real) fr_var;

      if (reloc != BFD_RELOC_386_PLT32 && reloc != BFD_RELOC_X86_64_PLT32)
        abort ();

      /* Symbol with PLT relocation may be preempted.  */
      return 0;
    }

  /* Global symbols with default visibility in a shared library may be
     preempted by another definition.  */
  return shared ? 0 : 1;
}
#endif
11979
11980 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
11981    Note also work for Skylake and Cascadelake.
11982 ---------------------------------------------------------------------
11983 |   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
11984 | ------  | ----------- | ------- | -------- |
11985 |   Jo    |      N      |    N    |     Y    |
11986 |   Jno   |      N      |    N    |     Y    |
11987 |  Jc/Jb  |      Y      |    N    |     Y    |
11988 | Jae/Jnb |      Y      |    N    |     Y    |
11989 |  Je/Jz  |      Y      |    Y    |     Y    |
11990 | Jne/Jnz |      Y      |    Y    |     Y    |
11991 | Jna/Jbe |      Y      |    N    |     Y    |
11992 | Ja/Jnbe |      Y      |    N    |     Y    |
11993 |   Js    |      N      |    N    |     Y    |
11994 |   Jns   |      N      |    N    |     Y    |
11995 |  Jp/Jpe |      N      |    N    |     Y    |
11996 | Jnp/Jpo |      N      |    N    |     Y    |
11997 | Jl/Jnge |      Y      |    Y    |     Y    |
11998 | Jge/Jnl |      Y      |    Y    |     Y    |
11999 | Jle/Jng |      Y      |    Y    |     Y    |
12000 | Jg/Jnle |      Y      |    Y    |     Y    |
12001 ---------------------------------------------------------------------  */
12002 static int
12003 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
12004 {
12005   if (mf_cmp == mf_cmp_alu_cmp)
12006     return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
12007             || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
12008   if (mf_cmp == mf_cmp_incdec)
12009     return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
12010             || mf_jcc == mf_jcc_jle);
12011   if (mf_cmp == mf_cmp_test_and)
12012     return 1;
12013   return 0;
12014 }
12015
12016 /* Return the next non-empty frag.  */
12017
12018 static fragS *
12019 i386_next_non_empty_frag (fragS *fragP)
12020 {
12021   /* There may be a frag with a ".fill 0" when there is no room in
12022      the current frag for frag_grow in output_insn.  */
12023   for (fragP = fragP->fr_next;
12024        (fragP != NULL
12025         && fragP->fr_type == rs_fill
12026         && fragP->fr_fix == 0);
12027        fragP = fragP->fr_next)
12028     ;
12029   return fragP;
12030 }
12031
12032 /* Return the next jcc frag after BRANCH_PADDING.  */
12033
12034 static fragS *
12035 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
12036 {
12037   fragS *branch_fragP;
12038   if (!pad_fragP)
12039     return NULL;
12040
12041   if (pad_fragP->fr_type == rs_machine_dependent
12042       && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
12043           == BRANCH_PADDING))
12044     {
12045       branch_fragP = i386_next_non_empty_frag (pad_fragP);
12046       if (branch_fragP->fr_type != rs_machine_dependent)
12047         return NULL;
12048       if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
12049           && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
12050                                    pad_fragP->tc_frag_data.mf_type))
12051         return branch_fragP;
12052     }
12053
12054   return NULL;
12055 }
12056
12057 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags.  */
12058
12059 static void
12060 i386_classify_machine_dependent_frag (fragS *fragP)
12061 {
12062   fragS *cmp_fragP;
12063   fragS *pad_fragP;
12064   fragS *branch_fragP;
12065   fragS *next_fragP;
12066   unsigned int max_prefix_length;
12067
12068   if (fragP->tc_frag_data.classified)
12069     return;
12070
12071   /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING.  Convert
12072      FUSED_JCC_PADDING and merge BRANCH_PADDING.  */
12073   for (next_fragP = fragP;
12074        next_fragP != NULL;
12075        next_fragP = next_fragP->fr_next)
12076     {
12077       next_fragP->tc_frag_data.classified = 1;
12078       if (next_fragP->fr_type == rs_machine_dependent)
12079         switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
12080           {
12081           case BRANCH_PADDING:
12082             /* The BRANCH_PADDING frag must be followed by a branch
12083                frag.  */
12084             branch_fragP = i386_next_non_empty_frag (next_fragP);
12085             next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12086             break;
12087           case FUSED_JCC_PADDING:
12088             /* Check if this is a fused jcc:
12089                FUSED_JCC_PADDING
12090                CMP like instruction
12091                BRANCH_PADDING
12092                COND_JUMP
12093                */
12094             cmp_fragP = i386_next_non_empty_frag (next_fragP);
12095             pad_fragP = i386_next_non_empty_frag (cmp_fragP);
12096             branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
12097             if (branch_fragP)
12098               {
12099                 /* The BRANCH_PADDING frag is merged with the
12100                    FUSED_JCC_PADDING frag.  */
12101                 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12102                 /* CMP like instruction size.  */
12103                 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
12104                 frag_wane (pad_fragP);
12105                 /* Skip to branch_fragP.  */
12106                 next_fragP = branch_fragP;
12107               }
12108             else if (next_fragP->tc_frag_data.max_prefix_length)
12109               {
12110                 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
12111                    a fused jcc.  */
12112                 next_fragP->fr_subtype
12113                   = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
12114                 next_fragP->tc_frag_data.max_bytes
12115                   = next_fragP->tc_frag_data.max_prefix_length;
12116                 /* This will be updated in the BRANCH_PREFIX scan.  */
12117                 next_fragP->tc_frag_data.max_prefix_length = 0;
12118               }
12119             else
12120               frag_wane (next_fragP);
12121             break;
12122           }
12123     }
12124
12125   /* Stop if there is no BRANCH_PREFIX.  */
12126   if (!align_branch_prefix_size)
12127     return;
12128
12129   /* Scan for BRANCH_PREFIX.  */
12130   for (; fragP != NULL; fragP = fragP->fr_next)
12131     {
12132       if (fragP->fr_type != rs_machine_dependent
12133           || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12134               != BRANCH_PREFIX))
12135         continue;
12136
12137       /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
12138          COND_JUMP_PREFIX.  */
12139       max_prefix_length = 0;
12140       for (next_fragP = fragP;
12141            next_fragP != NULL;
12142            next_fragP = next_fragP->fr_next)
12143         {
12144           if (next_fragP->fr_type == rs_fill)
12145             /* Skip rs_fill frags.  */
12146             continue;
12147           else if (next_fragP->fr_type != rs_machine_dependent)
12148             /* Stop for all other frags.  */
12149             break;
12150
12151           /* rs_machine_dependent frags.  */
12152           if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12153               == BRANCH_PREFIX)
12154             {
12155               /* Count BRANCH_PREFIX frags.  */
12156               if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
12157                 {
12158                   max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
12159                   frag_wane (next_fragP);
12160                 }
12161               else
12162                 max_prefix_length
12163                   += next_fragP->tc_frag_data.max_bytes;
12164             }
12165           else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12166                     == BRANCH_PADDING)
12167                    || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12168                        == FUSED_JCC_PADDING))
12169             {
12170               /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING.  */
12171               fragP->tc_frag_data.u.padding_fragP = next_fragP;
12172               break;
12173             }
12174           else
12175             /* Stop for other rs_machine_dependent frags.  */
12176             break;
12177         }
12178
12179       fragP->tc_frag_data.max_prefix_length = max_prefix_length;
12180
12181       /* Skip to the next frag.  */
12182       fragP = next_fragP;
12183     }
12184 }
12185
12186 /* Compute padding size for
12187
12188         FUSED_JCC_PADDING
12189         CMP like instruction
12190         BRANCH_PADDING
12191         COND_JUMP/UNCOND_JUMP
12192
12193    or
12194
12195         BRANCH_PADDING
12196         COND_JUMP/UNCOND_JUMP
12197  */
12198
12199 static int
12200 i386_branch_padding_size (fragS *fragP, offsetT address)
12201 {
12202   unsigned int offset, size, padding_size;
12203   fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
12204
12205   /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag.  */
12206   if (!address)
12207     address = fragP->fr_address;
12208   address += fragP->fr_fix;
12209
12210   /* CMP like instrunction size.  */
12211   size = fragP->tc_frag_data.cmp_size;
12212
12213   /* The base size of the branch frag.  */
12214   size += branch_fragP->fr_fix;
12215
12216   /* Add opcode and displacement bytes for the rs_machine_dependent
12217      branch frag.  */
12218   if (branch_fragP->fr_type == rs_machine_dependent)
12219     size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
12220
12221   /* Check if branch is within boundary and doesn't end at the last
12222      byte.  */
12223   offset = address & ((1U << align_branch_power) - 1);
12224   if ((offset + size) >= (1U << align_branch_power))
12225     /* Padding needed to avoid crossing boundary.  */
12226     padding_size = (1U << align_branch_power) - offset;
12227   else
12228     /* No padding needed.  */
12229     padding_size = 0;
12230
12231   /* The return value may be saved in tc_frag_data.length which is
12232      unsigned byte.  */
12233   if (!fits_in_unsigned_byte (padding_size))
12234     abort ();
12235
12236   return padding_size;
12237 }
12238
12239 /* i386_generic_table_relax_frag()
12240
12241    Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
12242    grow/shrink padding to align branch frags.  Hand others to
12243    relax_frag().  */
12244
12245 long
12246 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
12247 {
12248   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12249       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12250     {
12251       long padding_size = i386_branch_padding_size (fragP, 0);
12252       long grow = padding_size - fragP->tc_frag_data.length;
12253
12254       /* When the BRANCH_PREFIX frag is used, the computed address
12255          must match the actual address and there should be no padding.  */
12256       if (fragP->tc_frag_data.padding_address
12257           && (fragP->tc_frag_data.padding_address != fragP->fr_address
12258               || padding_size))
12259         abort ();
12260
12261       /* Update the padding size.  */
12262       if (grow)
12263         fragP->tc_frag_data.length = padding_size;
12264
12265       return grow;
12266     }
12267   else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12268     {
12269       fragS *padding_fragP, *next_fragP;
12270       long padding_size, left_size, last_size;
12271
12272       padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12273       if (!padding_fragP)
12274         /* Use the padding set by the leading BRANCH_PREFIX frag.  */
12275         return (fragP->tc_frag_data.length
12276                 - fragP->tc_frag_data.last_length);
12277
12278       /* Compute the relative address of the padding frag in the very
12279         first time where the BRANCH_PREFIX frag sizes are zero.  */
12280       if (!fragP->tc_frag_data.padding_address)
12281         fragP->tc_frag_data.padding_address
12282           = padding_fragP->fr_address - (fragP->fr_address - stretch);
12283
12284       /* First update the last length from the previous interation.  */
12285       left_size = fragP->tc_frag_data.prefix_length;
12286       for (next_fragP = fragP;
12287            next_fragP != padding_fragP;
12288            next_fragP = next_fragP->fr_next)
12289         if (next_fragP->fr_type == rs_machine_dependent
12290             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12291                 == BRANCH_PREFIX))
12292           {
12293             if (left_size)
12294               {
12295                 int max = next_fragP->tc_frag_data.max_bytes;
12296                 if (max)
12297                   {
12298                     int size;
12299                     if (max > left_size)
12300                       size = left_size;
12301                     else
12302                       size = max;
12303                     left_size -= size;
12304                     next_fragP->tc_frag_data.last_length = size;
12305                   }
12306               }
12307             else
12308               next_fragP->tc_frag_data.last_length = 0;
12309           }
12310
12311       /* Check the padding size for the padding frag.  */
12312       padding_size = i386_branch_padding_size
12313         (padding_fragP, (fragP->fr_address
12314                          + fragP->tc_frag_data.padding_address));
12315
12316       last_size = fragP->tc_frag_data.prefix_length;
12317       /* Check if there is change from the last interation.  */
12318       if (padding_size == last_size)
12319         {
12320           /* Update the expected address of the padding frag.  */
12321           padding_fragP->tc_frag_data.padding_address
12322             = (fragP->fr_address + padding_size
12323                + fragP->tc_frag_data.padding_address);
12324           return 0;
12325         }
12326
12327       if (padding_size > fragP->tc_frag_data.max_prefix_length)
12328         {
12329           /* No padding if there is no sufficient room.  Clear the
12330              expected address of the padding frag.  */
12331           padding_fragP->tc_frag_data.padding_address = 0;
12332           padding_size = 0;
12333         }
12334       else
12335         /* Store the expected address of the padding frag.  */
12336         padding_fragP->tc_frag_data.padding_address
12337           = (fragP->fr_address + padding_size
12338              + fragP->tc_frag_data.padding_address);
12339
12340       fragP->tc_frag_data.prefix_length = padding_size;
12341
12342       /* Update the length for the current interation.  */
12343       left_size = padding_size;
12344       for (next_fragP = fragP;
12345            next_fragP != padding_fragP;
12346            next_fragP = next_fragP->fr_next)
12347         if (next_fragP->fr_type == rs_machine_dependent
12348             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12349                 == BRANCH_PREFIX))
12350           {
12351             if (left_size)
12352               {
12353                 int max = next_fragP->tc_frag_data.max_bytes;
12354                 if (max)
12355                   {
12356                     int size;
12357                     if (max > left_size)
12358                       size = left_size;
12359                     else
12360                       size = max;
12361                     left_size -= size;
12362                     next_fragP->tc_frag_data.length = size;
12363                   }
12364               }
12365             else
12366               next_fragP->tc_frag_data.length = 0;
12367           }
12368
12369       return (fragP->tc_frag_data.length
12370               - fragP->tc_frag_data.last_length);
12371     }
12372   return relax_frag (segment, fragP, stretch);
12373 }
12374
12375 /* md_estimate_size_before_relax()
12376
12377    Called just before relax() for rs_machine_dependent frags.  The x86
12378    assembler uses these frags to handle variable size jump
12379    instructions.
12380
12381    Any symbol that is now undefined will not become defined.
12382    Return the correct fr_subtype in the frag.
12383    Return the initial "guess for variable size of frag" to caller.
12384    The guess is actually the growth beyond the fixed part.  Whatever
12385    we do to grow the fixed or variable part contributes to our
12386    returned value.  */
12387
12388 int
12389 md_estimate_size_before_relax (fragS *fragP, segT segment)
12390 {
12391   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12392       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
12393       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12394     {
12395       i386_classify_machine_dependent_frag (fragP);
12396       return fragP->tc_frag_data.length;
12397     }
12398
12399   /* We've already got fragP->fr_subtype right;  all we have to do is
12400      check for un-relaxable symbols.  On an ELF system, we can't relax
12401      an externally visible symbol, because it may be overridden by a
12402      shared library.  */
12403   if (S_GET_SEGMENT (fragP->fr_symbol) != segment
12404 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12405       || (IS_ELF
12406           && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
12407                                                 fragP->fr_var))
12408 #endif
12409 #if defined (OBJ_COFF) && defined (TE_PE)
12410       || (OUTPUT_FLAVOR == bfd_target_coff_flavour
12411           && S_IS_WEAK (fragP->fr_symbol))
12412 #endif
12413       )
12414     {
12415       /* Symbol is undefined in this segment, or we need to keep a
12416          reloc so that weak symbols can be overridden.  */
12417       int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
12418       enum bfd_reloc_code_real reloc_type;
12419       unsigned char *opcode;
12420       int old_fr_fix;
12421       fixS *fixP = NULL;
12422
12423       if (fragP->fr_var != NO_RELOC)
12424         reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
12425       else if (size == 2)
12426         reloc_type = BFD_RELOC_16_PCREL;
12427 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12428       else if (fragP->tc_frag_data.code64 && fragP->fr_offset == 0
12429                && need_plt32_p (fragP->fr_symbol))
12430         reloc_type = BFD_RELOC_X86_64_PLT32;
12431 #endif
12432       else
12433         reloc_type = BFD_RELOC_32_PCREL;
12434
12435       old_fr_fix = fragP->fr_fix;
12436       opcode = (unsigned char *) fragP->fr_opcode;
12437
12438       switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
12439         {
12440         case UNCOND_JUMP:
12441           /* Make jmp (0xeb) a (d)word displacement jump.  */
12442           opcode[0] = 0xe9;
12443           fragP->fr_fix += size;
12444           fixP = fix_new (fragP, old_fr_fix, size,
12445                           fragP->fr_symbol,
12446                           fragP->fr_offset, 1,
12447                           reloc_type);
12448           break;
12449
12450         case COND_JUMP86:
12451           if (size == 2
12452               && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
12453             {
12454               /* Negate the condition, and branch past an
12455                  unconditional jump.  */
12456               opcode[0] ^= 1;
12457               opcode[1] = 3;
12458               /* Insert an unconditional jump.  */
12459               opcode[2] = 0xe9;
12460               /* We added two extra opcode bytes, and have a two byte
12461                  offset.  */
12462               fragP->fr_fix += 2 + 2;
12463               fix_new (fragP, old_fr_fix + 2, 2,
12464                        fragP->fr_symbol,
12465                        fragP->fr_offset, 1,
12466                        reloc_type);
12467               break;
12468             }
12469           /* Fall through.  */
12470
12471         case COND_JUMP:
12472           if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
12473             {
12474               fragP->fr_fix += 1;
12475               fixP = fix_new (fragP, old_fr_fix, 1,
12476                               fragP->fr_symbol,
12477                               fragP->fr_offset, 1,
12478                               BFD_RELOC_8_PCREL);
12479               fixP->fx_signed = 1;
12480               break;
12481             }
12482
12483           /* This changes the byte-displacement jump 0x7N
12484              to the (d)word-displacement jump 0x0f,0x8N.  */
12485           opcode[1] = opcode[0] + 0x10;
12486           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12487           /* We've added an opcode byte.  */
12488           fragP->fr_fix += 1 + size;
12489           fixP = fix_new (fragP, old_fr_fix + 1, size,
12490                           fragP->fr_symbol,
12491                           fragP->fr_offset, 1,
12492                           reloc_type);
12493           break;
12494
12495         default:
12496           BAD_CASE (fragP->fr_subtype);
12497           break;
12498         }
12499
12500       /* All jumps handled here are signed, but don't unconditionally use a
12501          signed limit check for 32 and 16 bit jumps as we want to allow wrap
12502          around at 4G (outside of 64-bit mode) and 64k.  */
12503       if (size == 4 && flag_code == CODE_64BIT)
12504         fixP->fx_signed = 1;
12505
12506       frag_wane (fragP);
12507       return fragP->fr_fix - old_fr_fix;
12508     }
12509
12510   /* Guess size depending on current relax state.  Initially the relax
12511      state will correspond to a short jump and we return 1, because
12512      the variable part of the frag (the branch offset) is one byte
12513      long.  However, we can relax a section more than once and in that
12514      case we must either set fr_subtype back to the unrelaxed state,
12515      or return the value for the appropriate branch.  */
12516   return md_relax_table[fragP->fr_subtype].rlx_length;
12517 }
12518
12519 /* Called after relax() is finished.
12520
12521    In:  Address of frag.
12522         fr_type == rs_machine_dependent.
12523         fr_subtype is what the address relaxed to.
12524
12525    Out: Any fixSs and constants are set up.
12526         Caller will turn frag into a ".space 0".  */
12527
12528 void
12529 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
12530                  fragS *fragP)
12531 {
12532   unsigned char *opcode;
12533   unsigned char *where_to_put_displacement = NULL;
12534   offsetT target_address;
12535   offsetT opcode_address;
12536   unsigned int extension = 0;
12537   offsetT displacement_from_opcode_start;
12538
12539   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12540       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
12541       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12542     {
12543       /* Generate nop padding.  */
12544       unsigned int size = fragP->tc_frag_data.length;
12545       if (size)
12546         {
12547           if (size > fragP->tc_frag_data.max_bytes)
12548             abort ();
12549
12550           if (flag_debug)
12551             {
12552               const char *msg;
12553               const char *branch = "branch";
12554               const char *prefix = "";
12555               fragS *padding_fragP;
12556               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12557                   == BRANCH_PREFIX)
12558                 {
12559                   padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12560                   switch (fragP->tc_frag_data.default_prefix)
12561                     {
12562                     default:
12563                       abort ();
12564                       break;
12565                     case CS_PREFIX_OPCODE:
12566                       prefix = " cs";
12567                       break;
12568                     case DS_PREFIX_OPCODE:
12569                       prefix = " ds";
12570                       break;
12571                     case ES_PREFIX_OPCODE:
12572                       prefix = " es";
12573                       break;
12574                     case FS_PREFIX_OPCODE:
12575                       prefix = " fs";
12576                       break;
12577                     case GS_PREFIX_OPCODE:
12578                       prefix = " gs";
12579                       break;
12580                     case SS_PREFIX_OPCODE:
12581                       prefix = " ss";
12582                       break;
12583                     }
12584                   if (padding_fragP)
12585                     msg = _("%s:%u: add %d%s at 0x%llx to align "
12586                             "%s within %d-byte boundary\n");
12587                   else
12588                     msg = _("%s:%u: add additional %d%s at 0x%llx to "
12589                             "align %s within %d-byte boundary\n");
12590                 }
12591               else
12592                 {
12593                   padding_fragP = fragP;
12594                   msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
12595                           "%s within %d-byte boundary\n");
12596                 }
12597
12598               if (padding_fragP)
12599                 switch (padding_fragP->tc_frag_data.branch_type)
12600                   {
12601                   case align_branch_jcc:
12602                     branch = "jcc";
12603                     break;
12604                   case align_branch_fused:
12605                     branch = "fused jcc";
12606                     break;
12607                   case align_branch_jmp:
12608                     branch = "jmp";
12609                     break;
12610                   case align_branch_call:
12611                     branch = "call";
12612                     break;
12613                   case align_branch_indirect:
12614                     branch = "indiret branch";
12615                     break;
12616                   case align_branch_ret:
12617                     branch = "ret";
12618                     break;
12619                   default:
12620                     break;
12621                   }
12622
12623               fprintf (stdout, msg,
12624                        fragP->fr_file, fragP->fr_line, size, prefix,
12625                        (long long) fragP->fr_address, branch,
12626                        1 << align_branch_power);
12627             }
12628           if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12629             memset (fragP->fr_opcode,
12630                     fragP->tc_frag_data.default_prefix, size);
12631           else
12632             i386_generate_nops (fragP, (char *) fragP->fr_opcode,
12633                                 size, 0);
12634           fragP->fr_fix += size;
12635         }
12636       return;
12637     }
12638
12639   opcode = (unsigned char *) fragP->fr_opcode;
12640
12641   /* Address we want to reach in file space.  */
12642   target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
12643
12644   /* Address opcode resides at in file space.  */
12645   opcode_address = fragP->fr_address + fragP->fr_fix;
12646
12647   /* Displacement from opcode start to fill into instruction.  */
12648   displacement_from_opcode_start = target_address - opcode_address;
12649
12650   if ((fragP->fr_subtype & BIG) == 0)
12651     {
12652       /* Don't have to change opcode.  */
12653       extension = 1;            /* 1 opcode + 1 displacement  */
12654       where_to_put_displacement = &opcode[1];
12655     }
12656   else
12657     {
12658       if (no_cond_jump_promotion
12659           && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
12660         as_warn_where (fragP->fr_file, fragP->fr_line,
12661                        _("long jump required"));
12662
12663       switch (fragP->fr_subtype)
12664         {
12665         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
12666           extension = 4;                /* 1 opcode + 4 displacement  */
12667           opcode[0] = 0xe9;
12668           where_to_put_displacement = &opcode[1];
12669           break;
12670
12671         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
12672           extension = 2;                /* 1 opcode + 2 displacement  */
12673           opcode[0] = 0xe9;
12674           where_to_put_displacement = &opcode[1];
12675           break;
12676
12677         case ENCODE_RELAX_STATE (COND_JUMP, BIG):
12678         case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
12679           extension = 5;                /* 2 opcode + 4 displacement  */
12680           opcode[1] = opcode[0] + 0x10;
12681           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12682           where_to_put_displacement = &opcode[2];
12683           break;
12684
12685         case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
12686           extension = 3;                /* 2 opcode + 2 displacement  */
12687           opcode[1] = opcode[0] + 0x10;
12688           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12689           where_to_put_displacement = &opcode[2];
12690           break;
12691
12692         case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
12693           extension = 4;
12694           opcode[0] ^= 1;
12695           opcode[1] = 3;
12696           opcode[2] = 0xe9;
12697           where_to_put_displacement = &opcode[3];
12698           break;
12699
12700         default:
12701           BAD_CASE (fragP->fr_subtype);
12702           break;
12703         }
12704     }
12705
12706   /* If size if less then four we are sure that the operand fits,
12707      but if it's 4, then it could be that the displacement is larger
12708      then -/+ 2GB.  */
12709   if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
12710       && object_64bit
12711       && ((addressT) (displacement_from_opcode_start - extension
12712                       + ((addressT) 1 << 31))
12713           > (((addressT) 2 << 31) - 1)))
12714     {
12715       as_bad_where (fragP->fr_file, fragP->fr_line,
12716                     _("jump target out of range"));
12717       /* Make us emit 0.  */
12718       displacement_from_opcode_start = extension;
12719     }
12720   /* Now put displacement after opcode.  */
12721   md_number_to_chars ((char *) where_to_put_displacement,
12722                       (valueT) (displacement_from_opcode_start - extension),
12723                       DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
12724   fragP->fr_fix += extension;
12725 }
12726 \f
12727 /* Apply a fixup (fixP) to segment data, once it has been determined
12728    by our caller that we have all the info we need to fix it up.
12729
12730    Parameter valP is the pointer to the value of the bits.
12731
12732    On the 386, immediates, displacements, and data pointers are all in
12733    the same (little-endian) format, so we don't need to care about which
12734    we are handling.  */
12735
12736 void
12737 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
12738 {
12739   char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
12740   valueT value = *valP;
12741
12742 #if !defined (TE_Mach)
12743   if (fixP->fx_pcrel)
12744     {
12745       switch (fixP->fx_r_type)
12746         {
12747         default:
12748           break;
12749
12750         case BFD_RELOC_64:
12751           fixP->fx_r_type = BFD_RELOC_64_PCREL;
12752           break;
12753         case BFD_RELOC_32:
12754         case BFD_RELOC_X86_64_32S:
12755           fixP->fx_r_type = BFD_RELOC_32_PCREL;
12756           break;
12757         case BFD_RELOC_16:
12758           fixP->fx_r_type = BFD_RELOC_16_PCREL;
12759           break;
12760         case BFD_RELOC_8:
12761           fixP->fx_r_type = BFD_RELOC_8_PCREL;
12762           break;
12763         }
12764     }
12765
12766   if (fixP->fx_addsy != NULL
12767       && (fixP->fx_r_type == BFD_RELOC_32_PCREL
12768           || fixP->fx_r_type == BFD_RELOC_64_PCREL
12769           || fixP->fx_r_type == BFD_RELOC_16_PCREL
12770           || fixP->fx_r_type == BFD_RELOC_8_PCREL)
12771       && !use_rela_relocations)
12772     {
12773       /* This is a hack.  There should be a better way to handle this.
12774          This covers for the fact that bfd_install_relocation will
12775          subtract the current location (for partial_inplace, PC relative
12776          relocations); see more below.  */
12777 #ifndef OBJ_AOUT
12778       if (IS_ELF
12779 #ifdef TE_PE
12780           || OUTPUT_FLAVOR == bfd_target_coff_flavour
12781 #endif
12782           )
12783         value += fixP->fx_where + fixP->fx_frag->fr_address;
12784 #endif
12785 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12786       if (IS_ELF)
12787         {
12788           segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
12789
12790           if ((sym_seg == seg
12791                || (symbol_section_p (fixP->fx_addsy)
12792                    && sym_seg != absolute_section))
12793               && !generic_force_reloc (fixP))
12794             {
12795               /* Yes, we add the values in twice.  This is because
12796                  bfd_install_relocation subtracts them out again.  I think
12797                  bfd_install_relocation is broken, but I don't dare change
12798                  it.  FIXME.  */
12799               value += fixP->fx_where + fixP->fx_frag->fr_address;
12800             }
12801         }
12802 #endif
12803 #if defined (OBJ_COFF) && defined (TE_PE)
12804       /* For some reason, the PE format does not store a
12805          section address offset for a PC relative symbol.  */
12806       if (S_GET_SEGMENT (fixP->fx_addsy) != seg
12807           || S_IS_WEAK (fixP->fx_addsy))
12808         value += md_pcrel_from (fixP);
12809 #endif
12810     }
12811 #if defined (OBJ_COFF) && defined (TE_PE)
12812   if (fixP->fx_addsy != NULL
12813       && S_IS_WEAK (fixP->fx_addsy)
12814       /* PR 16858: Do not modify weak function references.  */
12815       && ! fixP->fx_pcrel)
12816     {
12817 #if !defined (TE_PEP)
12818       /* For x86 PE weak function symbols are neither PC-relative
12819          nor do they set S_IS_FUNCTION.  So the only reliable way
12820          to detect them is to check the flags of their containing
12821          section.  */
12822       if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
12823           && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
12824         ;
12825       else
12826 #endif
12827       value -= S_GET_VALUE (fixP->fx_addsy);
12828     }
12829 #endif
12830
12831   /* Fix a few things - the dynamic linker expects certain values here,
12832      and we must not disappoint it.  */
12833 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12834   if (IS_ELF && fixP->fx_addsy)
12835     switch (fixP->fx_r_type)
12836       {
12837       case BFD_RELOC_386_PLT32:
12838       case BFD_RELOC_X86_64_PLT32:
12839         /* Make the jump instruction point to the address of the operand.
12840            At runtime we merely add the offset to the actual PLT entry.
12841            NB: Subtract the offset size only for jump instructions.  */
12842         if (fixP->fx_pcrel)
12843           value = -4;
12844         break;
12845
12846       case BFD_RELOC_386_TLS_GD:
12847       case BFD_RELOC_386_TLS_LDM:
12848       case BFD_RELOC_386_TLS_IE_32:
12849       case BFD_RELOC_386_TLS_IE:
12850       case BFD_RELOC_386_TLS_GOTIE:
12851       case BFD_RELOC_386_TLS_GOTDESC:
12852       case BFD_RELOC_X86_64_TLSGD:
12853       case BFD_RELOC_X86_64_TLSLD:
12854       case BFD_RELOC_X86_64_GOTTPOFF:
12855       case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
12856         value = 0; /* Fully resolved at runtime.  No addend.  */
12857         /* Fallthrough */
12858       case BFD_RELOC_386_TLS_LE:
12859       case BFD_RELOC_386_TLS_LDO_32:
12860       case BFD_RELOC_386_TLS_LE_32:
12861       case BFD_RELOC_X86_64_DTPOFF32:
12862       case BFD_RELOC_X86_64_DTPOFF64:
12863       case BFD_RELOC_X86_64_TPOFF32:
12864       case BFD_RELOC_X86_64_TPOFF64:
12865         S_SET_THREAD_LOCAL (fixP->fx_addsy);
12866         break;
12867
12868       case BFD_RELOC_386_TLS_DESC_CALL:
12869       case BFD_RELOC_X86_64_TLSDESC_CALL:
12870         value = 0; /* Fully resolved at runtime.  No addend.  */
12871         S_SET_THREAD_LOCAL (fixP->fx_addsy);
12872         fixP->fx_done = 0;
12873         return;
12874
12875       case BFD_RELOC_VTABLE_INHERIT:
12876       case BFD_RELOC_VTABLE_ENTRY:
12877         fixP->fx_done = 0;
12878         return;
12879
12880       default:
12881         break;
12882       }
12883 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)  */
12884
12885   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
12886   if (!object_64bit)
12887     value = extend_to_32bit_address (value);
12888
12889   *valP = value;
12890 #endif /* !defined (TE_Mach)  */
12891
12892   /* Are we finished with this relocation now?  */
12893   if (fixP->fx_addsy == NULL)
12894     {
12895       fixP->fx_done = 1;
12896       switch (fixP->fx_r_type)
12897         {
12898         case BFD_RELOC_X86_64_32S:
12899           fixP->fx_signed = 1;
12900           break;
12901
12902         default:
12903           break;
12904         }
12905     }
12906 #if defined (OBJ_COFF) && defined (TE_PE)
12907   else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
12908     {
12909       fixP->fx_done = 0;
12910       /* Remember value for tc_gen_reloc.  */
12911       fixP->fx_addnumber = value;
12912       /* Clear out the frag for now.  */
12913       value = 0;
12914     }
12915 #endif
12916   else if (use_rela_relocations)
12917     {
12918       if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
12919         fixP->fx_no_overflow = 1;
12920       /* Remember value for tc_gen_reloc.  */
12921       fixP->fx_addnumber = value;
12922       value = 0;
12923     }
12924
12925   md_number_to_chars (p, value, fixP->fx_size);
12926 }
12927 \f
/* Convert the floating point literal at input_line_pointer into its
   target representation.  TYPE selects the format, LITP receives the
   bytes and *SIZEP the number of bytes written; all three are handed
   straight to the generic IEEE converter.  The return value is whatever
   ieee_md_atof returns (NOTE(review): presumably NULL on success or an
   error message -- confirm against ieee_md_atof).  */

const char *
md_atof (int type, char *litP, int *sizeP)
{
  /* The final argument asks for the LITTLENUMs to be emitted in
     reverse order, i.e. least significant first, which is what the
     x86 expects.  */
  const char *result = ieee_md_atof (type, litP, sizeP, false);

  return result;
}
12935 \f
/* Scratch buffer returned by output_invalid.  The longest rendering is
   "(0x" + two hex digits + ")" plus the terminating NUL.  */
static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];

/* Render character C for use in a diagnostic: printable characters are
   shown in single quotes, anything else as a parenthesised hex byte.
   The result lives in a static buffer that the next call overwrites.  */

static char *
output_invalid (int c)
{
  const size_t room = sizeof (output_invalid_buf);

  if (!ISPRINT (c))
    {
      snprintf (output_invalid_buf, room, "(0x%x)", (unsigned char) c);
      return output_invalid_buf;
    }

  snprintf (output_invalid_buf, room, "'%c'", c);
  return output_invalid_buf;
}
12949
12950 /* Verify that @r can be used in the current context.  */
12951
12952 static bool check_register (const reg_entry *r)
12953 {
12954   if (allow_pseudo_reg)
12955     return true;
12956
12957   if (operand_type_all_zero (&r->reg_type))
12958     return false;
12959
12960   if ((r->reg_type.bitfield.dword
12961        || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
12962        || r->reg_type.bitfield.class == RegCR
12963        || r->reg_type.bitfield.class == RegDR)
12964       && !cpu_arch_flags.bitfield.cpui386)
12965     return false;
12966
12967   if (r->reg_type.bitfield.class == RegTR
12968       && (flag_code == CODE_64BIT
12969           || !cpu_arch_flags.bitfield.cpui386
12970           || cpu_arch_isa_flags.bitfield.cpui586
12971           || cpu_arch_isa_flags.bitfield.cpui686))
12972     return false;
12973
12974   if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
12975     return false;
12976
12977   if (!cpu_arch_flags.bitfield.cpuavx512f)
12978     {
12979       if (r->reg_type.bitfield.zmmword
12980           || r->reg_type.bitfield.class == RegMask)
12981         return false;
12982
12983       if (!cpu_arch_flags.bitfield.cpuavx)
12984         {
12985           if (r->reg_type.bitfield.ymmword)
12986             return false;
12987
12988           if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
12989             return false;
12990         }
12991     }
12992
12993   if (r->reg_type.bitfield.tmmword
12994       && (!cpu_arch_flags.bitfield.cpuamx_tile
12995           || flag_code != CODE_64BIT))
12996     return false;
12997
12998   if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
12999     return false;
13000
13001   /* Don't allow fake index register unless allow_index_reg isn't 0. */
13002   if (!allow_index_reg && r->reg_num == RegIZ)
13003     return false;
13004
13005   /* Upper 16 vector registers are only available with VREX in 64bit
13006      mode, and require EVEX encoding.  */
13007   if (r->reg_flags & RegVRex)
13008     {
13009       if (!cpu_arch_flags.bitfield.cpuavx512f
13010           || flag_code != CODE_64BIT)
13011         return false;
13012
13013       if (i.vec_encoding == vex_encoding_default)
13014         i.vec_encoding = vex_encoding_evex;
13015       else if (i.vec_encoding != vex_encoding_evex)
13016         i.vec_encoding = vex_encoding_error;
13017     }
13018
13019   if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
13020       && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
13021       && flag_code != CODE_64BIT)
13022     return false;
13023
13024   if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
13025       && !intel_syntax)
13026     return false;
13027
13028   return true;
13029 }
13030
/* Parse a hardware register name at REG_STRING.
   REG_STRING starts *before* REGISTER_PREFIX.

   On success, returns the matching register table entry and stores in
   *END_OP a pointer just past the text consumed.  Returns NULL when the
   text does not name a register, when the name is too long, when the
   register fails check_register, or when "st(" is followed by anything
   other than a digit 0-7 and a closing parenthesis.  Note that *END_OP
   is written as soon as the name has been scanned, so it may have been
   updated even when NULL is returned later on; it is left untouched
   only by the two early returns before that assignment.  */

static const reg_entry *
parse_real_register (char *reg_string, char **end_op)
{
  char *s = reg_string;
  char *p;
  char reg_name_given[MAX_REG_NAME_SIZE + 1];
  const reg_entry *r;

  /* Skip possible REGISTER_PREFIX and possible whitespace.  */
  if (*s == REGISTER_PREFIX)
    ++s;

  if (is_space_char (*s))
    ++s;

  /* Copy the candidate name into reg_name_given, translating every
     input byte through register_chars.  The loop ends when the table
     yields '\0', which also terminates the copy.  */
  p = reg_name_given;
  while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
    {
      /* Name too long to be a register.  */
      if (p >= reg_name_given + MAX_REG_NAME_SIZE)
        return (const reg_entry *) NULL;
      s++;
    }

  /* For naked regs, make sure that we are not dealing with an identifier.
     This prevents confusing an identifier like `eax_var' with register
     `eax'.  */
  if (allow_naked_reg && identifier_chars[(unsigned char) *s])
    return (const reg_entry *) NULL;

  /* Report the end of the name now; the x87 case below may move it
     further.  */
  *end_op = s;

  r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);

  /* Handle floating point regs, allowing spaces in the (i) part.  */
  if (r == reg_st0)
    {
      /* No x87 registers unless some FPU is enabled or pseudo registers
         are allowed.  */
      if (!cpu_arch_flags.bitfield.cpu8087
          && !cpu_arch_flags.bitfield.cpu287
          && !cpu_arch_flags.bitfield.cpu387
          && !allow_pseudo_reg)
        return (const reg_entry *) NULL;

      if (is_space_char (*s))
        ++s;
      if (*s == '(')
        {
          ++s;
          if (is_space_char (*s))
            ++s;
          if (*s >= '0' && *s <= '7')
            {
              int fpr = *s - '0';
              ++s;
              if (is_space_char (*s))
                ++s;
              if (*s == ')')
                {
                  *end_op = s + 1;
                  /* st(i) is found by indexing from the st(0) entry;
                     the assertion checks that entry really is number
                     FPR.  */
                  know (r[fpr].reg_num == fpr);
                  return r + fpr;
                }
            }
          /* We have "%st(" then garbage.  */
          return (const reg_entry *) NULL;
        }
    }

  /* Plain register (or bare "st"): accept only if it is usable in the
     current context.  */
  return r && check_register (r) ? r : NULL;
}
13102
/* Parse a register at REG_STRING, also accepting symbols that have been
   equated to a register.  REG_STRING starts *before* REGISTER_PREFIX.

   First tries parse_real_register (only when the text starts with
   REGISTER_PREFIX or naked registers are allowed).  If that fails, the
   text is read as a symbol name; if that symbol, possibly through a
   chain of plain symbol aliases, lives in reg_section, the register it
   denotes is used.  Returns the register entry with *END_OP set past
   the consumed text, &bad_reg (after issuing an error) if the symbol
   names a register that check_register rejects, or NULL if no register
   was recognised.  input_line_pointer and the input buffer are restored
   before returning.  */

static const reg_entry *
parse_register (char *reg_string, char **end_op)
{
  const reg_entry *r;

  if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
    r = parse_real_register (reg_string, end_op);
  else
    r = NULL;
  if (!r)
    {
      /* Not a literal register: look for a symbol instead.  The symbol
         reader works on input_line_pointer, so point it at our string
         for the duration and put everything back afterwards.  */
      char *save = input_line_pointer;
      char c;
      symbolS *symbolP;

      input_line_pointer = reg_string;
      c = get_symbol_name (&reg_string);
      symbolP = symbol_find (reg_string);
      /* Follow aliases of the form "sym = other_sym" (no addend) until
         a register symbol is reached or the chain ends.  */
      while (symbolP && S_GET_SEGMENT (symbolP) != reg_section)
        {
          const expressionS *e = symbol_get_value_expression(symbolP);

          if (e->X_op != O_symbol || e->X_add_number)
            break;
          symbolP = e->X_add_symbol;
        }
      if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
        {
          const expressionS *e = symbol_get_value_expression (symbolP);

          /* The symbol's value is an index into i386_regtab.  */
          know (e->X_op == O_register);
          know (e->X_add_number >= 0
                && (valueT) e->X_add_number < i386_regtab_size);
          r = i386_regtab + e->X_add_number;
          if (!check_register (r))
            {
              as_bad (_("register '%s%s' cannot be used here"),
                      register_prefix, r->reg_name);
              r = &bad_reg;
            }
          *end_op = input_line_pointer;
        }
      /* Put back the character saved in C by get_symbol_name and
         restore the caller's input position.  */
      *input_line_pointer = c;
      input_line_pointer = save;
    }
  return r;
}
13152
13153 int
13154 i386_parse_name (char *name, expressionS *e, char *nextcharP)
13155 {
13156   const reg_entry *r = NULL;
13157   char *end = input_line_pointer;
13158
13159   *end = *nextcharP;
13160   if (*name == REGISTER_PREFIX || allow_naked_reg)
13161     r = parse_real_register (name, &input_line_pointer);
13162   if (r && end <= input_line_pointer)
13163     {
13164       *nextcharP = *input_line_pointer;
13165       *input_line_pointer = 0;
13166       if (r != &bad_reg)
13167         {
13168           e->X_op = O_register;
13169           e->X_add_number = r - i386_regtab;
13170         }
13171       else
13172           e->X_op = O_illegal;
13173       return 1;
13174     }
13175   input_line_pointer = end;
13176   *end = 0;
13177   return intel_syntax ? i386_intel_parse_name (name, e) : 0;
13178 }
13179
13180 void
13181 md_operand (expressionS *e)
13182 {
13183   char *end;
13184   const reg_entry *r;
13185
13186   switch (*input_line_pointer)
13187     {
13188     case REGISTER_PREFIX:
13189       r = parse_real_register (input_line_pointer, &end);
13190       if (r)
13191         {
13192           e->X_op = O_register;
13193           e->X_add_number = r - i386_regtab;
13194           input_line_pointer = end;
13195         }
13196       break;
13197
13198     case '[':
13199       gas_assert (intel_syntax);
13200       end = input_line_pointer++;
13201       expression (e);
13202       if (*input_line_pointer == ']')
13203         {
13204           ++input_line_pointer;
13205           e->X_op_symbol = make_expr_symbol (e);
13206           e->X_add_symbol = NULL;
13207           e->X_add_number = 0;
13208           e->X_op = O_index;
13209         }
13210       else
13211         {
13212           e->X_op = O_absent;
13213           input_line_pointer = end;
13214         }
13215       break;
13216     }
13217 }
13218
13219 \f
/* Single-letter command line options specific to this target, in getopt
   notation (a trailing ':' marks an option taking an argument, '::' an
   optional one).  ELF-capable builds additionally accept -k, -V, -Q and
   -s; all builds accept -q, -n and -O.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
const char *md_shortopts = "kVQ:sqnO::";
#else
const char *md_shortopts = "qnO::";
#endif
13225
/* Identifiers for the target-specific long options, numbered
   consecutively from OPTION_MD_BASE.  Each one is the value stored in
   the corresponding md_longopts entry and the case label used for it in
   md_parse_option.  */
#define OPTION_32 (OPTION_MD_BASE + 0)
#define OPTION_64 (OPTION_MD_BASE + 1)
#define OPTION_DIVIDE (OPTION_MD_BASE + 2)
#define OPTION_MARCH (OPTION_MD_BASE + 3)
#define OPTION_MTUNE (OPTION_MD_BASE + 4)
#define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
#define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
#define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
#define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
#define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
#define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
#define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
#define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
#define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
#define OPTION_X32 (OPTION_MD_BASE + 14)
#define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
#define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
#define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
#define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
#define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
#define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
#define OPTION_MSHARED (OPTION_MD_BASE + 21)
#define OPTION_MAMD64 (OPTION_MD_BASE + 22)
#define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
#define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
#define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
#define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
#define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
#define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
#define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
#define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
#define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
#define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
#define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
#define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
13261
/* Table of target-specific long options.  Each entry gives the option
   name, whether it takes an argument, and the OPTION_* value that
   md_parse_option receives for it.  Entries inside preprocessor
   conditionals exist only in builds for the corresponding object
   formats.  The table ends with an all-zero sentinel entry.  */
struct option md_longopts[] =
{
  {"32", no_argument, NULL, OPTION_32},
  /* --64 needs an object format that has a 64-bit variant.  */
#if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
  {"64", no_argument, NULL, OPTION_64},
#endif
  /* ELF-only options.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"x32", no_argument, NULL, OPTION_X32},
  {"mshared", no_argument, NULL, OPTION_MSHARED},
  {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
#endif
  {"divide", no_argument, NULL, OPTION_DIVIDE},
  {"march", required_argument, NULL, OPTION_MARCH},
  {"mtune", required_argument, NULL, OPTION_MTUNE},
  {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
  {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
  {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
  {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
  {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
  {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
  {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
  {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
  {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
  {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
  {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
  {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
  {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
  /* PE-only option.  */
# if defined (TE_PE) || defined (TE_PEP)
  {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
#endif
  {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
  {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
  {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
  {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
  {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
  {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
  {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
  {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
  {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
  {"mlfence-before-indirect-branch", required_argument, NULL,
   OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
  {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
  {"mamd64", no_argument, NULL, OPTION_MAMD64},
  {"mintel64", no_argument, NULL, OPTION_MINTEL64},
  {NULL, no_argument, NULL, 0}
};
/* Size of the table in bytes (not the number of entries).  */
size_t md_longopts_size = sizeof (md_longopts);
13310
13311 int
13312 md_parse_option (int c, const char *arg)
13313 {
13314   unsigned int j;
13315   char *arch, *next, *saved, *type;
13316
13317   switch (c)
13318     {
13319     case 'n':
13320       optimize_align_code = 0;
13321       break;
13322
13323     case 'q':
13324       quiet_warnings = 1;
13325       break;
13326
13327 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13328       /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
13329          should be emitted or not.  FIXME: Not implemented.  */
13330     case 'Q':
13331       if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
13332         return 0;
13333       break;
13334
13335       /* -V: SVR4 argument to print version ID.  */
13336     case 'V':
13337       print_version_id ();
13338       break;
13339
13340       /* -k: Ignore for FreeBSD compatibility.  */
13341     case 'k':
13342       break;
13343
13344     case 's':
13345       /* -s: On i386 Solaris, this tells the native assembler to use
13346          .stab instead of .stab.excl.  We always use .stab anyhow.  */
13347       break;
13348
13349     case OPTION_MSHARED:
13350       shared = 1;
13351       break;
13352
13353     case OPTION_X86_USED_NOTE:
13354       if (strcasecmp (arg, "yes") == 0)
13355         x86_used_note = 1;
13356       else if (strcasecmp (arg, "no") == 0)
13357         x86_used_note = 0;
13358       else
13359         as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
13360       break;
13361
13362
13363 #endif
13364 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13365      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13366     case OPTION_64:
13367       {
13368         const char **list, **l;
13369
13370         list = bfd_target_list ();
13371         for (l = list; *l != NULL; l++)
13372           if (startswith (*l, "elf64-x86-64")
13373               || strcmp (*l, "coff-x86-64") == 0
13374               || strcmp (*l, "pe-x86-64") == 0
13375               || strcmp (*l, "pei-x86-64") == 0
13376               || strcmp (*l, "mach-o-x86-64") == 0)
13377             {
13378               default_arch = "x86_64";
13379               break;
13380             }
13381         if (*l == NULL)
13382           as_fatal (_("no compiled in support for x86_64"));
13383         free (list);
13384       }
13385       break;
13386 #endif
13387
13388 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13389     case OPTION_X32:
13390       if (IS_ELF)
13391         {
13392           const char **list, **l;
13393
13394           list = bfd_target_list ();
13395           for (l = list; *l != NULL; l++)
13396             if (startswith (*l, "elf32-x86-64"))
13397               {
13398                 default_arch = "x86_64:32";
13399                 break;
13400               }
13401           if (*l == NULL)
13402             as_fatal (_("no compiled in support for 32bit x86_64"));
13403           free (list);
13404         }
13405       else
13406         as_fatal (_("32bit x86_64 is only supported for ELF"));
13407       break;
13408 #endif
13409
13410     case OPTION_32:
13411       default_arch = "i386";
13412       break;
13413
13414     case OPTION_DIVIDE:
13415 #ifdef SVR4_COMMENT_CHARS
13416       {
13417         char *n, *t;
13418         const char *s;
13419
13420         n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
13421         t = n;
13422         for (s = i386_comment_chars; *s != '\0'; s++)
13423           if (*s != '/')
13424             *t++ = *s;
13425         *t = '\0';
13426         i386_comment_chars = n;
13427       }
13428 #endif
13429       break;
13430
13431     case OPTION_MARCH:
13432       saved = xstrdup (arg);
13433       arch = saved;
13434       /* Allow -march=+nosse.  */
13435       if (*arch == '+')
13436         arch++;
13437       do
13438         {
13439           if (*arch == '.')
13440             as_fatal (_("invalid -march= option: `%s'"), arg);
13441           next = strchr (arch, '+');
13442           if (next)
13443             *next++ = '\0';
13444           for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13445             {
13446               if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
13447                   && strcmp (arch, cpu_arch[j].name) == 0)
13448                 {
13449                   /* Processor.  */
13450                   if (! cpu_arch[j].enable.bitfield.cpui386)
13451                     continue;
13452
13453                   cpu_arch_name = cpu_arch[j].name;
13454                   free (cpu_sub_arch_name);
13455                   cpu_sub_arch_name = NULL;
13456                   cpu_arch_flags = cpu_arch[j].enable;
13457                   cpu_arch_isa = cpu_arch[j].type;
13458                   cpu_arch_isa_flags = cpu_arch[j].enable;
13459                   if (!cpu_arch_tune_set)
13460                     {
13461                       cpu_arch_tune = cpu_arch_isa;
13462                       cpu_arch_tune_flags = cpu_arch_isa_flags;
13463                     }
13464                   break;
13465                 }
13466               else if (cpu_arch[j].type == PROCESSOR_NONE
13467                        && strcmp (arch, cpu_arch[j].name) == 0
13468                        && !cpu_flags_all_zero (&cpu_arch[j].enable))
13469                 {
13470                   /* ISA extension.  */
13471                   i386_cpu_flags flags;
13472
13473                   flags = cpu_flags_or (cpu_arch_flags,
13474                                         cpu_arch[j].enable);
13475
13476                   if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13477                     {
13478                       extend_cpu_sub_arch_name (arch);
13479                       cpu_arch_flags = flags;
13480                       cpu_arch_isa_flags = flags;
13481                     }
13482                   else
13483                     cpu_arch_isa_flags
13484                       = cpu_flags_or (cpu_arch_isa_flags,
13485                                       cpu_arch[j].enable);
13486                   break;
13487                 }
13488             }
13489
13490           if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
13491             {
13492               /* Disable an ISA extension.  */
13493               for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13494                 if (cpu_arch[j].type == PROCESSOR_NONE
13495                     && strcmp (arch + 2, cpu_arch[j].name) == 0)
13496                   {
13497                     i386_cpu_flags flags;
13498
13499                     flags = cpu_flags_and_not (cpu_arch_flags,
13500                                                cpu_arch[j].disable);
13501                     if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13502                       {
13503                         extend_cpu_sub_arch_name (arch);
13504                         cpu_arch_flags = flags;
13505                         cpu_arch_isa_flags = flags;
13506                       }
13507                     break;
13508                   }
13509             }
13510
13511           if (j >= ARRAY_SIZE (cpu_arch))
13512             as_fatal (_("invalid -march= option: `%s'"), arg);
13513
13514           arch = next;
13515         }
13516       while (next != NULL);
13517       free (saved);
13518       break;
13519
13520     case OPTION_MTUNE:
13521       if (*arg == '.')
13522         as_fatal (_("invalid -mtune= option: `%s'"), arg);
13523       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13524         {
13525           if (cpu_arch[j].type != PROCESSOR_NONE
13526               && strcmp (arg, cpu_arch[j].name) == 0)
13527             {
13528               cpu_arch_tune_set = 1;
13529               cpu_arch_tune = cpu_arch [j].type;
13530               cpu_arch_tune_flags = cpu_arch[j].enable;
13531               break;
13532             }
13533         }
13534       if (j >= ARRAY_SIZE (cpu_arch))
13535         as_fatal (_("invalid -mtune= option: `%s'"), arg);
13536       break;
13537
13538     case OPTION_MMNEMONIC:
13539       if (strcasecmp (arg, "att") == 0)
13540         intel_mnemonic = 0;
13541       else if (strcasecmp (arg, "intel") == 0)
13542         intel_mnemonic = 1;
13543       else
13544         as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
13545       break;
13546
13547     case OPTION_MSYNTAX:
13548       if (strcasecmp (arg, "att") == 0)
13549         intel_syntax = 0;
13550       else if (strcasecmp (arg, "intel") == 0)
13551         intel_syntax = 1;
13552       else
13553         as_fatal (_("invalid -msyntax= option: `%s'"), arg);
13554       break;
13555
13556     case OPTION_MINDEX_REG:
13557       allow_index_reg = 1;
13558       break;
13559
13560     case OPTION_MNAKED_REG:
13561       allow_naked_reg = 1;
13562       break;
13563
13564     case OPTION_MSSE2AVX:
13565       sse2avx = 1;
13566       break;
13567
13568     case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
13569       use_unaligned_vector_move = 1;
13570       break;
13571
13572     case OPTION_MSSE_CHECK:
13573       if (strcasecmp (arg, "error") == 0)
13574         sse_check = check_error;
13575       else if (strcasecmp (arg, "warning") == 0)
13576         sse_check = check_warning;
13577       else if (strcasecmp (arg, "none") == 0)
13578         sse_check = check_none;
13579       else
13580         as_fatal (_("invalid -msse-check= option: `%s'"), arg);
13581       break;
13582
13583     case OPTION_MOPERAND_CHECK:
13584       if (strcasecmp (arg, "error") == 0)
13585         operand_check = check_error;
13586       else if (strcasecmp (arg, "warning") == 0)
13587         operand_check = check_warning;
13588       else if (strcasecmp (arg, "none") == 0)
13589         operand_check = check_none;
13590       else
13591         as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
13592       break;
13593
13594     case OPTION_MAVXSCALAR:
13595       if (strcasecmp (arg, "128") == 0)
13596         avxscalar = vex128;
13597       else if (strcasecmp (arg, "256") == 0)
13598         avxscalar = vex256;
13599       else
13600         as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
13601       break;
13602
13603     case OPTION_MVEXWIG:
13604       if (strcmp (arg, "0") == 0)
13605         vexwig = vexw0;
13606       else if (strcmp (arg, "1") == 0)
13607         vexwig = vexw1;
13608       else
13609         as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
13610       break;
13611
13612     case OPTION_MADD_BND_PREFIX:
13613       add_bnd_prefix = 1;
13614       break;
13615
13616     case OPTION_MEVEXLIG:
13617       if (strcmp (arg, "128") == 0)
13618         evexlig = evexl128;
13619       else if (strcmp (arg, "256") == 0)
13620         evexlig = evexl256;
13621       else  if (strcmp (arg, "512") == 0)
13622         evexlig = evexl512;
13623       else
13624         as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
13625       break;
13626
13627     case OPTION_MEVEXRCIG:
13628       if (strcmp (arg, "rne") == 0)
13629         evexrcig = rne;
13630       else if (strcmp (arg, "rd") == 0)
13631         evexrcig = rd;
13632       else if (strcmp (arg, "ru") == 0)
13633         evexrcig = ru;
13634       else if (strcmp (arg, "rz") == 0)
13635         evexrcig = rz;
13636       else
13637         as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
13638       break;
13639
13640     case OPTION_MEVEXWIG:
13641       if (strcmp (arg, "0") == 0)
13642         evexwig = evexw0;
13643       else if (strcmp (arg, "1") == 0)
13644         evexwig = evexw1;
13645       else
13646         as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
13647       break;
13648
13649 # if defined (TE_PE) || defined (TE_PEP)
13650     case OPTION_MBIG_OBJ:
13651       use_big_obj = 1;
13652       break;
13653 #endif
13654
13655     case OPTION_MOMIT_LOCK_PREFIX:
13656       if (strcasecmp (arg, "yes") == 0)
13657         omit_lock_prefix = 1;
13658       else if (strcasecmp (arg, "no") == 0)
13659         omit_lock_prefix = 0;
13660       else
13661         as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
13662       break;
13663
13664     case OPTION_MFENCE_AS_LOCK_ADD:
13665       if (strcasecmp (arg, "yes") == 0)
13666         avoid_fence = 1;
13667       else if (strcasecmp (arg, "no") == 0)
13668         avoid_fence = 0;
13669       else
13670         as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
13671       break;
13672
13673     case OPTION_MLFENCE_AFTER_LOAD:
13674       if (strcasecmp (arg, "yes") == 0)
13675         lfence_after_load = 1;
13676       else if (strcasecmp (arg, "no") == 0)
13677         lfence_after_load = 0;
13678       else
13679         as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
13680       break;
13681
13682     case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
13683       if (strcasecmp (arg, "all") == 0)
13684         {
13685           lfence_before_indirect_branch = lfence_branch_all;
13686           if (lfence_before_ret == lfence_before_ret_none)
13687             lfence_before_ret = lfence_before_ret_shl;
13688         }
13689       else if (strcasecmp (arg, "memory") == 0)
13690         lfence_before_indirect_branch = lfence_branch_memory;
13691       else if (strcasecmp (arg, "register") == 0)
13692         lfence_before_indirect_branch = lfence_branch_register;
13693       else if (strcasecmp (arg, "none") == 0)
13694         lfence_before_indirect_branch = lfence_branch_none;
13695       else
13696         as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
13697                   arg);
13698       break;
13699
13700     case OPTION_MLFENCE_BEFORE_RET:
13701       if (strcasecmp (arg, "or") == 0)
13702         lfence_before_ret = lfence_before_ret_or;
13703       else if (strcasecmp (arg, "not") == 0)
13704         lfence_before_ret = lfence_before_ret_not;
13705       else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
13706         lfence_before_ret = lfence_before_ret_shl;
13707       else if (strcasecmp (arg, "none") == 0)
13708         lfence_before_ret = lfence_before_ret_none;
13709       else
13710         as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
13711                   arg);
13712       break;
13713
13714     case OPTION_MRELAX_RELOCATIONS:
13715       if (strcasecmp (arg, "yes") == 0)
13716         generate_relax_relocations = 1;
13717       else if (strcasecmp (arg, "no") == 0)
13718         generate_relax_relocations = 0;
13719       else
13720         as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
13721       break;
13722
13723     case OPTION_MALIGN_BRANCH_BOUNDARY:
13724       {
13725         char *end;
13726         long int align = strtoul (arg, &end, 0);
13727         if (*end == '\0')
13728           {
13729             if (align == 0)
13730               {
13731                 align_branch_power = 0;
13732                 break;
13733               }
13734             else if (align >= 16)
13735               {
13736                 int align_power;
13737                 for (align_power = 0;
13738                      (align & 1) == 0;
13739                      align >>= 1, align_power++)
13740                   continue;
13741                 /* Limit alignment power to 31.  */
13742                 if (align == 1 && align_power < 32)
13743                   {
13744                     align_branch_power = align_power;
13745                     break;
13746                   }
13747               }
13748           }
13749         as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
13750       }
13751       break;
13752
13753     case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
13754       {
13755         char *end;
13756         int align = strtoul (arg, &end, 0);
13757         /* Some processors only support 5 prefixes.  */
13758         if (*end == '\0' && align >= 0 && align < 6)
13759           {
13760             align_branch_prefix_size = align;
13761             break;
13762           }
13763         as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
13764                   arg);
13765       }
13766       break;
13767
13768     case OPTION_MALIGN_BRANCH:
13769       align_branch = 0;
13770       saved = xstrdup (arg);
13771       type = saved;
13772       do
13773         {
13774           next = strchr (type, '+');
13775           if (next)
13776             *next++ = '\0';
13777           if (strcasecmp (type, "jcc") == 0)
13778             align_branch |= align_branch_jcc_bit;
13779           else if (strcasecmp (type, "fused") == 0)
13780             align_branch |= align_branch_fused_bit;
13781           else if (strcasecmp (type, "jmp") == 0)
13782             align_branch |= align_branch_jmp_bit;
13783           else if (strcasecmp (type, "call") == 0)
13784             align_branch |= align_branch_call_bit;
13785           else if (strcasecmp (type, "ret") == 0)
13786             align_branch |= align_branch_ret_bit;
13787           else if (strcasecmp (type, "indirect") == 0)
13788             align_branch |= align_branch_indirect_bit;
13789           else
13790             as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
13791           type = next;
13792         }
13793       while (next != NULL);
13794       free (saved);
13795       break;
13796
13797     case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
13798       align_branch_power = 5;
13799       align_branch_prefix_size = 5;
13800       align_branch = (align_branch_jcc_bit
13801                       | align_branch_fused_bit
13802                       | align_branch_jmp_bit);
13803       break;
13804
13805     case OPTION_MAMD64:
13806       isa64 = amd64;
13807       break;
13808
13809     case OPTION_MINTEL64:
13810       isa64 = intel64;
13811       break;
13812
13813     case 'O':
13814       if (arg == NULL)
13815         {
13816           optimize = 1;
13817           /* Turn off -Os.  */
13818           optimize_for_space = 0;
13819         }
13820       else if (*arg == 's')
13821         {
13822           optimize_for_space = 1;
13823           /* Turn on all encoding optimizations.  */
13824           optimize = INT_MAX;
13825         }
13826       else
13827         {
13828           optimize = atoi (arg);
13829           /* Turn off -Os.  */
13830           optimize_for_space = 0;
13831         }
13832       break;
13833
13834     default:
13835       return 0;
13836     }
13837   return 1;
13838 }
13839
/* Blank line used as the initial contents of show_arch's static message
   buffer.  Both output_message and show_arch take sizeof of this string
   to work out how much room is left on the line being built, so its
   width bounds the length of each printed line.  */
#define MESSAGE_TEMPLATE \
"                                                                                "
13842
13843 static char *
13844 output_message (FILE *stream, char *p, char *message, char *start,
13845                 int *left_p, const char *name, int len)
13846 {
13847   int size = sizeof (MESSAGE_TEMPLATE);
13848   int left = *left_p;
13849
13850   /* Reserve 2 spaces for ", " or ",\0" */
13851   left -= len + 2;
13852
13853   /* Check if there is any room.  */
13854   if (left >= 0)
13855     {
13856       if (p != start)
13857         {
13858           *p++ = ',';
13859           *p++ = ' ';
13860         }
13861       p = mempcpy (p, name, len);
13862     }
13863   else
13864     {
13865       /* Output the current message now and start a new one.  */
13866       *p++ = ',';
13867       *p = '\0';
13868       fprintf (stream, "%s\n", message);
13869       p = start;
13870       left = size - (start - message) - len - 2;
13871
13872       gas_assert (left >= 0);
13873
13874       p = mempcpy (p, name, len);
13875     }
13876
13877   *left_p = left;
13878   return p;
13879 }
13880
13881 static void
13882 show_arch (FILE *stream, int ext, int check)
13883 {
13884   static char message[] = MESSAGE_TEMPLATE;
13885   char *start = message + 27;
13886   char *p;
13887   int size = sizeof (MESSAGE_TEMPLATE);
13888   int left;
13889   const char *name;
13890   int len;
13891   unsigned int j;
13892
13893   p = start;
13894   left = size - (start - message);
13895
13896   if (!ext && check)
13897     {
13898       p = output_message (stream, p, message, start, &left,
13899                           STRING_COMMA_LEN ("default"));
13900       p = output_message (stream, p, message, start, &left,
13901                           STRING_COMMA_LEN ("push"));
13902       p = output_message (stream, p, message, start, &left,
13903                           STRING_COMMA_LEN ("pop"));
13904     }
13905
13906   for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13907     {
13908       /* Should it be skipped?  */
13909       if (cpu_arch [j].skip)
13910         continue;
13911
13912       name = cpu_arch [j].name;
13913       len = cpu_arch [j].len;
13914       if (cpu_arch[j].type == PROCESSOR_NONE)
13915         {
13916           /* It is an extension.  Skip if we aren't asked to show it.  */
13917           if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
13918             continue;
13919         }
13920       else if (ext)
13921         {
13922           /* It is an processor.  Skip if we show only extension.  */
13923           continue;
13924         }
13925       else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
13926         {
13927           /* It is an impossible processor - skip.  */
13928           continue;
13929         }
13930
13931       p = output_message (stream, p, message, start, &left, name, len);
13932     }
13933
13934   /* Display disabled extensions.  */
13935   if (ext)
13936     for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13937       {
13938         char *str;
13939
13940         if (cpu_arch[j].type != PROCESSOR_NONE
13941             || !cpu_flags_all_zero (&cpu_arch[j].enable))
13942           continue;
13943         str = xasprintf ("no%s", cpu_arch[j].name);
13944         p = output_message (stream, p, message, start, &left, str,
13945                             strlen (str));
13946         free (str);
13947       }
13948
13949   *p = '\0';
13950   fprintf (stream, "%s\n", message);
13951 }
13952
/* Print the summary of the command-line options handled by this file to
   STREAM.

   Which options are listed depends on the object format and target
   macros in effect at build time.  Every help text is wrapped in _() so
   it can be translated; the string literals are therefore message ids
   and must not be re-split or re-flowed casually.  */

void
md_show_usage (FILE *stream)
{
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -Qy, -Qn                ignored\n\
  -V                      print assembler version number\n\
  -k                      ignored\n"));
#endif
  fprintf (stream, _("\
  -n                      do not optimize code alignment\n\
  -O{012s}                attempt some code optimizations\n\
  -q                      quieten some warnings\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -s                      ignored\n"));
#endif
  /* The object-size switches are only described when BFD64 is defined;
     --x32 is mentioned for ELF builds only.  */
#ifdef BFD64
# if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  --32/--64/--x32         generate 32bit/64bit/x32 object\n"));
# elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
  fprintf (stream, _("\
  --32/--64               generate 32bit/64bit object\n"));
# endif
#endif
#ifdef SVR4_COMMENT_CHARS
  fprintf (stream, _("\
  --divide                do not treat `/' as a comment character\n"));
#else
  fprintf (stream, _("\
  --divide                ignored\n"));
#endif
  /* The CPU and extension name lists are generated by show_arch rather
     than being spelled out here.  */
  fprintf (stream, _("\
  -march=CPU[,+EXTENSION...]\n\
                          generate code for CPU and EXTENSION, CPU is one of:\n"));
  /* Processors, with the CHECK filtering enabled.  */
  show_arch (stream, 0, 1);
  fprintf (stream, _("\
                          EXTENSION is combination of (possibly \"no\"-prefixed):\n"));
  /* Extensions, including their "no"-prefixed forms.  */
  show_arch (stream, 1, 0);
  fprintf (stream, _("\
  -mtune=CPU              optimize for CPU, CPU is one of:\n"));
  /* Processors again, this time without the CHECK filtering.  */
  show_arch (stream, 0, 0);
  fprintf (stream, _("\
  -msse2avx               encode SSE instructions with VEX prefix\n"));
  fprintf (stream, _("\
  -muse-unaligned-vector-move\n\
                          encode aligned vector move as unaligned vector move\n"));
  fprintf (stream, _("\
  -msse-check=[none|error|warning] (default: warning)\n\
                          check SSE instructions\n"));
  fprintf (stream, _("\
  -moperand-check=[none|error|warning] (default: warning)\n\
                          check operand combinations for validity\n"));
  fprintf (stream, _("\
  -mavxscalar=[128|256] (default: 128)\n\
                          encode scalar AVX instructions with specific vector\n\
                           length\n"));
  fprintf (stream, _("\
  -mvexwig=[0|1] (default: 0)\n\
                          encode VEX instructions with specific VEX.W value\n\
                           for VEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexlig=[128|256|512] (default: 128)\n\
                          encode scalar EVEX instructions with specific vector\n\
                           length\n"));
  fprintf (stream, _("\
  -mevexwig=[0|1] (default: 0)\n\
                          encode EVEX instructions with specific EVEX.W value\n\
                           for EVEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
                          encode EVEX instructions with specific EVEX.RC value\n\
                           for SAE-only ignored instructions\n"));
  /* The option line is completed with the default, which depends on
     the build-time value of SYSV386_COMPAT.  */
  fprintf (stream, _("\
  -mmnemonic=[att|intel] "));
  if (SYSV386_COMPAT)
    fprintf (stream, _("(default: att)\n"));
  else
    fprintf (stream, _("(default: intel)\n"));
  fprintf (stream, _("\
                          use AT&T/Intel mnemonic\n"));
  fprintf (stream, _("\
  -msyntax=[att|intel] (default: att)\n\
                          use AT&T/Intel syntax\n"));
  fprintf (stream, _("\
  -mindex-reg             support pseudo index registers\n"));
  /* `%%' below is a printf escape; a single `%' is printed.  */
  fprintf (stream, _("\
  -mnaked-reg             don't require `%%' prefix for registers\n"));
  fprintf (stream, _("\
  -madd-bnd-prefix        add BND prefix for all valid branches\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -mshared                disable branch optimization for shared code\n"));
  /* Default shown follows the build-time DEFAULT_X86_USED_NOTE.  */
  fprintf (stream, _("\
  -mx86-used-note=[no|yes] "));
  if (DEFAULT_X86_USED_NOTE)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate x86 used ISA and feature properties\n"));
#endif
#if defined (TE_PE) || defined (TE_PEP)
  fprintf (stream, _("\
  -mbig-obj               generate big object files\n"));
#endif
  fprintf (stream, _("\
  -momit-lock-prefix=[no|yes] (default: no)\n\
                          strip all lock prefixes\n"));
  fprintf (stream, _("\
  -mfence-as-lock-add=[no|yes] (default: no)\n\
                          encode lfence, mfence and sfence as\n\
                           lock addl $0x0, (%%{re}sp)\n"));
  /* Default shown follows the build-time
     DEFAULT_GENERATE_X86_RELAX_RELOCATIONS.  */
  fprintf (stream, _("\
  -mrelax-relocations=[no|yes] "));
  if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate relax relocations\n"));
  fprintf (stream, _("\
  -malign-branch-boundary=NUM (default: 0)\n\
                          align branches within NUM byte boundary\n"));
  fprintf (stream, _("\
  -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
                          TYPE is combination of jcc, fused, jmp, call, ret,\n\
                           indirect\n\
                          specify types of branches to align\n"));
  fprintf (stream, _("\
  -malign-branch-prefix-size=NUM (default: 5)\n\
                          align branches with NUM prefixes per instruction\n"));
  fprintf (stream, _("\
  -mbranches-within-32B-boundaries\n\
                          align branches within 32 byte boundary\n"));
  fprintf (stream, _("\
  -mlfence-after-load=[no|yes] (default: no)\n\
                          generate lfence after load\n"));
  fprintf (stream, _("\
  -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
                          generate lfence before indirect near branch\n"));
  fprintf (stream, _("\
  -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
                          generate lfence before ret\n"));
  fprintf (stream, _("\
  -mamd64                 accept only AMD64 ISA [default]\n"));
  fprintf (stream, _("\
  -mintel64               accept only Intel64 ISA\n"));
}
14103
#if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
     || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))

/* Pick the target format to use.

   Returns the name of the output format, chosen from default_arch, the
   selected ISA and OUTPUT_FLAVOR.  As side effects this sets the code
   size through update_code_flag, the ELF ABI, default ISA/tune flags
   when none were chosen, and the relocation-related globals
   (use_rela_relocations, object_64bit, disallow_64bit_reloc,
   tls_get_addr).  Unknown architectures and invalid Intel MCU
   combinations are fatal; an unhandled output flavour aborts.  */

const char *
i386_target_format (void)
{
  if (startswith (default_arch, "x86_64"))
    {
      update_code_flag (CODE_64BIT, 1);
      /* Exactly "x86_64" selects the 64-bit ABI; any longer string with
         that prefix selects the x32 ABI.  */
      if (default_arch[6] == '\0')
        x86_elf_abi = X86_64_ABI;
      else
        x86_elf_abi = X86_64_X32_ABI;
    }
  else if (!strcmp (default_arch, "i386"))
    update_code_flag (CODE_32BIT, 1);
  else if (!strcmp (default_arch, "iamcu"))
    {
      update_code_flag (CODE_32BIT, 1);
      if (cpu_arch_isa == PROCESSOR_UNKNOWN)
        {
          /* No ISA has been selected yet: adopt the Intel MCU one.  */
          static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
          cpu_arch_name = "iamcu";
          free (cpu_sub_arch_name);
          cpu_sub_arch_name = NULL;
          cpu_arch_flags = iamcu_flags;
          cpu_arch_isa = PROCESSOR_IAMCU;
          cpu_arch_isa_flags = iamcu_flags;
          /* Tuning follows the ISA unless it was set explicitly.  */
          if (!cpu_arch_tune_set)
            {
              cpu_arch_tune = cpu_arch_isa;
              cpu_arch_tune_flags = cpu_arch_isa_flags;
            }
        }
      else if (cpu_arch_isa != PROCESSOR_IAMCU)
        as_fatal (_("Intel MCU doesn't support `%s' architecture"),
                  cpu_arch_name);
    }
  else
    as_fatal (_("unknown architecture"));

  /* If no ISA or tune flags are set, fall back to cpu_arch entry 1 in
     64-bit mode and entry 0 otherwise.  NOTE(review): this relies on
     the ordering of the cpu_arch table, which is defined elsewhere.  */
  if (cpu_flags_all_zero (&cpu_arch_isa_flags))
    cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
  if (cpu_flags_all_zero (&cpu_arch_tune_flags))
    cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].enable;

  /* Which case labels exist below depends on the object formats this
     assembler was configured for.  */
  switch (OUTPUT_FLAVOR)
    {
#if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
    case bfd_target_aout_flavour:
      return AOUT_TARGET_FORMAT;
#endif
#if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
# if defined (TE_PE) || defined (TE_PEP)
    case bfd_target_coff_flavour:
      if (flag_code == CODE_64BIT)
        {
          object_64bit = 1;
          return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
        }
      return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
# elif defined (TE_GO32)
    case bfd_target_coff_flavour:
      return "coff-go32";
# else
    case bfd_target_coff_flavour:
      return "coff-i386";
# endif
#endif
#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
    case bfd_target_elf_flavour:
      {
        const char *format;

        switch (x86_elf_abi)
          {
          /* Any ABI other than the two x86-64 ones below.  Note the
             three leading underscores in the TLS helper name here,
             against two for the x86-64 ABIs.  */
          default:
            format = ELF_TARGET_FORMAT;
#ifndef TE_SOLARIS
            tls_get_addr = "___tls_get_addr";
#endif
            break;
          case X86_64_ABI:
            use_rela_relocations = 1;
            object_64bit = 1;
#ifndef TE_SOLARIS
            tls_get_addr = "__tls_get_addr";
#endif
            format = ELF_TARGET_FORMAT64;
            break;
          case X86_64_X32_ABI:
            use_rela_relocations = 1;
            object_64bit = 1;
#ifndef TE_SOLARIS
            tls_get_addr = "__tls_get_addr";
#endif
            /* Same settings as X86_64_ABI, plus 64-bit relocations are
               disallowed and a different format name is used.  */
            disallow_64bit_reloc = 1;
            format = ELF_TARGET_FORMAT32;
            break;
          }
        /* Intel MCU overrides the format chosen above and is only valid
           with the I386 ABI.  */
        if (cpu_arch_isa == PROCESSOR_IAMCU)
          {
            if (x86_elf_abi != I386_ABI)
              as_fatal (_("Intel MCU is 32bit only"));
            return ELF_TARGET_IAMCU_FORMAT;
          }
        else
          return format;
      }
#endif
#if defined (OBJ_MACH_O)
    case bfd_target_mach_o_flavour:
      if (flag_code == CODE_64BIT)
        {
          use_rela_relocations = 1;
          object_64bit = 1;
          return "mach-o-x86-64";
        }
      else
        return "mach-o-i386";
#endif
    default:
      /* An output flavour with no case compiled in above.  */
      abort ();
      return NULL;
    }
}

#endif /* OBJ_MAYBE_ more than one  */
14235 \f
14236 symbolS *
14237 md_undefined_symbol (char *name)
14238 {
14239   if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
14240       && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
14241       && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
14242       && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
14243     {
14244       if (!GOT_symbol)
14245         {
14246           if (symbol_find (name))
14247             as_bad (_("GOT already in symbol table"));
14248           GOT_symbol = symbol_new (name, undefined_section,
14249                                    &zero_address_frag, 0);
14250         };
14251       return GOT_symbol;
14252     }
14253   return 0;
14254 }
14255
14256 /* Round up a section size to the appropriate boundary.  */
14257
14258 valueT
14259 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
14260 {
14261 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
14262   if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
14263     {
14264       /* For a.out, force the section size to be aligned.  If we don't do
14265          this, BFD will align it for us, but it will not write out the
14266          final bytes of the section.  This may be a bug in BFD, but it is
14267          easier to fix it here since that is how the other a.out targets
14268          work.  */
14269       int align;
14270
14271       align = bfd_section_alignment (segment);
14272       size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
14273     }
14274 #endif
14275
14276   return size;
14277 }
14278
/* On the i386, PC-relative offsets are relative to the start of the
   next instruction.  That is, the address of the offset, plus its
   size, since the offset is always the last part of the insn.

   Returns that address for FIXP: the sum of its fx_size, its fx_where
   and the fr_address of the frag it belongs to.  */

long
md_pcrel_from (fixS *fixP)
{
  return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
}
14288
14289 #ifndef I386COFF
14290
14291 static void
14292 s_bss (int ignore ATTRIBUTE_UNUSED)
14293 {
14294   int temp;
14295
14296 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14297   if (IS_ELF)
14298     obj_elf_section_change_hook ();
14299 #endif
14300   temp = get_absolute_expression ();
14301   subseg_set (bss_section, (subsegT) temp);
14302   demand_empty_rest_of_line ();
14303 }
14304
14305 #endif
14306
14307 /* Remember constant directive.  */
14308
14309 void
14310 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
14311 {
14312   if (last_insn.kind != last_insn_directive
14313       && (bfd_section_flags (now_seg) & SEC_CODE))
14314     {
14315       last_insn.seg = now_seg;
14316       last_insn.kind = last_insn_directive;
14317       last_insn.name = "constant directive";
14318       last_insn.file = as_where (&last_insn.line);
14319       if (lfence_before_ret != lfence_before_ret_none)
14320         {
14321           if (lfence_before_indirect_branch != lfence_branch_none)
14322             as_warn (_("constant directive skips -mlfence-before-ret "
14323                        "and -mlfence-before-indirect-branch"));
14324           else
14325             as_warn (_("constant directive skips -mlfence-before-ret"));
14326         }
14327       else if (lfence_before_indirect_branch != lfence_branch_none)
14328         as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
14329     }
14330 }
14331
/* Check FIXP and, where needed, rewrite its relocation type in place.

   Returns 0 after reporting an error for a relocation against a
   register.  For size relocations (ELF builds only) returns non-zero
   only when the output is ELF and the symbol is present and either
   undefined or external.  In every other case returns 1, possibly
   after changing fx_r_type and clearing fx_subsy.
   NOTE(review): the caller is not in view; presumably a zero return
   makes it skip further processing of the fixup - confirm.  */

int
i386_validate_fix (fixS *fixp)
{
  /* A relocation cannot refer to a register symbol.  */
  if (fixp->fx_addsy && S_GET_SEGMENT(fixp->fx_addsy) == reg_section)
    {
      reloc_howto_type *howto;

      /* Look up the relocation's name purely for the diagnostic.  */
      howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
      as_bad_where (fixp->fx_file, fixp->fx_line,
                    _("invalid %s relocation against register"),
                    howto ? howto->name : "<unknown>");
      return 0;
    }

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  if (fixp->fx_r_type == BFD_RELOC_SIZE32
      || fixp->fx_r_type == BFD_RELOC_SIZE64)
    return IS_ELF && fixp->fx_addsy
           && (!S_IS_DEFINED (fixp->fx_addsy)
               || S_IS_EXTERNAL (fixp->fx_addsy));
#endif

  if (fixp->fx_subsy)
    {
      /* An expression of the form `sym - GOT symbol' is turned into the
         matching GOT-relative relocation and the subtrahend dropped.  */
      if (fixp->fx_subsy == GOT_symbol)
        {
          if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
            {
              /* Only handled for 64-bit objects.  */
              if (!object_64bit)
                abort ();
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
              /* fx_tcbit2 selects a GOTPCRELX form, with fx_tcbit
                 choosing the REX variant.  */
              if (fixp->fx_tcbit2)
                fixp->fx_r_type = (fixp->fx_tcbit
                                   ? BFD_RELOC_X86_64_REX_GOTPCRELX
                                   : BFD_RELOC_X86_64_GOTPCRELX);
              else
#endif
                fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
            }
          else
            {
              if (!object_64bit)
                fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
              else
                fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
            }
          fixp->fx_subsy = 0;
        }
    }
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  /* This `else' pairs with `if (fixp->fx_subsy)' above and only exists
     in ELF builds.  */
  else
    {
      /* NB: Commit 292676c1 resolved PLT32 reloc against local symbol
         to section.  Since PLT32 relocation must be against symbols,
         turn such PLT32 relocation into PC32 relocation.  */
      if (fixp->fx_addsy
          && (fixp->fx_r_type == BFD_RELOC_386_PLT32
              || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
          && symbol_section_p (fixp->fx_addsy))
        fixp->fx_r_type = BFD_RELOC_32_PCREL;
      if (!object_64bit)
        {
          /* For 32-bit objects, fx_tcbit2 upgrades GOT32 to GOT32X.  */
          if (fixp->fx_r_type == BFD_RELOC_386_GOT32
              && fixp->fx_tcbit2)
            fixp->fx_r_type = BFD_RELOC_386_GOT32X;
        }
    }
#endif

  return 1;
}
14403
14404 arelent *
14405 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
14406 {
14407   arelent *rel;
14408   bfd_reloc_code_real_type code;
14409
14410   switch (fixp->fx_r_type)
14411     {
14412 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14413       symbolS *sym;
14414
14415     case BFD_RELOC_SIZE32:
14416     case BFD_RELOC_SIZE64:
14417       if (fixp->fx_addsy
14418           && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
14419           && (!fixp->fx_subsy
14420               || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
14421         sym = fixp->fx_addsy;
14422       else if (fixp->fx_subsy
14423                && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
14424                && (!fixp->fx_addsy
14425                    || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
14426         sym = fixp->fx_subsy;
14427       else
14428         sym = NULL;
14429       if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
14430         {
14431           /* Resolve size relocation against local symbol to size of
14432              the symbol plus addend.  */
14433           valueT value = S_GET_SIZE (sym);
14434
14435           if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
14436             value = bfd_section_size (S_GET_SEGMENT (sym));
14437           if (sym == fixp->fx_subsy)
14438             {
14439               value = -value;
14440               if (fixp->fx_addsy)
14441                 value += S_GET_VALUE (fixp->fx_addsy);
14442             }
14443           else if (fixp->fx_subsy)
14444             value -= S_GET_VALUE (fixp->fx_subsy);
14445           value += fixp->fx_offset;
14446           if (fixp->fx_r_type == BFD_RELOC_SIZE32
14447               && object_64bit
14448               && !fits_in_unsigned_long (value))
14449             as_bad_where (fixp->fx_file, fixp->fx_line,
14450                           _("symbol size computation overflow"));
14451           fixp->fx_addsy = NULL;
14452           fixp->fx_subsy = NULL;
14453           md_apply_fix (fixp, (valueT *) &value, NULL);
14454           return NULL;
14455         }
14456       if (!fixp->fx_addsy || fixp->fx_subsy)
14457         {
14458           as_bad_where (fixp->fx_file, fixp->fx_line,
14459                         "unsupported expression involving @size");
14460           return NULL;
14461         }
14462 #endif
14463       /* Fall through.  */
14464
14465     case BFD_RELOC_X86_64_PLT32:
14466     case BFD_RELOC_X86_64_GOT32:
14467     case BFD_RELOC_X86_64_GOTPCREL:
14468     case BFD_RELOC_X86_64_GOTPCRELX:
14469     case BFD_RELOC_X86_64_REX_GOTPCRELX:
14470     case BFD_RELOC_386_PLT32:
14471     case BFD_RELOC_386_GOT32:
14472     case BFD_RELOC_386_GOT32X:
14473     case BFD_RELOC_386_GOTOFF:
14474     case BFD_RELOC_386_GOTPC:
14475     case BFD_RELOC_386_TLS_GD:
14476     case BFD_RELOC_386_TLS_LDM:
14477     case BFD_RELOC_386_TLS_LDO_32:
14478     case BFD_RELOC_386_TLS_IE_32:
14479     case BFD_RELOC_386_TLS_IE:
14480     case BFD_RELOC_386_TLS_GOTIE:
14481     case BFD_RELOC_386_TLS_LE_32:
14482     case BFD_RELOC_386_TLS_LE:
14483     case BFD_RELOC_386_TLS_GOTDESC:
14484     case BFD_RELOC_386_TLS_DESC_CALL:
14485     case BFD_RELOC_X86_64_TLSGD:
14486     case BFD_RELOC_X86_64_TLSLD:
14487     case BFD_RELOC_X86_64_DTPOFF32:
14488     case BFD_RELOC_X86_64_DTPOFF64:
14489     case BFD_RELOC_X86_64_GOTTPOFF:
14490     case BFD_RELOC_X86_64_TPOFF32:
14491     case BFD_RELOC_X86_64_TPOFF64:
14492     case BFD_RELOC_X86_64_GOTOFF64:
14493     case BFD_RELOC_X86_64_GOTPC32:
14494     case BFD_RELOC_X86_64_GOT64:
14495     case BFD_RELOC_X86_64_GOTPCREL64:
14496     case BFD_RELOC_X86_64_GOTPC64:
14497     case BFD_RELOC_X86_64_GOTPLT64:
14498     case BFD_RELOC_X86_64_PLTOFF64:
14499     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14500     case BFD_RELOC_X86_64_TLSDESC_CALL:
14501     case BFD_RELOC_RVA:
14502     case BFD_RELOC_VTABLE_ENTRY:
14503     case BFD_RELOC_VTABLE_INHERIT:
14504 #ifdef TE_PE
14505     case BFD_RELOC_32_SECREL:
14506     case BFD_RELOC_16_SECIDX:
14507 #endif
14508       code = fixp->fx_r_type;
14509       break;
14510     case BFD_RELOC_X86_64_32S:
14511       if (!fixp->fx_pcrel)
14512         {
14513           /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
14514           code = fixp->fx_r_type;
14515           break;
14516         }
14517       /* Fall through.  */
14518     default:
14519       if (fixp->fx_pcrel)
14520         {
14521           switch (fixp->fx_size)
14522             {
14523             default:
14524               as_bad_where (fixp->fx_file, fixp->fx_line,
14525                             _("can not do %d byte pc-relative relocation"),
14526                             fixp->fx_size);
14527               code = BFD_RELOC_32_PCREL;
14528               break;
14529             case 1: code = BFD_RELOC_8_PCREL;  break;
14530             case 2: code = BFD_RELOC_16_PCREL; break;
14531             case 4: code = BFD_RELOC_32_PCREL; break;
14532 #ifdef BFD64
14533             case 8: code = BFD_RELOC_64_PCREL; break;
14534 #endif
14535             }
14536         }
14537       else
14538         {
14539           switch (fixp->fx_size)
14540             {
14541             default:
14542               as_bad_where (fixp->fx_file, fixp->fx_line,
14543                             _("can not do %d byte relocation"),
14544                             fixp->fx_size);
14545               code = BFD_RELOC_32;
14546               break;
14547             case 1: code = BFD_RELOC_8;  break;
14548             case 2: code = BFD_RELOC_16; break;
14549             case 4: code = BFD_RELOC_32; break;
14550 #ifdef BFD64
14551             case 8: code = BFD_RELOC_64; break;
14552 #endif
14553             }
14554         }
14555       break;
14556     }
14557
14558   if ((code == BFD_RELOC_32
14559        || code == BFD_RELOC_32_PCREL
14560        || code == BFD_RELOC_X86_64_32S)
14561       && GOT_symbol
14562       && fixp->fx_addsy == GOT_symbol)
14563     {
14564       if (!object_64bit)
14565         code = BFD_RELOC_386_GOTPC;
14566       else
14567         code = BFD_RELOC_X86_64_GOTPC32;
14568     }
14569   if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
14570       && GOT_symbol
14571       && fixp->fx_addsy == GOT_symbol)
14572     {
14573       code = BFD_RELOC_X86_64_GOTPC64;
14574     }
14575
14576   rel = XNEW (arelent);
14577   rel->sym_ptr_ptr = XNEW (asymbol *);
14578   *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
14579
14580   rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
14581
14582   if (!use_rela_relocations)
14583     {
14584       /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
14585          vtable entry to be used in the relocation's section offset.  */
14586       if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
14587         rel->address = fixp->fx_offset;
14588 #if defined (OBJ_COFF) && defined (TE_PE)
14589       else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
14590         rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
14591       else
14592 #endif
14593       rel->addend = 0;
14594     }
14595   /* Use the rela in 64bit mode.  */
14596   else
14597     {
14598       if (disallow_64bit_reloc)
14599         switch (code)
14600           {
14601           case BFD_RELOC_X86_64_DTPOFF64:
14602           case BFD_RELOC_X86_64_TPOFF64:
14603           case BFD_RELOC_64_PCREL:
14604           case BFD_RELOC_X86_64_GOTOFF64:
14605           case BFD_RELOC_X86_64_GOT64:
14606           case BFD_RELOC_X86_64_GOTPCREL64:
14607           case BFD_RELOC_X86_64_GOTPC64:
14608           case BFD_RELOC_X86_64_GOTPLT64:
14609           case BFD_RELOC_X86_64_PLTOFF64:
14610             as_bad_where (fixp->fx_file, fixp->fx_line,
14611                           _("cannot represent relocation type %s in x32 mode"),
14612                           bfd_get_reloc_code_name (code));
14613             break;
14614           default:
14615             break;
14616           }
14617
14618       if (!fixp->fx_pcrel)
14619         rel->addend = fixp->fx_offset;
14620       else
14621         switch (code)
14622           {
14623           case BFD_RELOC_X86_64_PLT32:
14624           case BFD_RELOC_X86_64_GOT32:
14625           case BFD_RELOC_X86_64_GOTPCREL:
14626           case BFD_RELOC_X86_64_GOTPCRELX:
14627           case BFD_RELOC_X86_64_REX_GOTPCRELX:
14628           case BFD_RELOC_X86_64_TLSGD:
14629           case BFD_RELOC_X86_64_TLSLD:
14630           case BFD_RELOC_X86_64_GOTTPOFF:
14631           case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14632           case BFD_RELOC_X86_64_TLSDESC_CALL:
14633             rel->addend = fixp->fx_offset - fixp->fx_size;
14634             break;
14635           default:
14636             rel->addend = (section->vma
14637                            - fixp->fx_size
14638                            + fixp->fx_addnumber
14639                            + md_pcrel_from (fixp));
14640             break;
14641           }
14642     }
14643
14644   rel->howto = bfd_reloc_type_lookup (stdoutput, code);
14645   if (rel->howto == NULL)
14646     {
14647       as_bad_where (fixp->fx_file, fixp->fx_line,
14648                     _("cannot represent relocation type %s"),
14649                     bfd_get_reloc_code_name (code));
14650       /* Set howto to a garbage value so that we can keep going.  */
14651       rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
14652       gas_assert (rel->howto != NULL);
14653     }
14654
14655   return rel;
14656 }
14657
14658 #include "tc-i386-intel.c"
14659
14660 void
14661 tc_x86_parse_to_dw2regnum (expressionS *exp)
14662 {
14663   int saved_naked_reg;
14664   char saved_register_dot;
14665
14666   saved_naked_reg = allow_naked_reg;
14667   allow_naked_reg = 1;
14668   saved_register_dot = register_chars['.'];
14669   register_chars['.'] = '.';
14670   allow_pseudo_reg = 1;
14671   expression_and_evaluate (exp);
14672   allow_pseudo_reg = 0;
14673   register_chars['.'] = saved_register_dot;
14674   allow_naked_reg = saved_naked_reg;
14675
14676   if (exp->X_op == O_register && exp->X_add_number >= 0)
14677     {
14678       if ((addressT) exp->X_add_number < i386_regtab_size)
14679         {
14680           exp->X_op = O_constant;
14681           exp->X_add_number = i386_regtab[exp->X_add_number]
14682                               .dw2_regnum[flag_code >> 1];
14683         }
14684       else
14685         exp->X_op = O_illegal;
14686     }
14687 }
14688
14689 void
14690 tc_x86_frame_initial_instructions (void)
14691 {
14692   static unsigned int sp_regno[2];
14693
14694   if (!sp_regno[flag_code >> 1])
14695     {
14696       char *saved_input = input_line_pointer;
14697       char sp[][4] = {"esp", "rsp"};
14698       expressionS exp;
14699
14700       input_line_pointer = sp[flag_code >> 1];
14701       tc_x86_parse_to_dw2regnum (&exp);
14702       gas_assert (exp.X_op == O_constant);
14703       sp_regno[flag_code >> 1] = exp.X_add_number;
14704       input_line_pointer = saved_input;
14705     }
14706
14707   cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
14708   cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
14709 }
14710
14711 int
14712 x86_dwarf2_addr_size (void)
14713 {
14714 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14715   if (x86_elf_abi == X86_64_X32_ABI)
14716     return 4;
14717 #endif
14718   return bfd_arch_bits_per_address (stdoutput) / 8;
14719 }
14720
14721 int
14722 i386_elf_section_type (const char *str, size_t len)
14723 {
14724   if (flag_code == CODE_64BIT
14725       && len == sizeof ("unwind") - 1
14726       && startswith (str, "unwind"))
14727     return SHT_X86_64_UNWIND;
14728
14729   return -1;
14730 }
14731
14732 #ifdef TE_SOLARIS
14733 void
14734 i386_solaris_fix_up_eh_frame (segT sec)
14735 {
14736   if (flag_code == CODE_64BIT)
14737     elf_section_type (sec) = SHT_X86_64_UNWIND;
14738 }
14739 #endif
14740
14741 #ifdef TE_PE
14742 void
14743 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
14744 {
14745   expressionS exp;
14746
14747   exp.X_op = O_secrel;
14748   exp.X_add_symbol = symbol;
14749   exp.X_add_number = 0;
14750   emit_expr (&exp, size);
14751 }
14752 #endif
14753
14754 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14755 /* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */
14756
14757 bfd_vma
14758 x86_64_section_letter (int letter, const char **ptr_msg)
14759 {
14760   if (flag_code == CODE_64BIT)
14761     {
14762       if (letter == 'l')
14763         return SHF_X86_64_LARGE;
14764
14765       *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
14766     }
14767   else
14768     *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
14769   return -1;
14770 }
14771
14772 bfd_vma
14773 x86_64_section_word (char *str, size_t len)
14774 {
14775   if (len == 5 && flag_code == CODE_64BIT && startswith (str, "large"))
14776     return SHF_X86_64_LARGE;
14777
14778   return -1;
14779 }
14780
14781 static void
14782 handle_large_common (int small ATTRIBUTE_UNUSED)
14783 {
14784   if (flag_code != CODE_64BIT)
14785     {
14786       s_comm_internal (0, elf_common_parse);
14787       as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
14788     }
14789   else
14790     {
14791       static segT lbss_section;
14792       asection *saved_com_section_ptr = elf_com_section_ptr;
14793       asection *saved_bss_section = bss_section;
14794
14795       if (lbss_section == NULL)
14796         {
14797           flagword applicable;
14798           segT seg = now_seg;
14799           subsegT subseg = now_subseg;
14800
14801           /* The .lbss section is for local .largecomm symbols.  */
14802           lbss_section = subseg_new (".lbss", 0);
14803           applicable = bfd_applicable_section_flags (stdoutput);
14804           bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
14805           seg_info (lbss_section)->bss = 1;
14806
14807           subseg_set (seg, subseg);
14808         }
14809
14810       elf_com_section_ptr = &_bfd_elf_large_com_section;
14811       bss_section = lbss_section;
14812
14813       s_comm_internal (0, elf_common_parse);
14814
14815       elf_com_section_ptr = saved_com_section_ptr;
14816       bss_section = saved_bss_section;
14817     }
14818 }
14819 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */