gas/config/tc-i386.c

   1 /* tc-i386.c -- Assemble code for the Intel 80386
   2    Copyright (C) 1989-2020 Free Software Foundation, Inc.
   3
   4    This file is part of GAS, the GNU Assembler.
   5
   6    GAS is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GAS is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GAS; see the file COPYING.  If not, write to the Free
  18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
  19    02110-1301, USA.  */
  20
  21 /* Intel 80386 machine specific gas.
  22    Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
  23    x86_64 support by Jan Hubicka (jh@suse.cz)
  24    VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
  25    Bugs & suggestions are completely welcome.  This is free software.
  26    Please help us make it better.  */
  27
  28 #include "as.h"
  29 #include "safe-ctype.h"
  30 #include "subsegs.h"
  31 #include "dwarf2dbg.h"
  32 #include "dw2gencfi.h"
  33 #include "elf/x86-64.h"
  34 #include "opcodes/i386-init.h"
  35
  36 #ifdef HAVE_LIMITS_H
  37 #include <limits.h>
  38 #else /* !HAVE_LIMITS_H */
  39 #ifdef HAVE_SYS_PARAM_H
  40 #include <sys/param.h>
  41 #endif
  42 #ifndef INT_MAX /* Fallback, reached only when <limits.h> is unavailable.  */
  43 #define INT_MAX (int) (((unsigned) (-1)) >> 1) /* NOTE(review): expansion lacks outer parentheses.  */
  44 #endif
  45 #endif /* HAVE_LIMITS_H */
  46
  47 #ifndef INFER_ADDR_PREFIX
  48 #define INFER_ADDR_PREFIX 1 /* Default when not predefined.  */
  49 #endif
  50
  51 #ifndef DEFAULT_ARCH
  52 #define DEFAULT_ARCH "i386" /* Default when not predefined.  */
  53 #endif
  54
  55 #ifndef INLINE /* NOTE(review): an equivalent guard is repeated later in this file; INLINE is always defined once this one has run.  */
  56 #if __GNUC__ >= 2
  57 #define INLINE __inline__
  58 #else
  59 #define INLINE /* Expands to nothing for other compilers.  */
  60 #endif
  61 #endif /* INLINE */
  62
  63 /* Prefixes will be emitted in the order defined below.
  64    WAIT_PREFIX must be the first prefix since FWAIT is really an
  65    instruction, and so must come before any prefixes.
  66    The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
  67    REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
  68 #define WAIT_PREFIX     0
  69 #define SEG_PREFIX      1
  70 #define ADDR_PREFIX     2
  71 #define DATA_PREFIX     3
  72 #define REP_PREFIX      4
  73 #define HLE_PREFIX      REP_PREFIX      /* shares the REP_PREFIX slot */
  74 #define BND_PREFIX      REP_PREFIX      /* shares the REP_PREFIX slot */
  75 #define LOCK_PREFIX     5
  76 #define REX_PREFIX      6       /* must come last.  */
  77 #define MAX_PREFIXES    7       /* max prefixes per opcode; sizes prefix[] in struct _i386_insn */
  78
  79 /* The syntax characters are defined here (modulo base,index,scale syntax).  */
  80 #define REGISTER_PREFIX '%'
  81 #define IMMEDIATE_PREFIX '$'
  82 #define ABSOLUTE_PREFIX '*'
  83
  84 /* These are the instruction mnemonic suffixes in AT&T syntax or
  85    memory operand size in Intel syntax.  */
  86 #define WORD_MNEM_SUFFIX  'w'
  87 #define BYTE_MNEM_SUFFIX  'b'
  88 #define SHORT_MNEM_SUFFIX 's'
  89 #define LONG_MNEM_SUFFIX  'l'
  90 #define QWORD_MNEM_SUFFIX  'q'
  91 /* Intel Syntax.  Use a non-printing character ('\1') since it never
  92    appears in instructions.  */
  93 #define LONG_DOUBLE_MNEM_SUFFIX '\1'
  94
  95 #define END_OF_INSN '\0'
  96
  97 /* This matches the C -> StaticRounding alias in the opcode table.  */
  98 #define commutative staticrounding
  99
 100 /*
 101   'templates' groups together the 'template' structures for opcodes
 102   of the same name.  This is only used for storing the insns in the grand
 103   ole hash table of insns.
 104   The templates themselves start at START and range up to (but not including)
 105   END.
 106   */
 107 typedef struct
 108 {
 109   const insn_template *start;   /* first template of the group */
 110   const insn_template *end;     /* one past the last template of the group */
 111 }
 112 templates;
 113
 114 /* Fields of the 386 operand encoding (modrm) byte; see the 386 book for details.  */
 115 typedef struct
 116 {
 117   unsigned int regmem;  /* codes register or memory operand */
 118   unsigned int reg;     /* codes register operand (or extended opcode) */
 119   unsigned int mode;    /* how to interpret regmem & reg */
 120 }
 121 modrm_byte;
 122
 123 /* x86-64 extension (REX) prefix, held in a plain int.  */
 124 typedef int rex_byte;
 125
 126 /* Fields of the 386 opcode byte (sib) used to code indirect addressing.  */
 127 typedef struct
 128 {
 129   unsigned base;
 130   unsigned index;
 131   unsigned scale;
 132 }
 133 sib_byte;
 134
 135 /* One x86 arch table entry: name, type and features.  */
 136 typedef struct
 137 {
 138   const char *name;             /* arch name */
 139   unsigned int len;             /* arch string length */
 140   enum processor_type type;     /* arch type */
 141   i386_cpu_flags flags;         /* cpu feature flags */
 142   unsigned int skip;            /* non-zero if show_arch should skip this entry */
 143 }
 144 arch_entry;
 145
 146 /* Table entry used to turn off the indicated flags.  */
 147 typedef struct
 148 {
 149   const char *name;             /* arch name */
 150   unsigned int len;             /* arch string length */
 151   i386_cpu_flags flags;         /* cpu feature flags to turn off */
 152 }
 153 noarch_entry;
 154
 155 static void update_code_flag (int, int); /* Forward declarations of file-local functions follow.  */
 156 static void set_code_flag (int);
 157 static void set_16bit_gcc_code_flag (int);
 158 static void set_intel_syntax (int);
 159 static void set_intel_mnemonic (int);
 160 static void set_allow_index_reg (int);
 161 static void set_check (int);
 162 static void set_cpu_arch (int);
 163 #ifdef TE_PE
 164 static void pe_directive_secrel (int);
 165 #endif /* TE_PE */
 166 static void signed_cons (int);
 167 static char *output_invalid (int c);
 168 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
 169                                     const char *);
 170 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
 171                                        const char *);
 172 static int i386_att_operand (char *);
 173 static int i386_intel_operand (char *, int);
 174 static int i386_intel_simplify (expressionS *);
 175 static int i386_intel_parse_name (const char *, expressionS *);
 176 static const reg_entry *parse_register (char *, char **);
 177 static char *parse_insn (char *, char *);
 178 static char *parse_operands (char *, const char *);
 179 static void swap_operands (void);
 180 static void swap_2_operands (int, int);
 181 static enum flag_code i386_addressing_mode (void);
 182 static void optimize_imm (void);
 183 static void optimize_disp (void);
 184 static const insn_template *match_template (char);
 185 static int check_string (void);
 186 static int process_suffix (void);
 187 static int check_byte_reg (void);
 188 static int check_long_reg (void);
 189 static int check_qword_reg (void);
 190 static int check_word_reg (void);
 191 static int finalize_imm (void);
 192 static int process_operands (void);
 193 static const seg_entry *build_modrm_byte (void);
 194 static void output_insn (void);
 195 static void output_imm (fragS *, offsetT);
 196 static void output_disp (fragS *, offsetT);
 197 #ifndef I386COFF
 198 static void s_bss (int);
 199 #endif /* !I386COFF */
 200 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 201 static void handle_large_common (int small ATTRIBUTE_UNUSED);
 202
 203 /* GNU_PROPERTY_X86_ISA_1_USED.  */
 204 static unsigned int x86_isa_1_used;
 205 /* GNU_PROPERTY_X86_FEATURE_2_USED.  */
 206 static unsigned int x86_feature_2_used;
 207 /* Generate x86 used ISA and feature properties.  Defaults to DEFAULT_X86_USED_NOTE.  */
 208 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
 209 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */
 210
 211 static const char *default_arch = DEFAULT_ARCH; /* Starts out as the DEFAULT_ARCH macro.  */
 212
 213 /* Sentinel that parse_register() returns when a register alias cannot be used.  */
 214 static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
 215                                    { Dw2Inval, Dw2Inval } };
 216
 217 /* This struct describes rounding control and SAE in the instruction.  */
 218 struct RC_Operation
 219 {
 220   enum rc_type
 221     {
 222       rne = 0, /* spelled "rn-sae" in RC_NamesTable */
 223       rd, /* spelled "rd-sae" */
 224       ru, /* spelled "ru-sae" */
 225       rz, /* spelled "rz-sae" */
 226       saeonly /* spelled "sae" */
 227     } type;
 228   int operand; /* presumably the index of the operand this applies to -- TODO confirm */
 229 };
 230
 231 static struct RC_Operation rc_op;
 232
 233 /* This struct describes masking, applied to OPERAND in the instruction.
 234    MASK is a pointer to the corresponding mask register.  ZEROING tells
 235    whether merging or zeroing mask is used.  */
 236 struct Mask_Operation
 237 {
 238   const reg_entry *mask;
 239   unsigned int zeroing;
 240   /* The operand with which this operation is associated.  */
 241   int operand;
 242 };
 243
 244 static struct Mask_Operation mask_op;
 245
 246 /* This struct describes broadcasting, applied to OPERAND.  TYPE selects
 247    the {1toN} form and BYTES is the number of bytes to broadcast.  */
 248 struct Broadcast_Operation
 249 {
 250   /* Type of broadcast: {1to2}, {1to4}, {1to8}, or {1to16}.  */
 251   int type;
 252
 253   /* Index of broadcasted operand.  */
 254   int operand;
 255
 256   /* Number of bytes to broadcast.  */
 257   int bytes;
 258 };
 259
 260 static struct Broadcast_Operation broadcast_op;
 261
 262 /* VEX (or EVEX) prefix.  */
 263 typedef struct
 264 {
 265   /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
 266   unsigned char bytes[4];
 267   unsigned int length; /* presumably the number of BYTES entries in use -- TODO confirm */
 268   /* Destination or source register specifier.  */
 269   const reg_entry *register_specifier;
 270 } vex_prefix;
 271
 272 /* 'md_assemble ()' gathers together information and puts it into an
 273    i386_insn.  */
 274
 275 union i386_op
 276   {
 277     expressionS *disps; /* displacement expression */
 278     expressionS *imms; /* immediate expression */
 279     const reg_entry *regs; /* register */
 280   };
 281
 282 enum i386_error /* Error kinds; stored in the ERROR member of struct _i386_insn.  */
 283   {
 284     operand_size_mismatch,
 285     operand_type_mismatch,
 286     register_type_mismatch,
 287     number_of_operands_mismatch,
 288     invalid_instruction_suffix,
 289     bad_imm4,
 290     unsupported_with_intel_mnemonic,
 291     unsupported_syntax,
 292     unsupported,
 293     invalid_sib_address,
 294     invalid_vsib_address,
 295     invalid_vector_register_set,
 296     invalid_tmm_register_set,
 297     unsupported_vector_index_register,
 298     unsupported_broadcast,
 299     broadcast_needed,
 300     unsupported_masking,
 301     mask_not_on_destination,
 302     no_default_mask,
 303     unsupported_rc_sae,
 304     rc_sae_operand_not_last_imm,
 305     invalid_register_operand,
 306   };
 307
 308 struct _i386_insn
 309   {
 310     /* TM holds the template for the insn we're currently assembling.  */
 311     insn_template tm;
 312
 313     /* SUFFIX holds the instruction size suffix for byte, word, dword
 314        or qword, if given.  */
 315     char suffix;
 316
 317     /* OPERANDS gives the number of given operands.  */
 318     unsigned int operands;
 319
 320     /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
 321        of given register, displacement, memory operands and immediate
 322        operands.  */
 323     unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
 324
 325     /* TYPES [i] is the type (see above #defines) which tells us how to
 326        use OP[i] for the corresponding operand.  */
 327     i386_operand_type types[MAX_OPERANDS];
 328
 329     /* Displacement expression, immediate expression, or register for each
 330        operand.  */
 331     union i386_op op[MAX_OPERANDS];
 332
 333     /* Flags for operands (bit values are defined just below).  */
 334     unsigned int flags[MAX_OPERANDS];
 335 #define Operand_PCrel 1
 336 #define Operand_Mem   2
 337
 338     /* Relocation type for each operand.  */
 339     enum bfd_reloc_code_real reloc[MAX_OPERANDS];
 340
 341     /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
 342        the base index byte below.  */
 343     const reg_entry *base_reg;
 344     const reg_entry *index_reg;
 345     unsigned int log2_scale_factor;
 346
 347     /* SEG gives the seg_entries of this insn.  They are zero unless
 348        explicit segment overrides are given.  */
 349     const seg_entry *seg[2];
 350
 351     /* Copied first memory operand string, for re-checking.  */
 352     char *memop1_string;
 353
 354     /* PREFIX holds all the given prefix opcodes (usually null).
 355        PREFIXES is the number of prefix opcodes.  */
 356     unsigned int prefixes;
 357     unsigned char prefix[MAX_PREFIXES];
 358
 359     /* Register is in low 3 bits of opcode.  */
 360     bfd_boolean short_form;
 361
 362     /* The operand to a branch insn indicates an absolute branch.  */
 363     bfd_boolean jumpabsolute;
 364
 365     /* Extended states (bit mask).  */
 366     enum
 367       {
 368         /* Use MMX state.  */
 369         xstate_mmx = 1 << 0,
 370         /* Use XMM state.  */
 371         xstate_xmm = 1 << 1,
 372         /* Use YMM state (also sets the XMM bit).  */
 373         xstate_ymm = 1 << 2 | xstate_xmm,
 374         /* Use ZMM state (also sets the YMM and XMM bits).  */
 375         xstate_zmm = 1 << 3 | xstate_ymm,
 376         /* Use TMM state.  */
 377         xstate_tmm = 1 << 4
 378       } xstate;
 379
 380     /* Has GOTPC or TLS relocation.  */
 381     bfd_boolean has_gotpc_tls_reloc;
 382
 383     /* RM and SIB are the modrm byte and the sib byte where the
 384        addressing modes of this insn are encoded.  */
 385     modrm_byte rm;
 386     rex_byte rex;
 387     rex_byte vrex;
 388     sib_byte sib;
 389     vex_prefix vex;
 390
 391     /* Masking attributes.  */
 392     struct Mask_Operation *mask;
 393
 394     /* Rounding control and SAE attributes.  */
 395     struct RC_Operation *rounding;
 396
 397     /* Broadcasting attributes.  */
 398     struct Broadcast_Operation *broadcast;
 399
 400     /* Compressed disp8*N attribute.  */
 401     unsigned int memshift;
 402
 403     /* Prefer load or store in encoding.  */
 404     enum
 405       {
 406         dir_encoding_default = 0,
 407         dir_encoding_load,
 408         dir_encoding_store,
 409         dir_encoding_swap
 410       } dir_encoding;
 411
 412     /* Prefer 8bit or 32bit displacement in encoding.  */
 413     enum
 414       {
 415         disp_encoding_default = 0,
 416         disp_encoding_8bit,
 417         disp_encoding_32bit
 418       } disp_encoding;
 419
 420     /* Prefer the REX byte in encoding.  */
 421     bfd_boolean rex_encoding;
 422
 423     /* Disable instruction size optimization.  */
 424     bfd_boolean no_optimize;
 425
 426     /* How to encode vector instructions.  */
 427     enum
 428       {
 429         vex_encoding_default = 0,
 430         vex_encoding_vex,
 431         vex_encoding_vex3,
 432         vex_encoding_evex,
 433         vex_encoding_error
 434       } vec_encoding;
 435
 436     /* REP prefix.  */
 437     const char *rep_prefix;
 438
 439     /* HLE prefix.  */
 440     const char *hle_prefix;
 441
 442     /* Have BND prefix.  */
 443     const char *bnd_prefix;
 444
 445     /* Have NOTRACK prefix.  */
 446     const char *notrack_prefix;
 447
 448     /* Error kind (an enum i386_error value, not a message string).  */
 449     enum i386_error error;
 450   };
 451
 452 typedef struct _i386_insn i386_insn;
 453
 454 /* Link each RC type with the corresponding string that will be looked
 455    for in the asm source.  */
 456 struct RC_name
 457 {
 458   enum rc_type type;
 459   const char *name;
 460   unsigned int len; /* presumably the length of NAME, supplied by STRING_COMMA_LEN -- TODO confirm */
 461 };
 462
 463 static const struct RC_name RC_NamesTable[] =
 464 {
 465   {  rne, STRING_COMMA_LEN ("rn-sae") },
 466   {  rd,  STRING_COMMA_LEN ("rd-sae") },
 467   {  ru,  STRING_COMMA_LEN ("ru-sae") },
 468   {  rz,  STRING_COMMA_LEN ("rz-sae") },
 469   {  saeonly,  STRING_COMMA_LEN ("sae") },
 470 };
 471
 472 /* List of chars besides those in app.c:symbol_chars that can start an
 473    operand.  Used to prevent the scrubber eating vital white-space.
 474    '@' and '?' are appended only when LEX_AT / LEX_QM are defined.  */
 474 const char extra_symbol_chars[] = "*%-([{}"
 475 #ifdef LEX_AT
 476         "@"
 477 #endif
 478 #ifdef LEX_QM
 479         "?"
 480 #endif
 481         ;
 482
 483 #if (defined (TE_I386AIX)                               \
 484      || ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
 485          && !defined (TE_GNU)                           \
 486          && !defined (TE_LINUX)                         \
 487          && !defined (TE_FreeBSD)                       \
 488          && !defined (TE_DragonFly)                     \
 489          && !defined (TE_NetBSD)))
 490 /* This string holds the chars that always start a comment.  If the
 491    pre-processor is disabled, these aren't very useful.  The option
 492    --divide will remove '/' from this list.  */
 493 const char *i386_comment_chars = "#/";
 494 #define SVR4_COMMENT_CHARS 1
 495 #define PREFIX_SEPARATOR '\\'
 496
 497 #else /* '/' is not a comment char here, so it can serve as the prefix separator.  */
 498 const char *i386_comment_chars = "#";
 499 #define PREFIX_SEPARATOR '/'
 500 #endif
 501
 502 /* This array holds the chars that only start a comment at the beginning of
 503    a line.  If the line seems to have the form '# 123 filename'
 504    .line and .file directives will appear in the pre-processed output.
 505    Note that input_file.c hand checks for '#' at the beginning of the
 506    first line of the input file.  This is because the compiler outputs
 507    #NO_APP at the beginning of its output.
 508    Also note that comments started like this one will always work if
 509    '/' isn't otherwise defined.  */
 510 const char line_comment_chars[] = "#/";
 511
 512 const char line_separator_chars[] = ";";
 513
 514 /* Chars that can be used to separate the mantissa from the exponent in
 515    floating point nums.  */
 516 const char EXP_CHARS[] = "eE";
 517
 518 /* Chars that mean this number is a floating point constant,
 519    as in 0f12.456
 520    or    0d1.2345e12.  */
 521 const char FLT_CHARS[] = "fFdDxX";
 522
 523 /* Tables for lexical analysis, each indexed by an unsigned char value.  */
 524 static char mnemonic_chars[256];
 525 static char register_chars[256];
 526 static char operand_chars[256];
 527 static char identifier_chars[256];
 528 static char digit_chars[256];
 529
 530 /* Lexical macros.  NOTE(review): X is not parenthesized inside the casts below, so the cast binds only to the first operand of a compound argument.  */
 531 #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
 532 #define is_operand_char(x) (operand_chars[(unsigned char) x])
 533 #define is_register_char(x) (register_chars[(unsigned char) x])
 534 #define is_space_char(x) ((x) == ' ')
 535 #define is_identifier_char(x) (identifier_chars[(unsigned char) x])
 536 #define is_digit_char(x) (digit_chars[(unsigned char) x])
 537
 538 /* All non-digit non-letter characters that may occur in an operand.  */
 539 static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
 540
 541 /* md_assemble() always leaves the strings it's passed unaltered.  To
 542    effect this we maintain a stack of saved characters that we've smashed
 543    with '\0's (indicating end of strings for various sub-fields of the
 544    assembler instruction).  NOTE(review): the macros below push and pop
 544    through save_stack_p without checking the 32-entry bound.  */
 545 static char save_stack[32];
 546 static char *save_stack_p;
 547 #define END_STRING_AND_SAVE(s) \
 548         do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
 549 #define RESTORE_END_STRING(s) \
 550         do { *(s) = *--save_stack_p; } while (0)
 551
 552 /* The instruction we're assembling.  */
 553 static i386_insn i;
 554
 555 /* Possible templates for the current insn.  */
 556 static const templates *current_templates;
 557
 558 /* Per instruction expressionS buffers: max displacements & immediates.  */
 559 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
 560 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
 561
 562 /* Current operand we are working on; starts out as -1.  */
 563 static int this_operand = -1;
 564
 565 /* We support three different code modes (see enum flag_code below).  The
 566    FLAG_CODE variable is used to distinguish these.  */
 567
 568 enum flag_code {
 569         CODE_32BIT,
 570         CODE_16BIT,
 571         CODE_64BIT };
 572
 573 static enum flag_code flag_code;
 574 static unsigned int object_64bit;
 575 static unsigned int disallow_64bit_reloc;
 576 static int use_rela_relocations = 0;
 577 /* __tls_get_addr/___tls_get_addr symbol for TLS.  */
 578 static const char *tls_get_addr;
 579
 580 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
 581      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
 582      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
 583
 584 /* The ELF ABI to use.  NOTE(review): the guard above also enables this for PE and Mach-O configurations.  */
 585 enum x86_elf_abi
 586 {
 587   I386_ABI,
 588   X86_64_ABI,
 589   X86_64_X32_ABI
 590 };
 591
 592 static enum x86_elf_abi x86_elf_abi = I386_ABI;
 593 #endif
 594
 595 #if defined (TE_PE) || defined (TE_PEP)
 596 /* Use big object file format.  */
 597 static int use_big_obj = 0;
 598 #endif /* TE_PE || TE_PEP */
 599
 600 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 601 /* 1 if generating code for a shared library.  */
 602 static int shared = 0;
 603 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */
 604
 605 /* 1 for intel syntax,
 606    0 if att syntax.  */
 607 static int intel_syntax = 0;
 608
 609 static enum x86_64_isa /* NOTE(review): numbering starts at 1, so the zero-initialized isa64 matches neither enumerator; presumably assigned before use -- confirm.  */
 610 {
 611   amd64 = 1,    /* AMD64 ISA.  */
 612   intel64       /* Intel64 ISA.  */
 613 } isa64;
 614
 615 /* 1 for intel mnemonic,
 616    0 if att mnemonic.  Defaults to the inverse of SYSV386_COMPAT.  */
 617 static int intel_mnemonic = !SYSV386_COMPAT;
 618
 619 /* 1 if pseudo registers are permitted.  */
 620 static int allow_pseudo_reg = 0;
 621
 622 /* 1 if register prefix % not required.  */
 623 static int allow_naked_reg = 0;
 624
 625 /* 1 if the assembler should add BND prefix for all control-transferring
 626    instructions supporting it, even if this prefix wasn't specified
 627    explicitly.  */
 628 static int add_bnd_prefix = 0;
 629
 630 /* 1 if pseudo index register, eiz/riz, is allowed.  */
 631 static int allow_index_reg = 0;
 632
 633 /* 1 if the assembler should ignore LOCK prefix, even if it was
 634    specified explicitly.  */
 635 static int omit_lock_prefix = 0;
 636
 637 /* 1 if the assembler should encode lfence, mfence, and sfence as
 638    "lock addl $0, (%{re}sp)".  */
 639 static int avoid_fence = 0;
 640
 641 /* 1 if lfence should be inserted after every load.  */
 642 static int lfence_after_load = 0;
 643
 644 /* Non-zero if lfence should be inserted before indirect branch; the
 644    value selects which kind of indirect branch is covered.  */
 645 static enum lfence_before_indirect_branch_kind
 646   {
 647     lfence_branch_none = 0,
 648     lfence_branch_register,
 649     lfence_branch_memory,
 650     lfence_branch_all
 651   }
 652 lfence_before_indirect_branch;
 653
 654 /* Non-zero if lfence should be inserted before ret.  */
 655 static enum lfence_before_ret_kind
 656   {
 657     lfence_before_ret_none = 0,
 658     lfence_before_ret_not,
 659     lfence_before_ret_or,
 660     lfence_before_ret_shl
 661   }
 662 lfence_before_ret;
 663
 664 /* Records whether the previous instruction was a directive such as .byte, a prefix, or something else.  */
 665 static struct
 666   {
 667     segT seg;
 668     const char *file;
 669     const char *name;
 670     unsigned int line;
 671     enum last_insn_kind
 672       {
 673         last_insn_other = 0,
 674         last_insn_directive,
 675         last_insn_prefix
 676       } kind;
 677   } last_insn;
 678
 679 /* 1 if the assembler should generate relax relocations.  */
 680
 681 static int generate_relax_relocations
 682   = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
 683
 684 static enum check_kind
 685   {
 686     check_none = 0,
 687     check_warning,
 688     check_error
 689   }
 690 sse_check, operand_check = check_warning; /* Only operand_check has an initializer; sse_check is zero-initialized, i.e. check_none.  */
 691
 692 /* Non-zero if branches should be aligned within power of 2 boundary.  */
 693 static int align_branch_power = 0;
 694
 695 /* Types of branches to align.  */
 696 enum align_branch_kind
 697   {
 698     align_branch_none = 0,
 699     align_branch_jcc = 1,
 700     align_branch_fused = 2,
 701     align_branch_jmp = 3,
 702     align_branch_call = 4,
 703     align_branch_indirect = 5,
 704     align_branch_ret = 6
 705   };
 706
 707 /* Type bits of branches to align: each is 1 shifted left by the
 707    matching align_branch_kind value.  */
 708 enum align_branch_bit
 709   {
 710     align_branch_jcc_bit = 1 << align_branch_jcc,
 711     align_branch_fused_bit = 1 << align_branch_fused,
 712     align_branch_jmp_bit = 1 << align_branch_jmp,
 713     align_branch_call_bit = 1 << align_branch_call,
 714     align_branch_indirect_bit = 1 << align_branch_indirect,
 715     align_branch_ret_bit = 1 << align_branch_ret
 716   };
 717
 718 static unsigned int align_branch = (align_branch_jcc_bit /* initial mask: jcc, fused and jmp bits */
 719                                     | align_branch_fused_bit
 720                                     | align_branch_jmp_bit);
 721
 722 /* Types of conditional jump used by macro-fusion.  */
 723 enum mf_jcc_kind
 724   {
 725     mf_jcc_jo = 0,  /* base opcode 0x70  */
 726     mf_jcc_jc,      /* base opcode 0x72  */
 727     mf_jcc_je,      /* base opcode 0x74  */
 728     mf_jcc_jna,     /* base opcode 0x76  */
 729     mf_jcc_js,      /* base opcode 0x78  */
 730     mf_jcc_jp,      /* base opcode 0x7a  */
 731     mf_jcc_jl,      /* base opcode 0x7c  */
 732     mf_jcc_jle,     /* base opcode 0x7e  */
 733   };
 734
 735 /* Types of compare flag-modifying instructions used by macro-fusion.  */
 736 enum mf_cmp_kind
 737   {
 738     mf_cmp_test_and,  /* test/cmp -- NOTE(review): the name suggests test/and; confirm */
 739     mf_cmp_alu_cmp,  /* add/sub/cmp */
 740     mf_cmp_incdec  /* inc/dec */
 741   };
 742
 743 /* The maximum padding size for fused jcc.  A CMP-like instruction can
 744    be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
 745    prefixes.  */
 746 #define MAX_FUSED_JCC_PADDING_SIZE 20
 747
 748 /* The maximum number of prefixes added for an instruction.  */
 749 static unsigned int align_branch_prefix_size = 5;
 750
 751 /* Optimization level:
 752    1. Clear the REX_W bit with register operand if possible.
 753    2. Above plus use 128bit vector instruction to clear the full vector
 754       register.
 755  */
 756 static int optimize = 0;
 757
 758 /* Optimization level for space:
 759    1. Clear the REX_W bit with register operand if possible.
 760    2. Above plus use 128bit vector instruction to clear the full vector
 761       register.
 762    3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
 763       "testb $imm7,%r8".
 764  */
 765 static int optimize_for_space = 0;
 766
 767 /* Register prefix used for error messages.  */
 768 static const char *register_prefix = "%";
 769
 770 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
 771    leave, push, and pop instructions so that gcc has the same stack
 772    frame as in 32 bit mode.  */
 773 static char stackop_size = '\0';
 774
 775 /* Non-zero to optimize code alignment.  */
 776 int optimize_align_code = 1;
 777
 778 /* Non-zero to quieten some warnings.  */
 779 static int quiet_warnings = 0;
 780
 781 /* CPU name and sub-arch name; both start out NULL.  */
 782 static const char *cpu_arch_name = NULL;
 783 static char *cpu_sub_arch_name = NULL;
 784
 785 /* CPU feature flags.  */
 786 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
 787
 788 /* Non-zero if we have selected a cpu we are generating instructions for.  */
 789 static int cpu_arch_tune_set = 0;
 790
 791 /* Cpu we are generating instructions for.  */
 792 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
 793
 794 /* CPU feature flags of cpu we are generating instructions for.  */
 795 static i386_cpu_flags cpu_arch_tune_flags;
 796
 797 /* CPU instruction set architecture used.  */
 798 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
 799
 800 /* CPU feature flags of instruction set architecture used.  */
 801 i386_cpu_flags cpu_arch_isa_flags;
 802
 803 /* If set, conditional jumps are not automatically promoted to handle
 804    an offset larger than a byte.  */
 805 static unsigned int no_cond_jump_promotion = 0;
 806
 807 /* Encode SSE instructions with VEX prefix.  */
 808 static unsigned int sse2avx;
 809
 810 /* Encode scalar AVX instructions with specific vector length.  */
 811 static enum
 812   {
 813     vex128 = 0, /* value of the zero-initialized variable */
 814     vex256
 815   } avxscalar;
 816
 817 /* Encode VEX WIG instructions with specific vex.w.  */
 818 static enum
 819   {
 820     vexw0 = 0, /* value of the zero-initialized variable */
 821     vexw1
 822   } vexwig;
 823
 824 /* Encode scalar EVEX LIG instructions with specific vector length.  */
 825 static enum
 826   {
 827     evexl128 = 0, /* value of the zero-initialized variable */
 828     evexl256,
 829     evexl512
 830   } evexlig;
 831
 832 /* Encode EVEX WIG instructions with specific evex.w.  */
 833 static enum
 834   {
 835     evexw0 = 0, /* value of the zero-initialized variable */
 836     evexw1
 837   } evexwig;
 838
 839 /* Value to encode in EVEX RC bits, for SAE-only instructions.  */
 840 static enum rc_type evexrcig = rne;
 841
 842 /* Symbol for the pre-defined "_GLOBAL_OFFSET_TABLE_".  */
 843 static symbolS *GOT_symbol;
 844
 845 /* The dwarf2 return column, adjusted for 32 or 64 bit.  */
 846 unsigned int x86_dwarf2_return_column;
 847
 848 /* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
 849 int x86_cie_data_alignment;
 850
 851 /* Interface to relax_segment.
 852    There are 3 major relax states for 386 jump insns because the
 853    different types of jumps add different sizes to frags when we're
 854    figuring out what sort of jump to choose to reach a given label.
 855
 856    BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
 857    branches which are handled by md_estimate_size_before_relax() and
 858    i386_generic_table_relax_frag().  */
 859
 860 /* Types (ENCODE_RELAX_STATE places these above the low two bits).  */
 861 #define UNCOND_JUMP 0
 862 #define COND_JUMP 1
 863 #define COND_JUMP86 2
 864 #define BRANCH_PADDING 3
 865 #define BRANCH_PREFIX 4
 866 #define FUSED_JCC_PADDING 5
 867
 868 /* Sizes (the low two bits of a relax state).  */
 869 #define CODE16  1
 870 #define SMALL   0
 871 #define SMALL16 (SMALL | CODE16)
 872 #define BIG     2
 873 #define BIG16   (BIG | CODE16)
 874
 875 #ifndef INLINE /* NOTE(review): INLINE is already defined unconditionally near the top of this file, so this fallback never takes effect.  */
 876 #ifdef __GNUC__
 877 #define INLINE __inline__
 878 #else
 879 #define INLINE
 880 #endif
 881 #endif
 882
 883 #define ENCODE_RELAX_STATE(type, size) \
 884   ((relax_substateT) (((type) << 2) | (size)))
 885 #define TYPE_FROM_RELAX_STATE(s) \
 886   ((s) >> 2)
 887 #define DISP_SIZE_FROM_RELAX_STATE(s) \
 888     ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
 889
 890 /* This table is used by relax_frag to promote short jumps to long
 891    ones where necessary.  SMALL (short) jumps may be promoted to BIG
 892    (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
 893    don't allow a short jump in a 32 bit code segment to be promoted to
 894    a 16 bit offset jump because it's slower (requires data size
 895    prefix), and doesn't work, unless the destination is in the bottom
 896    64k of the code segment (The top 16 bits of eip are zeroed).  */
 897
 898 const relax_typeS md_relax_table[] =
 899 {
 900   /* The fields are:
 901      1) most positive reach of this state,
 902      2) most negative reach of this state,
 903      3) how many bytes this mode will have in the variable part of the frag
 904      4) which index into the table to try if we can't fit into this one.
 904      Each jump type has four rows, in the order SMALL, SMALL16, BIG, BIG16.  */
 905
 906   /* UNCOND_JUMP states.  */
 907   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
 908   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
 909   /* dword jmp adds 4 bytes to frag:
 910      0 extra opcode bytes, 4 displacement bytes.  */
 911   {0, 0, 4, 0},
 912   /* word jmp adds 2 bytes to frag:
 913      0 extra opcode bytes, 2 displacement bytes.  */
 914   {0, 0, 2, 0},
 915
 916   /* COND_JUMP states.  */
 917   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
 918   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
 919   /* dword conditionals add 5 bytes to frag:
 920      1 extra opcode byte, 4 displacement bytes.  */
 921   {0, 0, 5, 0},
 922   /* word conditionals add 3 bytes to frag:
 923      1 extra opcode byte, 2 displacement bytes.  */
 924   {0, 0, 3, 0},
 925
 926   /* COND_JUMP86 states.  */
 927   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
 928   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
 929   /* dword conditionals add 5 bytes to frag:
 930      1 extra opcode byte, 4 displacement bytes.  */
 931   {0, 0, 5, 0},
 932   /* word conditionals add 4 bytes to frag:
 933      1 displacement byte and a 3 byte long branch insn.  */
 934   {0, 0, 4, 0}
 935 };
 936
 937 static const arch_entry cpu_arch[] =
 938 {
 939   /* Do not replace the first two entries - i386_target_format()
 940      relies on them being there in this order.  */
 941   { STRING_COMMA_LEN ("generic32"), PROCESSOR_GENERIC32,
 942     CPU_GENERIC32_FLAGS, 0 },
 943   { STRING_COMMA_LEN ("generic64"), PROCESSOR_GENERIC64,
 944     CPU_GENERIC64_FLAGS, 0 },
 945   { STRING_COMMA_LEN ("i8086"), PROCESSOR_UNKNOWN,
 946     CPU_NONE_FLAGS, 0 },
 947   { STRING_COMMA_LEN ("i186"), PROCESSOR_UNKNOWN,
 948     CPU_I186_FLAGS, 0 },
 949   { STRING_COMMA_LEN ("i286"), PROCESSOR_UNKNOWN,
 950     CPU_I286_FLAGS, 0 },
 951   { STRING_COMMA_LEN ("i386"), PROCESSOR_I386,
 952     CPU_I386_FLAGS, 0 },
 953   { STRING_COMMA_LEN ("i486"), PROCESSOR_I486,
 954     CPU_I486_FLAGS, 0 },
 955   { STRING_COMMA_LEN ("i586"), PROCESSOR_PENTIUM,
 956     CPU_I586_FLAGS, 0 },
 957   { STRING_COMMA_LEN ("i686"), PROCESSOR_PENTIUMPRO,
 958     CPU_I686_FLAGS, 0 },
 959   { STRING_COMMA_LEN ("pentium"), PROCESSOR_PENTIUM,
 960     CPU_I586_FLAGS, 0 },
 961   { STRING_COMMA_LEN ("pentiumpro"), PROCESSOR_PENTIUMPRO,
 962     CPU_PENTIUMPRO_FLAGS, 0 },
 963   { STRING_COMMA_LEN ("pentiumii"), PROCESSOR_PENTIUMPRO,
 964     CPU_P2_FLAGS, 0 },
 965   { STRING_COMMA_LEN ("pentiumiii"),PROCESSOR_PENTIUMPRO,
 966     CPU_P3_FLAGS, 0 },
 967   { STRING_COMMA_LEN ("pentium4"), PROCESSOR_PENTIUM4,
 968     CPU_P4_FLAGS, 0 },
 969   { STRING_COMMA_LEN ("prescott"), PROCESSOR_NOCONA,
 970     CPU_CORE_FLAGS, 0 },
 971   { STRING_COMMA_LEN ("nocona"), PROCESSOR_NOCONA,
 972     CPU_NOCONA_FLAGS, 0 },
 973   { STRING_COMMA_LEN ("yonah"), PROCESSOR_CORE,
 974     CPU_CORE_FLAGS, 1 },
 975   { STRING_COMMA_LEN ("core"), PROCESSOR_CORE,
 976     CPU_CORE_FLAGS, 0 },
 977   { STRING_COMMA_LEN ("merom"), PROCESSOR_CORE2,
 978     CPU_CORE2_FLAGS, 1 },
 979   { STRING_COMMA_LEN ("core2"), PROCESSOR_CORE2,
 980     CPU_CORE2_FLAGS, 0 },
 981   { STRING_COMMA_LEN ("corei7"), PROCESSOR_COREI7,
 982     CPU_COREI7_FLAGS, 0 },
 983   { STRING_COMMA_LEN ("l1om"), PROCESSOR_L1OM,
 984     CPU_L1OM_FLAGS, 0 },
 985   { STRING_COMMA_LEN ("k1om"), PROCESSOR_K1OM,
 986     CPU_K1OM_FLAGS, 0 },
 987   { STRING_COMMA_LEN ("iamcu"), PROCESSOR_IAMCU,
 988     CPU_IAMCU_FLAGS, 0 },
 989   { STRING_COMMA_LEN ("k6"), PROCESSOR_K6,
 990     CPU_K6_FLAGS, 0 },
 991   { STRING_COMMA_LEN ("k6_2"), PROCESSOR_K6,
 992     CPU_K6_2_FLAGS, 0 },
 993   { STRING_COMMA_LEN ("athlon"), PROCESSOR_ATHLON,
 994     CPU_ATHLON_FLAGS, 0 },
 995   { STRING_COMMA_LEN ("sledgehammer"), PROCESSOR_K8,
 996     CPU_K8_FLAGS, 1 },
 997   { STRING_COMMA_LEN ("opteron"), PROCESSOR_K8,
 998     CPU_K8_FLAGS, 0 },
 999   { STRING_COMMA_LEN ("k8"), PROCESSOR_K8,
1000     CPU_K8_FLAGS, 0 },
1001   { STRING_COMMA_LEN ("amdfam10"), PROCESSOR_AMDFAM10,
1002     CPU_AMDFAM10_FLAGS, 0 },
1003   { STRING_COMMA_LEN ("bdver1"), PROCESSOR_BD,
1004     CPU_BDVER1_FLAGS, 0 },
1005   { STRING_COMMA_LEN ("bdver2"), PROCESSOR_BD,
1006     CPU_BDVER2_FLAGS, 0 },
1007   { STRING_COMMA_LEN ("bdver3"), PROCESSOR_BD,
1008     CPU_BDVER3_FLAGS, 0 },
1009   { STRING_COMMA_LEN ("bdver4"), PROCESSOR_BD,
1010     CPU_BDVER4_FLAGS, 0 },
1011   { STRING_COMMA_LEN ("znver1"), PROCESSOR_ZNVER,
1012     CPU_ZNVER1_FLAGS, 0 },
1013   { STRING_COMMA_LEN ("znver2"), PROCESSOR_ZNVER,
1014     CPU_ZNVER2_FLAGS, 0 },
1015   { STRING_COMMA_LEN ("btver1"), PROCESSOR_BT,
1016     CPU_BTVER1_FLAGS, 0 },
1017   { STRING_COMMA_LEN ("btver2"), PROCESSOR_BT,
1018     CPU_BTVER2_FLAGS, 0 },
1019   { STRING_COMMA_LEN (".8087"), PROCESSOR_UNKNOWN,
1020     CPU_8087_FLAGS, 0 },
1021   { STRING_COMMA_LEN (".287"), PROCESSOR_UNKNOWN,
1022     CPU_287_FLAGS, 0 },
1023   { STRING_COMMA_LEN (".387"), PROCESSOR_UNKNOWN,
1024     CPU_387_FLAGS, 0 },
1025   { STRING_COMMA_LEN (".687"), PROCESSOR_UNKNOWN,
1026     CPU_687_FLAGS, 0 },
1027   { STRING_COMMA_LEN (".cmov"), PROCESSOR_UNKNOWN,
1028     CPU_CMOV_FLAGS, 0 },
1029   { STRING_COMMA_LEN (".fxsr"), PROCESSOR_UNKNOWN,
1030     CPU_FXSR_FLAGS, 0 },
1031   { STRING_COMMA_LEN (".mmx"), PROCESSOR_UNKNOWN,
1032     CPU_MMX_FLAGS, 0 },
1033   { STRING_COMMA_LEN (".sse"), PROCESSOR_UNKNOWN,
1034     CPU_SSE_FLAGS, 0 },
1035   { STRING_COMMA_LEN (".sse2"), PROCESSOR_UNKNOWN,
1036     CPU_SSE2_FLAGS, 0 },
1037   { STRING_COMMA_LEN (".sse3"), PROCESSOR_UNKNOWN,
1038     CPU_SSE3_FLAGS, 0 },
1039   { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1040     CPU_SSE4A_FLAGS, 0 },
1041   { STRING_COMMA_LEN (".ssse3"), PROCESSOR_UNKNOWN,
1042     CPU_SSSE3_FLAGS, 0 },
1043   { STRING_COMMA_LEN (".sse4.1"), PROCESSOR_UNKNOWN,
1044     CPU_SSE4_1_FLAGS, 0 },
1045   { STRING_COMMA_LEN (".sse4.2"), PROCESSOR_UNKNOWN,
1046     CPU_SSE4_2_FLAGS, 0 },
1047   { STRING_COMMA_LEN (".sse4"), PROCESSOR_UNKNOWN,
1048     CPU_SSE4_2_FLAGS, 0 },
1049   { STRING_COMMA_LEN (".avx"), PROCESSOR_UNKNOWN,
1050     CPU_AVX_FLAGS, 0 },
1051   { STRING_COMMA_LEN (".avx2"), PROCESSOR_UNKNOWN,
1052     CPU_AVX2_FLAGS, 0 },
1053   { STRING_COMMA_LEN (".avx512f"), PROCESSOR_UNKNOWN,
1054     CPU_AVX512F_FLAGS, 0 },
1055   { STRING_COMMA_LEN (".avx512cd"), PROCESSOR_UNKNOWN,
1056     CPU_AVX512CD_FLAGS, 0 },
1057   { STRING_COMMA_LEN (".avx512er"), PROCESSOR_UNKNOWN,
1058     CPU_AVX512ER_FLAGS, 0 },
1059   { STRING_COMMA_LEN (".avx512pf"), PROCESSOR_UNKNOWN,
1060     CPU_AVX512PF_FLAGS, 0 },
1061   { STRING_COMMA_LEN (".avx512dq"), PROCESSOR_UNKNOWN,
1062     CPU_AVX512DQ_FLAGS, 0 },
1063   { STRING_COMMA_LEN (".avx512bw"), PROCESSOR_UNKNOWN,
1064     CPU_AVX512BW_FLAGS, 0 },
1065   { STRING_COMMA_LEN (".avx512vl"), PROCESSOR_UNKNOWN,
1066     CPU_AVX512VL_FLAGS, 0 },
1067   { STRING_COMMA_LEN (".vmx"), PROCESSOR_UNKNOWN,
1068     CPU_VMX_FLAGS, 0 },
1069   { STRING_COMMA_LEN (".vmfunc"), PROCESSOR_UNKNOWN,
1070     CPU_VMFUNC_FLAGS, 0 },
1071   { STRING_COMMA_LEN (".smx"), PROCESSOR_UNKNOWN,
1072     CPU_SMX_FLAGS, 0 },
1073   { STRING_COMMA_LEN (".xsave"), PROCESSOR_UNKNOWN,
1074     CPU_XSAVE_FLAGS, 0 },
1075   { STRING_COMMA_LEN (".xsaveopt"), PROCESSOR_UNKNOWN,
1076     CPU_XSAVEOPT_FLAGS, 0 },
1077   { STRING_COMMA_LEN (".xsavec"), PROCESSOR_UNKNOWN,
1078     CPU_XSAVEC_FLAGS, 0 },
1079   { STRING_COMMA_LEN (".xsaves"), PROCESSOR_UNKNOWN,
1080     CPU_XSAVES_FLAGS, 0 },
1081   { STRING_COMMA_LEN (".aes"), PROCESSOR_UNKNOWN,
1082     CPU_AES_FLAGS, 0 },
1083   { STRING_COMMA_LEN (".pclmul"), PROCESSOR_UNKNOWN,
1084     CPU_PCLMUL_FLAGS, 0 },
1085   { STRING_COMMA_LEN (".clmul"), PROCESSOR_UNKNOWN,
1086     CPU_PCLMUL_FLAGS, 1 },
1087   { STRING_COMMA_LEN (".fsgsbase"), PROCESSOR_UNKNOWN,
1088     CPU_FSGSBASE_FLAGS, 0 },
1089   { STRING_COMMA_LEN (".rdrnd"), PROCESSOR_UNKNOWN,
1090     CPU_RDRND_FLAGS, 0 },
1091   { STRING_COMMA_LEN (".f16c"), PROCESSOR_UNKNOWN,
1092     CPU_F16C_FLAGS, 0 },
1093   { STRING_COMMA_LEN (".bmi2"), PROCESSOR_UNKNOWN,
1094     CPU_BMI2_FLAGS, 0 },
1095   { STRING_COMMA_LEN (".fma"), PROCESSOR_UNKNOWN,
1096     CPU_FMA_FLAGS, 0 },
1097   { STRING_COMMA_LEN (".fma4"), PROCESSOR_UNKNOWN,
1098     CPU_FMA4_FLAGS, 0 },
1099   { STRING_COMMA_LEN (".xop"), PROCESSOR_UNKNOWN,
1100     CPU_XOP_FLAGS, 0 },
1101   { STRING_COMMA_LEN (".lwp"), PROCESSOR_UNKNOWN,
1102     CPU_LWP_FLAGS, 0 },
1103   { STRING_COMMA_LEN (".movbe"), PROCESSOR_UNKNOWN,
1104     CPU_MOVBE_FLAGS, 0 },
1105   { STRING_COMMA_LEN (".cx16"), PROCESSOR_UNKNOWN,
1106     CPU_CX16_FLAGS, 0 },
1107   { STRING_COMMA_LEN (".ept"), PROCESSOR_UNKNOWN,
1108     CPU_EPT_FLAGS, 0 },
1109   { STRING_COMMA_LEN (".lzcnt"), PROCESSOR_UNKNOWN,
1110     CPU_LZCNT_FLAGS, 0 },
1111   { STRING_COMMA_LEN (".popcnt"), PROCESSOR_UNKNOWN,
1112     CPU_POPCNT_FLAGS, 0 },
1113   { STRING_COMMA_LEN (".hle"), PROCESSOR_UNKNOWN,
1114     CPU_HLE_FLAGS, 0 },
1115   { STRING_COMMA_LEN (".rtm"), PROCESSOR_UNKNOWN,
1116     CPU_RTM_FLAGS, 0 },
1117   { STRING_COMMA_LEN (".invpcid"), PROCESSOR_UNKNOWN,
1118     CPU_INVPCID_FLAGS, 0 },
1119   { STRING_COMMA_LEN (".clflush"), PROCESSOR_UNKNOWN,
1120     CPU_CLFLUSH_FLAGS, 0 },
1121   { STRING_COMMA_LEN (".nop"), PROCESSOR_UNKNOWN,
1122     CPU_NOP_FLAGS, 0 },
1123   { STRING_COMMA_LEN (".syscall"), PROCESSOR_UNKNOWN,
1124     CPU_SYSCALL_FLAGS, 0 },
1125   { STRING_COMMA_LEN (".rdtscp"), PROCESSOR_UNKNOWN,
1126     CPU_RDTSCP_FLAGS, 0 },
1127   { STRING_COMMA_LEN (".3dnow"), PROCESSOR_UNKNOWN,
1128     CPU_3DNOW_FLAGS, 0 },
1129   { STRING_COMMA_LEN (".3dnowa"), PROCESSOR_UNKNOWN,
1130     CPU_3DNOWA_FLAGS, 0 },
1131   { STRING_COMMA_LEN (".padlock"), PROCESSOR_UNKNOWN,
1132     CPU_PADLOCK_FLAGS, 0 },
1133   { STRING_COMMA_LEN (".pacifica"), PROCESSOR_UNKNOWN,
1134     CPU_SVME_FLAGS, 1 },
1135   { STRING_COMMA_LEN (".svme"), PROCESSOR_UNKNOWN,
1136     CPU_SVME_FLAGS, 0 },
1137   { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1138     CPU_SSE4A_FLAGS, 0 },
1139   { STRING_COMMA_LEN (".abm"), PROCESSOR_UNKNOWN,
1140     CPU_ABM_FLAGS, 0 },
1141   { STRING_COMMA_LEN (".bmi"), PROCESSOR_UNKNOWN,
1142     CPU_BMI_FLAGS, 0 },
1143   { STRING_COMMA_LEN (".tbm"), PROCESSOR_UNKNOWN,
1144     CPU_TBM_FLAGS, 0 },
1145   { STRING_COMMA_LEN (".adx"), PROCESSOR_UNKNOWN,
1146     CPU_ADX_FLAGS, 0 },
1147   { STRING_COMMA_LEN (".rdseed"), PROCESSOR_UNKNOWN,
1148     CPU_RDSEED_FLAGS, 0 },
1149   { STRING_COMMA_LEN (".prfchw"), PROCESSOR_UNKNOWN,
1150     CPU_PRFCHW_FLAGS, 0 },
1151   { STRING_COMMA_LEN (".smap"), PROCESSOR_UNKNOWN,
1152     CPU_SMAP_FLAGS, 0 },
1153   { STRING_COMMA_LEN (".mpx"), PROCESSOR_UNKNOWN,
1154     CPU_MPX_FLAGS, 0 },
1155   { STRING_COMMA_LEN (".sha"), PROCESSOR_UNKNOWN,
1156     CPU_SHA_FLAGS, 0 },
1157   { STRING_COMMA_LEN (".clflushopt"), PROCESSOR_UNKNOWN,
1158     CPU_CLFLUSHOPT_FLAGS, 0 },
1159   { STRING_COMMA_LEN (".prefetchwt1"), PROCESSOR_UNKNOWN,
1160     CPU_PREFETCHWT1_FLAGS, 0 },
1161   { STRING_COMMA_LEN (".se1"), PROCESSOR_UNKNOWN,
1162     CPU_SE1_FLAGS, 0 },
1163   { STRING_COMMA_LEN (".clwb"), PROCESSOR_UNKNOWN,
1164     CPU_CLWB_FLAGS, 0 },
1165   { STRING_COMMA_LEN (".avx512ifma"), PROCESSOR_UNKNOWN,
1166     CPU_AVX512IFMA_FLAGS, 0 },
1167   { STRING_COMMA_LEN (".avx512vbmi"), PROCESSOR_UNKNOWN,
1168     CPU_AVX512VBMI_FLAGS, 0 },
1169   { STRING_COMMA_LEN (".avx512_4fmaps"), PROCESSOR_UNKNOWN,
1170     CPU_AVX512_4FMAPS_FLAGS, 0 },
1171   { STRING_COMMA_LEN (".avx512_4vnniw"), PROCESSOR_UNKNOWN,
1172     CPU_AVX512_4VNNIW_FLAGS, 0 },
1173   { STRING_COMMA_LEN (".avx512_vpopcntdq"), PROCESSOR_UNKNOWN,
1174     CPU_AVX512_VPOPCNTDQ_FLAGS, 0 },
1175   { STRING_COMMA_LEN (".avx512_vbmi2"), PROCESSOR_UNKNOWN,
1176     CPU_AVX512_VBMI2_FLAGS, 0 },
1177   { STRING_COMMA_LEN (".avx512_vnni"), PROCESSOR_UNKNOWN,
1178     CPU_AVX512_VNNI_FLAGS, 0 },
1179   { STRING_COMMA_LEN (".avx512_bitalg"), PROCESSOR_UNKNOWN,
1180     CPU_AVX512_BITALG_FLAGS, 0 },
1181   { STRING_COMMA_LEN (".clzero"), PROCESSOR_UNKNOWN,
1182     CPU_CLZERO_FLAGS, 0 },
1183   { STRING_COMMA_LEN (".mwaitx"), PROCESSOR_UNKNOWN,
1184     CPU_MWAITX_FLAGS, 0 },
1185   { STRING_COMMA_LEN (".ospke"), PROCESSOR_UNKNOWN,
1186     CPU_OSPKE_FLAGS, 0 },
1187   { STRING_COMMA_LEN (".rdpid"), PROCESSOR_UNKNOWN,
1188     CPU_RDPID_FLAGS, 0 },
1189   { STRING_COMMA_LEN (".ptwrite"), PROCESSOR_UNKNOWN,
1190     CPU_PTWRITE_FLAGS, 0 },
1191   { STRING_COMMA_LEN (".ibt"), PROCESSOR_UNKNOWN,
1192     CPU_IBT_FLAGS, 0 },
1193   { STRING_COMMA_LEN (".shstk"), PROCESSOR_UNKNOWN,
1194     CPU_SHSTK_FLAGS, 0 },
1195   { STRING_COMMA_LEN (".gfni"), PROCESSOR_UNKNOWN,
1196     CPU_GFNI_FLAGS, 0 },
1197   { STRING_COMMA_LEN (".vaes"), PROCESSOR_UNKNOWN,
1198     CPU_VAES_FLAGS, 0 },
1199   { STRING_COMMA_LEN (".vpclmulqdq"), PROCESSOR_UNKNOWN,
1200     CPU_VPCLMULQDQ_FLAGS, 0 },
1201   { STRING_COMMA_LEN (".wbnoinvd"), PROCESSOR_UNKNOWN,
1202     CPU_WBNOINVD_FLAGS, 0 },
1203   { STRING_COMMA_LEN (".pconfig"), PROCESSOR_UNKNOWN,
1204     CPU_PCONFIG_FLAGS, 0 },
1205   { STRING_COMMA_LEN (".waitpkg"), PROCESSOR_UNKNOWN,
1206     CPU_WAITPKG_FLAGS, 0 },
1207   { STRING_COMMA_LEN (".cldemote"), PROCESSOR_UNKNOWN,
1208     CPU_CLDEMOTE_FLAGS, 0 },
1209   { STRING_COMMA_LEN (".amx_int8"), PROCESSOR_UNKNOWN,
1210     CPU_AMX_INT8_FLAGS, 0 },
1211   { STRING_COMMA_LEN (".amx_bf16"), PROCESSOR_UNKNOWN,
1212     CPU_AMX_BF16_FLAGS, 0 },
1213   { STRING_COMMA_LEN (".amx_tile"), PROCESSOR_UNKNOWN,
1214     CPU_AMX_TILE_FLAGS, 0 },
1215   { STRING_COMMA_LEN (".movdiri"), PROCESSOR_UNKNOWN,
1216     CPU_MOVDIRI_FLAGS, 0 },
1217   { STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
1218     CPU_MOVDIR64B_FLAGS, 0 },
1219   { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
1220     CPU_AVX512_BF16_FLAGS, 0 },
1221   { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
1222     CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
1223   { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
1224     CPU_ENQCMD_FLAGS, 0 },
1225   { STRING_COMMA_LEN (".serialize"), PROCESSOR_UNKNOWN,
1226     CPU_SERIALIZE_FLAGS, 0 },
1227   { STRING_COMMA_LEN (".rdpru"), PROCESSOR_UNKNOWN,
1228     CPU_RDPRU_FLAGS, 0 },
1229   { STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN,
1230     CPU_MCOMMIT_FLAGS, 0 },
1231   { STRING_COMMA_LEN (".sev_es"), PROCESSOR_UNKNOWN,
1232     CPU_SEV_ES_FLAGS, 0 },
1233   { STRING_COMMA_LEN (".tsxldtrk"), PROCESSOR_UNKNOWN,
1234     CPU_TSXLDTRK_FLAGS, 0 },
1235 };
1236
1237 static const noarch_entry cpu_noarch[] =
1238 {
1239   { STRING_COMMA_LEN ("no87"),  CPU_ANY_X87_FLAGS },
1240   { STRING_COMMA_LEN ("no287"),  CPU_ANY_287_FLAGS },
1241   { STRING_COMMA_LEN ("no387"),  CPU_ANY_387_FLAGS },
1242   { STRING_COMMA_LEN ("no687"),  CPU_ANY_687_FLAGS },
1243   { STRING_COMMA_LEN ("nocmov"),  CPU_ANY_CMOV_FLAGS },
1244   { STRING_COMMA_LEN ("nofxsr"),  CPU_ANY_FXSR_FLAGS },
1245   { STRING_COMMA_LEN ("nommx"),  CPU_ANY_MMX_FLAGS },
1246   { STRING_COMMA_LEN ("nosse"),  CPU_ANY_SSE_FLAGS },
1247   { STRING_COMMA_LEN ("nosse2"),  CPU_ANY_SSE2_FLAGS },
1248   { STRING_COMMA_LEN ("nosse3"),  CPU_ANY_SSE3_FLAGS },
1249   { STRING_COMMA_LEN ("nosse4a"),  CPU_ANY_SSE4A_FLAGS },
1250   { STRING_COMMA_LEN ("nossse3"),  CPU_ANY_SSSE3_FLAGS },
1251   { STRING_COMMA_LEN ("nosse4.1"),  CPU_ANY_SSE4_1_FLAGS },
1252   { STRING_COMMA_LEN ("nosse4.2"),  CPU_ANY_SSE4_2_FLAGS },
1253   { STRING_COMMA_LEN ("nosse4"),  CPU_ANY_SSE4_1_FLAGS },
1254   { STRING_COMMA_LEN ("noavx"),  CPU_ANY_AVX_FLAGS },
1255   { STRING_COMMA_LEN ("noavx2"),  CPU_ANY_AVX2_FLAGS },
1256   { STRING_COMMA_LEN ("noavx512f"), CPU_ANY_AVX512F_FLAGS },
1257   { STRING_COMMA_LEN ("noavx512cd"), CPU_ANY_AVX512CD_FLAGS },
1258   { STRING_COMMA_LEN ("noavx512er"), CPU_ANY_AVX512ER_FLAGS },
1259   { STRING_COMMA_LEN ("noavx512pf"), CPU_ANY_AVX512PF_FLAGS },
1260   { STRING_COMMA_LEN ("noavx512dq"), CPU_ANY_AVX512DQ_FLAGS },
1261   { STRING_COMMA_LEN ("noavx512bw"), CPU_ANY_AVX512BW_FLAGS },
1262   { STRING_COMMA_LEN ("noavx512vl"), CPU_ANY_AVX512VL_FLAGS },
1263   { STRING_COMMA_LEN ("noavx512ifma"), CPU_ANY_AVX512IFMA_FLAGS },
1264   { STRING_COMMA_LEN ("noavx512vbmi"), CPU_ANY_AVX512VBMI_FLAGS },
1265   { STRING_COMMA_LEN ("noavx512_4fmaps"), CPU_ANY_AVX512_4FMAPS_FLAGS },
1266   { STRING_COMMA_LEN ("noavx512_4vnniw"), CPU_ANY_AVX512_4VNNIW_FLAGS },
1267   { STRING_COMMA_LEN ("noavx512_vpopcntdq"), CPU_ANY_AVX512_VPOPCNTDQ_FLAGS },
1268   { STRING_COMMA_LEN ("noavx512_vbmi2"), CPU_ANY_AVX512_VBMI2_FLAGS },
1269   { STRING_COMMA_LEN ("noavx512_vnni"), CPU_ANY_AVX512_VNNI_FLAGS },
1270   { STRING_COMMA_LEN ("noavx512_bitalg"), CPU_ANY_AVX512_BITALG_FLAGS },
1271   { STRING_COMMA_LEN ("noibt"), CPU_ANY_IBT_FLAGS },
1272   { STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
1273   { STRING_COMMA_LEN ("noamx_int8"), CPU_ANY_AMX_INT8_FLAGS },
1274   { STRING_COMMA_LEN ("noamx_bf16"), CPU_ANY_AMX_BF16_FLAGS },
1275   { STRING_COMMA_LEN ("noamx_tile"), CPU_ANY_AMX_TILE_FLAGS },
1276   { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
1277   { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
1278   { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
1279   { STRING_COMMA_LEN ("noavx512_vp2intersect"),
1280     CPU_ANY_AVX512_VP2INTERSECT_FLAGS },
1281   { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
1282   { STRING_COMMA_LEN ("noserialize"), CPU_ANY_SERIALIZE_FLAGS },
1283   { STRING_COMMA_LEN ("notsxldtrk"), CPU_ANY_TSXLDTRK_FLAGS },
1284 };
1285
1286 #ifdef I386COFF
1287 /* Like s_lcomm_internal in gas/read.c but the alignment string
1288    is allowed to be optional.  */
1289
1290 static symbolS *
1291 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1292 {
1293   addressT align = 0;
1294
1295   SKIP_WHITESPACE ();
1296
1297   if (needs_align
1298       && *input_line_pointer == ',')
1299     {
1300       align = parse_align (needs_align - 1);
1301
1302       if (align == (addressT) -1)
1303         return NULL;
1304     }
1305   else
1306     {
1307       if (size >= 8)
1308         align = 3;
1309       else if (size >= 4)
1310         align = 2;
1311       else if (size >= 2)
1312         align = 1;
1313       else
1314         align = 0;
1315     }
1316
1317   bss_alloc (symbolP, size, align);
1318   return symbolP;
1319 }
1320
1321 static void
1322 pe_lcomm (int needs_align)
1323 {
1324   s_comm_internal (needs_align * 2, pe_lcomm_internal);
1325 }
1326 #endif
1327
1328 const pseudo_typeS md_pseudo_table[] =
1329 {
1330 #if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
1331   {"align", s_align_bytes, 0},
1332 #else
1333   {"align", s_align_ptwo, 0},
1334 #endif
1335   {"arch", set_cpu_arch, 0},
1336 #ifndef I386COFF
1337   {"bss", s_bss, 0},
1338 #else
1339   {"lcomm", pe_lcomm, 1},
1340 #endif
1341   {"ffloat", float_cons, 'f'},
1342   {"dfloat", float_cons, 'd'},
1343   {"tfloat", float_cons, 'x'},
1344   {"value", cons, 2},
1345   {"slong", signed_cons, 4},
1346   {"noopt", s_ignore, 0},
1347   {"optim", s_ignore, 0},
1348   {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
1349   {"code16", set_code_flag, CODE_16BIT},
1350   {"code32", set_code_flag, CODE_32BIT},
1351 #ifdef BFD64
1352   {"code64", set_code_flag, CODE_64BIT},
1353 #endif
1354   {"intel_syntax", set_intel_syntax, 1},
1355   {"att_syntax", set_intel_syntax, 0},
1356   {"intel_mnemonic", set_intel_mnemonic, 1},
1357   {"att_mnemonic", set_intel_mnemonic, 0},
1358   {"allow_index_reg", set_allow_index_reg, 1},
1359   {"disallow_index_reg", set_allow_index_reg, 0},
1360   {"sse_check", set_check, 0},
1361   {"operand_check", set_check, 1},
1362 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
1363   {"largecomm", handle_large_common, 0},
1364 #else
1365   {"file", dwarf2_directive_file, 0},
1366   {"loc", dwarf2_directive_loc, 0},
1367   {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
1368 #endif
1369 #ifdef TE_PE
1370   {"secrel32", pe_directive_secrel, 0},
1371 #endif
1372   {0, 0, 0}
1373 };
1374
/* For interface with expression ().
   NOTE(review): pe_lcomm_internal earlier in this file already reads
   input_line_pointer, so a header evidently declares it as well; this
   declaration looks redundant -- confirm before removing.  */
extern char *input_line_pointer;

/* Hash table for instruction mnemonic lookup.  */
static struct hash_control *op_hash;

/* Hash table for register lookup.  */
static struct hash_control *reg_hash;
1383 \f
/* Various efficient no-op patterns for aligning code labels.
   Note: Don't try to assemble the instructions in the comments.
   0L and 0w are not legal.  */

/* nop */
static const unsigned char f32_1[] = {0x90};
/* xchg %ax,%ax */
static const unsigned char f32_2[] = {0x66, 0x90};
/* leal 0(%esi),%esi */
static const unsigned char f32_3[] = {0x8d, 0x76, 0x00};
/* leal 0(%esi,1),%esi */
static const unsigned char f32_4[] = {0x8d, 0x74, 0x26, 0x00};
/* leal 0L(%esi),%esi */
static const unsigned char f32_6[] = {0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00};
/* leal 0L(%esi,1),%esi */
static const unsigned char f32_7[] =
  {0x8d, 0xb4, 0x26, 0x00, 0x00, 0x00, 0x00};

/* lea 0(%si),%si */
static const unsigned char f16_3[] = {0x8d, 0x74, 0x00};
/* lea 0W(%si),%si */
static const unsigned char f16_4[] = {0x8d, 0xb4, 0x00, 0x00};

/* Opcode bytes of the jumps used to skip over long runs of padding;
   the displacement is written separately.  */
/* jmp disp8 */
static const unsigned char jump_disp8[] = {0xeb};
/* jmp disp32 */
static const unsigned char jump32_disp32[] = {0xe9};
/* jmp disp32 */
static const unsigned char jump16_disp32[] = {0x66, 0xe9};

/* In the pattern tables below, entry N - 1 is the N-byte pattern.  A
   NULL entry means there is no single pattern of that length.  */

/* 32-bit NOPs patterns.  There is no 5-byte one.  */
static const unsigned char *const f32_patt[] =
{
  f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
};

/* 16-bit NOPs patterns.  */
static const unsigned char *const f16_patt[] =
{
  f32_1, f32_2, f16_3, f16_4
};

/* nopl (%[re]ax) */
static const unsigned char alt_3[] = {0x0f, 0x1f, 0x00};
/* nopl 0(%[re]ax) */
static const unsigned char alt_4[] = {0x0f, 0x1f, 0x40, 0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_5[] = {0x0f, 0x1f, 0x44, 0x00, 0x00};
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] = {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00};
/* nopl 0L(%[re]ax) */
static const unsigned char alt_7[] =
  {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_8[] =
  {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
  {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_10[] =
  {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};
/* data16 nopw %cs:0L(%eax,%eax,1) */
static const unsigned char alt_11[] =
  {0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00};

/* 32-bit and 64-bit NOPs patterns.  */
static const unsigned char *const alt_patt[] =
{
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt_9, alt_10, alt_11
};
1449
/* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
   size of a single NOP instruction MAX_SINGLE_NOP_SIZE.

   PATT[N - 1] is the N-byte pattern, or NULL if there is none.  The
   buffer is filled with as many of the longest pattern as fit, and the
   remainder is covered by one shorter pattern at the end.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
                  int count, int max_single_nop_size)
{
  const unsigned char *insn;
  int full_bytes;
  int tail;
  int offset;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
                max_single_nop_size);
      return;
    }

  /* Use the next smaller pattern if the requested one isn't available.  */
  insn = patt[max_single_nop_size - 1];
  if (insn == NULL)
    {
      --max_single_nop_size;
      insn = patt[max_single_nop_size - 1];
    }

  /* Place the longer NOPs first.  */
  tail = count % max_single_nop_size;
  full_bytes = count - tail;

  offset = 0;
  while (offset < full_bytes)
    {
      memcpy (where + offset, insn, max_single_nop_size);
      offset += max_single_nop_size;
    }

  if (tail == 0)
    return;

  insn = patt[tail - 1];
  if (insn != NULL)
    {
      memcpy (where + offset, insn, tail);
      return;
    }

  /* There is no pattern of exactly TAIL bytes: use the next smaller
     one followed by a one-byte NOP.  */
  memcpy (where + offset, patt[tail - 2], tail - 1);
  where[offset + tail - 1] = *patt[0];
}
1501
1502 static INLINE int
1503 fits_in_imm7 (offsetT num)
1504 {
1505   return (num & 0x7f) == num;
1506 }
1507
1508 static INLINE int
1509 fits_in_imm31 (offsetT num)
1510 {
1511   return (num & 0x7fffffff) == num;
1512 }
1513
1514 /* Genenerate COUNT bytes of NOPs to WHERE with the maximum size of a
1515    single NOP instruction LIMIT.  */
1516
1517 void
1518 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1519 {
1520   const unsigned char *const *patt = NULL;
1521   int max_single_nop_size;
1522   /* Maximum number of NOPs before switching to jump over NOPs.  */
1523   int max_number_of_nops;
1524
1525   switch (fragP->fr_type)
1526     {
1527     case rs_fill_nop:
1528     case rs_align_code:
1529       break;
1530     case rs_machine_dependent:
1531       /* Allow NOP padding for jumps and calls.  */
1532       if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1533           || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1534         break;
1535       /* Fall through.  */
1536     default:
1537       return;
1538     }
1539
1540   /* We need to decide which NOP sequence to use for 32bit and
1541      64bit. When -mtune= is used:
1542
1543      1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
1544      PROCESSOR_GENERIC32, f32_patt will be used.
1545      2. For the rest, alt_patt will be used.
1546
1547      When -mtune= isn't used, alt_patt will be used if
1548      cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
1549      be used.
1550
1551      When -march= or .arch is used, we can't use anything beyond
1552      cpu_arch_isa_flags.   */
1553
1554   if (flag_code == CODE_16BIT)
1555     {
1556       patt = f16_patt;
1557       max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1558       /* Limit number of NOPs to 2 in 16-bit mode.  */
1559       max_number_of_nops = 2;
1560     }
1561   else
1562     {
1563       if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1564         {
1565           /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
1566           switch (cpu_arch_tune)
1567             {
1568             case PROCESSOR_UNKNOWN:
1569               /* We use cpu_arch_isa_flags to check if we SHOULD
1570                  optimize with nops.  */
1571               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1572                 patt = alt_patt;
1573               else
1574                 patt = f32_patt;
1575               break;
1576             case PROCESSOR_PENTIUM4:
1577             case PROCESSOR_NOCONA:
1578             case PROCESSOR_CORE:
1579             case PROCESSOR_CORE2:
1580             case PROCESSOR_COREI7:
1581             case PROCESSOR_L1OM:
1582             case PROCESSOR_K1OM:
1583             case PROCESSOR_GENERIC64:
1584             case PROCESSOR_K6:
1585             case PROCESSOR_ATHLON:
1586             case PROCESSOR_K8:
1587             case PROCESSOR_AMDFAM10:
1588             case PROCESSOR_BD:
1589             case PROCESSOR_ZNVER:
1590             case PROCESSOR_BT:
1591               patt = alt_patt;
1592               break;
1593             case PROCESSOR_I386:
1594             case PROCESSOR_I486:
1595             case PROCESSOR_PENTIUM:
1596             case PROCESSOR_PENTIUMPRO:
1597             case PROCESSOR_IAMCU:
1598             case PROCESSOR_GENERIC32:
1599               patt = f32_patt;
1600               break;
1601             }
1602         }
1603       else
1604         {
1605           switch (fragP->tc_frag_data.tune)
1606             {
1607             case PROCESSOR_UNKNOWN:
1608               /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1609                  PROCESSOR_UNKNOWN.  */
1610               abort ();
1611               break;
1612
1613             case PROCESSOR_I386:
1614             case PROCESSOR_I486:
1615             case PROCESSOR_PENTIUM:
1616             case PROCESSOR_IAMCU:
1617             case PROCESSOR_K6:
1618             case PROCESSOR_ATHLON:
1619             case PROCESSOR_K8:
1620             case PROCESSOR_AMDFAM10:
1621             case PROCESSOR_BD:
1622             case PROCESSOR_ZNVER:
1623             case PROCESSOR_BT:
1624             case PROCESSOR_GENERIC32:
1625               /* We use cpu_arch_isa_flags to check if we CAN optimize
1626                  with nops.  */
1627               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1628                 patt = alt_patt;
1629               else
1630                 patt = f32_patt;
1631               break;
1632             case PROCESSOR_PENTIUMPRO:
1633             case PROCESSOR_PENTIUM4:
1634             case PROCESSOR_NOCONA:
1635             case PROCESSOR_CORE:
1636             case PROCESSOR_CORE2:
1637             case PROCESSOR_COREI7:
1638             case PROCESSOR_L1OM:
1639             case PROCESSOR_K1OM:
1640               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1641                 patt = alt_patt;
1642               else
1643                 patt = f32_patt;
1644               break;
1645             case PROCESSOR_GENERIC64:
1646               patt = alt_patt;
1647               break;
1648             }
1649         }
1650
1651       if (patt == f32_patt)
1652         {
1653           max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
1654           /* Limit number of NOPs to 2 for older processors.  */
1655           max_number_of_nops = 2;
1656         }
1657       else
1658         {
1659           max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
1660           /* Limit number of NOPs to 7 for newer processors.  */
1661           max_number_of_nops = 7;
1662         }
1663     }
1664
1665   if (limit == 0)
1666     limit = max_single_nop_size;
1667
1668   if (fragP->fr_type == rs_fill_nop)
1669     {
1670       /* Output NOPs for .nop directive.  */
1671       if (limit > max_single_nop_size)
1672         {
1673           as_bad_where (fragP->fr_file, fragP->fr_line,
1674                         _("invalid single nop size: %d "
1675                           "(expect within [0, %d])"),
1676                         limit, max_single_nop_size);
1677           return;
1678         }
1679     }
1680   else if (fragP->fr_type != rs_machine_dependent)
1681     fragP->fr_var = count;
1682
1683   if ((count / max_single_nop_size) > max_number_of_nops)
1684     {
1685       /* Generate jump over NOPs.  */
1686       offsetT disp = count - 2;
1687       if (fits_in_imm7 (disp))
1688         {
1689           /* Use "jmp disp8" if possible.  */
1690           count = disp;
1691           where[0] = jump_disp8[0];
1692           where[1] = count;
1693           where += 2;
1694         }
1695       else
1696         {
1697           unsigned int size_of_jump;
1698
1699           if (flag_code == CODE_16BIT)
1700             {
1701               where[0] = jump16_disp32[0];
1702               where[1] = jump16_disp32[1];
1703               size_of_jump = 2;
1704             }
1705           else
1706             {
1707               where[0] = jump32_disp32[0];
1708               size_of_jump = 1;
1709             }
1710
1711           count -= size_of_jump + 4;
1712           if (!fits_in_imm31 (count))
1713             {
1714               as_bad_where (fragP->fr_file, fragP->fr_line,
1715                             _("jump over nop padding out of range"));
1716               return;
1717             }
1718
1719           md_number_to_chars (where + size_of_jump, count, 4);
1720           where += size_of_jump + 4;
1721         }
1722     }
1723
1724   /* Generate multiple NOPs.  */
1725   i386_output_nops (where, patt, count, limit);
1726 }
1727
1728 static INLINE int
1729 operand_type_all_zero (const union i386_operand_type *x)
1730 {
1731   switch (ARRAY_SIZE(x->array))
1732     {
1733     case 3:
1734       if (x->array[2])
1735         return 0;
1736       /* Fall through.  */
1737     case 2:
1738       if (x->array[1])
1739         return 0;
1740       /* Fall through.  */
1741     case 1:
1742       return !x->array[0];
1743     default:
1744       abort ();
1745     }
1746 }
1747
1748 static INLINE void
1749 operand_type_set (union i386_operand_type *x, unsigned int v)
1750 {
1751   switch (ARRAY_SIZE(x->array))
1752     {
1753     case 3:
1754       x->array[2] = v;
1755       /* Fall through.  */
1756     case 2:
1757       x->array[1] = v;
1758       /* Fall through.  */
1759     case 1:
1760       x->array[0] = v;
1761       /* Fall through.  */
1762       break;
1763     default:
1764       abort ();
1765     }
1766
1767   x->bitfield.class = ClassNone;
1768   x->bitfield.instance = InstanceNone;
1769 }
1770
1771 static INLINE int
1772 operand_type_equal (const union i386_operand_type *x,
1773                     const union i386_operand_type *y)
1774 {
1775   switch (ARRAY_SIZE(x->array))
1776     {
1777     case 3:
1778       if (x->array[2] != y->array[2])
1779         return 0;
1780       /* Fall through.  */
1781     case 2:
1782       if (x->array[1] != y->array[1])
1783         return 0;
1784       /* Fall through.  */
1785     case 1:
1786       return x->array[0] == y->array[0];
1787       break;
1788     default:
1789       abort ();
1790     }
1791 }
1792
1793 static INLINE int
1794 cpu_flags_all_zero (const union i386_cpu_flags *x)
1795 {
1796   switch (ARRAY_SIZE(x->array))
1797     {
1798     case 4:
1799       if (x->array[3])
1800         return 0;
1801       /* Fall through.  */
1802     case 3:
1803       if (x->array[2])
1804         return 0;
1805       /* Fall through.  */
1806     case 2:
1807       if (x->array[1])
1808         return 0;
1809       /* Fall through.  */
1810     case 1:
1811       return !x->array[0];
1812     default:
1813       abort ();
1814     }
1815 }
1816
1817 static INLINE int
1818 cpu_flags_equal (const union i386_cpu_flags *x,
1819                  const union i386_cpu_flags *y)
1820 {
1821   switch (ARRAY_SIZE(x->array))
1822     {
1823     case 4:
1824       if (x->array[3] != y->array[3])
1825         return 0;
1826       /* Fall through.  */
1827     case 3:
1828       if (x->array[2] != y->array[2])
1829         return 0;
1830       /* Fall through.  */
1831     case 2:
1832       if (x->array[1] != y->array[1])
1833         return 0;
1834       /* Fall through.  */
1835     case 1:
1836       return x->array[0] == y->array[0];
1837       break;
1838     default:
1839       abort ();
1840     }
1841 }
1842
1843 static INLINE int
1844 cpu_flags_check_cpu64 (i386_cpu_flags f)
1845 {
1846   return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1847            || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1848 }
1849
1850 static INLINE i386_cpu_flags
1851 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1852 {
1853   switch (ARRAY_SIZE (x.array))
1854     {
1855     case 4:
1856       x.array [3] &= y.array [3];
1857       /* Fall through.  */
1858     case 3:
1859       x.array [2] &= y.array [2];
1860       /* Fall through.  */
1861     case 2:
1862       x.array [1] &= y.array [1];
1863       /* Fall through.  */
1864     case 1:
1865       x.array [0] &= y.array [0];
1866       break;
1867     default:
1868       abort ();
1869     }
1870   return x;
1871 }
1872
1873 static INLINE i386_cpu_flags
1874 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1875 {
1876   switch (ARRAY_SIZE (x.array))
1877     {
1878     case 4:
1879       x.array [3] |= y.array [3];
1880       /* Fall through.  */
1881     case 3:
1882       x.array [2] |= y.array [2];
1883       /* Fall through.  */
1884     case 2:
1885       x.array [1] |= y.array [1];
1886       /* Fall through.  */
1887     case 1:
1888       x.array [0] |= y.array [0];
1889       break;
1890     default:
1891       abort ();
1892     }
1893   return x;
1894 }
1895
1896 static INLINE i386_cpu_flags
1897 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1898 {
1899   switch (ARRAY_SIZE (x.array))
1900     {
1901     case 4:
1902       x.array [3] &= ~y.array [3];
1903       /* Fall through.  */
1904     case 3:
1905       x.array [2] &= ~y.array [2];
1906       /* Fall through.  */
1907     case 2:
1908       x.array [1] &= ~y.array [1];
1909       /* Fall through.  */
1910     case 1:
1911       x.array [0] &= ~y.array [0];
1912       break;
1913     default:
1914       abort ();
1915     }
1916   return x;
1917 }
1918
1919 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
1920
1921 #define CPU_FLAGS_ARCH_MATCH            0x1
1922 #define CPU_FLAGS_64BIT_MATCH           0x2
1923
1924 #define CPU_FLAGS_PERFECT_MATCH \
1925   (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1926
1927 /* Return CPU flags match bits. */
1928
1929 static int
1930 cpu_flags_match (const insn_template *t)
1931 {
1932   i386_cpu_flags x = t->cpu_flags;
1933   int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1934
1935   x.bitfield.cpu64 = 0;
1936   x.bitfield.cpuno64 = 0;
1937
1938   if (cpu_flags_all_zero (&x))
1939     {
1940       /* This instruction is available on all archs.  */
1941       match |= CPU_FLAGS_ARCH_MATCH;
1942     }
1943   else
1944     {
1945       /* This instruction is available only on some archs.  */
1946       i386_cpu_flags cpu = cpu_arch_flags;
1947
1948       /* AVX512VL is no standalone feature - match it and then strip it.  */
1949       if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1950         return match;
1951       x.bitfield.cpuavx512vl = 0;
1952
1953       cpu = cpu_flags_and (x, cpu);
1954       if (!cpu_flags_all_zero (&cpu))
1955         {
1956           if (x.bitfield.cpuavx)
1957             {
1958               /* We need to check a few extra flags with AVX.  */
1959               if (cpu.bitfield.cpuavx
1960                   && (!t->opcode_modifier.sse2avx
1961                       || (sse2avx && !i.prefix[DATA_PREFIX]))
1962                   && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1963                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1964                   && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1965                 match |= CPU_FLAGS_ARCH_MATCH;
1966             }
1967           else if (x.bitfield.cpuavx512f)
1968             {
1969               /* We need to check a few extra flags with AVX512F.  */
1970               if (cpu.bitfield.cpuavx512f
1971                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1972                   && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1973                   && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1974                 match |= CPU_FLAGS_ARCH_MATCH;
1975             }
1976           else
1977             match |= CPU_FLAGS_ARCH_MATCH;
1978         }
1979     }
1980   return match;
1981 }
1982
1983 static INLINE i386_operand_type
1984 operand_type_and (i386_operand_type x, i386_operand_type y)
1985 {
1986   if (x.bitfield.class != y.bitfield.class)
1987     x.bitfield.class = ClassNone;
1988   if (x.bitfield.instance != y.bitfield.instance)
1989     x.bitfield.instance = InstanceNone;
1990
1991   switch (ARRAY_SIZE (x.array))
1992     {
1993     case 3:
1994       x.array [2] &= y.array [2];
1995       /* Fall through.  */
1996     case 2:
1997       x.array [1] &= y.array [1];
1998       /* Fall through.  */
1999     case 1:
2000       x.array [0] &= y.array [0];
2001       break;
2002     default:
2003       abort ();
2004     }
2005   return x;
2006 }
2007
2008 static INLINE i386_operand_type
2009 operand_type_and_not (i386_operand_type x, i386_operand_type y)
2010 {
2011   gas_assert (y.bitfield.class == ClassNone);
2012   gas_assert (y.bitfield.instance == InstanceNone);
2013
2014   switch (ARRAY_SIZE (x.array))
2015     {
2016     case 3:
2017       x.array [2] &= ~y.array [2];
2018       /* Fall through.  */
2019     case 2:
2020       x.array [1] &= ~y.array [1];
2021       /* Fall through.  */
2022     case 1:
2023       x.array [0] &= ~y.array [0];
2024       break;
2025     default:
2026       abort ();
2027     }
2028   return x;
2029 }
2030
2031 static INLINE i386_operand_type
2032 operand_type_or (i386_operand_type x, i386_operand_type y)
2033 {
2034   gas_assert (x.bitfield.class == ClassNone ||
2035               y.bitfield.class == ClassNone ||
2036               x.bitfield.class == y.bitfield.class);
2037   gas_assert (x.bitfield.instance == InstanceNone ||
2038               y.bitfield.instance == InstanceNone ||
2039               x.bitfield.instance == y.bitfield.instance);
2040
2041   switch (ARRAY_SIZE (x.array))
2042     {
2043     case 3:
2044       x.array [2] |= y.array [2];
2045       /* Fall through.  */
2046     case 2:
2047       x.array [1] |= y.array [1];
2048       /* Fall through.  */
2049     case 1:
2050       x.array [0] |= y.array [0];
2051       break;
2052     default:
2053       abort ();
2054     }
2055   return x;
2056 }
2057
2058 static INLINE i386_operand_type
2059 operand_type_xor (i386_operand_type x, i386_operand_type y)
2060 {
2061   gas_assert (y.bitfield.class == ClassNone);
2062   gas_assert (y.bitfield.instance == InstanceNone);
2063
2064   switch (ARRAY_SIZE (x.array))
2065     {
2066     case 3:
2067       x.array [2] ^= y.array [2];
2068       /* Fall through.  */
2069     case 2:
2070       x.array [1] ^= y.array [1];
2071       /* Fall through.  */
2072     case 1:
2073       x.array [0] ^= y.array [0];
2074       break;
2075     default:
2076       abort ();
2077     }
2078   return x;
2079 }
2080
2081 static const i386_operand_type disp16 = OPERAND_TYPE_DISP16;
2082 static const i386_operand_type disp32 = OPERAND_TYPE_DISP32;
2083 static const i386_operand_type disp32s = OPERAND_TYPE_DISP32S;
2084 static const i386_operand_type disp16_32 = OPERAND_TYPE_DISP16_32;
2085 static const i386_operand_type anydisp = OPERAND_TYPE_ANYDISP;
2086 static const i386_operand_type anyimm = OPERAND_TYPE_ANYIMM;
2087 static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM;
2088 static const i386_operand_type regmask = OPERAND_TYPE_REGMASK;
2089 static const i386_operand_type imm8 = OPERAND_TYPE_IMM8;
2090 static const i386_operand_type imm8s = OPERAND_TYPE_IMM8S;
2091 static const i386_operand_type imm16 = OPERAND_TYPE_IMM16;
2092 static const i386_operand_type imm32 = OPERAND_TYPE_IMM32;
2093 static const i386_operand_type imm32s = OPERAND_TYPE_IMM32S;
2094 static const i386_operand_type imm64 = OPERAND_TYPE_IMM64;
2095 static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32;
2096 static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S;
2097 static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S;
2098
2099 enum operand_type
2100 {
2101   reg,
2102   imm,
2103   disp,
2104   anymem
2105 };
2106
2107 static INLINE int
2108 operand_type_check (i386_operand_type t, enum operand_type c)
2109 {
2110   switch (c)
2111     {
2112     case reg:
2113       return t.bitfield.class == Reg;
2114
2115     case imm:
2116       return (t.bitfield.imm8
2117               || t.bitfield.imm8s
2118               || t.bitfield.imm16
2119               || t.bitfield.imm32
2120               || t.bitfield.imm32s
2121               || t.bitfield.imm64);
2122
2123     case disp:
2124       return (t.bitfield.disp8
2125               || t.bitfield.disp16
2126               || t.bitfield.disp32
2127               || t.bitfield.disp32s
2128               || t.bitfield.disp64);
2129
2130     case anymem:
2131       return (t.bitfield.disp8
2132               || t.bitfield.disp16
2133               || t.bitfield.disp32
2134               || t.bitfield.disp32s
2135               || t.bitfield.disp64
2136               || t.bitfield.baseindex);
2137
2138     default:
2139       abort ();
2140     }
2141
2142   return 0;
2143 }
2144
2145 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2146    between operand GIVEN and opeand WANTED for instruction template T.  */
2147
2148 static INLINE int
2149 match_operand_size (const insn_template *t, unsigned int wanted,
2150                     unsigned int given)
2151 {
2152   return !((i.types[given].bitfield.byte
2153             && !t->operand_types[wanted].bitfield.byte)
2154            || (i.types[given].bitfield.word
2155                && !t->operand_types[wanted].bitfield.word)
2156            || (i.types[given].bitfield.dword
2157                && !t->operand_types[wanted].bitfield.dword)
2158            || (i.types[given].bitfield.qword
2159                && !t->operand_types[wanted].bitfield.qword)
2160            || (i.types[given].bitfield.tbyte
2161                && !t->operand_types[wanted].bitfield.tbyte));
2162 }
2163
2164 /* Return 1 if there is no conflict in SIMD register between operand
2165    GIVEN and opeand WANTED for instruction template T.  */
2166
2167 static INLINE int
2168 match_simd_size (const insn_template *t, unsigned int wanted,
2169                  unsigned int given)
2170 {
2171   return !((i.types[given].bitfield.xmmword
2172             && !t->operand_types[wanted].bitfield.xmmword)
2173            || (i.types[given].bitfield.ymmword
2174                && !t->operand_types[wanted].bitfield.ymmword)
2175            || (i.types[given].bitfield.zmmword
2176                && !t->operand_types[wanted].bitfield.zmmword)
2177            || (i.types[given].bitfield.tmmword
2178                && !t->operand_types[wanted].bitfield.tmmword));
2179 }
2180
2181 /* Return 1 if there is no conflict in any size between operand GIVEN
2182    and opeand WANTED for instruction template T.  */
2183
2184 static INLINE int
2185 match_mem_size (const insn_template *t, unsigned int wanted,
2186                 unsigned int given)
2187 {
2188   return (match_operand_size (t, wanted, given)
2189           && !((i.types[given].bitfield.unspecified
2190                 && !i.broadcast
2191                 && !t->operand_types[wanted].bitfield.unspecified)
2192                || (i.types[given].bitfield.fword
2193                    && !t->operand_types[wanted].bitfield.fword)
2194                /* For scalar opcode templates to allow register and memory
2195                   operands at the same time, some special casing is needed
2196                   here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2197                   down-conversion vpmov*.  */
2198                || ((t->operand_types[wanted].bitfield.class == RegSIMD
2199                     && t->operand_types[wanted].bitfield.byte
2200                        + t->operand_types[wanted].bitfield.word
2201                        + t->operand_types[wanted].bitfield.dword
2202                        + t->operand_types[wanted].bitfield.qword
2203                        > !!t->opcode_modifier.broadcast)
2204                    ? (i.types[given].bitfield.xmmword
2205                       || i.types[given].bitfield.ymmword
2206                       || i.types[given].bitfield.zmmword)
2207                    : !match_simd_size(t, wanted, given))));
2208 }
2209
2210 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2211    operands for instruction template T, and it has MATCH_REVERSE set if there
2212    is no size conflict on any operands for the template with operands reversed
2213    (and the template allows for reversing in the first place).  */
2214
2215 #define MATCH_STRAIGHT 1
2216 #define MATCH_REVERSE  2
2217
2218 static INLINE unsigned int
2219 operand_size_match (const insn_template *t)
2220 {
2221   unsigned int j, match = MATCH_STRAIGHT;
2222
2223   /* Don't check non-absolute jump instructions.  */
2224   if (t->opcode_modifier.jump
2225       && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2226     return match;
2227
2228   /* Check memory and accumulator operand size.  */
2229   for (j = 0; j < i.operands; j++)
2230     {
2231       if (i.types[j].bitfield.class != Reg
2232           && i.types[j].bitfield.class != RegSIMD
2233           && t->opcode_modifier.anysize)
2234         continue;
2235
2236       if (t->operand_types[j].bitfield.class == Reg
2237           && !match_operand_size (t, j, j))
2238         {
2239           match = 0;
2240           break;
2241         }
2242
2243       if (t->operand_types[j].bitfield.class == RegSIMD
2244           && !match_simd_size (t, j, j))
2245         {
2246           match = 0;
2247           break;
2248         }
2249
2250       if (t->operand_types[j].bitfield.instance == Accum
2251           && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2252         {
2253           match = 0;
2254           break;
2255         }
2256
2257       if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2258         {
2259           match = 0;
2260           break;
2261         }
2262     }
2263
2264   if (!t->opcode_modifier.d)
2265     {
2266     mismatch:
2267       if (!match)
2268         i.error = operand_size_mismatch;
2269       return match;
2270     }
2271
2272   /* Check reverse.  */
2273   gas_assert (i.operands >= 2 && i.operands <= 3);
2274
2275   for (j = 0; j < i.operands; j++)
2276     {
2277       unsigned int given = i.operands - j - 1;
2278
2279       if (t->operand_types[j].bitfield.class == Reg
2280           && !match_operand_size (t, j, given))
2281         goto mismatch;
2282
2283       if (t->operand_types[j].bitfield.class == RegSIMD
2284           && !match_simd_size (t, j, given))
2285         goto mismatch;
2286
2287       if (t->operand_types[j].bitfield.instance == Accum
2288           && (!match_operand_size (t, j, given)
2289               || !match_simd_size (t, j, given)))
2290         goto mismatch;
2291
2292       if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2293         goto mismatch;
2294     }
2295
2296   return match | MATCH_REVERSE;
2297 }
2298
2299 static INLINE int
2300 operand_type_match (i386_operand_type overlap,
2301                     i386_operand_type given)
2302 {
2303   i386_operand_type temp = overlap;
2304
2305   temp.bitfield.unspecified = 0;
2306   temp.bitfield.byte = 0;
2307   temp.bitfield.word = 0;
2308   temp.bitfield.dword = 0;
2309   temp.bitfield.fword = 0;
2310   temp.bitfield.qword = 0;
2311   temp.bitfield.tbyte = 0;
2312   temp.bitfield.xmmword = 0;
2313   temp.bitfield.ymmword = 0;
2314   temp.bitfield.zmmword = 0;
2315   temp.bitfield.tmmword = 0;
2316   if (operand_type_all_zero (&temp))
2317     goto mismatch;
2318
2319   if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2320     return 1;
2321
2322  mismatch:
2323   i.error = operand_type_mismatch;
2324   return 0;
2325 }
2326
2327 /* If given types g0 and g1 are registers they must be of the same type
2328    unless the expected operand type register overlap is null.
2329    Some Intel syntax memory operand size checking also happens here.  */
2330
2331 static INLINE int
2332 operand_type_register_match (i386_operand_type g0,
2333                              i386_operand_type t0,
2334                              i386_operand_type g1,
2335                              i386_operand_type t1)
2336 {
2337   if (g0.bitfield.class != Reg
2338       && g0.bitfield.class != RegSIMD
2339       && (!operand_type_check (g0, anymem)
2340           || g0.bitfield.unspecified
2341           || (t0.bitfield.class != Reg
2342               && t0.bitfield.class != RegSIMD)))
2343     return 1;
2344
2345   if (g1.bitfield.class != Reg
2346       && g1.bitfield.class != RegSIMD
2347       && (!operand_type_check (g1, anymem)
2348           || g1.bitfield.unspecified
2349           || (t1.bitfield.class != Reg
2350               && t1.bitfield.class != RegSIMD)))
2351     return 1;
2352
2353   if (g0.bitfield.byte == g1.bitfield.byte
2354       && g0.bitfield.word == g1.bitfield.word
2355       && g0.bitfield.dword == g1.bitfield.dword
2356       && g0.bitfield.qword == g1.bitfield.qword
2357       && g0.bitfield.xmmword == g1.bitfield.xmmword
2358       && g0.bitfield.ymmword == g1.bitfield.ymmword
2359       && g0.bitfield.zmmword == g1.bitfield.zmmword)
2360     return 1;
2361
2362   if (!(t0.bitfield.byte & t1.bitfield.byte)
2363       && !(t0.bitfield.word & t1.bitfield.word)
2364       && !(t0.bitfield.dword & t1.bitfield.dword)
2365       && !(t0.bitfield.qword & t1.bitfield.qword)
2366       && !(t0.bitfield.xmmword & t1.bitfield.xmmword)
2367       && !(t0.bitfield.ymmword & t1.bitfield.ymmword)
2368       && !(t0.bitfield.zmmword & t1.bitfield.zmmword))
2369     return 1;
2370
2371   i.error = register_type_mismatch;
2372
2373   return 0;
2374 }
2375
2376 static INLINE unsigned int
2377 register_number (const reg_entry *r)
2378 {
2379   unsigned int nr = r->reg_num;
2380
2381   if (r->reg_flags & RegRex)
2382     nr += 8;
2383
2384   if (r->reg_flags & RegVRex)
2385     nr += 16;
2386
2387   return nr;
2388 }
2389
2390 static INLINE unsigned int
2391 mode_from_disp_size (i386_operand_type t)
2392 {
2393   if (t.bitfield.disp8)
2394     return 1;
2395   else if (t.bitfield.disp16
2396            || t.bitfield.disp32
2397            || t.bitfield.disp32s)
2398     return 2;
2399   else
2400     return 0;
2401 }
2402
2403 static INLINE int
2404 fits_in_signed_byte (addressT num)
2405 {
2406   return num + 0x80 <= 0xff;
2407 }
2408
2409 static INLINE int
2410 fits_in_unsigned_byte (addressT num)
2411 {
2412   return num <= 0xff;
2413 }
2414
2415 static INLINE int
2416 fits_in_unsigned_word (addressT num)
2417 {
2418   return num <= 0xffff;
2419 }
2420
2421 static INLINE int
2422 fits_in_signed_word (addressT num)
2423 {
2424   return num + 0x8000 <= 0xffff;
2425 }
2426
2427 static INLINE int
2428 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2429 {
2430 #ifndef BFD64
2431   return 1;
2432 #else
2433   return num + 0x80000000 <= 0xffffffff;
2434 #endif
2435 }                               /* fits_in_signed_long() */
2436
2437 static INLINE int
2438 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2439 {
2440 #ifndef BFD64
2441   return 1;
2442 #else
2443   return num <= 0xffffffff;
2444 #endif
2445 }                               /* fits_in_unsigned_long() */
2446
2447 static INLINE int
2448 fits_in_disp8 (offsetT num)
2449 {
2450   int shift = i.memshift;
2451   unsigned int mask;
2452
2453   if (shift == -1)
2454     abort ();
2455
2456   mask = (1 << shift) - 1;
2457
2458   /* Return 0 if NUM isn't properly aligned.  */
2459   if ((num & mask))
2460     return 0;
2461
2462   /* Check if NUM will fit in 8bit after shift.  */
2463   return fits_in_signed_byte (num >> shift);
2464 }
2465
2466 static INLINE int
2467 fits_in_imm4 (offsetT num)
2468 {
2469   return (num & 0xf) == num;
2470 }
2471
2472 static i386_operand_type
2473 smallest_imm_type (offsetT num)
2474 {
2475   i386_operand_type t;
2476
2477   operand_type_set (&t, 0);
2478   t.bitfield.imm64 = 1;
2479
2480   if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2481     {
2482       /* This code is disabled on the 486 because all the Imm1 forms
2483          in the opcode table are slower on the i486.  They're the
2484          versions with the implicitly specified single-position
2485          displacement, which has another syntax if you really want to
2486          use that form.  */
2487       t.bitfield.imm1 = 1;
2488       t.bitfield.imm8 = 1;
2489       t.bitfield.imm8s = 1;
2490       t.bitfield.imm16 = 1;
2491       t.bitfield.imm32 = 1;
2492       t.bitfield.imm32s = 1;
2493     }
2494   else if (fits_in_signed_byte (num))
2495     {
2496       t.bitfield.imm8 = 1;
2497       t.bitfield.imm8s = 1;
2498       t.bitfield.imm16 = 1;
2499       t.bitfield.imm32 = 1;
2500       t.bitfield.imm32s = 1;
2501     }
2502   else if (fits_in_unsigned_byte (num))
2503     {
2504       t.bitfield.imm8 = 1;
2505       t.bitfield.imm16 = 1;
2506       t.bitfield.imm32 = 1;
2507       t.bitfield.imm32s = 1;
2508     }
2509   else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2510     {
2511       t.bitfield.imm16 = 1;
2512       t.bitfield.imm32 = 1;
2513       t.bitfield.imm32s = 1;
2514     }
2515   else if (fits_in_signed_long (num))
2516     {
2517       t.bitfield.imm32 = 1;
2518       t.bitfield.imm32s = 1;
2519     }
2520   else if (fits_in_unsigned_long (num))
2521     t.bitfield.imm32 = 1;
2522
2523   return t;
2524 }
2525
2526 static offsetT
2527 offset_in_range (offsetT val, int size)
2528 {
2529   addressT mask;
2530
2531   switch (size)
2532     {
2533     case 1: mask = ((addressT) 1 <<  8) - 1; break;
2534     case 2: mask = ((addressT) 1 << 16) - 1; break;
2535     case 4: mask = ((addressT) 2 << 31) - 1; break;
2536 #ifdef BFD64
2537     case 8: mask = ((addressT) 2 << 63) - 1; break;
2538 #endif
2539     default: abort ();
2540     }
2541
2542   if ((val & ~mask) != 0 && (val & ~mask) != ~mask)
2543     {
2544       char buf1[40], buf2[40];
2545
2546       sprint_value (buf1, val);
2547       sprint_value (buf2, val & mask);
2548       as_warn (_("%s shortened to %s"), buf1, buf2);
2549     }
2550   return val & mask;
2551 }
2552
2553 enum PREFIX_GROUP
2554 {
2555   PREFIX_EXIST = 0,
2556   PREFIX_LOCK,
2557   PREFIX_REP,
2558   PREFIX_DS,
2559   PREFIX_OTHER
2560 };
2561
2562 /* Returns
2563    a. PREFIX_EXIST if attempting to add a prefix where one from the
2564    same class already exists.
2565    b. PREFIX_LOCK if lock prefix is added.
2566    c. PREFIX_REP if rep/repne prefix is added.
2567    d. PREFIX_DS if ds prefix is added.
2568    e. PREFIX_OTHER if other prefix is added.
2569  */
2570
2571 static enum PREFIX_GROUP
2572 add_prefix (unsigned int prefix)
2573 {
2574   enum PREFIX_GROUP ret = PREFIX_OTHER;
2575   unsigned int q;
2576
2577   if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2578       && flag_code == CODE_64BIT)
2579     {
2580       if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2581           || (i.prefix[REX_PREFIX] & prefix & REX_R)
2582           || (i.prefix[REX_PREFIX] & prefix & REX_X)
2583           || (i.prefix[REX_PREFIX] & prefix & REX_B))
2584         ret = PREFIX_EXIST;
2585       q = REX_PREFIX;
2586     }
2587   else
2588     {
2589       switch (prefix)
2590         {
2591         default:
2592           abort ();
2593
2594         case DS_PREFIX_OPCODE:
2595           ret = PREFIX_DS;
2596           /* Fall through.  */
2597         case CS_PREFIX_OPCODE:
2598         case ES_PREFIX_OPCODE:
2599         case FS_PREFIX_OPCODE:
2600         case GS_PREFIX_OPCODE:
2601         case SS_PREFIX_OPCODE:
2602           q = SEG_PREFIX;
2603           break;
2604
2605         case REPNE_PREFIX_OPCODE:
2606         case REPE_PREFIX_OPCODE:
2607           q = REP_PREFIX;
2608           ret = PREFIX_REP;
2609           break;
2610
2611         case LOCK_PREFIX_OPCODE:
2612           q = LOCK_PREFIX;
2613           ret = PREFIX_LOCK;
2614           break;
2615
2616         case FWAIT_OPCODE:
2617           q = WAIT_PREFIX;
2618           break;
2619
2620         case ADDR_PREFIX_OPCODE:
2621           q = ADDR_PREFIX;
2622           break;
2623
2624         case DATA_PREFIX_OPCODE:
2625           q = DATA_PREFIX;
2626           break;
2627         }
2628       if (i.prefix[q] != 0)
2629         ret = PREFIX_EXIST;
2630     }
2631
2632   if (ret)
2633     {
2634       if (!i.prefix[q])
2635         ++i.prefixes;
2636       i.prefix[q] |= prefix;
2637     }
2638   else
2639     as_bad (_("same type of prefix used twice"));
2640
2641   return ret;
2642 }
2643
2644 static void
2645 update_code_flag (int value, int check)
2646 {
2647   PRINTF_LIKE ((*as_error));
2648
2649   flag_code = (enum flag_code) value;
2650   if (flag_code == CODE_64BIT)
2651     {
2652       cpu_arch_flags.bitfield.cpu64 = 1;
2653       cpu_arch_flags.bitfield.cpuno64 = 0;
2654     }
2655   else
2656     {
2657       cpu_arch_flags.bitfield.cpu64 = 0;
2658       cpu_arch_flags.bitfield.cpuno64 = 1;
2659     }
2660   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
2661     {
2662       if (check)
2663         as_error = as_fatal;
2664       else
2665         as_error = as_bad;
2666       (*as_error) (_("64bit mode not supported on `%s'."),
2667                    cpu_arch_name ? cpu_arch_name : default_arch);
2668     }
2669   if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2670     {
2671       if (check)
2672         as_error = as_fatal;
2673       else
2674         as_error = as_bad;
2675       (*as_error) (_("32bit mode not supported on `%s'."),
2676                    cpu_arch_name ? cpu_arch_name : default_arch);
2677     }
2678   stackop_size = '\0';
2679 }
2680
/* Switch to code mode VALUE via update_code_flag (), with CHECK being
   zero so that problems are reported through as_bad rather than
   as_fatal.  */

static void
set_code_flag (int value)
{
  const int check = 0;

  update_code_flag (value, check);
}
2686
2687 static void
2688 set_16bit_gcc_code_flag (int new_code_flag)
2689 {
2690   flag_code = (enum flag_code) new_code_flag;
2691   if (flag_code != CODE_16BIT)
2692     abort ();
2693   cpu_arch_flags.bitfield.cpu64 = 0;
2694   cpu_arch_flags.bitfield.cpuno64 = 1;
2695   stackop_size = LONG_MNEM_SUFFIX;
2696 }
2697
2698 static void
2699 set_intel_syntax (int syntax_flag)
2700 {
2701   /* Find out if register prefixing is specified.  */
2702   int ask_naked_reg = 0;
2703
2704   SKIP_WHITESPACE ();
2705   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2706     {
2707       char *string;
2708       int e = get_symbol_name (&string);
2709
2710       if (strcmp (string, "prefix") == 0)
2711         ask_naked_reg = 1;
2712       else if (strcmp (string, "noprefix") == 0)
2713         ask_naked_reg = -1;
2714       else
2715         as_bad (_("bad argument to syntax directive."));
2716       (void) restore_line_pointer (e);
2717     }
2718   demand_empty_rest_of_line ();
2719
2720   intel_syntax = syntax_flag;
2721
2722   if (ask_naked_reg == 0)
2723     allow_naked_reg = (intel_syntax
2724                        && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2725   else
2726     allow_naked_reg = (ask_naked_reg < 0);
2727
2728   expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2729
2730   identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2731   identifier_chars['$'] = intel_syntax ? '$' : 0;
2732   register_prefix = allow_naked_reg ? "" : "%";
2733 }
2734
2735 static void
2736 set_intel_mnemonic (int mnemonic_flag)
2737 {
2738   intel_mnemonic = mnemonic_flag;
2739 }
2740
2741 static void
2742 set_allow_index_reg (int flag)
2743 {
2744   allow_index_reg = flag;
2745 }
2746
2747 static void
2748 set_check (int what)
2749 {
2750   enum check_kind *kind;
2751   const char *str;
2752
2753   if (what)
2754     {
2755       kind = &operand_check;
2756       str = "operand";
2757     }
2758   else
2759     {
2760       kind = &sse_check;
2761       str = "sse";
2762     }
2763
2764   SKIP_WHITESPACE ();
2765
2766   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2767     {
2768       char *string;
2769       int e = get_symbol_name (&string);
2770
2771       if (strcmp (string, "none") == 0)
2772         *kind = check_none;
2773       else if (strcmp (string, "warning") == 0)
2774         *kind = check_warning;
2775       else if (strcmp (string, "error") == 0)
2776         *kind = check_error;
2777       else
2778         as_bad (_("bad argument to %s_check directive."), str);
2779       (void) restore_line_pointer (e);
2780     }
2781   else
2782     as_bad (_("missing argument for %s_check directive"), str);
2783
2784   demand_empty_rest_of_line ();
2785 }
2786
2787 static void
2788 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2789                            i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2790 {
2791 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2792   static const char *arch;
2793
2794   /* Intel LIOM is only supported on ELF.  */
2795   if (!IS_ELF)
2796     return;
2797
2798   if (!arch)
2799     {
2800       /* Use cpu_arch_name if it is set in md_parse_option.  Otherwise
2801          use default_arch.  */
2802       arch = cpu_arch_name;
2803       if (!arch)
2804         arch = default_arch;
2805     }
2806
2807   /* If we are targeting Intel MCU, we must enable it.  */
2808   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_IAMCU
2809       || new_flag.bitfield.cpuiamcu)
2810     return;
2811
2812   /* If we are targeting Intel L1OM, we must enable it.  */
2813   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_L1OM
2814       || new_flag.bitfield.cpul1om)
2815     return;
2816
2817   /* If we are targeting Intel K1OM, we must enable it.  */
2818   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_K1OM
2819       || new_flag.bitfield.cpuk1om)
2820     return;
2821
2822   as_bad (_("`%s' is not supported on `%s'"), name, arch);
2823 #endif
2824 }
2825
2826 static void
2827 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2828 {
2829   SKIP_WHITESPACE ();
2830
2831   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2832     {
2833       char *string;
2834       int e = get_symbol_name (&string);
2835       unsigned int j;
2836       i386_cpu_flags flags;
2837
2838       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2839         {
2840           if (strcmp (string, cpu_arch[j].name) == 0)
2841             {
2842               check_cpu_arch_compatible (string, cpu_arch[j].flags);
2843
2844               if (*string != '.')
2845                 {
2846                   cpu_arch_name = cpu_arch[j].name;
2847                   cpu_sub_arch_name = NULL;
2848                   cpu_arch_flags = cpu_arch[j].flags;
2849                   if (flag_code == CODE_64BIT)
2850                     {
2851                       cpu_arch_flags.bitfield.cpu64 = 1;
2852                       cpu_arch_flags.bitfield.cpuno64 = 0;
2853                     }
2854                   else
2855                     {
2856                       cpu_arch_flags.bitfield.cpu64 = 0;
2857                       cpu_arch_flags.bitfield.cpuno64 = 1;
2858                     }
2859                   cpu_arch_isa = cpu_arch[j].type;
2860                   cpu_arch_isa_flags = cpu_arch[j].flags;
2861                   if (!cpu_arch_tune_set)
2862                     {
2863                       cpu_arch_tune = cpu_arch_isa;
2864                       cpu_arch_tune_flags = cpu_arch_isa_flags;
2865                     }
2866                   break;
2867                 }
2868
2869               flags = cpu_flags_or (cpu_arch_flags,
2870                                     cpu_arch[j].flags);
2871
2872               if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2873                 {
2874                   if (cpu_sub_arch_name)
2875                     {
2876                       char *name = cpu_sub_arch_name;
2877                       cpu_sub_arch_name = concat (name,
2878                                                   cpu_arch[j].name,
2879                                                   (const char *) NULL);
2880                       free (name);
2881                     }
2882                   else
2883                     cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
2884                   cpu_arch_flags = flags;
2885                   cpu_arch_isa_flags = flags;
2886                 }
2887               else
2888                 cpu_arch_isa_flags
2889                   = cpu_flags_or (cpu_arch_isa_flags,
2890                                   cpu_arch[j].flags);
2891               (void) restore_line_pointer (e);
2892               demand_empty_rest_of_line ();
2893               return;
2894             }
2895         }
2896
2897       if (*string == '.' && j >= ARRAY_SIZE (cpu_arch))
2898         {
2899           /* Disable an ISA extension.  */
2900           for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
2901             if (strcmp (string + 1, cpu_noarch [j].name) == 0)
2902               {
2903                 flags = cpu_flags_and_not (cpu_arch_flags,
2904                                            cpu_noarch[j].flags);
2905                 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2906                   {
2907                     if (cpu_sub_arch_name)
2908                       {
2909                         char *name = cpu_sub_arch_name;
2910                         cpu_sub_arch_name = concat (name, string,
2911                                                     (const char *) NULL);
2912                         free (name);
2913                       }
2914                     else
2915                       cpu_sub_arch_name = xstrdup (string);
2916                     cpu_arch_flags = flags;
2917                     cpu_arch_isa_flags = flags;
2918                   }
2919                 (void) restore_line_pointer (e);
2920                 demand_empty_rest_of_line ();
2921                 return;
2922               }
2923
2924           j = ARRAY_SIZE (cpu_arch);
2925         }
2926
2927       if (j >= ARRAY_SIZE (cpu_arch))
2928         as_bad (_("no such architecture: `%s'"), string);
2929
2930       *input_line_pointer = e;
2931     }
2932   else
2933     as_bad (_("missing cpu architecture"));
2934
2935   no_cond_jump_promotion = 0;
2936   if (*input_line_pointer == ','
2937       && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2938     {
2939       char *string;
2940       char e;
2941
2942       ++input_line_pointer;
2943       e = get_symbol_name (&string);
2944
2945       if (strcmp (string, "nojumps") == 0)
2946         no_cond_jump_promotion = 1;
2947       else if (strcmp (string, "jumps") == 0)
2948         ;
2949       else
2950         as_bad (_("no such architecture modifier: `%s'"), string);
2951
2952       (void) restore_line_pointer (e);
2953     }
2954
2955   demand_empty_rest_of_line ();
2956 }
2957
2958 enum bfd_architecture
2959 i386_arch (void)
2960 {
2961   if (cpu_arch_isa == PROCESSOR_L1OM)
2962     {
2963       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2964           || flag_code != CODE_64BIT)
2965         as_fatal (_("Intel L1OM is 64bit ELF only"));
2966       return bfd_arch_l1om;
2967     }
2968   else if (cpu_arch_isa == PROCESSOR_K1OM)
2969     {
2970       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2971           || flag_code != CODE_64BIT)
2972         as_fatal (_("Intel K1OM is 64bit ELF only"));
2973       return bfd_arch_k1om;
2974     }
2975   else if (cpu_arch_isa == PROCESSOR_IAMCU)
2976     {
2977       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2978           || flag_code == CODE_64BIT)
2979         as_fatal (_("Intel MCU is 32bit ELF only"));
2980       return bfd_arch_iamcu;
2981     }
2982   else
2983     return bfd_arch_i386;
2984 }
2985
2986 unsigned long
2987 i386_mach (void)
2988 {
2989   if (!strncmp (default_arch, "x86_64", 6))
2990     {
2991       if (cpu_arch_isa == PROCESSOR_L1OM)
2992         {
2993           if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2994               || default_arch[6] != '\0')
2995             as_fatal (_("Intel L1OM is 64bit ELF only"));
2996           return bfd_mach_l1om;
2997         }
2998       else if (cpu_arch_isa == PROCESSOR_K1OM)
2999         {
3000           if (OUTPUT_FLAVOR != bfd_target_elf_flavour
3001               || default_arch[6] != '\0')
3002             as_fatal (_("Intel K1OM is 64bit ELF only"));
3003           return bfd_mach_k1om;
3004         }
3005       else if (default_arch[6] == '\0')
3006         return bfd_mach_x86_64;
3007       else
3008         return bfd_mach_x64_32;
3009     }
3010   else if (!strcmp (default_arch, "i386")
3011            || !strcmp (default_arch, "iamcu"))
3012     {
3013       if (cpu_arch_isa == PROCESSOR_IAMCU)
3014         {
3015           if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
3016             as_fatal (_("Intel MCU is 32bit ELF only"));
3017           return bfd_mach_i386_iamcu;
3018         }
3019       else
3020         return bfd_mach_i386_i386;
3021     }
3022   else
3023     as_fatal (_("unknown architecture"));
3024 }
3025 \f
3026 void
3027 md_begin (void)
3028 {
3029   const char *hash_err;
3030
3031   /* Support pseudo prefixes like {disp32}.  */
3032   lex_type ['{'] = LEX_BEGIN_NAME;
3033
3034   /* Initialize op_hash hash table.  */
3035   op_hash = hash_new ();
3036
3037   {
3038     const insn_template *optab;
3039     templates *core_optab;
3040
3041     /* Setup for loop.  */
3042     optab = i386_optab;
3043     core_optab = XNEW (templates);
3044     core_optab->start = optab;
3045
3046     while (1)
3047       {
3048         ++optab;
3049         if (optab->name == NULL
3050             || strcmp (optab->name, (optab - 1)->name) != 0)
3051           {
3052             /* different name --> ship out current template list;
3053                add to hash table; & begin anew.  */
3054             core_optab->end = optab;
3055             hash_err = hash_insert (op_hash,
3056                                     (optab - 1)->name,
3057                                     (void *) core_optab);
3058             if (hash_err)
3059               {
3060                 as_fatal (_("can't hash %s: %s"),
3061                           (optab - 1)->name,
3062                           hash_err);
3063               }
3064             if (optab->name == NULL)
3065               break;
3066             core_optab = XNEW (templates);
3067             core_optab->start = optab;
3068           }
3069       }
3070   }
3071
3072   /* Initialize reg_hash hash table.  */
3073   reg_hash = hash_new ();
3074   {
3075     const reg_entry *regtab;
3076     unsigned int regtab_size = i386_regtab_size;
3077
3078     for (regtab = i386_regtab; regtab_size--; regtab++)
3079       {
3080         hash_err = hash_insert (reg_hash, regtab->reg_name, (void *) regtab);
3081         if (hash_err)
3082           as_fatal (_("can't hash %s: %s"),
3083                     regtab->reg_name,
3084                     hash_err);
3085       }
3086   }
3087
3088   /* Fill in lexical tables:  mnemonic_chars, operand_chars.  */
3089   {
3090     int c;
3091     char *p;
3092
3093     for (c = 0; c < 256; c++)
3094       {
3095         if (ISDIGIT (c))
3096           {
3097             digit_chars[c] = c;
3098             mnemonic_chars[c] = c;
3099             register_chars[c] = c;
3100             operand_chars[c] = c;
3101           }
3102         else if (ISLOWER (c))
3103           {
3104             mnemonic_chars[c] = c;
3105             register_chars[c] = c;
3106             operand_chars[c] = c;
3107           }
3108         else if (ISUPPER (c))
3109           {
3110             mnemonic_chars[c] = TOLOWER (c);
3111             register_chars[c] = mnemonic_chars[c];
3112             operand_chars[c] = c;
3113           }
3114         else if (c == '{' || c == '}')
3115           {
3116             mnemonic_chars[c] = c;
3117             operand_chars[c] = c;
3118           }
3119
3120         if (ISALPHA (c) || ISDIGIT (c))
3121           identifier_chars[c] = c;
3122         else if (c >= 128)
3123           {
3124             identifier_chars[c] = c;
3125             operand_chars[c] = c;
3126           }
3127       }
3128
3129 #ifdef LEX_AT
3130     identifier_chars['@'] = '@';
3131 #endif
3132 #ifdef LEX_QM
3133     identifier_chars['?'] = '?';
3134     operand_chars['?'] = '?';
3135 #endif
3136     digit_chars['-'] = '-';
3137     mnemonic_chars['_'] = '_';
3138     mnemonic_chars['-'] = '-';
3139     mnemonic_chars['.'] = '.';
3140     identifier_chars['_'] = '_';
3141     identifier_chars['.'] = '.';
3142
3143     for (p = operand_special_chars; *p != '\0'; p++)
3144       operand_chars[(unsigned char) *p] = *p;
3145   }
3146
3147   if (flag_code == CODE_64BIT)
3148     {
3149 #if defined (OBJ_COFF) && defined (TE_PE)
3150       x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3151                                   ? 32 : 16);
3152 #else
3153       x86_dwarf2_return_column = 16;
3154 #endif
3155       x86_cie_data_alignment = -8;
3156     }
3157   else
3158     {
3159       x86_dwarf2_return_column = 8;
3160       x86_cie_data_alignment = -4;
3161     }
3162
3163   /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3164      can be turned into BRANCH_PREFIX frag.  */
3165   if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3166     abort ();
3167 }
3168
/* Report on the two hash tables built by md_begin: pass op_hash and
   reg_hash, each with a descriptive label, to hash_print_statistics
   for output on FILE.  */

void
i386_print_statistics (FILE *file)
{
  hash_print_statistics (file, "i386 opcode", op_hash);
  hash_print_statistics (file, "i386 register", reg_hash);
}
3175 \f
3176 #ifdef DEBUG386
3177
3178 /* Debugging routines for md_assemble.  */
3179 static void pte (insn_template *);
3180 static void pt (i386_operand_type);
3181 static void pe (expressionS *);
3182 static void ps (symbolS *);
3183
3184 static void
3185 pi (const char *line, i386_insn *x)
3186 {
3187   unsigned int j;
3188
3189   fprintf (stdout, "%s: template ", line);
3190   pte (&x->tm);
3191   fprintf (stdout, "  address: base %s  index %s  scale %x\n",
3192            x->base_reg ? x->base_reg->reg_name : "none",
3193            x->index_reg ? x->index_reg->reg_name : "none",
3194            x->log2_scale_factor);
3195   fprintf (stdout, "  modrm:  mode %x  reg %x  reg/mem %x\n",
3196            x->rm.mode, x->rm.reg, x->rm.regmem);
3197   fprintf (stdout, "  sib:  base %x  index %x  scale %x\n",
3198            x->sib.base, x->sib.index, x->sib.scale);
3199   fprintf (stdout, "  rex: 64bit %x  extX %x  extY %x  extZ %x\n",
3200            (x->rex & REX_W) != 0,
3201            (x->rex & REX_R) != 0,
3202            (x->rex & REX_X) != 0,
3203            (x->rex & REX_B) != 0);
3204   for (j = 0; j < x->operands; j++)
3205     {
3206       fprintf (stdout, "    #%d:  ", j + 1);
3207       pt (x->types[j]);
3208       fprintf (stdout, "\n");
3209       if (x->types[j].bitfield.class == Reg
3210           || x->types[j].bitfield.class == RegMMX
3211           || x->types[j].bitfield.class == RegSIMD
3212           || x->types[j].bitfield.class == RegMask
3213           || x->types[j].bitfield.class == SReg
3214           || x->types[j].bitfield.class == RegCR
3215           || x->types[j].bitfield.class == RegDR
3216           || x->types[j].bitfield.class == RegTR
3217           || x->types[j].bitfield.class == RegBND)
3218         fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3219       if (operand_type_check (x->types[j], imm))
3220         pe (x->op[j].imms);
3221       if (operand_type_check (x->types[j], disp))
3222         pe (x->op[j].disps);
3223     }
3224 }
3225
3226 static void
3227 pte (insn_template *t)
3228 {
3229   unsigned int j;
3230   fprintf (stdout, " %d operands ", t->operands);
3231   fprintf (stdout, "opcode %x ", t->base_opcode);
3232   if (t->extension_opcode != None)
3233     fprintf (stdout, "ext %x ", t->extension_opcode);
3234   if (t->opcode_modifier.d)
3235     fprintf (stdout, "D");
3236   if (t->opcode_modifier.w)
3237     fprintf (stdout, "W");
3238   fprintf (stdout, "\n");
3239   for (j = 0; j < t->operands; j++)
3240     {
3241       fprintf (stdout, "    #%d type ", j + 1);
3242       pt (t->operand_types[j]);
3243       fprintf (stdout, "\n");
3244     }
3245 }
3246
3247 static void
3248 pe (expressionS *e)
3249 {
3250   fprintf (stdout, "    operation     %d\n", e->X_op);
3251   fprintf (stdout, "    add_number    %ld (%lx)\n",
3252            (long) e->X_add_number, (long) e->X_add_number);
3253   if (e->X_add_symbol)
3254     {
3255       fprintf (stdout, "    add_symbol    ");
3256       ps (e->X_add_symbol);
3257       fprintf (stdout, "\n");
3258     }
3259   if (e->X_op_symbol)
3260     {
3261       fprintf (stdout, "    op_symbol    ");
3262       ps (e->X_op_symbol);
3263       fprintf (stdout, "\n");
3264     }
3265 }
3266
3267 static void
3268 ps (symbolS *s)
3269 {
3270   fprintf (stdout, "%s type %s%s",
3271            S_GET_NAME (s),
3272            S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3273            segment_name (S_GET_SEGMENT (s)));
3274 }
3275
/* Table used by pt () to describe an operand type: every entry whose
   MASK bits are all present in the operand type has its NAME printed.
   Entries are tried, and therefore printed, in table order.  */
static struct type_name
  {
    i386_operand_type mask;	/* Bits that must all be set.  */
    const char *name;		/* Label printed on a match.  */
  }
const type_names[] =
{
  { OPERAND_TYPE_REG8, "r8" },
  { OPERAND_TYPE_REG16, "r16" },
  { OPERAND_TYPE_REG32, "r32" },
  { OPERAND_TYPE_REG64, "r64" },
  { OPERAND_TYPE_ACC8, "acc8" },
  { OPERAND_TYPE_ACC16, "acc16" },
  { OPERAND_TYPE_ACC32, "acc32" },
  { OPERAND_TYPE_ACC64, "acc64" },
  { OPERAND_TYPE_IMM8, "i8" },
  /* NOTE(review): this row reuses the OPERAND_TYPE_IMM8 mask, so "i8s"
     is printed exactly when "i8" is.  Presumably a signed 8-bit
     immediate mask was intended (compare the IMM32 / IMM32S pair
     below) -- confirm.  */
  { OPERAND_TYPE_IMM8, "i8s" },
  { OPERAND_TYPE_IMM16, "i16" },
  { OPERAND_TYPE_IMM32, "i32" },
  { OPERAND_TYPE_IMM32S, "i32s" },
  { OPERAND_TYPE_IMM64, "i64" },
  { OPERAND_TYPE_IMM1, "i1" },
  { OPERAND_TYPE_BASEINDEX, "BaseIndex" },
  { OPERAND_TYPE_DISP8, "d8" },
  { OPERAND_TYPE_DISP16, "d16" },
  { OPERAND_TYPE_DISP32, "d32" },
  { OPERAND_TYPE_DISP32S, "d32s" },
  { OPERAND_TYPE_DISP64, "d64" },
  { OPERAND_TYPE_INOUTPORTREG, "InOutPortReg" },
  { OPERAND_TYPE_SHIFTCOUNT, "ShiftCount" },
  { OPERAND_TYPE_CONTROL, "control reg" },
  { OPERAND_TYPE_TEST, "test reg" },
  { OPERAND_TYPE_DEBUG, "debug reg" },
  { OPERAND_TYPE_FLOATREG, "FReg" },
  { OPERAND_TYPE_FLOATACC, "FAcc" },
  { OPERAND_TYPE_SREG, "SReg" },
  { OPERAND_TYPE_REGMMX, "rMMX" },
  { OPERAND_TYPE_REGXMM, "rXMM" },
  { OPERAND_TYPE_REGYMM, "rYMM" },
  { OPERAND_TYPE_REGZMM, "rZMM" },
  { OPERAND_TYPE_REGTMM, "rTMM" },
  { OPERAND_TYPE_REGMASK, "Mask reg" },
};
3319
3320 static void
3321 pt (i386_operand_type t)
3322 {
3323   unsigned int j;
3324   i386_operand_type a;
3325
3326   for (j = 0; j < ARRAY_SIZE (type_names); j++)
3327     {
3328       a = operand_type_and (t, type_names[j].mask);
3329       if (operand_type_equal (&a, &type_names[j].mask))
3330         fprintf (stdout, "%s, ",  type_names[j].name);
3331     }
3332   fflush (stdout);
3333 }
3334
3335 #endif /* DEBUG386 */
3336 \f
3337 static bfd_reloc_code_real_type
3338 reloc (unsigned int size,
3339        int pcrel,
3340        int sign,
3341        bfd_reloc_code_real_type other)
3342 {
3343   if (other != NO_RELOC)
3344     {
3345       reloc_howto_type *rel;
3346
3347       if (size == 8)
3348         switch (other)
3349           {
3350           case BFD_RELOC_X86_64_GOT32:
3351             return BFD_RELOC_X86_64_GOT64;
3352             break;
3353           case BFD_RELOC_X86_64_GOTPLT64:
3354             return BFD_RELOC_X86_64_GOTPLT64;
3355             break;
3356           case BFD_RELOC_X86_64_PLTOFF64:
3357             return BFD_RELOC_X86_64_PLTOFF64;
3358             break;
3359           case BFD_RELOC_X86_64_GOTPC32:
3360             other = BFD_RELOC_X86_64_GOTPC64;
3361             break;
3362           case BFD_RELOC_X86_64_GOTPCREL:
3363             other = BFD_RELOC_X86_64_GOTPCREL64;
3364             break;
3365           case BFD_RELOC_X86_64_TPOFF32:
3366             other = BFD_RELOC_X86_64_TPOFF64;
3367             break;
3368           case BFD_RELOC_X86_64_DTPOFF32:
3369             other = BFD_RELOC_X86_64_DTPOFF64;
3370             break;
3371           default:
3372             break;
3373           }
3374
3375 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3376       if (other == BFD_RELOC_SIZE32)
3377         {
3378           if (size == 8)
3379             other = BFD_RELOC_SIZE64;
3380           if (pcrel)
3381             {
3382               as_bad (_("there are no pc-relative size relocations"));
3383               return NO_RELOC;
3384             }
3385         }
3386 #endif
3387
3388       /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless.  */
3389       if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3390         sign = -1;
3391
3392       rel = bfd_reloc_type_lookup (stdoutput, other);
3393       if (!rel)
3394         as_bad (_("unknown relocation (%u)"), other);
3395       else if (size != bfd_get_reloc_size (rel))
3396         as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3397                 bfd_get_reloc_size (rel),
3398                 size);
3399       else if (pcrel && !rel->pc_relative)
3400         as_bad (_("non-pc-relative relocation for pc-relative field"));
3401       else if ((rel->complain_on_overflow == complain_overflow_signed
3402                 && !sign)
3403                || (rel->complain_on_overflow == complain_overflow_unsigned
3404                    && sign > 0))
3405         as_bad (_("relocated field and relocation type differ in signedness"));
3406       else
3407         return other;
3408       return NO_RELOC;
3409     }
3410
3411   if (pcrel)
3412     {
3413       if (!sign)
3414         as_bad (_("there are no unsigned pc-relative relocations"));
3415       switch (size)
3416         {
3417         case 1: return BFD_RELOC_8_PCREL;
3418         case 2: return BFD_RELOC_16_PCREL;
3419         case 4: return BFD_RELOC_32_PCREL;
3420         case 8: return BFD_RELOC_64_PCREL;
3421         }
3422       as_bad (_("cannot do %u byte pc-relative relocation"), size);
3423     }
3424   else
3425     {
3426       if (sign > 0)
3427         switch (size)
3428           {
3429           case 4: return BFD_RELOC_X86_64_32S;
3430           }
3431       else
3432         switch (size)
3433           {
3434           case 1: return BFD_RELOC_8;
3435           case 2: return BFD_RELOC_16;
3436           case 4: return BFD_RELOC_32;
3437           case 8: return BFD_RELOC_64;
3438           }
3439       as_bad (_("cannot do %s %u byte relocation"),
3440               sign > 0 ? "signed" : "unsigned", size);
3441     }
3442
3443   return NO_RELOC;
3444 }
3445
3446 /* Here we decide which fixups can be adjusted to make them relative to
3447    the beginning of the section instead of the symbol.  Basically we need
3448    to make sure that the dynamic relocations are done correctly, so in
3449    some cases we force the original symbol to be used.  */
3450
3451 int
3452 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3453 {
3454 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3455   if (!IS_ELF)
3456     return 1;
3457
3458   /* Don't adjust pc-relative references to merge sections in 64-bit
3459      mode.  */
3460   if (use_rela_relocations
3461       && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3462       && fixP->fx_pcrel)
3463     return 0;
3464
3465   /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3466      and changed later by validate_fix.  */
3467   if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3468       && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3469     return 0;
3470
3471   /* Adjust_reloc_syms doesn't know about the GOT.  Need to keep symbol
3472      for size relocations.  */
3473   if (fixP->fx_r_type == BFD_RELOC_SIZE32
3474       || fixP->fx_r_type == BFD_RELOC_SIZE64
3475       || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3476       || fixP->fx_r_type == BFD_RELOC_386_GOT32
3477       || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3478       || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3479       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3480       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3481       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3482       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3483       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3484       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3485       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3486       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3487       || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3488       || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3489       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3490       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3491       || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3492       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3493       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3494       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3495       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3496       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3497       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3498       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3499       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3500       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3501       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3502       || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3503       || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3504     return 0;
3505 #endif
3506   return 1;
3507 }
3508
/* Classify MNEMONIC by the kind of floating-point operand it takes.
   Returns:
     0  not a math operation (no leading 'f', or fxsave/fxrstor);
     1  any other 'f' mnemonic;
     2  integer operation (fi...);
     3  control or state operation: fldcw/fldenv, the non-waiting fn...
        forms other than fnop, frstor/frstpm, fsave, and
        fstcw/fstdw/fstenv/fsts[gw].

   The result is meaningful only for opcodes with (memory) operands, so
   opcodes that take no operands may be classified improperly here.  */

static int
intel_float_operand (const char *mnemonic)
{
  char second;

  if (mnemonic[0] != 'f')
    return 0; /* non-math */

  /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
     the fs segment override prefix are not currently handled because
     no call path can make opcodes without operands get here.  */
  second = mnemonic[1];

  if (second == 'i')
    return 2; /* integer op */

  if (second == 'l')
    {
      /* fldcw/fldenv */
      if (mnemonic[2] == 'd'
          && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
        return 3;
    }
  else if (second == 'n')
    {
      /* Non-waiting control op, unless it is fnop.  */
      if (mnemonic[2] != 'o')
        return 3;
    }
  else if (second == 'r')
    {
      /* frstor/frstpm */
      if (mnemonic[2] == 's')
        return 3;
    }
  else if (second == 's')
    {
      /* fsave */
      if (mnemonic[2] == 'a')
        return 3;
      /* fstcw, fstdw, fstenv, fsts[gw] */
      if (mnemonic[2] == 't'
          && (mnemonic[3] == 'c' || mnemonic[3] == 'd'
              || mnemonic[3] == 'e' || mnemonic[3] == 's'))
        return 3;
    }
  else if (second == 'x')
    {
      /* fxsave/fxrstor are not really math ops.  */
      if (mnemonic[2] == 'r' || mnemonic[2] == 's')
        return 0;
    }

  return 1;
}
3561
/* Build the VEX prefix.

   Fills in i.vex.length and i.vex.bytes[] for the instruction being
   assembled (the file-scope `i').  T points at the template entry for
   the instruction; it is consulted for the operand types when deriving
   the vector length, and its successor T[1] replaces i.tm when the
   operands are swapped and the template has no D modifier.

   Before encoding, two operand swaps are attempted.  Both apply only
   when i.rex is exactly REX_B and both leave i.rex without REX_B,
   which is one of the conditions tested further down for choosing the
   2-byte form.  They modify i.types[], i.op[], i.rm, i.rex and
   possibly i.tm / i.vex.register_specifier.  */

static void
build_vex_prefix (const insn_template *t)
{
  unsigned int register_specifier;
  unsigned int implied_prefix;
  unsigned int vector_length;
  unsigned int w;

  /* Check register specifier.  The value kept here is the inverted low
     four bits of the register number, or all ones when there is no
     such register.  */
  if (i.vex.register_specifier)
    {
      register_specifier =
        ~register_number (i.vex.register_specifier) & 0xf;
      gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
    }
  else
    register_specifier = 0xf;

  /* Use 2-byte VEX prefix by swapping destination and source operand
     if there are more than 1 register operand.  */
  if (i.reg_operands > 1
      && i.vec_encoding != vex_encoding_vex3
      && i.dir_encoding == dir_encoding_default
      && i.operands == i.reg_operands
      && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
      && i.tm.opcode_modifier.vexopcode == VEX0F
      && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
      && i.rex == REX_B)
    {
      unsigned int xchg = i.operands - 1;
      union i386_op temp_op;
      i386_operand_type temp_type;

      /* Exchange the first and the last operand.  */
      temp_type = i.types[xchg];
      i.types[xchg] = i.types[0];
      i.types[0] = temp_type;
      temp_op = i.op[xchg];
      i.op[xchg] = i.op[0];
      i.op[0] = temp_op;

      gas_assert (i.rm.mode == 3);

      /* Exchange ModRM.reg and ModRM.rm as well; the REX bit in use
         changes from REX_B to REX_R to match.  */
      i.rex = REX_R;
      xchg = i.rm.regmem;
      i.rm.regmem = i.rm.reg;
      i.rm.reg = xchg;

      /* Pick the opcode for the swapped operand order: either flip
         the direction bit(s) of the base opcode, or switch to the
         following template.  */
      if (i.tm.opcode_modifier.d)
        i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
                            ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
      else /* Use the next insn.  */
        i.tm = t[1];
    }

  /* Use 2-byte VEX prefix by swapping commutative source operands if there
     are no memory operands and at least 3 register ones.  */
  if (i.reg_operands >= 3
      && i.vec_encoding != vex_encoding_vex3
      && i.reg_operands == i.operands - i.imm_operands
      && i.tm.opcode_modifier.vex
      && i.tm.opcode_modifier.commutative
      && (i.tm.opcode_modifier.sse2avx || optimize > 1)
      && i.rex == REX_B
      && i.vex.register_specifier
      && !(i.vex.register_specifier->reg_flags & RegRex))
    {
      /* Index of the first non-immediate operand.  */
      unsigned int xchg = i.operands - i.reg_operands;
      union i386_op temp_op;
      i386_operand_type temp_type;

      gas_assert (i.tm.opcode_modifier.vexopcode == VEX0F);
      gas_assert (!i.tm.opcode_modifier.sae);
      gas_assert (operand_type_equal (&i.types[i.operands - 2],
                                      &i.types[i.operands - 3]));
      gas_assert (i.rm.mode == 3);

      /* Exchange the two operands at XCHG and XCHG + 1.  */
      temp_type = i.types[xchg];
      i.types[xchg] = i.types[xchg + 1];
      i.types[xchg + 1] = temp_type;
      temp_op = i.op[xchg];
      i.op[xchg] = i.op[xchg + 1];
      i.op[xchg + 1] = temp_op;

      /* Exchange the encodings too.  XCHG is reused for the full
         number of the register previously in ModRM.rm (the "| 8"
         folds in REX_B, which was set), and ModRM.rm receives the
         register number previously held by the register specifier.  */
      i.rex = 0;
      xchg = i.rm.regmem | 8;
      i.rm.regmem = ~register_specifier & 0xf;
      gas_assert (!(i.rm.regmem & 8));
      /* NOTE(review): this moves the register pointer by the
         difference between the two register numbers, which presumably
         relies on the entries of one register class being consecutive
         and in number order in the register table -- confirm.  */
      i.vex.register_specifier += xchg - i.rm.regmem;
      register_specifier = ~xchg & 0xf;
    }

  /* Vector length bit: taken from avxscalar for scalar templates, 1
     for VEX256 templates, otherwise derived from the operands.  */
  if (i.tm.opcode_modifier.vex == VEXScalar)
    vector_length = avxscalar;
  else if (i.tm.opcode_modifier.vex == VEX256)
    vector_length = 1;
  else
    {
      unsigned int op;

      /* Determine vector length from the last multi-length vector
         operand.  */
      vector_length = 0;
      for (op = t->operands; op--;)
        if (t->operand_types[op].bitfield.xmmword
            && t->operand_types[op].bitfield.ymmword
            && i.types[op].bitfield.ymmword)
          {
            vector_length = 1;
            break;
          }
    }

  /* The byte of base_opcode just above its opcode_length opcode bytes
     names the legacy prefix that the VEX prefix has to imply.  */
  switch ((i.tm.base_opcode >> (i.tm.opcode_length << 3)) & 0xff)
    {
    case 0:
      implied_prefix = 0;
      break;
    case DATA_PREFIX_OPCODE:
      implied_prefix = 1;
      break;
    case REPE_PREFIX_OPCODE:
      implied_prefix = 2;
      break;
    case REPNE_PREFIX_OPCODE:
      implied_prefix = 3;
      break;
    default:
      abort ();
    }

  /* Check the REX.W bit and VEXW.  */
  if (i.tm.opcode_modifier.vexw == VEXWIG)
    w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
  else if (i.tm.opcode_modifier.vexw)
    w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
  else
    w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;

  /* Use 2-byte VEX prefix if possible.  */
  if (w == 0
      && i.vec_encoding != vex_encoding_vex3
      && i.tm.opcode_modifier.vexopcode == VEX0F
      && (i.rex & (REX_W | REX_X | REX_B)) == 0)
    {
      /* 2-byte VEX prefix.  */
      unsigned int r;

      i.vex.length = 2;
      i.vex.bytes[0] = 0xc5;

      /* Check the REX.R bit.  It is stored inverted.  */
      r = (i.rex & REX_R) ? 0 : 1;
      i.vex.bytes[1] = (r << 7
                        | register_specifier << 3
                        | vector_length << 2
                        | implied_prefix);
    }
  else
    {
      /* 3-byte VEX prefix.  */
      unsigned int m;

      i.vex.length = 3;

      /* M is the opcode map number; the first byte is 0xc4 for the
         VEX maps and 0x8f for the XOP maps.  */
      switch (i.tm.opcode_modifier.vexopcode)
        {
        case VEX0F:
          m = 0x1;
          i.vex.bytes[0] = 0xc4;
          break;
        case VEX0F38:
          m = 0x2;
          i.vex.bytes[0] = 0xc4;
          break;
        case VEX0F3A:
          m = 0x3;
          i.vex.bytes[0] = 0xc4;
          break;
        case XOP08:
          m = 0x8;
          i.vex.bytes[0] = 0x8f;
          break;
        case XOP09:
          m = 0x9;
          i.vex.bytes[0] = 0x8f;
          break;
        case XOP0A:
          m = 0xa;
          i.vex.bytes[0] = 0x8f;
          break;
        default:
          abort ();
        }

      /* The high 3 bits of the second VEX byte are 1's complement
         of RXB bits from REX.  */
      i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;

      i.vex.bytes[2] = (w << 7
                        | register_specifier << 3
                        | vector_length << 2
                        | implied_prefix);
    }
}
3768
3769 static INLINE bfd_boolean
3770 is_evex_encoding (const insn_template *t)
3771 {
3772   return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3773          || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3774          || t->opcode_modifier.sae;
3775 }
3776
3777 static INLINE bfd_boolean
3778 is_any_vex_encoding (const insn_template *t)
3779 {
3780   return t->opcode_modifier.vex || t->opcode_modifier.vexopcode
3781          || is_evex_encoding (t);
3782 }
3783
3784 /* Build the EVEX prefix.
        Fills i.vex.bytes[0..3] (byte 0 is always 0x62) and sets
        i.vex.length to 4, using the matched template i.tm and the
        register, mask, broadcast and rounding state recorded in `i'.
        Side effects: clears the consumed bits of i.vrex (and asserts
        that none are left over) and may set i.tm.opcode_modifier.evex
        to the vector length it deduced from the operands.
        Aborts on an implied-prefix byte, opcode map or vector length it
        does not handle.
        NOTE(review): byte 3 is either assigned 0x8 or only OR'ed into
        below, so it is presumably zero on entry -- confirm at the
        caller.  */
3785
3786 static void
3787 build_evex_prefix (void)
3788 {
3789   unsigned int register_specifier;
3790   unsigned int implied_prefix;
3791   unsigned int m, w;
       /* VREX bits that have been folded into the prefix so far.  */
3792   rex_byte vrex_used = 0;
3793
3794   /* Check register specifier.  */
3795   if (i.vex.register_specifier)
3796     {
3797       gas_assert ((i.vrex & REX_X) == 0);
3798
3799       register_specifier = i.vex.register_specifier->reg_num;
3800       if ((i.vex.register_specifier->reg_flags & RegRex))
3801         register_specifier += 8;
3802       /* The upper 16 registers are encoded in the fourth byte of the
3803          EVEX prefix.  */
3804       if (!(i.vex.register_specifier->reg_flags & RegVRex))
3805         i.vex.bytes[3] = 0x8;
           /* The specifier is stored inverted, restricted to 4 bits.  */
3806       register_specifier = ~register_specifier & 0xf;
3807     }
3808   else
3809     {
           /* No register specifier: all four (inverted) bits are set.  */
3810       register_specifier = 0xf;
3811
3812       /* Encode upper 16 vector index register in the fourth byte of
3813          the EVEX prefix.  */
3814       if (!(i.vrex & REX_X))
3815         i.vex.bytes[3] = 0x8;
3816       else
3817         vrex_used |= REX_X;
3818     }
3819
       /* Translate the prefix byte held in bits 8-15 of the base opcode
          into a 2-bit code: none, data, REPE and REPNE map to 0..3.  */
3820   switch ((i.tm.base_opcode >> 8) & 0xff)
3821     {
3822     case 0:
3823       implied_prefix = 0;
3824       break;
3825     case DATA_PREFIX_OPCODE:
3826       implied_prefix = 1;
3827       break;
3828     case REPE_PREFIX_OPCODE:
3829       implied_prefix = 2;
3830       break;
3831     case REPNE_PREFIX_OPCODE:
3832       implied_prefix = 3;
3833       break;
3834     default:
3835       abort ();
3836     }
3837
3838   /* 4 byte EVEX prefix.  */
3839   i.vex.length = 4;
3840   i.vex.bytes[0] = 0x62;
3841
3842   /* mmmm bits.  */
3843   switch (i.tm.opcode_modifier.vexopcode)
3844     {
3845     case VEX0F:
3846       m = 1;
3847       break;
3848     case VEX0F38:
3849       m = 2;
3850       break;
3851     case VEX0F3A:
3852       m = 3;
3853       break;
3854     default:
3855       abort ();
3856       break;
3857     }
3858
3859   /* The high 3 bits of the second EVEX byte are 1's complement of RXB
3860      bits from REX.  */
3861   i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;
3862
3863   /* The fifth bit of the second EVEX byte is 1's complement of the
3864      REX_R bit in VREX.  */
3865   if (!(i.vrex & REX_R))
3866     i.vex.bytes[1] |= 0x10;
3867   else
3868     vrex_used |= REX_R;
3869
3870   if ((i.reg_operands + i.imm_operands) == i.operands)
3871     {
3872       /* When all operands are registers, the REX_X bit in REX is not
3873          used.  We reuse it to encode the upper 16 registers, which is
3874          indicated by the REX_B bit in VREX.  The REX_X bit is encoded
3875          as 1's complement.  */
3876       if ((i.vrex & REX_B))
3877         {
3878           vrex_used |= REX_B;
3879           i.vex.bytes[1] &= ~0x40;
3880         }
3881     }
3882
3883   /* EVEX instructions shouldn't need the REX prefix.  */
       /* Every VREX bit must have been consumed above; anything left
          over trips the assertion.  */
3884   i.vrex &= ~vrex_used;
3885   gas_assert (i.vrex == 0);
3886
3887   /* Check the REX.W bit and VEXW.  */
3888   if (i.tm.opcode_modifier.vexw == VEXWIG)
3889     w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3890   else if (i.tm.opcode_modifier.vexw)
3891     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3892   else
3893     w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3894
3895   /* Encode the U bit.  */
3896   implied_prefix |= 0x4;
3897
3898   /* The third byte of the EVEX prefix.  */
       /* Layout: W in bit 7, the inverted register specifier in bits
          3-6, the U bit and the implied prefix code in bits 0-2.  */
3899   i.vex.bytes[2] = (w << 7 | register_specifier << 3 | implied_prefix);
3900
3901   /* The fourth byte of the EVEX prefix.  */
3902   /* The zeroing-masking bit.  */
3903   if (i.mask && i.mask->zeroing)
3904     i.vex.bytes[3] |= 0x80;
3905
3906   /* Don't always set the broadcast bit if there is no RC.  */
3907   if (!i.rounding)
3908     {
3909       /* Encode the vector length.  */
3910       unsigned int vec_length;
3911
3912       if (!i.tm.opcode_modifier.evex
3913           || i.tm.opcode_modifier.evex == EVEXDYN)
3914         {
3915           unsigned int op;
3916
3917           /* Determine vector length from the last multi-length vector
3918              operand.  */
               /* The sum counts how many of the xmm/ymm/zmm sizes the
                  template allows for the operand; more than one makes it
                  multi-length.  The actual size then comes from the
                  parsed operand type or, failing that, from the
                  broadcast byte count.  */
3919           for (op = i.operands; op--;)
3920             if (i.tm.operand_types[op].bitfield.xmmword
3921                 + i.tm.operand_types[op].bitfield.ymmword
3922                 + i.tm.operand_types[op].bitfield.zmmword > 1)
3923               {
3924                 if (i.types[op].bitfield.zmmword)
3925                   {
3926                     i.tm.opcode_modifier.evex = EVEX512;
3927                     break;
3928                   }
3929                 else if (i.types[op].bitfield.ymmword)
3930                   {
3931                     i.tm.opcode_modifier.evex = EVEX256;
3932                     break;
3933                   }
3934                 else if (i.types[op].bitfield.xmmword)
3935                   {
3936                     i.tm.opcode_modifier.evex = EVEX128;
3937                     break;
3938                   }
3939                 else if (i.broadcast && (int) op == i.broadcast->operand)
3940                   {
3941                     switch (i.broadcast->bytes)
3942                       {
3943                         case 64:
3944                           i.tm.opcode_modifier.evex = EVEX512;
3945                           break;
3946                         case 32:
3947                           i.tm.opcode_modifier.evex = EVEX256;
3948                           break;
3949                         case 16:
3950                           i.tm.opcode_modifier.evex = EVEX128;
3951                           break;
3952                         default:
3953                           abort ();
3954                       }
3955                     break;
3956                   }
3957               }
3958
               /* If the loop ran out without a match, the unsigned OP
                  has wrapped around below zero, so this check aborts.  */
3959           if (op >= MAX_OPERANDS)
3960             abort ();
3961         }
3962
           /* The vector length code is placed in bits 5-6 of byte 3.  */
3963       switch (i.tm.opcode_modifier.evex)
3964         {
3965         case EVEXLIG: /* LL' is ignored */
3966           vec_length = evexlig << 5;
3967           break;
3968         case EVEX128:
3969           vec_length = 0 << 5;
3970           break;
3971         case EVEX256:
3972           vec_length = 1 << 5;
3973           break;
3974         case EVEX512:
3975           vec_length = 2 << 5;
3976           break;
3977         default:
3978           abort ();
3979           break;
3980         }
3981       i.vex.bytes[3] |= vec_length;
3982       /* Encode the broadcast bit.  */
3983       if (i.broadcast)
3984         i.vex.bytes[3] |= 0x10;
3985     }
3986   else
3987     {
           /* With a rounding operand, bit 4 is always set and bits 5-6
              hold the rounding type (evexrcig for SAE-only) in place of
              a vector length.  */
3988       if (i.rounding->type != saeonly)
3989         i.vex.bytes[3] |= 0x10 | (i.rounding->type << 5);
3990       else
3991         i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3992     }
3993
       /* The mask register number is OR'ed into the low bits of byte 3.  */
3994   if (i.mask && i.mask->mask)
3995     i.vex.bytes[3] |= i.mask->mask->reg_num;
3996 }
3997
3998 static void
3999 process_immext (void)
4000 {
4001   expressionS *exp;
4002
4003   /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4004      which is coded in the same place as an 8-bit immediate field
4005      would be.  Here we fake an 8-bit immediate operand from the
4006      opcode suffix stored in tm.extension_opcode.
4007
4008      AVX instructions also use this encoding, for some of
4009      3 argument instructions.  */
4010
4011   gas_assert (i.imm_operands <= 1
4012               && (i.operands <= 2
4013                   || (is_any_vex_encoding (&i.tm)
4014                       && i.operands <= 4)));
4015
4016   exp = &im_expressions[i.imm_operands++];
4017   i.op[i.operands].imms = exp;
4018   i.types[i.operands] = imm8;
4019   i.operands++;
4020   exp->X_op = O_constant;
4021   exp->X_add_number = i.tm.extension_opcode;
4022   i.tm.extension_opcode = None;
4023 }
4024
4025
4026 static int
4027 check_hle (void)
4028 {
4029   switch (i.tm.opcode_modifier.hleprefixok)
4030     {
4031     default:
4032       abort ();
4033     case HLEPrefixNone:
4034       as_bad (_("invalid instruction `%s' after `%s'"),
4035               i.tm.name, i.hle_prefix);
4036       return 0;
4037     case HLEPrefixLock:
4038       if (i.prefix[LOCK_PREFIX])
4039         return 1;
4040       as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4041       return 0;
4042     case HLEPrefixAny:
4043       return 1;
4044     case HLEPrefixRelease:
4045       if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4046         {
4047           as_bad (_("instruction `%s' after `xacquire' not allowed"),
4048                   i.tm.name);
4049           return 0;
4050         }
4051       if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4052         {
4053           as_bad (_("memory destination needed for instruction `%s'"
4054                     " after `xrelease'"), i.tm.name);
4055           return 0;
4056         }
4057       return 1;
4058     }
4059 }
4060
4061 /* Try the shortest encoding by shortening operand size.
        Rewrites the matched template (i.tm) and the operand info in `i'
        in place when one of the following patterns applies; they are
        tried in this order and at most one is used:
          1. with optimize_for_space: test $imm7 on a wider register
             becomes the byte-register form;
          2. in 64-bit mode: selected 64-bit and/test/xor/sub/mov forms
             become their 32-bit equivalents;
          3. with optimize > 1 and not optimizing for space: and/or of
             a register with itself becomes test;
          4. three-register VEX/EVEX operations whose first two operands
             are the same register are replaced by a shorter equivalent
             (128-bit vector form, or the word form for mask registers);
          5. EVEX vector moves and logic operations without masking or
             broadcast are re-encoded as VEX.
        Returns nothing; when no pattern applies `i' is left alone.  */
4062
4063 static void
4064 optimize_encoding (void)
4065 {
4066   unsigned int j;
4067
4068   if (optimize_for_space
4069       && !is_any_vex_encoding (&i.tm)
4070       && i.reg_operands == 1
4071       && i.imm_operands == 1
4072       && !i.types[1].bitfield.byte
4073       && i.op[0].imms->X_op == O_constant
4074       && fits_in_imm7 (i.op[0].imms->X_add_number)
4075       && (i.tm.base_opcode == 0xa8
4076           || (i.tm.base_opcode == 0xf6
4077               && i.tm.extension_opcode == 0x0)))
4078     {
4079       /* Optimize: -Os:
4080            test $imm7, %r64/%r32/%r16  -> test $imm7, %r8
4081        */
4082       unsigned int base_regnum = i.op[1].regs->reg_num;
           /* Outside 64-bit mode only registers numbered 0-3 are
              converted (presumably because the others have no low-byte
              form there -- confirm).  */
4083       if (flag_code == CODE_64BIT || base_regnum < 4)
4084         {
4085           i.types[1].bitfield.byte = 1;
4086           /* Ignore the suffix.  */
4087           i.suffix = 0;
4088           /* Convert to byte registers.  */
               /* J is the distance, in register-table entries, from the
                  current register back to its byte counterpart.
                  NOTE(review): this depends on the ordering of the
                  register table, which is not visible here.  */
4089           if (i.types[1].bitfield.word)
4090             j = 16;
4091           else if (i.types[1].bitfield.dword)
4092             j = 32;
4093           else
4094             j = 48;
4095           if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4096             j += 8;
4097           i.op[1].regs -= j;
4098         }
4099     }
4100   else if (flag_code == CODE_64BIT
4101            && !is_any_vex_encoding (&i.tm)
4102            && ((i.types[1].bitfield.qword
4103                 && i.reg_operands == 1
4104                 && i.imm_operands == 1
4105                 && i.op[0].imms->X_op == O_constant
4106                 && ((i.tm.base_opcode == 0xb8
4107                      && i.tm.extension_opcode == None
4108                      && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4109                     || (fits_in_imm31 (i.op[0].imms->X_add_number)
4110                         && ((i.tm.base_opcode == 0x24
4111                              || i.tm.base_opcode == 0xa8)
4112                             || (i.tm.base_opcode == 0x80
4113                                 && i.tm.extension_opcode == 0x4)
4114                             || ((i.tm.base_opcode == 0xf6
4115                                  || (i.tm.base_opcode | 1) == 0xc7)
4116                                 && i.tm.extension_opcode == 0x0)))
4117                     || (fits_in_imm7 (i.op[0].imms->X_add_number)
4118                         && i.tm.base_opcode == 0x83
4119                         && i.tm.extension_opcode == 0x4)))
4120                || (i.types[0].bitfield.qword
4121                    && ((i.reg_operands == 2
4122                         && i.op[0].regs == i.op[1].regs
4123                         && (i.tm.base_opcode == 0x30
4124                             || i.tm.base_opcode == 0x28))
4125                        || (i.reg_operands == 1
4126                            && i.operands == 1
4127                            && i.tm.base_opcode == 0x30)))))
4128     {
4129       /* Optimize: -O:
4130            andq $imm31, %r64   -> andl $imm31, %r32
4131            andq $imm7, %r64    -> andl $imm7, %r32
4132            testq $imm31, %r64  -> testl $imm31, %r32
4133            xorq %r64, %r64     -> xorl %r32, %r32
4134            subq %r64, %r64     -> subl %r32, %r32
4135            movq $imm31, %r64   -> movl $imm31, %r32
4136            movq $imm32, %r64   -> movl $imm32, %r32
4137         */
           /* Presumably norex64 keeps the 64-bit operand size prefix
              from being emitted -- confirm where the flag is read.  */
4138       i.tm.opcode_modifier.norex64 = 1;
4139       if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
4140         {
4141           /* Handle
4142                movq $imm31, %r64   -> movl $imm31, %r32
4143                movq $imm32, %r64   -> movl $imm32, %r32
4144            */
               /* Retype both the template and the parsed operands: the
                  immediate becomes imm32, the register a dword.  */
4145           i.tm.operand_types[0].bitfield.imm32 = 1;
4146           i.tm.operand_types[0].bitfield.imm32s = 0;
4147           i.tm.operand_types[0].bitfield.imm64 = 0;
4148           i.types[0].bitfield.imm32 = 1;
4149           i.types[0].bitfield.imm32s = 0;
4150           i.types[0].bitfield.imm64 = 0;
4151           i.types[1].bitfield.dword = 1;
4152           i.types[1].bitfield.qword = 0;
4153           if ((i.tm.base_opcode | 1) == 0xc7)
4154             {
4155               /* Handle
4156                    movq $imm31, %r64   -> movl $imm31, %r32
4157                */
                   /* Switch from the 0xc6/0xc7 ModRM form to the 0xb8
                      form, which has no extension opcode, W bit or
                      ModRM byte.  */
4158               i.tm.base_opcode = 0xb8;
4159               i.tm.extension_opcode = None;
4160               i.tm.opcode_modifier.w = 0;
4161               i.tm.opcode_modifier.modrm = 0;
4162             }
4163         }
4164     }
4165   else if (optimize > 1
4166            && !optimize_for_space
4167            && !is_any_vex_encoding (&i.tm)
4168            && i.reg_operands == 2
4169            && i.op[0].regs == i.op[1].regs
4170            && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
4171                || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
4172            && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4173     {
4174       /* Optimize: -O2:
4175            andb %rN, %rN  -> testb %rN, %rN
4176            andw %rN, %rN  -> testw %rN, %rN
4177            andq %rN, %rN  -> testq %rN, %rN
4178            orb %rN, %rN   -> testb %rN, %rN
4179            orw %rN, %rN   -> testw %rN, %rN
4180            orq %rN, %rN   -> testq %rN, %rN
4181
4182            and outside of 64-bit mode
4183
4184            andl %rN, %rN  -> testl %rN, %rN
4185            orl %rN, %rN   -> testl %rN, %rN
4186        */
           /* Opcode 0x84/0x85; bit 0 of the original opcode is kept.  */
4187       i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4188     }
4189   else if (i.reg_operands == 3
4190            && i.op[0].regs == i.op[1].regs
4191            && !i.types[2].bitfield.xmmword
4192            && (i.tm.opcode_modifier.vex
4193                || ((!i.mask || i.mask->zeroing)
4194                    && !i.rounding
4195                    && is_evex_encoding (&i.tm)
4196                    && (i.vec_encoding != vex_encoding_evex
4197                        || cpu_arch_isa_flags.bitfield.cpuavx512vl
4198                        || i.tm.cpu_flags.bitfield.cpuavx512vl
4199                        || (i.tm.operand_types[2].bitfield.zmmword
4200                            && i.types[2].bitfield.ymmword))))
4201            && ((i.tm.base_opcode == 0x55
4202                 || i.tm.base_opcode == 0x6655
4203                 || i.tm.base_opcode == 0x66df
4204                 || i.tm.base_opcode == 0x57
4205                 || i.tm.base_opcode == 0x6657
4206                 || i.tm.base_opcode == 0x66ef
4207                 || i.tm.base_opcode == 0x66f8
4208                 || i.tm.base_opcode == 0x66f9
4209                 || i.tm.base_opcode == 0x66fa
4210                 || i.tm.base_opcode == 0x66fb
4211                 || i.tm.base_opcode == 0x42
4212                 || i.tm.base_opcode == 0x6642
4213                 || i.tm.base_opcode == 0x47
4214                 || i.tm.base_opcode == 0x6647)
4215                && i.tm.extension_opcode == None))
4216     {
4217       /* Optimize: -O1:
4218            VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4219            vpsubq and vpsubw:
4220              EVEX VOP %zmmM, %zmmM, %zmmN
4221                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4222                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4223              EVEX VOP %ymmM, %ymmM, %ymmN
4224                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4225                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4226              VEX VOP %ymmM, %ymmM, %ymmN
4227                -> VEX VOP %xmmM, %xmmM, %xmmN
4228            VOP, one of vpandn and vpxor:
4229              VEX VOP %ymmM, %ymmM, %ymmN
4230                -> VEX VOP %xmmM, %xmmM, %xmmN
4231            VOP, one of vpandnd and vpandnq:
4232              EVEX VOP %zmmM, %zmmM, %zmmN
4233                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4234                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4235              EVEX VOP %ymmM, %ymmM, %ymmN
4236                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4237                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4238            VOP, one of vpxord and vpxorq:
4239              EVEX VOP %zmmM, %zmmM, %zmmN
4240                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4241                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4242              EVEX VOP %ymmM, %ymmM, %ymmN
4243                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4244                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4245            VOP, one of kxord and kxorq:
4246              VEX VOP %kM, %kM, %kN
4247                -> VEX kxorw %kM, %kM, %kN
4248            VOP, one of kandnd and kandnq:
4249              VEX VOP %kM, %kM, %kN
4250                -> VEX kandnw %kM, %kM, %kN
4251        */
4252       if (is_evex_encoding (&i.tm))
4253         {
               /* Unless EVEX was explicitly requested, switch the
                  template to 128-bit VEX.  With EVEX requested, only
                  narrow to 128 bits when optimize > 1; otherwise leave
                  the instruction unchanged.  */
4254           if (i.vec_encoding != vex_encoding_evex)
4255             {
4256               i.tm.opcode_modifier.vex = VEX128;
4257               i.tm.opcode_modifier.vexw = VEXW0;
4258               i.tm.opcode_modifier.evex = 0;
4259             }
4260           else if (optimize > 1)
4261             i.tm.opcode_modifier.evex = EVEX128;
4262           else
4263             return;
4264         }
4265       else if (i.tm.operand_types[0].bitfield.class == RegMask)
4266         {
               /* Mask-register operation: keep only the low opcode
                  byte (dropping any prefix byte above it) and clear W.  */
4267           i.tm.base_opcode &= 0xff;
4268           i.tm.opcode_modifier.vexw = VEXW0;
4269         }
4270       else
4271         i.tm.opcode_modifier.vex = VEX128;
4272
           /* For a VEX result, mark all three operands as xmm rather
              than ymm.  */
4273       if (i.tm.opcode_modifier.vex)
4274         for (j = 0; j < 3; j++)
4275           {
4276             i.types[j].bitfield.xmmword = 1;
4277             i.types[j].bitfield.ymmword = 0;
4278           }
4279     }
4280   else if (i.vec_encoding != vex_encoding_evex
4281            && !i.types[0].bitfield.zmmword
4282            && !i.types[1].bitfield.zmmword
4283            && !i.mask
4284            && !i.broadcast
4285            && is_evex_encoding (&i.tm)
4286            && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f
4287                || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f
4288                || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f
4289                || (i.tm.base_opcode & ~4) == 0x66db
4290                || (i.tm.base_opcode & ~4) == 0x66eb)
4291            && i.tm.extension_opcode == None)
4292     {
4293       /* Optimize: -O1:
4294            VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4295            vmovdqu32 and vmovdqu64:
4296              EVEX VOP %xmmM, %xmmN
4297                -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4298              EVEX VOP %ymmM, %ymmN
4299                -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4300              EVEX VOP %xmmM, mem
4301                -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4302              EVEX VOP %ymmM, mem
4303                -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4304              EVEX VOP mem, %xmmN
4305                -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4306              EVEX VOP mem, %ymmN
4307                -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4308            VOP, one of vpand, vpandn, vpor, vpxor:
4309              EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4310                -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4311              EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4312                -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4313              EVEX VOP{d,q} mem, %xmmM, %xmmN
4314                -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4315              EVEX VOP{d,q} mem, %ymmM, %ymmN
4316                -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4317        */
4318       for (j = 0; j < i.operands; j++)
4319         if (operand_type_check (i.types[j], disp)
4320             && i.op[j].disps->X_op == O_constant)
4321           {
4322             /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4323                has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4324                bytes, we choose EVEX Disp8 over VEX Disp32.  */
                 /* fits_in_disp8 is called once with the current
                    i.memshift and once with i.memshift zeroed
                    (presumably it scales by i.memshift -- confirm).  If
                    the two answers differ, restore i.memshift and keep
                    the EVEX encoding.  */
4325             int evex_disp8, vex_disp8;
4326             unsigned int memshift = i.memshift;
4327             offsetT n = i.op[j].disps->X_add_number;
4328
4329             evex_disp8 = fits_in_disp8 (n);
4330             i.memshift = 0;
4331             vex_disp8 = fits_in_disp8 (n);
4332             if (evex_disp8 != vex_disp8)
4333               {
4334                 i.memshift = memshift;
4335                 return;
4336               }
4337
4338             i.types[j].bitfield.disp8 = vex_disp8;
4339             break;
4340           }
           /* Switch the 0xf2 prefix byte to 0xf3; the two constants
              differ only in bit 8, so other opcode bits are kept.  */
4341       if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
4342         i.tm.base_opcode ^= 0xf36f ^ 0xf26f;
4343       i.tm.opcode_modifier.vex
4344         = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4345       i.tm.opcode_modifier.vexw = VEXW0;
4346       /* VPAND, VPOR, and VPXOR are commutative.  */
4347       if (i.reg_operands == 3 && i.tm.base_opcode != 0x66df)
4348         i.tm.opcode_modifier.commutative = 1;
           /* Strip every EVEX-only attribute from the template.  */
4349       i.tm.opcode_modifier.evex = 0;
4350       i.tm.opcode_modifier.masking = 0;
4351       i.tm.opcode_modifier.broadcast = 0;
4352       i.tm.opcode_modifier.disp8memshift = 0;
4353       i.memshift = 0;
           /* J is below i.operands only when the loop above stopped at
              a constant displacement; recompute its disp8 flag now that
              i.memshift is zero.  */
4354       if (j < i.operands)
4355         i.types[j].bitfield.disp8
4356           = fits_in_disp8 (i.op[j].disps->X_add_number);
4357     }
4358 }
4359
4360 /* Return non-zero for load instruction.
        Inspects the matched template i.tm and the operands recorded in
        `i'.  Returns 1 when the instruction is treated as reading
        memory and 0 otherwise.  Instructions that are not VEX/EVEX
        encoded are first matched by opcode and extension opcode; an
        instruction with a memory operand that no rule matched is
        classified by looking at its last (destination) operand.  */
4361
4362 static int
4363 load_insn_p (void)
4364 {
4365   unsigned int dest;
4366   int any_vex_p = is_any_vex_encoding (&i.tm);
       /* Opcode with bit 0 forced on, so that opcode pairs differing
          only in that bit compare equal.  */
4367   unsigned int base_opcode = i.tm.base_opcode | 1;
4368
       /* Rules that apply whether or not an explicit memory operand was
          given.  */
4369   if (!any_vex_p)
4370     {
4371       /* Anysize insns: lea, invlpg, clflush, prefetchnta, prefetcht0,
4372          prefetcht1, prefetcht2, prefetchtw, bndmk, bndcl, bndcu, bndcn,
4373          bndstx, bndldx, prefetchwt1, clflushopt, clwb, cldemote.  */
4374       if (i.tm.opcode_modifier.anysize)
4375         return 0;
4376
4377       /* pop, popf, popa.   */
4378       if (strcmp (i.tm.name, "pop") == 0
4379           || i.tm.base_opcode == 0x9d
4380           || i.tm.base_opcode == 0x61)
4381         return 1;
4382
4383       /* movs, cmps, lods, scas.  */
           /* The test matches opcodes 0xa4-0xa7 and 0xac-0xaf.  */
4384       if ((i.tm.base_opcode | 0xb) == 0xaf)
4385         return 1;
4386
4387       /* outs, xlatb.  */
4388       if (base_opcode == 0x6f
4389           || i.tm.base_opcode == 0xd7)
4390         return 1;
4391       /* NB: For AMD-specific insns with implicit memory operands,
4392          they're intentionally not covered.  */
4393     }
4394
4395   /* No memory operand.  */
4396   if (!i.mem_operands)
4397     return 0;
4398
4399   if (any_vex_p)
4400     {
4401       /* vldmxcsr.  */
4402       if (i.tm.base_opcode == 0xae
4403           && i.tm.opcode_modifier.vex
4404           && i.tm.opcode_modifier.vexopcode == VEX0F
4405           && i.tm.extension_opcode == 2)
4406         return 1;
4407     }
4408   else
4409     {
4410       /* test, not, neg, mul, imul, div, idiv.  */
4411       if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
4412           && i.tm.extension_opcode != 1)
4413         return 1;
4414
4415       /* inc, dec.  */
4416       if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4417         return 1;
4418
4419       /* add, or, adc, sbb, and, sub, xor, cmp.  */
4420       if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4421         return 1;
4422
4423       /* bt, bts, btr, btc.  */
4424       if (i.tm.base_opcode == 0xfba
4425           && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
4426         return 1;
4427
4428       /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4429       if ((base_opcode == 0xc1
4430            || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
4431           && i.tm.extension_opcode != 6)
4432         return 1;
4433
4434       /* cmpxchg8b, cmpxchg16b, xrstors.  */
4435       if (i.tm.base_opcode == 0xfc7
4436           && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3))
4437         return 1;
4438
4439       /* fxrstor, ldmxcsr, xrstor.  */
4440       if (i.tm.base_opcode == 0xfae
4441           && (i.tm.extension_opcode == 1
4442               || i.tm.extension_opcode == 2
4443               || i.tm.extension_opcode == 5))
4444         return 1;
4445
4446       /* lgdt, lidt, lmsw.  */
4447       if (i.tm.base_opcode == 0xf01
4448           && (i.tm.extension_opcode == 2
4449               || i.tm.extension_opcode == 3
4450               || i.tm.extension_opcode == 6))
4451         return 1;
4452
4453       /* vmptrld */
4454       if (i.tm.base_opcode == 0xfc7
4455           && i.tm.extension_opcode == 6)
4456         return 1;
4457
4458       /* Check for x87 instructions.  */
           /* Opcodes 0xd8-0xdf with a memory operand count as loads,
              except for the extension opcodes skipped below.  */
4459       if (i.tm.base_opcode >= 0xd8 && i.tm.base_opcode <= 0xdf)
4460         {
4461           /* Skip fst, fstp, fstenv, fstcw.  */
4462           if (i.tm.base_opcode == 0xd9
4463               && (i.tm.extension_opcode == 2
4464                   || i.tm.extension_opcode == 3
4465                   || i.tm.extension_opcode == 6
4466                   || i.tm.extension_opcode == 7))
4467             return 0;
4468
4469           /* Skip fisttp, fist, fistp, fstp.  */
4470           if (i.tm.base_opcode == 0xdb
4471               && (i.tm.extension_opcode == 1
4472                   || i.tm.extension_opcode == 2
4473                   || i.tm.extension_opcode == 3
4474                   || i.tm.extension_opcode == 7))
4475             return 0;
4476
4477           /* Skip fisttp, fst, fstp, fsave, fstsw.  */
4478           if (i.tm.base_opcode == 0xdd
4479               && (i.tm.extension_opcode == 1
4480                   || i.tm.extension_opcode == 2
4481                   || i.tm.extension_opcode == 3
4482                   || i.tm.extension_opcode == 6
4483                   || i.tm.extension_opcode == 7))
4484             return 0;
4485
4486           /* Skip fisttp, fist, fistp, fbstp, fistp.  */
4487           if (i.tm.base_opcode == 0xdf
4488               && (i.tm.extension_opcode == 1
4489                   || i.tm.extension_opcode == 2
4490                   || i.tm.extension_opcode == 3
4491                   || i.tm.extension_opcode == 6
4492                   || i.tm.extension_opcode == 7))
4493             return 0;
4494
4495           return 1;
4496         }
4497     }
4498
       /* The last operand is taken to be the destination.  */
4499   dest = i.operands - 1;
4500
4501   /* Check fake imm8 operand and 3 source operands.  */
       /* In those cases a trailing imm8 operand is stepped over.  */
4502   if ((i.tm.opcode_modifier.immext
4503        || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
4504       && i.types[dest].bitfield.imm8)
4505     dest--;
4506
4507   /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg, xadd  */
4508   if (!any_vex_p
4509       && (base_opcode == 0x1
4510           || base_opcode == 0x9
4511           || base_opcode == 0x11
4512           || base_opcode == 0x19
4513           || base_opcode == 0x21
4514           || base_opcode == 0x29
4515           || base_opcode == 0x31
4516           || base_opcode == 0x39
4517           || (i.tm.base_opcode >= 0x84 && i.tm.base_opcode <= 0x87)
4518           || base_opcode == 0xfc1))
4519     return 1;
4520
4521   /* Check for load instruction.  */
       /* A memory operand is present at this point, so the instruction
          counts as a load when the destination operand has a class
          other than ClassNone or is the accumulator.  */
4522   return (i.types[dest].bitfield.class != ClassNone
4523           || i.types[dest].bitfield.instance == Accum);
4524 }
4525
4526 /* Output lfence, 0xfaee8, after instruction.  */
4527
4528 static void
4529 insert_lfence_after (void)
4530 {
4531   if (lfence_after_load && load_insn_p ())
4532     {
4533       /* There are also two REP string instructions that require
4534          special treatment. Specifically, the compare string (CMPS)
4535          and scan string (SCAS) instructions set EFLAGS in a manner
4536          that depends on the data being compared/scanned. When used
4537          with a REP prefix, the number of iterations may therefore
4538          vary depending on this data. If the data is a program secret
4539          chosen by the adversary using an LVI method,
4540          then this data-dependent behavior may leak some aspect
4541          of the secret.  */
4542       if (((i.tm.base_opcode | 0x1) == 0xa7
4543            || (i.tm.base_opcode | 0x1) == 0xaf)
4544           && i.prefix[REP_PREFIX])
4545         {
4546             as_warn (_("`%s` changes flags which would affect control flow behavior"),
4547                      i.tm.name);
4548         }
4549       char *p = frag_more (3);
4550       *p++ = 0xf;
4551       *p++ = 0xae;
4552       *p = 0xe8;
4553     }
4554 }
4555
4556 /* Output lfence, 0xfaee8, before instruction.  */
4557
4558 static void
4559 insert_lfence_before (void)
4560 {
4561   char *p;
4562
4563   if (is_any_vex_encoding (&i.tm))
4564     return;
4565
4566   if (i.tm.base_opcode == 0xff
4567       && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4568     {
4569       /* Insert lfence before indirect branch if needed.  */
4570
4571       if (lfence_before_indirect_branch == lfence_branch_none)
4572         return;
4573
4574       if (i.operands != 1)
4575         abort ();
4576
4577       if (i.reg_operands == 1)
4578         {
4579           /* Indirect branch via register.  Don't insert lfence with
4580              -mlfence-after-load=yes.  */
4581           if (lfence_after_load
4582               || lfence_before_indirect_branch == lfence_branch_memory)
4583             return;
4584         }
4585       else if (i.mem_operands == 1
4586                && lfence_before_indirect_branch != lfence_branch_register)
4587         {
4588           as_warn (_("indirect `%s` with memory operand should be avoided"),
4589                    i.tm.name);
4590           return;
4591         }
4592       else
4593         return;
4594
4595       if (last_insn.kind != last_insn_other
4596           && last_insn.seg == now_seg)
4597         {
4598           as_warn_where (last_insn.file, last_insn.line,
4599                          _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4600                          last_insn.name, i.tm.name);
4601           return;
4602         }
4603
4604       p = frag_more (3);
4605       *p++ = 0xf;
4606       *p++ = 0xae;
4607       *p = 0xe8;
4608       return;
4609     }
4610
4611   /* Output or/not/shl and lfence before near ret.  */
4612   if (lfence_before_ret != lfence_before_ret_none
4613       && (i.tm.base_opcode == 0xc2
4614           || i.tm.base_opcode == 0xc3))
4615     {
4616       if (last_insn.kind != last_insn_other
4617           && last_insn.seg == now_seg)
4618         {
4619           as_warn_where (last_insn.file, last_insn.line,
4620                          _("`%s` skips -mlfence-before-ret on `%s`"),
4621                          last_insn.name, i.tm.name);
4622           return;
4623         }
4624
4625       /* Near ret ingore operand size override under CPU64.  */
4626       char prefix = flag_code == CODE_64BIT
4627                     ? 0x48
4628                     : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
4629
4630       if (lfence_before_ret == lfence_before_ret_not)
4631         {
4632           /* not: 0xf71424, may add prefix
4633              for operand size override or 64-bit code.  */
4634           p = frag_more ((prefix ? 2 : 0) + 6 + 3);
4635           if (prefix)
4636             *p++ = prefix;
4637           *p++ = 0xf7;
4638           *p++ = 0x14;
4639           *p++ = 0x24;
4640           if (prefix)
4641             *p++ = prefix;
4642           *p++ = 0xf7;
4643           *p++ = 0x14;
4644           *p++ = 0x24;
4645         }
4646       else
4647         {
4648           p = frag_more ((prefix ? 1 : 0) + 4 + 3);
4649           if (prefix)
4650             *p++ = prefix;
4651           if (lfence_before_ret == lfence_before_ret_or)
4652             {
4653               /* or: 0x830c2400, may add prefix
4654                  for operand size override or 64-bit code.  */
4655               *p++ = 0x83;
4656               *p++ = 0x0c;
4657             }
4658           else
4659             {
4660               /* shl: 0xc1242400, may add prefix
4661                  for operand size override or 64-bit code.  */
4662               *p++ = 0xc1;
4663               *p++ = 0x24;
4664             }
4665
4666           *p++ = 0x24;
4667           *p++ = 0x0;
4668         }
4669
4670       *p++ = 0xf;
4671       *p++ = 0xae;
4672       *p = 0xe8;
4673     }
4674 }
4675
/* This is the guts of the machine-dependent assembler.  LINE points to a
   machine dependent instruction.  This function is supposed to emit
   the frags/bytes it assembles to.

   The stages, in order: reset the per-instruction state in `i'; parse
   the mnemonic (with prefixes) and the operands; select a matching
   template; validate the prefixes that were seen; settle suffix,
   immediates and operands; build the VEX/EVEX or REX prefix; output the
   instruction, bracketed by the lfence insertion hooks; and finally
   record in last_insn what was just assembled.

   When a stage fails the function normally returns without emitting
   anything.  A few checks (BND, NOTRACK, MPX address size, "hi" byte
   register with REX) only report an error through as_bad and carry
   on.  */

void
md_assemble (char *line)
{
  unsigned int j;
  char mnemonic[MAX_MNEM_SIZE], mnem_suffix;
  const insn_template *t;

  /* Initialize globals.  */
  memset (&i, '\0', sizeof (i));
  for (j = 0; j < MAX_OPERANDS; j++)
    i.reloc[j] = NO_RELOC;
  memset (disp_expressions, '\0', sizeof (disp_expressions));
  memset (im_expressions, '\0', sizeof (im_expressions));
  save_stack_p = save_stack;

  /* First parse an instruction mnemonic & call i386_operand for the operands.
     We assume that the scrubber has arranged it so that line[0] is the valid
     start of a (possibly prefixed) mnemonic.  */

  line = parse_insn (line, mnemonic);
  if (line == NULL)
    return;
  /* Remember the suffix found on the mnemonic itself; it is what gets
     handed to match_template below.  */
  mnem_suffix = i.suffix;

  line = parse_operands (line, mnemonic);
  this_operand = -1;
  /* Release i.memop1_string whether or not operand parsing succeeded;
     the failure check comes only afterwards.  */
  xfree (i.memop1_string);
  i.memop1_string = NULL;
  if (line == NULL)
    return;

  /* Now we've parsed the mnemonic into a set of templates, and have the
     operands at hand.  */

  /* All Intel opcodes have reversed operands except for "bound", "enter",
     "invlpga", "monitor*", "mwait*", "tpause", and "umwait".  We also
     don't reverse intersegment "jmp" and "call" instructions with 2
     immediate operands so that the immediate segment precedes the offset,
     as it does when in AT&T mode.  */
  if (intel_syntax
      && i.operands > 1
      && (strcmp (mnemonic, "bound") != 0)
      && (strcmp (mnemonic, "invlpga") != 0)
      && (strncmp (mnemonic, "monitor", 7) != 0)
      && (strncmp (mnemonic, "mwait", 5) != 0)
      && (strcmp (mnemonic, "tpause") != 0)
      && (strcmp (mnemonic, "umwait") != 0)
      && !(operand_type_check (i.types[0], imm)
           && operand_type_check (i.types[1], imm)))
    swap_operands ();

  /* The order of the immediates should be reversed
     for 2 immediates extrq and insertq instructions.  */
  if (i.imm_operands == 2
      && (strcmp (mnemonic, "extrq") == 0
          || strcmp (mnemonic, "insertq") == 0))
      swap_2_operands (0, 1);

  if (i.imm_operands)
    optimize_imm ();

  /* Don't optimize displacement for movabs since it only takes 64bit
     displacement.  */
  if (i.disp_operands
      && i.disp_encoding != disp_encoding_32bit
      && (flag_code != CODE_64BIT
          || strcmp (mnemonic, "movabs") != 0))
    optimize_disp ();

  /* Next, we find a template that matches the given insn,
     making sure the overlap of the given operands types is consistent
     with the template operand types.  */

  if (!(t = match_template (mnem_suffix)))
    return;

  /* With SSE checking enabled, diagnose templates carrying one of the
     CPU flags listed below, unless they are marked noavx or carry the
     AVX/AVX512F flags.  sse_check selects warning versus error.  */
  if (sse_check != check_none
      && !i.tm.opcode_modifier.noavx
      && !i.tm.cpu_flags.bitfield.cpuavx
      && !i.tm.cpu_flags.bitfield.cpuavx512f
      && (i.tm.cpu_flags.bitfield.cpusse
          || i.tm.cpu_flags.bitfield.cpusse2
          || i.tm.cpu_flags.bitfield.cpusse3
          || i.tm.cpu_flags.bitfield.cpussse3
          || i.tm.cpu_flags.bitfield.cpusse4_1
          || i.tm.cpu_flags.bitfield.cpusse4_2
          || i.tm.cpu_flags.bitfield.cpupclmul
          || i.tm.cpu_flags.bitfield.cpuaes
          || i.tm.cpu_flags.bitfield.cpusha
          || i.tm.cpu_flags.bitfield.cpugfni))
    {
      (sse_check == check_warning
       ? as_warn
       : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
    }

  /* Templates flagged fwait get an FWAIT_OPCODE prefix.  */
  if (i.tm.opcode_modifier.fwait)
    if (!add_prefix (FWAIT_OPCODE))
      return;

  /* Check if REP prefix is OK.  */
  if (i.rep_prefix && !i.tm.opcode_modifier.repprefixok)
    {
      as_bad (_("invalid instruction `%s' after `%s'"),
                i.tm.name, i.rep_prefix);
      return;
    }

  /* Check for lock without a lockable instruction.  Destination operand
     must be memory unless it is xchg (0x86).  */
  if (i.prefix[LOCK_PREFIX]
      && (!i.tm.opcode_modifier.islockable
          || i.mem_operands == 0
          || (i.tm.base_opcode != 0x86
              && !(i.flags[i.operands - 1] & Operand_Mem))))
    {
      as_bad (_("expecting lockable instruction after `lock'"));
      return;
    }

  /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns.  */
  if (i.prefix[DATA_PREFIX]
      && (is_any_vex_encoding (&i.tm)
          || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
          || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX))
    {
      as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
      return;
    }

  /* Check if HLE prefix is OK.  */
  if (i.hle_prefix && !check_hle ())
    return;

  /* Check BND prefix.  The error is reported but assembly of this
     instruction continues.  */
  if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
    as_bad (_("expecting valid branch instruction after `bnd'"));

  /* Check NOTRACK prefix.  As above, this only reports the error.  */
  if (i.notrack_prefix && !i.tm.opcode_modifier.notrackprefixok)
    as_bad (_("expecting indirect branch instruction after `notrack'"));

  /* MPX instructions: reject address sizes that are not allowed.
     In 64-bit code any address size prefix is diagnosed; in 32-bit code
     an address size prefix is diagnosed; in 16-bit code a memory
     operand without the prefix is diagnosed.  */
  if (i.tm.cpu_flags.bitfield.cpumpx)
    {
      if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
        as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
      else if (flag_code != CODE_16BIT
               ? i.prefix[ADDR_PREFIX]
               : i.mem_operands && !i.prefix[ADDR_PREFIX])
        as_bad (_("16-bit address isn't allowed in MPX instructions"));
    }

  /* Insert BND prefix.  */
  if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
    {
      if (!i.prefix[BND_PREFIX])
        add_prefix (BND_PREFIX_OPCODE);
      else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
        {
          as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
          i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
        }
    }

  /* Check string instruction segment overrides.  */
  if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
    {
      gas_assert (i.mem_operands);
      if (!check_string ())
        return;
      i.disp_operands = 0;
    }

  /* Encoding optimization is skipped when the instruction was marked
     no_optimize (the {nooptimize} pseudo prefix sets that field).  */
  if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
    optimize_encoding ();

  if (!process_suffix ())
    return;

  /* Update operand types and check extended states.  */
  for (j = 0; j < i.operands; j++)
    {
      i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
      switch (i.tm.operand_types[j].bitfield.class)
        {
        default:
          break;
        case RegMMX:
          i.xstate |= xstate_mmx;
          break;
        case RegMask:
          i.xstate |= xstate_zmm;
          break;
        case RegSIMD:
          /* Only the widest size the template operand allows is
             recorded.  */
          if (i.tm.operand_types[j].bitfield.tmmword)
            i.xstate |= xstate_tmm;
          else if (i.tm.operand_types[j].bitfield.zmmword)
            i.xstate |= xstate_zmm;
          else if (i.tm.operand_types[j].bitfield.ymmword)
            i.xstate |= xstate_ymm;
          else if (i.tm.operand_types[j].bitfield.xmmword)
            i.xstate |= xstate_xmm;
          break;
        }
    }

  /* Make still unresolved immediate matches conform to size of immediate
     given in i.suffix.  */
  if (!finalize_imm ())
    return;

  if (i.types[0].bitfield.imm1)
    i.imm_operands = 0; /* kludge for shift insns.  */

  /* We only need to check those implicit registers for instructions
     with 3 operands or less.  */
  if (i.operands <= 3)
    for (j = 0; j < i.operands; j++)
      if (i.types[j].bitfield.instance != InstanceNone
          && !i.types[j].bitfield.xmmword)
        i.reg_operands--;

  /* For insns with operands there are more diddles to do to the opcode.  */
  if (i.operands)
    {
      if (!process_operands ())
        return;
    }
  else if (!quiet_warnings && i.tm.opcode_modifier.ugh)
    {
      /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc.  */
      as_warn (_("translating to `%sp'"), i.tm.name);
    }

  if (is_any_vex_encoding (&i.tm))
    {
      if (!cpu_arch_flags.bitfield.cpui286)
        {
          as_bad (_("instruction `%s' isn't supported outside of protected mode."),
                  i.tm.name);
          return;
        }

      /* Check for explicit REX prefix.  */
      if (i.prefix[REX_PREFIX] || i.rex_encoding)
        {
          as_bad (_("REX prefix invalid with `%s'"), i.tm.name);
          return;
        }

      if (i.tm.opcode_modifier.vex)
        build_vex_prefix (t);
      else
        build_evex_prefix ();

      /* The individual REX.RXBW bits got consumed.  */
      i.rex &= REX_OPCODE;
    }

  /* Handle conversion of 'int $3' --> special int3 insn.  XOP or FMA4
     instructions may define INT_OPCODE as well, so avoid this corner
     case for those instructions that use MODRM.  */
  if (i.tm.base_opcode == INT_OPCODE
      && !i.tm.opcode_modifier.modrm
      && i.op[0].imms->X_add_number == 3)
    {
      i.tm.base_opcode = INT3_OPCODE;
      i.imm_operands = 0;
    }

  if ((i.tm.opcode_modifier.jump == JUMP
       || i.tm.opcode_modifier.jump == JUMP_BYTE
       || i.tm.opcode_modifier.jump == JUMP_DWORD)
      && i.op[0].disps->X_op == O_constant)
    {
      /* Convert "jmp constant" (and "call constant") to a jump (call) to
         the absolute address given by the constant.  Since ix86 jumps and
         calls are pc relative, we need to generate a reloc.  */
      i.op[0].disps->X_add_symbol = &abs_symbol;
      i.op[0].disps->X_op = O_symbol;
    }

  /* For 8 bit registers we need an empty rex prefix.  Also if the
     instruction already has a prefix, we need to convert old
     registers to new ones.  */

  if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
       && (i.op[0].regs->reg_flags & RegRex64) != 0)
      || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
          && (i.op[1].regs->reg_flags & RegRex64) != 0)
      || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
           || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
          && i.rex != 0))
    {
      int x;

      i.rex |= REX_OPCODE;
      for (x = 0; x < 2; x++)
        {
          /* Look for 8 bit operand that uses old registers.  */
          if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
              && (i.op[x].regs->reg_flags & RegRex64) == 0)
            {
              gas_assert (!(i.op[x].regs->reg_flags & RegRex));
              /* In case it is "hi" register, give up.  */
              if (i.op[x].regs->reg_num > 3)
                as_bad (_("can't encode register '%s%s' in an "
                          "instruction requiring REX prefix."),
                        register_prefix, i.op[x].regs->reg_name);

              /* Otherwise it is equivalent to the extended register.
                 Since the encoding doesn't change this is merely
                 cosmetic cleanup for debug output.  */

              i.op[x].regs = i.op[x].regs + 8;
            }
        }
    }

  if (i.rex == 0 && i.rex_encoding)
    {
      /* Check if we can add a REX_OPCODE byte.  Look for 8 bit operand
         that uses legacy register.  If it is "hi" register, don't add
         the REX_OPCODE byte.  */
      int x;
      for (x = 0; x < 2; x++)
        if (i.types[x].bitfield.class == Reg
            && i.types[x].bitfield.byte
            && (i.op[x].regs->reg_flags & RegRex64) == 0
            && i.op[x].regs->reg_num > 3)
          {
            gas_assert (!(i.op[x].regs->reg_flags & RegRex));
            i.rex_encoding = FALSE;
            break;
          }

      if (i.rex_encoding)
        i.rex = REX_OPCODE;
    }

  if (i.rex != 0)
    add_prefix (REX_OPCODE | i.rex);

  insert_lfence_before ();

  /* We are ready to output the insn.  */
  output_insn ();

  insert_lfence_after ();

  /* Record what was just assembled; insert_lfence_before looks at
     last_insn.kind and last_insn.seg when handling the next
     instruction.  */
  last_insn.seg = now_seg;

  if (i.tm.opcode_modifier.isprefix)
    {
      last_insn.kind = last_insn_prefix;
      last_insn.name = i.tm.name;
      last_insn.file = as_where (&last_insn.line);
    }
  else
    last_insn.kind = last_insn_other;
}
5041
/* Parse the mnemonic at LINE, along with any prefixes and pseudo
   prefixes in front of it.  Each token is copied, translated through
   mnemonic_chars, into MNEMONIC, which must have room for MAX_MNEM_SIZE
   characters.

   On success current_templates refers to the templates found for the
   mnemonic and the return value points just past the mnemonic (and
   past an accepted ",pt" / ",pn" branch hint).  On failure an error
   has been reported with as_bad and NULL is returned.

   Side effects on `i': real prefixes are recorded through add_prefix,
   pseudo prefixes set the disp/dir/vec/rex encoding or no_optimize
   fields, and a suffix trimmed off the mnemonic is stored in
   i.suffix.  */

static char *
parse_insn (char *line, char *mnemonic)
{
  char *l = line;
  char *token_start = l;
  char *mnem_p;
  int supported;
  const insn_template *t;
  /* Position in MNEMONIC of the last '.' copied, if any.  */
  char *dot_p = NULL;

  /* Each iteration consumes one token.  The loop goes round again only
     when the token is a prefix that is followed by more input.  */
  while (1)
    {
      mnem_p = mnemonic;
      /* Copy characters until mnemonic_chars yields 0; that 0 also
         terminates MNEMONIC.  */
      while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
        {
          if (*mnem_p == '.')
            dot_p = mnem_p;
          mnem_p++;
          if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
            {
              as_bad (_("no such instruction: `%s'"), token_start);
              return NULL;
            }
          l++;
        }
      /* The token must end at white space or end of insn; in AT&T
         syntax PREFIX_SEPARATOR and ',' are accepted as well.  */
      if (!is_space_char (*l)
          && *l != END_OF_INSN
          && (intel_syntax
              || (*l != PREFIX_SEPARATOR
                  && *l != ',')))
        {
          as_bad (_("invalid character %s in mnemonic"),
                  output_invalid (*l));
          return NULL;
        }
      if (token_start == l)
        {
          if (!intel_syntax && *l == PREFIX_SEPARATOR)
            as_bad (_("expecting prefix; got nothing"));
          else
            as_bad (_("expecting mnemonic; got nothing"));
          return NULL;
        }

      /* Look up instruction (or prefix) via hash table.  */
      current_templates = (const templates *) hash_find (op_hash, mnemonic);

      /* Treat the token as a prefix only if something other than a
         single trailing space follows it.  */
      if (*l != END_OF_INSN
          && (!is_space_char (*l) || l[1] != END_OF_INSN)
          && current_templates
          && current_templates->start->opcode_modifier.isprefix)
        {
          if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
            {
              as_bad ((flag_code != CODE_64BIT
                       ? _("`%s' is only supported in 64-bit mode")
                       : _("`%s' is not supported in 64-bit mode")),
                      current_templates->start->name);
              return NULL;
            }
          /* If we are in 16-bit mode, do not allow addr16 or data16.
             Similarly, in 32-bit mode, do not allow addr32 or data32.  */
          if ((current_templates->start->opcode_modifier.size == SIZE16
               || current_templates->start->opcode_modifier.size == SIZE32)
              && flag_code != CODE_64BIT
              && ((current_templates->start->opcode_modifier.size == SIZE32)
                  ^ (flag_code == CODE_16BIT)))
            {
              as_bad (_("redundant %s prefix"),
                      current_templates->start->name);
              return NULL;
            }
          /* A template with opcode_length 0 is a pseudo prefix; its
             base_opcode merely selects which one.  */
          if (current_templates->start->opcode_length == 0)
            {
              /* Handle pseudo prefixes.  */
              switch (current_templates->start->base_opcode)
                {
                case 0x0:
                  /* {disp8} */
                  i.disp_encoding = disp_encoding_8bit;
                  break;
                case 0x1:
                  /* {disp32} */
                  i.disp_encoding = disp_encoding_32bit;
                  break;
                case 0x2:
                  /* {load} */
                  i.dir_encoding = dir_encoding_load;
                  break;
                case 0x3:
                  /* {store} */
                  i.dir_encoding = dir_encoding_store;
                  break;
                case 0x4:
                  /* {vex} */
                  i.vec_encoding = vex_encoding_vex;
                  break;
                case 0x5:
                  /* {vex3} */
                  i.vec_encoding = vex_encoding_vex3;
                  break;
                case 0x6:
                  /* {evex} */
                  i.vec_encoding = vex_encoding_evex;
                  break;
                case 0x7:
                  /* {rex} */
                  i.rex_encoding = TRUE;
                  break;
                case 0x8:
                  /* {nooptimize} */
                  i.no_optimize = TRUE;
                  break;
                default:
                  abort ();
                }
            }
          else
            {
              /* Add prefix, checking for repeated prefixes.  */
              switch (add_prefix (current_templates->start->base_opcode))
                {
                case PREFIX_EXIST:
                  return NULL;
                case PREFIX_DS:
                  /* A prefix whose template carries the IBT flag is
                     remembered as the notrack prefix.  */
                  if (current_templates->start->cpu_flags.bitfield.cpuibt)
                    i.notrack_prefix = current_templates->start->name;
                  break;
                case PREFIX_REP:
                  /* The template's CPU flags tell HLE, BND and plain
                     REP prefixes apart.  */
                  if (current_templates->start->cpu_flags.bitfield.cpuhle)
                    i.hle_prefix = current_templates->start->name;
                  else if (current_templates->start->cpu_flags.bitfield.cpumpx)
                    i.bnd_prefix = current_templates->start->name;
                  else
                    i.rep_prefix = current_templates->start->name;
                  break;
                default:
                  break;
                }
            }
          /* Skip past PREFIX_SEPARATOR and reset token_start.  */
          token_start = ++l;
        }
      else
        break;
    }

  if (!current_templates)
    {
      /* Deprecated functionality (new code should use pseudo-prefixes instead):
         Check if we should swap operand or force 32bit displacement in
         encoding.  */
      if (mnem_p - 2 == dot_p && dot_p[1] == 's')
        i.dir_encoding = dir_encoding_swap;
      else if (mnem_p - 3 == dot_p
               && dot_p[1] == 'd'
               && dot_p[2] == '8')
        i.disp_encoding = disp_encoding_8bit;
      else if (mnem_p - 4 == dot_p
               && dot_p[1] == 'd'
               && dot_p[2] == '3'
               && dot_p[3] == '2')
        i.disp_encoding = disp_encoding_32bit;
      else
        goto check_suffix;
      /* Cut the ".s" / ".d8" / ".d32" tail off and look the mnemonic up
         again.  */
      mnem_p = dot_p;
      *dot_p = '\0';
      current_templates = (const templates *) hash_find (op_hash, mnemonic);
    }

  if (!current_templates)
    {
    check_suffix:
      if (mnem_p > mnemonic)
        {
          /* See if we can get a match by trimming off a suffix.  */
          switch (mnem_p[-1])
            {
            case WORD_MNEM_SUFFIX:
              /* Note the unusual layout: the next two case labels sit
                 inside the `else' arm, so the assignment of mnem_p[-1]
                 serves the non-float word case as well as the byte and
                 qword cases.  The two statements after it (trim and
                 look up) run for all three labels.  */
              if (intel_syntax && (intel_float_operand (mnemonic) & 2))
                i.suffix = SHORT_MNEM_SUFFIX;
              else
                /* Fall through.  */
              case BYTE_MNEM_SUFFIX:
              case QWORD_MNEM_SUFFIX:
                i.suffix = mnem_p[-1];
              mnem_p[-1] = '\0';
              current_templates = (const templates *) hash_find (op_hash,
                                                                 mnemonic);
              break;
            case SHORT_MNEM_SUFFIX:
            case LONG_MNEM_SUFFIX:
              if (!intel_syntax)
                {
                  i.suffix = mnem_p[-1];
                  mnem_p[-1] = '\0';
                  current_templates = (const templates *) hash_find (op_hash,
                                                                     mnemonic);
                }
              break;

              /* Intel Syntax.  */
            case 'd':
              if (intel_syntax)
                {
                  if (intel_float_operand (mnemonic) == 1)
                    i.suffix = SHORT_MNEM_SUFFIX;
                  else
                    i.suffix = LONG_MNEM_SUFFIX;
                  mnem_p[-1] = '\0';
                  current_templates = (const templates *) hash_find (op_hash,
                                                                     mnemonic);
                }
              break;
            }
        }

      if (!current_templates)
        {
          as_bad (_("no such instruction: `%s'"), token_start);
          return NULL;
        }
    }

  if (current_templates->start->opcode_modifier.jump == JUMP
      || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
    {
      /* Check for a branch hint.  We allow ",pt" and ",pn" for
         predict taken and predict not taken respectively.
         I'm not sure that branch hints actually do anything on loop
         and jcxz insns (JumpByte) for current Pentium4 chips.  They
         may work in the future and it doesn't hurt to accept them
         now.  */
      if (l[0] == ',' && l[1] == 'p')
        {
          if (l[2] == 't')
            {
              if (!add_prefix (DS_PREFIX_OPCODE))
                return NULL;
              l += 3;
            }
          else if (l[2] == 'n')
            {
              if (!add_prefix (CS_PREFIX_OPCODE))
                return NULL;
              l += 3;
            }
        }
    }
  /* Any other comma loses.  */
  if (*l == ',')
    {
      as_bad (_("invalid character %s in mnemonic"),
              output_invalid (*l));
      return NULL;
    }

  /* Check if instruction is supported on specified architecture.  */
  supported = 0;
  /* The match bits of all templates tried so far are accumulated; the
     search stops successfully once they add up to a perfect match.  */
  for (t = current_templates->start; t < current_templates->end; ++t)
    {
      supported |= cpu_flags_match (t);
      if (supported == CPU_FLAGS_PERFECT_MATCH)
        {
          if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT))
            as_warn (_("use .code16 to ensure correct addressing mode"));

          return l;
        }
    }

  /* No perfect match: say whether the 64-bit mode or the selected
     architecture ruled the instruction out.  */
  if (!(supported & CPU_FLAGS_64BIT_MATCH))
    as_bad (flag_code == CODE_64BIT
            ? _("`%s' is not supported in 64-bit mode")
            : _("`%s' is only supported in 64-bit mode"),
            current_templates->start->name);
  else
    as_bad (_("`%s' is not supported on `%s%s'"),
            current_templates->start->name,
            cpu_arch_name ? cpu_arch_name : default_arch,
            cpu_sub_arch_name ? cpu_sub_arch_name : "");

  return NULL;
}
5326
5327 static char *
5328 parse_operands (char *l, const char *mnemonic)
5329 {
5330   char *token_start;
5331
5332   /* 1 if operand is pending after ','.  */
5333   unsigned int expecting_operand = 0;
5334
5335   /* Non-zero if operand parens not balanced.  */
5336   unsigned int paren_not_balanced;
5337
5338   while (*l != END_OF_INSN)
5339     {
5340       /* Skip optional white space before operand.  */
5341       if (is_space_char (*l))
5342         ++l;
5343       if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5344         {
5345           as_bad (_("invalid character %s before operand %d"),
5346                   output_invalid (*l),
5347                   i.operands + 1);
5348           return NULL;
5349         }
5350       token_start = l;  /* After white space.  */
5351       paren_not_balanced = 0;
5352       while (paren_not_balanced || *l != ',')
5353         {
5354           if (*l == END_OF_INSN)
5355             {
5356               if (paren_not_balanced)
5357                 {
5358                   if (!intel_syntax)
5359                     as_bad (_("unbalanced parenthesis in operand %d."),
5360                             i.operands + 1);
5361                   else
5362                     as_bad (_("unbalanced brackets in operand %d."),
5363                             i.operands + 1);
5364                   return NULL;
5365                 }
5366               else
5367                 break;  /* we are done */
5368             }
5369           else if (!is_operand_char (*l) && !is_space_char (*l) && *l != '"')
5370             {
5371               as_bad (_("invalid character %s in operand %d"),
5372                       output_invalid (*l),
5373                       i.operands + 1);
5374               return NULL;
5375             }
5376           if (!intel_syntax)
5377             {
5378               if (*l == '(')
5379                 ++paren_not_balanced;
5380               if (*l == ')')
5381                 --paren_not_balanced;
5382             }
5383           else
5384             {
5385               if (*l == '[')
5386                 ++paren_not_balanced;
5387               if (*l == ']')
5388                 --paren_not_balanced;
5389             }
5390           l++;
5391         }
5392       if (l != token_start)
5393         {                       /* Yes, we've read in another operand.  */
5394           unsigned int operand_ok;
5395           this_operand = i.operands++;
5396           if (i.operands > MAX_OPERANDS)
5397             {
5398               as_bad (_("spurious operands; (%d operands/instruction max)"),
5399                       MAX_OPERANDS);
5400               return NULL;
5401             }
5402           i.types[this_operand].bitfield.unspecified = 1;
5403           /* Now parse operand adding info to 'i' as we go along.  */
5404           END_STRING_AND_SAVE (l);
5405
5406           if (i.mem_operands > 1)
5407             {
5408               as_bad (_("too many memory references for `%s'"),
5409                       mnemonic);
5410               return 0;
5411             }
5412
5413           if (intel_syntax)
5414             operand_ok =
5415               i386_intel_operand (token_start,
5416                                   intel_float_operand (mnemonic));
5417           else
5418             operand_ok = i386_att_operand (token_start);
5419
5420           RESTORE_END_STRING (l);
5421           if (!operand_ok)
5422             return NULL;
5423         }
5424       else
5425         {
5426           if (expecting_operand)
5427             {
5428             expecting_operand_after_comma:
5429               as_bad (_("expecting operand after ','; got nothing"));
5430               return NULL;
5431             }
5432           if (*l == ',')
5433             {
5434               as_bad (_("expecting operand before ','; got nothing"));
5435               return NULL;
5436             }
5437         }
5438
5439       /* Now *l must be either ',' or END_OF_INSN.  */
5440       if (*l == ',')
5441         {
5442           if (*++l == END_OF_INSN)
5443             {
5444               /* Just skip it, if it's \n complain.  */
5445               goto expecting_operand_after_comma;
5446             }
5447           expecting_operand = 1;
5448         }
5449     }
5450   return l;
5451 }
5452
5453 static void
5454 swap_2_operands (int xchg1, int xchg2)
5455 {
5456   union i386_op temp_op;
5457   i386_operand_type temp_type;
5458   unsigned int temp_flags;
5459   enum bfd_reloc_code_real temp_reloc;
5460
5461   temp_type = i.types[xchg2];
5462   i.types[xchg2] = i.types[xchg1];
5463   i.types[xchg1] = temp_type;
5464
5465   temp_flags = i.flags[xchg2];
5466   i.flags[xchg2] = i.flags[xchg1];
5467   i.flags[xchg1] = temp_flags;
5468
5469   temp_op = i.op[xchg2];
5470   i.op[xchg2] = i.op[xchg1];
5471   i.op[xchg1] = temp_op;
5472
5473   temp_reloc = i.reloc[xchg2];
5474   i.reloc[xchg2] = i.reloc[xchg1];
5475   i.reloc[xchg1] = temp_reloc;
5476
5477   if (i.mask)
5478     {
5479       if (i.mask->operand == xchg1)
5480         i.mask->operand = xchg2;
5481       else if (i.mask->operand == xchg2)
5482         i.mask->operand = xchg1;
5483     }
5484   if (i.broadcast)
5485     {
5486       if (i.broadcast->operand == xchg1)
5487         i.broadcast->operand = xchg2;
5488       else if (i.broadcast->operand == xchg2)
5489         i.broadcast->operand = xchg1;
5490     }
5491   if (i.rounding)
5492     {
5493       if (i.rounding->operand == xchg1)
5494         i.rounding->operand = xchg2;
5495       else if (i.rounding->operand == xchg2)
5496         i.rounding->operand = xchg1;
5497     }
5498 }
5499
5500 static void
5501 swap_operands (void)
5502 {
5503   switch (i.operands)
5504     {
5505     case 5:
5506     case 4:
5507       swap_2_operands (1, i.operands - 2);
5508       /* Fall through.  */
5509     case 3:
5510     case 2:
5511       swap_2_operands (0, i.operands - 1);
5512       break;
5513     default:
5514       abort ();
5515     }
5516
5517   if (i.mem_operands == 2)
5518     {
5519       const seg_entry *temp_seg;
5520       temp_seg = i.seg[0];
5521       i.seg[0] = i.seg[1];
5522       i.seg[1] = temp_seg;
5523     }
5524 }
5525
5526 /* Try to ensure constant immediates are represented in the smallest
5527    opcode possible.  */
5528 static void
5529 optimize_imm (void)
5530 {
5531   char guess_suffix = 0;
5532   int op;
5533
5534   if (i.suffix)
5535     guess_suffix = i.suffix;
5536   else if (i.reg_operands)
5537     {
5538       /* Figure out a suffix from the last register operand specified.
5539          We can't do this properly yet, i.e. excluding special register
5540          instances, but the following works for instructions with
5541          immediates.  In any case, we can't set i.suffix yet.  */
5542       for (op = i.operands; --op >= 0;)
5543         if (i.types[op].bitfield.class != Reg)
5544           continue;
5545         else if (i.types[op].bitfield.byte)
5546           {
5547             guess_suffix = BYTE_MNEM_SUFFIX;
5548             break;
5549           }
5550         else if (i.types[op].bitfield.word)
5551           {
5552             guess_suffix = WORD_MNEM_SUFFIX;
5553             break;
5554           }
5555         else if (i.types[op].bitfield.dword)
5556           {
5557             guess_suffix = LONG_MNEM_SUFFIX;
5558             break;
5559           }
5560         else if (i.types[op].bitfield.qword)
5561           {
5562             guess_suffix = QWORD_MNEM_SUFFIX;
5563             break;
5564           }
5565     }
5566   else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
5567     guess_suffix = WORD_MNEM_SUFFIX;
5568
5569   for (op = i.operands; --op >= 0;)
5570     if (operand_type_check (i.types[op], imm))
5571       {
5572         switch (i.op[op].imms->X_op)
5573           {
5574           case O_constant:
5575             /* If a suffix is given, this operand may be shortened.  */
5576             switch (guess_suffix)
5577               {
5578               case LONG_MNEM_SUFFIX:
5579                 i.types[op].bitfield.imm32 = 1;
5580                 i.types[op].bitfield.imm64 = 1;
5581                 break;
5582               case WORD_MNEM_SUFFIX:
5583                 i.types[op].bitfield.imm16 = 1;
5584                 i.types[op].bitfield.imm32 = 1;
5585                 i.types[op].bitfield.imm32s = 1;
5586                 i.types[op].bitfield.imm64 = 1;
5587                 break;
5588               case BYTE_MNEM_SUFFIX:
5589                 i.types[op].bitfield.imm8 = 1;
5590                 i.types[op].bitfield.imm8s = 1;
5591                 i.types[op].bitfield.imm16 = 1;
5592                 i.types[op].bitfield.imm32 = 1;
5593                 i.types[op].bitfield.imm32s = 1;
5594                 i.types[op].bitfield.imm64 = 1;
5595                 break;
5596               }
5597
5598             /* If this operand is at most 16 bits, convert it
5599                to a signed 16 bit number before trying to see
5600                whether it will fit in an even smaller size.
5601                This allows a 16-bit operand such as $0xffe0 to
5602                be recognised as within Imm8S range.  */
5603             if ((i.types[op].bitfield.imm16)
5604                 && (i.op[op].imms->X_add_number & ~(offsetT) 0xffff) == 0)
5605               {
5606                 i.op[op].imms->X_add_number =
5607                   (((i.op[op].imms->X_add_number & 0xffff) ^ 0x8000) - 0x8000);
5608               }
5609 #ifdef BFD64
5610             /* Store 32-bit immediate in 64-bit for 64-bit BFD.  */
5611             if ((i.types[op].bitfield.imm32)
5612                 && ((i.op[op].imms->X_add_number & ~(((offsetT) 2 << 31) - 1))
5613                     == 0))
5614               {
5615                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5616                                                 ^ ((offsetT) 1 << 31))
5617                                                - ((offsetT) 1 << 31));
5618               }
5619 #endif
5620             i.types[op]
5621               = operand_type_or (i.types[op],
5622                                  smallest_imm_type (i.op[op].imms->X_add_number));
5623
5624             /* We must avoid matching of Imm32 templates when 64bit
5625                only immediate is available.  */
5626             if (guess_suffix == QWORD_MNEM_SUFFIX)
5627               i.types[op].bitfield.imm32 = 0;
5628             break;
5629
5630           case O_absent:
5631           case O_register:
5632             abort ();
5633
5634             /* Symbols and expressions.  */
5635           default:
5636             /* Convert symbolic operand to proper sizes for matching, but don't
5637                prevent matching a set of insns that only supports sizes other
5638                than those matching the insn suffix.  */
5639             {
5640               i386_operand_type mask, allowed;
5641               const insn_template *t;
5642
5643               operand_type_set (&mask, 0);
5644               operand_type_set (&allowed, 0);
5645
5646               for (t = current_templates->start;
5647                    t < current_templates->end;
5648                    ++t)
5649                 {
5650                   allowed = operand_type_or (allowed, t->operand_types[op]);
5651                   allowed = operand_type_and (allowed, anyimm);
5652                 }
5653               switch (guess_suffix)
5654                 {
5655                 case QWORD_MNEM_SUFFIX:
5656                   mask.bitfield.imm64 = 1;
5657                   mask.bitfield.imm32s = 1;
5658                   break;
5659                 case LONG_MNEM_SUFFIX:
5660                   mask.bitfield.imm32 = 1;
5661                   break;
5662                 case WORD_MNEM_SUFFIX:
5663                   mask.bitfield.imm16 = 1;
5664                   break;
5665                 case BYTE_MNEM_SUFFIX:
5666                   mask.bitfield.imm8 = 1;
5667                   break;
5668                 default:
5669                   break;
5670                 }
5671               allowed = operand_type_and (mask, allowed);
5672               if (!operand_type_all_zero (&allowed))
5673                 i.types[op] = operand_type_and (i.types[op], mask);
5674             }
5675             break;
5676           }
5677       }
5678 }
5679
5680 /* Try to use the smallest displacement type too.  */
5681 static void
5682 optimize_disp (void)
5683 {
5684   int op;
5685
5686   for (op = i.operands; --op >= 0;)
5687     if (operand_type_check (i.types[op], disp))
5688       {
5689         if (i.op[op].disps->X_op == O_constant)
5690           {
5691             offsetT op_disp = i.op[op].disps->X_add_number;
5692
5693             if (i.types[op].bitfield.disp16
5694                 && (op_disp & ~(offsetT) 0xffff) == 0)
5695               {
5696                 /* If this operand is at most 16 bits, convert
5697                    to a signed 16 bit number and don't use 64bit
5698                    displacement.  */
5699                 op_disp = (((op_disp & 0xffff) ^ 0x8000) - 0x8000);
5700                 i.types[op].bitfield.disp64 = 0;
5701               }
5702 #ifdef BFD64
5703             /* Optimize 64-bit displacement to 32-bit for 64-bit BFD.  */
5704             if (i.types[op].bitfield.disp32
5705                 && (op_disp & ~(((offsetT) 2 << 31) - 1)) == 0)
5706               {
5707                 /* If this operand is at most 32 bits, convert
5708                    to a signed 32 bit number and don't use 64bit
5709                    displacement.  */
5710                 op_disp &= (((offsetT) 2 << 31) - 1);
5711                 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
5712                 i.types[op].bitfield.disp64 = 0;
5713               }
5714 #endif
5715             if (!op_disp && i.types[op].bitfield.baseindex)
5716               {
5717                 i.types[op].bitfield.disp8 = 0;
5718                 i.types[op].bitfield.disp16 = 0;
5719                 i.types[op].bitfield.disp32 = 0;
5720                 i.types[op].bitfield.disp32s = 0;
5721                 i.types[op].bitfield.disp64 = 0;
5722                 i.op[op].disps = 0;
5723                 i.disp_operands--;
5724               }
5725             else if (flag_code == CODE_64BIT)
5726               {
5727                 if (fits_in_signed_long (op_disp))
5728                   {
5729                     i.types[op].bitfield.disp64 = 0;
5730                     i.types[op].bitfield.disp32s = 1;
5731                   }
5732                 if (i.prefix[ADDR_PREFIX]
5733                     && fits_in_unsigned_long (op_disp))
5734                   i.types[op].bitfield.disp32 = 1;
5735               }
5736             if ((i.types[op].bitfield.disp32
5737                  || i.types[op].bitfield.disp32s
5738                  || i.types[op].bitfield.disp16)
5739                 && fits_in_disp8 (op_disp))
5740               i.types[op].bitfield.disp8 = 1;
5741           }
5742         else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
5743                  || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
5744           {
5745             fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
5746                          i.op[op].disps, 0, i.reloc[op]);
5747             i.types[op].bitfield.disp8 = 0;
5748             i.types[op].bitfield.disp16 = 0;
5749             i.types[op].bitfield.disp32 = 0;
5750             i.types[op].bitfield.disp32s = 0;
5751             i.types[op].bitfield.disp64 = 0;
5752           }
5753         else
5754           /* We only support 64bit displacement on constants.  */
5755           i.types[op].bitfield.disp64 = 0;
5756       }
5757 }
5758
5759 /* Return 1 if there is a match in broadcast bytes between operand
5760    GIVEN and instruction template T.   */
5761
5762 static INLINE int
5763 match_broadcast_size (const insn_template *t, unsigned int given)
5764 {
5765   return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
5766            && i.types[given].bitfield.byte)
5767           || (t->opcode_modifier.broadcast == WORD_BROADCAST
5768               && i.types[given].bitfield.word)
5769           || (t->opcode_modifier.broadcast == DWORD_BROADCAST
5770               && i.types[given].bitfield.dword)
5771           || (t->opcode_modifier.broadcast == QWORD_BROADCAST
5772               && i.types[given].bitfield.qword));
5773 }
5774
5775 /* Check if operands are valid for the instruction.

        Verifies the vector-related properties of the operands recorded in
        the global insn state `i' against template T, in this order:
        AVX512VL requirement, vector index register vs. VSIB use, default
        mask, VSIB register distinctness, AMX tile register distinctness,
        broadcast, masking, RC/SAE, the XOP Imm4 operand and vector Disp8.

        Returns 0 when the operands are acceptable.  Returns 1, with
        i.error set to the reason, as soon as one check fails.

        Side effects visible below: may emit warnings through as_warn,
        fills in i.broadcast->bytes, clears i.types[0] in the Imm4 case,
        sets i.memshift and adjusts the disp8 bit of displacement
        operands.  */
5776
5777 static int
5778 check_VecOperands (const insn_template *t)
5779 {
5780   unsigned int op;
5781   i386_cpu_flags cpu;
5782
5783   /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
5784      any one operand are implicitly requiring AVX512VL support if the actual
5785      operand size is YMMword or XMMword.  Since this function runs after
5786      template matching, there's no need to check for YMMword/XMMword in
5787      the template.  */
5788   cpu = cpu_flags_and (t->cpu_flags, avx512);
5789   if (!cpu_flags_all_zero (&cpu)
5790       && !t->cpu_flags.bitfield.cpuavx512vl
5791       && !cpu_arch_flags.bitfield.cpuavx512vl)
5792     {
5793       for (op = 0; op < t->operands; ++op)
5794         {
5795           if (t->operand_types[op].bitfield.zmmword
5796               && (i.types[op].bitfield.ymmword
5797                   || i.types[op].bitfield.xmmword))
5798             {
5799               i.error = unsupported;
5800               return 1;
5801             }
5802         }
5803     }
5804
5805   /* Without VSIB byte, we can't have a vector register for index.  */
5806   if (!t->opcode_modifier.sib
5807       && i.index_reg
5808       && (i.index_reg->reg_type.bitfield.xmmword
5809           || i.index_reg->reg_type.bitfield.ymmword
5810           || i.index_reg->reg_type.bitfield.zmmword))
5811     {
5812       i.error = unsupported_vector_index_register;
5813       return 1;
5814     }
5815
5816   /* Check if default mask is allowed.  A template marked nodefmask
          needs an explicit mask whose register number is not 0.  */
5817   if (t->opcode_modifier.nodefmask
5818       && (!i.mask || i.mask->mask->reg_num == 0))
5819     {
5820       i.error = no_default_mask;
5821       return 1;
5822     }
5823
5824   /* For VSIB byte, we need a vector register for index, and all vector
5825      registers must be distinct.  */
5826   if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
5827     {
          /* The index register width has to be the one the template's
             VECSIB kind asks for.  */
5828       if (!i.index_reg
5829           || !((t->opcode_modifier.sib == VECSIB128
5830                 && i.index_reg->reg_type.bitfield.xmmword)
5831                || (t->opcode_modifier.sib == VECSIB256
5832                    && i.index_reg->reg_type.bitfield.ymmword)
5833                || (t->opcode_modifier.sib == VECSIB512
5834                    && i.index_reg->reg_type.bitfield.zmmword)))
5835       {
5836         i.error = invalid_vsib_address;
5837         return 1;
5838       }
5839
5840       gas_assert (i.reg_operands == 2 || i.mask);
5841       if (i.reg_operands == 2 && !i.mask)
5842         {
              /* Form with two register operands and no mask: operands 0 and
                 2 are asserted to be XMM/YMM registers and are compared
                 with each other and with the index register.  */
5843           gas_assert (i.types[0].bitfield.class == RegSIMD);
5844           gas_assert (i.types[0].bitfield.xmmword
5845                       || i.types[0].bitfield.ymmword);
5846           gas_assert (i.types[2].bitfield.class == RegSIMD);
5847           gas_assert (i.types[2].bitfield.xmmword
5848                       || i.types[2].bitfield.ymmword);
              /* NB: both `return 0' statements below leave the function, so
                 none of the later checks run on those paths.  */
5849           if (operand_check == check_none)
5850             return 0;
5851           if (register_number (i.op[0].regs)
5852               != register_number (i.index_reg)
5853               && register_number (i.op[2].regs)
5854                  != register_number (i.index_reg)
5855               && register_number (i.op[0].regs)
5856                  != register_number (i.op[2].regs))
5857             return 0;
5858           if (operand_check == check_error)
5859             {
5860               i.error = invalid_vector_register_set;
5861               return 1;
5862             }
              /* Otherwise only warn; checking continues after the VSIB
                 block.  */
5863           as_warn (_("mask, index, and destination registers should be distinct"));
5864         }
5865       else if (i.reg_operands == 1 && i.mask)
5866         {
              /* Masked form with a single register operand: only operand 1,
                 when it is a vector register, is compared with the index
                 register.  */
5867           if (i.types[1].bitfield.class == RegSIMD
5868               && (i.types[1].bitfield.xmmword
5869                   || i.types[1].bitfield.ymmword
5870                   || i.types[1].bitfield.zmmword)
5871               && (register_number (i.op[1].regs)
5872                   == register_number (i.index_reg)))
5873             {
5874               if (operand_check == check_error)
5875                 {
5876                   i.error = invalid_vector_register_set;
5877                   return 1;
5878                 }
5879               if (operand_check != check_none)
5880                 as_warn (_("index and destination registers should be distinct"));
5881             }
5882         }
5883     }
5884
5885   /* For AMX instructions with three tmmword operands, all tmmword operands
5886      must be distinct.  */
5887   if (t->operand_types[0].bitfield.tmmword
5888       && i.reg_operands == 3)
5889     {
5890       if (register_number (i.op[0].regs)
5891           == register_number (i.op[1].regs)
5892           || register_number (i.op[0].regs)
5893              == register_number (i.op[2].regs)
5894           || register_number (i.op[1].regs)
5895              == register_number (i.op[2].regs))
5896         {
5897           i.error = invalid_tmm_register_set;
5898           return 1;
5899         }
5900     }
5901
5902   /* Check if broadcast is supported by the instruction and is applied
5903      to the memory operand.  */
5904   if (i.broadcast)
5905     {
5906       i386_operand_type type, overlap;
5907
5908       /* Check if specified broadcast is supported in this instruction,
5909          and its broadcast bytes match the memory operand.  */
5910       op = i.broadcast->operand;
5911       if (!t->opcode_modifier.broadcast
5912           || !(i.flags[op] & Operand_Mem)
5913           || (!i.types[op].bitfield.unspecified
5914               && !match_broadcast_size (t, op)))
5915         {
            /* Shared failure exit; also reached by the gotos further
               down.  */
5916         bad_broadcast:
5917           i.error = unsupported_broadcast;
5918           return 1;
5919         }
5920
          /* Total size of the broadcast: 2^(broadcast - 1) scaled by
             i.broadcast->type.  NOTE(review): presumably the element size
             in bytes times the N of {1toN} -- confirm against the operand
             parser.  */
5921       i.broadcast->bytes = ((1 << (t->opcode_modifier.broadcast - 1))
5922                             * i.broadcast->type);
          /* Express that size as an operand type so it can be compared
             with what the template accepts for this operand.  */
5923       operand_type_set (&type, 0);
5924       switch (i.broadcast->bytes)
5925         {
5926         case 2:
5927           type.bitfield.word = 1;
5928           break;
5929         case 4:
5930           type.bitfield.dword = 1;
5931           break;
5932         case 8:
5933           type.bitfield.qword = 1;
5934           break;
5935         case 16:
5936           type.bitfield.xmmword = 1;
5937           break;
5938         case 32:
5939           type.bitfield.ymmword = 1;
5940           break;
5941         case 64:
5942           type.bitfield.zmmword = 1;
5943           break;
5944         default:
5945           goto bad_broadcast;
5946         }
5947
5948       overlap = operand_type_and (type, t->operand_types[op]);
          /* When the template operand is a SIMD register that allows more
             than one scalar element size, a vector-sized overlap is not
             accepted as a match.  */
5949       if (t->operand_types[op].bitfield.class == RegSIMD
5950           && t->operand_types[op].bitfield.byte
5951              + t->operand_types[op].bitfield.word
5952              + t->operand_types[op].bitfield.dword
5953              + t->operand_types[op].bitfield.qword > 1)
5954         {
5955           overlap.bitfield.xmmword = 0;
5956           overlap.bitfield.ymmword = 0;
5957           overlap.bitfield.zmmword = 0;
5958         }
5959       if (operand_type_all_zero (&overlap))
5960           goto bad_broadcast;
5961
5962       if (t->opcode_modifier.checkregsize)
5963         {
5964           unsigned int j;
5965
              /* Every other operand must be register-size compatible with
                 the broadcast operand, using the computed broadcast type in
                 place of the operand's own type.  */
5966           type.bitfield.baseindex = 1;
5967           for (j = 0; j < i.operands; ++j)
5968             {
5969               if (j != op
5970                   && !operand_type_register_match(i.types[j],
5971                                                   t->operand_types[j],
5972                                                   type,
5973                                                   t->operand_types[op]))
5974                 goto bad_broadcast;
5975             }
5976         }
5977     }
5978   /* If broadcast is supported in this instruction, we need to check if
5979      operand of one-element size isn't specified without broadcast.  */
5980   else if (t->opcode_modifier.broadcast && i.mem_operands)
5981     {
5982       /* Find memory operand.  */
5983       for (op = 0; op < i.operands; op++)
5984         if (i.flags[op] & Operand_Mem)
5985           break;
5986       gas_assert (op < i.operands);
5987       /* Check size of the memory operand.  */
5988       if (match_broadcast_size (t, op))
5989         {
5990           i.error = broadcast_needed;
5991           return 1;
5992         }
5993     }
5994   else
5995     op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning.  */
5996
5997   /* Check if requested masking is supported.  */
5998   if (i.mask)
5999     {
6000       switch (t->opcode_modifier.masking)
6001         {
6002         case BOTH_MASKING:
6003           break;
6004         case MERGING_MASKING:
6005           if (i.mask->zeroing)
6006             {
              /* A template whose masking field is 0 enters here through
                 the switch and shares the error path with zeroing
                 requested on a merging-only template.  */
6007         case 0:
6008               i.error = unsupported_masking;
6009               return 1;
6010             }
6011           break;
6012         case DYNAMIC_MASKING:
6013           /* Memory destinations allow only merging masking.  */
6014           if (i.mask->zeroing && i.mem_operands)
6015             {
6016               /* Find memory operand.  */
6017               for (op = 0; op < i.operands; op++)
6018                 if (i.flags[op] & Operand_Mem)
6019                   break;
6020               gas_assert (op < i.operands);
                  /* The memory operand counts as the destination when it is
                     the last operand.  */
6021               if (op == i.operands - 1)
6022                 {
6023                   i.error = unsupported_masking;
6024                   return 1;
6025                 }
6026             }
6027           break;
6028         default:
6029           abort ();
6030         }
6031     }
6032
6033   /* Check if masking is applied to dest operand.  */
6034   if (i.mask && (i.mask->operand != (int) (i.operands - 1)))
6035     {
6036       i.error = mask_not_on_destination;
6037       return 1;
6038     }
6039
6040   /* Check RC/SAE.  */
6041   if (i.rounding)
6042     {
          /* The template must allow SAE at all, and must allow static
             rounding for anything other than a plain saeonly request.  */
6043       if (!t->opcode_modifier.sae
6044           || (i.rounding->type != saeonly && !t->opcode_modifier.staticrounding))
6045         {
6046           i.error = unsupported_rc_sae;
6047           return 1;
6048         }
6049       /* If the instruction has several immediate operands and one of
6050          them is rounding, the rounding operand should be the last
6051          immediate operand.  */
6052       if (i.imm_operands > 1
6053           && i.rounding->operand != (int) (i.imm_operands - 1))
6054         {
6055           i.error = rc_sae_operand_not_last_imm;
6056           return 1;
6057         }
6058     }
6059
6060   /* Check the special Imm4 cases; must be the first operand.  */
6061   if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
6062     {
6063       if (i.op[0].imms->X_op != O_constant
6064           || !fits_in_imm4 (i.op[0].imms->X_add_number))
6065         {
6066           i.error = bad_imm4;
6067           return 1;
6068         }
6069
6070       /* Turn off Imm<N> so that update_imm won't complain.  */
6071       operand_type_set (&i.types[0], 0);
6072     }
6073
6074   /* Check vector Disp8 operand.  */
6075   if (t->opcode_modifier.disp8memshift
6076       && i.disp_encoding != disp_encoding_32bit)
6077     {
          /* Pick i.memshift: from the broadcast kind when broadcasting,
             from the template when it gives a fixed shift, and otherwise
             (DISP8_SHIFT_VL) from the operand sizes.  */
6078       if (i.broadcast)
6079         i.memshift = t->opcode_modifier.broadcast - 1;
6080       else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6081         i.memshift = t->opcode_modifier.disp8memshift;
6082       else
6083         {
6084           const i386_operand_type *type = NULL;
6085
6086           i.memshift = 0;
6087           for (op = 0; op < i.operands; op++)
6088             if (i.flags[op] & Operand_Mem)
6089               {
                    /* Memory operand: EVEXLIG templates get 2, or 3 with a
                       qword suffix.  Otherwise remember the type that
                       settles the size: the template's if it allows at
                       most one vector size, else the operand's own if a
                       size was specified.  */
6090                 if (t->opcode_modifier.evex == EVEXLIG)
6091                   i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6092                 else if (t->operand_types[op].bitfield.xmmword
6093                          + t->operand_types[op].bitfield.ymmword
6094                          + t->operand_types[op].bitfield.zmmword <= 1)
6095                   type = &t->operand_types[op];
6096                 else if (!i.types[op].bitfield.unspecified)
6097                   type = &i.types[op];
6098               }
6099             else if (i.types[op].bitfield.class == RegSIMD
6100                      && t->opcode_modifier.evex != EVEXLIG)
6101               {
                    /* Vector register operand: keep the largest shift seen
                       so far (4, 5 or 6 for XMM, YMM or ZMM).  */
6102                 if (i.types[op].bitfield.zmmword)
6103                   i.memshift = 6;
6104                 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6105                   i.memshift = 5;
6106                 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6107                   i.memshift = 4;
6108               }
6109
              /* A type remembered for the memory operand overrides whatever
                 the register operands suggested.  */
6110           if (type)
6111             {
6112               if (type->bitfield.zmmword)
6113                 i.memshift = 6;
6114               else if (type->bitfield.ymmword)
6115                 i.memshift = 5;
6116               else if (type->bitfield.xmmword)
6117                 i.memshift = 4;
6118             }
6119
6120           /* For the check in fits_in_disp8().  */
6121           if (i.memshift == 0)
6122             i.memshift = -1;
6123         }
6124
          /* Re-evaluate Disp8 for constant displacements.  On a fit the
             function returns right away, which leaves i.memshift as chosen
             above; otherwise disp8 is cleared and i.memshift is reset
             below.  (fits_in_disp8 presumably consults i.memshift, as the
             comment above suggests -- confirm.)  */
6125       for (op = 0; op < i.operands; op++)
6126         if (operand_type_check (i.types[op], disp)
6127             && i.op[op].disps->X_op == O_constant)
6128           {
6129             if (fits_in_disp8 (i.op[op].disps->X_add_number))
6130               {
6131                 i.types[op].bitfield.disp8 = 1;
6132                 return 0;
6133               }
6134             i.types[op].bitfield.disp8 = 0;
6135           }
6136     }
6137
6138   i.memshift = 0;
6139
6140   return 0;
6141 }
6142
6143 /* Check if encoding requirements are met by the instruction.  */
6144
6145 static int
6146 VEX_check_encoding (const insn_template *t)
6147 {
6148   if (i.vec_encoding == vex_encoding_error)
6149     {
6150       i.error = unsupported;
6151       return 1;
6152     }
6153
6154   if (i.vec_encoding == vex_encoding_evex)
6155     {
6156       /* This instruction must be encoded with EVEX prefix.  */
6157       if (!is_evex_encoding (t))
6158         {
6159           i.error = unsupported;
6160           return 1;
6161         }
6162       return 0;
6163     }
6164
6165   if (!t->opcode_modifier.vex)
6166     {
6167       /* This instruction template doesn't have VEX prefix.  */
6168       if (i.vec_encoding != vex_encoding_default)
6169         {
6170           i.error = unsupported;
6171           return 1;
6172         }
6173       return 0;
6174     }
6175
6176   return 0;
6177 }
6178
6179 static const insn_template *
6180 match_template (char mnem_suffix)
6181 {
6182   /* Points to template once we've found it.  */
6183   const insn_template *t;
6184   i386_operand_type overlap0, overlap1, overlap2, overlap3;
6185   i386_operand_type overlap4;
6186   unsigned int found_reverse_match;
6187   i386_opcode_modifier suffix_check;
6188   i386_operand_type operand_types [MAX_OPERANDS];
6189   int addr_prefix_disp;
6190   unsigned int j, size_match, check_register;
6191   enum i386_error specific_error = 0;
6192
6193 #if MAX_OPERANDS != 5
6194 # error "MAX_OPERANDS must be 5."
6195 #endif
6196
6197   found_reverse_match = 0;
6198   addr_prefix_disp = -1;
6199
6200   /* Prepare for mnemonic suffix check.  */
6201   memset (&suffix_check, 0, sizeof (suffix_check));
6202   switch (mnem_suffix)
6203     {
6204     case BYTE_MNEM_SUFFIX:
6205       suffix_check.no_bsuf = 1;
6206       break;
6207     case WORD_MNEM_SUFFIX:
6208       suffix_check.no_wsuf = 1;
6209       break;
6210     case SHORT_MNEM_SUFFIX:
6211       suffix_check.no_ssuf = 1;
6212       break;
6213     case LONG_MNEM_SUFFIX:
6214       suffix_check.no_lsuf = 1;
6215       break;
6216     case QWORD_MNEM_SUFFIX:
6217       suffix_check.no_qsuf = 1;
6218       break;
6219     default:
6220       /* NB: In Intel syntax, normally we can check for memory operand
6221          size when there is no mnemonic suffix.  But jmp and call have
6222          2 different encodings with Dword memory operand size, one with
6223          No_ldSuf and the other without.  i.suffix is set to
6224          LONG_DOUBLE_MNEM_SUFFIX to skip the one with No_ldSuf.  */
6225       if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX)
6226         suffix_check.no_ldsuf = 1;
6227     }
6228
6229   /* Must have right number of operands.  */
6230   i.error = number_of_operands_mismatch;
6231
6232   for (t = current_templates->start; t < current_templates->end; t++)
6233     {
6234       addr_prefix_disp = -1;
6235       found_reverse_match = 0;
6236
6237       if (i.operands != t->operands)
6238         continue;
6239
6240       /* Check processor support.  */
6241       i.error = unsupported;
6242       if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6243         continue;
6244
6245       /* Check AT&T mnemonic.   */
6246       i.error = unsupported_with_intel_mnemonic;
6247       if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6248         continue;
6249
6250       /* Check AT&T/Intel syntax.  */
6251       i.error = unsupported_syntax;
6252       if ((intel_syntax && t->opcode_modifier.attsyntax)
6253           || (!intel_syntax && t->opcode_modifier.intelsyntax))
6254         continue;
6255
6256       /* Check Intel64/AMD64 ISA.   */
6257       switch (isa64)
6258         {
6259         default:
6260           /* Default: Don't accept Intel64.  */
6261           if (t->opcode_modifier.isa64 == INTEL64)
6262             continue;
6263           break;
6264         case amd64:
6265           /* -mamd64: Don't accept Intel64 and Intel64 only.  */
6266           if (t->opcode_modifier.isa64 >= INTEL64)
6267             continue;
6268           break;
6269         case intel64:
6270           /* -mintel64: Don't accept AMD64.  */
6271           if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6272             continue;
6273           break;
6274         }
6275
6276       /* Check the suffix.  */
6277       i.error = invalid_instruction_suffix;
6278       if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
6279           || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf)
6280           || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf)
6281           || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf)
6282           || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf)
6283           || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf))
6284         continue;
6285
6286       size_match = operand_size_match (t);
6287       if (!size_match)
6288         continue;
6289
6290       /* This is intentionally not
6291
6292          if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6293
6294          as the case of a missing * on the operand is accepted (perhaps with
6295          a warning, issued further down).  */
6296       if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6297         {
6298           i.error = operand_type_mismatch;
6299           continue;
6300         }
6301
6302       for (j = 0; j < MAX_OPERANDS; j++)
6303         operand_types[j] = t->operand_types[j];
6304
6305       /* In general, don't allow
6306          - 64-bit operands outside of 64-bit mode,
6307          - 32-bit operands on pre-386.  */
6308       j = i.imm_operands + (t->operands > i.imm_operands + 1);
6309       if (((i.suffix == QWORD_MNEM_SUFFIX
6310             && flag_code != CODE_64BIT
6311             && (t->base_opcode != 0x0fc7
6312                 || t->extension_opcode != 1 /* cmpxchg8b */))
6313            || (i.suffix == LONG_MNEM_SUFFIX
6314                && !cpu_arch_flags.bitfield.cpui386))
6315           && (intel_syntax
6316               ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6317                  && !intel_float_operand (t->name))
6318               : intel_float_operand (t->name) != 2)
6319           && (t->operands == i.imm_operands
6320               || (operand_types[i.imm_operands].bitfield.class != RegMMX
6321                && operand_types[i.imm_operands].bitfield.class != RegSIMD
6322                && operand_types[i.imm_operands].bitfield.class != RegMask)
6323               || (operand_types[j].bitfield.class != RegMMX
6324                   && operand_types[j].bitfield.class != RegSIMD
6325                   && operand_types[j].bitfield.class != RegMask))
6326           && !t->opcode_modifier.sib)
6327         continue;
6328
6329       /* Do not verify operands when there are none.  */
6330       if (!t->operands)
6331         {
6332           if (VEX_check_encoding (t))
6333             {
6334               specific_error = i.error;
6335               continue;
6336             }
6337
6338           /* We've found a match; break out of loop.  */
6339           break;
6340         }
6341
6342       if (!t->opcode_modifier.jump
6343           || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6344         {
6345           /* There should be only one Disp operand.  */
6346           for (j = 0; j < MAX_OPERANDS; j++)
6347             if (operand_type_check (operand_types[j], disp))
6348               break;
6349           if (j < MAX_OPERANDS)
6350             {
6351               bfd_boolean override = (i.prefix[ADDR_PREFIX] != 0);
6352
6353               addr_prefix_disp = j;
6354
6355               /* Address size prefix will turn Disp64/Disp32S/Disp32/Disp16
6356                  operand into Disp32/Disp32/Disp16/Disp32 operand.  */
6357               switch (flag_code)
6358                 {
6359                 case CODE_16BIT:
6360                   override = !override;
6361                   /* Fall through.  */
6362                 case CODE_32BIT:
6363                   if (operand_types[j].bitfield.disp32
6364                       && operand_types[j].bitfield.disp16)
6365                     {
6366                       operand_types[j].bitfield.disp16 = override;
6367                       operand_types[j].bitfield.disp32 = !override;
6368                     }
6369                   operand_types[j].bitfield.disp32s = 0;
6370                   operand_types[j].bitfield.disp64 = 0;
6371                   break;
6372
6373                 case CODE_64BIT:
6374                   if (operand_types[j].bitfield.disp32s
6375                       || operand_types[j].bitfield.disp64)
6376                     {
6377                       operand_types[j].bitfield.disp64 &= !override;
6378                       operand_types[j].bitfield.disp32s &= !override;
6379                       operand_types[j].bitfield.disp32 = override;
6380                     }
6381                   operand_types[j].bitfield.disp16 = 0;
6382                   break;
6383                 }
6384             }
6385         }
6386
6387       /* Force 0x8b encoding for "mov foo@GOT, %eax".  */
6388       if (i.reloc[0] == BFD_RELOC_386_GOT32 && t->base_opcode == 0xa0)
6389         continue;
6390
6391       /* We check register size if needed.  */
6392       if (t->opcode_modifier.checkregsize)
6393         {
6394           check_register = (1 << t->operands) - 1;
6395           if (i.broadcast)
6396             check_register &= ~(1 << i.broadcast->operand);
6397         }
6398       else
6399         check_register = 0;
6400
6401       overlap0 = operand_type_and (i.types[0], operand_types[0]);
6402       switch (t->operands)
6403         {
6404         case 1:
6405           if (!operand_type_match (overlap0, i.types[0]))
6406             continue;
6407           break;
6408         case 2:
6409           /* xchg %eax, %eax is a special case. It is an alias for nop
6410              only in 32bit mode and we can use opcode 0x90.  In 64bit
6411              mode, we can't use 0x90 for xchg %eax, %eax since it should
6412              zero-extend %eax to %rax.  */
6413           if (flag_code == CODE_64BIT
6414               && t->base_opcode == 0x90
6415               && i.types[0].bitfield.instance == Accum
6416               && i.types[0].bitfield.dword
6417               && i.types[1].bitfield.instance == Accum
6418               && i.types[1].bitfield.dword)
6419             continue;
6420           /* xrelease mov %eax, <disp> is another special case. It must not
6421              match the accumulator-only encoding of mov.  */
6422           if (flag_code != CODE_64BIT
6423               && i.hle_prefix
6424               && t->base_opcode == 0xa0
6425               && i.types[0].bitfield.instance == Accum
6426               && (i.flags[1] & Operand_Mem))
6427             continue;
6428           /* Fall through.  */
6429
6430         case 3:
6431           if (!(size_match & MATCH_STRAIGHT))
6432             goto check_reverse;
6433           /* Reverse direction of operands if swapping is possible in the first
6434              place (operands need to be symmetric) and
6435              - the load form is requested, and the template is a store form,
6436              - the store form is requested, and the template is a load form,
6437              - the non-default (swapped) form is requested.  */
6438           overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6439           if (t->opcode_modifier.d && i.reg_operands == i.operands
6440               && !operand_type_all_zero (&overlap1))
6441             switch (i.dir_encoding)
6442               {
6443               case dir_encoding_load:
6444                 if (operand_type_check (operand_types[i.operands - 1], anymem)
6445                     || t->opcode_modifier.regmem)
6446                   goto check_reverse;
6447                 break;
6448
6449               case dir_encoding_store:
6450                 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6451                     && !t->opcode_modifier.regmem)
6452                   goto check_reverse;
6453                 break;
6454
6455               case dir_encoding_swap:
6456                 goto check_reverse;
6457
6458               case dir_encoding_default:
6459                 break;
6460               }
6461           /* If we want store form, we skip the current load.  */
6462           if ((i.dir_encoding == dir_encoding_store
6463                || i.dir_encoding == dir_encoding_swap)
6464               && i.mem_operands == 0
6465               && t->opcode_modifier.load)
6466             continue;
6467           /* Fall through.  */
6468         case 4:
6469         case 5:
6470           overlap1 = operand_type_and (i.types[1], operand_types[1]);
6471           if (!operand_type_match (overlap0, i.types[0])
6472               || !operand_type_match (overlap1, i.types[1])
6473               || ((check_register & 3) == 3
6474                   && !operand_type_register_match (i.types[0],
6475                                                    operand_types[0],
6476                                                    i.types[1],
6477                                                    operand_types[1])))
6478             {
6479               /* Check if other direction is valid ...  */
6480               if (!t->opcode_modifier.d)
6481                 continue;
6482
6483             check_reverse:
6484               if (!(size_match & MATCH_REVERSE))
6485                 continue;
6486               /* Try reversing direction of operands.  */
6487               overlap0 = operand_type_and (i.types[0], operand_types[i.operands - 1]);
6488               overlap1 = operand_type_and (i.types[i.operands - 1], operand_types[0]);
6489               if (!operand_type_match (overlap0, i.types[0])
6490                   || !operand_type_match (overlap1, i.types[i.operands - 1])
6491                   || (check_register
6492                       && !operand_type_register_match (i.types[0],
6493                                                        operand_types[i.operands - 1],
6494                                                        i.types[i.operands - 1],
6495                                                        operand_types[0])))
6496                 {
6497                   /* Does not match either direction.  */
6498                   continue;
6499                 }
6500               /* found_reverse_match holds which of D or FloatR
6501                  we've found.  */
6502               if (!t->opcode_modifier.d)
6503                 found_reverse_match = 0;
6504               else if (operand_types[0].bitfield.tbyte)
6505                 found_reverse_match = Opcode_FloatD;
6506               else if (operand_types[0].bitfield.xmmword
6507                        || operand_types[i.operands - 1].bitfield.xmmword
6508                        || operand_types[0].bitfield.class == RegMMX
6509                        || operand_types[i.operands - 1].bitfield.class == RegMMX
6510                        || is_any_vex_encoding(t))
6511                 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
6512                                       ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
6513               else
6514                 found_reverse_match = Opcode_D;
6515               if (t->opcode_modifier.floatr)
6516                 found_reverse_match |= Opcode_FloatR;
6517             }
6518           else
6519             {
6520               /* Found a forward 2 operand match here.  */
6521               switch (t->operands)
6522                 {
6523                 case 5:
6524                   overlap4 = operand_type_and (i.types[4],
6525                                                operand_types[4]);
6526                   /* Fall through.  */
6527                 case 4:
6528                   overlap3 = operand_type_and (i.types[3],
6529                                                operand_types[3]);
6530                   /* Fall through.  */
6531                 case 3:
6532                   overlap2 = operand_type_and (i.types[2],
6533                                                operand_types[2]);
6534                   break;
6535                 }
6536
6537               switch (t->operands)
6538                 {
6539                 case 5:
6540                   if (!operand_type_match (overlap4, i.types[4])
6541                       || !operand_type_register_match (i.types[3],
6542                                                        operand_types[3],
6543                                                        i.types[4],
6544                                                        operand_types[4]))
6545                     continue;
6546                   /* Fall through.  */
6547                 case 4:
6548                   if (!operand_type_match (overlap3, i.types[3])
6549                       || ((check_register & 0xa) == 0xa
6550                           && !operand_type_register_match (i.types[1],
6551                                                             operand_types[1],
6552                                                             i.types[3],
6553                                                             operand_types[3]))
6554                       || ((check_register & 0xc) == 0xc
6555                           && !operand_type_register_match (i.types[2],
6556                                                             operand_types[2],
6557                                                             i.types[3],
6558                                                             operand_types[3])))
6559                     continue;
6560                   /* Fall through.  */
6561                 case 3:
6562                   /* Here we make use of the fact that there are no
6563                      reverse match 3 operand instructions.  */
6564                   if (!operand_type_match (overlap2, i.types[2])
6565                       || ((check_register & 5) == 5
6566                           && !operand_type_register_match (i.types[0],
6567                                                             operand_types[0],
6568                                                             i.types[2],
6569                                                             operand_types[2]))
6570                       || ((check_register & 6) == 6
6571                           && !operand_type_register_match (i.types[1],
6572                                                             operand_types[1],
6573                                                             i.types[2],
6574                                                             operand_types[2])))
6575                     continue;
6576                   break;
6577                 }
6578             }
6579           /* Found either forward/reverse 2, 3 or 4 operand match here:
6580              slip through to break.  */
6581         }
6582
6583       /* Check if vector operands are valid.  */
6584       if (check_VecOperands (t))
6585         {
6586           specific_error = i.error;
6587           continue;
6588         }
6589
6590       /* Check if VEX/EVEX encoding requirements can be satisfied.  */
6591       if (VEX_check_encoding (t))
6592         {
6593           specific_error = i.error;
6594           continue;
6595         }
6596
6597       /* We've found a match; break out of loop.  */
6598       break;
6599     }
6600
6601   if (t == current_templates->end)
6602     {
6603       /* We found no match.  */
6604       const char *err_msg;
6605       switch (specific_error ? specific_error : i.error)
6606         {
6607         default:
6608           abort ();
6609         case operand_size_mismatch:
6610           err_msg = _("operand size mismatch");
6611           break;
6612         case operand_type_mismatch:
6613           err_msg = _("operand type mismatch");
6614           break;
6615         case register_type_mismatch:
6616           err_msg = _("register type mismatch");
6617           break;
6618         case number_of_operands_mismatch:
6619           err_msg = _("number of operands mismatch");
6620           break;
6621         case invalid_instruction_suffix:
6622           err_msg = _("invalid instruction suffix");
6623           break;
6624         case bad_imm4:
6625           err_msg = _("constant doesn't fit in 4 bits");
6626           break;
6627         case unsupported_with_intel_mnemonic:
6628           err_msg = _("unsupported with Intel mnemonic");
6629           break;
6630         case unsupported_syntax:
6631           err_msg = _("unsupported syntax");
6632           break;
6633         case unsupported:
6634           as_bad (_("unsupported instruction `%s'"),
6635                   current_templates->start->name);
6636           return NULL;
6637         case invalid_sib_address:
6638           err_msg = _("invalid SIB address");
6639           break;
6640         case invalid_vsib_address:
6641           err_msg = _("invalid VSIB address");
6642           break;
6643         case invalid_vector_register_set:
6644           err_msg = _("mask, index, and destination registers must be distinct");
6645           break;
6646         case invalid_tmm_register_set:
6647           err_msg = _("all tmm registers must be distinct");
6648           break;
6649         case unsupported_vector_index_register:
6650           err_msg = _("unsupported vector index register");
6651           break;
6652         case unsupported_broadcast:
6653           err_msg = _("unsupported broadcast");
6654           break;
6655         case broadcast_needed:
6656           err_msg = _("broadcast is needed for operand of such type");
6657           break;
6658         case unsupported_masking:
6659           err_msg = _("unsupported masking");
6660           break;
6661         case mask_not_on_destination:
6662           err_msg = _("mask not on destination operand");
6663           break;
6664         case no_default_mask:
6665           err_msg = _("default mask isn't allowed");
6666           break;
6667         case unsupported_rc_sae:
6668           err_msg = _("unsupported static rounding/sae");
6669           break;
6670         case rc_sae_operand_not_last_imm:
6671           if (intel_syntax)
6672             err_msg = _("RC/SAE operand must precede immediate operands");
6673           else
6674             err_msg = _("RC/SAE operand must follow immediate operands");
6675           break;
6676         case invalid_register_operand:
6677           err_msg = _("invalid register operand");
6678           break;
6679         }
6680       as_bad (_("%s for `%s'"), err_msg,
6681               current_templates->start->name);
6682       return NULL;
6683     }
6684
6685   if (!quiet_warnings)
6686     {
6687       if (!intel_syntax
6688           && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
6689         as_warn (_("indirect %s without `*'"), t->name);
6690
6691       if (t->opcode_modifier.isprefix
6692           && t->opcode_modifier.mnemonicsize == IGNORESIZE)
6693         {
6694           /* Warn them that a data or address size prefix doesn't
6695              affect assembly of the next line of code.  */
6696           as_warn (_("stand-alone `%s' prefix"), t->name);
6697         }
6698     }
6699
6700   /* Copy the template we found.  */
6701   i.tm = *t;
6702
6703   if (addr_prefix_disp != -1)
6704     i.tm.operand_types[addr_prefix_disp]
6705       = operand_types[addr_prefix_disp];
6706
6707   if (found_reverse_match)
6708     {
6709       /* If we found a reverse match we must alter the opcode direction
6710          bit and clear/flip the regmem modifier one.  found_reverse_match
6711          holds bits to change (different for int & float insns).  */
6712
6713       i.tm.base_opcode ^= found_reverse_match;
6714
6715       i.tm.operand_types[0] = operand_types[i.operands - 1];
6716       i.tm.operand_types[i.operands - 1] = operand_types[0];
6717
6718       /* Certain SIMD insns have their load forms specified in the opcode
6719          table, and hence we need to _set_ RegMem instead of clearing it.
6720          We need to avoid setting the bit though on insns like KMOVW.  */
6721       i.tm.opcode_modifier.regmem
6722         = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
6723           && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
6724           && !i.tm.opcode_modifier.regmem;
6725     }
6726
6727   return t;
6728 }
6729
6730 static int
6731 check_string (void)
6732 {
6733   unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
6734   unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
6735
6736   if (i.seg[op] != NULL && i.seg[op] != &es)
6737     {
6738       as_bad (_("`%s' operand %u must use `%ses' segment"),
6739               i.tm.name,
6740               intel_syntax ? i.tm.operands - es_op : es_op + 1,
6741               register_prefix);
6742       return 0;
6743     }
6744
6745   /* There's only ever one segment override allowed per instruction.
6746      This instruction possibly has a legal segment override on the
6747      second operand, so copy the segment to where non-string
6748      instructions store it, allowing common code.  */
6749   i.seg[op] = i.seg[1];
6750
6751   return 1;
6752 }
6753
6754 static int
6755 process_suffix (void)
6756 {
6757   /* If matched instruction specifies an explicit instruction mnemonic
6758      suffix, use it.  */
6759   if (i.tm.opcode_modifier.size == SIZE16)
6760     i.suffix = WORD_MNEM_SUFFIX;
6761   else if (i.tm.opcode_modifier.size == SIZE32)
6762     i.suffix = LONG_MNEM_SUFFIX;
6763   else if (i.tm.opcode_modifier.size == SIZE64)
6764     i.suffix = QWORD_MNEM_SUFFIX;
6765   else if (i.reg_operands
6766            && (i.operands > 1 || i.types[0].bitfield.class == Reg)
6767            && !i.tm.opcode_modifier.addrprefixopreg)
6768     {
6769       unsigned int numop = i.operands;
6770
6771       /* movsx/movzx want only their source operand considered here, for the
6772          ambiguity checking below.  The suffix will be replaced afterwards
6773          to represent the destination (register).  */
6774       if (((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w)
6775           || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
6776         --i.operands;
6777
6778       /* crc32 needs REX.W set regardless of suffix / source operand size.  */
6779       if (i.tm.base_opcode == 0xf20f38f0
6780           && i.tm.operand_types[1].bitfield.qword)
6781         i.rex |= REX_W;
6782
6783       /* If there's no instruction mnemonic suffix we try to invent one
6784          based on GPR operands.  */
6785       if (!i.suffix)
6786         {
6787           /* We take i.suffix from the last register operand specified,
6788              Destination register type is more significant than source
6789              register type.  crc32 in SSE4.2 prefers source register
6790              type. */
6791           unsigned int op = i.tm.base_opcode != 0xf20f38f0 ? i.operands : 1;
6792
6793           while (op--)
6794             if (i.tm.operand_types[op].bitfield.instance == InstanceNone
6795                 || i.tm.operand_types[op].bitfield.instance == Accum)
6796               {
6797                 if (i.types[op].bitfield.class != Reg)
6798                   continue;
6799                 if (i.types[op].bitfield.byte)
6800                   i.suffix = BYTE_MNEM_SUFFIX;
6801                 else if (i.types[op].bitfield.word)
6802                   i.suffix = WORD_MNEM_SUFFIX;
6803                 else if (i.types[op].bitfield.dword)
6804                   i.suffix = LONG_MNEM_SUFFIX;
6805                 else if (i.types[op].bitfield.qword)
6806                   i.suffix = QWORD_MNEM_SUFFIX;
6807                 else
6808                   continue;
6809                 break;
6810               }
6811
6812           /* As an exception, movsx/movzx silently default to a byte source
6813              in AT&T mode.  */
6814           if ((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w
6815               && !i.suffix && !intel_syntax)
6816             i.suffix = BYTE_MNEM_SUFFIX;
6817         }
6818       else if (i.suffix == BYTE_MNEM_SUFFIX)
6819         {
6820           if (intel_syntax
6821               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6822               && i.tm.opcode_modifier.no_bsuf)
6823             i.suffix = 0;
6824           else if (!check_byte_reg ())
6825             return 0;
6826         }
6827       else if (i.suffix == LONG_MNEM_SUFFIX)
6828         {
6829           if (intel_syntax
6830               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6831               && i.tm.opcode_modifier.no_lsuf
6832               && !i.tm.opcode_modifier.todword
6833               && !i.tm.opcode_modifier.toqword)
6834             i.suffix = 0;
6835           else if (!check_long_reg ())
6836             return 0;
6837         }
6838       else if (i.suffix == QWORD_MNEM_SUFFIX)
6839         {
6840           if (intel_syntax
6841               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6842               && i.tm.opcode_modifier.no_qsuf
6843               && !i.tm.opcode_modifier.todword
6844               && !i.tm.opcode_modifier.toqword)
6845             i.suffix = 0;
6846           else if (!check_qword_reg ())
6847             return 0;
6848         }
6849       else if (i.suffix == WORD_MNEM_SUFFIX)
6850         {
6851           if (intel_syntax
6852               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6853               && i.tm.opcode_modifier.no_wsuf)
6854             i.suffix = 0;
6855           else if (!check_word_reg ())
6856             return 0;
6857         }
6858       else if (intel_syntax
6859                && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
6860         /* Do nothing if the instruction is going to ignore the prefix.  */
6861         ;
6862       else
6863         abort ();
6864
6865       /* Undo the movsx/movzx change done above.  */
6866       i.operands = numop;
6867     }
6868   else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
6869            && !i.suffix)
6870     {
6871       i.suffix = stackop_size;
6872       if (stackop_size == LONG_MNEM_SUFFIX)
6873         {
6874           /* stackop_size is set to LONG_MNEM_SUFFIX for the
6875              .code16gcc directive to support 16-bit mode with
6876              32-bit address.  For IRET without a suffix, generate
6877              16-bit IRET (opcode 0xcf) to return from an interrupt
6878              handler.  */
6879           if (i.tm.base_opcode == 0xcf)
6880             {
6881               i.suffix = WORD_MNEM_SUFFIX;
6882               as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
6883             }
6884           /* Warn about changed behavior for segment register push/pop.  */
6885           else if ((i.tm.base_opcode | 1) == 0x07)
6886             as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
6887                      i.tm.name);
6888         }
6889     }
6890   else if (!i.suffix
6891            && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
6892                || i.tm.opcode_modifier.jump == JUMP_BYTE
6893                || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
6894                || (i.tm.base_opcode == 0x0f01 /* [ls][gi]dt */
6895                    && i.tm.extension_opcode <= 3)))
6896     {
6897       switch (flag_code)
6898         {
6899         case CODE_64BIT:
6900           if (!i.tm.opcode_modifier.no_qsuf)
6901             {
6902               if (i.tm.opcode_modifier.jump == JUMP_BYTE
6903                   || i.tm.opcode_modifier.no_lsuf)
6904                 i.suffix = QWORD_MNEM_SUFFIX;
6905               break;
6906             }
6907           /* Fall through.  */
6908         case CODE_32BIT:
6909           if (!i.tm.opcode_modifier.no_lsuf)
6910             i.suffix = LONG_MNEM_SUFFIX;
6911           break;
6912         case CODE_16BIT:
6913           if (!i.tm.opcode_modifier.no_wsuf)
6914             i.suffix = WORD_MNEM_SUFFIX;
6915           break;
6916         }
6917     }
6918
6919   if (!i.suffix
6920       && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
6921           /* Also cover lret/retf/iret in 64-bit mode.  */
6922           || (flag_code == CODE_64BIT
6923               && !i.tm.opcode_modifier.no_lsuf
6924               && !i.tm.opcode_modifier.no_qsuf))
6925       && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
6926       /* Explicit sizing prefixes are assumed to disambiguate insns.  */
6927       && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
6928       /* Accept FLDENV et al without suffix.  */
6929       && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
6930     {
6931       unsigned int suffixes, evex = 0;
6932
6933       suffixes = !i.tm.opcode_modifier.no_bsuf;
6934       if (!i.tm.opcode_modifier.no_wsuf)
6935         suffixes |= 1 << 1;
6936       if (!i.tm.opcode_modifier.no_lsuf)
6937         suffixes |= 1 << 2;
6938       if (!i.tm.opcode_modifier.no_ldsuf)
6939         suffixes |= 1 << 3;
6940       if (!i.tm.opcode_modifier.no_ssuf)
6941         suffixes |= 1 << 4;
6942       if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
6943         suffixes |= 1 << 5;
6944
6945       /* For [XYZ]MMWORD operands inspect operand sizes.  While generally
6946          also suitable for AT&T syntax mode, it was requested that this be
6947          restricted to just Intel syntax.  */
6948       if (intel_syntax && is_any_vex_encoding (&i.tm) && !i.broadcast)
6949         {
6950           unsigned int op;
6951
6952           for (op = 0; op < i.tm.operands; ++op)
6953             {
6954               if (is_evex_encoding (&i.tm)
6955                   && !cpu_arch_flags.bitfield.cpuavx512vl)
6956                 {
6957                   if (i.tm.operand_types[op].bitfield.ymmword)
6958                     i.tm.operand_types[op].bitfield.xmmword = 0;
6959                   if (i.tm.operand_types[op].bitfield.zmmword)
6960                     i.tm.operand_types[op].bitfield.ymmword = 0;
6961                   if (!i.tm.opcode_modifier.evex
6962                       || i.tm.opcode_modifier.evex == EVEXDYN)
6963                     i.tm.opcode_modifier.evex = EVEX512;
6964                 }
6965
6966               if (i.tm.operand_types[op].bitfield.xmmword
6967                   + i.tm.operand_types[op].bitfield.ymmword
6968                   + i.tm.operand_types[op].bitfield.zmmword < 2)
6969                 continue;
6970
6971               /* Any properly sized operand disambiguates the insn.  */
6972               if (i.types[op].bitfield.xmmword
6973                   || i.types[op].bitfield.ymmword
6974                   || i.types[op].bitfield.zmmword)
6975                 {
6976                   suffixes &= ~(7 << 6);
6977                   evex = 0;
6978                   break;
6979                 }
6980
6981               if ((i.flags[op] & Operand_Mem)
6982                   && i.tm.operand_types[op].bitfield.unspecified)
6983                 {
6984                   if (i.tm.operand_types[op].bitfield.xmmword)
6985                     suffixes |= 1 << 6;
6986                   if (i.tm.operand_types[op].bitfield.ymmword)
6987                     suffixes |= 1 << 7;
6988                   if (i.tm.operand_types[op].bitfield.zmmword)
6989                     suffixes |= 1 << 8;
6990                   if (is_evex_encoding (&i.tm))
6991                     evex = EVEX512;
6992                 }
6993             }
6994         }
6995
6996       /* Are multiple suffixes / operand sizes allowed?  */
6997       if (suffixes & (suffixes - 1))
6998         {
6999           if (intel_syntax
7000               && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7001                   || operand_check == check_error))
7002             {
7003               as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
7004               return 0;
7005             }
7006           if (operand_check == check_error)
7007             {
7008               as_bad (_("no instruction mnemonic suffix given and "
7009                         "no register operands; can't size `%s'"), i.tm.name);
7010               return 0;
7011             }
7012           if (operand_check == check_warning)
7013             as_warn (_("%s; using default for `%s'"),
7014                        intel_syntax
7015                        ? _("ambiguous operand size")
7016                        : _("no instruction mnemonic suffix given and "
7017                            "no register operands"),
7018                        i.tm.name);
7019
7020           if (i.tm.opcode_modifier.floatmf)
7021             i.suffix = SHORT_MNEM_SUFFIX;
7022           else if ((i.tm.base_opcode | 8) == 0xfbe
7023                    || (i.tm.base_opcode == 0x63
7024                        && i.tm.cpu_flags.bitfield.cpu64))
7025             /* handled below */;
7026           else if (evex)
7027             i.tm.opcode_modifier.evex = evex;
7028           else if (flag_code == CODE_16BIT)
7029             i.suffix = WORD_MNEM_SUFFIX;
7030           else if (!i.tm.opcode_modifier.no_lsuf)
7031             i.suffix = LONG_MNEM_SUFFIX;
7032           else
7033             i.suffix = QWORD_MNEM_SUFFIX;
7034         }
7035     }
7036
7037   if ((i.tm.base_opcode | 8) == 0xfbe
7038       || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
7039     {
7040       /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7041          In AT&T syntax, if there is no suffix (warned about above), the default
7042          will be byte extension.  */
7043       if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7044         i.tm.base_opcode |= 1;
7045
7046       /* For further processing, the suffix should represent the destination
7047          (register).  This is already the case when one was used with
7048          mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7049          no suffix to begin with.  */
7050       if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7051         {
7052           if (i.types[1].bitfield.word)
7053             i.suffix = WORD_MNEM_SUFFIX;
7054           else if (i.types[1].bitfield.qword)
7055             i.suffix = QWORD_MNEM_SUFFIX;
7056           else
7057             i.suffix = LONG_MNEM_SUFFIX;
7058
7059           i.tm.opcode_modifier.w = 0;
7060         }
7061     }
7062
7063   if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7064     i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7065                    != (i.tm.operand_types[1].bitfield.class == Reg);
7066
7067   /* Change the opcode based on the operand size given by i.suffix.  */
7068   switch (i.suffix)
7069     {
7070     /* Size floating point instruction.  */
7071     case LONG_MNEM_SUFFIX:
7072       if (i.tm.opcode_modifier.floatmf)
7073         {
7074           i.tm.base_opcode ^= 4;
7075           break;
7076         }
7077     /* fall through */
7078     case WORD_MNEM_SUFFIX:
7079     case QWORD_MNEM_SUFFIX:
7080       /* It's not a byte, select word/dword operation.  */
7081       if (i.tm.opcode_modifier.w)
7082         {
7083           if (i.short_form)
7084             i.tm.base_opcode |= 8;
7085           else
7086             i.tm.base_opcode |= 1;
7087         }
7088     /* fall through */
7089     case SHORT_MNEM_SUFFIX:
7090       /* Now select between word & dword operations via the operand
7091          size prefix, except for instructions that will ignore this
7092          prefix anyway.  */
7093       if (i.suffix != QWORD_MNEM_SUFFIX
7094           && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7095           && !i.tm.opcode_modifier.floatmf
7096           && !is_any_vex_encoding (&i.tm)
7097           && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7098               || (flag_code == CODE_64BIT
7099                   && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7100         {
7101           unsigned int prefix = DATA_PREFIX_OPCODE;
7102
7103           if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7104             prefix = ADDR_PREFIX_OPCODE;
7105
7106           if (!add_prefix (prefix))
7107             return 0;
7108         }
7109
7110       /* Set mode64 for an operand.  */
7111       if (i.suffix == QWORD_MNEM_SUFFIX
7112           && flag_code == CODE_64BIT
7113           && !i.tm.opcode_modifier.norex64
7114           && !i.tm.opcode_modifier.vexw
7115           /* Special case for xchg %rax,%rax.  It is NOP and doesn't
7116              need rex64. */
7117           && ! (i.operands == 2
7118                 && i.tm.base_opcode == 0x90
7119                 && i.tm.extension_opcode == None
7120                 && i.types[0].bitfield.instance == Accum
7121                 && i.types[0].bitfield.qword
7122                 && i.types[1].bitfield.instance == Accum
7123                 && i.types[1].bitfield.qword))
7124         i.rex |= REX_W;
7125
7126       break;
7127
7128     case 0:
7129       /* Select word/dword/qword operation with explict data sizing prefix
7130          when there are no suitable register operands.  */
7131       if (i.tm.opcode_modifier.w
7132           && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7133           && (!i.reg_operands
7134               || (i.reg_operands == 1
7135                       /* ShiftCount */
7136                   && (i.tm.operand_types[0].bitfield.instance == RegC
7137                       /* InOutPortReg */
7138                       || i.tm.operand_types[0].bitfield.instance == RegD
7139                       || i.tm.operand_types[1].bitfield.instance == RegD
7140                       /* CRC32 */
7141                       || i.tm.base_opcode == 0xf20f38f0))))
7142         i.tm.base_opcode |= 1;
7143       break;
7144     }
7145
7146   if (i.tm.opcode_modifier.addrprefixopreg)
7147     {
7148       gas_assert (!i.suffix);
7149       gas_assert (i.reg_operands);
7150
7151       if (i.tm.operand_types[0].bitfield.instance == Accum
7152           || i.operands == 1)
7153         {
7154           /* The address size override prefix changes the size of the
7155              first operand.  */
7156           if (flag_code == CODE_64BIT
7157               && i.op[0].regs->reg_type.bitfield.word)
7158             {
7159               as_bad (_("16-bit addressing unavailable for `%s'"),
7160                       i.tm.name);
7161               return 0;
7162             }
7163
7164           if ((flag_code == CODE_32BIT
7165                ? i.op[0].regs->reg_type.bitfield.word
7166                : i.op[0].regs->reg_type.bitfield.dword)
7167               && !add_prefix (ADDR_PREFIX_OPCODE))
7168             return 0;
7169         }
7170       else
7171         {
7172           /* Check invalid register operand when the address size override
7173              prefix changes the size of register operands.  */
7174           unsigned int op;
7175           enum { need_word, need_dword, need_qword } need;
7176
7177           if (flag_code == CODE_32BIT)
7178             need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7179           else if (i.prefix[ADDR_PREFIX])
7180             need = need_dword;
7181           else
7182             need = flag_code == CODE_64BIT ? need_qword : need_word;
7183
7184           for (op = 0; op < i.operands; op++)
7185             {
7186               if (i.types[op].bitfield.class != Reg)
7187                 continue;
7188
7189               switch (need)
7190                 {
7191                 case need_word:
7192                   if (i.op[op].regs->reg_type.bitfield.word)
7193                     continue;
7194                   break;
7195                 case need_dword:
7196                   if (i.op[op].regs->reg_type.bitfield.dword)
7197                     continue;
7198                   break;
7199                 case need_qword:
7200                   if (i.op[op].regs->reg_type.bitfield.qword)
7201                     continue;
7202                   break;
7203                 }
7204
7205               as_bad (_("invalid register operand size for `%s'"),
7206                       i.tm.name);
7207               return 0;
7208             }
7209         }
7210     }
7211
7212   return 1;
7213 }
7214
7215 static int
7216 check_byte_reg (void)
7217 {
7218   int op;
7219
7220   for (op = i.operands; --op >= 0;)
7221     {
7222       /* Skip non-register operands. */
7223       if (i.types[op].bitfield.class != Reg)
7224         continue;
7225
7226       /* If this is an eight bit register, it's OK.  If it's the 16 or
7227          32 bit version of an eight bit register, we will just use the
7228          low portion, and that's OK too.  */
7229       if (i.types[op].bitfield.byte)
7230         continue;
7231
7232       /* I/O port address operands are OK too.  */
7233       if (i.tm.operand_types[op].bitfield.instance == RegD
7234           && i.tm.operand_types[op].bitfield.word)
7235         continue;
7236
7237       /* crc32 only wants its source operand checked here.  */
7238       if (i.tm.base_opcode == 0xf20f38f0 && op)
7239         continue;
7240
7241       /* Any other register is bad.  */
7242       as_bad (_("`%s%s' not allowed with `%s%c'"),
7243               register_prefix, i.op[op].regs->reg_name,
7244               i.tm.name, i.suffix);
7245       return 0;
7246     }
7247   return 1;
7248 }
7249
7250 static int
7251 check_long_reg (void)
7252 {
7253   int op;
7254
7255   for (op = i.operands; --op >= 0;)
7256     /* Skip non-register operands. */
7257     if (i.types[op].bitfield.class != Reg)
7258       continue;
7259     /* Reject eight bit registers, except where the template requires
7260        them. (eg. movzb)  */
7261     else if (i.types[op].bitfield.byte
7262              && (i.tm.operand_types[op].bitfield.class == Reg
7263                  || i.tm.operand_types[op].bitfield.instance == Accum)
7264              && (i.tm.operand_types[op].bitfield.word
7265                  || i.tm.operand_types[op].bitfield.dword))
7266       {
7267         as_bad (_("`%s%s' not allowed with `%s%c'"),
7268                 register_prefix,
7269                 i.op[op].regs->reg_name,
7270                 i.tm.name,
7271                 i.suffix);
7272         return 0;
7273       }
7274     /* Error if the e prefix on a general reg is missing.  */
7275     else if (i.types[op].bitfield.word
7276              && (i.tm.operand_types[op].bitfield.class == Reg
7277                  || i.tm.operand_types[op].bitfield.instance == Accum)
7278              && i.tm.operand_types[op].bitfield.dword)
7279       {
7280         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7281                 register_prefix, i.op[op].regs->reg_name,
7282                 i.suffix);
7283         return 0;
7284       }
7285     /* Warn if the r prefix on a general reg is present.  */
7286     else if (i.types[op].bitfield.qword
7287              && (i.tm.operand_types[op].bitfield.class == Reg
7288                  || i.tm.operand_types[op].bitfield.instance == Accum)
7289              && i.tm.operand_types[op].bitfield.dword)
7290       {
7291         if (intel_syntax
7292             && i.tm.opcode_modifier.toqword
7293             && i.types[0].bitfield.class != RegSIMD)
7294           {
7295             /* Convert to QWORD.  We want REX byte. */
7296             i.suffix = QWORD_MNEM_SUFFIX;
7297           }
7298         else
7299           {
7300             as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7301                     register_prefix, i.op[op].regs->reg_name,
7302                     i.suffix);
7303             return 0;
7304           }
7305       }
7306   return 1;
7307 }
7308
7309 static int
7310 check_qword_reg (void)
7311 {
7312   int op;
7313
7314   for (op = i.operands; --op >= 0; )
7315     /* Skip non-register operands. */
7316     if (i.types[op].bitfield.class != Reg)
7317       continue;
7318     /* Reject eight bit registers, except where the template requires
7319        them. (eg. movzb)  */
7320     else if (i.types[op].bitfield.byte
7321              && (i.tm.operand_types[op].bitfield.class == Reg
7322                  || i.tm.operand_types[op].bitfield.instance == Accum)
7323              && (i.tm.operand_types[op].bitfield.word
7324                  || i.tm.operand_types[op].bitfield.dword))
7325       {
7326         as_bad (_("`%s%s' not allowed with `%s%c'"),
7327                 register_prefix,
7328                 i.op[op].regs->reg_name,
7329                 i.tm.name,
7330                 i.suffix);
7331         return 0;
7332       }
7333     /* Warn if the r prefix on a general reg is missing.  */
7334     else if ((i.types[op].bitfield.word
7335               || i.types[op].bitfield.dword)
7336              && (i.tm.operand_types[op].bitfield.class == Reg
7337                  || i.tm.operand_types[op].bitfield.instance == Accum)
7338              && i.tm.operand_types[op].bitfield.qword)
7339       {
7340         /* Prohibit these changes in the 64bit mode, since the
7341            lowering is more complicated.  */
7342         if (intel_syntax
7343             && i.tm.opcode_modifier.todword
7344             && i.types[0].bitfield.class != RegSIMD)
7345           {
7346             /* Convert to DWORD.  We don't want REX byte. */
7347             i.suffix = LONG_MNEM_SUFFIX;
7348           }
7349         else
7350           {
7351             as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7352                     register_prefix, i.op[op].regs->reg_name,
7353                     i.suffix);
7354             return 0;
7355           }
7356       }
7357   return 1;
7358 }
7359
7360 static int
7361 check_word_reg (void)
7362 {
7363   int op;
7364   for (op = i.operands; --op >= 0;)
7365     /* Skip non-register operands. */
7366     if (i.types[op].bitfield.class != Reg)
7367       continue;
7368     /* Reject eight bit registers, except where the template requires
7369        them. (eg. movzb)  */
7370     else if (i.types[op].bitfield.byte
7371              && (i.tm.operand_types[op].bitfield.class == Reg
7372                  || i.tm.operand_types[op].bitfield.instance == Accum)
7373              && (i.tm.operand_types[op].bitfield.word
7374                  || i.tm.operand_types[op].bitfield.dword))
7375       {
7376         as_bad (_("`%s%s' not allowed with `%s%c'"),
7377                 register_prefix,
7378                 i.op[op].regs->reg_name,
7379                 i.tm.name,
7380                 i.suffix);
7381         return 0;
7382       }
7383     /* Error if the e or r prefix on a general reg is present.  */
7384     else if ((i.types[op].bitfield.dword
7385                  || i.types[op].bitfield.qword)
7386              && (i.tm.operand_types[op].bitfield.class == Reg
7387                  || i.tm.operand_types[op].bitfield.instance == Accum)
7388              && i.tm.operand_types[op].bitfield.word)
7389       {
7390         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7391                 register_prefix, i.op[op].regs->reg_name,
7392                 i.suffix);
7393         return 0;
7394       }
7395   return 1;
7396 }
7397
7398 static int
7399 update_imm (unsigned int j)
7400 {
7401   i386_operand_type overlap = i.types[j];
7402   if ((overlap.bitfield.imm8
7403        || overlap.bitfield.imm8s
7404        || overlap.bitfield.imm16
7405        || overlap.bitfield.imm32
7406        || overlap.bitfield.imm32s
7407        || overlap.bitfield.imm64)
7408       && !operand_type_equal (&overlap, &imm8)
7409       && !operand_type_equal (&overlap, &imm8s)
7410       && !operand_type_equal (&overlap, &imm16)
7411       && !operand_type_equal (&overlap, &imm32)
7412       && !operand_type_equal (&overlap, &imm32s)
7413       && !operand_type_equal (&overlap, &imm64))
7414     {
7415       if (i.suffix)
7416         {
7417           i386_operand_type temp;
7418
7419           operand_type_set (&temp, 0);
7420           if (i.suffix == BYTE_MNEM_SUFFIX)
7421             {
7422               temp.bitfield.imm8 = overlap.bitfield.imm8;
7423               temp.bitfield.imm8s = overlap.bitfield.imm8s;
7424             }
7425           else if (i.suffix == WORD_MNEM_SUFFIX)
7426             temp.bitfield.imm16 = overlap.bitfield.imm16;
7427           else if (i.suffix == QWORD_MNEM_SUFFIX)
7428             {
7429               temp.bitfield.imm64 = overlap.bitfield.imm64;
7430               temp.bitfield.imm32s = overlap.bitfield.imm32s;
7431             }
7432           else
7433             temp.bitfield.imm32 = overlap.bitfield.imm32;
7434           overlap = temp;
7435         }
7436       else if (operand_type_equal (&overlap, &imm16_32_32s)
7437                || operand_type_equal (&overlap, &imm16_32)
7438                || operand_type_equal (&overlap, &imm16_32s))
7439         {
7440           if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
7441             overlap = imm16;
7442           else
7443             overlap = imm32s;
7444         }
7445       else if (i.prefix[REX_PREFIX] & REX_W)
7446         overlap = operand_type_and (overlap, imm32s);
7447       else if (i.prefix[DATA_PREFIX])
7448         overlap = operand_type_and (overlap,
7449                                     flag_code != CODE_16BIT ? imm16 : imm32);
7450       if (!operand_type_equal (&overlap, &imm8)
7451           && !operand_type_equal (&overlap, &imm8s)
7452           && !operand_type_equal (&overlap, &imm16)
7453           && !operand_type_equal (&overlap, &imm32)
7454           && !operand_type_equal (&overlap, &imm32s)
7455           && !operand_type_equal (&overlap, &imm64))
7456         {
7457           as_bad (_("no instruction mnemonic suffix given; "
7458                     "can't determine immediate size"));
7459           return 0;
7460         }
7461     }
7462   i.types[j] = overlap;
7463
7464   return 1;
7465 }
7466
7467 static int
7468 finalize_imm (void)
7469 {
7470   unsigned int j, n;
7471
7472   /* Update the first 2 immediate operands.  */
7473   n = i.operands > 2 ? 2 : i.operands;
7474   if (n)
7475     {
7476       for (j = 0; j < n; j++)
7477         if (update_imm (j) == 0)
7478           return 0;
7479
7480       /* The 3rd operand can't be immediate operand.  */
7481       gas_assert (operand_type_check (i.types[2], imm) == 0);
7482     }
7483
7484   return 1;
7485 }
7486
7487 static int
7488 process_operands (void)
7489 {
7490   /* Default segment register this instruction will use for memory
7491      accesses.  0 means unknown.  This is only for optimizing out
7492      unnecessary segment overrides.  */
7493   const seg_entry *default_seg = 0;
7494
7495   if (i.tm.opcode_modifier.sse2avx)
7496     {
7497       /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
7498          need converting.  */
7499       i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
7500       i.prefix[REX_PREFIX] = 0;
7501       i.rex_encoding = 0;
7502     }
7503   /* ImmExt should be processed after SSE2AVX.  */
7504   else if (i.tm.opcode_modifier.immext)
7505     process_immext ();
7506
7507   if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
7508     {
7509       unsigned int dupl = i.operands;
7510       unsigned int dest = dupl - 1;
7511       unsigned int j;
7512
7513       /* The destination must be an xmm register.  */
7514       gas_assert (i.reg_operands
7515                   && MAX_OPERANDS > dupl
7516                   && operand_type_equal (&i.types[dest], &regxmm));
7517
7518       if (i.tm.operand_types[0].bitfield.instance == Accum
7519           && i.tm.operand_types[0].bitfield.xmmword)
7520         {
7521           if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
7522             {
7523               /* Keep xmm0 for instructions with VEX prefix and 3
7524                  sources.  */
7525               i.tm.operand_types[0].bitfield.instance = InstanceNone;
7526               i.tm.operand_types[0].bitfield.class = RegSIMD;
7527               goto duplicate;
7528             }
7529           else
7530             {
7531               /* We remove the first xmm0 and keep the number of
7532                  operands unchanged, which in fact duplicates the
7533                  destination.  */
7534               for (j = 1; j < i.operands; j++)
7535                 {
7536                   i.op[j - 1] = i.op[j];
7537                   i.types[j - 1] = i.types[j];
7538                   i.tm.operand_types[j - 1] = i.tm.operand_types[j];
7539                   i.flags[j - 1] = i.flags[j];
7540                 }
7541             }
7542         }
7543       else if (i.tm.opcode_modifier.implicit1stxmm0)
7544         {
7545           gas_assert ((MAX_OPERANDS - 1) > dupl
7546                       && (i.tm.opcode_modifier.vexsources
7547                           == VEX3SOURCES));
7548
7549           /* Add the implicit xmm0 for instructions with VEX prefix
7550              and 3 sources.  */
7551           for (j = i.operands; j > 0; j--)
7552             {
7553               i.op[j] = i.op[j - 1];
7554               i.types[j] = i.types[j - 1];
7555               i.tm.operand_types[j] = i.tm.operand_types[j - 1];
7556               i.flags[j] = i.flags[j - 1];
7557             }
7558           i.op[0].regs
7559             = (const reg_entry *) hash_find (reg_hash, "xmm0");
7560           i.types[0] = regxmm;
7561           i.tm.operand_types[0] = regxmm;
7562
7563           i.operands += 2;
7564           i.reg_operands += 2;
7565           i.tm.operands += 2;
7566
7567           dupl++;
7568           dest++;
7569           i.op[dupl] = i.op[dest];
7570           i.types[dupl] = i.types[dest];
7571           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7572           i.flags[dupl] = i.flags[dest];
7573         }
7574       else
7575         {
7576         duplicate:
7577           i.operands++;
7578           i.reg_operands++;
7579           i.tm.operands++;
7580
7581           i.op[dupl] = i.op[dest];
7582           i.types[dupl] = i.types[dest];
7583           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7584           i.flags[dupl] = i.flags[dest];
7585         }
7586
7587        if (i.tm.opcode_modifier.immext)
7588          process_immext ();
7589     }
7590   else if (i.tm.operand_types[0].bitfield.instance == Accum
7591            && i.tm.operand_types[0].bitfield.xmmword)
7592     {
7593       unsigned int j;
7594
7595       for (j = 1; j < i.operands; j++)
7596         {
7597           i.op[j - 1] = i.op[j];
7598           i.types[j - 1] = i.types[j];
7599
7600           /* We need to adjust fields in i.tm since they are used by
7601              build_modrm_byte.  */
7602           i.tm.operand_types [j - 1] = i.tm.operand_types [j];
7603
7604           i.flags[j - 1] = i.flags[j];
7605         }
7606
7607       i.operands--;
7608       i.reg_operands--;
7609       i.tm.operands--;
7610     }
7611   else if (i.tm.opcode_modifier.implicitquadgroup)
7612     {
7613       unsigned int regnum, first_reg_in_group, last_reg_in_group;
7614
7615       /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
7616       gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
7617       regnum = register_number (i.op[1].regs);
7618       first_reg_in_group = regnum & ~3;
7619       last_reg_in_group = first_reg_in_group + 3;
7620       if (regnum != first_reg_in_group)
7621         as_warn (_("source register `%s%s' implicitly denotes"
7622                    " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
7623                  register_prefix, i.op[1].regs->reg_name,
7624                  register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
7625                  register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
7626                  i.tm.name);
7627     }
7628   else if (i.tm.opcode_modifier.regkludge)
7629     {
7630       /* The imul $imm, %reg instruction is converted into
7631          imul $imm, %reg, %reg, and the clr %reg instruction
7632          is converted into xor %reg, %reg.  */
7633
7634       unsigned int first_reg_op;
7635
7636       if (operand_type_check (i.types[0], reg))
7637         first_reg_op = 0;
7638       else
7639         first_reg_op = 1;
7640       /* Pretend we saw the extra register operand.  */
7641       gas_assert (i.reg_operands == 1
7642                   && i.op[first_reg_op + 1].regs == 0);
7643       i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
7644       i.types[first_reg_op + 1] = i.types[first_reg_op];
7645       i.operands++;
7646       i.reg_operands++;
7647     }
7648
7649   if (i.tm.opcode_modifier.modrm)
7650     {
7651       /* The opcode is completed (modulo i.tm.extension_opcode which
7652          must be put into the modrm byte).  Now, we make the modrm and
7653          index base bytes based on all the info we've collected.  */
7654
7655       default_seg = build_modrm_byte ();
7656     }
7657   else if (i.types[0].bitfield.class == SReg)
7658     {
7659       if (flag_code != CODE_64BIT
7660           ? i.tm.base_opcode == POP_SEG_SHORT
7661             && i.op[0].regs->reg_num == 1
7662           : (i.tm.base_opcode | 1) == POP_SEG386_SHORT
7663             && i.op[0].regs->reg_num < 4)
7664         {
7665           as_bad (_("you can't `%s %s%s'"),
7666                   i.tm.name, register_prefix, i.op[0].regs->reg_name);
7667           return 0;
7668         }
7669       if ( i.op[0].regs->reg_num > 3 && i.tm.opcode_length == 1 )
7670         {
7671           i.tm.base_opcode ^= POP_SEG_SHORT ^ POP_SEG386_SHORT;
7672           i.tm.opcode_length = 2;
7673         }
7674       i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
7675     }
7676   else if ((i.tm.base_opcode & ~0x3) == MOV_AX_DISP32)
7677     {
7678       default_seg = &ds;
7679     }
7680   else if (i.tm.opcode_modifier.isstring)
7681     {
7682       /* For the string instructions that allow a segment override
7683          on one of their operands, the default segment is ds.  */
7684       default_seg = &ds;
7685     }
7686   else if (i.short_form)
7687     {
7688       /* The register or float register operand is in operand
7689          0 or 1.  */
7690       unsigned int op = i.tm.operand_types[0].bitfield.class != Reg;
7691
7692       /* Register goes in low 3 bits of opcode.  */
7693       i.tm.base_opcode |= i.op[op].regs->reg_num;
7694       if ((i.op[op].regs->reg_flags & RegRex) != 0)
7695         i.rex |= REX_B;
7696       if (!quiet_warnings && i.tm.opcode_modifier.ugh)
7697         {
7698           /* Warn about some common errors, but press on regardless.
7699              The first case can be generated by gcc (<= 2.8.1).  */
7700           if (i.operands == 2)
7701             {
7702               /* Reversed arguments on faddp, fsubp, etc.  */
7703               as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
7704                        register_prefix, i.op[!intel_syntax].regs->reg_name,
7705                        register_prefix, i.op[intel_syntax].regs->reg_name);
7706             }
7707           else
7708             {
7709               /* Extraneous `l' suffix on fp insn.  */
7710               as_warn (_("translating to `%s %s%s'"), i.tm.name,
7711                        register_prefix, i.op[0].regs->reg_name);
7712             }
7713         }
7714     }
7715
7716   if ((i.seg[0] || i.prefix[SEG_PREFIX])
7717       && i.tm.base_opcode == 0x8d /* lea */
7718       && !is_any_vex_encoding(&i.tm))
7719     {
7720       if (!quiet_warnings)
7721         as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
7722       if (optimize)
7723         {
7724           i.seg[0] = NULL;
7725           i.prefix[SEG_PREFIX] = 0;
7726         }
7727     }
7728
7729   /* If a segment was explicitly specified, and the specified segment
7730      is neither the default nor the one already recorded from a prefix,
7731      use an opcode prefix to select it.  If we never figured out what
7732      the default segment is, then default_seg will be zero at this
7733      point, and the specified segment prefix will always be used.  */
7734   if (i.seg[0]
7735       && i.seg[0] != default_seg
7736       && i.seg[0]->seg_prefix != i.prefix[SEG_PREFIX])
7737     {
7738       if (!add_prefix (i.seg[0]->seg_prefix))
7739         return 0;
7740     }
7741   return 1;
7742 }
7743
7744 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
7745                                  bfd_boolean do_sse2avx)
7746 {
7747   if (r->reg_flags & RegRex)
7748     {
7749       if (i.rex & rex_bit)
7750         as_bad (_("same type of prefix used twice"));
7751       i.rex |= rex_bit;
7752     }
7753   else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
7754     {
7755       gas_assert (i.vex.register_specifier == r);
7756       i.vex.register_specifier += 8;
7757     }
7758
7759   if (r->reg_flags & RegVRex)
7760     i.vrex |= rex_bit;
7761 }
7762
7763 static const seg_entry *
7764 build_modrm_byte (void)
7765 {
7766   const seg_entry *default_seg = 0;
7767   unsigned int source, dest;
7768   int vex_3_sources;
7769
7770   vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
7771   if (vex_3_sources)
7772     {
7773       unsigned int nds, reg_slot;
7774       expressionS *exp;
7775
7776       dest = i.operands - 1;
7777       nds = dest - 1;
7778
7779       /* There are 2 kinds of instructions:
7780          1. 5 operands: 4 register operands or 3 register operands
7781          plus 1 memory operand plus one Imm4 operand, VexXDS, and
7782          VexW0 or VexW1.  The destination must be either XMM, YMM or
7783          ZMM register.
7784          2. 4 operands: 4 register operands or 3 register operands
7785          plus 1 memory operand, with VexXDS.  */
7786       gas_assert ((i.reg_operands == 4
7787                    || (i.reg_operands == 3 && i.mem_operands == 1))
7788                   && i.tm.opcode_modifier.vexvvvv == VEXXDS
7789                   && i.tm.opcode_modifier.vexw
7790                   && i.tm.operand_types[dest].bitfield.class == RegSIMD);
7791
7792       /* If VexW1 is set, the first non-immediate operand is the source and
7793          the second non-immediate one is encoded in the immediate operand.  */
7794       if (i.tm.opcode_modifier.vexw == VEXW1)
7795         {
7796           source = i.imm_operands;
7797           reg_slot = i.imm_operands + 1;
7798         }
7799       else
7800         {
7801           source = i.imm_operands + 1;
7802           reg_slot = i.imm_operands;
7803         }
7804
7805       if (i.imm_operands == 0)
7806         {
7807           /* When there is no immediate operand, generate an 8bit
7808              immediate operand to encode the first operand.  */
7809           exp = &im_expressions[i.imm_operands++];
7810           i.op[i.operands].imms = exp;
7811           i.types[i.operands] = imm8;
7812           i.operands++;
7813
7814           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
7815           exp->X_op = O_constant;
7816           exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
7817           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
7818         }
7819       else
7820         {
7821           gas_assert (i.imm_operands == 1);
7822           gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
7823           gas_assert (!i.tm.opcode_modifier.immext);
7824
7825           /* Turn on Imm8 again so that output_imm will generate it.  */
7826           i.types[0].bitfield.imm8 = 1;
7827
7828           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
7829           i.op[0].imms->X_add_number
7830               |= register_number (i.op[reg_slot].regs) << 4;
7831           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
7832         }
7833
7834       gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
7835       i.vex.register_specifier = i.op[nds].regs;
7836     }
7837   else
7838     source = dest = 0;
7839
7840   /* i.reg_operands MUST be the number of real register operands;
7841      implicit registers do not count.  If there are 3 register
7842      operands, it must be a instruction with VexNDS.  For a
7843      instruction with VexNDD, the destination register is encoded
7844      in VEX prefix.  If there are 4 register operands, it must be
7845      a instruction with VEX prefix and 3 sources.  */
7846   if (i.mem_operands == 0
7847       && ((i.reg_operands == 2
7848            && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
7849           || (i.reg_operands == 3
7850               && i.tm.opcode_modifier.vexvvvv == VEXXDS)
7851           || (i.reg_operands == 4 && vex_3_sources)))
7852     {
7853       switch (i.operands)
7854         {
7855         case 2:
7856           source = 0;
7857           break;
7858         case 3:
7859           /* When there are 3 operands, one of them may be immediate,
7860              which may be the first or the last operand.  Otherwise,
7861              the first operand must be shift count register (cl) or it
7862              is an instruction with VexNDS. */
7863           gas_assert (i.imm_operands == 1
7864                       || (i.imm_operands == 0
7865                           && (i.tm.opcode_modifier.vexvvvv == VEXXDS
7866                               || (i.types[0].bitfield.instance == RegC
7867                                   && i.types[0].bitfield.byte))));
7868           if (operand_type_check (i.types[0], imm)
7869               || (i.types[0].bitfield.instance == RegC
7870                   && i.types[0].bitfield.byte))
7871             source = 1;
7872           else
7873             source = 0;
7874           break;
7875         case 4:
7876           /* When there are 4 operands, the first two must be 8bit
7877              immediate operands. The source operand will be the 3rd
7878              one.
7879
7880              For instructions with VexNDS, if the first operand
7881              an imm8, the source operand is the 2nd one.  If the last
7882              operand is imm8, the source operand is the first one.  */
7883           gas_assert ((i.imm_operands == 2
7884                        && i.types[0].bitfield.imm8
7885                        && i.types[1].bitfield.imm8)
7886                       || (i.tm.opcode_modifier.vexvvvv == VEXXDS
7887                           && i.imm_operands == 1
7888                           && (i.types[0].bitfield.imm8
7889                               || i.types[i.operands - 1].bitfield.imm8
7890                               || i.rounding)));
7891           if (i.imm_operands == 2)
7892             source = 2;
7893           else
7894             {
7895               if (i.types[0].bitfield.imm8)
7896                 source = 1;
7897               else
7898                 source = 0;
7899             }
7900           break;
7901         case 5:
7902           if (is_evex_encoding (&i.tm))
7903             {
7904               /* For EVEX instructions, when there are 5 operands, the
7905                  first one must be immediate operand.  If the second one
7906                  is immediate operand, the source operand is the 3th
7907                  one.  If the last one is immediate operand, the source
7908                  operand is the 2nd one.  */
7909               gas_assert (i.imm_operands == 2
7910                           && i.tm.opcode_modifier.sae
7911                           && operand_type_check (i.types[0], imm));
7912               if (operand_type_check (i.types[1], imm))
7913                 source = 2;
7914               else if (operand_type_check (i.types[4], imm))
7915                 source = 1;
7916               else
7917                 abort ();
7918             }
7919           break;
7920         default:
7921           abort ();
7922         }
7923
7924       if (!vex_3_sources)
7925         {
7926           dest = source + 1;
7927
7928           /* RC/SAE operand could be between DEST and SRC.  That happens
7929              when one operand is GPR and the other one is XMM/YMM/ZMM
7930              register.  */
7931           if (i.rounding && i.rounding->operand == (int) dest)
7932             dest++;
7933
7934           if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
7935             {
7936               /* For instructions with VexNDS, the register-only source
7937                  operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
7938                  register.  It is encoded in VEX prefix.  */
7939
7940               i386_operand_type op;
7941               unsigned int vvvv;
7942
7943               /* Swap two source operands if needed.  */
7944               if (i.tm.opcode_modifier.swapsources)
7945                 {
7946                   vvvv = source;
7947                   source = dest;
7948                 }
7949               else
7950                 vvvv = dest;
7951
7952               op = i.tm.operand_types[vvvv];
7953               if ((dest + 1) >= i.operands
7954                   || ((op.bitfield.class != Reg
7955                        || (!op.bitfield.dword && !op.bitfield.qword))
7956                       && op.bitfield.class != RegSIMD
7957                       && !operand_type_equal (&op, &regmask)))
7958                 abort ();
7959               i.vex.register_specifier = i.op[vvvv].regs;
7960               dest++;
7961             }
7962         }
7963
7964       i.rm.mode = 3;
7965       /* One of the register operands will be encoded in the i.rm.reg
7966          field, the other in the combined i.rm.mode and i.rm.regmem
7967          fields.  If no form of this instruction supports a memory
7968          destination operand, then we assume the source operand may
7969          sometimes be a memory operand and so we need to store the
7970          destination in the i.rm.reg field.  */
7971       if (!i.tm.opcode_modifier.regmem
7972           && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
7973         {
7974           i.rm.reg = i.op[dest].regs->reg_num;
7975           i.rm.regmem = i.op[source].regs->reg_num;
7976           set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
7977           set_rex_vrex (i.op[source].regs, REX_B, FALSE);
7978         }
7979       else
7980         {
7981           i.rm.reg = i.op[source].regs->reg_num;
7982           i.rm.regmem = i.op[dest].regs->reg_num;
7983           set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
7984           set_rex_vrex (i.op[source].regs, REX_R, FALSE);
7985         }
7986       if (flag_code != CODE_64BIT && (i.rex & REX_R))
7987         {
7988           if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
7989             abort ();
7990           i.rex &= ~REX_R;
7991           add_prefix (LOCK_PREFIX_OPCODE);
7992         }
7993     }
7994   else
7995     {                   /* If it's not 2 reg operands...  */
7996       unsigned int mem;
7997
7998       if (i.mem_operands)
7999         {
8000           unsigned int fake_zero_displacement = 0;
8001           unsigned int op;
8002
8003           for (op = 0; op < i.operands; op++)
8004             if (i.flags[op] & Operand_Mem)
8005               break;
8006           gas_assert (op < i.operands);
8007
8008           if (i.tm.opcode_modifier.sib)
8009             {
8010               /* The index register of VSIB shouldn't be RegIZ.  */
8011               if (i.tm.opcode_modifier.sib != SIBMEM
8012                   && i.index_reg->reg_num == RegIZ)
8013                 abort ();
8014
8015               i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8016               if (!i.base_reg)
8017                 {
8018                   i.sib.base = NO_BASE_REGISTER;
8019                   i.sib.scale = i.log2_scale_factor;
8020                   i.types[op].bitfield.disp8 = 0;
8021                   i.types[op].bitfield.disp16 = 0;
8022                   i.types[op].bitfield.disp64 = 0;
8023                   if (flag_code != CODE_64BIT || i.prefix[ADDR_PREFIX])
8024                     {
8025                       /* Must be 32 bit */
8026                       i.types[op].bitfield.disp32 = 1;
8027                       i.types[op].bitfield.disp32s = 0;
8028                     }
8029                   else
8030                     {
8031                       i.types[op].bitfield.disp32 = 0;
8032                       i.types[op].bitfield.disp32s = 1;
8033                     }
8034                 }
8035
8036               /* Since the mandatory SIB always has index register, so
8037                  the code logic remains unchanged. The non-mandatory SIB
8038                  without index register is allowed and will be handled
8039                  later.  */
8040               if (i.index_reg)
8041                 {
8042                   if (i.index_reg->reg_num == RegIZ)
8043                     i.sib.index = NO_INDEX_REGISTER;
8044                   else
8045                     i.sib.index = i.index_reg->reg_num;
8046                   set_rex_vrex (i.index_reg, REX_X, FALSE);
8047                 }
8048             }
8049
8050           default_seg = &ds;
8051
8052           if (i.base_reg == 0)
8053             {
8054               i.rm.mode = 0;
8055               if (!i.disp_operands)
8056                 fake_zero_displacement = 1;
8057               if (i.index_reg == 0)
8058                 {
8059                   i386_operand_type newdisp;
8060
8061                   /* Both check for VSIB and mandatory non-vector SIB. */
8062                   gas_assert (!i.tm.opcode_modifier.sib
8063                               || i.tm.opcode_modifier.sib == SIBMEM);
8064                   /* Operand is just <disp>  */
8065                   if (flag_code == CODE_64BIT)
8066                     {
8067                       /* 64bit mode overwrites the 32bit absolute
8068                          addressing by RIP relative addressing and
8069                          absolute addressing is encoded by one of the
8070                          redundant SIB forms.  */
8071                       i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8072                       i.sib.base = NO_BASE_REGISTER;
8073                       i.sib.index = NO_INDEX_REGISTER;
8074                       newdisp = (!i.prefix[ADDR_PREFIX] ? disp32s : disp32);
8075                     }
8076                   else if ((flag_code == CODE_16BIT)
8077                            ^ (i.prefix[ADDR_PREFIX] != 0))
8078                     {
8079                       i.rm.regmem = NO_BASE_REGISTER_16;
8080                       newdisp = disp16;
8081                     }
8082                   else
8083                     {
8084                       i.rm.regmem = NO_BASE_REGISTER;
8085                       newdisp = disp32;
8086                     }
8087                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8088                   i.types[op] = operand_type_or (i.types[op], newdisp);
8089                 }
8090               else if (!i.tm.opcode_modifier.sib)
8091                 {
8092                   /* !i.base_reg && i.index_reg  */
8093                   if (i.index_reg->reg_num == RegIZ)
8094                     i.sib.index = NO_INDEX_REGISTER;
8095                   else
8096                     i.sib.index = i.index_reg->reg_num;
8097                   i.sib.base = NO_BASE_REGISTER;
8098                   i.sib.scale = i.log2_scale_factor;
8099                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8100                   i.types[op].bitfield.disp8 = 0;
8101                   i.types[op].bitfield.disp16 = 0;
8102                   i.types[op].bitfield.disp64 = 0;
8103                   if (flag_code != CODE_64BIT || i.prefix[ADDR_PREFIX])
8104                     {
8105                       /* Must be 32 bit */
8106                       i.types[op].bitfield.disp32 = 1;
8107                       i.types[op].bitfield.disp32s = 0;
8108                     }
8109                   else
8110                     {
8111                       i.types[op].bitfield.disp32 = 0;
8112                       i.types[op].bitfield.disp32s = 1;
8113                     }
8114                   if ((i.index_reg->reg_flags & RegRex) != 0)
8115                     i.rex |= REX_X;
8116                 }
8117             }
8118           /* RIP addressing for 64bit mode.  */
8119           else if (i.base_reg->reg_num == RegIP)
8120             {
8121               gas_assert (!i.tm.opcode_modifier.sib);
8122               i.rm.regmem = NO_BASE_REGISTER;
8123               i.types[op].bitfield.disp8 = 0;
8124               i.types[op].bitfield.disp16 = 0;
8125               i.types[op].bitfield.disp32 = 0;
8126               i.types[op].bitfield.disp32s = 1;
8127               i.types[op].bitfield.disp64 = 0;
8128               i.flags[op] |= Operand_PCrel;
8129               if (! i.disp_operands)
8130                 fake_zero_displacement = 1;
8131             }
8132           else if (i.base_reg->reg_type.bitfield.word)
8133             {
8134               gas_assert (!i.tm.opcode_modifier.sib);
8135               switch (i.base_reg->reg_num)
8136                 {
8137                 case 3: /* (%bx)  */
8138                   if (i.index_reg == 0)
8139                     i.rm.regmem = 7;
8140                   else /* (%bx,%si) -> 0, or (%bx,%di) -> 1  */
8141                     i.rm.regmem = i.index_reg->reg_num - 6;
8142                   break;
8143                 case 5: /* (%bp)  */
8144                   default_seg = &ss;
8145                   if (i.index_reg == 0)
8146                     {
8147                       i.rm.regmem = 6;
8148                       if (operand_type_check (i.types[op], disp) == 0)
8149                         {
8150                           /* fake (%bp) into 0(%bp)  */
8151                           i.types[op].bitfield.disp8 = 1;
8152                           fake_zero_displacement = 1;
8153                         }
8154                     }
8155                   else /* (%bp,%si) -> 2, or (%bp,%di) -> 3  */
8156                     i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8157                   break;
8158                 default: /* (%si) -> 4 or (%di) -> 5  */
8159                   i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8160                 }
8161               i.rm.mode = mode_from_disp_size (i.types[op]);
8162             }
8163           else /* i.base_reg and 32/64 bit mode  */
8164             {
8165               if (flag_code == CODE_64BIT
8166                   && operand_type_check (i.types[op], disp))
8167                 {
8168                   i.types[op].bitfield.disp16 = 0;
8169                   i.types[op].bitfield.disp64 = 0;
8170                   if (i.prefix[ADDR_PREFIX] == 0)
8171                     {
8172                       i.types[op].bitfield.disp32 = 0;
8173                       i.types[op].bitfield.disp32s = 1;
8174                     }
8175                   else
8176                     {
8177                       i.types[op].bitfield.disp32 = 1;
8178                       i.types[op].bitfield.disp32s = 0;
8179                     }
8180                 }
8181
8182               if (!i.tm.opcode_modifier.sib)
8183                 i.rm.regmem = i.base_reg->reg_num;
8184               if ((i.base_reg->reg_flags & RegRex) != 0)
8185                 i.rex |= REX_B;
8186               i.sib.base = i.base_reg->reg_num;
8187               /* x86-64 ignores REX prefix bit here to avoid decoder
8188                  complications.  */
8189               if (!(i.base_reg->reg_flags & RegRex)
8190                   && (i.base_reg->reg_num == EBP_REG_NUM
8191                    || i.base_reg->reg_num == ESP_REG_NUM))
8192                   default_seg = &ss;
8193               if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8194                 {
8195                   fake_zero_displacement = 1;
8196                   i.types[op].bitfield.disp8 = 1;
8197                 }
8198               i.sib.scale = i.log2_scale_factor;
8199               if (i.index_reg == 0)
8200                 {
8201                   /* Only check for VSIB. */
8202                   gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8203                               && i.tm.opcode_modifier.sib != VECSIB256
8204                               && i.tm.opcode_modifier.sib != VECSIB512);
8205
8206                   /* <disp>(%esp) becomes two byte modrm with no index
8207                      register.  We've already stored the code for esp
8208                      in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8209                      Any base register besides %esp will not use the
8210                      extra modrm byte.  */
8211                   i.sib.index = NO_INDEX_REGISTER;
8212                 }
8213               else if (!i.tm.opcode_modifier.sib)
8214                 {
8215                   if (i.index_reg->reg_num == RegIZ)
8216                     i.sib.index = NO_INDEX_REGISTER;
8217                   else
8218                     i.sib.index = i.index_reg->reg_num;
8219                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8220                   if ((i.index_reg->reg_flags & RegRex) != 0)
8221                     i.rex |= REX_X;
8222                 }
8223
8224               if (i.disp_operands
8225                   && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8226                       || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8227                 i.rm.mode = 0;
8228               else
8229                 {
8230                   if (!fake_zero_displacement
8231                       && !i.disp_operands
8232                       && i.disp_encoding)
8233                     {
8234                       fake_zero_displacement = 1;
8235                       if (i.disp_encoding == disp_encoding_8bit)
8236                         i.types[op].bitfield.disp8 = 1;
8237                       else
8238                         i.types[op].bitfield.disp32 = 1;
8239                     }
8240                   i.rm.mode = mode_from_disp_size (i.types[op]);
8241                 }
8242             }
8243
8244           if (fake_zero_displacement)
8245             {
8246               /* Fakes a zero displacement assuming that i.types[op]
8247                  holds the correct displacement size.  */
8248               expressionS *exp;
8249
8250               gas_assert (i.op[op].disps == 0);
8251               exp = &disp_expressions[i.disp_operands++];
8252               i.op[op].disps = exp;
8253               exp->X_op = O_constant;
8254               exp->X_add_number = 0;
8255               exp->X_add_symbol = (symbolS *) 0;
8256               exp->X_op_symbol = (symbolS *) 0;
8257             }
8258
8259           mem = op;
8260         }
8261       else
8262         mem = ~0;
8263
8264       if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
8265         {
8266           if (operand_type_check (i.types[0], imm))
8267             i.vex.register_specifier = NULL;
8268           else
8269             {
8270               /* VEX.vvvv encodes one of the sources when the first
8271                  operand is not an immediate.  */
8272               if (i.tm.opcode_modifier.vexw == VEXW0)
8273                 i.vex.register_specifier = i.op[0].regs;
8274               else
8275                 i.vex.register_specifier = i.op[1].regs;
8276             }
8277
8278           /* Destination is a XMM register encoded in the ModRM.reg
8279              and VEX.R bit.  */
8280           i.rm.reg = i.op[2].regs->reg_num;
8281           if ((i.op[2].regs->reg_flags & RegRex) != 0)
8282             i.rex |= REX_R;
8283
8284           /* ModRM.rm and VEX.B encodes the other source.  */
8285           if (!i.mem_operands)
8286             {
8287               i.rm.mode = 3;
8288
8289               if (i.tm.opcode_modifier.vexw == VEXW0)
8290                 i.rm.regmem = i.op[1].regs->reg_num;
8291               else
8292                 i.rm.regmem = i.op[0].regs->reg_num;
8293
8294               if ((i.op[1].regs->reg_flags & RegRex) != 0)
8295                 i.rex |= REX_B;
8296             }
8297         }
8298       else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
8299         {
8300           i.vex.register_specifier = i.op[2].regs;
8301           if (!i.mem_operands)
8302             {
8303               i.rm.mode = 3;
8304               i.rm.regmem = i.op[1].regs->reg_num;
8305               if ((i.op[1].regs->reg_flags & RegRex) != 0)
8306                 i.rex |= REX_B;
8307             }
8308         }
8309       /* Fill in i.rm.reg or i.rm.regmem field with register operand
8310          (if any) based on i.tm.extension_opcode.  Again, we must be
8311          careful to make sure that segment/control/debug/test/MMX
8312          registers are coded into the i.rm.reg field.  */
8313       else if (i.reg_operands)
8314         {
8315           unsigned int op;
8316           unsigned int vex_reg = ~0;
8317
8318           for (op = 0; op < i.operands; op++)
8319             if (i.types[op].bitfield.class == Reg
8320                 || i.types[op].bitfield.class == RegBND
8321                 || i.types[op].bitfield.class == RegMask
8322                 || i.types[op].bitfield.class == SReg
8323                 || i.types[op].bitfield.class == RegCR
8324                 || i.types[op].bitfield.class == RegDR
8325                 || i.types[op].bitfield.class == RegTR
8326                 || i.types[op].bitfield.class == RegSIMD
8327                 || i.types[op].bitfield.class == RegMMX)
8328               break;
8329
8330           if (vex_3_sources)
8331             op = dest;
8332           else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8333             {
8334               /* For instructions with VexNDS, the register-only
8335                  source operand is encoded in VEX prefix. */
8336               gas_assert (mem != (unsigned int) ~0);
8337
8338               if (op > mem)
8339                 {
8340                   vex_reg = op++;
8341                   gas_assert (op < i.operands);
8342                 }
8343               else
8344                 {
8345                   /* Check register-only source operand when two source
8346                      operands are swapped.  */
8347                   if (!i.tm.operand_types[op].bitfield.baseindex
8348                       && i.tm.operand_types[op + 1].bitfield.baseindex)
8349                     {
8350                       vex_reg = op;
8351                       op += 2;
8352                       gas_assert (mem == (vex_reg + 1)
8353                                   && op < i.operands);
8354                     }
8355                   else
8356                     {
8357                       vex_reg = op + 1;
8358                       gas_assert (vex_reg < i.operands);
8359                     }
8360                 }
8361             }
8362           else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
8363             {
8364               /* For instructions with VexNDD, the register destination
8365                  is encoded in VEX prefix.  */
8366               if (i.mem_operands == 0)
8367                 {
8368                   /* There is no memory operand.  */
8369                   gas_assert ((op + 2) == i.operands);
8370                   vex_reg = op + 1;
8371                 }
8372               else
8373                 {
8374                   /* There are only 2 non-immediate operands.  */
8375                   gas_assert (op < i.imm_operands + 2
8376                               && i.operands == i.imm_operands + 2);
8377                   vex_reg = i.imm_operands + 1;
8378                 }
8379             }
8380           else
8381             gas_assert (op < i.operands);
8382
8383           if (vex_reg != (unsigned int) ~0)
8384             {
8385               i386_operand_type *type = &i.tm.operand_types[vex_reg];
8386
8387               if ((type->bitfield.class != Reg
8388                    || (!type->bitfield.dword && !type->bitfield.qword))
8389                   && type->bitfield.class != RegSIMD
8390                   && !operand_type_equal (type, &regmask))
8391                 abort ();
8392
8393               i.vex.register_specifier = i.op[vex_reg].regs;
8394             }
8395
8396           /* Don't set OP operand twice.  */
8397           if (vex_reg != op)
8398             {
8399               /* If there is an extension opcode to put here, the
8400                  register number must be put into the regmem field.  */
8401               if (i.tm.extension_opcode != None)
8402                 {
8403                   i.rm.regmem = i.op[op].regs->reg_num;
8404                   set_rex_vrex (i.op[op].regs, REX_B,
8405                                 i.tm.opcode_modifier.sse2avx);
8406                 }
8407               else
8408                 {
8409                   i.rm.reg = i.op[op].regs->reg_num;
8410                   set_rex_vrex (i.op[op].regs, REX_R,
8411                                 i.tm.opcode_modifier.sse2avx);
8412                 }
8413             }
8414
8415           /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
8416              must set it to 3 to indicate this is a register operand
8417              in the regmem field.  */
8418           if (!i.mem_operands)
8419             i.rm.mode = 3;
8420         }
8421
8422       /* Fill in i.rm.reg field with extension opcode (if any).  */
8423       if (i.tm.extension_opcode != None)
8424         i.rm.reg = i.tm.extension_opcode;
8425     }
8426   return default_seg;
8427 }
8428
8429 static unsigned int
8430 flip_code16 (unsigned int code16)
8431 {
8432   gas_assert (i.tm.operands == 1);
8433
8434   return !(i.prefix[REX_PREFIX] & REX_W)
8435          && (code16 ? i.tm.operand_types[0].bitfield.disp32
8436                       || i.tm.operand_types[0].bitfield.disp32s
8437                     : i.tm.operand_types[0].bitfield.disp16)
8438          ? CODE16 : 0;
8439 }
8440
8441 static void
8442 output_branch (void)
8443 {
8444   char *p;
8445   int size;
8446   int code16;
8447   int prefix;
8448   relax_substateT subtype;
8449   symbolS *sym;
8450   offsetT off;
8451
8452   code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8453   size = i.disp_encoding == disp_encoding_32bit ? BIG : SMALL;
8454
8455   prefix = 0;
8456   if (i.prefix[DATA_PREFIX] != 0)
8457     {
8458       prefix = 1;
8459       i.prefixes -= 1;
8460       code16 ^= flip_code16(code16);
8461     }
8462   /* Pentium4 branch hints.  */
8463   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8464       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8465     {
8466       prefix++;
8467       i.prefixes--;
8468     }
8469   if (i.prefix[REX_PREFIX] != 0)
8470     {
8471       prefix++;
8472       i.prefixes--;
8473     }
8474
8475   /* BND prefixed jump.  */
8476   if (i.prefix[BND_PREFIX] != 0)
8477     {
8478       prefix++;
8479       i.prefixes--;
8480     }
8481
8482   if (i.prefixes != 0)
8483     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8484
8485   /* It's always a symbol;  End frag & setup for relax.
8486      Make sure there is enough room in this frag for the largest
8487      instruction we may generate in md_convert_frag.  This is 2
8488      bytes for the opcode and room for the prefix and largest
8489      displacement.  */
8490   frag_grow (prefix + 2 + 4);
8491   /* Prefix and 1 opcode byte go in fr_fix.  */
8492   p = frag_more (prefix + 1);
8493   if (i.prefix[DATA_PREFIX] != 0)
8494     *p++ = DATA_PREFIX_OPCODE;
8495   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8496       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8497     *p++ = i.prefix[SEG_PREFIX];
8498   if (i.prefix[BND_PREFIX] != 0)
8499     *p++ = BND_PREFIX_OPCODE;
8500   if (i.prefix[REX_PREFIX] != 0)
8501     *p++ = i.prefix[REX_PREFIX];
8502   *p = i.tm.base_opcode;
8503
8504   if ((unsigned char) *p == JUMP_PC_RELATIVE)
8505     subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8506   else if (cpu_arch_flags.bitfield.cpui386)
8507     subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8508   else
8509     subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8510   subtype |= code16;
8511
8512   sym = i.op[0].disps->X_add_symbol;
8513   off = i.op[0].disps->X_add_number;
8514
8515   if (i.op[0].disps->X_op != O_constant
8516       && i.op[0].disps->X_op != O_symbol)
8517     {
8518       /* Handle complex expressions.  */
8519       sym = make_expr_symbol (i.op[0].disps);
8520       off = 0;
8521     }
8522
8523   /* 1 possible extra opcode + 4 byte displacement go in var part.
8524      Pass reloc in fr_var.  */
8525   frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8526 }
8527
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* Return TRUE iff PLT32 relocation should be used for branching to
   symbol S.

   FALSE is returned for non-ELF output, on Solaris targets, for
   non-64-bit objects, when S is NULL, and for symbols that are
   defined, non-weak and not external.  Every other symbol (weak,
   undefined or global) yields TRUE.  */

static bfd_boolean
need_plt32_p (symbolS *s)
{
  /* PLT32 relocation is ELF only.  */
  if (!IS_ELF)
    return FALSE;

#ifdef TE_SOLARIS
  /* Don't emit PLT32 relocation on Solaris: neither native linker nor
     krtld support it.  */
  return FALSE;
#endif

  /* Since there is no need to prepare for PLT branch on x86-64, we
     can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
     be used as a marker for 32-bit PC-relative branches.  */
  if (!object_64bit)
    return FALSE;

  /* The caller passes the X_add_symbol of the branch target
     expression, which need not be set.  Without a symbol there is
     nothing to classify, so don't hand a NULL pointer to the symbol
     predicates below; keep the plain PC-relative relocation.  */
  if (s == NULL)
    return FALSE;

  /* Weak or undefined symbol need PLT32 relocation.  */
  if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
    return TRUE;

  /* Non-global symbol doesn't need PLT32 relocation.  */
  if (! S_IS_EXTERNAL (s))
    return FALSE;

  /* Other global symbols need PLT32 relocation.  NB: Symbol with
     non-default visibilities are treated as normal global symbol
     so that PLT32 relocation can be used as a marker for 32-bit
     PC-relative branches.  It is useful for linker relaxation.  */
  return TRUE;
}
#endif
8566
8567 static void
8568 output_jump (void)
8569 {
8570   char *p;
8571   int size;
8572   fixS *fixP;
8573   bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8574
8575   if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8576     {
8577       /* This is a loop or jecxz type instruction.  */
8578       size = 1;
8579       if (i.prefix[ADDR_PREFIX] != 0)
8580         {
8581           FRAG_APPEND_1_CHAR (ADDR_PREFIX_OPCODE);
8582           i.prefixes -= 1;
8583         }
8584       /* Pentium4 branch hints.  */
8585       if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8586           || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8587         {
8588           FRAG_APPEND_1_CHAR (i.prefix[SEG_PREFIX]);
8589           i.prefixes--;
8590         }
8591     }
8592   else
8593     {
8594       int code16;
8595
8596       code16 = 0;
8597       if (flag_code == CODE_16BIT)
8598         code16 = CODE16;
8599
8600       if (i.prefix[DATA_PREFIX] != 0)
8601         {
8602           FRAG_APPEND_1_CHAR (DATA_PREFIX_OPCODE);
8603           i.prefixes -= 1;
8604           code16 ^= flip_code16(code16);
8605         }
8606
8607       size = 4;
8608       if (code16)
8609         size = 2;
8610     }
8611
8612   /* BND prefixed jump.  */
8613   if (i.prefix[BND_PREFIX] != 0)
8614     {
8615       FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]);
8616       i.prefixes -= 1;
8617     }
8618
8619   if (i.prefix[REX_PREFIX] != 0)
8620     {
8621       FRAG_APPEND_1_CHAR (i.prefix[REX_PREFIX]);
8622       i.prefixes -= 1;
8623     }
8624
8625   if (i.prefixes != 0)
8626     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8627
8628   p = frag_more (i.tm.opcode_length + size);
8629   switch (i.tm.opcode_length)
8630     {
8631     case 2:
8632       *p++ = i.tm.base_opcode >> 8;
8633       /* Fall through.  */
8634     case 1:
8635       *p++ = i.tm.base_opcode;
8636       break;
8637     default:
8638       abort ();
8639     }
8640
8641 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8642   if (size == 4
8643       && jump_reloc == NO_RELOC
8644       && need_plt32_p (i.op[0].disps->X_add_symbol))
8645     jump_reloc = BFD_RELOC_X86_64_PLT32;
8646 #endif
8647
8648   jump_reloc = reloc (size, 1, 1, jump_reloc);
8649
8650   fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8651                       i.op[0].disps, 1, jump_reloc);
8652
8653   /* All jumps handled here are signed, but don't use a signed limit
8654      check for 32 and 16 bit jumps as we want to allow wrap around at
8655      4G and 64k respectively.  */
8656   if (size == 1)
8657     fixP->fx_signed = 1;
8658 }
8659
8660 static void
8661 output_interseg_jump (void)
8662 {
8663   char *p;
8664   int size;
8665   int prefix;
8666   int code16;
8667
8668   code16 = 0;
8669   if (flag_code == CODE_16BIT)
8670     code16 = CODE16;
8671
8672   prefix = 0;
8673   if (i.prefix[DATA_PREFIX] != 0)
8674     {
8675       prefix = 1;
8676       i.prefixes -= 1;
8677       code16 ^= CODE16;
8678     }
8679
8680   gas_assert (!i.prefix[REX_PREFIX]);
8681
8682   size = 4;
8683   if (code16)
8684     size = 2;
8685
8686   if (i.prefixes != 0)
8687     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8688
8689   /* 1 opcode; 2 segment; offset  */
8690   p = frag_more (prefix + 1 + 2 + size);
8691
8692   if (i.prefix[DATA_PREFIX] != 0)
8693     *p++ = DATA_PREFIX_OPCODE;
8694
8695   if (i.prefix[REX_PREFIX] != 0)
8696     *p++ = i.prefix[REX_PREFIX];
8697
8698   *p++ = i.tm.base_opcode;
8699   if (i.op[1].imms->X_op == O_constant)
8700     {
8701       offsetT n = i.op[1].imms->X_add_number;
8702
8703       if (size == 2
8704           && !fits_in_unsigned_word (n)
8705           && !fits_in_signed_word (n))
8706         {
8707           as_bad (_("16-bit jump out of range"));
8708           return;
8709         }
8710       md_number_to_chars (p, n, size);
8711     }
8712   else
8713     fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8714                  i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
8715   if (i.op[0].imms->X_op != O_constant)
8716     as_bad (_("can't handle non absolute segment in `%s'"),
8717             i.tm.name);
8718   md_number_to_chars (p + size, (valueT) i.op[0].imms->X_add_number, 2);
8719 }
8720
8721 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8722 void
8723 x86_cleanup (void)
8724 {
8725   char *p;
8726   asection *seg = now_seg;
8727   subsegT subseg = now_subseg;
8728   asection *sec;
8729   unsigned int alignment, align_size_1;
8730   unsigned int isa_1_descsz, feature_2_descsz, descsz;
8731   unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
8732   unsigned int padding;
8733
8734   if (!IS_ELF || !x86_used_note)
8735     return;
8736
8737   x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
8738
8739   /* The .note.gnu.property section layout:
8740
8741      Field      Length          Contents
8742      ----       ----            ----
8743      n_namsz    4               4
8744      n_descsz   4               The note descriptor size
8745      n_type     4               NT_GNU_PROPERTY_TYPE_0
8746      n_name     4               "GNU"
8747      n_desc     n_descsz        The program property array
8748      ....       ....            ....
8749    */
8750
8751   /* Create the .note.gnu.property section.  */
8752   sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
8753   bfd_set_section_flags (sec,
8754                          (SEC_ALLOC
8755                           | SEC_LOAD
8756                           | SEC_DATA
8757                           | SEC_HAS_CONTENTS
8758                           | SEC_READONLY));
8759
8760   if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
8761     {
8762       align_size_1 = 7;
8763       alignment = 3;
8764     }
8765   else
8766     {
8767       align_size_1 = 3;
8768       alignment = 2;
8769     }
8770
8771   bfd_set_section_alignment (sec, alignment);
8772   elf_section_type (sec) = SHT_NOTE;
8773
8774   /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
8775                                   + 4-byte data  */
8776   isa_1_descsz_raw = 4 + 4 + 4;
8777   /* Align GNU_PROPERTY_X86_ISA_1_USED.  */
8778   isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
8779
8780   feature_2_descsz_raw = isa_1_descsz;
8781   /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
8782                                       + 4-byte data  */
8783   feature_2_descsz_raw += 4 + 4 + 4;
8784   /* Align GNU_PROPERTY_X86_FEATURE_2_USED.  */
8785   feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
8786                       & ~align_size_1);
8787
8788   descsz = feature_2_descsz;
8789   /* Section size: n_namsz + n_descsz + n_type + n_name + n_descsz.  */
8790   p = frag_more (4 + 4 + 4 + 4 + descsz);
8791
8792   /* Write n_namsz.  */
8793   md_number_to_chars (p, (valueT) 4, 4);
8794
8795   /* Write n_descsz.  */
8796   md_number_to_chars (p + 4, (valueT) descsz, 4);
8797
8798   /* Write n_type.  */
8799   md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
8800
8801   /* Write n_name.  */
8802   memcpy (p + 4 * 3, "GNU", 4);
8803
8804   /* Write 4-byte type.  */
8805   md_number_to_chars (p + 4 * 4,
8806                       (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
8807
8808   /* Write 4-byte data size.  */
8809   md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
8810
8811   /* Write 4-byte data.  */
8812   md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
8813
8814   /* Zero out paddings.  */
8815   padding = isa_1_descsz - isa_1_descsz_raw;
8816   if (padding)
8817     memset (p + 4 * 7, 0, padding);
8818
8819   /* Write 4-byte type.  */
8820   md_number_to_chars (p + isa_1_descsz + 4 * 4,
8821                       (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
8822
8823   /* Write 4-byte data size.  */
8824   md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
8825
8826   /* Write 4-byte data.  */
8827   md_number_to_chars (p + isa_1_descsz + 4 * 6,
8828                       (valueT) x86_feature_2_used, 4);
8829
8830   /* Zero out paddings.  */
8831   padding = feature_2_descsz - feature_2_descsz_raw;
8832   if (padding)
8833     memset (p + isa_1_descsz + 4 * 7, 0, padding);
8834
8835   /* We probably can't restore the current segment, for there likely
8836      isn't one yet...  */
8837   if (seg && subseg)
8838     subseg_set (seg, subseg);
8839 }
8840 #endif
8841
8842 static unsigned int
8843 encoding_length (const fragS *start_frag, offsetT start_off,
8844                  const char *frag_now_ptr)
8845 {
8846   unsigned int len = 0;
8847
8848   if (start_frag != frag_now)
8849     {
8850       const fragS *fr = start_frag;
8851
8852       do {
8853         len += fr->fr_fix;
8854         fr = fr->fr_next;
8855       } while (fr && fr != frag_now);
8856     }
8857
8858   return len - start_off + (frag_now_ptr - frag_now->fr_literal);
8859 }
8860
8861 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
8862    be macro-fused with conditional jumps.
8863    NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
8864    or is one of the following format:
8865
8866     cmp m, imm
8867     add m, imm
8868     sub m, imm
8869    test m, imm
8870     and m, imm
8871     inc m
8872     dec m
8873
8874    it is unfusible.  */
8875
8876 static int
8877 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
8878 {
8879   /* No RIP address.  */
8880   if (i.base_reg && i.base_reg->reg_num == RegIP)
8881     return 0;
8882
8883   /* No VEX/EVEX encoding.  */
8884   if (is_any_vex_encoding (&i.tm))
8885     return 0;
8886
8887   /* add, sub without add/sub m, imm.  */
8888   if (i.tm.base_opcode <= 5
8889       || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
8890       || ((i.tm.base_opcode | 3) == 0x83
8891           && (i.tm.extension_opcode == 0x5
8892               || i.tm.extension_opcode == 0x0)))
8893     {
8894       *mf_cmp_p = mf_cmp_alu_cmp;
8895       return !(i.mem_operands && i.imm_operands);
8896     }
8897
8898   /* and without and m, imm.  */
8899   if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
8900       || ((i.tm.base_opcode | 3) == 0x83
8901           && i.tm.extension_opcode == 0x4))
8902     {
8903       *mf_cmp_p = mf_cmp_test_and;
8904       return !(i.mem_operands && i.imm_operands);
8905     }
8906
8907   /* test without test m imm.  */
8908   if ((i.tm.base_opcode | 1) == 0x85
8909       || (i.tm.base_opcode | 1) == 0xa9
8910       || ((i.tm.base_opcode | 1) == 0xf7
8911           && i.tm.extension_opcode == 0))
8912     {
8913       *mf_cmp_p = mf_cmp_test_and;
8914       return !(i.mem_operands && i.imm_operands);
8915     }
8916
8917   /* cmp without cmp m, imm.  */
8918   if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
8919       || ((i.tm.base_opcode | 3) == 0x83
8920           && (i.tm.extension_opcode == 0x7)))
8921     {
8922       *mf_cmp_p = mf_cmp_alu_cmp;
8923       return !(i.mem_operands && i.imm_operands);
8924     }
8925
8926   /* inc, dec without inc/dec m.   */
8927   if ((i.tm.cpu_flags.bitfield.cpuno64
8928        && (i.tm.base_opcode | 0xf) == 0x4f)
8929       || ((i.tm.base_opcode | 1) == 0xff
8930           && i.tm.extension_opcode <= 0x1))
8931     {
8932       *mf_cmp_p = mf_cmp_incdec;
8933       return !i.mem_operands;
8934     }
8935
8936   return 0;
8937 }
8938
8939 /* Return 1 if a FUSED_JCC_PADDING frag should be generated.  */
8940
8941 static int
8942 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
8943 {
8944   /* NB: Don't work with COND_JUMP86 without i386.  */
8945   if (!align_branch_power
8946       || now_seg == absolute_section
8947       || !cpu_arch_flags.bitfield.cpui386
8948       || !(align_branch & align_branch_fused_bit))
8949     return 0;
8950
8951   if (maybe_fused_with_jcc_p (mf_cmp_p))
8952     {
8953       if (last_insn.kind == last_insn_other
8954           || last_insn.seg != now_seg)
8955         return 1;
8956       if (flag_debug)
8957         as_warn_where (last_insn.file, last_insn.line,
8958                        _("`%s` skips -malign-branch-boundary on `%s`"),
8959                        last_insn.name, i.tm.name);
8960     }
8961
8962   return 0;
8963 }
8964
8965 /* Return 1 if a BRANCH_PREFIX frag should be generated.  */
8966
8967 static int
8968 add_branch_prefix_frag_p (void)
8969 {
8970   /* NB: Don't work with COND_JUMP86 without i386.  Don't add prefix
8971      to PadLock instructions since they include prefixes in opcode.  */
8972   if (!align_branch_power
8973       || !align_branch_prefix_size
8974       || now_seg == absolute_section
8975       || i.tm.cpu_flags.bitfield.cpupadlock
8976       || !cpu_arch_flags.bitfield.cpui386)
8977     return 0;
8978
8979   /* Don't add prefix if it is a prefix or there is no operand in case
8980      that segment prefix is special.  */
8981   if (!i.operands || i.tm.opcode_modifier.isprefix)
8982     return 0;
8983
8984   if (last_insn.kind == last_insn_other
8985       || last_insn.seg != now_seg)
8986     return 1;
8987
8988   if (flag_debug)
8989     as_warn_where (last_insn.file, last_insn.line,
8990                    _("`%s` skips -malign-branch-boundary on `%s`"),
8991                    last_insn.name, i.tm.name);
8992
8993   return 0;
8994 }
8995
8996 /* Return 1 if a BRANCH_PADDING frag should be generated.  */
8997
8998 static int
8999 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9000                            enum mf_jcc_kind *mf_jcc_p)
9001 {
9002   int add_padding;
9003
9004   /* NB: Don't work with COND_JUMP86 without i386.  */
9005   if (!align_branch_power
9006       || now_seg == absolute_section
9007       || !cpu_arch_flags.bitfield.cpui386)
9008     return 0;
9009
9010   add_padding = 0;
9011
9012   /* Check for jcc and direct jmp.  */
9013   if (i.tm.opcode_modifier.jump == JUMP)
9014     {
9015       if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9016         {
9017           *branch_p = align_branch_jmp;
9018           add_padding = align_branch & align_branch_jmp_bit;
9019         }
9020       else
9021         {
9022           /* Because J<cc> and JN<cc> share same group in macro-fusible table,
9023              igore the lowest bit.  */
9024           *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9025           *branch_p = align_branch_jcc;
9026           if ((align_branch & align_branch_jcc_bit))
9027             add_padding = 1;
9028         }
9029     }
9030   else if (is_any_vex_encoding (&i.tm))
9031     return 0;
9032   else if ((i.tm.base_opcode | 1) == 0xc3)
9033     {
9034       /* Near ret.  */
9035       *branch_p = align_branch_ret;
9036       if ((align_branch & align_branch_ret_bit))
9037         add_padding = 1;
9038     }
9039   else
9040     {
9041       /* Check for indirect jmp, direct and indirect calls.  */
9042       if (i.tm.base_opcode == 0xe8)
9043         {
9044           /* Direct call.  */
9045           *branch_p = align_branch_call;
9046           if ((align_branch & align_branch_call_bit))
9047             add_padding = 1;
9048         }
9049       else if (i.tm.base_opcode == 0xff
9050                && (i.tm.extension_opcode == 2
9051                    || i.tm.extension_opcode == 4))
9052         {
9053           /* Indirect call and jmp.  */
9054           *branch_p = align_branch_indirect;
9055           if ((align_branch & align_branch_indirect_bit))
9056             add_padding = 1;
9057         }
9058
9059       if (add_padding
9060           && i.disp_operands
9061           && tls_get_addr
9062           && (i.op[0].disps->X_op == O_symbol
9063               || (i.op[0].disps->X_op == O_subtract
9064                   && i.op[0].disps->X_op_symbol == GOT_symbol)))
9065         {
9066           symbolS *s = i.op[0].disps->X_add_symbol;
9067           /* No padding to call to global or undefined tls_get_addr.  */
9068           if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9069               && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9070             return 0;
9071         }
9072     }
9073
9074   if (add_padding
9075       && last_insn.kind != last_insn_other
9076       && last_insn.seg == now_seg)
9077     {
9078       if (flag_debug)
9079         as_warn_where (last_insn.file, last_insn.line,
9080                        _("`%s` skips -malign-branch-boundary on `%s`"),
9081                        last_insn.name, i.tm.name);
9082       return 0;
9083     }
9084
9085   return add_padding;
9086 }
9087
/* Emit the machine encoding of the instruction currently held in the
   global `i'.

   In order, this function:
     - for ELF output with x86_used_note set, accumulates the ISA and
       feature bits implied by the matched template and register state
       into x86_isa_1_used and x86_feature_2_used;
     - emits DWARF2 line information for the start of the instruction;
     - optionally opens a BRANCH_PADDING, FUSED_JCC_PADDING or
       BRANCH_PREFIX frag ahead of the instruction;
     - hands jump templates to output_branch, output_jump or
       output_interseg_jump, and for everything else writes prefixes,
       opcode bytes, ModRM/SIB, displacements and immediates itself;
     - warns when the encoding is longer than 15 bytes, and otherwise
       records in the frag opened above how many prefix bytes may still
       be added and which segment prefix to use for that;
     - when branch alignment is active, finishes the current frag so
       that the next instruction starts a new one.  */

static void
output_insn (void)
{
  /* Frag and offset at which this instruction starts; used to measure
     the encoding length once everything has been emitted.  */
  fragS *insn_start_frag;
  offsetT insn_start_off;
  /* The padding/prefix frag opened for this instruction, if any.  */
  fragS *fragP = NULL;
  enum align_branch_kind branch = align_branch_none;
  /* The initializer is arbitrary just to avoid uninitialized error.
     it's actually either assigned in add_branch_padding_frag_p
     or never be used.  */
  enum mf_jcc_kind mf_jcc = mf_jcc_jo;

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  if (IS_ELF && x86_used_note)
    {
      /* Record which ISA extensions the matched template requires.  */
      if (i.tm.cpu_flags.bitfield.cpucmov)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_CMOV;
      if (i.tm.cpu_flags.bitfield.cpusse)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE;
      if (i.tm.cpu_flags.bitfield.cpusse2)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE2;
      if (i.tm.cpu_flags.bitfield.cpusse3)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE3;
      if (i.tm.cpu_flags.bitfield.cpussse3)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSSE3;
      if (i.tm.cpu_flags.bitfield.cpusse4_1)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE4_1;
      if (i.tm.cpu_flags.bitfield.cpusse4_2)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE4_2;
      if (i.tm.cpu_flags.bitfield.cpuavx)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX;
      if (i.tm.cpu_flags.bitfield.cpuavx2)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX2;
      if (i.tm.cpu_flags.bitfield.cpufma)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_FMA;
      if (i.tm.cpu_flags.bitfield.cpuavx512f)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512F;
      if (i.tm.cpu_flags.bitfield.cpuavx512cd)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512CD;
      if (i.tm.cpu_flags.bitfield.cpuavx512er)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512ER;
      if (i.tm.cpu_flags.bitfield.cpuavx512pf)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512PF;
      if (i.tm.cpu_flags.bitfield.cpuavx512vl)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512VL;
      if (i.tm.cpu_flags.bitfield.cpuavx512dq)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512DQ;
      if (i.tm.cpu_flags.bitfield.cpuavx512bw)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512BW;
      if (i.tm.cpu_flags.bitfield.cpuavx512_4fmaps)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_4FMAPS;
      if (i.tm.cpu_flags.bitfield.cpuavx512_4vnniw)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_4VNNIW;
      if (i.tm.cpu_flags.bitfield.cpuavx512_bitalg)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BITALG;
      if (i.tm.cpu_flags.bitfield.cpuavx512ifma)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_IFMA;
      if (i.tm.cpu_flags.bitfield.cpuavx512vbmi)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI;
      if (i.tm.cpu_flags.bitfield.cpuavx512_vbmi2)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2;
      if (i.tm.cpu_flags.bitfield.cpuavx512_vnni)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VNNI;
      if (i.tm.cpu_flags.bitfield.cpuavx512_bf16)
        x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BF16;

      /* Record which feature/register-state classes are touched.  */
      if (i.tm.cpu_flags.bitfield.cpu8087
          || i.tm.cpu_flags.bitfield.cpu287
          || i.tm.cpu_flags.bitfield.cpu387
          || i.tm.cpu_flags.bitfield.cpu687
          || i.tm.cpu_flags.bitfield.cpufisttp)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
      if ((i.xstate & xstate_mmx)
          || i.tm.base_opcode == 0xf77 /* emms */
          || i.tm.base_opcode == 0xf0e /* femms */)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
      if ((i.xstate & xstate_xmm))
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
      /* Unlike the MMX/XMM tests above, these require every bit of
         the mask to be set, not just any of them.  */
      if ((i.xstate & xstate_ymm) == xstate_ymm)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
      if ((i.xstate & xstate_zmm) == xstate_zmm)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
      if (i.tm.cpu_flags.bitfield.cpufxsr)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
      if (i.tm.cpu_flags.bitfield.cpuxsave)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
      if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
      if (i.tm.cpu_flags.bitfield.cpuxsavec)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;

      if ((i.xstate & xstate_tmm) == xstate_tmm
          || i.tm.cpu_flags.bitfield.cpuamx_tile)
        x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
    }
#endif

  /* Tie dwarf2 debug info to the address at the start of the insn.
     We can't do this after the insn has been output as the current
     frag may have been closed off.  eg. by frag_var.  */
  dwarf2_emit_insn (0);

  insn_start_frag = frag_now;
  insn_start_off = frag_now_fix ();

  /* This call also classifies the instruction: BRANCH is set for any
     recognized branch kind even when no padding is requested.  */
  if (add_branch_padding_frag_p (&branch, &mf_jcc))
    {
      char *p;
      /* Branch can be 8 bytes.  Leave some room for prefixes.  */
      unsigned int max_branch_padding_size = 14;

      /* Align section to boundary.  */
      record_alignment (now_seg, align_branch_power);

      /* Make room for padding.  */
      frag_grow (max_branch_padding_size);

      /* Start of the padding.  */
      p = frag_more (0);

      /* Remember the frag before frag_var is called on it, so its
         tc_frag_data can be filled in below.  */
      fragP = frag_now;

      frag_var (rs_machine_dependent, max_branch_padding_size, 0,
                ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
                NULL, 0, p);

      fragP->tc_frag_data.mf_type = mf_jcc;
      fragP->tc_frag_data.branch_type = branch;
      fragP->tc_frag_data.max_bytes = max_branch_padding_size;
    }

  /* Output jumps.  */
  if (i.tm.opcode_modifier.jump == JUMP)
    output_branch ();
  else if (i.tm.opcode_modifier.jump == JUMP_BYTE
           || i.tm.opcode_modifier.jump == JUMP_DWORD)
    output_jump ();
  else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
    output_interseg_jump ();
  else
    {
      /* Output normal instructions here.  */
      char *p;
      unsigned char *q;
      unsigned int j;
      unsigned int prefix;
      enum mf_cmp_kind mf_cmp;

      if (avoid_fence
          && (i.tm.base_opcode == 0xfaee8
              || i.tm.base_opcode == 0xfaef0
              || i.tm.base_opcode == 0xfaef8))
        {
          /* Encode lfence, mfence, and sfence as
             f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
          offsetT val = 0x240483f0ULL;
          p = frag_more (5);
          md_number_to_chars (p, val, 5);
          /* NB: this returns before the frag termination done at the
             end of the function.  */
          return;
        }

      /* Some processors fail on LOCK prefix. This options makes
         assembler ignore LOCK prefix and serves as a workaround.  */
      if (omit_lock_prefix)
        {
          /* A stand-alone LOCK is dropped entirely; a LOCK attached
             to this instruction is just cleared.  */
          if (i.tm.base_opcode == LOCK_PREFIX_OPCODE)
            return;
          i.prefix[LOCK_PREFIX] = 0;
        }

      /* NOTE(review): this test relies on align_branch_none being
         zero -- confirm against the enum definition.  */
      if (branch)
        /* Skip if this is a branch.  */
        ;
      else if (add_fused_jcc_padding_frag_p (&mf_cmp))
        {
          /* Make room for padding.  */
          frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
          p = frag_more (0);

          fragP = frag_now;

          frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
                    ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
                    NULL, 0, p);

          fragP->tc_frag_data.mf_type = mf_cmp;
          fragP->tc_frag_data.branch_type = align_branch_fused;
          fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
        }
      else if (add_branch_prefix_frag_p ())
        {
          unsigned int max_prefix_size = align_branch_prefix_size;

          /* Make room for padding.  */
          frag_grow (max_prefix_size);
          p = frag_more (0);

          fragP = frag_now;

          frag_var (rs_machine_dependent, max_prefix_size, 0,
                    ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
                    NULL, 0, p);

          fragP->tc_frag_data.max_bytes = max_prefix_size;
        }

      /* Since the VEX/EVEX prefix contains the implicit prefix, we
         don't need the explicit prefix.  */
      if (!i.tm.opcode_modifier.vex && !i.tm.opcode_modifier.evex)
        {
          /* A byte stored above the opcode bytes proper in base_opcode
             is a mandatory prefix; register it via add_prefix so it is
             emitted with the other prefixes below.  */
          switch (i.tm.opcode_length)
            {
            case 3:
              if (i.tm.base_opcode & 0xff000000)
                {
                  prefix = (i.tm.base_opcode >> 24) & 0xff;
                  /* For PadLock, skip a REPE prefix that is already
                     recorded in i.prefix[REP_PREFIX].  */
                  if (!i.tm.cpu_flags.bitfield.cpupadlock
                      || prefix != REPE_PREFIX_OPCODE
                      || (i.prefix[REP_PREFIX] != REPE_PREFIX_OPCODE))
                    add_prefix (prefix);
                }
              break;
            case 2:
              if ((i.tm.base_opcode & 0xff0000) != 0)
                {
                  prefix = (i.tm.base_opcode >> 16) & 0xff;
                  add_prefix (prefix);
                }
              break;
            case 1:
              break;
            case 0:
              /* Check for pseudo prefixes.  */
              as_bad_where (insn_start_frag->fr_file,
                            insn_start_frag->fr_line,
                             _("pseudo prefix without instruction"));
              return;
            default:
              abort ();
            }

#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
          /* For x32, add a dummy REX_OPCODE prefix for mov/add with
             R_X86_64_GOTTPOFF relocation so that linker can safely
             perform IE->LE optimization.  A dummy REX_OPCODE prefix
             is also needed for lea with R_X86_64_GOTPC32_TLSDESC
             relocation for GDesc -> IE/LE optimization.  */
          if (x86_elf_abi == X86_64_X32_ABI
              && i.operands == 2
              && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
                  || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
              && i.prefix[REX_PREFIX] == 0)
            add_prefix (REX_OPCODE);
#endif

          /* The prefix bytes.  J only counts iterations; Q walks
             i.prefix in array order and every nonzero slot is
             emitted.  */
          for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
            if (*q)
              FRAG_APPEND_1_CHAR (*q);
        }
      else
        {
          /* Only segment and address-size prefixes may accompany a
             VEX/EVEX prefix; anything else is an internal error.  */
          for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
            if (*q)
              switch (j)
                {
                case SEG_PREFIX:
                case ADDR_PREFIX:
                  FRAG_APPEND_1_CHAR (*q);
                  break;
                default:
                  /* There should be no other prefixes for instructions
                     with VEX prefix.  */
                  abort ();
                }

          /* For EVEX instructions i.vrex should become 0 after
             build_evex_prefix.  For VEX instructions upper 16 registers
             aren't available, so VREX should be 0.  */
          if (i.vrex)
            abort ();
          /* Now the VEX prefix.  */
          p = frag_more (i.vex.length);
          for (j = 0; j < i.vex.length; j++)
            p[j] = i.vex.bytes[j];
        }

      /* Now the opcode; be careful about word order here!  */
      if (i.tm.opcode_length == 1)
        {
          FRAG_APPEND_1_CHAR (i.tm.base_opcode);
        }
      else
        {
          /* Each case reserves the whole opcode and writes any leading
             bytes; the final two bytes are written after the switch.  */
          switch (i.tm.opcode_length)
            {
            case 4:
              p = frag_more (4);
              *p++ = (i.tm.base_opcode >> 24) & 0xff;
              *p++ = (i.tm.base_opcode >> 16) & 0xff;
              break;
            case 3:
              p = frag_more (3);
              *p++ = (i.tm.base_opcode >> 16) & 0xff;
              break;
            case 2:
              p = frag_more (2);
              break;
            default:
              abort ();
              break;
            }

          /* Put out high byte first: can't use md_number_to_chars!  */
          *p++ = (i.tm.base_opcode >> 8) & 0xff;
          *p = i.tm.base_opcode & 0xff;
        }

      /* Now the modrm byte and sib byte (if present).  */
      if (i.tm.opcode_modifier.modrm)
        {
          FRAG_APPEND_1_CHAR ((i.rm.regmem << 0
                               | i.rm.reg << 3
                               | i.rm.mode << 6));
          /* If i.rm.regmem == ESP (4)
             && i.rm.mode != (Register mode)
             && not 16 bit
             ==> need second modrm byte.  */
          if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
              && i.rm.mode != 3
              && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
            FRAG_APPEND_1_CHAR ((i.sib.base << 0
                                 | i.sib.index << 3
                                 | i.sib.scale << 6));
        }

      if (i.disp_operands)
        output_disp (insn_start_frag, insn_start_off);

      if (i.imm_operands)
        output_imm (insn_start_frag, insn_start_off);

      /*
       * frag_now_fix () returning plain abs_section_offset when we're in the
       * absolute section, and abs_section_offset not getting updated as data
       * gets added to the frag breaks the logic below.
       */
      if (now_seg != absolute_section)
        {
          /* J is now the total number of bytes emitted for this
             instruction.  */
          j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
          if (j > 15)
            as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
                     j);
          else if (fragP)
            {
              /* NB: Don't add prefix with GOTPC relocation since
                 output_disp() above depends on the fixed encoding
                 length.  Can't add prefix with TLS relocation since
                 it breaks TLS linker optimization.  */
              unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
              /* Prefix count on the current instruction.  */
              unsigned int count = i.vex.length;
              unsigned int k;
              for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
                /* REX byte is encoded in VEX/EVEX prefix.  */
                if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
                  count++;

              /* Count prefixes for extended opcode maps.  The 0x0f
                 escape counts as one, a following 0x38 or 0x3a as
                 another.  */
              if (!i.vex.length)
                switch (i.tm.opcode_length)
                  {
                  case 3:
                    if (((i.tm.base_opcode >> 16) & 0xff) == 0xf)
                      {
                        count++;
                        switch ((i.tm.base_opcode >> 8) & 0xff)
                          {
                          case 0x38:
                          case 0x3a:
                            count++;
                            break;
                          default:
                            break;
                          }
                      }
                    break;
                  case 2:
                    if (((i.tm.base_opcode >> 8) & 0xff) == 0xf)
                      count++;
                    break;
                  case 1:
                    break;
                  default:
                    abort ();
                  }

              if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
                  == BRANCH_PREFIX)
                {
                  /* Set the maximum prefix size in BRANCH_PREFIX
                     frag: clamp it to MAX, then subtract the prefixes
                     already present, bottoming out at zero.  */
                  if (fragP->tc_frag_data.max_bytes > max)
                    fragP->tc_frag_data.max_bytes = max;
                  if (fragP->tc_frag_data.max_bytes > count)
                    fragP->tc_frag_data.max_bytes -= count;
                  else
                    fragP->tc_frag_data.max_bytes = 0;
                }
              else
                {
                  /* Remember the maximum prefix size in FUSED_JCC_PADDING
                     frag.  max_prefix_length is left untouched when the
                     existing prefixes already use up the budget.  */
                  unsigned int max_prefix_size;
                  if (align_branch_prefix_size > max)
                    max_prefix_size = max;
                  else
                    max_prefix_size = align_branch_prefix_size;
                  if (max_prefix_size > count)
                    fragP->tc_frag_data.max_prefix_length
                      = max_prefix_size - count;
                }

              /* Use existing segment prefix if possible.  Use CS
                 segment prefix in 64-bit mode.  In 32-bit mode, use SS
                 segment prefix with ESP/EBP base register and use DS
                 segment prefix without ESP/EBP base register.  */
              if (i.prefix[SEG_PREFIX])
                fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
              else if (flag_code == CODE_64BIT)
                fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
              else if (i.base_reg
                       && (i.base_reg->reg_num == 4
                           || i.base_reg->reg_num == 5))
                fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
              else
                fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
            }
        }
    }

  /* NB: Don't work with COND_JUMP86 without i386.  */
  if (align_branch_power
      && now_seg != absolute_section
      && cpu_arch_flags.bitfield.cpui386)
    {
      /* Terminate each frag so that we can add prefix and check for
         fused jcc.  */
      frag_wane (frag_now);
      frag_new (0);
    }

#ifdef DEBUG386
  if (flag_debug)
    {
      pi ("" /*line*/, &i);
    }
#endif /* DEBUG386  */
}
9547
9548 /* Return the size of the displacement operand N.  */
9549
9550 static int
9551 disp_size (unsigned int n)
9552 {
9553   int size = 4;
9554
9555   if (i.types[n].bitfield.disp64)
9556     size = 8;
9557   else if (i.types[n].bitfield.disp8)
9558     size = 1;
9559   else if (i.types[n].bitfield.disp16)
9560     size = 2;
9561   return size;
9562 }
9563
9564 /* Return the size of the immediate operand N.  */
9565
9566 static int
9567 imm_size (unsigned int n)
9568 {
9569   int size = 4;
9570   if (i.types[n].bitfield.imm64)
9571     size = 8;
9572   else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
9573     size = 1;
9574   else if (i.types[n].bitfield.imm16)
9575     size = 2;
9576   return size;
9577 }
9578
9579 static void
9580 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
9581 {
9582   char *p;
9583   unsigned int n;
9584
9585   for (n = 0; n < i.operands; n++)
9586     {
9587       if (operand_type_check (i.types[n], disp))
9588         {
9589           if (i.op[n].disps->X_op == O_constant)
9590             {
9591               int size = disp_size (n);
9592               offsetT val = i.op[n].disps->X_add_number;
9593
9594               val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
9595                                      size);
9596               p = frag_more (size);
9597               md_number_to_chars (p, val, size);
9598             }
9599           else
9600             {
9601               enum bfd_reloc_code_real reloc_type;
9602               int size = disp_size (n);
9603               int sign = i.types[n].bitfield.disp32s;
9604               int pcrel = (i.flags[n] & Operand_PCrel) != 0;
9605               fixS *fixP;
9606
9607               /* We can't have 8 bit displacement here.  */
9608               gas_assert (!i.types[n].bitfield.disp8);
9609
9610               /* The PC relative address is computed relative
9611                  to the instruction boundary, so in case immediate
9612                  fields follows, we need to adjust the value.  */
9613               if (pcrel && i.imm_operands)
9614                 {
9615                   unsigned int n1;
9616                   int sz = 0;
9617
9618                   for (n1 = 0; n1 < i.operands; n1++)
9619                     if (operand_type_check (i.types[n1], imm))
9620                       {
9621                         /* Only one immediate is allowed for PC
9622                            relative address.  */
9623                         gas_assert (sz == 0);
9624                         sz = imm_size (n1);
9625                         i.op[n].disps->X_add_number -= sz;
9626                       }
9627                   /* We should find the immediate.  */
9628                   gas_assert (sz != 0);
9629                 }
9630
9631               p = frag_more (size);
9632               reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
9633               if (GOT_symbol
9634                   && GOT_symbol == i.op[n].disps->X_add_symbol
9635                   && (((reloc_type == BFD_RELOC_32
9636                         || reloc_type == BFD_RELOC_X86_64_32S
9637                         || (reloc_type == BFD_RELOC_64
9638                             && object_64bit))
9639                        && (i.op[n].disps->X_op == O_symbol
9640                            || (i.op[n].disps->X_op == O_add
9641                                && ((symbol_get_value_expression
9642                                     (i.op[n].disps->X_op_symbol)->X_op)
9643                                    == O_subtract))))
9644                       || reloc_type == BFD_RELOC_32_PCREL))
9645                 {
9646                   if (!object_64bit)
9647                     {
9648                       reloc_type = BFD_RELOC_386_GOTPC;
9649                       i.has_gotpc_tls_reloc = TRUE;
9650                       i.op[n].imms->X_add_number +=
9651                         encoding_length (insn_start_frag, insn_start_off, p);
9652                     }
9653                   else if (reloc_type == BFD_RELOC_64)
9654                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
9655                   else
9656                     /* Don't do the adjustment for x86-64, as there
9657                        the pcrel addressing is relative to the _next_
9658                        insn, and that is taken care of in other code.  */
9659                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
9660                 }
9661               else if (align_branch_power)
9662                 {
9663                   switch (reloc_type)
9664                     {
9665                     case BFD_RELOC_386_TLS_GD:
9666                     case BFD_RELOC_386_TLS_LDM:
9667                     case BFD_RELOC_386_TLS_IE:
9668                     case BFD_RELOC_386_TLS_IE_32:
9669                     case BFD_RELOC_386_TLS_GOTIE:
9670                     case BFD_RELOC_386_TLS_GOTDESC:
9671                     case BFD_RELOC_386_TLS_DESC_CALL:
9672                     case BFD_RELOC_X86_64_TLSGD:
9673                     case BFD_RELOC_X86_64_TLSLD:
9674                     case BFD_RELOC_X86_64_GOTTPOFF:
9675                     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
9676                     case BFD_RELOC_X86_64_TLSDESC_CALL:
9677                       i.has_gotpc_tls_reloc = TRUE;
9678                     default:
9679                       break;
9680                     }
9681                 }
9682               fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
9683                                   size, i.op[n].disps, pcrel,
9684                                   reloc_type);
9685               /* Check for "call/jmp *mem", "mov mem, %reg",
9686                  "test %reg, mem" and "binop mem, %reg" where binop
9687                  is one of adc, add, and, cmp, or, sbb, sub, xor
9688                  instructions without data prefix.  Always generate
9689                  R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
9690               if (i.prefix[DATA_PREFIX] == 0
9691                   && (generate_relax_relocations
9692                       || (!object_64bit
9693                           && i.rm.mode == 0
9694                           && i.rm.regmem == 5))
9695                   && (i.rm.mode == 2
9696                       || (i.rm.mode == 0 && i.rm.regmem == 5))
9697                   && !is_any_vex_encoding(&i.tm)
9698                   && ((i.operands == 1
9699                        && i.tm.base_opcode == 0xff
9700                        && (i.rm.reg == 2 || i.rm.reg == 4))
9701                       || (i.operands == 2
9702                           && (i.tm.base_opcode == 0x8b
9703                               || i.tm.base_opcode == 0x85
9704                               || (i.tm.base_opcode & ~0x38) == 0x03))))
9705                 {
9706                   if (object_64bit)
9707                     {
9708                       fixP->fx_tcbit = i.rex != 0;
9709                       if (i.base_reg
9710                           && (i.base_reg->reg_num == RegIP))
9711                       fixP->fx_tcbit2 = 1;
9712                     }
9713                   else
9714                     fixP->fx_tcbit2 = 1;
9715                 }
9716             }
9717         }
9718     }
9719 }
9720
9721 static void
9722 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
9723 {
9724   char *p;
9725   unsigned int n;
9726
9727   for (n = 0; n < i.operands; n++)
9728     {
9729       /* Skip SAE/RC Imm operand in EVEX.  They are already handled.  */
9730       if (i.rounding && (int) n == i.rounding->operand)
9731         continue;
9732
9733       if (operand_type_check (i.types[n], imm))
9734         {
9735           if (i.op[n].imms->X_op == O_constant)
9736             {
9737               int size = imm_size (n);
9738               offsetT val;
9739
9740               val = offset_in_range (i.op[n].imms->X_add_number,
9741                                      size);
9742               p = frag_more (size);
9743               md_number_to_chars (p, val, size);
9744             }
9745           else
9746             {
9747               /* Not absolute_section.
9748                  Need a 32-bit fixup (don't support 8bit
9749                  non-absolute imms).  Try to support other
9750                  sizes ...  */
9751               enum bfd_reloc_code_real reloc_type;
9752               int size = imm_size (n);
9753               int sign;
9754
9755               if (i.types[n].bitfield.imm32s
9756                   && (i.suffix == QWORD_MNEM_SUFFIX
9757                       || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
9758                 sign = 1;
9759               else
9760                 sign = 0;
9761
9762               p = frag_more (size);
9763               reloc_type = reloc (size, 0, sign, i.reloc[n]);
9764
9765               /*   This is tough to explain.  We end up with this one if we
9766                * have operands that look like
9767                * "_GLOBAL_OFFSET_TABLE_+[.-.L284]".  The goal here is to
9768                * obtain the absolute address of the GOT, and it is strongly
9769                * preferable from a performance point of view to avoid using
9770                * a runtime relocation for this.  The actual sequence of
9771                * instructions often look something like:
9772                *
9773                *        call    .L66
9774                * .L66:
9775                *        popl    %ebx
9776                *        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
9777                *
9778                *   The call and pop essentially return the absolute address
9779                * of the label .L66 and store it in %ebx.  The linker itself
9780                * will ultimately change the first operand of the addl so
9781                * that %ebx points to the GOT, but to keep things simple, the
9782                * .o file must have this operand set so that it generates not
9783                * the absolute address of .L66, but the absolute address of
9784                * itself.  This allows the linker itself simply treat a GOTPC
9785                * relocation as asking for a pcrel offset to the GOT to be
9786                * added in, and the addend of the relocation is stored in the
9787                * operand field for the instruction itself.
9788                *
9789                *   Our job here is to fix the operand so that it would add
9790                * the correct offset so that %ebx would point to itself.  The
9791                * thing that is tricky is that .-.L66 will point to the
9792                * beginning of the instruction, so we need to further modify
9793                * the operand so that it will point to itself.  There are
9794                * other cases where you have something like:
9795                *
9796                *        .long   $_GLOBAL_OFFSET_TABLE_+[.-.L66]
9797                *
9798                * and here no correction would be required.  Internally in
9799                * the assembler we treat operands of this form as not being
9800                * pcrel since the '.' is explicitly mentioned, and I wonder
9801                * whether it would simplify matters to do it this way.  Who
9802                * knows.  In earlier versions of the PIC patches, the
9803                * pcrel_adjust field was used to store the correction, but
9804                * since the expression is not pcrel, I felt it would be
9805                * confusing to do it this way.  */
9806
9807               if ((reloc_type == BFD_RELOC_32
9808                    || reloc_type == BFD_RELOC_X86_64_32S
9809                    || reloc_type == BFD_RELOC_64)
9810                   && GOT_symbol
9811                   && GOT_symbol == i.op[n].imms->X_add_symbol
9812                   && (i.op[n].imms->X_op == O_symbol
9813                       || (i.op[n].imms->X_op == O_add
9814                           && ((symbol_get_value_expression
9815                                (i.op[n].imms->X_op_symbol)->X_op)
9816                               == O_subtract))))
9817                 {
9818                   if (!object_64bit)
9819                     reloc_type = BFD_RELOC_386_GOTPC;
9820                   else if (size == 4)
9821                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
9822                   else if (size == 8)
9823                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
9824                   i.has_gotpc_tls_reloc = TRUE;
9825                   i.op[n].imms->X_add_number +=
9826                     encoding_length (insn_start_frag, insn_start_off, p);
9827                 }
9828               fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9829                            i.op[n].imms, 0, reloc_type);
9830             }
9831         }
9832     }
9833 }
9834 \f
9835 /* x86_cons_fix_new is called via the expression parsing code when a
9836    reloc is needed.  We use this hook to get the correct .got reloc.  */
9837 static int cons_sign = -1;
9838
9839 void
9840 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
9841                   expressionS *exp, bfd_reloc_code_real_type r)
9842 {
9843   r = reloc (len, 0, cons_sign, r);
9844
9845 #ifdef TE_PE
9846   if (exp->X_op == O_secrel)
9847     {
9848       exp->X_op = O_symbol;
9849       r = BFD_RELOC_32_SECREL;
9850     }
9851 #endif
9852
9853   fix_new_exp (frag, off, len, exp, 0, r);
9854 }
9855
9856 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
9857    purpose of the `.dc.a' internal pseudo-op.  */
9858
9859 int
9860 x86_address_bytes (void)
9861 {
9862   if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
9863     return 4;
9864   return stdoutput->arch_info->bits_per_address / 8;
9865 }
9866
9867 #if !(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
9868     || defined (LEX_AT)
9869 # define lex_got(reloc, adjust, types) NULL
9870 #else
9871 /* Parse operands of the form
9872    <symbol>@GOTOFF+<nnn>
9873    and similar .plt or .got references.
9874
9875    If we find one, set up the correct relocation in RELOC and copy the
9876    input string, minus the `@GOTOFF' into a malloc'd buffer for
9877    parsing by the calling routine.  Return this buffer, and if ADJUST
9878    is non-null set it to the length of the string we removed from the
9879    input line.  Otherwise return NULL.  */
9880 static char *
9881 lex_got (enum bfd_reloc_code_real *rel,
9882          int *adjust,
9883          i386_operand_type *types)
9884 {
9885   /* Some of the relocations depend on the size of what field is to
9886      be relocated.  But in our callers i386_immediate and i386_displacement
9887      we don't yet know the operand size (this will be set by insn
9888      matching).  Hence we record the word32 relocation here,
9889      and adjust the reloc according to the real size in reloc().  */
9890   static const struct {
9891     const char *str;
9892     int len;
9893     const enum bfd_reloc_code_real rel[2];
9894     const i386_operand_type types64;
9895   } gotrel[] = {
9896 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9897     { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
9898                                         BFD_RELOC_SIZE32 },
9899       OPERAND_TYPE_IMM32_64 },
9900 #endif
9901     { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
9902                                        BFD_RELOC_X86_64_PLTOFF64 },
9903       OPERAND_TYPE_IMM64 },
9904     { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
9905                                        BFD_RELOC_X86_64_PLT32    },
9906       OPERAND_TYPE_IMM32_32S_DISP32 },
9907     { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
9908                                        BFD_RELOC_X86_64_GOTPLT64 },
9909       OPERAND_TYPE_IMM64_DISP64 },
9910     { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
9911                                        BFD_RELOC_X86_64_GOTOFF64 },
9912       OPERAND_TYPE_IMM64_DISP64 },
9913     { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
9914                                        BFD_RELOC_X86_64_GOTPCREL },
9915       OPERAND_TYPE_IMM32_32S_DISP32 },
9916     { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
9917                                        BFD_RELOC_X86_64_TLSGD    },
9918       OPERAND_TYPE_IMM32_32S_DISP32 },
9919     { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
9920                                        _dummy_first_bfd_reloc_code_real },
9921       OPERAND_TYPE_NONE },
9922     { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
9923                                        BFD_RELOC_X86_64_TLSLD    },
9924       OPERAND_TYPE_IMM32_32S_DISP32 },
9925     { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
9926                                        BFD_RELOC_X86_64_GOTTPOFF },
9927       OPERAND_TYPE_IMM32_32S_DISP32 },
9928     { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
9929                                        BFD_RELOC_X86_64_TPOFF32  },
9930       OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
9931     { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
9932                                        _dummy_first_bfd_reloc_code_real },
9933       OPERAND_TYPE_NONE },
9934     { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
9935                                        BFD_RELOC_X86_64_DTPOFF32 },
9936       OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
9937     { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
9938                                        _dummy_first_bfd_reloc_code_real },
9939       OPERAND_TYPE_NONE },
9940     { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
9941                                        _dummy_first_bfd_reloc_code_real },
9942       OPERAND_TYPE_NONE },
9943     { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
9944                                        BFD_RELOC_X86_64_GOT32    },
9945       OPERAND_TYPE_IMM32_32S_64_DISP32 },
9946     { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
9947                                        BFD_RELOC_X86_64_GOTPC32_TLSDESC },
9948       OPERAND_TYPE_IMM32_32S_DISP32 },
9949     { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
9950                                        BFD_RELOC_X86_64_TLSDESC_CALL },
9951       OPERAND_TYPE_IMM32_32S_DISP32 },
9952   };
9953   char *cp;
9954   unsigned int j;
9955
9956 #if defined (OBJ_MAYBE_ELF)
9957   if (!IS_ELF)
9958     return NULL;
9959 #endif
9960
9961   for (cp = input_line_pointer; *cp != '@'; cp++)
9962     if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
9963       return NULL;
9964
9965   for (j = 0; j < ARRAY_SIZE (gotrel); j++)
9966     {
9967       int len = gotrel[j].len;
9968       if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
9969         {
9970           if (gotrel[j].rel[object_64bit] != 0)
9971             {
9972               int first, second;
9973               char *tmpbuf, *past_reloc;
9974
9975               *rel = gotrel[j].rel[object_64bit];
9976
9977               if (types)
9978                 {
9979                   if (flag_code != CODE_64BIT)
9980                     {
9981                       types->bitfield.imm32 = 1;
9982                       types->bitfield.disp32 = 1;
9983                     }
9984                   else
9985                     *types = gotrel[j].types64;
9986                 }
9987
9988               if (j != 0 && GOT_symbol == NULL)
9989                 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
9990
9991               /* The length of the first part of our input line.  */
9992               first = cp - input_line_pointer;
9993
9994               /* The second part goes from after the reloc token until
9995                  (and including) an end_of_line char or comma.  */
9996               past_reloc = cp + 1 + len;
9997               cp = past_reloc;
9998               while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
9999                 ++cp;
10000               second = cp + 1 - past_reloc;
10001
10002               /* Allocate and copy string.  The trailing NUL shouldn't
10003                  be necessary, but be safe.  */
10004               tmpbuf = XNEWVEC (char, first + second + 2);
10005               memcpy (tmpbuf, input_line_pointer, first);
10006               if (second != 0 && *past_reloc != ' ')
10007                 /* Replace the relocation token with ' ', so that
10008                    errors like foo@GOTOFF1 will be detected.  */
10009                 tmpbuf[first++] = ' ';
10010               else
10011                 /* Increment length by 1 if the relocation token is
10012                    removed.  */
10013                 len++;
10014               if (adjust)
10015                 *adjust = len;
10016               memcpy (tmpbuf + first, past_reloc, second);
10017               tmpbuf[first + second] = '\0';
10018               return tmpbuf;
10019             }
10020
10021           as_bad (_("@%s reloc is not supported with %d-bit output format"),
10022                   gotrel[j].str, 1 << (5 + object_64bit));
10023           return NULL;
10024         }
10025     }
10026
10027   /* Might be a symbol version string.  Don't as_bad here.  */
10028   return NULL;
10029 }
10030 #endif
10031
10032 #ifdef TE_PE
10033 #ifdef lex_got
10034 #undef lex_got
10035 #endif
10036 /* Parse operands of the form
10037    <symbol>@SECREL32+<nnn>
10038
10039    If we find one, set up the correct relocation in RELOC and copy the
10040    input string, minus the `@SECREL32' into a malloc'd buffer for
10041    parsing by the calling routine.  Return this buffer, and if ADJUST
10042    is non-null set it to the length of the string we removed from the
10043    input line.  Otherwise return NULL.
10044
10045    This function is copied from the ELF version above adjusted for PE targets.  */
10046
10047 static char *
10048 lex_got (enum bfd_reloc_code_real *rel ATTRIBUTE_UNUSED,
10049          int *adjust ATTRIBUTE_UNUSED,
10050          i386_operand_type *types)
10051 {
10052   static const struct
10053   {
10054     const char *str;
10055     int len;
10056     const enum bfd_reloc_code_real rel[2];
10057     const i386_operand_type types64;
10058   }
10059   gotrel[] =
10060   {
10061     { STRING_COMMA_LEN ("SECREL32"),    { BFD_RELOC_32_SECREL,
10062                                           BFD_RELOC_32_SECREL },
10063       OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
10064   };
10065
10066   char *cp;
10067   unsigned j;
10068
10069   for (cp = input_line_pointer; *cp != '@'; cp++)
10070     if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10071       return NULL;
10072
10073   for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10074     {
10075       int len = gotrel[j].len;
10076
10077       if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10078         {
10079           if (gotrel[j].rel[object_64bit] != 0)
10080             {
10081               int first, second;
10082               char *tmpbuf, *past_reloc;
10083
10084               *rel = gotrel[j].rel[object_64bit];
10085               if (adjust)
10086                 *adjust = len;
10087
10088               if (types)
10089                 {
10090                   if (flag_code != CODE_64BIT)
10091                     {
10092                       types->bitfield.imm32 = 1;
10093                       types->bitfield.disp32 = 1;
10094                     }
10095                   else
10096                     *types = gotrel[j].types64;
10097                 }
10098
10099               /* The length of the first part of our input line.  */
10100               first = cp - input_line_pointer;
10101
10102               /* The second part goes from after the reloc token until
10103                  (and including) an end_of_line char or comma.  */
10104               past_reloc = cp + 1 + len;
10105               cp = past_reloc;
10106               while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10107                 ++cp;
10108               second = cp + 1 - past_reloc;
10109
10110               /* Allocate and copy string.  The trailing NUL shouldn't
10111                  be necessary, but be safe.  */
10112               tmpbuf = XNEWVEC (char, first + second + 2);
10113               memcpy (tmpbuf, input_line_pointer, first);
10114               if (second != 0 && *past_reloc != ' ')
10115                 /* Replace the relocation token with ' ', so that
10116                    errors like foo@SECLREL321 will be detected.  */
10117                 tmpbuf[first++] = ' ';
10118               memcpy (tmpbuf + first, past_reloc, second);
10119               tmpbuf[first + second] = '\0';
10120               return tmpbuf;
10121             }
10122
10123           as_bad (_("@%s reloc is not supported with %d-bit output format"),
10124                   gotrel[j].str, 1 << (5 + object_64bit));
10125           return NULL;
10126         }
10127     }
10128
10129   /* Might be a symbol version string.  Don't as_bad here.  */
10130   return NULL;
10131 }
10132
10133 #endif /* TE_PE */
10134
10135 bfd_reloc_code_real_type
10136 x86_cons (expressionS *exp, int size)
10137 {
10138   bfd_reloc_code_real_type got_reloc = NO_RELOC;
10139
10140   intel_syntax = -intel_syntax;
10141
10142   exp->X_md = 0;
10143   if (size == 4 || (object_64bit && size == 8))
10144     {
10145       /* Handle @GOTOFF and the like in an expression.  */
10146       char *save;
10147       char *gotfree_input_line;
10148       int adjust = 0;
10149
10150       save = input_line_pointer;
10151       gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10152       if (gotfree_input_line)
10153         input_line_pointer = gotfree_input_line;
10154
10155       expression (exp);
10156
10157       if (gotfree_input_line)
10158         {
10159           /* expression () has merrily parsed up to the end of line,
10160              or a comma - in the wrong buffer.  Transfer how far
10161              input_line_pointer has moved to the right buffer.  */
10162           input_line_pointer = (save
10163                                 + (input_line_pointer - gotfree_input_line)
10164                                 + adjust);
10165           free (gotfree_input_line);
10166           if (exp->X_op == O_constant
10167               || exp->X_op == O_absent
10168               || exp->X_op == O_illegal
10169               || exp->X_op == O_register
10170               || exp->X_op == O_big)
10171             {
10172               char c = *input_line_pointer;
10173               *input_line_pointer = 0;
10174               as_bad (_("missing or invalid expression `%s'"), save);
10175               *input_line_pointer = c;
10176             }
10177           else if ((got_reloc == BFD_RELOC_386_PLT32
10178                     || got_reloc == BFD_RELOC_X86_64_PLT32)
10179                    && exp->X_op != O_symbol)
10180             {
10181               char c = *input_line_pointer;
10182               *input_line_pointer = 0;
10183               as_bad (_("invalid PLT expression `%s'"), save);
10184               *input_line_pointer = c;
10185             }
10186         }
10187     }
10188   else
10189     expression (exp);
10190
10191   intel_syntax = -intel_syntax;
10192
10193   if (intel_syntax)
10194     i386_intel_simplify (exp);
10195
10196   return got_reloc;
10197 }
10198
10199 static void
10200 signed_cons (int size)
10201 {
10202   if (flag_code == CODE_64BIT)
10203     cons_sign = 1;
10204   cons (size);
10205   cons_sign = -1;
10206 }
10207
10208 #ifdef TE_PE
10209 static void
10210 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
10211 {
10212   expressionS exp;
10213
10214   do
10215     {
10216       expression (&exp);
10217       if (exp.X_op == O_symbol)
10218         exp.X_op = O_secrel;
10219
10220       emit_expr (&exp, 4);
10221     }
10222   while (*input_line_pointer++ == ',');
10223
10224   input_line_pointer--;
10225   demand_empty_rest_of_line ();
10226 }
10227 #endif
10228
10229 /* Handle Vector operations.  */
10230
10231 static char *
10232 check_VecOperations (char *op_string, char *op_end)
10233 {
10234   const reg_entry *mask;
10235   const char *saved;
10236   char *end_op;
10237
10238   while (*op_string
10239          && (op_end == NULL || op_string < op_end))
10240     {
10241       saved = op_string;
10242       if (*op_string == '{')
10243         {
10244           op_string++;
10245
10246           /* Check broadcasts.  */
10247           if (strncmp (op_string, "1to", 3) == 0)
10248             {
10249               int bcst_type;
10250
10251               if (i.broadcast)
10252                 goto duplicated_vec_op;
10253
10254               op_string += 3;
10255               if (*op_string == '8')
10256                 bcst_type = 8;
10257               else if (*op_string == '4')
10258                 bcst_type = 4;
10259               else if (*op_string == '2')
10260                 bcst_type = 2;
10261               else if (*op_string == '1'
10262                        && *(op_string+1) == '6')
10263                 {
10264                   bcst_type = 16;
10265                   op_string++;
10266                 }
10267               else
10268                 {
10269                   as_bad (_("Unsupported broadcast: `%s'"), saved);
10270                   return NULL;
10271                 }
10272               op_string++;
10273
10274               broadcast_op.type = bcst_type;
10275               broadcast_op.operand = this_operand;
10276               broadcast_op.bytes = 0;
10277               i.broadcast = &broadcast_op;
10278             }
10279           /* Check masking operation.  */
10280           else if ((mask = parse_register (op_string, &end_op)) != NULL)
10281             {
10282               if (mask == &bad_reg)
10283                 return NULL;
10284
10285               /* k0 can't be used for write mask.  */
10286               if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
10287                 {
10288                   as_bad (_("`%s%s' can't be used for write mask"),
10289                           register_prefix, mask->reg_name);
10290                   return NULL;
10291                 }
10292
10293               if (!i.mask)
10294                 {
10295                   mask_op.mask = mask;
10296                   mask_op.zeroing = 0;
10297                   mask_op.operand = this_operand;
10298                   i.mask = &mask_op;
10299                 }
10300               else
10301                 {
10302                   if (i.mask->mask)
10303                     goto duplicated_vec_op;
10304
10305                   i.mask->mask = mask;
10306
10307                   /* Only "{z}" is allowed here.  No need to check
10308                      zeroing mask explicitly.  */
10309                   if (i.mask->operand != this_operand)
10310                     {
10311                       as_bad (_("invalid write mask `%s'"), saved);
10312                       return NULL;
10313                     }
10314                 }
10315
10316               op_string = end_op;
10317             }
10318           /* Check zeroing-flag for masking operation.  */
10319           else if (*op_string == 'z')
10320             {
10321               if (!i.mask)
10322                 {
10323                   mask_op.mask = NULL;
10324                   mask_op.zeroing = 1;
10325                   mask_op.operand = this_operand;
10326                   i.mask = &mask_op;
10327                 }
10328               else
10329                 {
10330                   if (i.mask->zeroing)
10331                     {
10332                     duplicated_vec_op:
10333                       as_bad (_("duplicated `%s'"), saved);
10334                       return NULL;
10335                     }
10336
10337                   i.mask->zeroing = 1;
10338
10339                   /* Only "{%k}" is allowed here.  No need to check mask
10340                      register explicitly.  */
10341                   if (i.mask->operand != this_operand)
10342                     {
10343                       as_bad (_("invalid zeroing-masking `%s'"),
10344                               saved);
10345                       return NULL;
10346                     }
10347                 }
10348
10349               op_string++;
10350             }
10351           else
10352             goto unknown_vec_op;
10353
10354           if (*op_string != '}')
10355             {
10356               as_bad (_("missing `}' in `%s'"), saved);
10357               return NULL;
10358             }
10359           op_string++;
10360
10361           /* Strip whitespace since the addition of pseudo prefixes
10362              changed how the scrubber treats '{'.  */
10363           if (is_space_char (*op_string))
10364             ++op_string;
10365
10366           continue;
10367         }
10368     unknown_vec_op:
10369       /* We don't know this one.  */
10370       as_bad (_("unknown vector operation: `%s'"), saved);
10371       return NULL;
10372     }
10373
10374   if (i.mask && i.mask->zeroing && !i.mask->mask)
10375     {
10376       as_bad (_("zeroing-masking only allowed with write mask"));
10377       return NULL;
10378     }
10379
10380   return op_string;
10381 }
10382
10383 static int
10384 i386_immediate (char *imm_start)
10385 {
10386   char *save_input_line_pointer;
10387   char *gotfree_input_line;
10388   segT exp_seg = 0;
10389   expressionS *exp;
10390   i386_operand_type types;
10391
10392   operand_type_set (&types, ~0);
10393
10394   if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
10395     {
10396       as_bad (_("at most %d immediate operands are allowed"),
10397               MAX_IMMEDIATE_OPERANDS);
10398       return 0;
10399     }
10400
10401   exp = &im_expressions[i.imm_operands++];
10402   i.op[this_operand].imms = exp;
10403
10404   if (is_space_char (*imm_start))
10405     ++imm_start;
10406
10407   save_input_line_pointer = input_line_pointer;
10408   input_line_pointer = imm_start;
10409
10410   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10411   if (gotfree_input_line)
10412     input_line_pointer = gotfree_input_line;
10413
10414   exp_seg = expression (exp);
10415
10416   SKIP_WHITESPACE ();
10417
10418   /* Handle vector operations.  */
10419   if (*input_line_pointer == '{')
10420     {
10421       input_line_pointer = check_VecOperations (input_line_pointer,
10422                                                 NULL);
10423       if (input_line_pointer == NULL)
10424         return 0;
10425     }
10426
10427   if (*input_line_pointer)
10428     as_bad (_("junk `%s' after expression"), input_line_pointer);
10429
10430   input_line_pointer = save_input_line_pointer;
10431   if (gotfree_input_line)
10432     {
10433       free (gotfree_input_line);
10434
10435       if (exp->X_op == O_constant || exp->X_op == O_register)
10436         exp->X_op = O_illegal;
10437     }
10438
10439   return i386_finalize_immediate (exp_seg, exp, types, imm_start);
10440 }
10441
10442 static int
10443 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10444                          i386_operand_type types, const char *imm_start)
10445 {
10446   if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
10447     {
10448       if (imm_start)
10449         as_bad (_("missing or invalid immediate expression `%s'"),
10450                 imm_start);
10451       return 0;
10452     }
10453   else if (exp->X_op == O_constant)
10454     {
10455       /* Size it properly later.  */
10456       i.types[this_operand].bitfield.imm64 = 1;
10457       /* If not 64bit, sign extend val.  */
10458       if (flag_code != CODE_64BIT
10459           && (exp->X_add_number & ~(((addressT) 2 << 31) - 1)) == 0)
10460         exp->X_add_number
10461           = (exp->X_add_number ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
10462     }
10463 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10464   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
10465            && exp_seg != absolute_section
10466            && exp_seg != text_section
10467            && exp_seg != data_section
10468            && exp_seg != bss_section
10469            && exp_seg != undefined_section
10470            && !bfd_is_com_section (exp_seg))
10471     {
10472       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10473       return 0;
10474     }
10475 #endif
10476   else if (!intel_syntax && exp_seg == reg_section)
10477     {
10478       if (imm_start)
10479         as_bad (_("illegal immediate register operand %s"), imm_start);
10480       return 0;
10481     }
10482   else
10483     {
10484       /* This is an address.  The size of the address will be
10485          determined later, depending on destination register,
10486          suffix, or the default for the section.  */
10487       i.types[this_operand].bitfield.imm8 = 1;
10488       i.types[this_operand].bitfield.imm16 = 1;
10489       i.types[this_operand].bitfield.imm32 = 1;
10490       i.types[this_operand].bitfield.imm32s = 1;
10491       i.types[this_operand].bitfield.imm64 = 1;
10492       i.types[this_operand] = operand_type_and (i.types[this_operand],
10493                                                 types);
10494     }
10495
10496   return 1;
10497 }
10498
10499 static char *
10500 i386_scale (char *scale)
10501 {
10502   offsetT val;
10503   char *save = input_line_pointer;
10504
10505   input_line_pointer = scale;
10506   val = get_absolute_expression ();
10507
10508   switch (val)
10509     {
10510     case 1:
10511       i.log2_scale_factor = 0;
10512       break;
10513     case 2:
10514       i.log2_scale_factor = 1;
10515       break;
10516     case 4:
10517       i.log2_scale_factor = 2;
10518       break;
10519     case 8:
10520       i.log2_scale_factor = 3;
10521       break;
10522     default:
10523       {
10524         char sep = *input_line_pointer;
10525
10526         *input_line_pointer = '\0';
10527         as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
10528                 scale);
10529         *input_line_pointer = sep;
10530         input_line_pointer = save;
10531         return NULL;
10532       }
10533     }
10534   if (i.log2_scale_factor != 0 && i.index_reg == 0)
10535     {
10536       as_warn (_("scale factor of %d without an index register"),
10537                1 << i.log2_scale_factor);
10538       i.log2_scale_factor = 0;
10539     }
10540   scale = input_line_pointer;
10541   input_line_pointer = save;
10542   return scale;
10543 }
10544
10545 static int
10546 i386_displacement (char *disp_start, char *disp_end)
10547 {
10548   expressionS *exp;
10549   segT exp_seg = 0;
10550   char *save_input_line_pointer;
10551   char *gotfree_input_line;
10552   int override;
10553   i386_operand_type bigdisp, types = anydisp;
10554   int ret;
10555
10556   if (i.disp_operands == MAX_MEMORY_OPERANDS)
10557     {
10558       as_bad (_("at most %d displacement operands are allowed"),
10559               MAX_MEMORY_OPERANDS);
10560       return 0;
10561     }
10562
10563   operand_type_set (&bigdisp, 0);
10564   if (i.jumpabsolute
10565       || i.types[this_operand].bitfield.baseindex
10566       || (current_templates->start->opcode_modifier.jump != JUMP
10567           && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
10568     {
10569       i386_addressing_mode ();
10570       override = (i.prefix[ADDR_PREFIX] != 0);
10571       if (flag_code == CODE_64BIT)
10572         {
10573           if (!override)
10574             {
10575               bigdisp.bitfield.disp32s = 1;
10576               bigdisp.bitfield.disp64 = 1;
10577             }
10578           else
10579             bigdisp.bitfield.disp32 = 1;
10580         }
10581       else if ((flag_code == CODE_16BIT) ^ override)
10582           bigdisp.bitfield.disp16 = 1;
10583       else
10584           bigdisp.bitfield.disp32 = 1;
10585     }
10586   else
10587     {
10588       /* For PC-relative branches, the width of the displacement may be
10589          dependent upon data size, but is never dependent upon address size.
10590          Also make sure to not unintentionally match against a non-PC-relative
10591          branch template.  */
10592       static templates aux_templates;
10593       const insn_template *t = current_templates->start;
10594       bfd_boolean has_intel64 = FALSE;
10595
10596       aux_templates.start = t;
10597       while (++t < current_templates->end)
10598         {
10599           if (t->opcode_modifier.jump
10600               != current_templates->start->opcode_modifier.jump)
10601             break;
10602           if ((t->opcode_modifier.isa64 >= INTEL64))
10603             has_intel64 = TRUE;
10604         }
10605       if (t < current_templates->end)
10606         {
10607           aux_templates.end = t;
10608           current_templates = &aux_templates;
10609         }
10610
10611       override = (i.prefix[DATA_PREFIX] != 0);
10612       if (flag_code == CODE_64BIT)
10613         {
10614           if ((override || i.suffix == WORD_MNEM_SUFFIX)
10615               && (!intel64 || !has_intel64))
10616             bigdisp.bitfield.disp16 = 1;
10617           else
10618             bigdisp.bitfield.disp32s = 1;
10619         }
10620       else
10621         {
10622           if (!override)
10623             override = (i.suffix == (flag_code != CODE_16BIT
10624                                      ? WORD_MNEM_SUFFIX
10625                                      : LONG_MNEM_SUFFIX));
10626           bigdisp.bitfield.disp32 = 1;
10627           if ((flag_code == CODE_16BIT) ^ override)
10628             {
10629               bigdisp.bitfield.disp32 = 0;
10630               bigdisp.bitfield.disp16 = 1;
10631             }
10632         }
10633     }
10634   i.types[this_operand] = operand_type_or (i.types[this_operand],
10635                                            bigdisp);
10636
10637   exp = &disp_expressions[i.disp_operands];
10638   i.op[this_operand].disps = exp;
10639   i.disp_operands++;
10640   save_input_line_pointer = input_line_pointer;
10641   input_line_pointer = disp_start;
10642   END_STRING_AND_SAVE (disp_end);
10643
10644 #ifndef GCC_ASM_O_HACK
10645 #define GCC_ASM_O_HACK 0
10646 #endif
10647 #if GCC_ASM_O_HACK
10648   END_STRING_AND_SAVE (disp_end + 1);
10649   if (i.types[this_operand].bitfield.baseIndex
10650       && displacement_string_end[-1] == '+')
10651     {
10652       /* This hack is to avoid a warning when using the "o"
10653          constraint within gcc asm statements.
10654          For instance:
10655
10656          #define _set_tssldt_desc(n,addr,limit,type) \
10657          __asm__ __volatile__ ( \
10658          "movw %w2,%0\n\t" \
10659          "movw %w1,2+%0\n\t" \
10660          "rorl $16,%1\n\t" \
10661          "movb %b1,4+%0\n\t" \
10662          "movb %4,5+%0\n\t" \
10663          "movb $0,6+%0\n\t" \
10664          "movb %h1,7+%0\n\t" \
10665          "rorl $16,%1" \
10666          : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
10667
10668          This works great except that the output assembler ends
10669          up looking a bit weird if it turns out that there is
10670          no offset.  You end up producing code that looks like:
10671
10672          #APP
10673          movw $235,(%eax)
10674          movw %dx,2+(%eax)
10675          rorl $16,%edx
10676          movb %dl,4+(%eax)
10677          movb $137,5+(%eax)
10678          movb $0,6+(%eax)
10679          movb %dh,7+(%eax)
10680          rorl $16,%edx
10681          #NO_APP
10682
10683          So here we provide the missing zero.  */
10684
10685       *displacement_string_end = '0';
10686     }
10687 #endif
10688   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10689   if (gotfree_input_line)
10690     input_line_pointer = gotfree_input_line;
10691
10692   exp_seg = expression (exp);
10693
10694   SKIP_WHITESPACE ();
10695   if (*input_line_pointer)
10696     as_bad (_("junk `%s' after expression"), input_line_pointer);
10697 #if GCC_ASM_O_HACK
10698   RESTORE_END_STRING (disp_end + 1);
10699 #endif
10700   input_line_pointer = save_input_line_pointer;
10701   if (gotfree_input_line)
10702     {
10703       free (gotfree_input_line);
10704
10705       if (exp->X_op == O_constant || exp->X_op == O_register)
10706         exp->X_op = O_illegal;
10707     }
10708
10709   ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
10710
10711   RESTORE_END_STRING (disp_end);
10712
10713   return ret;
10714 }
10715
10716 static int
10717 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10718                             i386_operand_type types, const char *disp_start)
10719 {
10720   i386_operand_type bigdisp;
10721   int ret = 1;
10722
10723   /* We do this to make sure that the section symbol is in
10724      the symbol table.  We will ultimately change the relocation
10725      to be relative to the beginning of the section.  */
10726   if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
10727       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
10728       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
10729     {
10730       if (exp->X_op != O_symbol)
10731         goto inv_disp;
10732
10733       if (S_IS_LOCAL (exp->X_add_symbol)
10734           && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
10735           && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
10736         section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
10737       exp->X_op = O_subtract;
10738       exp->X_op_symbol = GOT_symbol;
10739       if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
10740         i.reloc[this_operand] = BFD_RELOC_32_PCREL;
10741       else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
10742         i.reloc[this_operand] = BFD_RELOC_64;
10743       else
10744         i.reloc[this_operand] = BFD_RELOC_32;
10745     }
10746
10747   else if (exp->X_op == O_absent
10748            || exp->X_op == O_illegal
10749            || exp->X_op == O_big)
10750     {
10751     inv_disp:
10752       as_bad (_("missing or invalid displacement expression `%s'"),
10753               disp_start);
10754       ret = 0;
10755     }
10756
10757   else if (flag_code == CODE_64BIT
10758            && !i.prefix[ADDR_PREFIX]
10759            && exp->X_op == O_constant)
10760     {
10761       /* Since displacement is signed extended to 64bit, don't allow
10762          disp32 and turn off disp32s if they are out of range.  */
10763       i.types[this_operand].bitfield.disp32 = 0;
10764       if (!fits_in_signed_long (exp->X_add_number))
10765         {
10766           i.types[this_operand].bitfield.disp32s = 0;
10767           if (i.types[this_operand].bitfield.baseindex)
10768             {
10769               as_bad (_("0x%lx out range of signed 32bit displacement"),
10770                       (long) exp->X_add_number);
10771               ret = 0;
10772             }
10773         }
10774     }
10775
10776 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10777   else if (exp->X_op != O_constant
10778            && OUTPUT_FLAVOR == bfd_target_aout_flavour
10779            && exp_seg != absolute_section
10780            && exp_seg != text_section
10781            && exp_seg != data_section
10782            && exp_seg != bss_section
10783            && exp_seg != undefined_section
10784            && !bfd_is_com_section (exp_seg))
10785     {
10786       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10787       ret = 0;
10788     }
10789 #endif
10790
10791   if (current_templates->start->opcode_modifier.jump == JUMP_BYTE
10792       /* Constants get taken care of by optimize_disp().  */
10793       && exp->X_op != O_constant)
10794     i.types[this_operand].bitfield.disp8 = 1;
10795
10796   /* Check if this is a displacement only operand.  */
10797   bigdisp = i.types[this_operand];
10798   bigdisp.bitfield.disp8 = 0;
10799   bigdisp.bitfield.disp16 = 0;
10800   bigdisp.bitfield.disp32 = 0;
10801   bigdisp.bitfield.disp32s = 0;
10802   bigdisp.bitfield.disp64 = 0;
10803   if (operand_type_all_zero (&bigdisp))
10804     i.types[this_operand] = operand_type_and (i.types[this_operand],
10805                                               types);
10806
10807   return ret;
10808 }
10809
10810 /* Return the active addressing mode, taking address override and
10811    registers forming the address into consideration.  Update the
10812    address override prefix if necessary.  */
10813
10814 static enum flag_code
10815 i386_addressing_mode (void)
10816 {
10817   enum flag_code addr_mode;
10818
10819   if (i.prefix[ADDR_PREFIX])
10820     addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
10821   else if (flag_code == CODE_16BIT
10822            && current_templates->start->cpu_flags.bitfield.cpumpx
10823            /* Avoid replacing the "16-bit addressing not allowed" diagnostic
10824               from md_assemble() by "is not a valid base/index expression"
10825               when there is a base and/or index.  */
10826            && !i.types[this_operand].bitfield.baseindex)
10827     {
10828       /* MPX insn memory operands with neither base nor index must be forced
10829          to use 32-bit addressing in 16-bit mode.  */
10830       addr_mode = CODE_32BIT;
10831       i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
10832       ++i.prefixes;
10833       gas_assert (!i.types[this_operand].bitfield.disp16);
10834       gas_assert (!i.types[this_operand].bitfield.disp32);
10835     }
10836   else
10837     {
10838       addr_mode = flag_code;
10839
10840 #if INFER_ADDR_PREFIX
10841       if (i.mem_operands == 0)
10842         {
10843           /* Infer address prefix from the first memory operand.  */
10844           const reg_entry *addr_reg = i.base_reg;
10845
10846           if (addr_reg == NULL)
10847             addr_reg = i.index_reg;
10848
10849           if (addr_reg)
10850             {
10851               if (addr_reg->reg_type.bitfield.dword)
10852                 addr_mode = CODE_32BIT;
10853               else if (flag_code != CODE_64BIT
10854                        && addr_reg->reg_type.bitfield.word)
10855                 addr_mode = CODE_16BIT;
10856
10857               if (addr_mode != flag_code)
10858                 {
10859                   i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
10860                   i.prefixes += 1;
10861                   /* Change the size of any displacement too.  At most one
10862                      of Disp16 or Disp32 is set.
10863                      FIXME.  There doesn't seem to be any real need for
10864                      separate Disp16 and Disp32 flags.  The same goes for
10865                      Imm16 and Imm32.  Removing them would probably clean
10866                      up the code quite a lot.  */
10867                   if (flag_code != CODE_64BIT
10868                       && (i.types[this_operand].bitfield.disp16
10869                           || i.types[this_operand].bitfield.disp32))
10870                     i.types[this_operand]
10871                       = operand_type_xor (i.types[this_operand], disp16_32);
10872                 }
10873             }
10874         }
10875 #endif
10876     }
10877
10878   return addr_mode;
10879 }
10880
10881 /* Make sure the memory operand we've been dealt is valid.
10882    Return 1 on success, 0 on a failure.  */
10883
10884 static int
10885 i386_index_check (const char *operand_string)
10886 {
10887   const char *kind = "base/index";
10888   enum flag_code addr_mode = i386_addressing_mode ();
10889
10890   if (current_templates->start->opcode_modifier.isstring
10891       && !current_templates->start->cpu_flags.bitfield.cpupadlock
10892       && (current_templates->end[-1].opcode_modifier.isstring
10893           || i.mem_operands))
10894     {
10895       /* Memory operands of string insns are special in that they only allow
10896          a single register (rDI, rSI, or rBX) as their memory address.  */
10897       const reg_entry *expected_reg;
10898       static const char *di_si[][2] =
10899         {
10900           { "esi", "edi" },
10901           { "si", "di" },
10902           { "rsi", "rdi" }
10903         };
10904       static const char *bx[] = { "ebx", "bx", "rbx" };
10905
10906       kind = "string address";
10907
10908       if (current_templates->start->opcode_modifier.repprefixok)
10909         {
10910           int es_op = current_templates->end[-1].opcode_modifier.isstring
10911                       - IS_STRING_ES_OP0;
10912           int op = 0;
10913
10914           if (!current_templates->end[-1].operand_types[0].bitfield.baseindex
10915               || ((!i.mem_operands != !intel_syntax)
10916                   && current_templates->end[-1].operand_types[1]
10917                      .bitfield.baseindex))
10918             op = 1;
10919           expected_reg = hash_find (reg_hash, di_si[addr_mode][op == es_op]);
10920         }
10921       else
10922         expected_reg = hash_find (reg_hash, bx[addr_mode]);
10923
10924       if (i.base_reg != expected_reg
10925           || i.index_reg
10926           || operand_type_check (i.types[this_operand], disp))
10927         {
10928           /* The second memory operand must have the same size as
10929              the first one.  */
10930           if (i.mem_operands
10931               && i.base_reg
10932               && !((addr_mode == CODE_64BIT
10933                     && i.base_reg->reg_type.bitfield.qword)
10934                    || (addr_mode == CODE_32BIT
10935                        ? i.base_reg->reg_type.bitfield.dword
10936                        : i.base_reg->reg_type.bitfield.word)))
10937             goto bad_address;
10938
10939           as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
10940                    operand_string,
10941                    intel_syntax ? '[' : '(',
10942                    register_prefix,
10943                    expected_reg->reg_name,
10944                    intel_syntax ? ']' : ')');
10945           return 1;
10946         }
10947       else
10948         return 1;
10949
10950     bad_address:
10951       as_bad (_("`%s' is not a valid %s expression"),
10952               operand_string, kind);
10953       return 0;
10954     }
10955   else
10956     {
10957       if (addr_mode != CODE_16BIT)
10958         {
10959           /* 32-bit/64-bit checks.  */
10960           if ((i.base_reg
10961                && ((addr_mode == CODE_64BIT
10962                     ? !i.base_reg->reg_type.bitfield.qword
10963                     : !i.base_reg->reg_type.bitfield.dword)
10964                    || (i.index_reg && i.base_reg->reg_num == RegIP)
10965                    || i.base_reg->reg_num == RegIZ))
10966               || (i.index_reg
10967                   && !i.index_reg->reg_type.bitfield.xmmword
10968                   && !i.index_reg->reg_type.bitfield.ymmword
10969                   && !i.index_reg->reg_type.bitfield.zmmword
10970                   && ((addr_mode == CODE_64BIT
10971                        ? !i.index_reg->reg_type.bitfield.qword
10972                        : !i.index_reg->reg_type.bitfield.dword)
10973                       || !i.index_reg->reg_type.bitfield.baseindex)))
10974             goto bad_address;
10975
10976           /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
10977           if (current_templates->start->base_opcode == 0xf30f1b
10978               || (current_templates->start->base_opcode & ~1) == 0x0f1a
10979               || current_templates->start->opcode_modifier.sib == SIBMEM)
10980             {
10981               /* They cannot use RIP-relative addressing. */
10982               if (i.base_reg && i.base_reg->reg_num == RegIP)
10983                 {
10984                   as_bad (_("`%s' cannot be used here"), operand_string);
10985                   return 0;
10986                 }
10987
10988               /* bndldx and bndstx ignore their scale factor. */
10989               if ((current_templates->start->base_opcode & ~1) == 0x0f1a
10990                   && i.log2_scale_factor)
10991                 as_warn (_("register scaling is being ignored here"));
10992             }
10993         }
10994       else
10995         {
10996           /* 16-bit checks.  */
10997           if ((i.base_reg
10998                && (!i.base_reg->reg_type.bitfield.word
10999                    || !i.base_reg->reg_type.bitfield.baseindex))
11000               || (i.index_reg
11001                   && (!i.index_reg->reg_type.bitfield.word
11002                       || !i.index_reg->reg_type.bitfield.baseindex
11003                       || !(i.base_reg
11004                            && i.base_reg->reg_num < 6
11005                            && i.index_reg->reg_num >= 6
11006                            && i.log2_scale_factor == 0))))
11007             goto bad_address;
11008         }
11009     }
11010   return 1;
11011 }
11012
11013 /* Handle vector immediates.  */
11014
11015 static int
11016 RC_SAE_immediate (const char *imm_start)
11017 {
11018   unsigned int match_found, j;
11019   const char *pstr = imm_start;
11020   expressionS *exp;
11021
11022   if (*pstr != '{')
11023     return 0;
11024
11025   pstr++;
11026   match_found = 0;
11027   for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
11028     {
11029       if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
11030         {
11031           if (!i.rounding)
11032             {
11033               rc_op.type = RC_NamesTable[j].type;
11034               rc_op.operand = this_operand;
11035               i.rounding = &rc_op;
11036             }
11037           else
11038             {
11039               as_bad (_("duplicated `%s'"), imm_start);
11040               return 0;
11041             }
11042           pstr += RC_NamesTable[j].len;
11043           match_found = 1;
11044           break;
11045         }
11046     }
11047   if (!match_found)
11048     return 0;
11049
11050   if (*pstr++ != '}')
11051     {
11052       as_bad (_("Missing '}': '%s'"), imm_start);
11053       return 0;
11054     }
11055   /* RC/SAE immediate string should contain nothing more.  */;
11056   if (*pstr != 0)
11057     {
11058       as_bad (_("Junk after '}': '%s'"), imm_start);
11059       return 0;
11060     }
11061
11062   exp = &im_expressions[i.imm_operands++];
11063   i.op[this_operand].imms = exp;
11064
11065   exp->X_op = O_constant;
11066   exp->X_add_number = 0;
11067   exp->X_add_symbol = (symbolS *) 0;
11068   exp->X_op_symbol = (symbolS *) 0;
11069
11070   i.types[this_operand].bitfield.imm8 = 1;
11071   return 1;
11072 }
11073
11074 /* Only string instructions can have a second memory operand, so
11075    reduce current_templates to just those if it contains any.  */
11076 static int
11077 maybe_adjust_templates (void)
11078 {
11079   const insn_template *t;
11080
11081   gas_assert (i.mem_operands == 1);
11082
11083   for (t = current_templates->start; t < current_templates->end; ++t)
11084     if (t->opcode_modifier.isstring)
11085       break;
11086
11087   if (t < current_templates->end)
11088     {
11089       static templates aux_templates;
11090       bfd_boolean recheck;
11091
11092       aux_templates.start = t;
11093       for (; t < current_templates->end; ++t)
11094         if (!t->opcode_modifier.isstring)
11095           break;
11096       aux_templates.end = t;
11097
11098       /* Determine whether to re-check the first memory operand.  */
11099       recheck = (aux_templates.start != current_templates->start
11100                  || t != current_templates->end);
11101
11102       current_templates = &aux_templates;
11103
11104       if (recheck)
11105         {
11106           i.mem_operands = 0;
11107           if (i.memop1_string != NULL
11108               && i386_index_check (i.memop1_string) == 0)
11109             return 0;
11110           i.mem_operands = 1;
11111         }
11112     }
11113
11114   return 1;
11115 }
11116
11117 /* Parse OPERAND_STRING into the i386_insn structure I.  Returns zero
11118    on error.  */
11119
11120 static int
11121 i386_att_operand (char *operand_string)
11122 {
11123   const reg_entry *r;
11124   char *end_op;
11125   char *op_string = operand_string;
11126
11127   if (is_space_char (*op_string))
11128     ++op_string;
11129
11130   /* We check for an absolute prefix (differentiating,
11131      for example, 'jmp pc_relative_label' from 'jmp *absolute_label'.  */
11132   if (*op_string == ABSOLUTE_PREFIX)
11133     {
11134       ++op_string;
11135       if (is_space_char (*op_string))
11136         ++op_string;
11137       i.jumpabsolute = TRUE;
11138     }
11139
11140   /* Check if operand is a register.  */
11141   if ((r = parse_register (op_string, &end_op)) != NULL)
11142     {
11143       i386_operand_type temp;
11144
11145       if (r == &bad_reg)
11146         return 0;
11147
11148       /* Check for a segment override by searching for ':' after a
11149          segment register.  */
11150       op_string = end_op;
11151       if (is_space_char (*op_string))
11152         ++op_string;
11153       if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
11154         {
11155           switch (r->reg_num)
11156             {
11157             case 0:
11158               i.seg[i.mem_operands] = &es;
11159               break;
11160             case 1:
11161               i.seg[i.mem_operands] = &cs;
11162               break;
11163             case 2:
11164               i.seg[i.mem_operands] = &ss;
11165               break;
11166             case 3:
11167               i.seg[i.mem_operands] = &ds;
11168               break;
11169             case 4:
11170               i.seg[i.mem_operands] = &fs;
11171               break;
11172             case 5:
11173               i.seg[i.mem_operands] = &gs;
11174               break;
11175             }
11176
11177           /* Skip the ':' and whitespace.  */
11178           ++op_string;
11179           if (is_space_char (*op_string))
11180             ++op_string;
11181
11182           if (!is_digit_char (*op_string)
11183               && !is_identifier_char (*op_string)
11184               && *op_string != '('
11185               && *op_string != ABSOLUTE_PREFIX)
11186             {
11187               as_bad (_("bad memory operand `%s'"), op_string);
11188               return 0;
11189             }
11190           /* Handle case of %es:*foo.  */
11191           if (*op_string == ABSOLUTE_PREFIX)
11192             {
11193               ++op_string;
11194               if (is_space_char (*op_string))
11195                 ++op_string;
11196               i.jumpabsolute = TRUE;
11197             }
11198           goto do_memory_reference;
11199         }
11200
11201       /* Handle vector operations.  */
11202       if (*op_string == '{')
11203         {
11204           op_string = check_VecOperations (op_string, NULL);
11205           if (op_string == NULL)
11206             return 0;
11207         }
11208
11209       if (*op_string)
11210         {
11211           as_bad (_("junk `%s' after register"), op_string);
11212           return 0;
11213         }
11214       temp = r->reg_type;
11215       temp.bitfield.baseindex = 0;
11216       i.types[this_operand] = operand_type_or (i.types[this_operand],
11217                                                temp);
11218       i.types[this_operand].bitfield.unspecified = 0;
11219       i.op[this_operand].regs = r;
11220       i.reg_operands++;
11221     }
11222   else if (*op_string == REGISTER_PREFIX)
11223     {
11224       as_bad (_("bad register name `%s'"), op_string);
11225       return 0;
11226     }
11227   else if (*op_string == IMMEDIATE_PREFIX)
11228     {
11229       ++op_string;
11230       if (i.jumpabsolute)
11231         {
11232           as_bad (_("immediate operand illegal with absolute jump"));
11233           return 0;
11234         }
11235       if (!i386_immediate (op_string))
11236         return 0;
11237     }
11238   else if (RC_SAE_immediate (operand_string))
11239     {
11240       /* If it is a RC or SAE immediate, do nothing.  */
11241       ;
11242     }
11243   else if (is_digit_char (*op_string)
11244            || is_identifier_char (*op_string)
11245            || *op_string == '"'
11246            || *op_string == '(')
11247     {
11248       /* This is a memory reference of some sort.  */
11249       char *base_string;
11250
11251       /* Start and end of displacement string expression (if found).  */
11252       char *displacement_string_start;
11253       char *displacement_string_end;
11254       char *vop_start;
11255
11256     do_memory_reference:
11257       if (i.mem_operands == 1 && !maybe_adjust_templates ())
11258         return 0;
11259       if ((i.mem_operands == 1
11260            && !current_templates->start->opcode_modifier.isstring)
11261           || i.mem_operands == 2)
11262         {
11263           as_bad (_("too many memory references for `%s'"),
11264                   current_templates->start->name);
11265           return 0;
11266         }
11267
11268       /* Check for base index form.  We detect the base index form by
11269          looking for an ')' at the end of the operand, searching
11270          for the '(' matching it, and finding a REGISTER_PREFIX or ','
11271          after the '('.  */
11272       base_string = op_string + strlen (op_string);
11273
11274       /* Handle vector operations.  */
11275       vop_start = strchr (op_string, '{');
11276       if (vop_start && vop_start < base_string)
11277         {
11278           if (check_VecOperations (vop_start, base_string) == NULL)
11279             return 0;
11280           base_string = vop_start;
11281         }
11282
11283       --base_string;
11284       if (is_space_char (*base_string))
11285         --base_string;
11286
11287       /* If we only have a displacement, set-up for it to be parsed later.  */
11288       displacement_string_start = op_string;
11289       displacement_string_end = base_string + 1;
11290
11291       if (*base_string == ')')
11292         {
11293           char *temp_string;
11294           unsigned int parens_balanced = 1;
11295           /* We've already checked that the number of left & right ()'s are
11296              equal, so this loop will not be infinite.  */
11297           do
11298             {
11299               base_string--;
11300               if (*base_string == ')')
11301                 parens_balanced++;
11302               if (*base_string == '(')
11303                 parens_balanced--;
11304             }
11305           while (parens_balanced);
11306
11307           temp_string = base_string;
11308
11309           /* Skip past '(' and whitespace.  */
11310           ++base_string;
11311           if (is_space_char (*base_string))
11312             ++base_string;
11313
11314           if (*base_string == ','
11315               || ((i.base_reg = parse_register (base_string, &end_op))
11316                   != NULL))
11317             {
11318               displacement_string_end = temp_string;
11319
11320               i.types[this_operand].bitfield.baseindex = 1;
11321
11322               if (i.base_reg)
11323                 {
11324                   if (i.base_reg == &bad_reg)
11325                     return 0;
11326                   base_string = end_op;
11327                   if (is_space_char (*base_string))
11328                     ++base_string;
11329                 }
11330
11331               /* There may be an index reg or scale factor here.  */
11332               if (*base_string == ',')
11333                 {
11334                   ++base_string;
11335                   if (is_space_char (*base_string))
11336                     ++base_string;
11337
11338                   if ((i.index_reg = parse_register (base_string, &end_op))
11339                       != NULL)
11340                     {
11341                       if (i.index_reg == &bad_reg)
11342                         return 0;
11343                       base_string = end_op;
11344                       if (is_space_char (*base_string))
11345                         ++base_string;
11346                       if (*base_string == ',')
11347                         {
11348                           ++base_string;
11349                           if (is_space_char (*base_string))
11350                             ++base_string;
11351                         }
11352                       else if (*base_string != ')')
11353                         {
11354                           as_bad (_("expecting `,' or `)' "
11355                                     "after index register in `%s'"),
11356                                   operand_string);
11357                           return 0;
11358                         }
11359                     }
11360                   else if (*base_string == REGISTER_PREFIX)
11361                     {
11362                       end_op = strchr (base_string, ',');
11363                       if (end_op)
11364                         *end_op = '\0';
11365                       as_bad (_("bad register name `%s'"), base_string);
11366                       return 0;
11367                     }
11368
11369                   /* Check for scale factor.  */
11370                   if (*base_string != ')')
11371                     {
11372                       char *end_scale = i386_scale (base_string);
11373
11374                       if (!end_scale)
11375                         return 0;
11376
11377                       base_string = end_scale;
11378                       if (is_space_char (*base_string))
11379                         ++base_string;
11380                       if (*base_string != ')')
11381                         {
11382                           as_bad (_("expecting `)' "
11383                                     "after scale factor in `%s'"),
11384                                   operand_string);
11385                           return 0;
11386                         }
11387                     }
11388                   else if (!i.index_reg)
11389                     {
11390                       as_bad (_("expecting index register or scale factor "
11391                                 "after `,'; got '%c'"),
11392                               *base_string);
11393                       return 0;
11394                     }
11395                 }
11396               else if (*base_string != ')')
11397                 {
11398                   as_bad (_("expecting `,' or `)' "
11399                             "after base register in `%s'"),
11400                           operand_string);
11401                   return 0;
11402                 }
11403             }
11404           else if (*base_string == REGISTER_PREFIX)
11405             {
11406               end_op = strchr (base_string, ',');
11407               if (end_op)
11408                 *end_op = '\0';
11409               as_bad (_("bad register name `%s'"), base_string);
11410               return 0;
11411             }
11412         }
11413
11414       /* If there's an expression beginning the operand, parse it,
11415          assuming displacement_string_start and
11416          displacement_string_end are meaningful.  */
11417       if (displacement_string_start != displacement_string_end)
11418         {
11419           if (!i386_displacement (displacement_string_start,
11420                                   displacement_string_end))
11421             return 0;
11422         }
11423
11424       /* Special case for (%dx) while doing input/output op.  */
11425       if (i.base_reg
11426           && i.base_reg->reg_type.bitfield.instance == RegD
11427           && i.base_reg->reg_type.bitfield.word
11428           && i.index_reg == 0
11429           && i.log2_scale_factor == 0
11430           && i.seg[i.mem_operands] == 0
11431           && !operand_type_check (i.types[this_operand], disp))
11432         {
11433           i.types[this_operand] = i.base_reg->reg_type;
11434           return 1;
11435         }
11436
11437       if (i386_index_check (operand_string) == 0)
11438         return 0;
11439       i.flags[this_operand] |= Operand_Mem;
11440       if (i.mem_operands == 0)
11441         i.memop1_string = xstrdup (operand_string);
11442       i.mem_operands++;
11443     }
11444   else
11445     {
11446       /* It's not a memory operand; argh!  */
11447       as_bad (_("invalid char %s beginning operand %d `%s'"),
11448               output_invalid (*op_string),
11449               this_operand + 1,
11450               op_string);
11451       return 0;
11452     }
11453   return 1;                     /* Normal return.  */
11454 }
11455 \f
11456 /* Calculate the maximum variable size (i.e., excluding fr_fix)
11457    that an rs_machine_dependent frag may reach.  */
11458
11459 unsigned int
11460 i386_frag_max_var (fragS *frag)
11461 {
11462   /* The only relaxable frags are for jumps.
11463      Unconditional jumps can grow by 4 bytes and others by 5 bytes.  */
11464   gas_assert (frag->fr_type == rs_machine_dependent);
11465   return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
11466 }
11467
11468 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11469 static int
11470 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
11471 {
11472   /* STT_GNU_IFUNC symbol must go through PLT.  */
11473   if ((symbol_get_bfdsym (fr_symbol)->flags
11474        & BSF_GNU_INDIRECT_FUNCTION) != 0)
11475     return 0;
11476
11477   if (!S_IS_EXTERNAL (fr_symbol))
11478     /* Symbol may be weak or local.  */
11479     return !S_IS_WEAK (fr_symbol);
11480
11481   /* Global symbols with non-default visibility can't be preempted. */
11482   if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
11483     return 1;
11484
11485   if (fr_var != NO_RELOC)
11486     switch ((enum bfd_reloc_code_real) fr_var)
11487       {
11488       case BFD_RELOC_386_PLT32:
11489       case BFD_RELOC_X86_64_PLT32:
11490         /* Symbol with PLT relocation may be preempted. */
11491         return 0;
11492       default:
11493         abort ();
11494       }
11495
11496   /* Global symbols with default visibility in a shared library may be
11497      preempted by another definition.  */
11498   return !shared;
11499 }
11500 #endif
11501
11502 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
11503    Note also work for Skylake and Cascadelake.
11504 ---------------------------------------------------------------------
11505 |   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
11506 | ------  | ----------- | ------- | -------- |
11507 |   Jo    |      N      |    N    |     Y    |
11508 |   Jno   |      N      |    N    |     Y    |
11509 |  Jc/Jb  |      Y      |    N    |     Y    |
11510 | Jae/Jnb |      Y      |    N    |     Y    |
11511 |  Je/Jz  |      Y      |    Y    |     Y    |
11512 | Jne/Jnz |      Y      |    Y    |     Y    |
11513 | Jna/Jbe |      Y      |    N    |     Y    |
11514 | Ja/Jnbe |      Y      |    N    |     Y    |
11515 |   Js    |      N      |    N    |     Y    |
11516 |   Jns   |      N      |    N    |     Y    |
11517 |  Jp/Jpe |      N      |    N    |     Y    |
11518 | Jnp/Jpo |      N      |    N    |     Y    |
11519 | Jl/Jnge |      Y      |    Y    |     Y    |
11520 | Jge/Jnl |      Y      |    Y    |     Y    |
11521 | Jle/Jng |      Y      |    Y    |     Y    |
11522 | Jg/Jnle |      Y      |    Y    |     Y    |
11523 ---------------------------------------------------------------------  */
11524 static int
11525 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
11526 {
11527   if (mf_cmp == mf_cmp_alu_cmp)
11528     return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
11529             || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
11530   if (mf_cmp == mf_cmp_incdec)
11531     return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
11532             || mf_jcc == mf_jcc_jle);
11533   if (mf_cmp == mf_cmp_test_and)
11534     return 1;
11535   return 0;
11536 }
11537
11538 /* Return the next non-empty frag.  */
11539
11540 static fragS *
11541 i386_next_non_empty_frag (fragS *fragP)
11542 {
11543   /* There may be a frag with a ".fill 0" when there is no room in
11544      the current frag for frag_grow in output_insn.  */
11545   for (fragP = fragP->fr_next;
11546        (fragP != NULL
11547         && fragP->fr_type == rs_fill
11548         && fragP->fr_fix == 0);
11549        fragP = fragP->fr_next)
11550     ;
11551   return fragP;
11552 }
11553
11554 /* Return the next jcc frag after BRANCH_PADDING.  */
11555
11556 static fragS *
11557 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
11558 {
11559   fragS *branch_fragP;
11560   if (!pad_fragP)
11561     return NULL;
11562
11563   if (pad_fragP->fr_type == rs_machine_dependent
11564       && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
11565           == BRANCH_PADDING))
11566     {
11567       branch_fragP = i386_next_non_empty_frag (pad_fragP);
11568       if (branch_fragP->fr_type != rs_machine_dependent)
11569         return NULL;
11570       if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
11571           && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
11572                                    pad_fragP->tc_frag_data.mf_type))
11573         return branch_fragP;
11574     }
11575
11576   return NULL;
11577 }
11578
11579 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags.  */
11580
11581 static void
11582 i386_classify_machine_dependent_frag (fragS *fragP)
11583 {
11584   fragS *cmp_fragP;
11585   fragS *pad_fragP;
11586   fragS *branch_fragP;
11587   fragS *next_fragP;
11588   unsigned int max_prefix_length;
11589
11590   if (fragP->tc_frag_data.classified)
11591     return;
11592
11593   /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING.  Convert
11594      FUSED_JCC_PADDING and merge BRANCH_PADDING.  */
11595   for (next_fragP = fragP;
11596        next_fragP != NULL;
11597        next_fragP = next_fragP->fr_next)
11598     {
11599       next_fragP->tc_frag_data.classified = 1;
11600       if (next_fragP->fr_type == rs_machine_dependent)
11601         switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
11602           {
11603           case BRANCH_PADDING:
11604             /* The BRANCH_PADDING frag must be followed by a branch
11605                frag.  */
11606             branch_fragP = i386_next_non_empty_frag (next_fragP);
11607             next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11608             break;
11609           case FUSED_JCC_PADDING:
11610             /* Check if this is a fused jcc:
11611                FUSED_JCC_PADDING
11612                CMP like instruction
11613                BRANCH_PADDING
11614                COND_JUMP
11615                */
11616             cmp_fragP = i386_next_non_empty_frag (next_fragP);
11617             pad_fragP = i386_next_non_empty_frag (cmp_fragP);
11618             branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
11619             if (branch_fragP)
11620               {
11621                 /* The BRANCH_PADDING frag is merged with the
11622                    FUSED_JCC_PADDING frag.  */
11623                 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11624                 /* CMP like instruction size.  */
11625                 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
11626                 frag_wane (pad_fragP);
11627                 /* Skip to branch_fragP.  */
11628                 next_fragP = branch_fragP;
11629               }
11630             else if (next_fragP->tc_frag_data.max_prefix_length)
11631               {
11632                 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
11633                    a fused jcc.  */
11634                 next_fragP->fr_subtype
11635                   = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
11636                 next_fragP->tc_frag_data.max_bytes
11637                   = next_fragP->tc_frag_data.max_prefix_length;
11638                 /* This will be updated in the BRANCH_PREFIX scan.  */
11639                 next_fragP->tc_frag_data.max_prefix_length = 0;
11640               }
11641             else
11642               frag_wane (next_fragP);
11643             break;
11644           }
11645     }
11646
11647   /* Stop if there is no BRANCH_PREFIX.  */
11648   if (!align_branch_prefix_size)
11649     return;
11650
11651   /* Scan for BRANCH_PREFIX.  */
11652   for (; fragP != NULL; fragP = fragP->fr_next)
11653     {
11654       if (fragP->fr_type != rs_machine_dependent
11655           || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
11656               != BRANCH_PREFIX))
11657         continue;
11658
11659       /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
11660          COND_JUMP_PREFIX.  */
11661       max_prefix_length = 0;
11662       for (next_fragP = fragP;
11663            next_fragP != NULL;
11664            next_fragP = next_fragP->fr_next)
11665         {
11666           if (next_fragP->fr_type == rs_fill)
11667             /* Skip rs_fill frags.  */
11668             continue;
11669           else if (next_fragP->fr_type != rs_machine_dependent)
11670             /* Stop for all other frags.  */
11671             break;
11672
11673           /* rs_machine_dependent frags.  */
11674           if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11675               == BRANCH_PREFIX)
11676             {
11677               /* Count BRANCH_PREFIX frags.  */
11678               if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
11679                 {
11680                   max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
11681                   frag_wane (next_fragP);
11682                 }
11683               else
11684                 max_prefix_length
11685                   += next_fragP->tc_frag_data.max_bytes;
11686             }
11687           else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11688                     == BRANCH_PADDING)
11689                    || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11690                        == FUSED_JCC_PADDING))
11691             {
11692               /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING.  */
11693               fragP->tc_frag_data.u.padding_fragP = next_fragP;
11694               break;
11695             }
11696           else
11697             /* Stop for other rs_machine_dependent frags.  */
11698             break;
11699         }
11700
11701       fragP->tc_frag_data.max_prefix_length = max_prefix_length;
11702
11703       /* Skip to the next frag.  */
11704       fragP = next_fragP;
11705     }
11706 }
11707
11708 /* Compute padding size for
11709
11710         FUSED_JCC_PADDING
11711         CMP like instruction
11712         BRANCH_PADDING
11713         COND_JUMP/UNCOND_JUMP
11714
11715    or
11716
11717         BRANCH_PADDING
11718         COND_JUMP/UNCOND_JUMP
11719  */
11720
11721 static int
11722 i386_branch_padding_size (fragS *fragP, offsetT address)
11723 {
11724   unsigned int offset, size, padding_size;
11725   fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
11726
11727   /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag.  */
11728   if (!address)
11729     address = fragP->fr_address;
11730   address += fragP->fr_fix;
11731
11732   /* CMP like instrunction size.  */
11733   size = fragP->tc_frag_data.cmp_size;
11734
11735   /* The base size of the branch frag.  */
11736   size += branch_fragP->fr_fix;
11737
11738   /* Add opcode and displacement bytes for the rs_machine_dependent
11739      branch frag.  */
11740   if (branch_fragP->fr_type == rs_machine_dependent)
11741     size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
11742
11743   /* Check if branch is within boundary and doesn't end at the last
11744      byte.  */
11745   offset = address & ((1U << align_branch_power) - 1);
11746   if ((offset + size) >= (1U << align_branch_power))
11747     /* Padding needed to avoid crossing boundary.  */
11748     padding_size = (1U << align_branch_power) - offset;
11749   else
11750     /* No padding needed.  */
11751     padding_size = 0;
11752
11753   /* The return value may be saved in tc_frag_data.length which is
11754      unsigned byte.  */
11755   if (!fits_in_unsigned_byte (padding_size))
11756     abort ();
11757
11758   return padding_size;
11759 }
11760
11761 /* i386_generic_table_relax_frag()
11762
11763    Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
11764    grow/shrink padding to align branch frags.  Hand others to
11765    relax_frag().  */
11766
11767 long
11768 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
11769 {
11770   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
11771       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
11772     {
11773       long padding_size = i386_branch_padding_size (fragP, 0);
11774       long grow = padding_size - fragP->tc_frag_data.length;
11775
11776       /* When the BRANCH_PREFIX frag is used, the computed address
11777          must match the actual address and there should be no padding.  */
11778       if (fragP->tc_frag_data.padding_address
11779           && (fragP->tc_frag_data.padding_address != fragP->fr_address
11780               || padding_size))
11781         abort ();
11782
11783       /* Update the padding size.  */
11784       if (grow)
11785         fragP->tc_frag_data.length = padding_size;
11786
11787       return grow;
11788     }
11789   else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
11790     {
11791       fragS *padding_fragP, *next_fragP;
11792       long padding_size, left_size, last_size;
11793
11794       padding_fragP = fragP->tc_frag_data.u.padding_fragP;
11795       if (!padding_fragP)
11796         /* Use the padding set by the leading BRANCH_PREFIX frag.  */
11797         return (fragP->tc_frag_data.length
11798                 - fragP->tc_frag_data.last_length);
11799
11800       /* Compute the relative address of the padding frag in the very
11801         first time where the BRANCH_PREFIX frag sizes are zero.  */
11802       if (!fragP->tc_frag_data.padding_address)
11803         fragP->tc_frag_data.padding_address
11804           = padding_fragP->fr_address - (fragP->fr_address - stretch);
11805
11806       /* First update the last length from the previous interation.  */
11807       left_size = fragP->tc_frag_data.prefix_length;
11808       for (next_fragP = fragP;
11809            next_fragP != padding_fragP;
11810            next_fragP = next_fragP->fr_next)
11811         if (next_fragP->fr_type == rs_machine_dependent
11812             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11813                 == BRANCH_PREFIX))
11814           {
11815             if (left_size)
11816               {
11817                 int max = next_fragP->tc_frag_data.max_bytes;
11818                 if (max)
11819                   {
11820                     int size;
11821                     if (max > left_size)
11822                       size = left_size;
11823                     else
11824                       size = max;
11825                     left_size -= size;
11826                     next_fragP->tc_frag_data.last_length = size;
11827                   }
11828               }
11829             else
11830               next_fragP->tc_frag_data.last_length = 0;
11831           }
11832
11833       /* Check the padding size for the padding frag.  */
11834       padding_size = i386_branch_padding_size
11835         (padding_fragP, (fragP->fr_address
11836                          + fragP->tc_frag_data.padding_address));
11837
11838       last_size = fragP->tc_frag_data.prefix_length;
11839       /* Check if there is change from the last interation.  */
11840       if (padding_size == last_size)
11841         {
11842           /* Update the expected address of the padding frag.  */
11843           padding_fragP->tc_frag_data.padding_address
11844             = (fragP->fr_address + padding_size
11845                + fragP->tc_frag_data.padding_address);
11846           return 0;
11847         }
11848
11849       if (padding_size > fragP->tc_frag_data.max_prefix_length)
11850         {
11851           /* No padding if there is no sufficient room.  Clear the
11852              expected address of the padding frag.  */
11853           padding_fragP->tc_frag_data.padding_address = 0;
11854           padding_size = 0;
11855         }
11856       else
11857         /* Store the expected address of the padding frag.  */
11858         padding_fragP->tc_frag_data.padding_address
11859           = (fragP->fr_address + padding_size
11860              + fragP->tc_frag_data.padding_address);
11861
11862       fragP->tc_frag_data.prefix_length = padding_size;
11863
11864       /* Update the length for the current interation.  */
11865       left_size = padding_size;
11866       for (next_fragP = fragP;
11867            next_fragP != padding_fragP;
11868            next_fragP = next_fragP->fr_next)
11869         if (next_fragP->fr_type == rs_machine_dependent
11870             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11871                 == BRANCH_PREFIX))
11872           {
11873             if (left_size)
11874               {
11875                 int max = next_fragP->tc_frag_data.max_bytes;
11876                 if (max)
11877                   {
11878                     int size;
11879                     if (max > left_size)
11880                       size = left_size;
11881                     else
11882                       size = max;
11883                     left_size -= size;
11884                     next_fragP->tc_frag_data.length = size;
11885                   }
11886               }
11887             else
11888               next_fragP->tc_frag_data.length = 0;
11889           }
11890
11891       return (fragP->tc_frag_data.length
11892               - fragP->tc_frag_data.last_length);
11893     }
11894   return relax_frag (segment, fragP, stretch);
11895 }
11896
11897 /* md_estimate_size_before_relax()
11898
11899    Called just before relax() for rs_machine_dependent frags.  The x86
11900    assembler uses these frags to handle variable size jump
11901    instructions.
11902
11903    Any symbol that is now undefined will not become defined.
11904    Return the correct fr_subtype in the frag.
11905    Return the initial "guess for variable size of frag" to caller.
11906    The guess is actually the growth beyond the fixed part.  Whatever
11907    we do to grow the fixed or variable part contributes to our
11908    returned value.  */
11909
11910 int
11911 md_estimate_size_before_relax (fragS *fragP, segT segment)
11912 {
11913   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
11914       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
11915       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
11916     {
11917       i386_classify_machine_dependent_frag (fragP);
11918       return fragP->tc_frag_data.length;
11919     }
11920
11921   /* We've already got fragP->fr_subtype right;  all we have to do is
11922      check for un-relaxable symbols.  On an ELF system, we can't relax
11923      an externally visible symbol, because it may be overridden by a
11924      shared library.  */
11925   if (S_GET_SEGMENT (fragP->fr_symbol) != segment
11926 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11927       || (IS_ELF
11928           && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
11929                                                 fragP->fr_var))
11930 #endif
11931 #if defined (OBJ_COFF) && defined (TE_PE)
11932       || (OUTPUT_FLAVOR == bfd_target_coff_flavour
11933           && S_IS_WEAK (fragP->fr_symbol))
11934 #endif
11935       )
11936     {
11937       /* Symbol is undefined in this segment, or we need to keep a
11938          reloc so that weak symbols can be overridden.  */
11939       int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
11940       enum bfd_reloc_code_real reloc_type;
11941       unsigned char *opcode;
11942       int old_fr_fix;
11943
11944       if (fragP->fr_var != NO_RELOC)
11945         reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
11946       else if (size == 2)
11947         reloc_type = BFD_RELOC_16_PCREL;
11948 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11949       else if (need_plt32_p (fragP->fr_symbol))
11950         reloc_type = BFD_RELOC_X86_64_PLT32;
11951 #endif
11952       else
11953         reloc_type = BFD_RELOC_32_PCREL;
11954
11955       old_fr_fix = fragP->fr_fix;
11956       opcode = (unsigned char *) fragP->fr_opcode;
11957
11958       switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
11959         {
11960         case UNCOND_JUMP:
11961           /* Make jmp (0xeb) a (d)word displacement jump.  */
11962           opcode[0] = 0xe9;
11963           fragP->fr_fix += size;
11964           fix_new (fragP, old_fr_fix, size,
11965                    fragP->fr_symbol,
11966                    fragP->fr_offset, 1,
11967                    reloc_type);
11968           break;
11969
11970         case COND_JUMP86:
11971           if (size == 2
11972               && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
11973             {
11974               /* Negate the condition, and branch past an
11975                  unconditional jump.  */
11976               opcode[0] ^= 1;
11977               opcode[1] = 3;
11978               /* Insert an unconditional jump.  */
11979               opcode[2] = 0xe9;
11980               /* We added two extra opcode bytes, and have a two byte
11981                  offset.  */
11982               fragP->fr_fix += 2 + 2;
11983               fix_new (fragP, old_fr_fix + 2, 2,
11984                        fragP->fr_symbol,
11985                        fragP->fr_offset, 1,
11986                        reloc_type);
11987               break;
11988             }
11989           /* Fall through.  */
11990
11991         case COND_JUMP:
11992           if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
11993             {
11994               fixS *fixP;
11995
11996               fragP->fr_fix += 1;
11997               fixP = fix_new (fragP, old_fr_fix, 1,
11998                               fragP->fr_symbol,
11999                               fragP->fr_offset, 1,
12000                               BFD_RELOC_8_PCREL);
12001               fixP->fx_signed = 1;
12002               break;
12003             }
12004
12005           /* This changes the byte-displacement jump 0x7N
12006              to the (d)word-displacement jump 0x0f,0x8N.  */
12007           opcode[1] = opcode[0] + 0x10;
12008           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12009           /* We've added an opcode byte.  */
12010           fragP->fr_fix += 1 + size;
12011           fix_new (fragP, old_fr_fix + 1, size,
12012                    fragP->fr_symbol,
12013                    fragP->fr_offset, 1,
12014                    reloc_type);
12015           break;
12016
12017         default:
12018           BAD_CASE (fragP->fr_subtype);
12019           break;
12020         }
12021       frag_wane (fragP);
12022       return fragP->fr_fix - old_fr_fix;
12023     }
12024
12025   /* Guess size depending on current relax state.  Initially the relax
12026      state will correspond to a short jump and we return 1, because
12027      the variable part of the frag (the branch offset) is one byte
12028      long.  However, we can relax a section more than once and in that
12029      case we must either set fr_subtype back to the unrelaxed state,
12030      or return the value for the appropriate branch.  */
12031   return md_relax_table[fragP->fr_subtype].rlx_length;
12032 }
12033
12034 /* Called after relax() is finished.
12035
12036    In:  Address of frag.
12037         fr_type == rs_machine_dependent.
12038         fr_subtype is what the address relaxed to.
12039
12040    Out: Any fixSs and constants are set up.
12041         Caller will turn frag into a ".space 0".  */
12042
12043 void
12044 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
12045                  fragS *fragP)
12046 {
12047   unsigned char *opcode;
12048   unsigned char *where_to_put_displacement = NULL;
12049   offsetT target_address;
12050   offsetT opcode_address;
12051   unsigned int extension = 0;
12052   offsetT displacement_from_opcode_start;
12053
12054   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12055       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
12056       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12057     {
12058       /* Generate nop padding.  */
12059       unsigned int size = fragP->tc_frag_data.length;
12060       if (size)
12061         {
12062           if (size > fragP->tc_frag_data.max_bytes)
12063             abort ();
12064
12065           if (flag_debug)
12066             {
12067               const char *msg;
12068               const char *branch = "branch";
12069               const char *prefix = "";
12070               fragS *padding_fragP;
12071               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12072                   == BRANCH_PREFIX)
12073                 {
12074                   padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12075                   switch (fragP->tc_frag_data.default_prefix)
12076                     {
12077                     default:
12078                       abort ();
12079                       break;
12080                     case CS_PREFIX_OPCODE:
12081                       prefix = " cs";
12082                       break;
12083                     case DS_PREFIX_OPCODE:
12084                       prefix = " ds";
12085                       break;
12086                     case ES_PREFIX_OPCODE:
12087                       prefix = " es";
12088                       break;
12089                     case FS_PREFIX_OPCODE:
12090                       prefix = " fs";
12091                       break;
12092                     case GS_PREFIX_OPCODE:
12093                       prefix = " gs";
12094                       break;
12095                     case SS_PREFIX_OPCODE:
12096                       prefix = " ss";
12097                       break;
12098                     }
12099                   if (padding_fragP)
12100                     msg = _("%s:%u: add %d%s at 0x%llx to align "
12101                             "%s within %d-byte boundary\n");
12102                   else
12103                     msg = _("%s:%u: add additional %d%s at 0x%llx to "
12104                             "align %s within %d-byte boundary\n");
12105                 }
12106               else
12107                 {
12108                   padding_fragP = fragP;
12109                   msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
12110                           "%s within %d-byte boundary\n");
12111                 }
12112
12113               if (padding_fragP)
12114                 switch (padding_fragP->tc_frag_data.branch_type)
12115                   {
12116                   case align_branch_jcc:
12117                     branch = "jcc";
12118                     break;
12119                   case align_branch_fused:
12120                     branch = "fused jcc";
12121                     break;
12122                   case align_branch_jmp:
12123                     branch = "jmp";
12124                     break;
12125                   case align_branch_call:
12126                     branch = "call";
12127                     break;
12128                   case align_branch_indirect:
12129                     branch = "indiret branch";
12130                     break;
12131                   case align_branch_ret:
12132                     branch = "ret";
12133                     break;
12134                   default:
12135                     break;
12136                   }
12137
12138               fprintf (stdout, msg,
12139                        fragP->fr_file, fragP->fr_line, size, prefix,
12140                        (long long) fragP->fr_address, branch,
12141                        1 << align_branch_power);
12142             }
12143           if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12144             memset (fragP->fr_opcode,
12145                     fragP->tc_frag_data.default_prefix, size);
12146           else
12147             i386_generate_nops (fragP, (char *) fragP->fr_opcode,
12148                                 size, 0);
12149           fragP->fr_fix += size;
12150         }
12151       return;
12152     }
12153
12154   opcode = (unsigned char *) fragP->fr_opcode;
12155
12156   /* Address we want to reach in file space.  */
12157   target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
12158
12159   /* Address opcode resides at in file space.  */
12160   opcode_address = fragP->fr_address + fragP->fr_fix;
12161
12162   /* Displacement from opcode start to fill into instruction.  */
12163   displacement_from_opcode_start = target_address - opcode_address;
12164
12165   if ((fragP->fr_subtype & BIG) == 0)
12166     {
12167       /* Don't have to change opcode.  */
12168       extension = 1;            /* 1 opcode + 1 displacement  */
12169       where_to_put_displacement = &opcode[1];
12170     }
12171   else
12172     {
12173       if (no_cond_jump_promotion
12174           && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
12175         as_warn_where (fragP->fr_file, fragP->fr_line,
12176                        _("long jump required"));
12177
12178       switch (fragP->fr_subtype)
12179         {
12180         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
12181           extension = 4;                /* 1 opcode + 4 displacement  */
12182           opcode[0] = 0xe9;
12183           where_to_put_displacement = &opcode[1];
12184           break;
12185
12186         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
12187           extension = 2;                /* 1 opcode + 2 displacement  */
12188           opcode[0] = 0xe9;
12189           where_to_put_displacement = &opcode[1];
12190           break;
12191
12192         case ENCODE_RELAX_STATE (COND_JUMP, BIG):
12193         case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
12194           extension = 5;                /* 2 opcode + 4 displacement  */
12195           opcode[1] = opcode[0] + 0x10;
12196           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12197           where_to_put_displacement = &opcode[2];
12198           break;
12199
12200         case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
12201           extension = 3;                /* 2 opcode + 2 displacement  */
12202           opcode[1] = opcode[0] + 0x10;
12203           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12204           where_to_put_displacement = &opcode[2];
12205           break;
12206
12207         case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
12208           extension = 4;
12209           opcode[0] ^= 1;
12210           opcode[1] = 3;
12211           opcode[2] = 0xe9;
12212           where_to_put_displacement = &opcode[3];
12213           break;
12214
12215         default:
12216           BAD_CASE (fragP->fr_subtype);
12217           break;
12218         }
12219     }
12220
12221   /* If size if less then four we are sure that the operand fits,
12222      but if it's 4, then it could be that the displacement is larger
12223      then -/+ 2GB.  */
12224   if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
12225       && object_64bit
12226       && ((addressT) (displacement_from_opcode_start - extension
12227                       + ((addressT) 1 << 31))
12228           > (((addressT) 2 << 31) - 1)))
12229     {
12230       as_bad_where (fragP->fr_file, fragP->fr_line,
12231                     _("jump target out of range"));
12232       /* Make us emit 0.  */
12233       displacement_from_opcode_start = extension;
12234     }
12235   /* Now put displacement after opcode.  */
12236   md_number_to_chars ((char *) where_to_put_displacement,
12237                       (valueT) (displacement_from_opcode_start - extension),
12238                       DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
12239   fragP->fr_fix += extension;
12240 }
12241 \f
/* Apply a fixup (fixP) to segment data, once it has been determined
   by our caller that we have all the info we need to fix it up.

   Parameter valP is the pointer to the value of the bits.  Unless
   TE_Mach is defined, the adjusted value is also stored back through
   valP.

   On the 386, immediates, displacements, and data pointers are all in
   the same (little-endian) format, so we don't need to care about which
   we are handling.

   fx_done is set when the fixup has no symbol left.  For fixups that
   keep their symbol and are emitted as RELA relocations (and, on PE,
   for weak symbols) the value is remembered in fx_addnumber and zero
   is written into the frag instead.  */

void
md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
{
  /* Location inside the frag's data that this fixup patches.  */
  char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
  valueT value = *valP;

#if !defined (TE_Mach)
  /* A pc-relative fixup recorded with an absolute relocation type is
     switched to the pc-relative type of the same width.  */
  if (fixP->fx_pcrel)
    {
      switch (fixP->fx_r_type)
        {
        default:
          break;

        case BFD_RELOC_64:
          fixP->fx_r_type = BFD_RELOC_64_PCREL;
          break;
        case BFD_RELOC_32:
        case BFD_RELOC_X86_64_32S:
          fixP->fx_r_type = BFD_RELOC_32_PCREL;
          break;
        case BFD_RELOC_16:
          fixP->fx_r_type = BFD_RELOC_16_PCREL;
          break;
        case BFD_RELOC_8:
          fixP->fx_r_type = BFD_RELOC_8_PCREL;
          break;
        }
    }

  if (fixP->fx_addsy != NULL
      && (fixP->fx_r_type == BFD_RELOC_32_PCREL
          || fixP->fx_r_type == BFD_RELOC_64_PCREL
          || fixP->fx_r_type == BFD_RELOC_16_PCREL
          || fixP->fx_r_type == BFD_RELOC_8_PCREL)
      && !use_rela_relocations)
    {
      /* This is a hack.  There should be a better way to handle this.
         This covers for the fact that bfd_install_relocation will
         subtract the current location (for partial_inplace, PC relative
         relocations); see more below.  */
#ifndef OBJ_AOUT
      if (IS_ELF
#ifdef TE_PE
          || OUTPUT_FLAVOR == bfd_target_coff_flavour
#endif
          )
        value += fixP->fx_where + fixP->fx_frag->fr_address;
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      if (IS_ELF)
        {
          segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);

          if ((sym_seg == seg
               || (symbol_section_p (fixP->fx_addsy)
                   && sym_seg != absolute_section))
              && !generic_force_reloc (fixP))
            {
              /* Yes, we add the values in twice.  This is because
                 bfd_install_relocation subtracts them out again.  I think
                 bfd_install_relocation is broken, but I don't dare change
                 it.  FIXME.  */
              value += fixP->fx_where + fixP->fx_frag->fr_address;
            }
        }
#endif
#if defined (OBJ_COFF) && defined (TE_PE)
      /* For some reason, the PE format does not store a
         section address offset for a PC relative symbol.  */
      if (S_GET_SEGMENT (fixP->fx_addsy) != seg
          || S_IS_WEAK (fixP->fx_addsy))
        value += md_pcrel_from (fixP);
#endif
    }
#if defined (OBJ_COFF) && defined (TE_PE)
  if (fixP->fx_addsy != NULL
      && S_IS_WEAK (fixP->fx_addsy)
      /* PR 16858: Do not modify weak function references.  */
      && ! fixP->fx_pcrel)
    {
#if !defined (TE_PEP)
      /* For x86 PE weak function symbols are neither PC-relative
         nor do they set S_IS_FUNCTION.  So the only reliable way
         to detect them is to check the flags of their containing
         section.  */
      if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
          && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
        ;
      else
#endif
      value -= S_GET_VALUE (fixP->fx_addsy);
    }
#endif

  /* Fix a few things - the dynamic linker expects certain values here,
     and we must not disappoint it.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  if (IS_ELF && fixP->fx_addsy)
    switch (fixP->fx_r_type)
      {
      case BFD_RELOC_386_PLT32:
      case BFD_RELOC_X86_64_PLT32:
        /* Make the jump instruction point to the address of the operand.
           At runtime we merely add the offset to the actual PLT entry.
           NB: Subtract the offset size only for jump instructions.  */
        if (fixP->fx_pcrel)
          value = -4;
        break;

      case BFD_RELOC_386_TLS_GD:
      case BFD_RELOC_386_TLS_LDM:
      case BFD_RELOC_386_TLS_IE_32:
      case BFD_RELOC_386_TLS_IE:
      case BFD_RELOC_386_TLS_GOTIE:
      case BFD_RELOC_386_TLS_GOTDESC:
      case BFD_RELOC_X86_64_TLSGD:
      case BFD_RELOC_X86_64_TLSLD:
      case BFD_RELOC_X86_64_GOTTPOFF:
      case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
        value = 0; /* Fully resolved at runtime.  No addend.  */
        /* Fallthrough */
      case BFD_RELOC_386_TLS_LE:
      case BFD_RELOC_386_TLS_LDO_32:
      case BFD_RELOC_386_TLS_LE_32:
      case BFD_RELOC_X86_64_DTPOFF32:
      case BFD_RELOC_X86_64_DTPOFF64:
      case BFD_RELOC_X86_64_TPOFF32:
      case BFD_RELOC_X86_64_TPOFF64:
        /* Every TLS relocation marks its symbol as thread-local.  */
        S_SET_THREAD_LOCAL (fixP->fx_addsy);
        break;

      case BFD_RELOC_386_TLS_DESC_CALL:
      case BFD_RELOC_X86_64_TLSDESC_CALL:
        /* Returns early: nothing is written to the frag and *valP is
           left as it was.  */
        value = 0; /* Fully resolved at runtime.  No addend.  */
        S_SET_THREAD_LOCAL (fixP->fx_addsy);
        fixP->fx_done = 0;
        return;

      case BFD_RELOC_VTABLE_INHERIT:
      case BFD_RELOC_VTABLE_ENTRY:
        /* Likewise returns without touching the frag contents.  */
        fixP->fx_done = 0;
        return;

      default:
        break;
      }
#endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)  */
  *valP = value;
#endif /* !defined (TE_Mach)  */

  /* Are we finished with this relocation now?  */
  if (fixP->fx_addsy == NULL)
    fixP->fx_done = 1;
#if defined (OBJ_COFF) && defined (TE_PE)
  else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
    {
      fixP->fx_done = 0;
      /* Remember value for tc_gen_reloc.  */
      fixP->fx_addnumber = value;
      /* Clear out the frag for now.  */
      value = 0;
    }
#endif
  else if (use_rela_relocations)
    {
      fixP->fx_no_overflow = 1;
      /* Remember value for tc_gen_reloc.  */
      fixP->fx_addnumber = value;
      value = 0;
    }

  /* Store the (possibly zeroed) value into the section contents.  */
  md_number_to_chars (p, value, fixP->fx_size);
}
12425 \f
12426 const char *
12427 md_atof (int type, char *litP, int *sizeP)
12428 {
12429   /* This outputs the LITTLENUMs in REVERSE order;
12430      in accord with the bigendian 386.  */
12431   return ieee_md_atof (type, litP, sizeP, FALSE);
12432 }
12433 \f
/* Scratch buffer for output_invalid.  The longest text produced is
   "(0xff)" plus the terminating NUL, which fits.  */
static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];

/* Render character C in readable form: quoted when it is printable,
   otherwise as the hexadecimal value of its low byte.  The result lives
   in a static buffer that the next call overwrites.  */

static char *
output_invalid (int c)
{
  if (!ISPRINT (c))
    {
      snprintf (output_invalid_buf, sizeof (output_invalid_buf),
                "(0x%x)", (unsigned char) c);
      return output_invalid_buf;
    }

  snprintf (output_invalid_buf, sizeof (output_invalid_buf),
            "'%c'", c);
  return output_invalid_buf;
}
12447
12448 /* Verify that @r can be used in the current context.  */
12449
12450 static bfd_boolean check_register (const reg_entry *r)
12451 {
12452   if (allow_pseudo_reg)
12453     return TRUE;
12454
12455   if (operand_type_all_zero (&r->reg_type))
12456     return FALSE;
12457
12458   if ((r->reg_type.bitfield.dword
12459        || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
12460        || r->reg_type.bitfield.class == RegCR
12461        || r->reg_type.bitfield.class == RegDR)
12462       && !cpu_arch_flags.bitfield.cpui386)
12463     return FALSE;
12464
12465   if (r->reg_type.bitfield.class == RegTR
12466       && (flag_code == CODE_64BIT
12467           || !cpu_arch_flags.bitfield.cpui386
12468           || cpu_arch_isa_flags.bitfield.cpui586
12469           || cpu_arch_isa_flags.bitfield.cpui686))
12470     return FALSE;
12471
12472   if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
12473     return FALSE;
12474
12475   if (!cpu_arch_flags.bitfield.cpuavx512f)
12476     {
12477       if (r->reg_type.bitfield.zmmword
12478           || r->reg_type.bitfield.class == RegMask)
12479         return FALSE;
12480
12481       if (!cpu_arch_flags.bitfield.cpuavx)
12482         {
12483           if (r->reg_type.bitfield.ymmword)
12484             return FALSE;
12485
12486           if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
12487             return FALSE;
12488         }
12489     }
12490
12491   if (r->reg_type.bitfield.tmmword
12492       && (!cpu_arch_flags.bitfield.cpuamx_tile
12493           || flag_code != CODE_64BIT))
12494     return FALSE;
12495
12496   if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
12497     return FALSE;
12498
12499   /* Don't allow fake index register unless allow_index_reg isn't 0. */
12500   if (!allow_index_reg && r->reg_num == RegIZ)
12501     return FALSE;
12502
12503   /* Upper 16 vector registers are only available with VREX in 64bit
12504      mode, and require EVEX encoding.  */
12505   if (r->reg_flags & RegVRex)
12506     {
12507       if (!cpu_arch_flags.bitfield.cpuavx512f
12508           || flag_code != CODE_64BIT)
12509         return FALSE;
12510
12511       if (i.vec_encoding == vex_encoding_default)
12512         i.vec_encoding = vex_encoding_evex;
12513       else if (i.vec_encoding != vex_encoding_evex)
12514         i.vec_encoding = vex_encoding_error;
12515     }
12516
12517   if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
12518       && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
12519       && flag_code != CODE_64BIT)
12520     return FALSE;
12521
12522   if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
12523       && !intel_syntax)
12524     return FALSE;
12525
12526   return TRUE;
12527 }
12528
12529 /* REG_STRING starts *before* REGISTER_PREFIX.  */
12530
12531 static const reg_entry *
12532 parse_real_register (char *reg_string, char **end_op)
12533 {
12534   char *s = reg_string;
12535   char *p;
12536   char reg_name_given[MAX_REG_NAME_SIZE + 1];
12537   const reg_entry *r;
12538
12539   /* Skip possible REGISTER_PREFIX and possible whitespace.  */
12540   if (*s == REGISTER_PREFIX)
12541     ++s;
12542
12543   if (is_space_char (*s))
12544     ++s;
12545
12546   p = reg_name_given;
12547   while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
12548     {
12549       if (p >= reg_name_given + MAX_REG_NAME_SIZE)
12550         return (const reg_entry *) NULL;
12551       s++;
12552     }
12553
12554   /* For naked regs, make sure that we are not dealing with an identifier.
12555      This prevents confusing an identifier like `eax_var' with register
12556      `eax'.  */
12557   if (allow_naked_reg && identifier_chars[(unsigned char) *s])
12558     return (const reg_entry *) NULL;
12559
12560   *end_op = s;
12561
12562   r = (const reg_entry *) hash_find (reg_hash, reg_name_given);
12563
12564   /* Handle floating point regs, allowing spaces in the (i) part.  */
12565   if (r == i386_regtab /* %st is first entry of table  */)
12566     {
12567       if (!cpu_arch_flags.bitfield.cpu8087
12568           && !cpu_arch_flags.bitfield.cpu287
12569           && !cpu_arch_flags.bitfield.cpu387
12570           && !allow_pseudo_reg)
12571         return (const reg_entry *) NULL;
12572
12573       if (is_space_char (*s))
12574         ++s;
12575       if (*s == '(')
12576         {
12577           ++s;
12578           if (is_space_char (*s))
12579             ++s;
12580           if (*s >= '0' && *s <= '7')
12581             {
12582               int fpr = *s - '0';
12583               ++s;
12584               if (is_space_char (*s))
12585                 ++s;
12586               if (*s == ')')
12587                 {
12588                   *end_op = s + 1;
12589                   r = (const reg_entry *) hash_find (reg_hash, "st(0)");
12590                   know (r);
12591                   return r + fpr;
12592                 }
12593             }
12594           /* We have "%st(" then garbage.  */
12595           return (const reg_entry *) NULL;
12596         }
12597     }
12598
12599   return r && check_register (r) ? r : NULL;
12600 }
12601
12602 /* REG_STRING starts *before* REGISTER_PREFIX.  */
12603
12604 static const reg_entry *
12605 parse_register (char *reg_string, char **end_op)
12606 {
12607   const reg_entry *r;
12608
12609   if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
12610     r = parse_real_register (reg_string, end_op);
12611   else
12612     r = NULL;
12613   if (!r)
12614     {
12615       char *save = input_line_pointer;
12616       char c;
12617       symbolS *symbolP;
12618
12619       input_line_pointer = reg_string;
12620       c = get_symbol_name (&reg_string);
12621       symbolP = symbol_find (reg_string);
12622       if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
12623         {
12624           const expressionS *e = symbol_get_value_expression (symbolP);
12625
12626           know (e->X_op == O_register);
12627           know (e->X_add_number >= 0
12628                 && (valueT) e->X_add_number < i386_regtab_size);
12629           r = i386_regtab + e->X_add_number;
12630           if (!check_register (r))
12631             {
12632               as_bad (_("register '%s%s' cannot be used here"),
12633                       register_prefix, r->reg_name);
12634               r = &bad_reg;
12635             }
12636           *end_op = input_line_pointer;
12637         }
12638       *input_line_pointer = c;
12639       input_line_pointer = save;
12640     }
12641   return r;
12642 }
12643
12644 int
12645 i386_parse_name (char *name, expressionS *e, char *nextcharP)
12646 {
12647   const reg_entry *r;
12648   char *end = input_line_pointer;
12649
12650   *end = *nextcharP;
12651   r = parse_register (name, &input_line_pointer);
12652   if (r && end <= input_line_pointer)
12653     {
12654       *nextcharP = *input_line_pointer;
12655       *input_line_pointer = 0;
12656       if (r != &bad_reg)
12657         {
12658           e->X_op = O_register;
12659           e->X_add_number = r - i386_regtab;
12660         }
12661       else
12662           e->X_op = O_illegal;
12663       return 1;
12664     }
12665   input_line_pointer = end;
12666   *end = 0;
12667   return intel_syntax ? i386_intel_parse_name (name, e) : 0;
12668 }
12669
12670 void
12671 md_operand (expressionS *e)
12672 {
12673   char *end;
12674   const reg_entry *r;
12675
12676   switch (*input_line_pointer)
12677     {
12678     case REGISTER_PREFIX:
12679       r = parse_real_register (input_line_pointer, &end);
12680       if (r)
12681         {
12682           e->X_op = O_register;
12683           e->X_add_number = r - i386_regtab;
12684           input_line_pointer = end;
12685         }
12686       break;
12687
12688     case '[':
12689       gas_assert (intel_syntax);
12690       end = input_line_pointer++;
12691       expression (e);
12692       if (*input_line_pointer == ']')
12693         {
12694           ++input_line_pointer;
12695           e->X_op_symbol = make_expr_symbol (e);
12696           e->X_add_symbol = NULL;
12697           e->X_add_number = 0;
12698           e->X_op = O_index;
12699         }
12700       else
12701         {
12702           e->X_op = O_absent;
12703           input_line_pointer = end;
12704         }
12705       break;
12706     }
12707 }
12708
12709 \f
/* Single-letter command line options understood by this target
   (presumably in getopt optstring syntax, where a trailing colon marks
   an option that takes an argument - confirm against the caller).
   ELF-capable builds additionally accept k, V, Q and s.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
const char *md_shortopts = "kVQ:sqnO::";
#else
const char *md_shortopts = "qnO::";
#endif
12715
/* Codes for the long options in md_longopts below.  Each one is a
   distinct offset from OPTION_MD_BASE, and is the value that
   md_parse_option switches on.  */
#define OPTION_32 (OPTION_MD_BASE + 0)
#define OPTION_64 (OPTION_MD_BASE + 1)
#define OPTION_DIVIDE (OPTION_MD_BASE + 2)
#define OPTION_MARCH (OPTION_MD_BASE + 3)
#define OPTION_MTUNE (OPTION_MD_BASE + 4)
#define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
#define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
#define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
#define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
#define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
#define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
#define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
#define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
#define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
#define OPTION_X32 (OPTION_MD_BASE + 14)
#define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
#define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
#define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
#define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
#define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
#define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
#define OPTION_MSHARED (OPTION_MD_BASE + 21)
#define OPTION_MAMD64 (OPTION_MD_BASE + 22)
#define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
#define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
#define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
#define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
#define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
#define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
#define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
#define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
#define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
#define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
#define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
12750
/* Long command line options understood by this target.  Each entry
   gives the option name, whether it takes an argument, and the
   OPTION_* code reported for it.  Some entries are only present for
   particular object formats or target environments.  The table ends
   with an all-zero entry.  */
struct option md_longopts[] =
{
  {"32", no_argument, NULL, OPTION_32},
#if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
  {"64", no_argument, NULL, OPTION_64},
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"x32", no_argument, NULL, OPTION_X32},
  {"mshared", no_argument, NULL, OPTION_MSHARED},
  {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
#endif
  {"divide", no_argument, NULL, OPTION_DIVIDE},
  {"march", required_argument, NULL, OPTION_MARCH},
  {"mtune", required_argument, NULL, OPTION_MTUNE},
  {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
  {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
  {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
  {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
  {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
  {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
  {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
  {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
  {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
  {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
  {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
  {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
# if defined (TE_PE) || defined (TE_PEP)
  {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
#endif
  {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
  {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
  {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
  {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
  {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
  {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
  {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
  {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
  {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
  {"mlfence-before-indirect-branch", required_argument, NULL,
   OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
  {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
  {"mamd64", no_argument, NULL, OPTION_MAMD64},
  {"mintel64", no_argument, NULL, OPTION_MINTEL64},
  {NULL, no_argument, NULL, 0}
};
/* Size of the whole table in bytes (not the number of entries).  */
size_t md_longopts_size = sizeof (md_longopts);
12798
12799 int
12800 md_parse_option (int c, const char *arg)
12801 {
12802   unsigned int j;
12803   char *arch, *next, *saved, *type;
12804
12805   switch (c)
12806     {
12807     case 'n':
12808       optimize_align_code = 0;
12809       break;
12810
12811     case 'q':
12812       quiet_warnings = 1;
12813       break;
12814
12815 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12816       /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
12817          should be emitted or not.  FIXME: Not implemented.  */
12818     case 'Q':
12819       if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
12820         return 0;
12821       break;
12822
12823       /* -V: SVR4 argument to print version ID.  */
12824     case 'V':
12825       print_version_id ();
12826       break;
12827
12828       /* -k: Ignore for FreeBSD compatibility.  */
12829     case 'k':
12830       break;
12831
12832     case 's':
12833       /* -s: On i386 Solaris, this tells the native assembler to use
12834          .stab instead of .stab.excl.  We always use .stab anyhow.  */
12835       break;
12836
12837     case OPTION_MSHARED:
12838       shared = 1;
12839       break;
12840
12841     case OPTION_X86_USED_NOTE:
12842       if (strcasecmp (arg, "yes") == 0)
12843         x86_used_note = 1;
12844       else if (strcasecmp (arg, "no") == 0)
12845         x86_used_note = 0;
12846       else
12847         as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
12848       break;
12849
12850
12851 #endif
12852 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
12853      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
12854     case OPTION_64:
12855       {
12856         const char **list, **l;
12857
12858         list = bfd_target_list ();
12859         for (l = list; *l != NULL; l++)
12860           if (CONST_STRNEQ (*l, "elf64-x86-64")
12861               || strcmp (*l, "coff-x86-64") == 0
12862               || strcmp (*l, "pe-x86-64") == 0
12863               || strcmp (*l, "pei-x86-64") == 0
12864               || strcmp (*l, "mach-o-x86-64") == 0)
12865             {
12866               default_arch = "x86_64";
12867               break;
12868             }
12869         if (*l == NULL)
12870           as_fatal (_("no compiled in support for x86_64"));
12871         free (list);
12872       }
12873       break;
12874 #endif
12875
12876 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12877     case OPTION_X32:
12878       if (IS_ELF)
12879         {
12880           const char **list, **l;
12881
12882           list = bfd_target_list ();
12883           for (l = list; *l != NULL; l++)
12884             if (CONST_STRNEQ (*l, "elf32-x86-64"))
12885               {
12886                 default_arch = "x86_64:32";
12887                 break;
12888               }
12889           if (*l == NULL)
12890             as_fatal (_("no compiled in support for 32bit x86_64"));
12891           free (list);
12892         }
12893       else
12894         as_fatal (_("32bit x86_64 is only supported for ELF"));
12895       break;
12896 #endif
12897
12898     case OPTION_32:
12899       default_arch = "i386";
12900       break;
12901
12902     case OPTION_DIVIDE:
12903 #ifdef SVR4_COMMENT_CHARS
12904       {
12905         char *n, *t;
12906         const char *s;
12907
12908         n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
12909         t = n;
12910         for (s = i386_comment_chars; *s != '\0'; s++)
12911           if (*s != '/')
12912             *t++ = *s;
12913         *t = '\0';
12914         i386_comment_chars = n;
12915       }
12916 #endif
12917       break;
12918
12919     case OPTION_MARCH:
12920       saved = xstrdup (arg);
12921       arch = saved;
12922       /* Allow -march=+nosse.  */
12923       if (*arch == '+')
12924         arch++;
12925       do
12926         {
12927           if (*arch == '.')
12928             as_fatal (_("invalid -march= option: `%s'"), arg);
12929           next = strchr (arch, '+');
12930           if (next)
12931             *next++ = '\0';
12932           for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
12933             {
12934               if (strcmp (arch, cpu_arch [j].name) == 0)
12935                 {
12936                   /* Processor.  */
12937                   if (! cpu_arch[j].flags.bitfield.cpui386)
12938                     continue;
12939
12940                   cpu_arch_name = cpu_arch[j].name;
12941                   cpu_sub_arch_name = NULL;
12942                   cpu_arch_flags = cpu_arch[j].flags;
12943                   cpu_arch_isa = cpu_arch[j].type;
12944                   cpu_arch_isa_flags = cpu_arch[j].flags;
12945                   if (!cpu_arch_tune_set)
12946                     {
12947                       cpu_arch_tune = cpu_arch_isa;
12948                       cpu_arch_tune_flags = cpu_arch_isa_flags;
12949                     }
12950                   break;
12951                 }
12952               else if (*cpu_arch [j].name == '.'
12953                        && strcmp (arch, cpu_arch [j].name + 1) == 0)
12954                 {
12955                   /* ISA extension.  */
12956                   i386_cpu_flags flags;
12957
12958                   flags = cpu_flags_or (cpu_arch_flags,
12959                                         cpu_arch[j].flags);
12960
12961                   if (!cpu_flags_equal (&flags, &cpu_arch_flags))
12962                     {
12963                       if (cpu_sub_arch_name)
12964                         {
12965                           char *name = cpu_sub_arch_name;
12966                           cpu_sub_arch_name = concat (name,
12967                                                       cpu_arch[j].name,
12968                                                       (const char *) NULL);
12969                           free (name);
12970                         }
12971                       else
12972                         cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
12973                       cpu_arch_flags = flags;
12974                       cpu_arch_isa_flags = flags;
12975                     }
12976                   else
12977                     cpu_arch_isa_flags
12978                       = cpu_flags_or (cpu_arch_isa_flags,
12979                                       cpu_arch[j].flags);
12980                   break;
12981                 }
12982             }
12983
12984           if (j >= ARRAY_SIZE (cpu_arch))
12985             {
12986               /* Disable an ISA extension.  */
12987               for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
12988                 if (strcmp (arch, cpu_noarch [j].name) == 0)
12989                   {
12990                     i386_cpu_flags flags;
12991
12992                     flags = cpu_flags_and_not (cpu_arch_flags,
12993                                                cpu_noarch[j].flags);
12994                     if (!cpu_flags_equal (&flags, &cpu_arch_flags))
12995                       {
12996                         if (cpu_sub_arch_name)
12997                           {
12998                             char *name = cpu_sub_arch_name;
12999                             cpu_sub_arch_name = concat (arch,
13000                                                         (const char *) NULL);
13001                             free (name);
13002                           }
13003                         else
13004                           cpu_sub_arch_name = xstrdup (arch);
13005                         cpu_arch_flags = flags;
13006                         cpu_arch_isa_flags = flags;
13007                       }
13008                     break;
13009                   }
13010
13011               if (j >= ARRAY_SIZE (cpu_noarch))
13012                 j = ARRAY_SIZE (cpu_arch);
13013             }
13014
13015           if (j >= ARRAY_SIZE (cpu_arch))
13016             as_fatal (_("invalid -march= option: `%s'"), arg);
13017
13018           arch = next;
13019         }
13020       while (next != NULL);
13021       free (saved);
13022       break;
13023
13024     case OPTION_MTUNE:
13025       if (*arg == '.')
13026         as_fatal (_("invalid -mtune= option: `%s'"), arg);
13027       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13028         {
13029           if (strcmp (arg, cpu_arch [j].name) == 0)
13030             {
13031               cpu_arch_tune_set = 1;
13032               cpu_arch_tune = cpu_arch [j].type;
13033               cpu_arch_tune_flags = cpu_arch[j].flags;
13034               break;
13035             }
13036         }
13037       if (j >= ARRAY_SIZE (cpu_arch))
13038         as_fatal (_("invalid -mtune= option: `%s'"), arg);
13039       break;
13040
13041     case OPTION_MMNEMONIC:
13042       if (strcasecmp (arg, "att") == 0)
13043         intel_mnemonic = 0;
13044       else if (strcasecmp (arg, "intel") == 0)
13045         intel_mnemonic = 1;
13046       else
13047         as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
13048       break;
13049
13050     case OPTION_MSYNTAX:
13051       if (strcasecmp (arg, "att") == 0)
13052         intel_syntax = 0;
13053       else if (strcasecmp (arg, "intel") == 0)
13054         intel_syntax = 1;
13055       else
13056         as_fatal (_("invalid -msyntax= option: `%s'"), arg);
13057       break;
13058
13059     case OPTION_MINDEX_REG:
13060       allow_index_reg = 1;
13061       break;
13062
13063     case OPTION_MNAKED_REG:
13064       allow_naked_reg = 1;
13065       break;
13066
13067     case OPTION_MSSE2AVX:
13068       sse2avx = 1;
13069       break;
13070
13071     case OPTION_MSSE_CHECK:
13072       if (strcasecmp (arg, "error") == 0)
13073         sse_check = check_error;
13074       else if (strcasecmp (arg, "warning") == 0)
13075         sse_check = check_warning;
13076       else if (strcasecmp (arg, "none") == 0)
13077         sse_check = check_none;
13078       else
13079         as_fatal (_("invalid -msse-check= option: `%s'"), arg);
13080       break;
13081
13082     case OPTION_MOPERAND_CHECK:
13083       if (strcasecmp (arg, "error") == 0)
13084         operand_check = check_error;
13085       else if (strcasecmp (arg, "warning") == 0)
13086         operand_check = check_warning;
13087       else if (strcasecmp (arg, "none") == 0)
13088         operand_check = check_none;
13089       else
13090         as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
13091       break;
13092
13093     case OPTION_MAVXSCALAR:
13094       if (strcasecmp (arg, "128") == 0)
13095         avxscalar = vex128;
13096       else if (strcasecmp (arg, "256") == 0)
13097         avxscalar = vex256;
13098       else
13099         as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
13100       break;
13101
13102     case OPTION_MVEXWIG:
13103       if (strcmp (arg, "0") == 0)
13104         vexwig = vexw0;
13105       else if (strcmp (arg, "1") == 0)
13106         vexwig = vexw1;
13107       else
13108         as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
13109       break;
13110
13111     case OPTION_MADD_BND_PREFIX:
13112       add_bnd_prefix = 1;
13113       break;
13114
13115     case OPTION_MEVEXLIG:
13116       if (strcmp (arg, "128") == 0)
13117         evexlig = evexl128;
13118       else if (strcmp (arg, "256") == 0)
13119         evexlig = evexl256;
13120       else  if (strcmp (arg, "512") == 0)
13121         evexlig = evexl512;
13122       else
13123         as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
13124       break;
13125
13126     case OPTION_MEVEXRCIG:
13127       if (strcmp (arg, "rne") == 0)
13128         evexrcig = rne;
13129       else if (strcmp (arg, "rd") == 0)
13130         evexrcig = rd;
13131       else if (strcmp (arg, "ru") == 0)
13132         evexrcig = ru;
13133       else if (strcmp (arg, "rz") == 0)
13134         evexrcig = rz;
13135       else
13136         as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
13137       break;
13138
13139     case OPTION_MEVEXWIG:
13140       if (strcmp (arg, "0") == 0)
13141         evexwig = evexw0;
13142       else if (strcmp (arg, "1") == 0)
13143         evexwig = evexw1;
13144       else
13145         as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
13146       break;
13147
13148 # if defined (TE_PE) || defined (TE_PEP)
13149     case OPTION_MBIG_OBJ:
13150       use_big_obj = 1;
13151       break;
13152 #endif
13153
13154     case OPTION_MOMIT_LOCK_PREFIX:
13155       if (strcasecmp (arg, "yes") == 0)
13156         omit_lock_prefix = 1;
13157       else if (strcasecmp (arg, "no") == 0)
13158         omit_lock_prefix = 0;
13159       else
13160         as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
13161       break;
13162
13163     case OPTION_MFENCE_AS_LOCK_ADD:
13164       if (strcasecmp (arg, "yes") == 0)
13165         avoid_fence = 1;
13166       else if (strcasecmp (arg, "no") == 0)
13167         avoid_fence = 0;
13168       else
13169         as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
13170       break;
13171
13172     case OPTION_MLFENCE_AFTER_LOAD:
13173       if (strcasecmp (arg, "yes") == 0)
13174         lfence_after_load = 1;
13175       else if (strcasecmp (arg, "no") == 0)
13176         lfence_after_load = 0;
13177       else
13178         as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
13179       break;
13180
13181     case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
13182       if (strcasecmp (arg, "all") == 0)
13183         {
13184           lfence_before_indirect_branch = lfence_branch_all;
13185           if (lfence_before_ret == lfence_before_ret_none)
13186             lfence_before_ret = lfence_before_ret_shl;
13187         }
13188       else if (strcasecmp (arg, "memory") == 0)
13189         lfence_before_indirect_branch = lfence_branch_memory;
13190       else if (strcasecmp (arg, "register") == 0)
13191         lfence_before_indirect_branch = lfence_branch_register;
13192       else if (strcasecmp (arg, "none") == 0)
13193         lfence_before_indirect_branch = lfence_branch_none;
13194       else
13195         as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
13196                   arg);
13197       break;
13198
13199     case OPTION_MLFENCE_BEFORE_RET:
13200       if (strcasecmp (arg, "or") == 0)
13201         lfence_before_ret = lfence_before_ret_or;
13202       else if (strcasecmp (arg, "not") == 0)
13203         lfence_before_ret = lfence_before_ret_not;
13204       else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
13205         lfence_before_ret = lfence_before_ret_shl;
13206       else if (strcasecmp (arg, "none") == 0)
13207         lfence_before_ret = lfence_before_ret_none;
13208       else
13209         as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
13210                   arg);
13211       break;
13212
13213     case OPTION_MRELAX_RELOCATIONS:
13214       if (strcasecmp (arg, "yes") == 0)
13215         generate_relax_relocations = 1;
13216       else if (strcasecmp (arg, "no") == 0)
13217         generate_relax_relocations = 0;
13218       else
13219         as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
13220       break;
13221
13222     case OPTION_MALIGN_BRANCH_BOUNDARY:
13223       {
13224         char *end;
13225         long int align = strtoul (arg, &end, 0);
13226         if (*end == '\0')
13227           {
13228             if (align == 0)
13229               {
13230                 align_branch_power = 0;
13231                 break;
13232               }
13233             else if (align >= 16)
13234               {
13235                 int align_power;
13236                 for (align_power = 0;
13237                      (align & 1) == 0;
13238                      align >>= 1, align_power++)
13239                   continue;
13240                 /* Limit alignment power to 31.  */
13241                 if (align == 1 && align_power < 32)
13242                   {
13243                     align_branch_power = align_power;
13244                     break;
13245                   }
13246               }
13247           }
13248         as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
13249       }
13250       break;
13251
13252     case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
13253       {
13254         char *end;
13255         int align = strtoul (arg, &end, 0);
13256         /* Some processors only support 5 prefixes.  */
13257         if (*end == '\0' && align >= 0 && align < 6)
13258           {
13259             align_branch_prefix_size = align;
13260             break;
13261           }
13262         as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
13263                   arg);
13264       }
13265       break;
13266
13267     case OPTION_MALIGN_BRANCH:
13268       align_branch = 0;
13269       saved = xstrdup (arg);
13270       type = saved;
13271       do
13272         {
13273           next = strchr (type, '+');
13274           if (next)
13275             *next++ = '\0';
13276           if (strcasecmp (type, "jcc") == 0)
13277             align_branch |= align_branch_jcc_bit;
13278           else if (strcasecmp (type, "fused") == 0)
13279             align_branch |= align_branch_fused_bit;
13280           else if (strcasecmp (type, "jmp") == 0)
13281             align_branch |= align_branch_jmp_bit;
13282           else if (strcasecmp (type, "call") == 0)
13283             align_branch |= align_branch_call_bit;
13284           else if (strcasecmp (type, "ret") == 0)
13285             align_branch |= align_branch_ret_bit;
13286           else if (strcasecmp (type, "indirect") == 0)
13287             align_branch |= align_branch_indirect_bit;
13288           else
13289             as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
13290           type = next;
13291         }
13292       while (next != NULL);
13293       free (saved);
13294       break;
13295
13296     case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
13297       align_branch_power = 5;
13298       align_branch_prefix_size = 5;
13299       align_branch = (align_branch_jcc_bit
13300                       | align_branch_fused_bit
13301                       | align_branch_jmp_bit);
13302       break;
13303
13304     case OPTION_MAMD64:
13305       isa64 = amd64;
13306       break;
13307
13308     case OPTION_MINTEL64:
13309       isa64 = intel64;
13310       break;
13311
13312     case 'O':
13313       if (arg == NULL)
13314         {
13315           optimize = 1;
13316           /* Turn off -Os.  */
13317           optimize_for_space = 0;
13318         }
13319       else if (*arg == 's')
13320         {
13321           optimize_for_space = 1;
13322           /* Turn on all encoding optimizations.  */
13323           optimize = INT_MAX;
13324         }
13325       else
13326         {
13327           optimize = atoi (arg);
13328           /* Turn off -Os.  */
13329           optimize_for_space = 0;
13330         }
13331       break;
13332
13333     default:
13334       return 0;
13335     }
13336   return 1;
13337 }
13338
13339 #define MESSAGE_TEMPLATE \
13340 "                                                                                "
13341
13342 static char *
13343 output_message (FILE *stream, char *p, char *message, char *start,
13344                 int *left_p, const char *name, int len)
13345 {
13346   int size = sizeof (MESSAGE_TEMPLATE);
13347   int left = *left_p;
13348
13349   /* Reserve 2 spaces for ", " or ",\0" */
13350   left -= len + 2;
13351
13352   /* Check if there is any room.  */
13353   if (left >= 0)
13354     {
13355       if (p != start)
13356         {
13357           *p++ = ',';
13358           *p++ = ' ';
13359         }
13360       p = mempcpy (p, name, len);
13361     }
13362   else
13363     {
13364       /* Output the current message now and start a new one.  */
13365       *p++ = ',';
13366       *p = '\0';
13367       fprintf (stream, "%s\n", message);
13368       p = start;
13369       left = size - (start - message) - len - 2;
13370
13371       gas_assert (left >= 0);
13372
13373       p = mempcpy (p, name, len);
13374     }
13375
13376   *left_p = left;
13377   return p;
13378 }
13379
13380 static void
13381 show_arch (FILE *stream, int ext, int check)
13382 {
13383   static char message[] = MESSAGE_TEMPLATE;
13384   char *start = message + 27;
13385   char *p;
13386   int size = sizeof (MESSAGE_TEMPLATE);
13387   int left;
13388   const char *name;
13389   int len;
13390   unsigned int j;
13391
13392   p = start;
13393   left = size - (start - message);
13394   for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13395     {
13396       /* Should it be skipped?  */
13397       if (cpu_arch [j].skip)
13398         continue;
13399
13400       name = cpu_arch [j].name;
13401       len = cpu_arch [j].len;
13402       if (*name == '.')
13403         {
13404           /* It is an extension.  Skip if we aren't asked to show it.  */
13405           if (ext)
13406             {
13407               name++;
13408               len--;
13409             }
13410           else
13411             continue;
13412         }
13413       else if (ext)
13414         {
13415           /* It is an processor.  Skip if we show only extension.  */
13416           continue;
13417         }
13418       else if (check && ! cpu_arch[j].flags.bitfield.cpui386)
13419         {
13420           /* It is an impossible processor - skip.  */
13421           continue;
13422         }
13423
13424       p = output_message (stream, p, message, start, &left, name, len);
13425     }
13426
13427   /* Display disabled extensions.  */
13428   if (ext)
13429     for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
13430       {
13431         name = cpu_noarch [j].name;
13432         len = cpu_noarch [j].len;
13433         p = output_message (stream, p, message, start, &left, name,
13434                             len);
13435       }
13436
13437   *p = '\0';
13438   fprintf (stream, "%s\n", message);
13439 }
13440
13441 void
13442 md_show_usage (FILE *stream)
13443 {
13444 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13445   fprintf (stream, _("\
13446   -Qy, -Qn                ignored\n\
13447   -V                      print assembler version number\n\
13448   -k                      ignored\n"));
13449 #endif
13450   fprintf (stream, _("\
13451   -n                      Do not optimize code alignment\n\
13452   -q                      quieten some warnings\n"));
13453 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13454   fprintf (stream, _("\
13455   -s                      ignored\n"));
13456 #endif
13457 #if defined BFD64 && (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13458                       || defined (TE_PE) || defined (TE_PEP))
13459   fprintf (stream, _("\
13460   --32/--64/--x32         generate 32bit/64bit/x32 code\n"));
13461 #endif
13462 #ifdef SVR4_COMMENT_CHARS
13463   fprintf (stream, _("\
13464   --divide                do not treat `/' as a comment character\n"));
13465 #else
13466   fprintf (stream, _("\
13467   --divide                ignored\n"));
13468 #endif
13469   fprintf (stream, _("\
13470   -march=CPU[,+EXTENSION...]\n\
13471                           generate code for CPU and EXTENSION, CPU is one of:\n"));
13472   show_arch (stream, 0, 1);
13473   fprintf (stream, _("\
13474                           EXTENSION is combination of:\n"));
13475   show_arch (stream, 1, 0);
13476   fprintf (stream, _("\
13477   -mtune=CPU              optimize for CPU, CPU is one of:\n"));
13478   show_arch (stream, 0, 0);
13479   fprintf (stream, _("\
13480   -msse2avx               encode SSE instructions with VEX prefix\n"));
13481   fprintf (stream, _("\
13482   -msse-check=[none|error|warning] (default: warning)\n\
13483                           check SSE instructions\n"));
13484   fprintf (stream, _("\
13485   -moperand-check=[none|error|warning] (default: warning)\n\
13486                           check operand combinations for validity\n"));
13487   fprintf (stream, _("\
13488   -mavxscalar=[128|256] (default: 128)\n\
13489                           encode scalar AVX instructions with specific vector\n\
13490                            length\n"));
13491   fprintf (stream, _("\
13492   -mvexwig=[0|1] (default: 0)\n\
13493                           encode VEX instructions with specific VEX.W value\n\
13494                            for VEX.W bit ignored instructions\n"));
13495   fprintf (stream, _("\
13496   -mevexlig=[128|256|512] (default: 128)\n\
13497                           encode scalar EVEX instructions with specific vector\n\
13498                            length\n"));
13499   fprintf (stream, _("\
13500   -mevexwig=[0|1] (default: 0)\n\
13501                           encode EVEX instructions with specific EVEX.W value\n\
13502                            for EVEX.W bit ignored instructions\n"));
13503   fprintf (stream, _("\
13504   -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
13505                           encode EVEX instructions with specific EVEX.RC value\n\
13506                            for SAE-only ignored instructions\n"));
13507   fprintf (stream, _("\
13508   -mmnemonic=[att|intel] "));
13509   if (SYSV386_COMPAT)
13510     fprintf (stream, _("(default: att)\n"));
13511   else
13512     fprintf (stream, _("(default: intel)\n"));
13513   fprintf (stream, _("\
13514                           use AT&T/Intel mnemonic\n"));
13515   fprintf (stream, _("\
13516   -msyntax=[att|intel] (default: att)\n\
13517                           use AT&T/Intel syntax\n"));
13518   fprintf (stream, _("\
13519   -mindex-reg             support pseudo index registers\n"));
13520   fprintf (stream, _("\
13521   -mnaked-reg             don't require `%%' prefix for registers\n"));
13522   fprintf (stream, _("\
13523   -madd-bnd-prefix        add BND prefix for all valid branches\n"));
13524 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13525   fprintf (stream, _("\
13526   -mshared                disable branch optimization for shared code\n"));
13527   fprintf (stream, _("\
13528   -mx86-used-note=[no|yes] "));
13529   if (DEFAULT_X86_USED_NOTE)
13530     fprintf (stream, _("(default: yes)\n"));
13531   else
13532     fprintf (stream, _("(default: no)\n"));
13533   fprintf (stream, _("\
13534                           generate x86 used ISA and feature properties\n"));
13535 #endif
13536 #if defined (TE_PE) || defined (TE_PEP)
13537   fprintf (stream, _("\
13538   -mbig-obj               generate big object files\n"));
13539 #endif
13540   fprintf (stream, _("\
13541   -momit-lock-prefix=[no|yes] (default: no)\n\
13542                           strip all lock prefixes\n"));
13543   fprintf (stream, _("\
13544   -mfence-as-lock-add=[no|yes] (default: no)\n\
13545                           encode lfence, mfence and sfence as\n\
13546                            lock addl $0x0, (%%{re}sp)\n"));
13547   fprintf (stream, _("\
13548   -mrelax-relocations=[no|yes] "));
13549   if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
13550     fprintf (stream, _("(default: yes)\n"));
13551   else
13552     fprintf (stream, _("(default: no)\n"));
13553   fprintf (stream, _("\
13554                           generate relax relocations\n"));
13555   fprintf (stream, _("\
13556   -malign-branch-boundary=NUM (default: 0)\n\
13557                           align branches within NUM byte boundary\n"));
13558   fprintf (stream, _("\
13559   -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
13560                           TYPE is combination of jcc, fused, jmp, call, ret,\n\
13561                            indirect\n\
13562                           specify types of branches to align\n"));
13563   fprintf (stream, _("\
13564   -malign-branch-prefix-size=NUM (default: 5)\n\
13565                           align branches with NUM prefixes per instruction\n"));
13566   fprintf (stream, _("\
13567   -mbranches-within-32B-boundaries\n\
13568                           align branches within 32 byte boundary\n"));
13569   fprintf (stream, _("\
13570   -mlfence-after-load=[no|yes] (default: no)\n\
13571                           generate lfence after load\n"));
13572   fprintf (stream, _("\
13573   -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
13574                           generate lfence before indirect near branch\n"));
13575   fprintf (stream, _("\
13576   -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
13577                           generate lfence before ret\n"));
13578   fprintf (stream, _("\
13579   -mamd64                 accept only AMD64 ISA [default]\n"));
13580   fprintf (stream, _("\
13581   -mintel64               accept only Intel64 ISA\n"));
13582 }
13583
13584 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
13585      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13586      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13587
13588 /* Pick the target format to use.  */
13589
13590 const char *
13591 i386_target_format (void)
13592 {
13593   if (!strncmp (default_arch, "x86_64", 6))
13594     {
13595       update_code_flag (CODE_64BIT, 1);
13596       if (default_arch[6] == '\0')
13597         x86_elf_abi = X86_64_ABI;
13598       else
13599         x86_elf_abi = X86_64_X32_ABI;
13600     }
13601   else if (!strcmp (default_arch, "i386"))
13602     update_code_flag (CODE_32BIT, 1);
13603   else if (!strcmp (default_arch, "iamcu"))
13604     {
13605       update_code_flag (CODE_32BIT, 1);
13606       if (cpu_arch_isa == PROCESSOR_UNKNOWN)
13607         {
13608           static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
13609           cpu_arch_name = "iamcu";
13610           cpu_sub_arch_name = NULL;
13611           cpu_arch_flags = iamcu_flags;
13612           cpu_arch_isa = PROCESSOR_IAMCU;
13613           cpu_arch_isa_flags = iamcu_flags;
13614           if (!cpu_arch_tune_set)
13615             {
13616               cpu_arch_tune = cpu_arch_isa;
13617               cpu_arch_tune_flags = cpu_arch_isa_flags;
13618             }
13619         }
13620       else if (cpu_arch_isa != PROCESSOR_IAMCU)
13621         as_fatal (_("Intel MCU doesn't support `%s' architecture"),
13622                   cpu_arch_name);
13623     }
13624   else
13625     as_fatal (_("unknown architecture"));
13626
13627   if (cpu_flags_all_zero (&cpu_arch_isa_flags))
13628     cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].flags;
13629   if (cpu_flags_all_zero (&cpu_arch_tune_flags))
13630     cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].flags;
13631
13632   switch (OUTPUT_FLAVOR)
13633     {
13634 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
13635     case bfd_target_aout_flavour:
13636       return AOUT_TARGET_FORMAT;
13637 #endif
13638 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
13639 # if defined (TE_PE) || defined (TE_PEP)
13640     case bfd_target_coff_flavour:
13641       if (flag_code == CODE_64BIT)
13642         return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
13643       else
13644         return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
13645 # elif defined (TE_GO32)
13646     case bfd_target_coff_flavour:
13647       return "coff-go32";
13648 # else
13649     case bfd_target_coff_flavour:
13650       return "coff-i386";
13651 # endif
13652 #endif
13653 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
13654     case bfd_target_elf_flavour:
13655       {
13656         const char *format;
13657
13658         switch (x86_elf_abi)
13659           {
13660           default:
13661             format = ELF_TARGET_FORMAT;
13662 #ifndef TE_SOLARIS
13663             tls_get_addr = "___tls_get_addr";
13664 #endif
13665             break;
13666           case X86_64_ABI:
13667             use_rela_relocations = 1;
13668             object_64bit = 1;
13669 #ifndef TE_SOLARIS
13670             tls_get_addr = "__tls_get_addr";
13671 #endif
13672             format = ELF_TARGET_FORMAT64;
13673             break;
13674           case X86_64_X32_ABI:
13675             use_rela_relocations = 1;
13676             object_64bit = 1;
13677 #ifndef TE_SOLARIS
13678             tls_get_addr = "__tls_get_addr";
13679 #endif
13680             disallow_64bit_reloc = 1;
13681             format = ELF_TARGET_FORMAT32;
13682             break;
13683           }
13684         if (cpu_arch_isa == PROCESSOR_L1OM)
13685           {
13686             if (x86_elf_abi != X86_64_ABI)
13687               as_fatal (_("Intel L1OM is 64bit only"));
13688             return ELF_TARGET_L1OM_FORMAT;
13689           }
13690         else if (cpu_arch_isa == PROCESSOR_K1OM)
13691           {
13692             if (x86_elf_abi != X86_64_ABI)
13693               as_fatal (_("Intel K1OM is 64bit only"));
13694             return ELF_TARGET_K1OM_FORMAT;
13695           }
13696         else if (cpu_arch_isa == PROCESSOR_IAMCU)
13697           {
13698             if (x86_elf_abi != I386_ABI)
13699               as_fatal (_("Intel MCU is 32bit only"));
13700             return ELF_TARGET_IAMCU_FORMAT;
13701           }
13702         else
13703           return format;
13704       }
13705 #endif
13706 #if defined (OBJ_MACH_O)
13707     case bfd_target_mach_o_flavour:
13708       if (flag_code == CODE_64BIT)
13709         {
13710           use_rela_relocations = 1;
13711           object_64bit = 1;
13712           return "mach-o-x86-64";
13713         }
13714       else
13715         return "mach-o-i386";
13716 #endif
13717     default:
13718       abort ();
13719       return NULL;
13720     }
13721 }
13722
13723 #endif /* OBJ_MAYBE_ more than one  */
13724 \f
13725 symbolS *
13726 md_undefined_symbol (char *name)
13727 {
13728   if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
13729       && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
13730       && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
13731       && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
13732     {
13733       if (!GOT_symbol)
13734         {
13735           if (symbol_find (name))
13736             as_bad (_("GOT already in symbol table"));
13737           GOT_symbol = symbol_new (name, undefined_section,
13738                                    (valueT) 0, &zero_address_frag);
13739         };
13740       return GOT_symbol;
13741     }
13742   return 0;
13743 }
13744
13745 /* Round up a section size to the appropriate boundary.  */
13746
13747 valueT
13748 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
13749 {
13750 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
13751   if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
13752     {
13753       /* For a.out, force the section size to be aligned.  If we don't do
13754          this, BFD will align it for us, but it will not write out the
13755          final bytes of the section.  This may be a bug in BFD, but it is
13756          easier to fix it here since that is how the other a.out targets
13757          work.  */
13758       int align;
13759
13760       align = bfd_section_alignment (segment);
13761       size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
13762     }
13763 #endif
13764
13765   return size;
13766 }
13767
13768 /* On the i386, PC-relative offsets are relative to the start of the
13769    next instruction.  That is, the address of the offset, plus its
13770    size, since the offset is always the last part of the insn.  */
13771
13772 long
13773 md_pcrel_from (fixS *fixP)
13774 {
13775   return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
13776 }
13777
13778 #ifndef I386COFF
13779
13780 static void
13781 s_bss (int ignore ATTRIBUTE_UNUSED)
13782 {
13783   int temp;
13784
13785 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13786   if (IS_ELF)
13787     obj_elf_section_change_hook ();
13788 #endif
13789   temp = get_absolute_expression ();
13790   subseg_set (bss_section, (subsegT) temp);
13791   demand_empty_rest_of_line ();
13792 }
13793
13794 #endif
13795
13796 /* Remember constant directive.  */
13797
13798 void
13799 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
13800 {
13801   if (last_insn.kind != last_insn_directive
13802       && (bfd_section_flags (now_seg) & SEC_CODE))
13803     {
13804       last_insn.seg = now_seg;
13805       last_insn.kind = last_insn_directive;
13806       last_insn.name = "constant directive";
13807       last_insn.file = as_where (&last_insn.line);
13808       if (lfence_before_ret != lfence_before_ret_none)
13809         {
13810           if (lfence_before_indirect_branch != lfence_branch_none)
13811             as_warn (_("constant directive skips -mlfence-before-ret "
13812                        "and -mlfence-before-indirect-branch"));
13813           else
13814             as_warn (_("constant directive skips -mlfence-before-ret"));
13815         }
13816       else if (lfence_before_indirect_branch != lfence_branch_none)
13817         as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
13818     }
13819 }
13820
13821 void
13822 i386_validate_fix (fixS *fixp)
13823 {
13824   if (fixp->fx_subsy)
13825     {
13826       if (fixp->fx_subsy == GOT_symbol)
13827         {
13828           if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
13829             {
13830               if (!object_64bit)
13831                 abort ();
13832 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13833               if (fixp->fx_tcbit2)
13834                 fixp->fx_r_type = (fixp->fx_tcbit
13835                                    ? BFD_RELOC_X86_64_REX_GOTPCRELX
13836                                    : BFD_RELOC_X86_64_GOTPCRELX);
13837               else
13838 #endif
13839                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
13840             }
13841           else
13842             {
13843               if (!object_64bit)
13844                 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
13845               else
13846                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
13847             }
13848           fixp->fx_subsy = 0;
13849         }
13850     }
13851 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13852   else if (!object_64bit)
13853     {
13854       if (fixp->fx_r_type == BFD_RELOC_386_GOT32
13855           && fixp->fx_tcbit2)
13856         fixp->fx_r_type = BFD_RELOC_386_GOT32X;
13857     }
13858 #endif
13859 }
13860
13861 arelent *
13862 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
13863 {
13864   arelent *rel;
13865   bfd_reloc_code_real_type code;
13866
13867   switch (fixp->fx_r_type)
13868     {
13869 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13870     case BFD_RELOC_SIZE32:
13871     case BFD_RELOC_SIZE64:
13872       if (S_IS_DEFINED (fixp->fx_addsy)
13873           && !S_IS_EXTERNAL (fixp->fx_addsy))
13874         {
13875           /* Resolve size relocation against local symbol to size of
13876              the symbol plus addend.  */
13877           valueT value = S_GET_SIZE (fixp->fx_addsy) + fixp->fx_offset;
13878           if (fixp->fx_r_type == BFD_RELOC_SIZE32
13879               && !fits_in_unsigned_long (value))
13880             as_bad_where (fixp->fx_file, fixp->fx_line,
13881                           _("symbol size computation overflow"));
13882           fixp->fx_addsy = NULL;
13883           fixp->fx_subsy = NULL;
13884           md_apply_fix (fixp, (valueT *) &value, NULL);
13885           return NULL;
13886         }
13887 #endif
13888       /* Fall through.  */
13889
13890     case BFD_RELOC_X86_64_PLT32:
13891     case BFD_RELOC_X86_64_GOT32:
13892     case BFD_RELOC_X86_64_GOTPCREL:
13893     case BFD_RELOC_X86_64_GOTPCRELX:
13894     case BFD_RELOC_X86_64_REX_GOTPCRELX:
13895     case BFD_RELOC_386_PLT32:
13896     case BFD_RELOC_386_GOT32:
13897     case BFD_RELOC_386_GOT32X:
13898     case BFD_RELOC_386_GOTOFF:
13899     case BFD_RELOC_386_GOTPC:
13900     case BFD_RELOC_386_TLS_GD:
13901     case BFD_RELOC_386_TLS_LDM:
13902     case BFD_RELOC_386_TLS_LDO_32:
13903     case BFD_RELOC_386_TLS_IE_32:
13904     case BFD_RELOC_386_TLS_IE:
13905     case BFD_RELOC_386_TLS_GOTIE:
13906     case BFD_RELOC_386_TLS_LE_32:
13907     case BFD_RELOC_386_TLS_LE:
13908     case BFD_RELOC_386_TLS_GOTDESC:
13909     case BFD_RELOC_386_TLS_DESC_CALL:
13910     case BFD_RELOC_X86_64_TLSGD:
13911     case BFD_RELOC_X86_64_TLSLD:
13912     case BFD_RELOC_X86_64_DTPOFF32:
13913     case BFD_RELOC_X86_64_DTPOFF64:
13914     case BFD_RELOC_X86_64_GOTTPOFF:
13915     case BFD_RELOC_X86_64_TPOFF32:
13916     case BFD_RELOC_X86_64_TPOFF64:
13917     case BFD_RELOC_X86_64_GOTOFF64:
13918     case BFD_RELOC_X86_64_GOTPC32:
13919     case BFD_RELOC_X86_64_GOT64:
13920     case BFD_RELOC_X86_64_GOTPCREL64:
13921     case BFD_RELOC_X86_64_GOTPC64:
13922     case BFD_RELOC_X86_64_GOTPLT64:
13923     case BFD_RELOC_X86_64_PLTOFF64:
13924     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
13925     case BFD_RELOC_X86_64_TLSDESC_CALL:
13926     case BFD_RELOC_RVA:
13927     case BFD_RELOC_VTABLE_ENTRY:
13928     case BFD_RELOC_VTABLE_INHERIT:
13929 #ifdef TE_PE
13930     case BFD_RELOC_32_SECREL:
13931 #endif
13932       code = fixp->fx_r_type;
13933       break;
13934     case BFD_RELOC_X86_64_32S:
13935       if (!fixp->fx_pcrel)
13936         {
13937           /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
13938           code = fixp->fx_r_type;
13939           break;
13940         }
13941       /* Fall through.  */
13942     default:
13943       if (fixp->fx_pcrel)
13944         {
13945           switch (fixp->fx_size)
13946             {
13947             default:
13948               as_bad_where (fixp->fx_file, fixp->fx_line,
13949                             _("can not do %d byte pc-relative relocation"),
13950                             fixp->fx_size);
13951               code = BFD_RELOC_32_PCREL;
13952               break;
13953             case 1: code = BFD_RELOC_8_PCREL;  break;
13954             case 2: code = BFD_RELOC_16_PCREL; break;
13955             case 4: code = BFD_RELOC_32_PCREL; break;
13956 #ifdef BFD64
13957             case 8: code = BFD_RELOC_64_PCREL; break;
13958 #endif
13959             }
13960         }
13961       else
13962         {
13963           switch (fixp->fx_size)
13964             {
13965             default:
13966               as_bad_where (fixp->fx_file, fixp->fx_line,
13967                             _("can not do %d byte relocation"),
13968                             fixp->fx_size);
13969               code = BFD_RELOC_32;
13970               break;
13971             case 1: code = BFD_RELOC_8;  break;
13972             case 2: code = BFD_RELOC_16; break;
13973             case 4: code = BFD_RELOC_32; break;
13974 #ifdef BFD64
13975             case 8: code = BFD_RELOC_64; break;
13976 #endif
13977             }
13978         }
13979       break;
13980     }
13981
13982   if ((code == BFD_RELOC_32
13983        || code == BFD_RELOC_32_PCREL
13984        || code == BFD_RELOC_X86_64_32S)
13985       && GOT_symbol
13986       && fixp->fx_addsy == GOT_symbol)
13987     {
13988       if (!object_64bit)
13989         code = BFD_RELOC_386_GOTPC;
13990       else
13991         code = BFD_RELOC_X86_64_GOTPC32;
13992     }
13993   if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
13994       && GOT_symbol
13995       && fixp->fx_addsy == GOT_symbol)
13996     {
13997       code = BFD_RELOC_X86_64_GOTPC64;
13998     }
13999
14000   rel = XNEW (arelent);
14001   rel->sym_ptr_ptr = XNEW (asymbol *);
14002   *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
14003
14004   rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
14005
14006   if (!use_rela_relocations)
14007     {
14008       /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
14009          vtable entry to be used in the relocation's section offset.  */
14010       if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
14011         rel->address = fixp->fx_offset;
14012 #if defined (OBJ_COFF) && defined (TE_PE)
14013       else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
14014         rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
14015       else
14016 #endif
14017       rel->addend = 0;
14018     }
14019   /* Use the rela in 64bit mode.  */
14020   else
14021     {
14022       if (disallow_64bit_reloc)
14023         switch (code)
14024           {
14025           case BFD_RELOC_X86_64_DTPOFF64:
14026           case BFD_RELOC_X86_64_TPOFF64:
14027           case BFD_RELOC_64_PCREL:
14028           case BFD_RELOC_X86_64_GOTOFF64:
14029           case BFD_RELOC_X86_64_GOT64:
14030           case BFD_RELOC_X86_64_GOTPCREL64:
14031           case BFD_RELOC_X86_64_GOTPC64:
14032           case BFD_RELOC_X86_64_GOTPLT64:
14033           case BFD_RELOC_X86_64_PLTOFF64:
14034             as_bad_where (fixp->fx_file, fixp->fx_line,
14035                           _("cannot represent relocation type %s in x32 mode"),
14036                           bfd_get_reloc_code_name (code));
14037             break;
14038           default:
14039             break;
14040           }
14041
14042       if (!fixp->fx_pcrel)
14043         rel->addend = fixp->fx_offset;
14044       else
14045         switch (code)
14046           {
14047           case BFD_RELOC_X86_64_PLT32:
14048           case BFD_RELOC_X86_64_GOT32:
14049           case BFD_RELOC_X86_64_GOTPCREL:
14050           case BFD_RELOC_X86_64_GOTPCRELX:
14051           case BFD_RELOC_X86_64_REX_GOTPCRELX:
14052           case BFD_RELOC_X86_64_TLSGD:
14053           case BFD_RELOC_X86_64_TLSLD:
14054           case BFD_RELOC_X86_64_GOTTPOFF:
14055           case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14056           case BFD_RELOC_X86_64_TLSDESC_CALL:
14057             rel->addend = fixp->fx_offset - fixp->fx_size;
14058             break;
14059           default:
14060             rel->addend = (section->vma
14061                            - fixp->fx_size
14062                            + fixp->fx_addnumber
14063                            + md_pcrel_from (fixp));
14064             break;
14065           }
14066     }
14067
14068   rel->howto = bfd_reloc_type_lookup (stdoutput, code);
14069   if (rel->howto == NULL)
14070     {
14071       as_bad_where (fixp->fx_file, fixp->fx_line,
14072                     _("cannot represent relocation type %s"),
14073                     bfd_get_reloc_code_name (code));
14074       /* Set howto to a garbage value so that we can keep going.  */
14075       rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
14076       gas_assert (rel->howto != NULL);
14077     }
14078
14079   return rel;
14080 }
14081
14082 #include "tc-i386-intel.c"
14083
14084 void
14085 tc_x86_parse_to_dw2regnum (expressionS *exp)
14086 {
14087   int saved_naked_reg;
14088   char saved_register_dot;
14089
14090   saved_naked_reg = allow_naked_reg;
14091   allow_naked_reg = 1;
14092   saved_register_dot = register_chars['.'];
14093   register_chars['.'] = '.';
14094   allow_pseudo_reg = 1;
14095   expression_and_evaluate (exp);
14096   allow_pseudo_reg = 0;
14097   register_chars['.'] = saved_register_dot;
14098   allow_naked_reg = saved_naked_reg;
14099
14100   if (exp->X_op == O_register && exp->X_add_number >= 0)
14101     {
14102       if ((addressT) exp->X_add_number < i386_regtab_size)
14103         {
14104           exp->X_op = O_constant;
14105           exp->X_add_number = i386_regtab[exp->X_add_number]
14106                               .dw2_regnum[flag_code >> 1];
14107         }
14108       else
14109         exp->X_op = O_illegal;
14110     }
14111 }
14112
14113 void
14114 tc_x86_frame_initial_instructions (void)
14115 {
14116   static unsigned int sp_regno[2];
14117
14118   if (!sp_regno[flag_code >> 1])
14119     {
14120       char *saved_input = input_line_pointer;
14121       char sp[][4] = {"esp", "rsp"};
14122       expressionS exp;
14123
14124       input_line_pointer = sp[flag_code >> 1];
14125       tc_x86_parse_to_dw2regnum (&exp);
14126       gas_assert (exp.X_op == O_constant);
14127       sp_regno[flag_code >> 1] = exp.X_add_number;
14128       input_line_pointer = saved_input;
14129     }
14130
14131   cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
14132   cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
14133 }
14134
14135 int
14136 x86_dwarf2_addr_size (void)
14137 {
14138 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14139   if (x86_elf_abi == X86_64_X32_ABI)
14140     return 4;
14141 #endif
14142   return bfd_arch_bits_per_address (stdoutput) / 8;
14143 }
14144
14145 int
14146 i386_elf_section_type (const char *str, size_t len)
14147 {
14148   if (flag_code == CODE_64BIT
14149       && len == sizeof ("unwind") - 1
14150       && strncmp (str, "unwind", 6) == 0)
14151     return SHT_X86_64_UNWIND;
14152
14153   return -1;
14154 }
14155
14156 #ifdef TE_SOLARIS
14157 void
14158 i386_solaris_fix_up_eh_frame (segT sec)
14159 {
14160   if (flag_code == CODE_64BIT)
14161     elf_section_type (sec) = SHT_X86_64_UNWIND;
14162 }
14163 #endif
14164
14165 #ifdef TE_PE
14166 void
14167 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
14168 {
14169   expressionS exp;
14170
14171   exp.X_op = O_secrel;
14172   exp.X_add_symbol = symbol;
14173   exp.X_add_number = 0;
14174   emit_expr (&exp, size);
14175 }
14176 #endif
14177
14178 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14179 /* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */
14180
14181 bfd_vma
14182 x86_64_section_letter (int letter, const char **ptr_msg)
14183 {
14184   if (flag_code == CODE_64BIT)
14185     {
14186       if (letter == 'l')
14187         return SHF_X86_64_LARGE;
14188
14189       *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
14190     }
14191   else
14192     *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
14193   return -1;
14194 }
14195
14196 bfd_vma
14197 x86_64_section_word (char *str, size_t len)
14198 {
14199   if (len == 5 && flag_code == CODE_64BIT && CONST_STRNEQ (str, "large"))
14200     return SHF_X86_64_LARGE;
14201
14202   return -1;
14203 }
14204
14205 static void
14206 handle_large_common (int small ATTRIBUTE_UNUSED)
14207 {
14208   if (flag_code != CODE_64BIT)
14209     {
14210       s_comm_internal (0, elf_common_parse);
14211       as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
14212     }
14213   else
14214     {
14215       static segT lbss_section;
14216       asection *saved_com_section_ptr = elf_com_section_ptr;
14217       asection *saved_bss_section = bss_section;
14218
14219       if (lbss_section == NULL)
14220         {
14221           flagword applicable;
14222           segT seg = now_seg;
14223           subsegT subseg = now_subseg;
14224
14225           /* The .lbss section is for local .largecomm symbols.  */
14226           lbss_section = subseg_new (".lbss", 0);
14227           applicable = bfd_applicable_section_flags (stdoutput);
14228           bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
14229           seg_info (lbss_section)->bss = 1;
14230
14231           subseg_set (seg, subseg);
14232         }
14233
14234       elf_com_section_ptr = &_bfd_elf_large_com_section;
14235       bss_section = lbss_section;
14236
14237       s_comm_internal (0, elf_common_parse);
14238
14239       elf_com_section_ptr = saved_com_section_ptr;
14240       bss_section = saved_bss_section;
14241     }
14242 }
14243 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */